為什麼要使用headless mode?
因為執行的效率上較高。
Headless browser is a browser that can be used without a graphical interface. It can be controlled programmatically to automate tasks, such as doing QA (Quality Assurance) tests.
比較麻煩的網頁裡的 captcha 需要另外再用OCR 程式來處理。遇到開放性的QA 問題,需要使用類似 ChatGPT 來解決。
常見的 headless 解法有哪些?
- PhantomJS
  https://phantomjs.org/
- CasperJS
  (This repository has been archived by the owner on Jun 20, 202)
  https://github.com/casperjs/casperjs
- Nightmare
  http://www.nightmarejs.org/
- Mocha
  https://mochajs.org/
- Puppeteer
  https://pptr.dev/
- Selenium
  https://www.selenium.dev/
- Playwright
  https://playwright.dev/
要如何檢測使用者是使用 selenium 的 headless 模式?
網路上有人提供入門版的檢查:
https://github.com/LouisKlimek/HeadlessDetectJS
上面提供了幾個 selenium headless 模式的檢測方式,但都很入門,實際上並沒有幫助!只要修改 user_agent,headlessDetector.getHeadlessScore() 的回傳值就會是 0,也就是未被偵測到(undetected)。
ticketmaster.sg 的 iamNotaRobot 檢查:
/**
 * Map a locale tag onto one of the captcha translation languages.
 *
 * The lookup is case-insensitive. Anything that is not a known tag (including
 * missing or non-string input) falls back to English. A Map is used instead of
 * a plain object so that names inherited from Object.prototype (for example
 * "constructor" or "__proto__") are never returned as a "language".
 *
 * @param {string} [a] - Locale tag such as "en-US" or "fr-ca".
 * @returns {string} One of "en", "fr" or "es".
 */
function pxLang(a) {
  const languageByLocale = new Map([
    ['en', 'en'],
    ['en-us', 'en'],
    ['fr-ca', 'fr'],
    ['es-mx', 'es'],
  ]);
  if (typeof a !== 'string') {
    return 'en';
  }
  return languageByLocale.get(a.toLowerCase()) || 'en';
}
/**
 * Render the "press & hold" captcha variant inside the given container.
 *
 * Writes the page skeleton into `epsCaptcha`, publishes the `_px*` settings on
 * `window`, loads captcha.js from the primary CDN host (falling back to the
 * alternate host if that load errors), and finally loads the shared assets via
 * loadCommonResources().
 *
 * Reads the global `_pxAppId`, which must be defined before this is called.
 *
 * @param {string} lang - Locale tag; normalised through pxLang().
 * @param {string} [brand='api'] - Accepted for signature parity; not used here.
 * @param {string} [action='captcha'] - Accepted for signature parity; not used here.
 * @param {Element} epsCaptcha - Container element whose content is replaced.
 * @param {string} baseUrl - Origin passed on to loadCommonResources().
 */
function iamNotaRobotPX(lang, brand = 'api', action = 'captcha', epsCaptcha, baseUrl) {
  epsCaptcha.innerHTML = "<div class='box'><div class='c1' id='t1'></div><div class='c2' id='t2'></div><ul class='c3' id='t3'></ul><div class='c4' id='t4'></div></div><div class='be'></div><section class='container'><div class='content-wrapper'><div class='content'><div id='captcha-box'></div></div></div><div class='page-footer-wrapper'></div></section><div class='box2'><div class='c5' id='t5'></div><div data-cs-mask class='c6' id='t6'></div></div>";
  const captchaDiv = epsCaptcha.querySelector('#captcha-box');
  captchaDiv.innerHTML = '<div id="px-captcha-container"><div class="px-captcha-header"></div><div class="px-captcha-background"></div><div id="px-captcha"></div><div class="px-captcha-message"></div><span class="px-captcha-report"></span></div>';

  // Settings are published on window before captcha.js is requested.
  window._pxSelectedLocale = pxLang(lang);
  window._pxAppId = _pxAppId;
  window._pxHostUrl = `https://collector-${_pxAppId}.perimeterx.net`;
  window._pxJsClientSrc = `//client.perimeterx.net/${_pxAppId}/main.min.js`;
  window._pxFirstPartyEnabled = 'true';
  window._pxTranslation = {
    fr: [
      { selector: ".px-captcha-header", text: "Veuillez vérifier que vous êtes un humain" },
      { selector: ".px-captcha-message", text: "Appuyez et maintenez enfoncé pour confirmer que vous êtes un humain (et non un bot)." },
    ],
    es: [
      { selector: ".px-captcha-header", text: "Por favor, compruebe que es un ser humano." },
      { selector: ".px-captcha-message", text: "Mantén presionado para confirmar que eres un humano (y no un bot)." },
    ],
    en: [
      { selector: ".px-captcha-header", text: "Please Verify You Are A Human" },
      { selector: ".px-captcha-message", text: "Press & Hold to confirm you are a human (and not a bot)." },
    ],
  };

  const captchaHost = 'https://captcha.px-cdn.net';
  const altCaptchaHost = 'https://captcha.px-cloud.net';

  const primaryScript = document.createElement('script');
  primaryScript.src = `${captchaHost}/${_pxAppId}/captcha.js?a=c&m=0`;
  // Attach the fallback before inserting the element, and insert it only once
  // (the original appended the same element to <head> twice).
  primaryScript.onerror = function () {
    const fallbackScript = document.createElement('script');
    fallbackScript.src = `${altCaptchaHost}/${_pxAppId}/captcha.js?a=c&m=0`;
    document.head.appendChild(fallbackScript);
  };
  document.head.appendChild(primaryScript);

  loadCommonResources(baseUrl, 'captcha');
}
/**
 * Execute a piece of JavaScript source by injecting it into <head> as an
 * inline script element.
 *
 * @param {string} vars - JavaScript source text (callers pass `var ...;` declarations).
 */
function setGlobalVariables(vars) {
  const inlineScript = document.createElement('script');
  inlineScript.textContent = vars;
  inlineScript.setAttribute('type', 'text/javascript');
  document.head.appendChild(inlineScript);
}
/**
 * Load the assets shared by every challenge page: the per-action script
 * (`<baseUrl>/asset/<action>.js`), the common stylesheet
 * (`<baseUrl>/asset/eps.css`) and a responsive viewport <meta>.
 *
 * When the action script has loaded, the global function `load()` is invoked.
 *
 * @param {string} baseUrl - Origin the assets are served from.
 * @param {string} action - Asset name without extension (callers pass 'captcha' or 'block').
 */
function loadCommonResources (baseUrl, action) {
  const actionScript = document.createElement('script');
  actionScript.setAttribute('type', 'text/javascript');
  actionScript.setAttribute('src', `${baseUrl}/asset/${action}.js`);
  // Call the global directly instead of eval('load()'): same lookup, but it
  // keeps working under a Content-Security-Policy that forbids eval.
  actionScript.onload = () => load();
  document.head.appendChild(actionScript);
  // The element is detached again right after insertion, as in the original.
  actionScript.remove();

  const actionCss = document.createElement('link');
  actionCss.setAttribute('type', 'text/css');
  actionCss.setAttribute('href', `${baseUrl}/asset/eps.css`);
  actionCss.setAttribute('rel', 'stylesheet');
  document.head.appendChild(actionCss);

  const viewport = document.createElement('meta');
  viewport.setAttribute('name', 'viewport');
  viewport.setAttribute('content', 'width=device-width, initial-scale=1');
  document.head.appendChild(viewport);
}
/**
 * Render the captcha variant whose markup is fetched from `<baseUrl>/amigood`.
 *
 * Once the response arrives it is parsed as HTML. For every <script> in it
 * whose `src` contains "geetest", an equivalent script element is added to
 * this document's <head>, the parsed body is copied into `epsCaptcha`, and,
 * after that script has loaded, the inline scripts mentioning `#captcha-box`
 * or `function solvedCaptcha(payload)` are re-created in <head> so they run.
 * loadCommonResources() is then called regardless of how many scripts matched.
 *
 * The response is processed whatever its HTTP status.
 *
 * @param {string} lang - Sent as the `x-lang` request header.
 * @param {string} [brand='api'] - Sent as the `brand` request header.
 * @param {string} [action='captcha'] - Accepted for signature parity; not used here.
 * @param {Element} epsCaptcha - Container element that receives the fetched markup.
 * @param {string} baseUrl - Origin of the endpoint and of the shared assets.
 */
function iamNotaRobotD(lang, brand = 'api', action = 'captcha', epsCaptcha, baseUrl) {
  const xhr = new XMLHttpRequest();
  xhr.open('GET', baseUrl + '/amigood');
  xhr.setRequestHeader('x-lang', lang);
  xhr.setRequestHeader('brand', brand);
  xhr.setRequestHeader('requesting-host', window.location.host);

  const isGeetestLoader = (node) =>
    node.hasAttribute('src') && node.getAttribute('src').includes('geetest');
  const isCaptchaInline = (node) =>
    node.textContent.includes('#captcha-box') ||
    node.textContent.includes('function solvedCaptcha(payload)');

  xhr.onreadystatechange = function () {
    if (xhr.readyState !== XMLHttpRequest.DONE) {
      return;
    }
    const parser = new DOMParser();
    const epsContent = parser.parseFromString(xhr.responseText, 'text/html');
    const scripts = Array.from(epsContent.querySelectorAll('script'));

    // forEach, not map: the callback is run for its side effects only.
    scripts.filter(isGeetestLoader).forEach((remoteScript) => {
      const loaderEl = document.createElement('script');
      loaderEl.setAttribute('src', remoteScript.getAttribute('src'));
      document.head.appendChild(loaderEl);
      // Drop the loader from the parsed document before its body is copied.
      remoteScript.remove();
      epsCaptcha.innerHTML = epsContent.body.innerHTML;
      loaderEl.onload = () => {
        scripts.filter(isCaptchaInline).forEach((inlineScript) => {
          const inlineEl = document.createElement('script');
          inlineEl.textContent = inlineScript.textContent;
          inlineEl.setAttribute('type', 'text/javascript');
          document.head.appendChild(inlineEl);
          inlineScript.remove();
        });
      };
    });

    loadCommonResources(baseUrl, 'captcha');
  };
  xhr.send();
}
/**
 * Entry point of the challenge page: prepares `#eps-captcha`, publishes the
 * page's parameters as global variables and dispatches to the right renderer.
 *
 * The asset origin is derived from the first <script> whose `src` contains
 * "eps-mgr". For `action === 'block'` the block-page skeleton is rendered and
 * the 'block' assets are loaded. Otherwise the captcha renderer is chosen from
 * the global `eps_dg`: 'p' selects iamNotaRobotPX(), anything else
 * iamNotaRobotD().
 *
 * Reads the globals `dt`, `client_ip` and `eps_dg`.
 *
 * @param {string} lang - Locale tag handed to the renderer and exposed as `al`.
 * @param {string} [brand='api'] - Brand; 'api' is mapped to 'tm' for the stylesheet name.
 * @param {string} [action='captcha'] - 'block' for the block page, otherwise a captcha.
 */
function iamNotaRobot(lang, brand = 'api', action = 'captcha') {
  // Values are spliced into generated JavaScript source, so they are emitted as
  // properly escaped string literals. For values without quotes, backslashes or
  // control characters the output is identical to plain "${value}".
  const quote = (value) => JSON.stringify(String(value));

  const realBrand = (brand === 'api' ? 'tm' : brand);
  const epsCaptcha = document.querySelector('#eps-captcha');
  epsCaptcha.style = "background: rgb(255, 255, 255);";

  const mgrScript = Array.from(document.querySelectorAll('script'))
    .find(script => script.hasAttribute('src') && script.getAttribute('src').includes('eps-mgr'));
  let baseUrl;
  if (mgrScript) {
    // "https://host/path".split('/') -> ['https:', '', 'host', 'path']
    const elements = mgrScript.getAttribute('src').split('/');
    baseUrl = `${elements[0]}//${elements[2]}`;
  }

  if (action === 'block') {
    setGlobalVariables(`var rid=${quote(dt)}; var ip=${quote(client_ip)};var al=${quote(lang)};`);
    epsCaptcha.innerHTML = "<div class='container content-wrapper'><div class='box'><div class='c1' id='t1'></div><div class='c2' id='t2'></div> <ul class='c3' id='t3'></ul><div class='c4' id='t4'></div></div><div class='box2'><div class='c5' id='t5'></div><div data-cs-mask class='c6' id='t6'></div></div></div></div>";
    loadCommonResources(baseUrl, 'block');
    return;
  }

  setGlobalVariables(`var tc="";var rid=${quote(dt)};var rr="";var host=${quote(window.location.host)};var ip=${quote(client_ip)};var action=${quote(action)};var al=${quote(lang)};var ss=${quote(`${realBrand}.css`)};`);
  if (eps_dg === 'p') {
    iamNotaRobotPX(lang, brand, action, epsCaptcha, baseUrl);
  } else {
    iamNotaRobotD(lang, brand, action, epsCaptcha, baseUrl);
  }
}
ticketmaster.sg 的 eps-mgr 檢查用主程式:
// eps-mgr bootstrap (minified, classic script). Summary of what the code below does:
//
// 1. Injects https://epsf.ticketmaster.sg/asset/iamNotaRobot.js into <head> and defines
//    the globals nd_enabled (false), client_ip and dt. In this capture client_ip and dt
//    are literal values embedded in the script.
// 2. d_f maps a hostname to a list of one-letter feature codes; the list for
//    window.location.host is looked up and each code is handled in turn:
//      'h' - loads humanScriptSrc (pagespeed.js on s.jwndnv.com) and, once it has loaded,
//            starts it with window.$$$.start({ si: appName }). XMLHttpRequest.prototype.open,
//            XMLHttpRequest.prototype.send and window.fetch are wrapped so that requests
//            matching humanConfig.protectedRequestRules (and not addressed to the tag's own
//            host) first await sm.report(); signalHandler turns the result into request
//            headers OZ_TC, OZ_DT0..n and OZ_SG0..n (OZ_SG is base64-encoded first), each
//            chunk at most maxHeaderSize (4e3) characters. On am.ticketmaster.com and
//            stg1-am.ticketmaster.com only paths matching /render-ticket/secure-barcode are
//            protected; elsewhere every non-empty path is. Once humanRequestFailCount
//            reaches maxFailCount (5) the wrappers pass requests straight through.
//      'd' - sets eps_dg = 'd' and loads https://epsf.ticketmaster.sg/eps-d.
//      'n' - sets nd_enabled = true.
//      'g' - hides .grecaptcha-badge, loads Google reCAPTCHA Enterprise, polls once per
//            second until both window.digitalData.page.pageInfo.pageName and
//            grecaptcha.enterprise.ready are defined, executes reCAPTCHA with the page name
//            (runs of non-letters replaced by '_') as the action, and reports the token by
//            requesting https://epsf.ticketmaster.sg/gec/v2/<hostname>/<site key>/<action>/<token>
//            through an Image object.
//      'p' - sets eps_dg = 'p' and loads https://epsf.ticketmaster.sg/eps-p.
//      'i' - loads learn.js from www.ipqscdn.com.
//    If the host has no entry (or an empty list), eps_dg is set to 'd' and eps-d is loaded.
// 3. If nd_enabled ended up true, defines epsSID = '' and loads
//    https://epsf.ticketmaster.sg/asset/nd.js.
//
// NOTE(review): none of the lists in d_f contains 'n', so with this map nd_enabled stays false.
// NOTE(review): d_f has no key for a ticketmaster.sg host, so on such hosts the final
//               else branch (eps-d) is the one that runs.
// NOTE(review): i_sdk is assigned without a declaration; it becomes an implicit global here
//               and would throw a ReferenceError if this code ran in strict mode.
var s = document.createElement('script'); s.src = 'https://epsf.ticketmaster.sg/asset/iamNotaRobot.js'; s.charset = 'utf-8'; document.head.appendChild(s); var nd_enabled = false; var client_ip = '12.22.33.44'; var dt = '2023-11-27T03:46:12Z800'; var h = window.location.host; var d_f = new Map([["www.ticketmaster.co.uk", ["d", "i", "g"]], ["my.ticketmaster.com", ["p"]], ["my.livenation.com", ["p"]], ["my.ticketmaster.ca", ["p"]], ["citylive.trium.fr", ["p"]], ["am.ticketmaster.com", ["h", "d"]], ["auth.ticketmaster.com", ["p", "g"]], ["www.ticketmaster.com", ["d", "p", "g"]], ["checkout.ticketmaster.com", ["d", "p", "g"]], ["ticketmaster.ae", ["p", "d"]], ["ticketmaster.se", ["p", "d"]], ["ticketmaster.at", ["p", "d"]], ["ticketmaster.be", ["p", "d"]], ["ticketmaster.ch", ["p", "d"]], ["ticketmaster.cz", ["p", "d"]], ["ticketmaster.de", ["p", "d"]], ["ticketmaster.dk", ["p", "d"]], ["ticketmaster.es", ["p", "d"]], ["ticketmaster.fi", ["p", "d"]], ["ticketmaster.nl", ["p", "d"]], ["ticketmaster.no", ["p", "d"]], ["ticketmaster.pl", ["p", "d"]], ["ticketmaster.co.za", ["p", "d"]], ["zz.ticketmaster.eu", ["p", "d"]]]); let features = d_f.get(h); if (features && features.length > 0) { for (let i = 0; i < features.length; i++) { if (features[i] == 'h') { try { var tagLoaded, sm, humanConfig = {}, humanScriptSrc = 'https://s.jwndnv.com/static/2.64.1/pagespeed.js?mo=2&ci=411638&dt=4116381628729041890000&pd=acc&spa=1&dom=s.jwndnv.com', maxFailCount = 5, debug = !0, maxHeaderSize = 4e3, appName = 'tm_pp', regex = 'am.ticketmaster.com' === h || 'stg1-am.ticketmaster.com' === h ? 
/\/render-ticket\/secure-barcode/ : /.+/, humanRequestFailCount = (setHumanConfig( { protectedRequestRules: [ { apiDomain: h, pathRegexes: [regex] } ], excludedRequestRules: [] } ), 0), humanScriptURLdomain = getLocation(humanScriptSrc).hostname, signalHandler = function (e) { for (var t = { OZ_TC: e.OZ_TC }, n = e.OZ_DT, a = 0; t['OZ_DT' + a] = n.substring(0, maxHeaderSize), n = n.substring(maxHeaderSize); a++); var r = utf8_to_b64(e.OZ_SG); for (a = 0; ; a++) { 1; if (t['OZ_SG' + a] = r.substring(0, maxHeaderSize), !(r = r.substring(maxHeaderSize))) break } return t }; function setHumanConfig(e) { humanConfig = e } function utf8_to_b64(e) { try { return btoa(encodeURIComponent(e).replace(/%([0-9A-F]{2})/g, function (e, t) { return String.fromCharCode(parseInt(t, 16)) } )) } catch (e) { return errorHandler(e, 'Error converting UTF8 to B64.'), '' } } function errorHandler(e, t) { t = (t += ' (') + (null != e ? e.message : 'null') + ')'; debug && console.log(t) } function buildRouteRegex() { try { for (var e = 0; e < humanConfig.protectedRequestRules.length; e++) humanConfig.protectedRequestRules[e].domainRegex = new RegExp(humanConfig.protectedRequestRules[e].apiDomain, 'i'), 0 != humanConfig.protectedRequestRules[e].pathRegexes.length && (t = humanConfig.protectedRequestRules[e].pathRegexes.map(e => 'string' == typeof e ? e : new RegExp(e).source).join('|'), humanConfig.protectedRequestRules[e].routesRegex = new RegExp(t, 'i')); for (var t, e = 0; e < humanConfig.excludedRequestRules.length; e++) humanConfig.excludedRequestRules[e].domainRegex = new RegExp(humanConfig.excludedRequestRules[e].apiDomain, 'i'), 0 != humanConfig.excludedRequestRules[e].pathRegexes.length && (t = humanConfig.excludedRequestRules[e].pathRegexes.map(e => 'string' == typeof e ? 
e : new RegExp(e).source).join('|'), humanConfig.excludedRequestRules[e].routesRegex = new RegExp(t, 'i')) } catch (e) { errorHandler(e, 'Error creating Regex.') } } function getLocation(e) { var t = e.match(/^(https?\:)\/\/(([^:\/?#]*)(?:\:([0-9]+))?)([\/]{0,1}[^?#]*)(\?[^#]*|)(#.*|)$/); return t && { href: e, protocol: t[1], host: t[2], hostname: t[3], port: t[4], pathname: t[5], search: t[6], hash: t[7] } } function checkRoute(t, n) { try { return 0 < humanConfig.protectedRequestRules.filter(e => !(!e.domainRegex.test(t) || !e.routesRegex.test(n))).length && 0 == humanConfig.excludedRequestRules.filter(e => !(!e.domainRegex.test(t) || !e.routesRegex.test(n))).length } catch (e) { return errorHandler(e, 'Failed while comparing XHR Route to Regex.'), !1 } } var ht = document.createElement('script'), hs = (ht.type = 'text/javascript', ht.src = humanScriptSrc, tagLoaded = new Promise(function (e, t) { ht.onload = function () { try { sm = window.$$$.start( { si: appName } ) } catch (e) { t() } e() } } ).catch(e => { errorHandler(e, 'Error in tagLoaded Promise.') } ), document.getElementsByTagName('script')[0]); hs.parentNode.insertBefore(ht, hs) } catch (e) { errorHandler(e, 'failed while creating Script Object.') } buildRouteRegex(), function () { try { var t = XMLHttpRequest.prototype.open; XMLHttpRequest.prototype.open = function () { if (maxFailCount <= humanRequestFailCount) return t.apply(this, [].slice.call(arguments)); try { this._method = arguments[0]; return /^https?:\/\//i.test(arguments[1]) ? this._urlObj = getLocation(arguments[1]) : this._urlObj = getLocation(new URL(arguments[1], document.baseURI).href), this._urlObj.hostname.toLowerCase() == humanScriptURLdomain.toLowerCase() ? 
this._dontPatch = !0 : this._dontPatch = !1, this._urlObj && !checkRoute(this._urlObj.hostname, this._urlObj.pathname) && (this._dontPatch = !0), this._async = arguments.length < 3 || arguments[2], t.apply(this, [].slice.call(arguments)) } catch (e) { return humanRequestFailCount++, console.log(e), t.apply(this, [].slice.call(arguments)) } } } catch (e) { errorHandler(e, 'Failed while monkey patching XMLHttpRequest.open().') } try { var r = XMLHttpRequest.prototype.send; XMLHttpRequest.prototype.send = function (a) { maxFailCount <= humanRequestFailCount || this._dontPatch ? r.call(this, a) : tagLoaded.then(() => { sm.report().then(signalHandler).then(e => { for (var t = Object.keys(e), n = 0; n < t.length; n++) this.setRequestHeader(t[n], e[t[n]]); r.call(this, a) } ).catch(e => { humanRequestFailCount++, errorHandler(e, 'Error in Live Signal Route report() call.'), r.call(this, a) } ) } ).catch(e => { humanRequestFailCount++, r.call(this, a) } ) } } catch (e) { errorHandler(e, 'Failed while monkey patching XMLHttpRequest.send().') } } (); try { const U = window.fetch; window.fetch = async function (o, s) { return maxFailCount <= humanRequestFailCount ? U.call(this, o, s) : tagLoaded.then(() => { var e, t; return t = ('string' != typeof o || /^https?:\/\//i.test(o) ? (e = getLocation('string' == typeof o ? new URL(o).href : o.url).hostname, getLocation('string' == typeof o ? new URL(o).href : o.url)) : (e = getLocation(new URL(o, document.baseURI).href).hostname, getLocation(new URL(o, document.baseURI).href))).pathname, e.toLowerCase() != humanScriptURLdomain.toLowerCase() && checkRoute(e, t) ? 
sm.report().then(signalHandler).then(e => { for (var t = new Request(o, s), n = new Headers(t.headers), a = Object.keys(e), r = 0; r < a.length; r++) n.append(a[r], e[a[r]]); return U.call(this, t, { headers: n } ) } ).catch(e => (humanRequestFailCount++, errorHandler(e, 'Error in Live Signal Route report() call.'), U.call(this, o, s))) : U.call(this, o, s) } ).catch(e => (humanRequestFailCount++, errorHandler(e, 'The tag load promise failed'), U.call(this, o, s))) } } catch (e) { errorHandler(e, 'Failed while monkey patching fetch().') } } else if (features[i] == 'd') { var eps_dg = 'd'; s = document.createElement('script'); s.src = 'https://epsf.ticketmaster.sg/eps-d'; s.charset = 'utf-8'; document.head.appendChild(s); } else if (features[i] == 'n') { nd_enabled = true; } else if (features[i] == 'g') { { const gecHost = 'epsf.ticketmaster.sg'; (async() => { const a = '6LdWxZEkAAAAAIHtgtxW_lIfRHlcLWzZMMiwx9E1'; var e = document.createElement('style'), e = (e.innerHTML = '.grecaptcha-badge { visibility: hidden; }', document.head.appendChild(e), document.createElement('script')), t = (e.src = 'https://www.google.com/recaptcha/enterprise.js?render=' + a, document.head.appendChild(e), () => (((window.digitalData || {} ).page || {} ).pageInfo || {} ).pageName), n = () => ((window.grecaptcha || {} ).enterprise || {} ).ready; let c = t(), o = n(); for (; void 0 === c || void 0 === o; ) c = t(), o = n(), await new Promise(e => setTimeout(e, 1e3)); o(async() => { var e = c.replace(/[^A-Z]+/gi, '_'), t = await grecaptcha.enterprise.execute(a, { action: e } ), e = `https://${gecHost}/gec/v2/${window.location.hostname}/${a}/${encodeURIComponent(e)}/` + encodeURIComponent(t); (new Image).src = e } ) } )(); } } else if (features[i] == 'p') { var eps_dg = 'p'; var pp = document.createElement('script'); pp.src = 'https://epsf.ticketmaster.sg/eps-p'; pp.charset = 'utf-8'; document.head.appendChild(pp); } else if (features[i] == 'i') { i_sdk = document.createElement('script'); 
i_sdk.src = 'https://www.ipqscdn.com/api/ticketmaster.co.uk/FO7QZkCuyNhsd5pyrXGwbD1Pjo2nXPVR32tvxupSSKShG8NxLoZ7MR7vW01glZRWm4hN7fj1D1Rmp5S4V8sOpINsxOiQVcKlf5zBZUlJ106qq8PXg9lzu3DamqPsIacBRFbYxxCuXbOeSJZbJMn7f0o8iFC96hP1rXufJGwZdulqMVe8OsPZ54MgWz7VllPTPcFToQ8xHA4ivIo2gmrnUQ3pUm6WEkLwToo9T0axKQxqlIf7EesIBLM75G8hEfxd/learn.js'; i_sdk.charset = 'utf-8'; document.head.appendChild(i_sdk); }; } } else { var eps_dg = 'd'; s = document.createElement('script'); s.src = 'https://epsf.ticketmaster.sg/eps-d'; s.charset = 'utf-8'; document.head.appendChild(s); }; if (nd_enabled) { var epsSID = ''; var ns = document.createElement('script'); ns.src = 'https://epsf.ticketmaster.sg/asset/nd.js'; ns.charset = 'utf-8'; document.head.appendChild(ns); }
結論
headless 居然可以被檢測出來,覺得很神奇!
因為 headless mode 同樣可以執行 JavaScript,也可以在 headless 裡截圖(而且是有畫面的),個人猜測 ticketmaster 是依據 document.createElement('script') 被執行時的速度,來判斷是否為 headless 模式。
個人的猜測 ticketmaster 有購買 IPQualityScore 提供的Bot Detection 服務:
https://www.ipqualityscore.com/features/bot-detection
IPQS Bot Detection & Real-Time Bot Prevention to Prevent Non-Human Traffic
https://www.youtube.com/watch?v=bkbmEIN7v1k