javascript headless browser check

為什麼要使用headless mode?

因為執行效率較高。

Headless browser is a browser that can be used without a graphical interface. It can be controlled programmatically to automate tasks, such as doing QA (Quality Assurance) tests.

比較麻煩的是, 網頁裡的 captcha 需要另外用 OCR 程式來處理; 遇到開放性的問答題 (Q&A), 則需要使用類似 ChatGPT 的工具來解決。

常見的 headless 解法有哪些?

要如何檢測使用者是使用 selenium 的 headless 模式?

網路上有人提供入門版的檢查:
https://github.com/LouisKlimek/HeadlessDetectJS

上面提供了幾種檢測 selenium headless 模式的方法, 但都相當入門, 實際上幫助不大: 只要修改 user_agent, headlessDetector.getHeadlessScore() 的傳回值就會是 0, 也就是檢測不到 (undetected)。

ticketmaster.sg 的 iamNotaRobot 檢查:

/**
 * Map a locale tag to one of the captcha UI languages.
 *
 * The tag is lower-cased before lookup. Anything that is not an exact entry
 * of the table (including a missing/empty tag) falls back to 'en'.
 *
 * @param {string} [a] - Locale tag such as "en-US" or "fr-CA".
 * @returns {string} 'en', 'fr' or 'es'.
 */
function pxLang(a) {
    const supported = { en: 'en', 'en-us': 'en', 'fr-ca': 'fr', 'es-mx': 'es' };
    const key = a ? a.toLowerCase() : a;
    // Own-property check: a bare `supported[key]` lookup also finds inherited
    // members, so tags like "constructor" or "__proto__" used to return a
    // prototype object/function instead of the 'en' fallback.
    return Object.prototype.hasOwnProperty.call(supported, key) ? supported[key] : 'en';
}

/**
 * Render the PerimeterX "press & hold" captcha inside the given container.
 *
 * Writes the captcha markup into `epsCaptcha`, publishes the `_px*` settings
 * on `window`, loads the captcha script (with a fallback host if the first
 * one fails to load) and finally loads the shared captcha assets.
 *
 * Reads the global `_pxAppId` and calls the sibling helpers `pxLang` and
 * `loadCommonResources`.
 *
 * @param {string} lang - Locale tag, normalised through pxLang().
 * @param {string} [brand='api'] - Unused here; kept for signature parity with iamNotaRobotD.
 * @param {string} [action='captcha'] - Unused here; kept for signature parity with iamNotaRobotD.
 * @param {Element} epsCaptcha - Container element that receives the captcha markup.
 * @param {string} baseUrl - Origin passed on to loadCommonResources().
 */
function iamNotaRobotPX(lang, brand = 'api', action = 'captcha', epsCaptcha, baseUrl) {
    epsCaptcha.innerHTML = "<div class='box'><div class='c1' id='t1'></div><div class='c2' id='t2'></div><ul class='c3' id='t3'></ul><div class='c4' id='t4'></div></div><div class='be'></div><section class='container'><div class='content-wrapper'><div class='content'><div id='captcha-box'></div></div></div><div class='page-footer-wrapper'></div></section><div class='box2'><div class='c5' id='t5'></div><div data-cs-mask class='c6' id='t6'></div></div>";
    const captchaDiv = epsCaptcha.querySelector('#captcha-box');
    captchaDiv.innerHTML = '<div id="px-captcha-container"><div class="px-captcha-header"></div><div class="px-captcha-background"></div><div id="px-captcha"></div><div class="px-captcha-message"></div><span class="px-captcha-report"></span></div>';

    // Settings published on window before the captcha script is requested.
    window._pxSelectedLocale = pxLang(lang);
    window._pxAppId = _pxAppId;
    window._pxHostUrl = `https://collector-${_pxAppId}.perimeterx.net`;
    window._pxJsClientSrc = `//client.perimeterx.net/${_pxAppId}/main.min.js`;
    window._pxFirstPartyEnabled = 'true';
    window._pxTranslation={fr:[{selector:".px-captcha-header",text:"Veuillez vérifier que vous êtes un humain"},{selector:".px-captcha-message",text:"Appuyez et maintenez enfoncé pour confirmer que vous êtes un humain (et non un bot)."}],es:[{selector:".px-captcha-header",text:"Por favor, compruebe que es un ser humano."},{selector:".px-captcha-message",text:"Mantén presionado para confirmar que eres un humano (y no un bot)."}],en:[{selector:".px-captcha-header",text:"Please Verify You Are A Human"},{selector:".px-captcha-message",text:"Press & Hold to confirm you are a human (and not a bot)."}]};

    const captchaHost = 'https://captcha.px-cdn.net';
    const altCaptchaHost = 'https://captcha.px-cloud.net';
    const createCaptchaScript = (host) => {
        const el = document.createElement('script');
        el.src = `${host}/${_pxAppId}/captcha.js?a=c&m=0`;
        return el;
    };

    const primaryScript = createCaptchaScript(captchaHost);
    // Attach the fallback before inserting the element, and insert it once.
    // The previous version appended the same node twice and only registered
    // onerror after the first insertion.
    primaryScript.onerror = function () {
        document.head.appendChild(createCaptchaScript(altCaptchaHost));
    };
    document.head.appendChild(primaryScript);

    loadCommonResources(baseUrl, 'captcha');
}

/**
 * Run a piece of JavaScript source in the page by appending it to <head>
 * as an inline <script type="text/javascript"> element.
 *
 * @param {string} vars - Script source text (callers pass `var ...;` statements).
 */
function setGlobalVariables(vars) {
    const inlineScript = document.createElement('script');
    inlineScript.textContent = vars;
    inlineScript.setAttribute('type', 'text/javascript');
    document.head.appendChild(inlineScript);
}

/**
 * Load the assets shared by the captcha and block pages.
 *
 * Appends three elements to <head>:
 *   1. `${baseUrl}/asset/${action}.js` - once loaded, the global `load()` is called;
 *   2. the `${baseUrl}/asset/eps.css` stylesheet;
 *   3. a responsive viewport <meta>.
 *
 * @param {string} baseUrl - Origin that serves the `/asset/` files.
 * @param {string} action - Asset script base name (callers pass 'captcha' or 'block').
 */
function loadCommonResources (baseUrl, action) {
    const actionScript = document.createElement('script');
    actionScript.setAttribute('type', 'text/javascript');
    actionScript.setAttribute('src',  `${baseUrl}/asset/${action}.js`);
    // Direct call instead of eval('load()'): same name resolution, but it
    // also works under a Content-Security-Policy without 'unsafe-eval'.
    // `load` is not defined in this file; presumably the asset script
    // provides it - verify against the asset.
    actionScript.onload = () => load();
    document.head.appendChild(actionScript);
    // The element is detached right after insertion; the onload handler
    // above still refers to it.
    actionScript.remove();

    const actionCss = document.createElement('link');
    actionCss.setAttribute('type', 'text/css');
    actionCss.setAttribute('href', `${baseUrl}/asset/eps.css`);
    actionCss.setAttribute('rel', 'stylesheet');
    document.head.appendChild(actionCss);

    const viewport = document.createElement('meta');
    viewport.setAttribute('name', 'viewport');
    viewport.setAttribute('content', 'width=device-width, initial-scale=1');
    document.head.appendChild(viewport);
}

/**
 * Fetch the challenge page from `${baseUrl}/amigood` and mount it.
 *
 * Once the request is DONE the response is parsed as HTML. For every parsed
 * <script> whose src contains "geetest", a live copy is appended to <head>
 * and the parsed body markup is written into `epsCaptcha`. When that live
 * script has loaded, the parsed inline scripts that mention '#captcha-box'
 * or 'function solvedCaptcha(payload)' are re-created in <head> so they run.
 * The shared captcha assets are loaded afterwards in every case.
 *
 * @param {string} lang - Sent as the `x-lang` request header.
 * @param {string} [brand='api'] - Sent as the `brand` request header.
 * @param {string} [action='captcha'] - Not used by this function.
 * @param {Element} epsCaptcha - Container that receives the challenge markup.
 * @param {string} baseUrl - Origin of the challenge service.
 */
function iamNotaRobotD(lang, brand = 'api', action = 'captcha', epsCaptcha, baseUrl) {
    const inlineMarkers = ['#captcha-box', 'function solvedCaptcha(payload)'];
    const isGeetestLoader = (node) =>
        node.hasAttribute('src') && node.getAttribute('src').includes('geetest');
    const isCaptchaInline = (node) =>
        inlineMarkers.some((marker) => node.textContent.includes(marker));

    const request = new XMLHttpRequest();
    request.open('GET', baseUrl + '/amigood');
    request.setRequestHeader('x-lang', lang);
    request.setRequestHeader('brand', brand);
    request.setRequestHeader('requesting-host', window.location.host);

    request.onreadystatechange = function () {
        if (request.readyState !== XMLHttpRequest.DONE) {
            return;
        }

        const challengeDoc = new DOMParser().parseFromString(request.responseText, 'text/html');
        const parsedScripts = Array.from(challengeDoc.querySelectorAll('script'));

        for (const loader of parsedScripts.filter(isGeetestLoader)) {
            const liveLoader = document.createElement('script');
            liveLoader.setAttribute('src', loader.getAttribute('src'));
            document.head.appendChild(liveLoader);
            // Drop the loader from the parsed document before copying its body.
            loader.remove();
            epsCaptcha.innerHTML = challengeDoc.body.innerHTML;

            liveLoader.onload = () => {
                for (const inline of parsedScripts.filter(isCaptchaInline)) {
                    const liveInline = document.createElement('script');
                    liveInline.textContent = inline.textContent;
                    liveInline.setAttribute('type', 'text/javascript');
                    document.head.appendChild(liveInline);
                    inline.remove();
                }
            };
        }

        loadCommonResources(baseUrl, 'captcha');
    };

    request.send();
}

/**
 * Entry point: show either the block page or a captcha inside `#eps-captcha`.
 *
 * The service origin is derived from the src of the first <script> whose src
 * contains "eps-mgr". A set of page-level variables is then published through
 * setGlobalVariables() and the work is handed to loadCommonResources()
 * (action 'block'), iamNotaRobotPX() (global `eps_dg` is 'p') or
 * iamNotaRobotD() (any other `eps_dg`).
 *
 * Reads the globals `dt`, `client_ip` and `eps_dg`.
 *
 * @param {string} lang - Locale tag.
 * @param {string} [brand='api'] - Brand; 'api' is mapped to the 'tm' stylesheet.
 * @param {string} [action='captcha'] - 'block' renders the block page; anything else a captcha.
 */
function iamNotaRobot(lang, brand = 'api', action = 'captcha') {
    const realBrand = (brand === 'api' ? 'tm' : brand);
    const epsCaptcha = document.querySelector('#eps-captcha');
    epsCaptcha.style = "background: rgb(255, 255, 255);";

    const mgrScript = Array.from(document.querySelectorAll('script'))
        .find(script => script.hasAttribute('src') && script.getAttribute('src').includes('eps-mgr'));
    let baseUrl;
    if (mgrScript) {
        // "https://host/path".split('/') -> ['https:', '', 'host', ...]
        const elements = mgrScript.getAttribute('src').split('/');
        baseUrl = elements[0] + "//" + elements[2];
    }

    // The values below end up inside generated script source. Encode each one
    // as a JS string literal so a quote, backslash or newline in a value
    // cannot terminate the literal and inject code. For plain values the
    // output is identical to the previous "${value}" interpolation.
    const quote = (value) => JSON.stringify(String(value));

    if (action === 'block') {
        setGlobalVariables (`var rid=${quote(dt)}; var ip=${quote(client_ip)};var al=${quote(lang)};`);
        epsCaptcha.innerHTML = "<div class='container content-wrapper'><div class='box'><div class='c1' id='t1'></div><div class='c2' id='t2'></div> <ul class='c3' id='t3'></ul><div class='c4' id='t4'></div></div><div class='box2'><div class='c5' id='t5'></div><div data-cs-mask class='c6' id='t6'></div></div></div></div>";
        loadCommonResources (baseUrl, 'block');
        return;
    }

    setGlobalVariables(`var tc="";var rid=${quote(dt)};var rr="";var host=${quote(window.location.host)};var ip=${quote(client_ip)};var action=${quote(action)};var al=${quote(lang)};var ss=${quote(`${realBrand}.css`)};`);

    if (eps_dg === 'p') {
        iamNotaRobotPX(lang, brand, action, epsCaptcha, baseUrl);
    } else {
        iamNotaRobotD(lang, brand, action, epsCaptcha, baseUrl);
    }
}

ticketmaster.sg 的 eps-mgr 檢查用主程式:

// Inject the iamNotaRobot.js asset script into <head>.
var s = document.createElement('script');
s.src = 'https://epsf.ticketmaster.sg/asset/iamNotaRobot.js';
s.charset = 'utf-8';
document.head.appendChild(s);
// Set to true by the 'n' feature code; checked after the feature loop to load nd.js.
var nd_enabled = false;
// client_ip and dt are read as globals by iamNotaRobot() when it builds the
// page variables (ip / rid).
var client_ip = '12.22.33.44';
var dt = '2023-11-27T03:46:12Z800';
var h = window.location.host;
// Host name -> list of feature codes. The codes handled by the loop below are
// 'h', 'd', 'n', 'g', 'p' and 'i'.
var d_f = new Map([["www.ticketmaster.co.uk", ["d", "i", "g"]], ["my.ticketmaster.com", ["p"]], ["my.livenation.com", ["p"]], ["my.ticketmaster.ca", ["p"]], ["citylive.trium.fr", ["p"]], ["am.ticketmaster.com", ["h", "d"]], ["auth.ticketmaster.com", ["p", "g"]], ["www.ticketmaster.com", ["d", "p", "g"]], ["checkout.ticketmaster.com", ["d", "p", "g"]], ["ticketmaster.ae", ["p", "d"]], ["ticketmaster.se", ["p", "d"]], ["ticketmaster.at", ["p", "d"]], ["ticketmaster.be", ["p", "d"]], ["ticketmaster.ch", ["p", "d"]], ["ticketmaster.cz", ["p", "d"]], ["ticketmaster.de", ["p", "d"]], ["ticketmaster.dk", ["p", "d"]], ["ticketmaster.es", ["p", "d"]], ["ticketmaster.fi", ["p", "d"]], ["ticketmaster.nl", ["p", "d"]], ["ticketmaster.no", ["p", "d"]], ["ticketmaster.pl", ["p", "d"]], ["ticketmaster.co.za", ["p", "d"]], ["zz.ticketmaster.eu", ["p", "d"]]]);
// undefined when the current host has no entry in d_f.
let features = d_f.get(h);
// Run every feature code configured for the current host. A host with no
// entry in d_f falls through to the else branch at the end of this statement.
if (features && features.length > 0)
{
for (let i = 0; i < features.length; i++)
{
// 'h': load the script at humanScriptSrc and attach the values it reports as
// extra request headers on matching XMLHttpRequest and fetch() calls.
if (features[i] == 'h')
{
try
{
var tagLoaded,
sm,
humanConfig = {},
humanScriptSrc = 'https://s.jwndnv.com/static/2.64.1/pagespeed.js?mo=2&ci=411638&dt=4116381628729041890000&pd=acc&spa=1&dom=s.jwndnv.com',
// Once humanRequestFailCount reaches this value the patched open()/send()/fetch()
// forward calls to the originals without adding headers.
maxFailCount = 5,
// !0 is true: errorHandler() writes its messages to the console.
debug = !0,
// Chunk size (4e3 === 4000 characters) used by signalHandler for each header value.
maxHeaderSize = 4e3,
appName = 'tm_pp',
// On the two am.ticketmaster.com hosts only the secure-barcode path is matched;
// on any other host every non-empty path is.
regex = 'am.ticketmaster.com' === h || 'stg1-am.ticketmaster.com' === h ? /\/render-ticket\/secure-barcode/ : /.+/,
// Comma expression: install the rule set through setHumanConfig(), then
// initialise the failure counter to 0.
humanRequestFailCount = (setHumanConfig(
{
protectedRequestRules: [
{
apiDomain: h,
pathRegexes: [regex]
}
],
excludedRequestRules: []
}
), 0),
humanScriptURLdomain = getLocation(humanScriptSrc).hostname,
// Convert a report object into a flat map of header values:
//   OZ_TC            copied as is;
//   OZ_DT0, OZ_DT1…  e.OZ_DT cut into consecutive maxHeaderSize-character pieces;
//   OZ_SG0, OZ_SG1…  e.OZ_SG base64-encoded with utf8_to_b64(), cut the same way.
// Used below as the .then() handler of sm.report().
signalHandler = function (e)
{
// The loop body is empty: each pass stores one chunk in the condition and
// continues while the remainder of the string is non-empty.
for (var t =
{
OZ_TC: e.OZ_TC
}, n = e.OZ_DT, a = 0; t['OZ_DT' + a] = n.substring(0, maxHeaderSize), n = n.substring(maxHeaderSize); a++);
var r = utf8_to_b64(e.OZ_SG);
for (a = 0; ; a++)
{
1;
if (t['OZ_SG' + a] = r.substring(0, maxHeaderSize), !(r = r.substring(maxHeaderSize)))
break
}
return t
};
/**
 * Replace the active request-rule configuration.
 *
 * @param {object} config - New value for the shared `humanConfig` variable
 *   (the caller in this script passes `protectedRequestRules` and
 *   `excludedRequestRules` arrays).
 */
function setHumanConfig(config) {
    humanConfig = config;
}
/**
 * Base64-encode the UTF-8 bytes of a string.
 *
 * The text is percent-encoded, every %XX escape is turned back into the
 * single character with that code, and the resulting byte string is passed
 * to btoa().
 *
 * @param {string} text - Text to encode.
 * @returns {string} Base64 text, or '' when encoding throws (the error is
 *   reported through errorHandler()).
 */
function utf8_to_b64(text) {
    const byteFromEscape = (_escape, hex) => String.fromCharCode(parseInt(hex, 16));
    try {
        const percentEncoded = encodeURIComponent(text);
        const byteString = percentEncoded.replace(/%([0-9A-F]{2})/g, byteFromEscape);
        return btoa(byteString);
    } catch (err) {
        errorHandler(err, 'Error converting UTF8 to B64.');
        return '';
    }
}
/**
 * Log a failure as "<context> (<error message>)" when `debug` is truthy.
 *
 * @param {?Error} err - Caught error; null/undefined is shown as "null".
 * @param {string} context - Description of the step that failed.
 */
function errorHandler(err, context) {
    const detail = err != null ? err.message : 'null';
    const line = context + ' (' + detail + ')';
    if (debug) {
        console.log(line);
    }
}
/**
 * Compile the matchers used by checkRoute() onto every rule in
 * humanConfig.protectedRequestRules and then humanConfig.excludedRequestRules.
 *
 * Each rule gets `domainRegex` (from `apiDomain`, case-insensitive) and, when
 * `pathRegexes` is not empty, `routesRegex`: one case-insensitive alternation
 * of all its path patterns (strings are used as is, RegExp objects contribute
 * their source). The first error stops processing and is reported through
 * errorHandler().
 */
function buildRouteRegex() {
    const patternSource = (pattern) =>
        (typeof pattern === 'string' ? pattern : new RegExp(pattern).source);

    const compileRule = (rule) => {
        rule.domainRegex = new RegExp(rule.apiDomain, 'i');
        if (rule.pathRegexes.length !== 0) {
            const alternation = rule.pathRegexes.map(patternSource).join('|');
            rule.routesRegex = new RegExp(alternation, 'i');
        }
    };

    try {
        for (const listName of ['protectedRequestRules', 'excludedRequestRules']) {
            const rules = humanConfig[listName];
            for (let idx = 0; idx < rules.length; idx++) {
                compileRule(rules[idx]);
            }
        }
    } catch (err) {
        errorHandler(err, 'Error creating Regex.');
    }
}
/**
 * Split an absolute http(s) URL into its parts with a single regular expression.
 *
 * @param {string} url - Absolute URL starting with "http://" or "https://".
 * @returns {?{href: string, protocol: string, host: string, hostname: string,
 *   port: (string|undefined), pathname: string, search: string, hash: string}}
 *   The parts, or null when `url` does not match (e.g. another scheme).
 *   `port` is undefined when the URL carries none.
 */
function getLocation(url) {
    const parts = url.match(/^(https?\:)\/\/(([^:\/?#]*)(?:\:([0-9]+))?)([\/]{0,1}[^?#]*)(\?[^#]*|)(#.*|)$/);
    if (!parts) {
        return parts;
    }
    const [, protocol, host, hostname, port, pathname, search, hash] = parts;
    return { href: url, protocol, host, hostname, port, pathname, search, hash };
}
/**
 * Decide whether a request target is covered by the configured rules.
 *
 * A rule applies when both its `domainRegex` matches the host name and its
 * `routesRegex` matches the path. The target is covered when at least one
 * protected rule applies and no excluded rule does.
 *
 * @param {string} hostname - Host name of the request URL.
 * @param {string} pathname - Path of the request URL.
 * @returns {boolean} true when covered; false otherwise, including when the
 *   comparison throws (the error is reported through errorHandler()).
 */
function checkRoute(hostname, pathname) {
    const appliesTo = (rule) => rule.domainRegex.test(hostname) && rule.routesRegex.test(pathname);
    try {
        // filter() on purpose: every rule in a list is evaluated, so a rule
        // without compiled regexes is still reached and ends up in the catch.
        const protectedHits = humanConfig.protectedRequestRules.filter(appliesTo);
        if (protectedHits.length === 0) {
            return false;
        }
        const excludedHits = humanConfig.excludedRequestRules.filter(appliesTo);
        return excludedHits.length === 0;
    } catch (err) {
        errorHandler(err, 'Failed while comparing XHR Route to Regex.');
        return false;
    }
}
// Build the <script> element for humanScriptSrc and insert it before the
// first <script> in the document. tagLoaded is the promise the patched
// send()/fetch() wait on before they call sm.report().
// NOTE(review): no onerror handler is attached, so if the script never loads
// tagLoaded stays pending and requests chained on it would not be sent - confirm.
var ht = document.createElement('script'),
hs = (ht.type = 'text/javascript', ht.src = humanScriptSrc, tagLoaded = new Promise(function (e, t)
{
ht.onload = function ()
{
try
{
// Start the loaded library (exposed as window.$$$) and keep the returned handle in sm.
sm = window.$$$.start(
{
si: appName
}
)
}
catch (e)
{
// t is the promise's reject callback.
t()
}
// e is the resolve callback; it also runs after t(), where it has no effect
// because the promise is already rejected.
e()
}
}
// tagLoaded is the promise returned by this catch(): a rejection is logged
// and the resulting promise still fulfils.
).catch(e =>
{
errorHandler(e, 'Error in tagLoaded Promise.')
}
), document.getElementsByTagName('script')[0]);
hs.parentNode.insertBefore(ht, hs)
}
catch (e)
{
errorHandler(e, 'failed while creating Script Object.')
}
// Compile the rule regexes, then (comma expression) immediately invoke the
// function that wraps XMLHttpRequest.prototype.open and .send.
buildRouteRegex(),
function ()
{
try
{
var t = XMLHttpRequest.prototype.open;
// open(): remember method, parsed URL and async flag on the instance and
// decide whether send() should add headers (this._dontPatch).
XMLHttpRequest.prototype.open = function ()
{
// Too many earlier failures: behave exactly like the original open().
if (maxFailCount <= humanRequestFailCount)
return t.apply(this, [].slice.call(arguments));
try
{
this._method = arguments[0];
// One comma expression: parse the URL (relative URLs are resolved against
// document.baseURI), set _dontPatch when the target is the script's own host
// or checkRoute() rejects it, record the async flag, then return the result
// of the original open().
return /^https?:\/\//i.test(arguments[1]) ? this._urlObj = getLocation(arguments[1]) : this._urlObj = getLocation(new URL(arguments[1], document.baseURI).href),
this._urlObj.hostname.toLowerCase() == humanScriptURLdomain.toLowerCase() ? this._dontPatch = !0 : this._dontPatch = !1,
this._urlObj && !checkRoute(this._urlObj.hostname, this._urlObj.pathname) && (this._dontPatch = !0),
this._async = arguments.length < 3 || arguments[2],
t.apply(this, [].slice.call(arguments))
}
catch (e)
{
// Any failure above counts towards maxFailCount; the request is still opened.
return humanRequestFailCount++,
console.log(e),
t.apply(this, [].slice.call(arguments))
}
}
}
catch (e)
{
errorHandler(e, 'Failed while monkey patching XMLHttpRequest.open().')
}
try
{
var r = XMLHttpRequest.prototype.send;
// send(): for requests selected in open(), wait for tagLoaded, obtain
// sm.report(), convert it with signalHandler and set every resulting key as
// a request header before calling the original send().
XMLHttpRequest.prototype.send = function (a)
{
maxFailCount <= humanRequestFailCount || this._dontPatch ? r.call(this, a) : tagLoaded.then(() =>
{
sm.report().then(signalHandler).then(e =>
{
for (var t = Object.keys(e), n = 0; n < t.length; n++)
this.setRequestHeader(t[n], e[t[n]]);
r.call(this, a)
}
).catch(e =>
{
// report()/signalHandler failed: count it and send the request without the headers.
humanRequestFailCount++,
errorHandler(e, 'Error in Live Signal Route report() call.'),
r.call(this, a)
}
)
}
).catch(e =>
{
humanRequestFailCount++,
r.call(this, a)
}
)
}
}
catch (e)
{
errorHandler(e, 'Failed while monkey patching XMLHttpRequest.send().')
}
}
();
// Wrap window.fetch the same way: for targets accepted by checkRoute() (and
// not on the script's own host) append the signalHandler() values as headers.
try
{
const U = window.fetch;
window.fetch = async function (o, s)
{
// After maxFailCount failures calls go straight to the original fetch.
return maxFailCount <= humanRequestFailCount ? U.call(this, o, s) : tagLoaded.then(() =>
{
var e,
t;
// e = host name, t = path of the target. `o` is either a URL string
// (relative strings are resolved against document.baseURI) or an object
// with a `url` property.
return t = ('string' != typeof o || /^https?:\/\//i.test(o) ? (e = getLocation('string' == typeof o ? new URL(o).href : o.url).hostname, getLocation('string' == typeof o ? new URL(o).href : o.url)) : (e = getLocation(new URL(o, document.baseURI).href).hostname, getLocation(new URL(o, document.baseURI).href))).pathname,
e.toLowerCase() != humanScriptURLdomain.toLowerCase() && checkRoute(e, t) ? sm.report().then(signalHandler).then(e =>
{
// Build a Request from the original arguments, copy its headers, append the
// generated ones and fetch with the merged header set.
for (var t = new Request(o, s), n = new Headers(t.headers), a = Object.keys(e), r = 0; r < a.length; r++)
n.append(a[r], e[a[r]]);
return U.call(this, t,
{
headers: n
}
)
}
).catch(e => (humanRequestFailCount++, errorHandler(e, 'Error in Live Signal Route report() call.'), U.call(this, o, s))) : U.call(this, o, s)
}
).catch(e => (humanRequestFailCount++, errorHandler(e, 'The tag load promise failed'), U.call(this, o, s)))
}
}
catch (e)
{
errorHandler(e, 'Failed while monkey patching fetch().')
}
}
// 'd': set eps_dg to 'd' (iamNotaRobot() compares eps_dg with 'p' to pick the
// captcha flow) and load the eps-d script.
else if (features[i] == 'd')
{
var eps_dg = 'd';
s = document.createElement('script');
s.src = 'https://epsf.ticketmaster.sg/eps-d';
s.charset = 'utf-8';
document.head.appendChild(s);
}
// 'n': only raises the flag; nd.js is loaded after the loop.
else if (features[i] == 'n')
{
nd_enabled = true;
}
// 'g': reCAPTCHA Enterprise. Hide the badge, load enterprise.js for site key
// `a`, wait for the page name and grecaptcha to be available, execute with an
// action derived from the page name and send the token to gecHost.
else if (features[i] == 'g')
{
{
const gecHost = 'epsf.ticketmaster.sg';
(async() =>
{
const a = '6LdWxZEkAAAAAIHtgtxW_lIfRHlcLWzZMMiwx9E1';
// `e` is reused: first the <style> element, then the <script> element.
// t() reads window.digitalData.page.pageInfo.pageName and n() reads
// window.grecaptcha.enterprise.ready; both yield undefined while any link
// in the chain is missing.
var e = document.createElement('style'),
e = (e.innerHTML = '.grecaptcha-badge { visibility: hidden; }', document.head.appendChild(e), document.createElement('script')),
t = (e.src = 'https://www.google.com/recaptcha/enterprise.js?render=' + a, document.head.appendChild(e), () => (((window.digitalData || {}
).page || {}
).pageInfo || {}
).pageName),
n = () => ((window.grecaptcha || {}
).enterprise || {}
).ready;
let c = t(),
o = n();
// Poll once per second (1e3 ms) until both values are defined.
for (; void 0 === c || void 0 === o; )
c = t(), o = n(), await new Promise(e => setTimeout(e, 1e3));
o(async() =>
{
// Action name: the page name with every run of non-letters replaced by '_'.
// `e` is then reused for the reporting URL, which is requested by assigning
// it to the src of a new Image.
var e = c.replace(/[^A-Z]+/gi, '_'),
t = await grecaptcha.enterprise.execute(a,
{
action: e
}
),
e = `https://${gecHost}/gec/v2/${window.location.hostname}/${a}/${encodeURIComponent(e)}/` + encodeURIComponent(t);
(new Image).src = e
}
)
}
)();
}
}
// 'p': set eps_dg to 'p' and load the eps-p script.
else if (features[i] == 'p')
{
var eps_dg = 'p';
var pp = document.createElement('script');
pp.src = 'https://epsf.ticketmaster.sg/eps-p';
pp.charset = 'utf-8';
document.head.appendChild(pp);
}
// 'i': load the learn.js script from www.ipqscdn.com.
// NOTE(review): i_sdk is assigned without a declaration, so it becomes an
// implicit global (and would throw in strict mode).
else if (features[i] == 'i')
{
i_sdk = document.createElement('script');
i_sdk.src = 'https://www.ipqscdn.com/api/ticketmaster.co.uk/FO7QZkCuyNhsd5pyrXGwbD1Pjo2nXPVR32tvxupSSKShG8NxLoZ7MR7vW01glZRWm4hN7fj1D1Rmp5S4V8sOpINsxOiQVcKlf5zBZUlJ106qq8PXg9lzu3DamqPsIacBRFbYxxCuXbOeSJZbJMn7f0o8iFC96hP1rXufJGwZdulqMVe8OsPZ54MgWz7VllPTPcFToQ8xHA4ivIo2gmrnUQ3pUm6WEkLwToo9T0axKQxqlIf7EesIBLM75G8hEfxd/learn.js';
i_sdk.charset = 'utf-8';
document.head.appendChild(i_sdk);
};
}
}
// Host not listed in d_f (or listed with no features): same as the 'd' branch.
else
{
var eps_dg = 'd';
s = document.createElement('script');
s.src = 'https://epsf.ticketmaster.sg/eps-d';
s.charset = 'utf-8';
document.head.appendChild(s);
};
// Load nd.js when the 'n' feature code raised nd_enabled in the loop above.
if (nd_enabled)
{
// Declared empty before the script is requested; presumably read by nd.js -
// verify against that asset.
var epsSID = '';
var ns = document.createElement('script');
ns.src = 'https://epsf.ticketmaster.sg/asset/nd.js';
ns.charset = 'utf-8';
document.head.appendChild(ns);
}

結論

headless 居然可以被檢測得出來, 覺得很神奇!

因為 headless mode 同樣可以執行 javascript, 也可以在 headless 裡截圖, 而且截到的畫面是有內容的, 所以個人猜測 ticketmaster 是依據 document.createElement('script') 被執行時的速度, 來判斷是否為 headless 模式。

個人的猜測 ticketmaster 有購買 IPQualityScore 提供的Bot Detection 服務:
https://www.ipqualityscore.com/features/bot-detection

IPQS Bot Detection & Real-Time Bot Prevention to Prevent Non-Human Traffic
https://www.youtube.com/watch?v=bkbmEIN7v1k

發佈留言

發佈留言必須填寫的電子郵件地址不會公開。 必填欄位標示為 *