javascript headless browser check

Posted in :

為什麼要使用headless mode?

因為執行效率較高。

A headless browser is a browser that runs without a graphical interface. It can be controlled programmatically to automate tasks, such as running QA (Quality Assurance) tests.

比較麻煩的是,網頁裡的 captcha 需要另外用 OCR 程式來處理;遇到開放性的問答 (QA) 題目,則需要使用類似 ChatGPT 的工具來解決。

常見的 headless 解法有哪些?

要如何檢測使用者是使用 selenium 的 headless 模式?

網路上有人提供入門版的檢查:
https://github.com/LouisKlimek/HeadlessDetectJS

上面提供了幾個檢測 selenium headless 模式的方法,但都很入門,實際上幫助不大:只要修改 user_agent,headlessDetector.getHeadlessScore() 的傳回值就會等於 0,也就是不會被偵測到 (undetected)。

ticketmaster.sg 的 iamNotaRobot 檢查:

/**
 * Map a locale tag to one of the languages the captcha UI is translated into.
 *
 * The tag is matched case-insensitively against a fixed table. Anything that
 * is not in the table (including empty, missing or non-string input) falls
 * back to 'en'.
 *
 * @param {string} [a] - Locale tag such as "en-US" or "fr-CA".
 * @returns {string} One of "en", "fr" or "es".
 */
function pxLang(a) {
    // A Map has no inherited keys, so tags like "constructor" or "__proto__"
    // cannot resolve to Object.prototype members the way a plain-object
    // lookup would.
    const supported = new Map([
        ['en', 'en'],
        ['en-us', 'en'],
        ['fr-ca', 'fr'],
        ['es-mx', 'es'],
    ]);
    // String() keeps truthy non-string input (numbers, String objects) from
    // throwing on a missing toLowerCase method.
    const key = a ? String(a).toLowerCase() : '';
    return supported.get(key) || 'en';
}

/**
 * Render the "press and hold" captcha variant inside the given container.
 *
 * Writes the page skeleton and captcha markup into `epsCaptcha`, publishes the
 * `_px*` settings on `window`, loads `captcha.js` from the primary host (with a
 * single retry from the alternate host if that load errors), and finally loads
 * the shared captcha assets through `loadCommonResources`.
 *
 * Relies on a global `_pxAppId` and on the sibling functions `pxLang` and
 * `loadCommonResources` being in scope.
 *
 * @param {string} lang - Locale tag; reduced to a supported language via pxLang.
 * @param {string} [brand='api'] - Not used by this function; kept for a signature parallel to iamNotaRobotD.
 * @param {string} [action='captcha'] - Not used by this function.
 * @param {Element} epsCaptcha - Container element whose content is replaced.
 * @param {string} baseUrl - Origin passed on to loadCommonResources.
 */
function iamNotaRobotPX(lang, brand = 'api', action = 'captcha', epsCaptcha, baseUrl) {
    epsCaptcha.innerHTML = "<div class='box'><div class='c1' id='t1'></div><div class='c2' id='t2'></div><ul class='c3' id='t3'></ul><div class='c4' id='t4'></div></div><div class='be'></div><section class='container'><div class='content-wrapper'><div class='content'><div id='captcha-box'></div></div></div><div class='page-footer-wrapper'></div></section><div class='box2'><div class='c5' id='t5'></div><div data-cs-mask class='c6' id='t6'></div></div>";
    const captchaDiv = epsCaptcha.querySelector('#captcha-box');
    captchaDiv.innerHTML = '<div id="px-captcha-container"><div class="px-captcha-header"></div><div class="px-captcha-background"></div><div id="px-captcha"></div><div class="px-captcha-message"></div><span class="px-captcha-report"></span></div>';

    window._pxSelectedLocale = pxLang(lang);
    // NOTE(review): copies the free variable _pxAppId onto window; presumably
    // the captcha script reads it from there - confirm.
    window._pxAppId = _pxAppId;
    window._pxHostUrl = `https://collector-${_pxAppId}.perimeterx.net`;
    window._pxJsClientSrc = `//client.perimeterx.net/${_pxAppId}/main.min.js`;
    window._pxFirstPartyEnabled = 'true';
    window._pxTranslation={fr:[{selector:".px-captcha-header",text:"Veuillez vérifier que vous êtes un humain"},{selector:".px-captcha-message",text:"Appuyez et maintenez enfoncé pour confirmer que vous êtes un humain (et non un bot)."}],es:[{selector:".px-captcha-header",text:"Por favor, compruebe que es un ser humano."},{selector:".px-captcha-message",text:"Mantén presionado para confirmar que eres un humano (y no un bot)."}],en:[{selector:".px-captcha-header",text:"Please Verify You Are A Human"},{selector:".px-captcha-message",text:"Press & Hold to confirm you are a human (and not a bot)."}]};

    const captchaHost = 'https://captcha.px-cdn.net';
    const altCaptchaHost = 'https://captcha.px-cloud.net';

    // Insert one <script> for captcha.js. The error handler is attached before
    // the element enters the document, and each element is inserted exactly once.
    const appendCaptchaScript = (host, onFailure) => {
        const scriptEl = document.createElement('script');
        scriptEl.src = `${host}/${_pxAppId}/captcha.js?a=c&m=0`;
        if (onFailure) {
            scriptEl.onerror = onFailure;
        }
        document.head.appendChild(scriptEl);
    };

    // Only the primary load has a fallback; a failure of the alternate host is final.
    appendCaptchaScript(captchaHost, () => appendCaptchaScript(altCaptchaHost));

    loadCommonResources(baseUrl, 'captcha');
}

/**
 * Inject a snippet of JavaScript source into the page as an inline <script>
 * appended to <head>.
 *
 * @param {string} vars - Script source text to place in the new element.
 */
function setGlobalVariables(vars) {
    const inlineScript = Object.assign(document.createElement('script'), {
        textContent: vars,
    });
    inlineScript.setAttribute('type', 'text/javascript');
    document.head.appendChild(inlineScript);
}

/**
 * Load the per-action script, the shared stylesheet and a viewport meta tag.
 *
 * Appends `${baseUrl}/asset/${action}.js` to <head> and, once it has loaded,
 * calls the global `load` function. The <script> element is detached again
 * right after insertion. Then `${baseUrl}/asset/eps.css` and a
 * `<meta name="viewport">` are appended to <head>.
 *
 * @param {string} baseUrl - Origin that serves the /asset/ files.
 * @param {string} action - Asset name without extension (callers in this file pass 'captcha' or 'block').
 */
function loadCommonResources (baseUrl, action) {
    const actionScript = document.createElement('script');
    actionScript.setAttribute('type', 'text/javascript');
    actionScript.setAttribute('src',  `${baseUrl}/asset/${action}.js`);
    // Plain call instead of evaluating a string: same name resolution at call
    // time, but it keeps working under a Content-Security-Policy that forbids
    // string evaluation.
    // NOTE(review): `load` is presumably defined by the script just loaded - confirm.
    actionScript.onload = () => load();
    document.head.appendChild(actionScript);
    // NOTE(review): the element is removed immediately after insertion; the
    // onload handler above is still registered on it. Presumably this keeps
    // the tag out of the DOM - confirm intent.
    actionScript.remove();

    const actionCss = document.createElement('link');
    actionCss.setAttribute('type', 'text/css');
    actionCss.setAttribute('href', `${baseUrl}/asset/eps.css`);
    actionCss.setAttribute('rel', 'stylesheet');
    document.head.appendChild(actionCss);

    const viewport = document.createElement('meta');
    viewport.setAttribute('name', 'viewport');
    viewport.setAttribute('content', 'width=device-width, initial-scale=1');
    document.head.appendChild(viewport);
}

/**
 * Fetch the challenge page from `${baseUrl}/amigood` and install it into the
 * given container.
 *
 * When the request completes, the response is parsed as HTML. For every
 * <script> in it whose `src` contains "geetest", a live <script> with the same
 * `src` is appended to <head> and the container's content is replaced by the
 * parsed body. Once that live script has loaded, the parsed inline scripts
 * that mention '#captcha-box' or define `solvedCaptcha(payload)` are
 * re-created in <head>. The shared captcha assets are loaded afterwards in
 * every case.
 *
 * @param {string} lang - Sent as the `x-lang` request header.
 * @param {string} [brand='api'] - Sent as the `brand` request header.
 * @param {string} [action='captcha'] - Not used by this function.
 * @param {Element} epsCaptcha - Container whose innerHTML is replaced.
 * @param {string} baseUrl - Origin of the challenge service.
 */
function iamNotaRobotD(lang, brand = 'api', action = 'captcha', epsCaptcha, baseUrl) {
    const request = new XMLHttpRequest();

    // Re-create the relevant inline scripts as live elements in <head>.
    const activateInlineScripts = (parsedScripts) => {
        const wanted = parsedScripts.filter((candidate) => {
            const text = candidate.textContent;
            return text.includes('#captcha-box') || text.includes('function solvedCaptcha(payload)');
        });
        for (const inline of wanted) {
            const liveInline = document.createElement('script');
            liveInline.textContent = inline.textContent;
            liveInline.setAttribute('type', 'text/javascript');
            document.head.appendChild(liveInline);
            inline.remove();
        }
    };

    // Parse the response body and wire up the geetest loader script(s).
    const installChallenge = () => {
        const parsed = new DOMParser().parseFromString(request.responseText, 'text/html');
        const parsedScripts = Array.from(parsed.querySelectorAll('script'));
        const geetestScripts = parsedScripts.filter(
            (candidate) => candidate.hasAttribute('src') && candidate.getAttribute('src').includes('geetest')
        );

        for (const remote of geetestScripts) {
            const loader = document.createElement('script');
            loader.setAttribute('src', remote.getAttribute('src'));
            document.head.appendChild(loader);
            remote.remove();
            epsCaptcha.innerHTML = parsed.body.innerHTML;
            loader.onload = () => {
                activateInlineScripts(parsedScripts);
            };
        }

        loadCommonResources(baseUrl, 'captcha');
    };

    request.open('GET', `${baseUrl}/amigood`);

    const headers = [
        ['x-lang', lang],
        ['brand', brand],
        ['requesting-host', window.location.host],
    ];
    for (const [headerName, headerValue] of headers) {
        request.setRequestHeader(headerName, headerValue);
    }

    request.onreadystatechange = function () {
        if (request.readyState !== XMLHttpRequest.DONE) {
            return;
        }
        installChallenge();
    };
    request.send();
}

/**
 * Entry point of the challenge page: prepare the `#eps-captcha` container,
 * publish the page globals and hand over to the right challenge renderer.
 *
 * The base URL is taken from the origin of the first <script> on the page
 * whose `src` contains "eps-mgr". For `action === 'block'` only the block
 * page is rendered. Otherwise the global `eps_dg` selects the renderer:
 * 'p' -> iamNotaRobotPX, anything else -> iamNotaRobotD.
 *
 * Relies on the globals `dt`, `client_ip` and `eps_dg` and on the sibling
 * functions setGlobalVariables, loadCommonResources, iamNotaRobotPX and
 * iamNotaRobotD.
 *
 * @param {string} lang - Locale tag forwarded to the renderer and exposed as global `al`.
 * @param {string} [brand='api'] - Brand; 'api' is treated as 'tm' for the stylesheet name.
 * @param {string} [action='captcha'] - 'block' renders the block page, anything else a challenge.
 */
function iamNotaRobot(lang, brand = 'api', action = 'captcha') {
    const realBrand = (brand === 'api' ? 'tm' : brand);
    const epsCaptcha = document.querySelector('#eps-captcha');
    epsCaptcha.style = "background: rgb(255, 255, 255);";

    // Origin ("scheme://host") of the eps-mgr script; stays undefined when no
    // such script tag is present.
    const mgrScript = Array.from(document.querySelectorAll('script'))
        .find(script => script.hasAttribute('src') && script.getAttribute('src').includes('eps-mgr'));
    let baseUrl;
    if (mgrScript) {
        const elements = mgrScript.getAttribute('src').split('/');
        baseUrl = elements[0] + "//" + elements[2];
    }

    // Values are spliced into generated script source, so they are encoded as
    // JavaScript string literals; a quote or backslash in e.g. `lang` can no
    // longer terminate the literal. For ordinary values the output is
    // unchanged ("value").
    const asLiteral = (value) => JSON.stringify(String(value));

    if (action === 'block') {
        setGlobalVariables (`var rid=${asLiteral(dt)}; var ip=${asLiteral(client_ip)};var al=${asLiteral(lang)};`);
        epsCaptcha.innerHTML = "<div class='container content-wrapper'><div class='box'><div class='c1' id='t1'></div><div class='c2' id='t2'></div> <ul class='c3' id='t3'></ul><div class='c4' id='t4'></div></div><div class='box2'><div class='c5' id='t5'></div><div data-cs-mask class='c6' id='t6'></div></div></div></div>";
        loadCommonResources (baseUrl, 'block');
        return;
    }

    setGlobalVariables(`var tc="";var rid=${asLiteral(dt)};var rr="";var host=${asLiteral(window.location.host)};var ip=${asLiteral(client_ip)};var action=${asLiteral(action)};var al=${asLiteral(lang)};var ss=${asLiteral(`${realBrand}.css`)};`);

    if (eps_dg === 'p') {
        iamNotaRobotPX(lang, brand, action, epsCaptcha, baseUrl);
    } else {
        iamNotaRobotD(lang, brand, action, epsCaptcha, baseUrl);
    }
}

ticketmaster.sg 的 eps-mgr 檢查用主程式:

// Start loading the challenge helper script (iamNotaRobot.js).
var s = Object.assign(document.createElement('script'), {
    src: 'https://epsf.ticketmaster.sg/asset/iamNotaRobot.js',
    charset: 'utf-8',
});
document.head.appendChild(s);

// Page-level values declared with `var`. iamNotaRobot() reads `dt` and
// `client_ip` as free variables, so these names must not change.
var nd_enabled = false;
var client_ip = '12.22.33.44';
var dt = '2023-11-27T03:46:12Z800';
var h = window.location.host;

// Host name -> list of one-letter feature codes enabled for that host.
var d_f = new Map([
    ["www.ticketmaster.co.uk", ["d", "i", "g"]],
    ["my.ticketmaster.com", ["p"]],
    ["my.livenation.com", ["p"]],
    ["my.ticketmaster.ca", ["p"]],
    ["citylive.trium.fr", ["p"]],
    ["am.ticketmaster.com", ["h", "d"]],
    ["auth.ticketmaster.com", ["p", "g"]],
    ["www.ticketmaster.com", ["d", "p", "g"]],
    ["checkout.ticketmaster.com", ["d", "p", "g"]],
    ["ticketmaster.ae", ["p", "d"]],
    ["ticketmaster.se", ["p", "d"]],
    ["ticketmaster.at", ["p", "d"]],
    ["ticketmaster.be", ["p", "d"]],
    ["ticketmaster.ch", ["p", "d"]],
    ["ticketmaster.cz", ["p", "d"]],
    ["ticketmaster.de", ["p", "d"]],
    ["ticketmaster.dk", ["p", "d"]],
    ["ticketmaster.es", ["p", "d"]],
    ["ticketmaster.fi", ["p", "d"]],
    ["ticketmaster.nl", ["p", "d"]],
    ["ticketmaster.no", ["p", "d"]],
    ["ticketmaster.pl", ["p", "d"]],
    ["ticketmaster.co.za", ["p", "d"]],
    ["zz.ticketmaster.eu", ["p", "d"]],
]);

// Feature codes for the current host; undefined when the host is not listed.
let features = d_f.get(h);
// Enable each protection feature configured for the current host. Codes handled:
//   'h' - load a third-party script and patch XMLHttpRequest / fetch so that
//         matching requests carry extra signal headers
//   'd' - set eps_dg = 'd' and load the eps-d script
//   'n' - set nd_enabled
//   'g' - reCAPTCHA Enterprise: execute and report the token
//   'p' - set eps_dg = 'p' and load the eps-p script
//   'i' - load the ipqscdn.com learn.js script
// When the host has no feature list, the else branch at the bottom loads eps-d.
if (features && features.length > 0)
{
    for (let i = 0; i < features.length; i++)
    {
        if (features[i] == 'h')
        {
            // NOTE(review): the helper functions below are declared inside this
            // try block but are also called from the catch clause and from code
            // after it; that relies on non-strict function hoisting - confirm the
            // script is never run in strict mode.
            try
            {
                var tagLoaded,
                sm,
                humanConfig = {},
                humanScriptSrc = 'https://s.jwndnv.com/static/2.64.1/pagespeed.js?mo=2&ci=411638&dt=4116381628729041890000&pd=acc&spa=1&dom=s.jwndnv.com',
                maxFailCount = 5,
                debug = !0,
                maxHeaderSize = 4e3,
                appName = 'tm_pp',
                regex = 'am.ticketmaster.com' === h || 'stg1-am.ticketmaster.com' === h ? /\/render-ticket\/secure-barcode/ : /.+/,
                humanRequestFailCount = (setHumanConfig(
                    {
                        protectedRequestRules: [
                            {
                                apiDomain: h,
                                pathRegexes: [regex]
                            }
                        ],
                        excludedRequestRules: []
                    }
                    ), 0),
                humanScriptURLdomain = getLocation(humanScriptSrc).hostname,
                // Turns a report object into a flat header map: OZ_TC as-is, OZ_DT
                // and the base64 form of OZ_SG each split into chunks of at most
                // maxHeaderSize characters under keys OZ_DT0, OZ_DT1, ... and
                // OZ_SG0, OZ_SG1, ...
                signalHandler = function (e)
                {
                    for (var t =
                        {
                            OZ_TC: e.OZ_TC
                        }, n = e.OZ_DT, a = 0; t['OZ_DT' + a] = n.substring(0, maxHeaderSize), n = n.substring(maxHeaderSize); a++);
                    var r = utf8_to_b64(e.OZ_SG);
                    for (a = 0; ; a++)
                    {
                        1;
                        if (t['OZ_SG' + a] = r.substring(0, maxHeaderSize), !(r = r.substring(maxHeaderSize)))
                            break
                    }
                    return t
                };
                // Stores the rule set used by buildRouteRegex and checkRoute.
                function setHumanConfig(e)
                {
                    humanConfig = e
                }
                // Base64-encodes the UTF-8 bytes of e; returns '' (after logging) on failure.
                function utf8_to_b64(e)
                {
                    try
                    {
                        return btoa(encodeURIComponent(e).replace(/%([0-9A-F]{2})/g, function (e, t)
                            {
                                return String.fromCharCode(parseInt(t, 16))
                            }
                            ))
                    }
                    catch (e)
                    {
                        return errorHandler(e, 'Error converting UTF8 to B64.'),
                        ''
                    }
                }
                // Logs "<t> (<e.message or 'null'>)" to the console when debug is on.
                function errorHandler(e, t)
                {
                    t = (t += ' (') + (null != e ? e.message : 'null') + ')';
                    debug && console.log(t)
                }
                // Compiles, for every protected and excluded rule, a case-insensitive
                // domainRegex from apiDomain and (when pathRegexes is non-empty) a
                // routesRegex that is the alternation of all its path patterns.
                function buildRouteRegex()
                {
                    try
                    {
                        for (var e = 0; e < humanConfig.protectedRequestRules.length; e++)
                            humanConfig.protectedRequestRules[e].domainRegex = new RegExp(humanConfig.protectedRequestRules[e].apiDomain, 'i'), 0 != humanConfig.protectedRequestRules[e].pathRegexes.length && (t = humanConfig.protectedRequestRules[e].pathRegexes.map(e => 'string' == typeof e ? e : new RegExp(e).source).join('|'), humanConfig.protectedRequestRules[e].routesRegex = new RegExp(t, 'i'));
                        for (var t, e = 0; e < humanConfig.excludedRequestRules.length; e++)
                            humanConfig.excludedRequestRules[e].domainRegex = new RegExp(humanConfig.excludedRequestRules[e].apiDomain, 'i'), 0 != humanConfig.excludedRequestRules[e].pathRegexes.length && (t = humanConfig.excludedRequestRules[e].pathRegexes.map(e => 'string' == typeof e ? e : new RegExp(e).source).join('|'), humanConfig.excludedRequestRules[e].routesRegex = new RegExp(t, 'i'))
                    }
                    catch (e)
                    {
                        errorHandler(e, 'Error creating Regex.')
                    }
                }
                // Splits an absolute http(s) URL into location-like parts; returns
                // null when the string does not match.
                function getLocation(e)
                {
                    var t = e.match(/^(https?\:)\/\/(([^:\/?#]*)(?:\:([0-9]+))?)([\/]{0,1}[^?#]*)(\?[^#]*|)(#.*|)$/);
                    return t &&
                    {
                        href: e,
                        protocol: t[1],
                        host: t[2],
                        hostname: t[3],
                        port: t[4],
                        pathname: t[5],
                        search: t[6],
                        hash: t[7]
                    }
                }
                // True when host t / path n matches at least one protected rule and
                // no excluded rule; false (after logging) if the comparison throws.
                function checkRoute(t, n)
                {
                    try
                    {
                        return 0 < humanConfig.protectedRequestRules.filter(e => !(!e.domainRegex.test(t) || !e.routesRegex.test(n))).length && 0 == humanConfig.excludedRequestRules.filter(e => !(!e.domainRegex.test(t) || !e.routesRegex.test(n))).length
                    }
                    catch (e)
                    {
                        return errorHandler(e, 'Failed while comparing XHR Route to Regex.'),
                        !1
                    }
                }
                // Create the <script> for humanScriptSrc. tagLoaded settles from the
                // script's onload handler, where window.$$$.start() is called and its
                // result kept in sm.
                // NOTE(review): no onerror handler is registered here, so if the
                // script never loads tagLoaded appears to stay pending and the
                // patched send()/fetch() below would wait on it - confirm.
                var ht = document.createElement('script'),
                hs = (ht.type = 'text/javascript', ht.src = humanScriptSrc, tagLoaded = new Promise(function (e, t)
                        {
                            ht.onload = function ()
                            {
                                try
                                {
                                    sm = window.$$$.start(
                                        {
                                            si: appName
                                        }
                                        )
                                }
                                catch (e)
                                {
                                    t()
                                }
                                e()
                            }
                        }
                        ).catch(e =>
                        {
                            errorHandler(e, 'Error in tagLoaded Promise.')
                        }
                        ), document.getElementsByTagName('script')[0]);
                // Insert the new script before the first <script> already in the document.
                hs.parentNode.insertBefore(ht, hs)
            }
            catch (e)
            {
                errorHandler(e, 'failed while creating Script Object.')
            }
            // Compile the rule regexes, then patch XMLHttpRequest.prototype.open/send.
            buildRouteRegex(),
            function ()
            {
                try
                {
                    // Patched open(): records method, parsed URL and async flag on the
                    // instance and sets _dontPatch when the request targets the signal
                    // script's own host or does not match the route rules. After
                    // maxFailCount failures it delegates straight to the original.
                    var t = XMLHttpRequest.prototype.open;
                    XMLHttpRequest.prototype.open = function ()
                    {
                        if (maxFailCount <= humanRequestFailCount)
                            return t.apply(this, [].slice.call(arguments));
                        try
                        {
                            this._method = arguments[0];
                            return /^https?:\/\//i.test(arguments[1]) ? this._urlObj = getLocation(arguments[1]) : this._urlObj = getLocation(new URL(arguments[1], document.baseURI).href),
                            this._urlObj.hostname.toLowerCase() == humanScriptURLdomain.toLowerCase() ? this._dontPatch = !0 : this._dontPatch = !1,
                            this._urlObj && !checkRoute(this._urlObj.hostname, this._urlObj.pathname) && (this._dontPatch = !0),
                            this._async = arguments.length < 3 || arguments[2],
                            t.apply(this, [].slice.call(arguments))
                        }
                        catch (e)
                        {
                            return humanRequestFailCount++,
                            console.log(e),
                            t.apply(this, [].slice.call(arguments))
                        }
                    }
                }
                catch (e)
                {
                    errorHandler(e, 'Failed while monkey patching XMLHttpRequest.open().')
                }
                try
                {
                    // Patched send(): unpatched requests go out immediately. Otherwise
                    // it waits for tagLoaded, calls sm.report(), converts the result
                    // with signalHandler, sets every entry as a request header and then
                    // sends. On any failure the counter is incremented and the request
                    // is sent without the extra headers.
                    var r = XMLHttpRequest.prototype.send;
                    XMLHttpRequest.prototype.send = function (a)
                    {
                        maxFailCount <= humanRequestFailCount || this._dontPatch ? r.call(this, a) : tagLoaded.then(() =>
                        {
                            sm.report().then(signalHandler).then(e =>
                            {
                                for (var t = Object.keys(e), n = 0; n < t.length; n++)
                                    this.setRequestHeader(t[n], e[t[n]]);
                                r.call(this, a)
                            }
                            ).catch(e =>
                            {
                                humanRequestFailCount++,
                                errorHandler(e, 'Error in Live Signal Route report() call.'),
                                r.call(this, a)
                            }
                            )
                        }
                        ).catch(e =>
                        {
                            humanRequestFailCount++,
                            r.call(this, a)
                        }
                        )
                    }
                }
                catch (e)
                {
                    errorHandler(e, 'Failed while monkey patching XMLHttpRequest.send().')
                }
            }
            ();
            try
            {
                // Patched fetch(): same idea as the XHR patch. For requests that match
                // the route rules (and do not target the signal script's host) the
                // headers from sm.report() are appended to a new Request before the
                // original fetch is called; everything else is passed through.
                const U = window.fetch;
                window.fetch = async function (o, s)
                {
                    return maxFailCount <= humanRequestFailCount ? U.call(this, o, s) : tagLoaded.then(() =>
                    {
                        var e,
                        t;
                        return t = ('string' != typeof o || /^https?:\/\//i.test(o) ? (e = getLocation('string' == typeof o ? new URL(o).href : o.url).hostname, getLocation('string' == typeof o ? new URL(o).href : o.url)) : (e = getLocation(new URL(o, document.baseURI).href).hostname, getLocation(new URL(o, document.baseURI).href))).pathname,
                        e.toLowerCase() != humanScriptURLdomain.toLowerCase() && checkRoute(e, t) ? sm.report().then(signalHandler).then(e =>
                        {
                            for (var t = new Request(o, s), n = new Headers(t.headers), a = Object.keys(e), r = 0; r < a.length; r++)
                                n.append(a[r], e[a[r]]);
                            return U.call(this, t,
                            {
                                headers: n
                            }
                            )
                        }
                        ).catch(e => (humanRequestFailCount++, errorHandler(e, 'Error in Live Signal Route report() call.'), U.call(this, o, s))) : U.call(this, o, s)
                    }
                    ).catch(e => (humanRequestFailCount++, errorHandler(e, 'The tag load promise failed'), U.call(this, o, s)))
                }
            }
            catch (e)
            {
                errorHandler(e, 'Failed while monkey patching fetch().')
            }
        }
        else if (features[i] == 'd')
        {
            // eps_dg is read by iamNotaRobot() to choose the challenge renderer.
            var eps_dg = 'd';
            s = document.createElement('script');
            s.src = 'https://epsf.ticketmaster.sg/eps-d';
            s.charset = 'utf-8';
            document.head.appendChild(s);
        }
        else if (features[i] == 'n')
        {
            // Checked after this if/else to decide whether nd.js is loaded.
            nd_enabled = true;
        }
        else if (features[i] == 'g')
        {
            {
                const gecHost = 'epsf.ticketmaster.sg';
                // Hides the reCAPTCHA badge, loads enterprise.js, then polls once per
                // second until both window.digitalData.page.pageInfo.pageName and
                // grecaptcha.enterprise.ready are defined. It then executes reCAPTCHA
                // with an action derived from the page name (runs of non-letters
                // replaced by '_') and reports the token by requesting an image URL
                // under https://<gecHost>/gec/v2/.
                (async() =>
                {
                    const a = '6LdWxZEkAAAAAIHtgtxW_lIfRHlcLWzZMMiwx9E1';
                    var e = document.createElement('style'),
                    e = (e.innerHTML = '.grecaptcha-badge { visibility: hidden; }', document.head.appendChild(e), document.createElement('script')),
                    t = (e.src = 'https://www.google.com/recaptcha/enterprise.js?render=' + a, document.head.appendChild(e), () => (((window.digitalData || {}
                                ).page || {}
                            ).pageInfo || {}
                        ).pageName),
                    n = () => ((window.grecaptcha || {}
                        ).enterprise || {}
                    ).ready;
                    let c = t(),
                    o = n();
                    for (; void 0 === c || void 0 === o; )
                        c = t(), o = n(), await new Promise(e => setTimeout(e, 1e3));
                    o(async() =>
                    {
                        var e = c.replace(/[^A-Z]+/gi, '_'),
                        t = await grecaptcha.enterprise.execute(a,
                            {
                                action: e
                            }
                            ),
                        e = `https://${gecHost}/gec/v2/${window.location.hostname}/${a}/${encodeURIComponent(e)}/` + encodeURIComponent(t);
                        (new Image).src = e
                    }
                    )
                }
                )();
            }
        }
        else if (features[i] == 'p')
        {
            // eps_dg = 'p' makes iamNotaRobot() use iamNotaRobotPX.
            var eps_dg = 'p';
            var pp = document.createElement('script');
            pp.src = 'https://epsf.ticketmaster.sg/eps-p';
            pp.charset = 'utf-8';
            document.head.appendChild(pp);
        }
        else if (features[i] == 'i')
        {
            // NOTE(review): i_sdk is assigned without a declaration, which creates
            // an implicit global in non-strict code - confirm this is intended.
            i_sdk = document.createElement('script');
            i_sdk.src = 'https://www.ipqscdn.com/api/ticketmaster.co.uk/FO7QZkCuyNhsd5pyrXGwbD1Pjo2nXPVR32tvxupSSKShG8NxLoZ7MR7vW01glZRWm4hN7fj1D1Rmp5S4V8sOpINsxOiQVcKlf5zBZUlJ106qq8PXg9lzu3DamqPsIacBRFbYxxCuXbOeSJZbJMn7f0o8iFC96hP1rXufJGwZdulqMVe8OsPZ54MgWz7VllPTPcFToQ8xHA4ivIo2gmrnUQ3pUm6WEkLwToo9T0axKQxqlIf7EesIBLM75G8hEfxd/learn.js';
            i_sdk.charset = 'utf-8';
            document.head.appendChild(i_sdk);
        };
    }
}
else
{
    // Default for hosts without a feature list: same as the 'd' feature.
    var eps_dg = 'd';
    s = document.createElement('script');
    s.src = 'https://epsf.ticketmaster.sg/eps-d';
    s.charset = 'utf-8';
    document.head.appendChild(s);
};
// Load nd.js only when nd_enabled is set. `epsSID` and `ns` stay `var`
// declarations so their scope is the same as before.
if (nd_enabled)
{
    var epsSID = '';
    var ns = Object.assign(document.createElement('script'), {
        src: 'https://epsf.ticketmaster.sg/asset/nd.js',
        charset: 'utf-8',
    });
    document.head.appendChild(ns);
}

結論

headless 模式居然可以被檢測得出來,覺得很神奇!

因為 headless mode 同樣可以執行 javascript,也可以截圖,而且截圖是有畫面的;個人猜測 ticketmaster 是依據 document.createElement('script') 被執行時的速度,來判斷是否為 headless 模式。

個人的猜測 ticketmaster 有購買 IPQualityScore 提供的Bot Detection 服務:
https://www.ipqualityscore.com/features/bot-detection

IPQS Bot Detection & Real-Time Bot Prevention to Prevent Non-Human Traffic
https://www.youtube.com/watch?v=bkbmEIN7v1k

發佈留言

發佈留言必須填寫的電子郵件地址不會公開。 必填欄位標示為 *