在以下代码中，我使用 $_SERVER['HTTP_USER_AGENT'] 来检测访问者是否使用浏览器。但是这种方法无法排除机器人：机器人同样会发送 User-Agent 标头，因此也会被重定向。
例如，当我用 Facebook Debugger 测试我的网站时，代码仍然会执行重定向，并报告错误：302 HTTP Redirect → https://zareklamy.com/?lang=en。
我怎样才能仅对访问者而不是对机器人使用此重定向?
// Language redirect for visitors that have not chosen a language yet.
//
// A redirect is sent only when ALL of the following hold:
//   - the request carries a non-empty User-Agent header,
//   - that User-Agent does not look like a crawler / link-preview fetcher,
//   - the "lang" cookie is missing or is not one of the supported languages.
// The target is $http_lang when it is a supported language, otherwise the
// default language. $http_lang is expected to be set by earlier code.
//
// NOTE: User-Agent sniffing is a heuristic. A bot can send a browser-like
// header, so this reduces crawler redirects but cannot eliminate them.
$supportedLangs = ['en-us', 'en-gb', 'it', 'pl'];
$defaultLang = 'en-us';

// Read request data defensively: on a first visit the cookie does not exist,
// and some clients send no User-Agent header at all.
$userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? (string) $_SERVER['HTTP_USER_AGENT'] : '';
$cookieLang = isset($_COOKIE['lang']) ? $_COOKIE['lang'] : null;
$detectedLang = isset($http_lang) ? $http_lang : null;

// Common crawler markers, including Facebook's link-preview fetcher
// ("facebookexternalhit"), which is what the Facebook Debugger uses.
$looksLikeCrawler = preg_match('/bot|crawl|slurp|spider|facebookexternalhit/i', $userAgent) === 1;

if ($userAgent !== '' && !$looksLikeCrawler && !in_array($cookieLang, $supportedLangs, true)) {
    // The target is always taken from the whitelist, so nothing untrusted
    // ends up in the Location header.
    $targetLang = in_array($detectedLang, $supportedLangs, true) ? $detectedLang : $defaultLang;
    header('Location: https://zareklamy.com/?lang=' . $targetLang);
    die();
}
答案 0 :(得分:0)
检测机器人需要对用户代理进行更详细的分析。
警告-基于意见的内容。还有其他分析用户代理的方法,例如https://github.com/ua-parser/uap-php。
您可以使用browscap-php来更好地检测浏览器。然后,您可以检查所得对象的各种属性,以增加检测到机器人的可能性。
我之所以说“更好”而不是“完全可靠”，是因为机器人完全可以伪造浏览器的 HTTP_USER_AGENT 标头，从而逃避检测。
// Build a Browscap detector from a cache and a logger, look up the visitor,
// and run visitor-only code when the result is not flagged as a bot.
// NOTE: $doctrineFileCache is not defined in this snippet; it must be created beforehand.
$cache = new \Roave\DoctrineSimpleCache\SimpleCacheAdapter($doctrineFileCache); // or maybe any other PSR-16 compatible caches
$logger = new \Monolog\Logger('name'); // or maybe any other PSR-3 compatible logger
$browscap = new \BrowscapPHP\Browscap($cache, $logger);
// No user agent is passed here; presumably the library falls back to the
// current request's User-Agent header — confirm against the browscap-php docs.
$info = $browscap->getBrowser();
// Continue only when the result is flagged neither as a fake user agent nor as a crawler.
if (!($info->isfake || $info->crawler)) {
// Placeholder: the visitor-only logic goes here.
...
}
以下是 Chrome 浏览器的 browscap-php 检测结果示例（即 $info 的值）：
{
"browser_name_regex":"\/^mozilla\\\/5\\.0 \\(.*windows nt 10\\.0.*win64. x64.*\\).*applewebkit.*\\(.*khtml.*like.*gecko.*\\).*chrome\\\/.* safari\\\/.*$\/",
"browser_name_pattern":"mozilla\/5.0 (*windows nt 10.0*win64? x64*)*applewebkit*(*khtml*like*gecko*)*chrome\/* safari\/*",
"parent":"Chrome Generic",
"comment":"Chrome Generic",
"browser":"Chrome",
"browser_type":"Browser",
"browser_bits":"64",
"browser_maker":"Google Inc",
"browser_modus":"unknown",
"version":"0.0",
"majorver":"0",
"minorver":"0",
"platform":"Win10",
"platform_version":"10.0",
"platform_description":"Windows 10",
"platform_bits":"64",
"platform_maker":"Microsoft Corporation",
"alpha":false,
"beta":false,
"win16":false,
"win32":false,
"win64":true,
"frames":true,
"iframes":true,
"tables":true,
"cookies":true,
"backgroundsounds":false,
"javascript":true,
"vbscript":false,
"javaapplets":false,
"activexcontrols":false,
"ismobiledevice":false,
"istablet":false,
"issyndicationreader":false,
"crawler":false,
"isfake":false,
"isanonymized":false,
"ismodified":false,
"cssversion":"3",
"aolversion":"0",
"device_name":"Windows Desktop",
"device_maker":"unknown",
"device_type":"Desktop",
"device_pointing_method":"mouse",
"device_code_name":"Windows Desktop",
"device_brand_name":"unknown",
"renderingengine_name":"Blink",
"renderingengine_version":"unknown",
"renderingengine_description":"a WebKit Fork by Google",
"renderingengine_maker":"Google Inc"
}
答案 1 :(得分:0)
您可以执行以下操作:
<?php
/**
 * Heuristically decide whether a User-Agent string belongs to a bot or crawler.
 *
 * The check is a case-insensitive substring match against a list of known
 * crawler markers. It is a heuristic: a bot that sends a browser-like
 * User-Agent will not be detected.
 *
 * @param string|null $user_agent Raw User-Agent header value; null or '' means "no header".
 *
 * @return int|false 1 when a marker matches, 0 when none matches (including a
 *                   missing/empty user agent), false if preg_match() itself fails.
 */
function is_bot($user_agent) {
    // A missing header cannot match any marker; return early instead of
    // passing null to preg_match().
    if ($user_agent === null || $user_agent === '') {
        return 0;
    }

    // The marker list is split into chunks only for line length. Every chunk
    // ends on a "|" boundary so that no alternative is broken by a line break
    // (a newline inside the literal would become part of the pattern).
    $pattern = '/('
        // Generic markers, plus Facebook's link-preview fetcher.
        . 'abot|dbot|ebot|hbot|kbot|lbot|mbot|nbot|obot|pbot|rbot|sbot|tbot|vbot|ybot|zbot|bot\.|bot\/|_bot|\.bot|\/bot|\-bot|\:bot|\(bot|crawl|slurp|spider|seek|facebookexternalhit|'
        // Named crawlers, a - n.
        . 'accoona|acoon|adressendeutschland|ah\-ha\.com|ahoy|altavista|ananzi|anthill|appie|arachnophilia|arale|araneo|aranha|architext|aretha|arks|asterias|atlocal|atn|atomz|augurfind|backrub|bannana_bot|baypup|bdfetch|big brother|biglotron|bjaaland|blackwidow|blaiz|blog|blo\.|bloodhound|boitho|booch|bradley|butterfly|calif|cassandra|ccubee|cfetch|charlotte|churl|cienciaficcion|cmc|collective|comagent|combine|computingsite|csci|curl|cusco|daumoa|deepindex|delorie|depspid|deweb|die blinde kuh|digger|ditto|dmoz|docomo|download express|dtaagent|dwcp|ebiness|ebingbong|e\-collector|ejupiter|emacs\-w3 search engine|esther|evliya celebi|ezresult|falcon|felix ide|ferret|fetchrover|fido|findlinks|fireball|fish search|fouineur|funnelweb|gazz|gcreep|genieknows|getterroboplus|geturl|glx|goforit|golem|grabber|grapnel|gralon|griffon|gromit|grub|gulliver|hamahakki|harvest|havindex|helix|heritrix|hku www octopus|homerweb|htdig|html index|html_analyzer|htmlgobble|hubater|hyper\-decontextualizer|ia_archiver|ibm_planetwide|ichiro|iconsurf|iltrovatore|image\.kapsi\.net|imagelock|incywincy|indexer|infobee|informant|ingrid|inktomisearch\.com|inspector web|intelliagent|internet shinchakubin|ip3000|iron33|israeli\-search|ivia|jack|jakarta|javabee|jetbot|jumpstation|katipo|kdd\-explorer|kilroy|knowledge|kototoi|kretrieve|labelgrabber|lachesis|larbin|legs|libwww|linkalarm|link validator|linkscan|lockon|lwp|lycos|magpie|mantraagent|mapoftheinternet|marvin\/|mattie|mediafox|mediapartners|mercator|merzscope|microsoft url control|minirank|miva|mj12|mnogosearch|moget|monster|moose|motor|multitext|muncher|muscatferret|mwd\.search|myweb|najdi|nameprotect|nationaldirectory|nazilla|ncsa beta|nec\-meshexplorer|nederland\.zoek|netcarta webmap engine|'
        // Named crawlers, n - z.
        . 'netmechanic|netresearchserver|netscoop|newscan\-online|nhse|nokia6682\/|nomad|noyona|nutch|nzexplorer|objectssearch|occam|omni|open text|openfind|openintelligencedata|orb search|osis\-project|pack rat|pageboy|pagebull|page_verifier|panscient|parasite|partnersite|patric|pear\.|pegasus|peregrinator|pgp key agent|phantom|phpdig|picosearch|piltdownman|pimptrain|pinpoint|pioneer|piranha|plumtreewebaccessor|pogodak|poirot|pompos|poppelsdorf|poppi|popular iconoclast|psycheclone|publisher|python|rambler|raven search|roach|road runner|roadhouse|robbie|robofox|robozilla|rules|salty|sbider|scooter|scoutjet|scrubby|search\.|searchprocess|semanticdiscovery|senrigan|sg\-scout|shai\'hulud|shark|shopwiki|sidewinder|sift|silk|simmany|site searcher|site valet|sitetech\-rover|skymob\.com|sleek|smartwit|sna\-|snappy|snooper|sohu|speedfind|sphere|sphider|spinner|spyder|steeler\/|suke|suntek|supersnooper|surfnomore|sven|sygol|szukacz|tach black widow|tarantula|templeton|\/teoma|t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e|theophrastus|titan|titin|tkwww|toutatis|t\-rex|tutorgig|twiceler|twisted|ucsd|udmsearch|url check|updated|vagabondo|valkyrie|verticrawl|victoria|vision\-search|volcano|voyager\/|voyager\-hc|w3c_validator|w3m2|w3mir|walker|wallpaper|wanderer|wauuu|wavefire|web core|web hopper|web wombat|webbandit|webcatcher|webcopy|webfoot|weblayers|weblinker|weblog monitor|webmirror|webmonkey|webquest|webreaper|websitepulse|websnarf|webstolperer|webvac|webwalk|webwatch|webwombat|webzinger|wget|whizbang|whowhere|wild ferret|worldlight|wwwc|wwwster|xenu|xget|xift|xirq|yandex|yanga|yeti|yodao|zao\/|zippp|zyborg|\.\.\.\.'
        . ')/i';

    return preg_match($pattern, $user_agent);
}
// Example usage: run the visitor-only code when the request does not look like a bot.
// The header is optional, so fall back to an empty string instead of reading
// an undefined index when the client sent no User-Agent.
$user_agent = isset($_SERVER["HTTP_USER_AGENT"]) ? $_SERVER["HTTP_USER_AGENT"] : '';
if (! is_bot($user_agent)) {
    echo "it's a human hit! //do your code here";
}
?>