是否有可以检索用户代理(User-Agent)的 API,并且能够按浏览器类别(例如 mobile、desktop、bot 等)进行过滤?
有一个网站(this web site)声称拥有自 2006 年以来最大的用户代理数据库,
我写了下面这个简单的工具来从该网站获取用户代理(目前需要在浏览器中运行):
/**
 * Collects user-agent strings from the table rows of the current page and
 * offers the collected list as a file download.
 *
 * Expects a jQuery-compatible `$` global and a browser `document`.
 *
 * Public members:
 *   list     - array of user-agent strings gathered by the last parse() call
 *   Class    - the pattern passed to the last parse() call
 *   parse    - function(Class): fills `list` with matching user agents
 *   download - function(): saves `list` as "useragents_<Class>.txt"
 */
function UserAgents () {
  var COL_UA = 1; // for search results use 2
  var COL_CLASS = 3; // for search results use 4
  // A row needs at least this many cells to contain both columns.
  var MIN_CELLS = Math.max(COL_UA, COL_CLASS) + 1;

  this.list = [];
  this.Class = '';

  /**
   * Scans every `tr` on the page and stores the user agent of each row whose
   * class cell matches the given pattern.
   *
   * @param {string} Class - regular-expression source tested against the
   *   text of the class column (e.g. 'mobil' or 'mobil|desktop').
   */
  this.parse = function (Class) {
    var self = this;
    this.list = [];
    this.Class = Class;
    // Built once per call; the pattern does not change between rows.
    var regex = new RegExp(Class);
    $('tr').each(function (index, row) {
      var td = $(row).find('td');
      // Skip rows (e.g. header rows) that lack the expected columns, so a
      // pattern that matches the empty string cannot add empty entries.
      if (td.length < MIN_CELLS) { return; }
      var uac = $(td[COL_CLASS]).text();
      if (regex.test(uac)) {
        self.list.push($(td[COL_UA]).text());
      }
    });
  };

  /**
   * Triggers a download of the current list, serialized as JSON, through a
   * temporary `data:` link. Does nothing when the list is empty.
   */
  this.download = function () {
    if (this.list.length === 0) { return; }
    var pp = document.createElement('a');
    pp.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(JSON.stringify(this.list)));
    pp.setAttribute('download', "useragents_" + this.Class + ".txt");
    // Attach the link before clicking: some browsers (notably older Firefox
    // releases) are reported to ignore click() on anchors outside the document.
    document.body.appendChild(pp);
    pp.click();
    document.body.removeChild(pp);
  };
}
这样,就可以在控制台中查询 mobile 用户代理:
var ua=new UserAgents();
ua.parse('mobil');
ua.list
Array[400]
"Mozilla/5.0 (Linux; Android 5.0; LG-D855 Build/LRX21R.A1450702344; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/47.0.2526.100 Mobile Safari/537.36 ACHEETAHI/2100502020"
ua.list[ Math.floor(Math.random() * ua.list.length) ]
"Mozilla/5.0 (Linux; Android 4.4.2; D5103 Build/18.1.A.1.23) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.95 Mobile Safari/537.36"
以及 desktop 用户代理(先执行 ua.parse('desktop')):
ua.list
Array[200]
ua.list[ Math.floor(Math.random() * ua.list.length) ]
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36 OPR/34.0.2036.50 (Edition Campaign 09)"
ua.list[ Math.floor(Math.random() * ua.list.length) ]
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 AOL/9.8 AOLBuild/4346.2019.US Safari/537.36"
ua.list[ Math.floor(Math.random() * ua.list.length) ]
或者同时查询 mobile 和 desktop:
ua.parse('mobil|desktop')
ua.list
Array[600]
也可以获取 bot 用户代理:
ua.parse('bot')
ua.list
Array[400]
ua.list[ Math.floor(Math.random() * ua.list.length) ]
"baiduSpider"
ua.list[ Math.floor(Math.random() * ua.list.length) ]
"Mozilla/5.0 (Android; Tablet; rv:10.0.4) Gecko/10.0.4 Firefox/10.0.4 Fennec/10.0.4 slurp"
ua.list[ Math.floor(Math.random() * ua.list.length) ]
"Mozilla/5.0 (Macintosh; Intel Mac OS X) Excel/14.59.0"
ua.list[ Math.floor(Math.random() * ua.list.length) ]
"Mozilla/5.0 (Windows Phone 8.0; Trident/7.0; rv:11.0; IEMobile/11.0; ARM; Touch; NOKIA; Nokia) like Gecko BingPreview/1.0b"
ua.list[ Math.floor(Math.random() * ua.list.length) ]
"Image Hunter 2.5.0 (iPad; iPhone OS 6.1.3; en_US)"
ua.list[ Math.floor(Math.random() * ua.list.length) ]
"Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Fetcher/0.1)"
ua.list[ Math.floor(Math.random() * ua.list.length) ]
"yacybot (/global; amd64 Linux 2.6.32-042stab108.2; java 1.7.0_91; America/en) http://yacy.net/bot.html"
解析出某个类别的用户代理之后,就可以把结果保存到文件:
ua.download()
这会在本地下载一个名为 useragents_classtype.txt 的文件(其中 classtype 为传给 parse 的类别模式)。
还有其他办法吗?