我正在使用 Diffbot Analyze API 来检测页面类型,希望得到如下结果:
{"stats":{"times": {"docParseTime":0,"docGlobalsTime":0,"fetchAndRenderTime":586,"typeTime":0},"fromCache":true,"types":{"recipe":0,"discussion":0,"audio":0,"error":0,"location":0,"faq":0,"image":0,"job":0,"download":0,"game":0,"product":0,"frontpage":0,"document":1,"article":0,"event":0,"chart":0,"serp":0,"reviewslist":0,"video":0,"profile":0}},"request":{"pageUrl":"http://www.irs.gov/pub/irs-pdf/fw4.pdf","api":"analyze","version":3,"options":["stats"]},"type":"other","objects":[]}
但目前我得到的结果是这样的:
{"request":{"pageUrl":"http://static.nfl.com/static/content/public/image/rulebook/pdfs/2013%20-%20Rule%20Book.pdf","api":"analyze","version":3},"type":"other","objects":[]}
我需要在请求中传递 'stats' 参数,但我该如何在请求中传递这个参数呢?谢谢。
答案 0 :(得分:0)
嗨,我已经解决了。解决方案如下:只需修改 Diffbot 库文件,或者在你自己的文件中写入下面的自定义代码。调用代码如下:
// Example: ask the Analyze API for a PDF and request the extra "stats" block.
// Replace the placeholder token with a real Diffbot token.
var diffbot = new Diffbot('xxxxxxxxxxxxxxxxx');
diffbot.analyze({
  uri: "http://www.visitcalifornia.in/media/pages/getting_around/maps/ORANGE-COUNTY.pdf",
  html: true,
  comments: true,
  stats: true
}, function (err, response) {
  // Surface failures instead of silently dropping them.
  if (err) {
    console.error(err);
    return;
  }
  console.log(response);
});
以下是自定义的库代码:
/**
 * Query the Diffbot v3 Analyze endpoint for a page and pass the parsed JSON
 * response to `callback`.
 *
 * Only `uri`/`url`, `stats` and `token` are read from the settings here; any
 * other option is accepted but does not affect the request.
 *
 * @param {Object} options - Per-call options.
 * @param {string} [options.uri] - Page to analyze.
 * @param {string} [options.url] - Alias for `uri`; wins over `uri` when set.
 * @param {boolean} [options.stats] - When truthy, appends `&stats=1`.
 * @param {function(*, Object=)} callback - Called with `(error, undefined)` on a
 *   request or JSON-parse failure, or `(false, parsedBody)` on success.
 * @throws {Error} Synchronously, when no URI is available.
 */
Diffbot.prototype.analyze = function (options, callback) {
  // Build a per-call settings object: start from whatever is set on the
  // client (e.g. the token) and overlay this call's options. The client
  // itself is never written to, so options from one call cannot leak into
  // the next call on the same client.
  var settings = {};
  var key;
  for (key in this) {
    settings[key] = this[key];
  }
  for (key in options) {
    settings[key] = options[key];
  }

  // support 'url'
  if (settings.url) {
    settings.uri = settings.url;
    delete settings.url;
  }
  if (!settings.uri) {
    throw new Error("the URI is required.");
  }

  var diffbot_url = "http://api.diffbot.com/v3/analyze?token=" + settings.token +
    "&url=" + encodeURIComponent(settings.uri) + "&fields=stats";
  if (settings.stats) {
    diffbot_url += "&stats=1";
  }

  request({uri: diffbot_url}, function (error, response, body) {
    if (error) {
      callback(error, undefined);
      return;
    }

    // A non-JSON body (e.g. an HTML error page) must reach the caller as an
    // error rather than throw from inside this asynchronous callback.
    var parsed;
    try {
      parsed = JSON.parse(body);
    } catch (parseError) {
      callback(parseError, undefined);
      return;
    }
    // `false` (not null) is kept as the "no error" value for existing callers.
    callback(false, parsed);
  });
};
效果非常好!