我正在尝试找出向社交媒体提供更好数据(Open Graph 数据)的方法。基本上,当 Facebook、Twitter 或 Pinterest 请求我页面上某个链接的信息时,我想根据该链接返回对应的 OG 信息,而不是给它们发送一个空页面(好吧,严格来说发送的是它们不会执行的 JavaScript)。
我尝试过 prerender 及类似的工具,但没能让它正常运行。不过我也意识到,我更希望由 Express 路由器识别这类请求,并根据请求返回静态页面。
作为第一步,我需要获取用户代理信息:
所以我打算添加 express-useragent。它在我的测试网站上似乎可以工作,但 Facebook 的抓取工具(scraper)似乎并没有经过它:我能看到它尝试获取图片,但 OG 信息和索引从未更新。(以下代码仅作为示例)
// Third-party and project dependencies.
var express = require('express');
var useragent = require('express-useragent');
var cfgBunyan = require('../config/bunyan');

// Router instance that the routes below are attached to.
var router = express.Router();

// Logger obtained from the project's bunyan config
// ('ROUTE' presumably labels entries coming from this file — confirm in ../config/bunyan).
var log = cfgBunyan.dbLogger('ROUTE');

// Run the express-useragent middleware for every request handled by this router.
router.use(useragent.express());
/* GET home page: prints req.useragent to the console for debugging, then renders the 'index' view. */
router.get('/', function renderHome(request, response) {
    console.log(request.useragent);
    response.render('index');
});
/* GET /share/:service — renders the 'index' view; the :service parameter is not read here. */
router.get('/share/:service', function renderShare(request, response) {
    response.render('index');
});
/*
 * GET /pages/:name — logs the requested path and renders the view 'pages/<name>'.
 *
 * Express URL-decodes route parameters, so an encoded slash (%2F) in :name could
 * carry '../' segments into the view path and make res.render look outside the
 * 'pages/' folder. Names are therefore restricted to letters, digits, '_', '-'
 * and '.', with '..' rejected; anything else is passed on with next() so it ends
 * up in the application's normal not-found handling.
 */
router.get('/pages/:name', function (req, res, next) {
    var name = req.params.name;
    log.info('/pages/' + name);

    var isPlainName = /^[\w.-]+$/.test(name) && name.indexOf('..') === -1;
    if (!isPlainName) {
        return next();
    }

    res.render('pages/' + name);
});
/*
 * GET /modals/:name — renders the view 'modals/<name>'.
 *
 * Express URL-decodes route parameters, so an encoded slash (%2F) in :name could
 * carry '../' segments into the view path and make res.render look outside the
 * 'modals/' folder. Names are therefore restricted to letters, digits, '_', '-'
 * and '.', with '..' rejected; anything else is passed on with next() so it ends
 * up in the application's normal not-found handling.
 */
router.get('/modals/:name', function (req, res, next) {
    var name = req.params.name;

    var isPlainName = /^[\w.-]+$/.test(name) && name.indexOf('..') === -1;
    if (!isPlainName) {
        return next();
    }

    res.render('modals/' + name);
});
/* GET /page/:name — renders the 'index' view; the :name parameter is not read here. */
router.get('/page/:name', function renderPage(request, response) {
    response.render('index');
});

// Expose the configured router to whoever requires this module.
module.exports = router;
我还可以使用 Google 的结构化数据测试工具进行抓取测试,它给出的 User-Agent 来源(source)如下:
source: 'Mozilla/5.0 (compatible; Google-Structured-Data-Testing-Tool +https://search.google.com/structured-data/testing-tool)' }
那么,有没有人想出一个简单的方法,把 Facebook 和 Twitter 的请求引导到另一条路由?还是只能老老实实地逐一检查各种来源?
答案 0 :(得分:1)
好的,我设法找到了一个可行的解决方案。基本上,我创建了一个名为 isBot 的函数,调用方式类似于身份验证中间件:请求会先经过 isBot,由它检查以下两点: 1. 网址中是否包含 ?_escaped_fragment_=(Google 和其他一些服务会使用它) 2. 用户代理是否是已知的机器人(感谢 prerender.io,这份服务列表借自它的 .htaccess)
设置很简单。(你其实不必这样做,Rob 说得对)在路由器中添加 express-useragent(只是为了能够从请求头中获取信息):
//var useragent = require('express-useragent'); //Not needed nor used
//router.use(useragent.express()); // Thought this was required, it is not
然后在任何需要检测机器人的路由中添加 isBot:
router.get('/', isBot ,function(req, res, next) {
然后添加以下函数(它使用 bunyan 记录了大量日志,因为我想要统计信息;你可以删除所有以 log.info 开头的行,函数应该仍然可以工作,也可以添加 bunyan,或者直接把这些行改成 console.log——它们只是输出而已)。
如果代码判定请求不是来自机器人,就会照常渲染页面。
/**
 * Names looked for inside the User-Agent header. Each entry is matched as a
 * plain, case-insensitive substring (not as a regular expression).
 */
var KNOWN_BOTS = ["baiduspider", "facebookexternalhit", "twitterbot", "rogerbot", "linkedinbot", "embedly", "quora link preview", "howyoubot", "outbrain", "pinterest", "slackbot", "vkShare", "W3C_Validator"];

/**
 * Finds the first KNOWN_BOTS entry contained in a User-Agent string.
 *
 * @param {string|undefined} userAgent - Raw User-Agent header value; may be missing.
 * @returns {string} The matching KNOWN_BOTS entry, or "" when nothing matches
 *                   or the header is absent.
 */
function findKnownBot(userAgent) {
    if (typeof userAgent !== 'string') {
        return "";
    }
    // Real crawlers announce themselves with mixed casing ("Twitterbot", "Baiduspider"),
    // so both sides are lower-cased before comparing.
    var haystack = userAgent.toLowerCase();
    for (var i = 0; i < KNOWN_BOTS.length; i++) {
        if (haystack.indexOf(KNOWN_BOTS[i].toLowerCase()) !== -1) {
            return KNOWN_BOTS[i];
        }
    }
    return "";
}

/**
 * Express middleware that decides whether a request comes from a crawler /
 * link-preview bot.
 *
 * A request is treated as a bot request when either
 *   1. its URL contains "?_escaped_fragment_=" or "&_escaped_fragment_=", or
 *   2. its User-Agent header contains one of the KNOWN_BOTS names.
 *
 * Non-bot requests are handed to the next handler via next(). For bot requests
 * the detection result is logged and the placeholder below is reached.
 *
 * @param {object} req - Express request (reads req.url and the User-Agent header).
 * @param {object} res - Express response (unused until a bot response is added).
 * @param {function} next - Called only when the request is NOT from a bot.
 */
function isBot(req, res, next) {
    var userAgent = req.get('User-Agent');
    var urlRequest = req.url;
    var botReq = "";
    var botID = ""; //Just so we know why we think it is a bot

    log.info({http_user_agent: userAgent});
    log.info({request_url: urlRequest}); //For debug, we want to know if there are any options

    // The escaped fragment is introduced with '?' on a URL without a query
    // string and with '&' when other query parameters precede it.
    var reqBits = urlRequest.split(/[?&]_escaped_fragment_=/);
    if (reqBits.length > 1) {
        // The request says it is a bot, so we believe it.
        botID = "ESCAPED_FRAGMENT_REQ";
        // An empty fragment value means the wanted path is the part in front of it.
        botReq = reqBits[1].length === 0 ? reqBits[0] : reqBits[1];
    } else {
        // It did not tell us it was a bot request, but the User-Agent may give it away.
        botID = findKnownBot(userAgent);
        if (botID !== "") {
            botReq = urlRequest;
        }
    }

    if (botID === "") {
        log.info("We don't think this is one of those bots any more");
        return next();
    }

    log.info({botID: botID, botReq: botReq});
    // NOTE: nothing is sent and next() is not called on this path, so the request
    // stays open until a response (res.render / res.send) is added here.
    //send something to bots
}
哦,目前它还不会响应机器人的请求。如果想要响应,只需在 //send something to bots 这一行处添加 res.render 或 res.send。