所以我使用node.js和模块instagram-node-lib来下载Instagram帖子的元数据。我有几个要搜索的主题标签,我想下载所有现有的帖子(在分页期间处理请求失败)以及监控所有新帖子。
我设法破解了第一部分 - 下载所有现有帖子并处理失败(我注意到有时Instagram API会失败,所以我已经添加了冗余以记住我下载的最后一个成功页面从那一点再次尝试)。对于任何感兴趣的人,这是我的代码(请注意,我使用Postgres来保存帖子,并且为了便于阅读和商业目的,我已经缩写/混淆了一些代码)**道歉的长度代码,但我认为这对某人有用:
var db = new (require('./postgres'))
,api = require("instagram-node-lib")
;
var HASHTAGS = ["fluffy", "kittens"] //this is just an example!
,CLIENT_ID = "YOUR_CLIENT_ID"
,CLIENT_SECRET = "YOUR_CLIENT_SECRET"
,HOST = "https://api.instagram.com"
,PORT = 443
,PATH = "/v1/media/popular?client_id=" + CLIENT_ID
;
var hashtagIndex = 0
,settings
;
/**
* Initialise the module for use
*/
exports.initialise = function(){
api.set("client_id", CLIENT_ID);
api.set("client_secret", CLIENT_SECRET);
if( !settings){
settings = {
hashtags: []
}
for( var i in HASHTAGS){
settings.hashtags[i] = {
name: HASHTAGS[i],
maxTagId: null,
minTagId: null,
nextMaxTagId: null,
}
}
}
// console.log(settings);
db.initialiseSettings(); //I haven't included the code for this - basically just loads settings from the database, overwriting the defaults above if they exist, otherwise it creates them using the above object. I store the settings as a JSON object in the DB and parse them on load
execute();
}
function execute(){
var params = {
name: HASHTAGS[hashtagIndex],
complete: function(data, pagination){
var hashtag = settings.hashtags[hashtagIndex];
//from scratch
if( !hashtag.maxTagId){
console.log('Downloading old posts from scratch');
getOldPosts();
}
//still loading old (previously failed)
else if( hashtag.nextMaxTagId){
console.log('Downloading old posts from last saved position');
getOldPosts(hashtag.nextMaxTagId);
}
//new posts only
else {
console.log('Downloading new posts only');
getNewPosts(hashtag.minTagId);
}
},
error: function(msg, obj, caller){
apiError(msg, obj, caller);
}
}
api.tags.info(params);
}
function getOldPosts(maxTagId){
console.log();
var params = {
name: HASHTAGS[hashtagIndex],
count: 100,
max_tag_id: maxTagId || undefined,
complete: function(data, pagination){
console.log(pagination);
var hashtag = settings.hashtags[hashtagIndex];
//reached the end
if( pagination.next_max_tag_id == hashtag.maxTagId){
console.log('Downloaded all posts for #' + HASHTAGS[hashtagIndex]);
hashtag.nextMaxTagId = null; //reset nextMaxTagId - that way next time we execute the script we know to just look for new posts
saveSettings(function(){
next();
}); //Another function I haven't include - just saves the settings object, overwriting what is in the database. Once saved, executes the next() function
}
else {
//from scratch
if( !hashtag.maxTagId){
//these values will be saved once all posts in this batch have been saved. We set these only once, meaning that we have a baseline to compare to - enabling us to determine if we have reached the end of pagination
hashtag.maxTagId = pagination.next_max_tag_id;
hashtag.minTagId = pagination.min_tag_id;
}
//if there is a failure then we know where to start from - this is only saved to the database once the posts are successfully saved to database
hashtag.nextMaxTagId = pagination.next_max_tag_id;
//again, another function not included. saves the posts to database, then updates the settings. Once they have completed we get the next page of data
db.savePosts(data, function(){
saveSettings(function(){
getOldPosts(hashtag.nextMaxTagId);
});
});
}
},
error: function(msg, obj, caller){
apiError(msg, obj, caller);
//keep calm and try again - this is our failure redundancy
execute();
}
}
var posts = api.tags.recent(params);
}
/**
* Still to be completed!
*/
function getNewPosts(minTagId){
}
function next(){
if( hashtagIndex < HASHTAGS.length - 1){
console.log("Moving onto the next hashtag...");
hashtagIndex++;
execute();
}
else {
console.log("All hashtags processed...");
}
}
好的,这就是我对解决下一个难题的困境 - 下载新帖子(换句话说,只有那些自我上次下载所有帖子后才出现的新帖子)。我应该使用Instagram subscriptions还是有办法实现类似于我已经使用过的分页?我担心如果我使用以前的解决方案,那么如果我的服务器出现问题并且它会停机一段时间,那么我会错过一些帖子。我&#39;担心如果我使用后一种解决方案,则可能无法翻阅记录,因为Instagram API设置为启用转发分页而不是后向分页?
我曾尝试在Google Instagram API开发人员小组中发布问题几次,我的消息似乎没有出现在论坛中,所以我认为我采用可靠的stackoverflow