不久前,我开始学习 Python,是因为想自己写一个爬虫机器人:它能从某个 subreddit 上抓取帖子并发布到 Twitter。但不幸的是,我遇到了一些问题。
该机器人最多只能下载X个帖子
for submission in subreddit_info.hot(limit=X):
在下载并发推这些帖子之后,脚本就停止了,我必须重新运行它。但重新运行 bot 并没有什么用:它只会按我编写的逻辑打印提示 “[bot] 已经发过推文:(帖子)”,而不会去搜索其他新的帖子。
把代码放进 while True 循环里也没有任何效果:无论让它运行多久,机器人都只是反复打印同样的提示。
import praw
import json
import requests
import tweepy
import time
import os
import urllib.parse
from glob import glob
# Twitter API credentials (placeholders — fill in real values before running).
ACCESS_TOKEN = 'X'
ACCESS_TOKEN_SECRET = 'X'
CONSUMER_KEY = 'X'
CONSUMER_SECRET = 'X'
# Name of the subreddit whose hot posts are mirrored to Twitter.
SUBREDDIT_TO_MONITOR = ''
# Directory where post images are downloaded before tweeting.
IMAGE_DIR = 'img'
# File holding one already-tweeted submission id per line (see log_tweet).
POSTED_CACHE = 'posted_posts.txt'
# Text appended to every tweet after the post link.
TWEET_SUFFIX = ''
# Twitter's maximum tweet length, in characters.
TWEET_MAX_LEN = 280
# Seconds to wait between consecutive tweets.
DELAY_BETWEEN_TWEETS = 1
# Characters budgeted per link for Twitter's t.co URL wrapper.
T_CO_LINKS_LEN = 12
def setup_connection_reddit(subreddit):
    """Create a read-only Reddit client and return the requested subreddit.

    Args:
        subreddit: Name of the subreddit to monitor.

    Returns:
        A praw ``Subreddit`` instance for ``subreddit``.
    """
    print('[bot] Setting up connection with reddit')
    # Bug fix: the original ignored the ``subreddit`` argument and always
    # used the global SUBREDDIT_TO_MONITOR, making the parameter dead code.
    reddit_api = praw.Reddit(
        user_agent='Reddit Twitter Tool Monitoring {}'.format(subreddit),
        client_id='X',
        client_secret='X')
    return reddit_api.subreddit(subreddit)
def tweet_creator(subreddit_info, limit=10):
    """Collect hot posts from a subreddit that have not been tweeted yet.

    Args:
        subreddit_info: praw ``Subreddit`` object to read posts from.
        limit: Maximum number of hot posts to examine per run (previously
            hard-coded to 10 — raise it to let the bot find more posts).

    Returns:
        Tuple ``(post_dict, post_ids)`` where ``post_dict`` maps post title
        to ``{'link': url, 'img_path': local path or ''}`` and ``post_ids``
        is the parallel list of reddit submission ids.
    """
    post_dict = {}
    post_ids = []
    print('[bot] Getting posts from reddit')
    for submission in subreddit_info.hot(limit=limit):
        if already_tweeted(submission.id):
            print('[bot] Already tweeted: {}'.format(str(submission)))
            continue
        # Bug fix: post_dict is keyed by title, so a second post with the
        # same title would overwrite the first and desync post_dict from
        # post_ids (they are zipped together in tweeter()). Skip duplicates.
        if submission.title in post_dict:
            continue
        post_dict[submission.title] = {
            'link': submission.url,
            'img_path': get_image(submission.url),
        }
        post_ids.append(submission.id)
    return post_dict, post_ids
def already_tweeted(post_id):
    """Return True if ``post_id`` is recorded in the POSTED_CACHE file.

    The cache file holds one reddit submission id per line (see log_tweet).
    """
    with open(POSTED_CACHE, 'r') as in_file:
        for line in in_file:
            # Bug fix: the original used ``post_id in line``, a substring
            # test that false-positives whenever one id is a substring of
            # another. Compare whole ids instead.
            if line.strip() == post_id:
                return True
    return False
def strip_title(title, num_characters):
    """Return ``title`` shortened to at most ``num_characters`` characters.

    Titles that already fit are returned unchanged; longer ones are cut to
    ``num_characters - 1`` characters and suffixed with an ellipsis.
    """
    if len(title) > num_characters:
        title = title[:num_characters - 1] + '…'
    return title
def get_image(img_url):
    """Download an i.redd.it image into IMAGE_DIR.

    Args:
        img_url: URL attached to the reddit submission.

    Returns:
        Local path of the downloaded file, or '' when the URL is not an
        i.redd.it image or the download fails.
    """
    if 'i.redd.it' not in img_url:
        print('[bot] Post doesn\'t point to an i.redd.it link')
        return ''
    file_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
    img_path = IMAGE_DIR + '/' + file_name
    print('[bot] Downloading image at URL ' + img_url + ' to ' + img_path)
    resp = requests.get(img_url, stream=True)
    if resp.status_code == 200:
        with open(img_path, 'wb') as image_file:
            for chunk in resp:
                image_file.write(chunk)
        return img_path
    # Bug fix: the original concatenated the int status code onto a str
    # (TypeError on every failed download) and then fell off the end of the
    # function, returning None instead of the documented ''.
    print('[bot] Image failed to download. Status code: '
          + str(resp.status_code))
    return ''
def tweeter(post_dict, post_ids):
    """Tweet every collected post, attaching its image when one exists.

    ``post_dict`` and ``post_ids`` are the parallel structures produced by
    tweet_creator(); each successfully tweeted id is appended to the cache.
    """
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    api = tweepy.API(auth)
    for title, submission_id in zip(post_dict, post_ids):
        entry = post_dict[title]
        image = entry['img_path']
        suffix = ' ' + entry['link'] + TWEET_SUFFIX
        # Twitter wraps every link in t.co, so budget its fixed length
        # (plus one more link's worth when an image is attached).
        reserved = 1 + T_CO_LINKS_LEN + len(TWEET_SUFFIX)
        if image:
            reserved += T_CO_LINKS_LEN
        status = strip_title(title, TWEET_MAX_LEN - reserved) + suffix
        print('[bot] Posting this link on Twitter')
        print(status)
        if image:
            print('[bot] With image ' + image)
            api.update_with_media(filename=image, status=status)
        else:
            api.update_status(status=status)
        log_tweet(submission_id)
        time.sleep(DELAY_BETWEEN_TWEETS)
def log_tweet(post_id):
    """Append ``post_id`` on its own line to the posted-ids cache file."""
    with open(POSTED_CACHE, 'a') as out_file:
        out_file.write('{}\n'.format(post_id))
def main():
    """Run one bot cycle: fetch new posts, tweet them, then clean up."""
    # Make sure the cache file and the image directory exist up front.
    if not os.path.exists(POSTED_CACHE):
        open(POSTED_CACHE, 'w').close()
    if not os.path.exists(IMAGE_DIR):
        os.makedirs(IMAGE_DIR)
    subreddit = setup_connection_reddit(SUBREDDIT_TO_MONITOR)
    post_dict, post_ids = tweet_creator(subreddit)
    tweeter(post_dict, post_ids)
    # Downloaded images are only needed while tweeting; delete them after.
    for leftover in glob(IMAGE_DIR + '/*'):
        os.remove(leftover)
# Run a single fetch-and-tweet cycle when executed as a script.
if __name__ == '__main__':
    main()
以上就是代码。我猜问题出在 “tweet_creator” 或 “already_tweeted” 上。如果有人能帮忙,我将不胜感激。
此外,如果您知道与此问题相关的任何优秀教程,都可以将其链接起来,我很想自己学习。
谢谢!