如何在reddit评论中获取链接并继续打开它?

时间:2013-07-17 02:38:40

标签: python python-2.7 urllib2 urllib reddit

我正在尝试为Reddit创建一个用于抓取图像URL的机器人,使用simpleCV查找面部并在脸部周围裁剪,然后锐化并增强剩余的图像。但是当我试着在评论中找到URL时,我遇到了一个问题。我不得不尝试几种不同的方式使代码在完全不同的布局中工作,但似乎没有任何工作。我似乎遇到了urllib和urllib2的大部分问题。

当我使用urllib2时,它会告诉我“列表对象没有'超时'属性”或者没有超时属性的东西。我并不想要超时,只要它需要得到它,我就可以获得该文件。当我去使用urllib时,我遇到了一个关于“set object has no attribute'trip'”的错误。

我开始对此我的结束了,但我想完成它。有人能帮助我吗?此外,如果有人因语法需要编辑此问题,请继续。我不太擅长标点符号。

这是我的代码:

from __future__ import unicode_literals, print_function
import re
from PIL import Image, ImageEnhance
import urllib
import io
from StringIO import StringIO
import praw
import time
import random
import pickle
import SimpleCV
import ftplib


#Make things simpler by declaring these up here
user_agent="""Bot by /u/mistriliasysmic. Medium intelligence. Finds images in comments                 requiring enhancement and enhances them."""
user="*******"
passwd="**********"

gem_data=open("gem.data","r+")

print("Loaded datafile")

random.seed()

session=praw.Reddit(user_agent=user_agent)
session.login(user,passwd)

already_done=pickle.load(gem_data)

print("""Sucess, logged in as
User: "{0}"
User-agent: "{2}""""".format(user,passwd,user_agent))
circlejerk=session.get_subreddit('test')
print("Loaded /r/test")

match_urls = re.compile(r"""((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.‌​][a-z]{2,4}/)(?:[^\s()<>]+|(([^\s()<>]+|(([^\s()<>]+)))*))+(?:(([^\s()<>]+|(‌​([^\s()<>]+)))*)|[^\s`!()[]{};:'".,<>?«»“”‘’]))""", re.DOTALL)

def findNEWimg(comment):
text = comment.body
broken = set(re.findall(match_urls,text))
condition=False
if broken:
    condition = True
return condition, broken

#def openIMG(url):
#    fd = urllib.urlopen(url)
#    image_file = io.BytesIO(fd.read())
#    return image_file

def _sharpen(image, sharpness=1.6):
    """
    Returns a sharpened copy of an image.

    Resizing down or thumbnailing your images with PIL.Image tends to make them blurry. I apply this snipped to such images in order to regain some sharpness.
"""
sharpener = ImageEnhance.Sharpness(image)
sharpened_image = sharpener.enhance(sharpness)
return sharpened_image

def bot_action(c, links):
    fd = urllib.urlopen(links)
    im = image.open(StringIO(fd.read()))
    img = Image(im)
    segment = HaarCascade("face.xml")
    autoface = img.findHaarFeatures(segment)
    if (autoface is not None):
        face = autoface[-1].crop()
        fixed_face = _sharpen(face)
        fixed_face.save("C:/SavedInhances/"+im)
        session = ftplib.FTP('tmngp.heliohost.org','**********','*************')
        file = open('C:/SavedInhances/'+im,'rb')
        session.pwd()
        session.cwd('/public_html/Stored_Inh/')
        session.storbinary(im,file)
        file.close()
        session.quit()
        print (comment.author.name, comment.subreddit.display_name, comment.submission.title)
        comment.reply("""
        Hello!

         CSInhancer here! I have used facial recognition and enhanced to the best of my abilities!

        [Here]("""+'http://tmngp.heliohost.org/Stored_Inh/'+im+""") is your image.

        *^Hello! ^I'm ^a ^bot! ^I ^am ^always ^being ^refined ^in ^my ^downtime!*""")
        print("Responded to {0}".format(comment.body))

triggerword = "enhance"

while True:
    for comment in test.get_comments(limit=1000):
        if comment.id not in already_done and triggerword in comment.body:
            try:

                bot_condition_met, parsed = findNEWimg(comment)
                if bot_condition_met:
                    try:
                        bot_action(comment, parsed)

                    except KeyboardInterrupt:
                        running = False
                    except praw.errors.APIException, e:
                        print ("[ERROR]:", e)
                        print ("sleeping 30 sec")
                        sleep(30)
                    except Exception, e:
                        print ("[ERROR]:", e)
                        print ("Blindly handling error")
                        continue
            except praw.errors.RateLimitExceeded as detail:
                print(" Rate Limited. Waiting for Reddit to stop complaining: {0} minutes".format(str(detail).split(' ')[9]))
                print( str(detail))
                time.sleep(int(str(detail).split(' ')[9])*60)

                bot_condition_met, parsed = findNEWimg(comment)
                if bot_condition_met:
                    try:
                        bot_action(comment, parsed)

                    except KeyboardInterrupt:
                        running = False
                    except praw.errors.APIException, e:
                        print ("[ERROR]:", e)
                        print ("sleeping 30 sec")
                        sleep(30)
                    except Exception, e:
                        print ("[ERROR]:", e)
                        print ("Blindly handling error")
                        continue
            already_done.append(commend.id)
            pickle.dump(already_done,gem_data)
            gem_data.close()
            gem_data.open("gem.data","r+")
            print("Waiting 120 seconds before posting again.")
            time.sleep(120)

0 个答案:

没有答案