discord.py-rewrite:使用 PyQt5 抓取动态网页无法正常工作

时间:2019-12-09 01:32:07

标签: python web-scraping pyqt pyqt5 discord.py-rewrite

简而言之,我正在制作一个 Discord 机器人,它会把网站 https://growtopiagame.com 上的“每日世界”图片下载为 D:\Kelbot/render.png,然后把图片发送到调用该命令的频道。但是,该网站不是静态网站,图片的 URL 不在页面源代码中,因此我找到了使用 PyQt5 的解决方案:

import re
import bs4 as bs
import sys
import urllib.request
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
# Discord command "wotd": renders https://growtopiagame.com in a
# QWebEnginePage, pulls the first URL found inside the "world-of-day-image"
# anchor, downloads it, and sends an image file to the invoking context.
# The cooldown decorator is configured as 1 use per 60 seconds per user.
@client.command()
@commands.cooldown(1, 60, commands.BucketType.user)
async def wotd(ctx):
    # Helper page that loads a URL and keeps the rendered HTML in ``self.html``.
    # The class is re-defined on every invocation of the command.
    class Page(QWebEnginePage):
        def __init__(self, url):
            # NOTE(review): a new QApplication is constructed for every Page,
            # i.e. on every command call; Qt expects one application object
            # per process, which is presumably related to the crash on the
            # second call -- confirm.
            self.app = QApplication(sys.argv)
            QWebEnginePage.__init__(self)
            self.html = ''
            self.loadFinished.connect(self._on_load_finished)
            self.load(QUrl(url))
            # Runs the Qt event loop; it is stopped by self.app.quit() in
            # Callable(). NOTE(review): this is a blocking call made inside an
            # async command, so it presumably stalls the asyncio loop meanwhile.
            self.app.exec_()

        def _on_load_finished(self):
            # Requests the page HTML and registers Callable as the receiver.
            # self.html is assigned whatever toHtml() returns here and is then
            # overwritten with the real markup inside Callable().
            self.html = self.toHtml(self.Callable)
            print('Load finished')

        def Callable(self, html_str):
            # Stores the HTML and stops the event loop started in __init__.
            self.html = html_str
            self.app.quit()

    def main():
        page = Page('https://growtopiagame.com')
        soup = bs.BeautifulSoup(page.html, 'html.parser')
        js_test = soup.find('a', class_='world-of-day-image')
        # Stringify every child of the anchor; only link[0] is inspected below.
        link = []
        for x in js_test:
            link.append(str(x))
        # Extract every http(s) URL from the first child's markup.
        urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', link[0])
        urllib.request.urlretrieve(urls[0], "D:\Kelbot/render.png")
    # main() is only executed when this module is the entry script.
    if __name__ == '__main__': main()
    # NOTE(review): the relative path 'render.png' is sent, while main() saves
    # to "D:\Kelbot/render.png"; the two only coincide when the working
    # directory is D:\Kelbot -- verify.
    await ctx.send(file=discord.File('render.png'))

当我从任务计划程序运行机器人时,它无法工作。于是我尝试用 Python Shell 和 Visual Studio Code 运行它,两者都能正常工作。但是,当第二次调用该命令时,Python Shell 和 Visual Studio Code 都会重新启动,机器人也因为某种原因被终止。这是因为该类与 discord.py 不兼容吗?我该如何解决这个问题?有没有比使用 PyQt5 更好的解决方案?

(有时我并没有得到图片,而是得到https://growtopiagame.com/resources/assets/images/load.gif,这是他们在显示实际的“每日世界”图片之前放置的图像,但是当我重新启动计算机时它会自行修复)

1 个答案:

答案 0 :(得分:1)

PyQt5 与 asyncio 不兼容。虽然有一些库(例如 quamash、asyncqt、qasync)试图让两者兼容,但在您的情况下没有必要,因为您需要 Qt 完成的唯一任务就是抓取网页以获得图片的 URL 并下载它。因此,解决方法是创建一个只负责这项工作的外部程序,然后在 wotd 函数中调用它:

├── downloader.py
├── .env
└── main.py

main.py

import asyncio
import os
import sys
import uuid

import discord
from discord.ext import commands

from dotenv import load_dotenv

# Bot instance; commands registered on it are invoked with the "!" prefix.
bot = commands.Bot(command_prefix="!")


# Command "wotd": runs downloader.py in a child process so that Qt never
# shares a process (or an event loop) with asyncio, then sends the PNG the
# child wrote. Cooldown is configured as 1 use per 60 seconds per user.
# (Kept as comments on purpose: a docstring would become the command's help
# text in discord.py.)
@commands.cooldown(1, 60, commands.BucketType.user)
@bot.command()
async def wotd(ctx):
    current_dir = os.path.dirname(os.path.realpath(__file__))
    images_dir = os.path.join(current_dir, "images")

    # Idempotent creation: no separate exists/isdir check is needed, and a
    # plain file squatting on the name still raises FileExistsError.
    os.makedirs(images_dir, exist_ok=True)

    # A random file name keeps overlapping invocations from clobbering each
    # other's output.
    output_filename = os.path.join(images_dir, "{}.png".format(uuid.uuid4()))

    # Use the interpreter that runs the bot so the child sees the same
    # installed packages.
    args = [sys.executable, os.path.join(current_dir, "downloader.py"), output_filename]
    process = await asyncio.create_subprocess_exec(
        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    print("Started: %s, pid=%s" % (args, process.pid), flush=True)
    stdout, stderr = await process.communicate()

    if process.returncode != 0:
        # errors="replace": the child's output is not guaranteed to be valid
        # UTF-8, and a decode error here would mask the real failure.
        print(
            "Failed: %s, pid=%s, result: %s"
            % (args, process.pid, stderr.decode(errors="replace").strip()),
            flush=True,
        )
        print("error")
        return

    print(
        "Done: %s, pid=%s, result: %s"
        % (args, process.pid, stdout.decode(errors="replace").strip()),
        flush=True,
    )
    await ctx.send(file=discord.File(output_filename))
    print("end", output_filename)


@wotd.error
async def wotd_error(ctx, error):
    """Handle errors raised while running the ``wotd`` command.

    A cooldown violation is answered in the channel with the remaining wait
    time. Every error, cooldown or not, is also printed with its context.
    """
    on_cooldown = isinstance(error, commands.CommandOnCooldown)
    if on_cooldown:
        template = "This command is ratelimited, please try again in {:.2f}s"
        await ctx.send(template.format(error.retry_after))
    print(ctx, error)


def main():
    """Load the environment from ``.env`` and run the bot.

    Raises:
        SystemExit: if ``DISCORD_TOKEN`` is missing or empty, so the problem
            is reported up front instead of surfacing as an obscure failure
            inside ``bot.run``.
    """
    load_dotenv()
    token = os.getenv("DISCORD_TOKEN")
    if not token:
        raise SystemExit(
            "DISCORD_TOKEN is not set; define it in .env or in the environment"
        )
    bot.run(token)


# Start the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

downloader.py

import sys

from PyQt5 import QtCore, QtWidgets, QtNetwork, QtWebEngineWidgets


class DownLoader(QtCore.QObject):
    """Render https://growtopiagame.com and save its "world-of-day" image.

    The page is loaded in a QWebEnginePage; once loading finishes, the DOM is
    polled with JavaScript until the ``<img>`` inside the element with class
    ``world-of-day-image`` is present, and its ``src`` is downloaded to
    ``path``. The application event loop is ended with exit code 0 on success
    and -1 on any failure, with the reason printed to stderr.

    Args:
        path: Destination file for the downloaded image.
        parent: Optional QObject parent.
    """

    # Delay between two DOM polls while the image element is not there yet.
    POLL_INTERVAL_MS = 100
    # Upper bound on DOM polls (about 60 s in total); without it the process
    # would spin forever when the element never appears.
    MAX_POLLS = 600

    def __init__(self, path, parent=None):
        super().__init__(parent)
        self.path = path
        self._polls_left = self.MAX_POLLS

        url = "https://growtopiagame.com"
        self.manager = QtNetwork.QNetworkAccessManager(self)

        # A uniquely named profile is created for every run (presumably so
        # that nothing stored by an earlier run is reused).
        profile = QtWebEngineWidgets.QWebEngineProfile(
            QtCore.QUuid.createUuid().toString(QtCore.QUuid.Id128), self
        )
        self.page = QtWebEngineWidgets.QWebEnginePage(profile, self)
        # Echo load progress to stdout.
        self.page.loadProgress.connect(print)

        self.manager.finished.connect(self.on_finished)
        self.page.loadFinished.connect(self.on_load_finished)

        self.page.load(QtCore.QUrl(url))

    def _fail(self, *message):
        """Print *message* to stderr and end the event loop with code -1."""
        print(*message, file=sys.stderr)
        QtCore.QCoreApplication.exit(-1)

    @QtCore.pyqtSlot(bool)
    def on_load_finished(self, ok):
        """Start looking for the image once the page reports it has loaded."""
        if ok:
            self.request_url()
        else:
            self._fail("error", ok)

    def request_url(self):
        """Ask the page for the image URL; the result is passed to download()."""
        js = """
        function get_url(){
            var elements = document.getElementsByClassName("world-of-day-image")
            if(elements.length){
                var element = elements[0];
                if(element.children.length){
                    var e = element.children[0]
                    if(e.tagName == "IMG")
                        return e.src
                }
            }
            return "";
        }
        get_url();
        """
        self.page.runJavaScript(js, self.download)

    def download(self, url):
        """Fetch *url*, or schedule another DOM poll when it is still empty."""
        if url:
            print(url)
            request = QtNetwork.QNetworkRequest(QtCore.QUrl(url))
            self.manager.get(request)
            return

        # The element is not available yet: retry, but only a bounded number
        # of times so the process cannot hang indefinitely.
        self._polls_left -= 1
        if self._polls_left <= 0:
            self._fail("image element not found after", self.MAX_POLLS, "polls")
            return
        QtCore.QTimer.singleShot(self.POLL_INTERVAL_MS, self.request_url)

    @QtCore.pyqtSlot(QtNetwork.QNetworkReply)
    def on_finished(self, reply):
        """Write the downloaded bytes to ``self.path`` and end the event loop."""
        if reply.error() != QtNetwork.QNetworkReply.NoError:
            self._fail(reply.error(), reply.errorString())
            return

        file = QtCore.QFile(self.path)
        if not file.open(QtCore.QIODevice.WriteOnly):
            # Exiting with 0 here would make the caller believe the image
            # exists on disk.
            self._fail("cannot open", self.path, file.errorString())
            return

        r = reply.readAll()
        print(len(r))
        written = file.write(r)
        file.close()
        if written != len(r):
            self._fail("short write to", self.path, written, len(r))
            return
        QtCore.QCoreApplication.quit()


if __name__ == "__main__":
    # Script entry point: expects the output image path as the single
    # positional argument and exits with the event loop's return code.
    application = QtWidgets.QApplication(sys.argv)

    cli = QtCore.QCommandLineParser()
    cli.addPositionalArgument("path", "Path of image")
    cli.process(application)

    positional = cli.positionalArguments()
    if len(positional) == 0:
        print("not path", file=sys.stderr)
        sys.exit(-1)

    # Bound to a name so the object stays referenced while the loop runs.
    downloader = DownLoader(positional[0])
    sys.exit(application.exec_())

.env

DISCORD_TOKEN=YOUR_TOKEN_HERE