如何使用python-telegram-bot发出异步请求?

时间:2020-01-14 15:23:28

标签: python beautifulsoup python-requests python-telegram-bot

我正在尝试创建一个 Telegram 机器人,该机器人会抓取频道中的所有链接并逐一检查,然后将失效的链接发送给用户。我正在使用 python-telegram-bot、BeautifulSoup 和 Requests 库。为此,我创建了一个频道,并在其中发布了各种链接。

以下代码可以正常运行,但是如果频道中的消息超过10,000条,机器人需要很长时间才能完成。我想重写代码以发送异步请求,但在尝试过程中遇到了一些问题:asyncio 会使函数对 updater 不可见;grequests 引入了猴子补丁(monkey patching),导致 updater.start_polling() 无法工作;faster-than-requests 无法在 Windows 8.1 计算机上安装。另外,我用 simple-requests 和 requests-futures 也没能实现。

如何重做代码并使请求异步?

from telegram.ext import Updater, CommandHandler, MessageHandler, Handler, Filters, ConversationHandler
import logging, requests
from bs4 import BeautifulSoup

# Configure the root logger at import time: every record is printed as
# "timestamp - logger name - level - message" at INFO level and above.
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    level=logging.INFO)

# Module-level logger used by the handlers defined in this file.
logger = logging.getLogger(__name__)

# Conversation state identifiers (0 and 1). Handlers return one of these to
# tell the ConversationHandler which state the conversation moves to.
CHANNEL_LINK, MES_QUANTITY = range(2)


def start(update, context):
    """Greet the user and ask for the channel link.

    Sends the prompt to the chat the update came from and returns
    CHANNEL_LINK so the conversation waits for the link next.
    """
    greeting = "Hello! Send me a link to your channel. Example:https://t.me/s/#**yourchannelname**#"
    chat = update.effective_chat
    context.bot.send_message(chat_id=chat.id, text=greeting)
    return CHANNEL_LINK

def channel_link(update, context):
    channel_link.user_text = update.message.text
    if not "https://t.me/s/" in str(channel_link.user_text):
        update.message.reply_text("Link must look like this:https://t.me/s/#**yourchannelname**#")
    elif "https://t.me/s/" in str(channel_link.user_text):
        update.message.reply_text("Enter a number of messages on your channel.")
        return MES_QUANTITY

def mes_quantity(update, context):
    links = set([])
    res = set([])
    err400 = set([])
    err500 = set([])
    driverr = set([])
    not_404_drive_errors = set([])
    conerror = set([])
    soup_title_check = None
    post_num = update.message.text
    integer_post_num = int(post_num)
    try:
        if integer_post_num < 100000:
            update.message.reply_text("Duration of processing depends on quantity of messages. Please, wait.")
            POST_LINK = ('?before=')
            # Grabing all links, from last message to first
            while integer_post_num > 6:
                fin_link = requests.get(channel_link.user_text + POST_LINK + str(integer_post_num + 1))
                soup = BeautifulSoup(fin_link.text, 'html.parser')
                for link in soup.find_all('a'):
                    links.add(link.get('href'))
                for link in links:
                    if str(link).startswith('http'):
                        res.add(link)
                integer_post_num -= 20
            #Separating storages links (like Google Drive) from all links
            for link in res:
                try:
                    que = requests.get(link)
                    soup_title_check = BeautifulSoup(que.text, 'html.parser')
                    gootitle = soup_title_check.find("p", {"class":"errorMessage"})
                    yantitle = soup_title_check.find("div", {"class":"error error-page"})
                    maititle = soup_title_check.find("div", {"id":"http-error"})
                    if gootitle or yantitle or maititle:
                        driverr.add(link)
                    elif not gootitle and not yantitle and not maititle and link not in conerror:
                        not_404_drive_errors.add(link)
                except requests.exceptions.ConnectionError:
                    conerror.add(link)
            #Checking all other links for HTTP errors
            for link in not_404_drive_errors:
                que = requests.get(link)
                try:
                    if que.status_code == 200:
                        pass
                    elif que.status_code == 400:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 401 or que.status_code == 407:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 403:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 404:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 405:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 406:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 408:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 409:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 410:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 411:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 412:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 413:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 414:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 415:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 416:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 417:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 421:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 422:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 423:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 426:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 428:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 429:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 430:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 434:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 449:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 451:
                        err400.add(link + " Error description. ")
                    elif que.status_code == 500:
                        err500.add(link + " Error description. ")
                    elif que.status_code == 501:
                        err500.add(link + " Error description. ")
                    elif que.status_code == 502:
                        err500.add(link + " Error description. ")
                    elif que.status_code == 503:
                        err500.add(link + " Error description. ")
                    elif que.status_code == 504:
                        err500.add(link + " Error description. ")
                    elif que.status_code == 505:
                        err500.add(link + " Error description. ")
                    elif que.status_code == 507:
                        err500.add(link + " Error description. ")
                    elif que.status_code == 509:
                        err500.add(link + " Error description. ")
                    elif que.status_code == 510:
                        err500.add(link + " Error description.  ")
                    elif que.status_code == 451:
                        err500.add(link + " Error description. ")
                except requests.exceptions.ConnectionError:
                    conerror.add(link)
            if not err400 and not err500 and not driverr and not conerror:
                update.message.reply_text("Everything is good!")
            if err400:
                update.message.reply_text(str(err400))
            if err500:
                update.message.reply_text(str(err500))
            if conerror:
                update.message.reply_text('Connection errors: ' + str(conerror))
            if driverr:
                update.message.reply_text('Follow the link to see error description.' + str(driverr))
            update.message.reply_text("Processing is done.")
            links.clear()
            res.clear()
            err400.clear()
            err500.clear()
            driverr.clear()
            not_404_drive_errors.clear()
            conerror.clear()
        elif integer_post_num > 100000:
            update.message.reply_text("Number is too big.")
    except ValueError:
        update.message.reply_text("You must enter a number.")

def shutdown(update, context):
    """Cancel the running conversation.

    Logs who cancelled, confirms to the user, and returns
    ConversationHandler.END so the conversation is closed.
    """
    user = update.effective_user
    # The format string expects one argument; without it the literal "%s"
    # was written to the log. effective_user may be None for updates that
    # have no sender, so fall back to a placeholder.
    logger.info("User %s canceled the conversation.",
                user.first_name if user else "<unknown>")
    update.message.reply_text('Exiting processing.')

    return ConversationHandler.END

def error(update, context):
    """Log, at WARNING level, the update and the error it caused."""
    cause = context.error
    logger.warning('Update "%s" caused error "%s"', update, cause)


def main():
    """Build the bot, register its handlers, and run it until interrupted.

    The conversation starts on /start, collects the channel link and then
    the message count, and can be cancelled with /shutdown.
    """
    updater = Updater("MyToken", use_context=True)
    dp = updater.dispatcher

    # In python-telegram-bot v12 Filters.text also matches command messages.
    # State handlers are tried before fallbacks, so without excluding
    # commands "/shutdown" would be consumed as ordinary text and the
    # fallback below would never run.
    plain_text = Filters.text & ~Filters.command

    conv_handler = ConversationHandler(
        entry_points=[CommandHandler('start', start)],

        states={
            CHANNEL_LINK: [MessageHandler(plain_text, channel_link)],

            MES_QUANTITY: [MessageHandler(plain_text, mes_quantity)],
        },

        fallbacks=[CommandHandler('shutdown', shutdown)]
    )

    dp.add_handler(conv_handler)

    dp.add_error_handler(error)

    updater.start_polling()

    # Block the main thread until a termination signal (e.g. Ctrl-C) arrives,
    # then stop the updater cleanly.
    updater.idle()

# Start the bot only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

0 个答案:

没有答案
相关问题