为什么 asyncio 的 run_in_executor 不会增加事件循环中的任务数量

时间:2017-04-04 22:01:26

标签: python python-asyncio

我正在尝试学习python asyncio
我能理解事件循环、协程、Future 和 Task 各自的含义。
我认为事件循环会以某种方式把协程包装成任务,把它们放进某种队列中调度,然后逐个执行。我的问题与 run_in_executor 方法有关。
我试图理解一些 Python 代码,以便将其转换为 C++
在这段代码中我理解:
 作者创建了一个额外的线程>>>>
    futures.append(executor.submit(do_work, symbol, day, files[symbol]))
 然后这个新线程创建了事件循环>>
csv.append(day, decompress(day, ***fetch_day(symbol, day)***))

`def fetch_day(symbol, day):
    local_data = threading.local()
    loop = getattr(local_data, 'loop', asyncio.new_event_loop())
    asyncio.set_event_loop(loop)
    ***loop = asyncio.get_event_loop()***#first event loop
    loop.set_debug(True)`  

计划24个任务然后>>
    `def create_tasks(symbol,day):

    start = 0

    if is_dst(day):
        start = 1

    url_info = {
        'currency': symbol,
        'year': day.year,
        'month': day.month - 1,
        'day': day.day
    }
    tasks = [asyncio.ensure_future(get(URL.format(**url_info, hour=i))) for i in range(0, 24)]

    # if is_dst(day):
    #     next_day = day + datetime.timedelta(days=1)
    #     url_info = {
    #         'currency': symbol,
    #         'year': next_day.year,
    #         'month': next_day.month - 1,
    #         'day': next_day.day
    #     }
    #     tasks.append(asyncio.ensure_future(get(URL.format(**url_info, hour=0))))
    return tasks`

`   tasks = create_tasks(symbol, day)
    #z=asyncio.Task.all_tasks[0]##############
    ***loop.run_until_complete(asyncio.wait(tasks))***
    #y=asyncio.Task.all_tasks[0]##############`  

每项任务
    async def get(url):#each task with total24 of get tasks
        ***loop = asyncio.get_event_loop()***#i dont know if same loop or new one
        buffer = BytesIO()
        id = url[35:].replace('/', " ")
        start = time.time()
        Logger.info("Fetching {0}".format(id))
        for i in range(ATTEMPTS):
            try:
                #z=asyncio.Task.all_tasks[0]##############
                ***res = await loop.run_in_executor(None, lambda: requests.get(url, stream=True))***#this loop if same loop why not increase number of tasks with each get run of the 24 gets>>we should have 48 futures in total

“这里我不知道它是创建了一个新的循环,还是沿用了那个额外线程(我称之为第一个线程)中的同一个循环。” 该函数通过循环的 run_in_executor 创建新线程。

我想知道它是否正在制作新的循环
或者只是从额外的第一个线程使用相同的循环

然后如果它使用相同的循环>>>>>
那么为什么调用 run_in_executor 之后,循环中的任务数量不会增加

“我的理解是 run_in_executor 会向循环中添加新任务,那么为什么循环中的任务数量没有增加?”

另一种猜想是:事件循环中还有另一组专门用于线程的 Future

我怎样才能随时知道循环中正在等待的 Future 数量?

代码来自 GitHub 上的 duka-master

这是代码:

main.py

#!/usr/bin/env python3.5
import sys
import logging
import argparse
from datetime import date, timedelta

#from duka.app import app
#from duka.core import valid_date, set_up_signals
#from duka.core.utils import valid_timeframe, TimeFrame
from app import app
from core import valid_date, set_up_signals
from core.utils import valid_timeframe, TimeFrame
VERSION = '0.2.1'


def main():
    """Parse the command line and start the download.

    The positional SYMBOLS argument is always handed to ``app`` as a list
    of symbol names. When ``--startdate`` / ``--enddate`` are omitted, the
    value of ``--day`` is used for the missing bound.
    """
    parser = argparse.ArgumentParser(prog='duka', usage='%(prog)s [options]')
    parser.add_argument('-v', '--version', action='version',
                        version='Version: %(prog)s-{version}'.format(version=VERSION))
    # nargs='*' always produces a list (and still falls back to the default
    # when no symbol is given). With nargs='?' a single symbol arrived as a
    # plain string, which app() would then iterate character by character.
    parser.add_argument('symbols', metavar='SYMBOLS', type=str, nargs='*',
                        help='symbol list using format EURUSD EURGBP (default GBPJPY)', default=["GBPJPY"])
    parser.add_argument('-d', '--day', type=valid_date, help='specific day format YYYY-MM-DD (default yesterday)',
                        default=date.today() - timedelta(1))
    parser.add_argument('-s', '--startdate', type=valid_date,
                        help='start date format YYYY-MM-DD (default: value of --day)')
    parser.add_argument('-e', '--enddate', type=valid_date,
                        help='end date format YYYY-MM-DD (default: value of --day)')
    parser.add_argument('-t', '--thread', type=int, help='number of threads (default 5)', default=5)
    parser.add_argument('-f', '--folder', type=str, help='destination folder (default .)', default='.')
    parser.add_argument('-c', '--candle', type=valid_timeframe,
                        help='use candles instead of ticks. Accepted values M1 M2 M5 M10 M15 M30 H1 H4',
                        default=TimeFrame.TICK)
    parser.add_argument('--header', action='store_true', help='include CSV header (default false)', default=False)
    args = parser.parse_args()

    # An explicit start/end date wins; otherwise fall back to the single day.
    start = args.startdate if args.startdate is not None else args.day
    end = args.enddate if args.enddate is not None else args.day

    # Configure logging to show the name of the thread
    # where the log message originates.
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(threadName)10s %(name)18s: %(message)s',
        stream=sys.stderr,
    )

    set_up_signals()
    app(args.symbols, start, end, args.thread, args.candle, args.folder, args.header)


# Run the command-line entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()

app.py

import concurrent
import threading
import time
from collections import deque
from datetime import timedelta, date

#from ..core import decompress, fetch_day, Logger
#from ..core.csv_dumper import CSVDumper
#from ..core.utils import is_debug_mode, TimeFrame
from core import decompress, fetch_day, Logger
from core.csv_dumper import CSVDumper
from core.utils import is_debug_mode, TimeFrame

# Value returned by date.weekday() for Saturday (Monday is 0); days() skips it.
SATURDAY = 5
# Number of (symbol, day) jobs finished so far; incremented by the workers in app().
day_counter = 0


def days(start, end):
    """Yield each date from ``start`` to ``end`` (inclusive) worth fetching.

    Saturdays and the current day are left out. Nothing is yielded when
    ``start`` is later than ``end``.
    """
    if start > end:
        return
    one_day = timedelta(days=1)
    stop = end + one_day  # exclusive upper bound
    today = date.today()
    current = start
    while current != stop:
        if not (current.weekday() == SATURDAY or current == today):
            yield current
        current = current + one_day


def format_left_time(seconds):
    """Format a number of seconds as ``H:MM:SS``.

    A negative value means "no estimate available" and is rendered as the
    placeholder ``--:--:--``.
    """
    if seconds < 0:
        return "--:--:--"
    total_minutes, secs = divmod(seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    fields = (hours, minutes, secs)
    return "%d:%02d:%02d" % fields


def update_progress(done, total, avg_time_per_job, threads):
    """Redraw the one-line progress bar on stdout.

    done / total     -- finished and overall job counts; the bar is capped
                        at 100% and shows 100% when ``total`` is 0.
    avg_time_per_job -- average seconds per job, used for the time-left
                        estimate (remaining jobs * average / threads).
    threads          -- number of workers the remaining jobs are spread over.

    Nothing is printed when ``is_debug_mode()`` is true.
    """
    ratio = done / total if total != 0 else 1
    percent = int(min(ratio, 1.0) * 100)
    bar = '#' * percent + '-' * (100 - percent)
    estimation = avg_time_per_job * (total - done) / threads
    if is_debug_mode():
        return
    # '\r' returns to the start of the line so the bar is overwritten in place.
    print('\r[{0}] {1}%  Left : {2}  '.format(bar, percent, format_left_time(estimation)), end='')


def how_many_days(start, end):
    """Return how many dates ``days(start, end)`` yields."""
    count = 0
    for _ in days(start, end):
        count += 1
    return count


def avg(fetch_times):
    """Return the arithmetic mean of ``fetch_times``, or -1 when it is empty."""
    count = len(fetch_times)
    if count == 0:
        # -1 is the "no data yet" marker.
        return -1
    return sum(fetch_times) / count


def name(symbol, timeframe, start, end):
    """Build the CSV file name ``<symbol>_<timeframe>_<start>[_<end>].csv``.

    The timeframe part is the name of the ``TimeFrame`` attribute whose
    value equals ``timeframe`` (the last such attribute in ``dir`` order).
    The end date is appended only when it differs from the start date.
    If no attribute matches, the lookup below leaves ``ts_str`` unbound and
    an UnboundLocalError is raised.
    """
    for attr in dir(TimeFrame):
        if getattr(TimeFrame, attr) == timeframe:
            ts_str = attr

    pieces = [symbol, ts_str, str(start)]
    if start != end:
        pieces.append(str(end))

    return "_".join(pieces) + ".csv"


def app(symbols, start, end, threads, timeframe, folder, header):
    """Download every requested day for every symbol and dump the results.

    One job per (symbol, day) pair is submitted to a thread pool. Each job
    fetches the day with ``fetch_day``, decompresses it and appends it to
    the ``CSVDumper`` of its symbol. When all jobs have completed, every
    dumper is asked to ``dump()``.

    symbols   -- iterable of symbol names.
    start/end -- first and last day (inclusive); nothing happens when
                 ``start > end`` or when ``days(start, end)`` is empty.
    threads   -- ``max_workers`` of the pool, also used for the time estimate.
    timeframe, folder, header -- passed through to ``CSVDumper``.
    """
    # Imported here so that the ``futures`` submodule is guaranteed to be
    # loaded; a bare ``import concurrent`` does not import it.
    import concurrent.futures

    if start > end:
        return
    lock = threading.Lock()
    global day_counter
    total_days = how_many_days(start, end)

    if total_days == 0:
        return

    # Durations of the most recent jobs, used for the remaining-time estimate.
    last_fetch = deque([], maxlen=5)
    update_progress(day_counter, total_days, -1, threads)

    def do_work(symbol, day, csv):
        """Fetch, decompress and store one day; runs in a pool worker thread."""
        global day_counter
        start_time = time.time()
        Logger.info("Fetching day {0}".format(day))
        try:
            csv.append(day, decompress(day, fetch_day(symbol, day)))
        except Exception as e:
            # Best effort: a failed day is reported and the run continues.
            print("ERROR for {0}, {1} Exception : {2}".format(day, symbol, str(e)))
        elapsed_time = time.time() - start_time
        last_fetch.append(elapsed_time)
        # The counter is shared by all workers, so guard the read-modify-write.
        with lock:
            day_counter += 1
        Logger.info("Day {0} fetched in {1}s".format(day, elapsed_time))

    futures = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:

        files = {symbol: CSVDumper(symbol, timeframe, start, end, folder, header) for symbol in symbols}

        for symbol in symbols:
            for day in days(start, end):
                # Each submitted job runs do_work in one of the pool's worker threads.
                futures.append(executor.submit(do_work, symbol, day, files[symbol]))

        for future in concurrent.futures.as_completed(futures):
            if future.exception() is None:
                update_progress(day_counter, total_days, avg(last_fetch), threads)
            else:
                # Format the exception into the message itself; passing it as
                # an extra argument without a placeholder loses it.
                Logger.error("An error happen when fetching data : {0}".format(future.exception()))

        Logger.info("Fetching data terminated")
        for file in files.values():
            file.dump()

    update_progress(day_counter, total_days, avg(last_fetch), threads)

fetch.py

import asyncio
import datetime
import threading
import time
from functools import reduce
from io import BytesIO, DEFAULT_BUFFER_SIZE

import requests

#from ..core.utils import Logger, is_dst
from core.utils import Logger, is_dst

# URL template for one hourly tick file; create_tasks() fills in the fields.
URL = "https://www.dukascopy.com/datafeed/{currency}/{year}/{month:02d}/{day:02d}/{hour:02d}h_ticks.bi5"
# Maximum number of download tries made by get() before it gives up.
ATTEMPTS = 5


async def get(url):
    """Download one hourly file and return its raw content as a memoryview.

    Each of the 24 tasks created per day runs this coroutine. The loop used
    is the one ``asyncio.get_event_loop()`` returns for the current thread,
    i.e. the loop that is running this coroutine; no new loop is created
    here. The blocking ``requests.get`` call is handed to that loop's
    default executor. ``run_in_executor`` gives back an awaitable future
    rather than wrapping anything in a new Task, so awaiting it does not
    add to the loop's tasks.

    Up to ``ATTEMPTS`` tries are made. A non-200 status or an exception is
    logged and retried; an Exception is raised when every try has failed.
    """
    loop = asyncio.get_event_loop()
    # Part of the URL after "https://www.dukascopy.com/datafeed/" (35 chars),
    # used only to label log messages.
    label = url[35:].replace('/', " ")
    start = time.time()
    Logger.info("Fetching {0}".format(label))
    for i in range(ATTEMPTS):
        # Fresh buffer per attempt so a download that fails half-way cannot
        # leave partial data in front of the next attempt's content.
        buffer = BytesIO()
        try:
            res = await loop.run_in_executor(None, lambda: requests.get(url, stream=True))
            if res.status_code == 200:
                for chunk in res.iter_content(DEFAULT_BUFFER_SIZE):
                    buffer.write(chunk)
                Logger.info("Fetched {0} completed in {1}s".format(label, time.time() - start))
                if len(buffer.getbuffer()) <= 0:
                    Logger.info("Buffer for {0} is empty ".format(label))
                return buffer.getbuffer()
            else:
                Logger.warn("Request to {0} failed with error code : {1} ".format(url, str(res.status_code)))
        except Exception as e:
            Logger.warn("Request {0} failed with exception : {1}".format(label, str(e)))
            # Back off without blocking the event loop: time.sleep() here
            # would stall every other download scheduled on the same loop.
            await asyncio.sleep(0.5 * i)

    raise Exception("Request failed for {0} after {1} attempts".format(url, ATTEMPTS))


def create_tasks(symbol, day):
    """Schedule one ``get`` download per hour of ``day`` for ``symbol``.

    Returns the list of 24 tasks (hours 0-23) created with
    ``asyncio.ensure_future``; they are scheduled on the current thread's
    event loop and start running once that loop runs.

    No daylight-saving adjustment is applied: the previous DST check only
    set a local that was never read, so it has been removed.
    """
    url_info = {
        'currency': symbol,
        'year': day.year,
        # NOTE(review): presumably the data feed numbers months from 0 - confirm.
        'month': day.month - 1,
        'day': day.day,
    }
    return [asyncio.ensure_future(get(URL.format(hour=hour, **url_info))) for hour in range(24)]


def fetch_day(symbol, day):
    """Download all hourly files of ``day`` for ``symbol`` and join them.

    Runs in a pool worker thread. A dedicated event loop is created for the
    call, installed as the current loop of this thread, used to run the
    tasks from ``create_tasks`` to completion, and then closed. Returns a
    memoryview over the hourly contents concatenated in hour order; if a
    download failed, its exception is re-raised by ``task.result()``.
    """
    # A new loop per call. The former threading.local() lookup could never
    # find a cached loop, because the local object itself was created on
    # every call, so a new loop was always created anyway and never closed.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.set_debug(True)
    try:
        tasks = create_tasks(symbol, day)
        # asyncio.wait returns once every task has finished (or failed).
        loop.run_until_complete(asyncio.wait(tasks))
    finally:
        # Release the loop and its executor threads, and do not leave a
        # closed loop installed as this thread's current loop.
        asyncio.set_event_loop(None)
        loop.close()

    day_buffer = BytesIO()
    for task in tasks:
        day_buffer.write(task.result())
    return day_buffer.getbuffer()

其他代码:

在此代码中,run_in_executor 在 blocking_tasks 列表中生成了 Future
所以在前面的代码中这些任务在哪里???????????

import asyncio
import concurrent.futures
import logging
import sys
import time


def blocks(n):
    """Simulate a blocking call: log, sleep 0.1 s, and return ``n`` squared."""
    logger = logging.getLogger('blocks({})'.format(n))
    logger.info('running')
    time.sleep(0.1)  # stands in for blocking work
    logger.info('done')
    squared = pow(n, 2)
    return squared


async def run_blocking_tasks(executor):
    """Run ``blocks(0)`` .. ``blocks(5)`` in ``executor`` and log the results.

    The six calls are handed to the executor through the event loop, all of
    them are awaited together, and the collected return values are logged.
    """
    logger = logging.getLogger('run_blocking_tasks')
    logger.info('starting')

    logger.info('creating executor tasks')
    event_loop = asyncio.get_event_loop()
    blocking_tasks = []
    for i in range(6):
        blocking_tasks.append(event_loop.run_in_executor(executor, blocks, i))
    logger.info('waiting for executor tasks')
    done, _not_done = await asyncio.wait(blocking_tasks)
    # ``done`` is a set, so the order of the results is not defined.
    results = [finished.result() for finished in done]
    logger.info('results: {!r}'.format(results))

    logger.info('exiting')


if __name__ == '__main__':
    # Every log record carries the name of the thread that emitted it, so
    # the output shows which calls ran in pool threads.
    logging.basicConfig(
        stream=sys.stderr,
        format='%(threadName)10s %(name)18s: %(message)s',
        level=logging.INFO,
    )

    # Thread pool limited to three workers.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=3)

    event_loop = asyncio.get_event_loop()
    try:
        event_loop.run_until_complete(run_blocking_tasks(executor))
    finally:
        # Always release the loop, even if the coroutine raised.
        event_loop.close()

`

1 个答案:

答案 0 :(得分:1)

requests 与 asyncio 不兼容。请改用 aiohttp

import aiohttp
import asyncio
import async_timeout

async def fetch(session, url):
    """Return the body of ``url`` as text, using ``session`` for the request.

    The whole request, including reading the body, runs inside a 10-second
    ``async_timeout`` block.
    """
    # Enter the timeout with ``async with``: current async_timeout releases
    # only support the asynchronous context-manager form.
    async with async_timeout.timeout(10):
        async with session.get(url) as response:
            return await response.text()

async def main(loop):
    """Open a client session bound to ``loop``, fetch python.org and print it."""
    client = aiohttp.ClientSession(loop=loop)
    async with client as session:
        page = await fetch(session, 'http://python.org')
        print(page)

# Run main() to completion on the loop returned by asyncio.get_event_loop().
loop = asyncio.get_event_loop()
loop.run_until_complete(main(loop))