pandas parallel urllib request loop

Date: 2018-01-12 22:04:03

Tags: json pandas parsing dataframe urllib

I have some code that uses urllib to pull JSON data from an API, like this:

import pandas as pd
import json
import urllib.request

# Send URL request & get JSON data
with urllib.request.urlopen("https://bittrex.com/api/v1.1/public/getmarketsummaries") as url:
    data = json.loads(url.read().decode())

# Select data from the 'result' section
df = pd.DataFrame(data=data['result'])

# tickers = df['MarketName']
tickers = ["BTC-1ST", "BTC-2GIVE", "BTC-ABY", "BTC-ARDR", "BTC-WAVE"]
print(tickers)

for ticker in tickers:
    with urllib.request.urlopen("https://bittrex.com/Api/v2.0/pub/market/GetTicks?marketName=" + ticker + "&tickInterval=thirtyMin") as URL:
        data = json.loads(URL.read().decode())
        df2 = pd.DataFrame(data=data['result'])
        df2["Market"] = ticker  # tag each row with its market name
        # Append each market's rows to a single CSV file
        df2.to_csv('all.csv', encoding="utf-8-sig", index=False, mode='a', header=False)
        print("done " + ticker)

In reality it is not just these five currencies... the live list is 295 markets, which means 295 requests, and it takes about 5 minutes to get all the required data into the CSV file (a long time).

I want to know whether there is a way to send all the requests in parallel to cut the time down, while still saving the data to the CSV file as DataFrames with the same options.

I have searched quite a bit and found the multiprocessing module, but I could not find an example similar to my case.

Can anyone help?

1 Answer:

Answer 0 (score: 0)

How about something like this?

import pandas as pd
import json
import urllib.request
from urllib import parse
from multiprocessing import Pool

# Send URL request & get JSON data
with urllib.request.urlopen("https://bittrex.com/api/v1.1/public/getmarketsummaries") as url:
    data = json.loads(url.read().decode())

# Select data from the 'result' section
df = pd.DataFrame(data=data['result'])

# tickers = df['MarketName']
tickers = ["BTC-1ST", "BTC-2GIVE", "BTC-ABY", "BTC-ARDR", "BTC-WAVE"]
print(tickers)

def http_get(url):
    # Fetch one URL and return it together with the raw response body,
    # so the market name can be recovered from the URL afterwards.
    result = {"url": url, "data": urllib.request.urlopen(url, timeout=5).read()}
    return result

urls = [ "https://bittrex.com/Api/v2.0/pub/market/GetTicks?marketName=" + ticker + "&tickInterval=thirtyMin" for ticker in tickers ]

if __name__ == "__main__":  # guard the Pool so spawned workers don't re-run it
    with Pool(processes=5) as pool:
        results = pool.map(http_get, urls)

    for result in results:
        j = json.loads(result['data'].decode())
        df2 = pd.DataFrame(data=j['result'])
        # Recover the market name from the request URL's query string
        marketName = parse.parse_qs(parse.urlparse(result['url']).query)['marketName'][0]
        df2["Market"] = marketName
        df2.to_csv('all.csv', encoding="utf-8-sig", index=False, mode='a', header=False)
        print("done " + marketName)