I have some code that uses urllib to parse JSON data from an API, as shown below:
import pandas as pd
import json
import urllib.request
import os
import time
import csv
import datetime
# Send URL Request & Get JSON Data
with urllib.request.urlopen("https://bittrex.com/api/v1.1/public/getmarketsummaries") as url:
    data = json.loads(url.read().decode())
# Select Data from result section
df = pd.DataFrame(data=data['result'])
# tickers = df['MarketName']
tickers = ["BTC-1ST", "BTC-2GIVE", "BTC-ABY", "BTC-ARDR", "BTC-WAVE"]
print(tickers)
for ticker in tickers:
    with urllib.request.urlopen("https://bittrex.com/Api/v2.0/pub/market/GetTicks?marketName=" + ticker + "&tickInterval=thirtyMin") as URL:
        data = json.loads(URL.read().decode())
    df2 = pd.DataFrame(data=data['result'])
    Market01 = "Market"
    df2[Market01] = ticker
    df2.to_csv('all.csv', encoding="utf-8-sig", index=False, mode='a', header=False)
    print("done " + ticker)
In reality it does not request just five currencies: there are 295 currencies, so 295 requests, and it takes about 5 minutes to get all the required data into the CSV file, which is a long time.
I would like to know whether there is a way to send all the requests in parallel to reduce the time, while still saving the data to the CSV file as a DataFrame with the same options.
I have searched repeatedly and found the multiprocessing module, but I could not find an example similar to my case.
Can anyone help me?
Answer 0 (score: 0):
How about something like this?
import pandas as pd
import json
import urllib.request
import os
from urllib import parse
import csv
import datetime
from multiprocessing import Pool
import time
# Send URL Request & Get JSON Data
with urllib.request.urlopen("https://bittrex.com/api/v1.1/public/getmarketsummaries") as url:
    data = json.loads(url.read().decode())
# Select Data from result section
df = pd.DataFrame(data=data['result'])
# tickers = df['MarketName']
tickers = ["BTC-1ST", "BTC-2GIVE", "BTC-ABY", "BTC-ARDR", "BTC-WAVE"]
print(tickers)
def http_get(url):
    # Fetch one URL and return it together with the raw response body
    result = {"url": url, "data": urllib.request.urlopen(url, timeout=5).read()}
    return result
urls = [ "https://bittrex.com/Api/v2.0/pub/market/GetTicks?marketName=" + ticker + "&tickInterval=thirtyMin" for ticker in tickers ]
pool = Pool(processes=5)
results = pool.map(http_get, urls)  # blocks until every request has finished
pool.close()
pool.join()
for result in results:
    j = json.loads(result['data'].decode())
    # Build the DataFrame from the 'result' section, matching the original code
    df2 = pd.DataFrame(data=j['result'])
    Market01 = "Market"
    # Recover the market name from the request URL's query string
    marketName = parse.parse_qs(parse.urlparse(result['url']).query)['marketName'][0]
    df2[Market01] = marketName
    df2.to_csv('all.csv', encoding="utf-8-sig", index=False, mode='a', header=False)
    print("done " + marketName)