我想把一些逐笔成交数据按分钟进行聚合。我在考虑按时间循环:对 09:30:00 到 04:00:00 之间的每一个一分钟区间(例如 09:30:00–09:30:59)取出数据,用它做一些数学运算,然后把结果保存到另一个 pandas DataFrame 中。我不知道该如何实现,也不知道应该用什么关键词在 Google 上搜索这种"按时间循环"的做法。
#References
from time import *
import urllib.request as web
import pandas as pd
import os
# Base URL of the netfonds trade-dump endpoint (CSV format requested via the
# query string). pullToday appends '<stock>.<exchange>' as the value of the
# trailing 'paper=' parameter.
forToday = 'http://netfonds.no/quotes/tradedump.php?csv_format=csv&paper='
def pullToday(exchange, stock):
    """Download today's trade dump for one paper and save it under ``data/``.

    Fetches ``forToday + stock + '.' + exchange`` as CSV, keeps the ``time``,
    ``price`` and ``quantity`` columns, re-expresses ``time`` as an offset
    from 06:00 on the current local date, prints the resulting frame, and
    writes it semicolon-separated to
    ``data/<exchange>/<stock>/<YYYY-MM-DD>.txt`` below a one-line header.

    Args:
        exchange: Exchange code; used in the URL and as a directory name.
        stock: Ticker symbol; used in the URL and as a directory name.

    Raises:
        ValueError: If a ``time`` value does not match ``%Y%m%dT%H%M%S``.
        OSError: If the page cannot be fetched or the output file cannot be
            written.
    """
    dateToday = strftime("%Y-%m-%d", localtime())
    fileName = 'data/' + exchange + '/' + stock + '/' + dateToday + '.txt'

    # exist_ok avoids the check-then-create race of a separate isdir() test.
    # A failure is only reported here; opening the file below will raise.
    try:
        os.makedirs(os.path.dirname(fileName), exist_ok=True)
    except OSError:
        print("Something went very wrong. Review the dir creation section")

    # Close the HTTP response as soon as the CSV has been parsed.
    with web.urlopen(forToday + stock + '.' + exchange) as pageBuffer:
        pageData = pd.read_csv(pageBuffer, usecols=['time', 'price', 'quantity'])

    # Convert the whole column at once instead of assigning row by row through
    # chained indexing, which is slow and may write to a temporary copy.
    # NOTE(review): the 06:00 origin looks like a timezone shift -- confirm.
    dayOrigin = pd.Timestamp(dateToday) + pd.Timedelta(hours=6)
    parsedTimes = pd.to_datetime(pageData['time'], format='%Y%m%dT%H%M%S')
    pageData['time'] = parsedTimes - dayOrigin
    print(pageData)

    # NOTE(review): the header names Timestamp;Volume;Low;High;Median, but the
    # rows written below hold the time/price/quantity columns -- confirm which
    # layout is intended before relying on the header.
    with open(fileName, 'w') as dataFile:
        dataFile.write('#Format: Timestamp;Volume;Low;High;Median\n')
    pageData.to_csv(fileName, index=False, sep=';', mode='a', header=False)
def getList(fileName):
    """Read ``<fileName>.txt`` and return its entries split on ``'.'``.

    Each non-blank line that contains no ``'#'`` is stripped of surrounding
    whitespace and split on ``'.'``. Lines containing ``'#'`` anywhere are
    skipped as comments. Blank lines (including the one produced by a
    trailing newline) are skipped so callers never receive a ``['']`` entry.

    Args:
        fileName: Path of the list file without its ``.txt`` extension.

    Returns:
        A list with one list of strings per accepted line, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    stockList = []
    # The context manager closes the handle even if reading fails.
    with open(fileName + '.txt', 'r') as listFile:
        for eachLine in listFile:
            # strip() also removes the '\r' left behind by CRLF line endings.
            eachLine = eachLine.strip()
            if not eachLine or '#' in eachLine:
                continue
            stockList.append(eachLine.split('.'))
    return stockList
# Script driver: load the stock list, fetch one paper, report elapsed seconds.
start_time = time()
stockList = getList('stocks')

# Fetching every entry of stockList is currently disabled; only one
# hard-coded paper is pulled:
#     for eachEntry in stockList:
#         pullToday(eachEntry[0], eachEntry[1])
pullToday('O', 'AAPL')

# Whole seconds since start_time, kept as a string for the message below.
delay = str(round(time() - start_time))
print(f'Finished in {delay}')
我想做什么:
def minuteAverages(pageData):
    """Collapse trades to one row per minute with volume and average price.

    Runnable form of the per-minute sketch: for every minute the traded
    quantity is summed (``volume``) and the quantity-weighted average price
    (``sum(quantity * price) / sum(quantity)``) is computed (``avgPrice``).
    Accumulation is per minute, so nothing carries over between minutes.

    Args:
        pageData: DataFrame with ``time``, ``price`` and ``quantity`` columns.
            ``time`` is assumed to hold timedeltas (or values accepted by
            ``pd.to_timedelta``), as produced by pullToday -- TODO confirm.

    Returns:
        A new DataFrame with columns ``time`` (start of the minute),
        ``volume`` and ``avgPrice``, ordered by ``time``. Minutes whose total
        quantity is zero are omitted to avoid dividing by zero.
    """
    if pageData.empty:
        return pd.DataFrame(columns=['time', 'volume', 'avgPrice'])

    # Bucket every trade by the minute it falls in (09:30:00-09:30:59 -> 09:30).
    minuteStart = pd.to_timedelta(pageData['time']).dt.floor('min')
    perTrade = pd.DataFrame({
        'time': minuteStart,
        'minuteVolume': pageData['quantity'],
        'avgPriceSum': pageData['quantity'] * pageData['price'],
    })

    perMinute = perTrade.groupby('time', as_index=False).sum()
    perMinute = perMinute[perMinute['minuteVolume'] != 0]

    result = pd.DataFrame({
        'time': perMinute['time'],
        'volume': perMinute['minuteVolume'],
        'avgPrice': perMinute['avgPriceSum'] / perMinute['minuteVolume'],
    })
    return result.reset_index(drop=True)