Python pandas:查找中间50%

时间:2014-11-02 18:00:39

标签: python pandas web-scraping stocks

我使用python和pandas来处理股票逐笔成交数据,我想将其汇总为当天的总成交量、最高价、最低价、平均价,以及成交量累计到25%和75%时对应的价格。我不确定如何找到25%和75%这两个水平所在的位置。

#References
from time import *
import urllib.request as web
import pandas as pd
import os

# Trading date to fetch, written with dashes (YYYY-MM-DD). fromList passes it
# to pullData, which removes the dashes before placing it in the request URL.
dateToday = "2014-10-31"

def pullData(exchange,stock,date):
    """Download one day's trade dump for a stock and print its low and high price.

    Args:
        exchange: Exchange code; appended after the ticker in the ``paper``
            query parameter as ``<stock>.<exchange>``.
        stock: Ticker symbol. Also used to build the ``netfonds/trades/``
            path whose directory is created below.
        date: Date string; every ``-`` is removed before it is placed in
            the ``date`` query parameter.

    Returns:
        None. The minimum and maximum of the ``price`` column are printed.

    Errors from opening the URL or parsing the CSV are not handled here and
    propagate to the caller.
    """
    baseUrl='http://netfonds.no/quotes/tradedump.php?csv_format=csv'
    fullUrl=baseUrl+'&date='+date.replace("-","")+'&paper='+stock+'.'+exchange
    fileName=('netfonds/trades/'+stock+'.txt')
    try:
        # exist_ok avoids the check-then-create race of a separate isdir test.
        # Only the directory is prepared; this function writes no file.
        os.makedirs(os.path.dirname(fileName), exist_ok=True)
    except OSError:
        print("Directory Error")
    # Close the HTTP response as soon as the CSV has been parsed.
    with web.urlopen(fullUrl) as webBuffer:
        webData=pd.read_csv(webBuffer,usecols=['price','quantity'])
    low = webData['price'].min()
    high = webData['price'].max()
    print(low,high)


def getList(fileName):
    """Read ``<fileName>.txt`` and return its entries split on ``.``.

    Each kept line is split on ``.`` into a list of strings, so a line such
    as ``O.AAPL`` becomes ``['O', 'AAPL']``.

    Lines containing ``#`` anywhere are treated as comments and skipped.
    Blank or whitespace-only lines are skipped as well, so a trailing newline
    at the end of the file does not yield a bogus ``['']`` entry.

    Args:
        fileName: Path of the list file without the ``.txt`` extension.

    Returns:
        A list of lists of strings, one inner list per kept line.
    """
    stockList = []
    # The context manager guarantees the file is closed after reading.
    with open(fileName+'.txt', 'r') as listFile:
        for rawLine in listFile:
            line = rawLine.strip()
            if not line or '#' in line:
                continue
            stockList.append(line.split('.'))
    return stockList

def fromList():
    """Pull data for every entry listed in ``stocks.txt`` and report progress.

    Entries come from ``getList('stocks')``; element 0 is passed to
    ``pullData`` as the exchange and element 1 as the stock, together with
    the module-level ``dateToday``. A failure for one entry is reported and
    the loop continues with the next one.
    """
    print("Parsing stock tickers...")
    stockList = getList('stocks')
    print("Found "+str(len(stockList))+" stocks")

    for eachEntry in stockList:
        # An entry with fewer than two fields cannot be indexed below; skip it
        # here instead of letting the error handler itself raise IndexError.
        if len(eachEntry) < 2:
            print("Skipping malformed entry: "+'.'.join(eachEntry))
            continue
        exchange, stock = eachEntry[0], eachEntry[1]
        start_time = time()
        print("Attempting to pull data for "+stock)
        try:
            pullData(exchange,stock,dateToday)
        except Exception as err:
            # Best-effort per-entry boundary: report the cause and move on.
            print("Unable to pull data... "+stock+" ("+str(err)+")")
        else:
            print("Pulled successfully in "+str(round(time()-start_time))+" seconds")

# Run the whole pull and report the elapsed time, rounded to whole minutes.
first_time = time()
fromList()
elapsed_minutes = round((time() - first_time) / 60)
print("Program Finished! Took " + str(elapsed_minutes) + ' minutes')

2 个答案:

答案 0 :(得分:1)

pandas 的 Series 和 DataFrame 都有一个 describe 方法,类似于 R 的 summary 函数:

In [3]: import numpy as np

In [4]: import pandas as pd

In [5]: s = pd.Series(np.random.rand(100))

In [6]: s.describe()
Out[6]: 
count    100.000000
mean       0.540376
std        0.296250
min        0.002514
25%        0.268722
50%        0.593436
75%        0.831067
max        0.991971

答案 1 :(得分:0)

我只需使用numpy.repeat()即可找到所需内容。

# Repeat each trade's price once per unit of quantity, so that statistics
# computed on the expanded column are weighted by quantity.
prices=webData['price'].values
quantities=webData['quantity'].values
inflated=pd.DataFrame(np.repeat(prices,quantities))