Python 中的多进程处理(把普通 for 循环改写成多进程的 for 循环)

时间:2016-02-27 20:33:16

标签: python multiprocessing

我有一个可以正常运行的脚本。其中有一个 for 循环,可以通过引入多进程(multiprocessing)来提高速度。

未使用多进程的代码如下:

    # Sequential version: screen every ticker in Symbol, one download at a time,
    # and collect the matching tickers plus their max/min values in the
    # tickerlist / maxpricelist / minpricelist lists.
    Symbol= Symbol[0:]   # slice to choose which stocks to look at ([0:] keeps them all)
    ################################ for loop
    for item in Symbol:
        print item
        try:
            # Fetch the 'Adj Close' data for this single ticker.
            serious=web.DataReader([item], 'yahoo', start, end)['Adj Close']
            serious2=serious.loc[:, item].tolist()   # this ticker's 'Adj Close' column as a plain list
            tickerlistori.append(item)   # reached only if the two lines above did not raise
            valuemax = max(serious2)
            indexmax = serious2.index(max(serious2))   # position of the maximum; not used further in this snippet
            valuemin = min(serious2)
            indexmin = serious2.index(min(serious2))   # position of the minimum; not used further in this snippet
            pricecurrent = serious2[-1]   # last value in the list
            # Keep tickers whose maximum is above 30, minimum below 2,
            # and last value below 2.5.
            if valuemax>30 and valuemin<2 and pricecurrent<2.5:
                tickerlist.append(item)
                maxpricelist.append(valuemax)
                minpricelist.append(valuemin)
        except RemoteDataError: 
            # RemoteDataError is ignored, so the loop moves on to the next ticker.
            pass
print tickerlist

下面的第二个代码块是“并行处理”的版本:

    # First multiprocessing attempt (kept exactly as posted; see review notes).
    Symbol= Symbol[0:]   # slice to choose which stocks to look at ([0:] keeps them all)
    ############ multi processing before the for loop
    def search1(Symbol):
        # NOTE(review): pool.map(search1, Symbol) below calls this function once
        # per element of the outer Symbol, so the parameter named Symbol here is a
        # single element. Presumably that element is one ticker string, in which
        # case the loop below walks its individual characters -- confirm.

        for item in Symbol:
            print item  #trying to see why the tickers are messed up
            try:
                serious=web.DataReader([item], 'yahoo', start, end)['Adj Close']
                serious2=serious.loc[:, item].tolist()   # this item's 'Adj Close' column as a plain list
                # NOTE(review): this function runs in a Pool worker process, so
                # the appends below change the worker's copies of these lists,
                # not the lists in the parent process.
                tickerlistori.append(item)
                valuemax = max(serious2)
                indexmax = serious2.index(max(serious2))   # not used further in this snippet

                valuemin = min(serious2)
                indexmin = serious2.index(min(serious2))   # not used further in this snippet


                pricecurrent = serious2[-1]   # last value in the list

                if valuemax>30 and valuemin<2 and pricecurrent<2.5:
                    tickerlist.append(item)
                    maxpricelist.append(valuemax)
                    minpricelist.append(valuemin)
            except RemoteDataError: 
                # RemoteDataError is ignored, so the loop moves on to the next item.
                pass
        # No return statement: every call returns None.


    pool = Pool(processes=4) 
    # pool.map returns one result per element of Symbol; because search1 returns
    # None, this rebinds tickerlist to a list of None values.
    tickerlist = pool.map(search1, Symbol)
print tickerlist

第一个版本工作正常;第二个版本虽然运行时没有报错,但传给 pool.map(search1, Symbol) 的股票代码(Symbol)似乎没有被正确处理。

提前致谢。

(Symbol 应该是一个股票代码列表)

---------------按照 tdelaney 的建议修改之后的代码

import matplotlib.pyplot as plt
import csv
import pandas as pd
import datetime
import pandas.io.data as web
from pandas.io.data import DataReader, SymbolWarning, RemoteDataError
from filesortfunct import filesort
from scipy import stats
from scipy.stats.stats import pearsonr
import numpy as np
import math
from multiprocessing import Pool
import warnings
warnings.filterwarnings("ignore")


# The two dates between which stock prices are examined.
start = datetime.datetime.strptime('2/10/2015', '%m/%d/%Y')
end = datetime.datetime.strptime('2/25/2016', '%m/%d/%Y')

# Module-level accumulators for the screening results (tickers and their
# min/max prices).
#global tickerlist, maxpricelist, minpricelist, tickerlistori
# NOTE(review): the original comment on tickerlistori said "stocks available
# from google finance", but the DataReader calls in this script request 'yahoo',
# and nothing visible in this version of the script appends to it.
tickerlistori=[]
tickerlist=[]      # tickers that pass the price filter
maxpricelist = []  # maximum value for each ticker in tickerlist
minpricelist =[]   # minimum value for each ticker in tickerlist


datanamelist = ['NYSE.csv']  # commented out in the original: 'NASDAQ.csv', 'AMEX.csv'
for each in datanamelist:
    # Build the tuple of tickers (Symbol) for one listing file.
    dataname = each  # CSV file from which to extract stock tickers
    new = 'new'      # prefix of the single-column side file written below

    # Keep only the 'Symbol' column and write it to '<new><dataname>'.
    df = pd.read_csv(dataname, sep=',')
    df = df[['Symbol']]
    df.to_csv(new + dataname, sep=',', index=False)

    # Read the side file back with the csv module. The handle is opened in a
    # ``with`` block so it is closed again; the original left it open.
    # 'rb' is kept from the original (Python 2 csv convention).
    with open(new + dataname, 'rb') as x:
        f = csv.reader(x)
        # Transpose rows into columns. ``*f`` consumes the reader completely
        # while the file is still open.
        Symbol = zip(*f)

    Symbol = Symbol[0]  # first column, header cell included

    # Hard-coded starting offset; it also skips the header cell at index 0.
    # NOTE(review): the reason for 3210 is not shown here -- confirm before reuse.
    Symbol = Symbol[3210:len(Symbol)]

    Symbol = Symbol[0:]  # slice to choose which stocks to look at ([0:] keeps them all)

    def search1(item):
        """Screen a single ticker against the price thresholds.

        Fetches the 'Adj Close' data for ``item`` through ``web.DataReader``
        using the module-level ``start`` and ``end`` dates.

        Args:
            item: one ticker, as passed in by ``pool.map``.

        Returns:
            ``(item, valuemax, valuemin)`` when the maximum value is above 30,
            the minimum is below 2 and the last value is below 2.5; otherwise
            ``None`` (also when ``RemoteDataError`` is raised or no rows come
            back).
        """
        # Progress trace; a single-argument print() behaves the same on Python 2.
        print(item)
        try:
            serious = web.DataReader([item], 'yahoo', start, end)['Adj Close']
        except RemoteDataError:
            # Ignored exactly as before: this ticker yields no result.
            return None

        serious2 = serious.loc[:, item].tolist()  # this ticker's column as a plain list
        if not serious2:
            # max()/min() on an empty list raise ValueError, and an uncaught
            # exception in one worker call makes the caller's map() fail for
            # every ticker. Treat "no rows" like a failed download instead.
            return None

        valuemax = max(serious2)
        valuemin = min(serious2)
        pricecurrent = serious2[-1]  # last value in the list

        if valuemax > 30 and valuemin < 2 and pricecurrent < 2.5:
            return item, valuemax, valuemin
        return None


    pool = Pool(processes=4) 
    pool.start()
    for result in pool.map(search1, Symbol):

        if result:
            tickerlist.append(result[0])
            maxpricelist.append(result[1])
            minpricelist.append(result[2])

print tickerlist

1 个答案:

答案 0 :(得分:3)

你有几个问题:

  • map 会遍历 Symbol,并对其中的每个元素调用一次工作函数,因此工作函数内部不需要再用 for 循环遍历一次
  • 您更新的是全局列表……但这些列表只是各个子进程自己的全局变量,父进程永远看不到这些修改

以下是更新后的代码:

Symbol= Symbol[0:]   # slice to choose which stocks to look at ([0:] keeps them all)
############ multi processing before the for loop
def search1(item):
    """Screen a single ticker against the price thresholds.

    Fetches the 'Adj Close' data for ``item`` through ``web.DataReader``
    using the module-level ``start`` and ``end`` dates.

    Args:
        item: one ticker, as passed in by ``pool.map``.

    Returns:
        ``(item, valuemax, valuemin)`` when the maximum value is above 30,
        the minimum is below 2 and the last value is below 2.5; otherwise
        ``None`` (also when ``RemoteDataError`` is raised or no rows come
        back). Results are returned rather than appended to global lists
        because this function runs in a worker process.
    """
    # Progress trace; a single-argument print() behaves the same on Python 2.
    print(item)
    try:
        serious = web.DataReader([item], 'yahoo', start, end)['Adj Close']
    except RemoteDataError:
        # Ignored exactly as before: this ticker yields no result.
        return None

    serious2 = serious.loc[:, item].tolist()  # this ticker's column as a plain list
    if not serious2:
        # max()/min() on an empty list raise ValueError, and an uncaught
        # exception in one worker call makes the caller's map() fail for
        # every ticker. Treat "no rows" like a failed download instead.
        return None

    valuemax = max(serious2)
    valuemin = min(serious2)
    pricecurrent = serious2[-1]  # last value in the list

    if valuemax > 30 and valuemin < 2 and pricecurrent < 2.5:
        return item, valuemax, valuemin
    return None


# Fan the tickers out to four worker processes and gather the matches in the
# parent process, where the result lists live.
pool = Pool(processes=4)
try:
    # map() calls search1 once per element of Symbol and returns the results in
    # input order; search1 returns None for tickers that do not match.
    for result in pool.map(search1, Symbol):
        if result:
            tickerlist.append(result[0])
            maxpricelist.append(result[1])
            minpricelist.append(result[2])
finally:
    # Release the worker processes even if map() raised; the original never
    # closed the pool.
    pool.close()
    pool.join()
print(tickerlist)