我试图每5分钟从FTP服务器获取一次数据,并对数据做一些处理,比如提取最高值和最低值。为此,我将 .sec.gz 文件转换为 csv(csv 便于我识别各个值)。每个文件大约要处理4000行,一天之中每隔5分钟就有一个这样的文件。首先我得到 csv,然后替换 csv 中的标题。在这个过程中,有些文件在第二个 csv 中的数据不完整。
我的代码是:
# Poll an FTP server every five minutes, download any new ``.sec.gz`` files
# into C:\test\<date folder>, and convert each one into two CSV files:
#   <name>.csv   - one comma-separated record per line
#   <name>a.csv  - a header row followed by the leading part of each record
#
# NOTE(review): the indentation of this script was lost in SOURCE; the nesting
# below is reconstructed from the data flow and the sample output and should
# be confirmed against the original file.  Targets Python 2.7 (byte-string
# file I/O); the syntax is kept parseable by Python 3 as well.
#
# Fix applied: every file is now opened in a ``with`` block.  The original
# wrote ``fn.close`` / ``f1.close`` without parentheses, which only references
# the method and never closes the file, so buffered rows could still be
# unwritten when the CSV was read back - producing truncated output.

ip = "ip address"
password = 'password'
file_connect = []  # remote paths already downloaded (kept across cycles)
file_sec = []      # local .sec.gz paths already converted (kept across cycles)
empty_lines = 0    # blank lines skipped while building the second CSV
interval = 1
starttime = time.time()

# A comma is inserted after each of these indices.  The indices refer to the
# record as it grows, i.e. previously inserted commas are counted.
_COMMA_AFTER = frozenset([
    5, 16, 28, 39, 42, 44, 55, 68, 79, 92, 103, 116, 127, 139, 150, 161,
    172, 183, 194, 205, 216, 228,
])


def stripped(x):
    """Return *x* with every character outside ASCII 32..126 removed."""
    return "".join([i for i in x if 31 < ord(i) < 127])


def _insert_commas(record):
    """Return *record* with a comma inserted after each _COMMA_AFTER index."""
    chars = list(record)
    pos = 0
    # len(chars) is re-evaluated on every pass because the list grows.
    while pos < len(chars):
        if pos in _COMMA_AFTER:
            chars.insert(pos + 1, ",")
        pos += 1
    return "".join(chars)


for interval in range(1000):
    # ---- Download phase -------------------------------------------------
    ftp = FTP('ip address')
    ftp.login('username', password)
    ftp.dir()  # prints the remote listing to stdout
    date_filename = 'c:/DATA_INTV_NEW/'
    filematch = 'C:/DATA_INTV_NEW/*'
    filer = ftp.nlst(date_filename)
    for files in filer:
        if files == 'C:/DATA_INTV_NEW/June062014':
            continue
        data1 = files
        date_a = data1.split('/')
        date_b = date_a[2]
        # Folder names look like "<MonthName><DD><YYYY>".
        day_all = date_b[-6:-4]
        year_all = date_b[-4:]
        month_all = date_b[0:-6]
        monthDict = {'January':'1', 'February':'2', 'March':'3', 'April':'4', 'May':'5', 'June':'6',
                     'July':'7', 'August':'8', 'September':'9', 'October':'10', 'November':'11', 'December':'12'}
        month_all = monthDict[month_all]
        Date = day_all+"/"+month_all+"/"+year_all
        filenames = ftp.nlst(filematch)
        # The local folder is not created here; it must already exist.
        mypath1 = 'C:\\test\\'+date_b
        for file_ in natsorted(filenames):
            if file_ in file_connect or not file_.endswith('.sec.gz'):
                continue
            file_c = file_.split('/')
            file_d = file_c[3]
            print(file_d)
            Files = date_b+'\\'+file_d
            print(Files)
            # Close the local file before anything reads it back.
            with open(mypath1+'\\'+file_d, 'wb') as local_file:
                ftp.retrbinary('RETR C:/DATA_INTV_NEW/'+Files, local_file.write)
            file_connect.append(file_)
    ftp.quit()

    # ---- Conversion phase -----------------------------------------------
    # NOTE(review): mypath1 is only bound inside the loop above; if no remote
    # folder qualified on the first cycle this raises NameError - confirm.
    path = mypath1+'//*.sec.gz'
    file_s = glob.glob(path)
    # Historical-data CSV: only its header row is written by this script.
    with open(mypath1+"//final2.csv", "ab") as fl:
        writer = csv.DictWriter(fl, fieldnames = ["S No", "Instrument Name", "High", "Low", "Open", "Close", "V" ,"Time" , "Date"])
        writer.writeheader()
    header = ["Instrument Name", "High", "Low", "Open", "Close", "V", "Time", "Date"]
    # Start time is 9:15 am.
    start_time = timedelta(hours=9, minutes=15, seconds=0)

    for file_group in natsorted(file_s):
        if file_group in file_sec:
            continue
        with gzip.open(file_group, 'rb') as f:
            a = f.read()
        b = stripped(a)

        # Both output names depend only on the input path, so they are
        # computed before the record loop instead of inside it.
        num, ext = os.path.basename(file_group).split(".", 1)
        output_filename = os.path.join(os.path.dirname(file_group), "%s.csv" % (num,))
        output_filename1 = os.path.join(os.path.dirname(file_group), "%sa.csv" % (num,))

        if 'OPT' in b:
            # One append-mode handle for all records of this file.
            with open(output_filename, "ab") as fn:
                for line in b.split('OPT'):
                    # Re-add the 'OPT' marker removed by split().
                    # NOTE(review): a two-character slice never equals the
                    # three-character 'FUT', so this test is always true.
                    if line[0:2] != 'FUT':
                        line = 'OPT'+line
                    d = line
                    if 'FUT' in d:
                        e = d.split('FUT')
                        # NOTE(review): same always-true comparison here, and
                        # only the last piece of e is kept after the loop -
                        # confirm whether every piece should be written.
                        for line in e:
                            if line[0:2] != 'OPT':
                                line = 'FUT'+line
                    h = _insert_commas(line)
                    fn.write(h + '\n')

        # Second pass: copy the records into <name>a.csv under a header row.
        with open(output_filename, "rb") as inf:
            next(inf)  # skip header row
            with open(output_filename1, "ab") as f1:
                writer = csv.DictWriter(f1, fieldnames = ["Instrument Name", "Symbol", "Expiry Date", "Strike Price", "Option Type","Market Type", "Buy Price", "Buy Quantity", "Sell Price", "Sell Quantity", "Last Traded Price", "Total Traded Quantity", "Average Traded Price", "Open Price", "High Price", "Low Price", "Close Price", "High", "Low", "Open", "Close", "V", "Quant", "Time", "Date"])
                writer.writeheader()
                for line in inf:
                    # Lines read from a file still carry their newline, so
                    # blank lines are detected after stripping whitespace.
                    if not line.strip():
                        empty_lines += 1
                        continue
                    # NOTE(review): this keeps the first 23 *characters* of
                    # the line and appends no newline - confirm that 23
                    # columns were not intended.  Slicing avoids the
                    # IndexError the per-index lookup raised on short lines.
                    h1 = line[:23]
                    f1.write(h1)

        start_time = start_time+timedelta(seconds=300)
        file_sec.append(file_group)

    # Sleep until the next five-minute boundary measured from script start.
    time.sleep(300.0 - ((time.time() - starttime) % 300.0))
请帮助我解决这个问题。下面给出输出内容的示例。例如,我的可读文件包含如下内容:
OPT{T
OPTSTK AMBUJACEM 24/12/2014 240 CE N 3.1 2000 3.3 4000 3.25 24000 3.21 4 4 2.9 3.5
OPTSTK BPCL 24/12/2014 820 CE N 6.85 1500 7.35 1500 7 21000 7.42 8.15 8.75 6.95 7.7
OPTSTK DIVISLAB 24/12/2014 1750 CE N 36.4 1500 48.2 375 46.05 0 0 0 0 0 46.05
在目标文件中:
Instrument Name Symbol Expiry Date Strike Price Option Type Market Type Buy Price Buy Quantity Sell Price Sell Quantity Last Traded Price Total Traded Quantity Average Traded Price Open Price High Price Low Price Close Price
OPTSTK AMBUJACEM 24/12/2014 240 CE N 3.1 2000 3.3 4000 3.25 24000 3.21 4 4 2.9 3.5
OPTSTK BPCL 24/12/2014 820 CE N 6.85 1500 7.35 1500 7 21000 7.42 8.15 8.75 6.95 7.7
OPTSTK DIVISLAB #here columns are missing