**注意:我在原始代码的下方附上了修改后的版本,即最终满足我需求的代码。
下午好,
关于合并 CSV 数据的问题已经有很多,但到目前为止我还没有找到能解决我这个代码需求的答案。
我有一批表头固定的大型 CSV 文件,需要对它们做如下处理:
1)文件每 12 小时(每班)生成一个,我需要把一周的 CSV 合并在一起;2)按 2 列信息过滤每个 CSV(否则行数太多);3)纵向追加到单个 CSV“主表”中,并以“最后一班的日期”作为命名约定。
**文件是以多个单独的 CSV 形式生成的,我需要把它们合并成一个文件。
** 供参考:数据集附在代码之后(为了便于说明,我裁剪了 16 列数据)
以下是我目前写出的代码。代码比较乱,抱歉!
import os, csv
import pandas as pd
import io
import glob
from datetime import date
import time
import collections
# Process data and filter #
def ProcessData( data ):
    """Keep the rows whose column 15 equals 'OPERATING', trimmed to 5 fields.

    Args:
        data: iterable of CSV rows, each a list of column values.

    Returns:
        A list with one entry per matching row, made of columns 0-2
        followed by columns 15-16 of that row.
    """
    processedData = []
    for row in data:
        # csv.reader yields [] for blank lines and short lists for truncated
        # lines; indexing column 15 on those would raise IndexError.
        if len( row ) <= 15:
            continue
        if row[ 15 ] == 'OPERATING':
            processedData.append( row[ 0:3 ] + row[ 15:17 ] )
    return processedData
# Process and write #
def ProcessAndWrite( data, filename ):
    """Filter *data* with ProcessData and write the result as a CSV file.

    The output path is *filename* with its extension stripped and
    '_week_combined.csv' appended; an existing file is overwritten.

    Args:
        data: iterable of CSV rows handed to ProcessData.
        filename: path of an input file, used only to derive the output name.
    """
    import sys

    processedData = ProcessData( data )
    name = os.path.splitext( filename )[ 0 ]
    outputfilename = name + '_week_combined.csv'
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print( "writing data to " + str( outputfilename ) )
    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3 (otherwise rows gain blank lines on Windows).
    if sys.version_info[ 0 ] >= 3:
        csvfile = open( outputfilename, 'w', newline='' )
    else:
        csvfile = open( outputfilename, 'wb' )
    with csvfile:
        writer = csv.writer( csvfile )
        writer.writerows( processedData )
# select the correct weeks worth of files #
def filedate( data, datetime ):
    """Print which file names share a date, and how many files each date has.

    Args:
        data: not used by the body.
        datetime: not used by the body.

    Returns:
        None; both reports are printed.

    NOTE(review): the list of (time struct, path) pairs this function walks
    is created empty and never filled in the visible code, so both reports
    are always empty -- presumably it was meant to be built from the files
    in the data folder; confirm before relying on this function.
    """
    import pprint

    date_outputfilename_list = []

    # Reduce each (time struct, path) pair to (formatted date, base name).
    date_name_list = []
    for entry in date_outputfilename_list:
        file_name = os.path.basename( entry[ 1 ] )
        file_date = time.strftime( "%y-%m-%d", entry[ 0 ] )
        date_name_list.append( ( file_date, file_name ) )

    # Group the names under their date and count the files per date.
    date_count_dict = {}
    date_name_dict = {}
    for file_date, file_name in date_name_list:
        date_count_dict[ file_date ] = date_count_dict.get( file_date, 0 ) + 1
        date_name_dict.setdefault( file_date, [] ).append( file_name )

    print( "Files with the same date:" )
    pprint.pprint( date_name_dict )
    print( '-'*60 )
    print( "Same dates count:" )
    pprint.pprint( date_count_dict )
# Function #
if __name__ == "__main__":
    # Merge the CSV files found in `path`, filter them and write the result.
    import sys
    path = r'E:\Rs'
    # ProcessAndWrite puts its '*_week_combined.csv' output in this same
    # folder; leave those out so a re-run does not feed results back in.
    filenames = [ found for found in glob.glob( os.path.join( path, '*.csv' ) )
                  if not found.endswith( '_week_combined.csv' ) ]
    filenames.sort()
    data = []
    for filename in filenames:
        with open( filename, 'r' ) as csvfile:
            reader = csv.reader( csvfile, delimiter = ',' )
            # Skip the two header rows; the default keeps a file shorter
            # than that from raising StopIteration.
            for headerCount in range( 2 ):
                next( reader, None )
            data.extend( reader )
        # NOTE(review): `filedate` is a function object, so this test is
        # always true and every file is written on its own; the intended
        # "a full week has been collected" condition still has to be
        # supplied here.
        if( filedate ):
            ProcessAndWrite( data, filename )
            # Start the next batch empty (the list used to be reset to
            # [ProcessData], which put a function object among the rows).
            data = []
    if data:
        ProcessAndWrite( data, filename )
数据集:
position_x,position_y,position_z,start_time,opreason,stage,
标题2,标题2,标题2,标题2,标题2,标题2
649794,4764274,1147,2/11/2016 00:00,操作,声音,
下面是修改后的脚本,已能满足我的需求:
import os, csv # Import csv library
import io
import glob
import datetime
import time
import collections
def ProcessData( data ):
    """Keep the 'OPERATING' / 'truck' rows and append each row's duration.

    A row is kept when column 15 equals 'OPERATING' and column 6 equals
    'truck'. Columns 3 and 4 are parsed as '%Y-%m-%d %H:%M:%S' timestamps
    and their difference (column 4 minus column 3) is appended in seconds.

    Args:
        data: iterable of CSV rows, each a list of column values.

    Returns:
        A list of rows made of columns 0-4 followed by the duration in
        seconds as a float.

    Raises:
        ValueError: if a kept row's column 3 or 4 does not match the
            timestamp format.
    """
    timeFormat = '%Y-%m-%d %H:%M:%S'
    processedData = []
    for row in data:
        # csv.reader yields [] for blank lines and short lists for truncated
        # lines; indexing column 15 on those would raise IndexError.
        if len( row ) <= 15:
            continue
        if row[ 15 ] == 'OPERATING' and row[ 6 ] == 'truck':
            startTime = datetime.datetime.strptime( row[ 3 ], timeFormat )
            endTime = datetime.datetime.strptime( row[ 4 ], timeFormat )
            duration = ( endTime - startTime ).total_seconds()
            processedData.append( row[ 0:5 ] + [ duration ] )
    return processedData
def ProcessAndWrite( data, filename ):
    """Filter *data* with ProcessData and write it, with a header, as CSV.

    The output path is *filename* with its extension stripped and
    '_week_combined.csv' appended; an existing file is overwritten.

    Args:
        data: iterable of CSV rows handed to ProcessData.
        filename: path of an input file, used only to derive the output name.
    """
    import sys

    processedData = ProcessData( data )
    name = os.path.splitext( filename )[ 0 ]
    outputfilename = name + '_week_combined.csv'
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print( "writing data to " + str( outputfilename ) )
    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3 (otherwise rows gain blank lines on Windows).
    if sys.version_info[ 0 ] >= 3:
        csvfile = open( outputfilename, 'w', newline='' )
    else:
        csvfile = open( outputfilename, 'wb' )
    with csvfile:
        writer = csv.writer( csvfile )
        # One name per value ProcessData emits: columns 0-4 plus the computed
        # duration. The earlier 12-name header pushed 'duration' six columns
        # to the right of the values it labels.
        writer.writerow( ['position_x','position_y','position_z','start_time','end_time','duration'] )
        writer.writerows( processedData )
if __name__ == "__main__":
    # Merge the CSV files in `path` into one filtered output per calendar
    # day, using the date at the start of each file name.
    import sys
    path = r'E:\\'
    # ProcessAndWrite puts its '*_week_combined.csv' output in this same
    # folder; leave those out so a re-run does not feed results back in.
    filenames = [ found for found in glob.glob( os.path.join( path, '*.csv' ) )
                  if not found.endswith( '_week_combined.csv' ) ]
    # Sorting puts the date-prefixed names in date order, which the
    # day-change test below relies on.
    filenames.sort()
    data = []

    def dateFromFilename( name ):
        """Return the date encoded before the first '_' of the file name.

        Raises ValueError when that prefix is not in '%Y-%m-%d' form.
        """
        path, filename = os.path.split( name )
        dateStr = filename.split( '_' )[ 0 ]
        return datetime.datetime.strptime( dateStr, '%Y-%m-%d' )

    firstFileDate = None
    lastFilename = None
    for filename in filenames:
        currentFileDate = dateFromFilename( filename )
        if firstFileDate is not None:
            diff = currentFileDate - firstFileDate
            # The date moved on by at least a day: write out what has been
            # collected, named after the last file that contributed to it.
            if ( diff.days >= 1 ):
                ProcessAndWrite( data, lastFilename )
                data = []
        firstFileDate = currentFileDate
        lastFilename = filename
        with open( filename, 'r' ) as csvfile:
            reader = csv.reader( csvfile, delimiter = ',' )
            # Skip the three header rows; the default keeps a file shorter
            # than that from raising StopIteration.
            for headerCount in range( 3 ):
                next( reader, None )
            # Stack this file's rows under those already collected.
            data.extend( reader )
    # Write whatever is left over from the final day.
    if data:
        ProcessAndWrite( data, lastFilename )