Dtypes和记忆

时间:2018-03-18 06:55:20

标签: pandas

您好我正在使用pandas读取表函数并定义所有我的dtypes以尝试提高速度和内存。使用下面的代码仍然会收到以下错误:

  

C:\用户\ gallachj \应用程序数据\本地\连续\ anaconda3 \ lib中\站点包\ IPython的\核心\ interactiveshell.py:2728:   DtypeWarning:列(26)具有混合类型。指定dtype选项   导入或设置low_memory = False。交互=交互性,   compiler = compiler,result = result)

关于为什么会这样做的任何建议?

startTime = datetime.now() start = time.time()

from datetime import datetime

for i in Index: os.chdir(os.path.join(r'''C:\Lifetests''',sns[i]))

if os.path.isfile(sns[i] + '_stitch.txt') and 'Power Meter (W)' and 'Date Time' in open(sns[i] + '.txt').read():` 
    df = pd.read_table(sns[i] + '.txt', sep='\t', header=0, error_bad_lines=False, dtype= {"Sample":"int64", "Date Time":"object","Elapsed Time (hr)":"float64","Power Meter (V)":"float64", "Power Meter (W)":"float64","BRF TEC (V)":"float64","LBO TEC (V)":"float64", "Base Plate Thermistor (ohm)":"float64","Base Plate Temp (C)":"float64","Internal Photodiode (W)":"float64","Internal Photodiode (ADC)":"float64","Current (A)":"float64","Main Temp Drive (V)":"float64","DIO-0 (hex)":"object","DIO-2 (hex)":"object","DIO-4 (hex)":"object","DIO-5 (hex)":"object","Annotation":"object"})`

   `df['Date Time'] = pd.to_datetime(df['Date Time']) 
    Time_sorted = df.sort_values(by=['Date Time']) 
    Time_sorted = Time_sorted.drop_duplicates(subset='Date Time', keep='last')  
    Time_sorted.to_csv(sns[i] + '_timestamp_sort.txt', sep='\t')`


elif os.path.isfile(sns[i] + '_stitch.txt') and '(G18.1)' and 'Date Time' in open(sns[i] + '.txt').read(): 
    df = pd.read_table(sns[i] + '.txt', sep='\t', header=0, error_bad_lines=False, dtype= {"Sample (G18.1)":"int64", "Date Time":"object","Elapsed Time (hr)":"float64","Head Hours (hr)":"float64", "FAP Hours (hr)":"float64","Detector Power (W)":"float64","Controller Power (W)":"float64", "Controller Current (A)":"float64","Resonator Temp (degC)":"float64","BRF Temp (degC)":"float64","SHG Temp (degC)":"float64","THG Temp (degC)":"float64","Etalon Temp (degC)":"float64","Heat Sink Temp (degC)":"float64","Resonator Drive (V)":"float64","BRF Drive (V)":"float64","SHG Drive (V)":"float64","THG Drive (V)":"float64","Etalon Drive (V)":"float64", "Legacy Fault (hex)":"object","Head Fault (hex)":"object","Head Fault Latch (hex)":"object", "A Temp Sense (adc)":"float64","B Temp Sense (adc)":"float64","C Temp Sense (adc)":"float64","D Temp Sense (adc)":"float64","Resonator Temp Sense (adc)":"float64", "Heat Sink Temp Sense (adc)":"float64","Photodiode (adc)":"float64","Microcontroller Temp Sense (adc)":"float64", "Head DAC1 Output (dac)":"float64","Head DAC2 Output (dac)":"float64",\
                       "A Heater PWM (%)":"float64","B Heater PWM (%)":"float64", "C Heater PWM (%)":"float64","D Heater PWM (%)":"float64","Head GPIOA (hex)":"object","Head GPIOB (hex)":"object", "Head GPIOC (hex)":"object","Head GPIOD (hex)":"object","Annotation":"object"})#, dtype ={'Date Time': np.dtype('M')})
    df['Date Time'] = pd.to_datetime(df['Date Time'])
    Time_sorted = df.sort_values(by=['Date Time'])
    Time_sorted = Time_sorted.drop_duplicates(subset='Date Time', keep='last')   
    Time_sorted.to_csv(sns[i] + '_timestamp_sort.txt', sep='\t')


elif os.path.isfile(sns[i] + '_stitch.txt') and '(mW)' and 'Date Time' in open(sns[i] + '.txt').read():    
    df = pd.read_table(sns[i] + '.txt', sep='\t', header=0, error_bad_lines=False, dtype= {"Sample":"int64","Date Time":"object","Elapsed Time (hr)":"float64","Power (V)":"float64","Power (mW)":"float64","Current (A)":"float64","Main Temp (degC)":"float64","BRF Temp (degC)":"float64","SHG Temp (degC)":"float64","THG Temp (degC)":"float64","Main Temp Drive (volt)":"float64","Alt PD (adc)":"float64","UV PD (adc)":"float64","IR PD (adc)":"float64","DIO0 (hex)":"object","DIO2 (hex)":"object","DIO4 (hex)":"object","DIO5 (hex)":"object","Annotation":"object"})#, dtype ={'Date Time': np.dtype('M')})
    df['Date Time'] = pd.to_datetime(df['Date Time'])
    Time_sorted = df.sort_values(by=['Date Time'])
    Time_sorted = Time_sorted.drop_duplicates(subset='Date Time', keep='last')   
    Time_sorted.to_csv(sns[i] + '_timestamp_sort.txt', sep='\t')

else:
    print("{} does not exist ".format(sns[i] + '.txt'))

print(datetime.now() - startTime) print('It took', time.time()-start, 'seconds.')

0 个答案:

没有答案