我试图将十个数据帧合并为一个,我要从九个数据帧中取一列并将其添加到主数据帧中。它适用于两个合并,然后给我这个。
Traceback (most recent call last):
File "C:\Users\name\Desktop\project\test.py", line 34, in <module>
file_checks(path)
File "C:\Users\name\Desktop\project\test.py", line 27, in file_checks
main = pd.merge(main, samp_df[["Date", col]],on="Date", how="left")
File "C:\Users\name\Anaconda3\lib\site-packages\pandas\core\reshape\merge.py", line 48, in merge
return op.get_result()
File "C:\Users\name\Anaconda3\lib\site-packages\pandas\core\reshape\merge.py", line 560, in get_result
concat_axis=0, copy=self.copy)
File "C:\Users\name\Anaconda3\lib\site-packages\pandas\core\internals\managers.py", line 2061, in concatenate_block_managers
concatenate_join_units(join_units, concat_axis, copy=copy),
File "C:\Users\name\Anaconda3\lib\site-packages\pandas\core\internals\concat.py", line 242, in concatenate_join_units
for ju in join_units]
File "C:\Users\name\Anaconda3\lib\site-packages\pandas\core\internals\concat.py", line 242, in <listcomp>
for ju in join_units]
File "C:\Users\name\Anaconda3\lib\site-packages\pandas\core\internals\concat.py", line 225, in get_reindexed_values
fill_value=fill_value)
File "C:\Users\name\Anaconda3\lib\site-packages\pandas\core\algorithms.py", line 1651, in take_nd
out = np.empty(out_shape, dtype=dtype)
MemoryError
这是我的代码:
# import libraries
import pandas as pd
import numpy as np
import datetime
import os.path
#Path to files
path = 'my_path'
#Column types
main_dtype = {"COAST" : "object","EAST" : "object","FAR_WEST" : "object","NORTH" : "object",\
"NORTH_C" : "object","SOUTHERN" : "object","SOUTH_C" : "object","WEST" : "object","ERCOT" : "object"}
def merge_data(path):
# Load the main dataframe
main = pd.read_csv("hist_load.csv", dtype=main_dtype)
main["Date"]=pd.to_datetime(main["Date"]).dt.strftime('%m-%d-%Y')
for file in os.listdir(path):
if file.endswith("sample.csv"): # Loop through the files for files with specific ending
samp_df = pd.read_csv(file) # Load the files and extracts name from header
city_name = samp_df.columns[7][:-10]
samp_df = pd.read_csv(file, header=[1]) # Reloads file and skips header
samp_df["Date"]=pd.to_datetime(samp_df["Date"]).dt.strftime('%m-%d-%Y')
samp_df['Precip'] = samp_df['Precip'].replace('-', 0) #replace missing values
samp_df.rename(columns = {'Precip' : city_name.lower() + '_precip'}, inplace=True) # rename col
col = city_name.lower() + '_precip'
main = pd.merge(main, samp_df[["Date", col]],on="Date", how="left") # Merge dataframes