# NOTE(review): this is the snippet the question reports as failing; the code is
# left exactly as posted and only annotated.
# inp_file holds the current working directory (a folder path, not a file path).
inp_file=os.getcwd()
# NOTE(review): read_csv receives the folder path plus the wildcard string
# "B00234*.csv" as a second positional argument. Presumably the intent was to
# expand the wildcard into a list of matching files first (e.g. via glob.glob or
# os.listdir) — confirm against the pandas read_csv documentation.
# missing_values is not defined in this snippet; presumably set up earlier.
files_comp = pd.read_csv(inp_file,"B00234*.csv", na_values = missing_values, nrows=10)
# NOTE(review): the loop body below has lost its indentation in this listing.
for f in files_comp:
# Reads at most the first 10 rows of each item yielded by the loop.
df_calculated = pd.read_csv(f, na_values = missing_values, nrows=10)
# NOTE(review): `df` is not assigned anywhere in this snippet; the frame read
# above is named df_calculated, which is likely the intended operand.
col_length=len(df.columns)-1
您好，我如何在一个循环中读取 4 个 CSV 文件？按上述方式读取 CSV 时出现错误，请帮助我。
答案 0 :(得分:0)
您基本上需要这个：`files = os.listdir(path)`，然后仅保留以指定模式开头并以 `.csv` 结尾的文件名。
您也可以使用正则表达式（通过导入 `re` 库以处理更复杂的匹配），或使用 `glob.glob` 来改进它。
filesnames = os.listdir(path)
# Keep only the CSV files whose name starts with the "B00234" prefix
# (the extension check is case-insensitive).
filesnames = [f for f in filesnames if (f.startswith("B00234") and f.lower().endswith(".csv"))]
# Read every matching file into its own DataFrame.
dfs = list()
for filename in filesnames:
    # os.listdir() returns bare file names, so the folder has to be joined
    # back on; otherwise the read only works when `path` is the current
    # working directory.
    df = pd.read_csv(os.path.join(path, filename))
    dfs.append(df)
我们将首先制作一些伪数据，然后将其保存到一些 `.csv` 和 `.txt` 文件中。其中一些 `.csv` 文件将以 "B00234" 开头，而另一些文件则不会。我们将把虚拟数据写入这些文件，然后有选择地仅将 `.csv` 文件读入数据帧列表 `dfs`。
import pandas as pd
from IPython.display import display
# Define Temporary Output Folder
path = './temp_output'
# Clean Temporary Output Folder
# `os` must be imported before its first use here; the script otherwise only
# imports it further down, which makes this check raise NameError.
import os
import shutil
# When True, any output folder left over from a previous run is deleted first.
reset = True
if os.path.exists(path) and reset:
    # ignore_errors=True: a failure to delete does not abort the script.
    shutil.rmtree(path, ignore_errors=True)
# Create Content
# numpy is needed for np.array below; the script never imports it elsewhere,
# so it is imported here.
import numpy as np
# A 3x3 frame of the integers 1..9 with columns a, b, c; this same frame is
# the dummy content written to every generated file.
df0 = pd.DataFrame(
    np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
    columns=['a', 'b', 'c'],
)
display(df0)
# Make Path
import os
# Create the output folder if it is missing; otherwise just report that it
# already exists.
if not os.path.exists(path):
    os.makedirs(path)
else:
    print('Path Exists: {}'.format(path))
# Make Filenames
# Ten indices, two files each (.csv and .txt): indices 0-4 get the "B00234"
# prefix, indices 5-9 get the "B00678" prefix. Order is csv then txt per index.
filenames = list()
for i in range(10):
    prefix = "B00234" if i < 5 else "B00678"
    filenames.append("{}_{}.csv".format(prefix, i))
    filenames.append("{}_{}.txt".format(prefix, i))
# Create files
# Make files with extensions: .csv and .txt
# and file names starting
# with and without: "B00234"
for filename in filenames:
    fpath = os.path.join(path, filename)
    if filename.lower().endswith(".csv"):
        # CSV files get the frame as comma-separated values, without the index.
        df0.to_csv(fpath, index=False)
    else:
        # Every other file gets the frame's plain-text rendering.
        with open(fpath, 'w') as f:
            f.write(df0.to_string())
# Get list of target files
# List the output folder and keep the entries that both start with "B00234"
# and end in ".csv" (extension compared case-insensitively).
files = [
    entry
    for entry in os.listdir(path)
    if entry.startswith("B00234") and entry.lower().endswith(".csv")
]
print('\nList of target files: \n\t{}\n'.format(files))
# Read each csv file into a dataframe
dfs = list()  # a list of dataframes
for csvfile in files:
    # `files` holds bare names, so prepend the folder before reading.
    fpath = os.path.join(path, csvfile)
    print("Reading file: {}".format(csvfile))
    df = pd.read_csv(fpath)
    dfs.append(df)
列表 `dfs` 应该包含五个元素，每个元素都是从文件中读取的数据帧。
输出：
a b c
0 1 2 3
1 4 5 6
2 7 8 9
List of target files:
['B00234_3.csv', 'B00234_4.csv', 'B00234_0.csv', 'B00234_2.csv', 'B00234_1.csv']
Reading file: B00234_3.csv
Reading file: B00234_4.csv
Reading file: B00234_0.csv
Reading file: B00234_2.csv
Reading file: B00234_1.csv