我试图获得for fn in dirs:
以下的第一个for循环来迭代目录中的文件列表并合并具有相似文件名的文件。循环似乎只执行一次,而不是dirs = os.listdir(path)
中列出的所有文件。我尝试了其他帖子中提到的几件事,但没有任何效果。
你能帮助我理解为什么这个for循环不是迭代的吗?
import pandas as pd
import os
import glob
path = r"C:\Users\St\Documents\House\m2"
dirs = os.listdir(path) #list of .csv files
for fn in dirs:
for fn in glob.glob(os.path.join(path, '*2.csv')):
W = fn
if len(W) == 48: # some filenames are one character shorter
root2 = W[:43]
else:
root2 = W[:42]
for fn in glob.glob(os.path.join(path, '*J.csv')):
J = fn
if len(J) == 48:
jroot = J[:43]
else:
jroot = J[:42]
for fn in glob.glob(os.path.join(path, '*T.csv')):
T = fn
if len(T) == 48:
troot = T[:43]
else:
troot = T[:42]
for fn in glob.glob(os.path.join(path, '*S.csv')):
S = fn
if len(S) == 48:
sroot = S[:43]
else:
sroot = S[:42]
for fn in glob.glob(os.path.join(path, '*D.csv')):
D = fn
if len(D) == 48:
droot = D[:43]
else:
droot = D[:42]
for fn in glob.glob(os.path.join(path, '*R.csv')):
R = fn
if len(R) == 48:
rroot = R[:43]
else:
rroot = R[:42]
WW = pd.read_csv(W) #read csv so I can use pd.merge
JJ = pd.read_csv(J)
TT = pd.read_csv(T)
SS = pd.read_csv(S)
DD = pd.read_csv(D)
RR = pd.read_csv(R)
if jroot == root2: #merge files with names that start with same characters
JW = pd.merge(WW, JJ, on="JN")
if jroot == troot:
TW = pd.merge(JW, TT, on="TN")
if jroot == sroot:
SW = pd.merge(TW, SS, on="SN")
if jroot == droot:
DW = pd.merge(SW, DD, on="DN")
if jroot == rroot:
RW = pd.merge(RR, DW, on="HN")
data = RW
data.to_csv(jroot + "x.csv") #export final merged dataframe to .csv
一个正确合并的数据帧将返回该文件夹。我期待数百人。