我想先从一个值列表中找出若干唯一的前缀,然后再根据这些唯一前缀重建原始列表。例如,假设目录中有以下文件:
a_test_1.txt a_test_2.txt a_test_3.txt b_test_1.txt b_test_2.txt b_test_3.txt
我想提取出唯一的前缀 a_ 和 b_,这一步我认为已经成功了。接下来,我想用包含 a_ 和 b_ 的列表重新得到原始的文件列表。这是我使用的代码及其输出:
import os, fnmatch
def find(pattern, path):
    """Return the paths of all files under *path* whose name matches *pattern*.

    The tree rooted at *path* is walked with ``os.walk``. Only the bare file
    name (never the directory part) is compared against the shell-style
    *pattern* using ``fnmatch.fnmatch``.

    Args:
        pattern: Shell-style wildcard pattern, e.g. ``'*.txt'``.
        path: Directory in which to start the search.

    Returns:
        A list with ``os.path.join(root, name)`` for every matching file,
        in the order ``os.walk`` yields them. Empty if nothing matches.
    """
    matches = []
    for root, _dirs, files in os.walk(path):
        matches.extend(
            os.path.join(root, name)
            for name in files
            if fnmatch.fnmatch(name, pattern)
        )
    return matches
# Collect every .txt file found below the search root.
#base='/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/'
base = 'attempt/'
ids_train = find('*.txt', base)

# Keep only the part of each path that precedes 'test', de-duplicated.
# The resulting order is arbitrary because it goes through a set.
ids2 = list({idd.split('test')[0] for idd in ids_train})

# I will do some operations on ids2 here which I am skipping for simplicity
for idd in ids2:
    print(idd)

print("now printing resconstructed")
for idd in ids2:
    # NOTE(review): str.strip(base) does not remove the prefix `base`; it
    # treats `base` as a set of characters and strips any of them from both
    # ends. 'attempt/a_' therefore becomes '_' (the leading 'a' is stripped
    # too), so the pattern '_*' matches no file and the a_ files are missing.
    pattern = idd.strip(base) + '*'
    for data in find(pattern, base):
        print(data, pattern)
attempt/a_
attempt/b_
now printing resconstructed
attempt/b_test_1.txt b_*
attempt/b_test_2.txt b_*
attempt/b_test_3.txt b_*
现在我尝试使用绝对路径而不是相对路径
import os, fnmatch
def find(pattern, path):
    """Return the paths of all files under *path* whose name matches *pattern*.

    The tree rooted at *path* is walked with ``os.walk``. Only the bare file
    name (never the directory part) is compared against the shell-style
    *pattern* using ``fnmatch.fnmatch``.

    Args:
        pattern: Shell-style wildcard pattern, e.g. ``'*.txt'``.
        path: Directory in which to start the search.

    Returns:
        A list with ``os.path.join(root, name)`` for every matching file,
        in the order ``os.walk`` yields them. Empty if nothing matches.
    """
    matches = []
    for root, _dirs, files in os.walk(path):
        matches.extend(
            os.path.join(root, name)
            for name in files
            if fnmatch.fnmatch(name, pattern)
        )
    return matches
# Collect every .txt file found below the (absolute) search root.
base='/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/'
#base = 'attempt/'
ids_train = find('*.txt', base)

# Keep only the part of each path that precedes 'test', de-duplicated.
# The resulting order is arbitrary because it goes through a set.
ids2 = list({idd.split('test')[0] for idd in ids_train})

# I will do some operations on ids2 here which I am skipping for simplicity
for idd in ids2:
    print(idd)

print("now printing resconstructed")
for idd in ids2:
    # NOTE(review): str.strip(base) does not remove the prefix `base`; it
    # treats `base` as a set of characters and strips any of them from both
    # ends. With this long `base` the set also contains 'a' and '_', so
    # '.../attempt/a_' collapses to '' (pattern '*', matching every file)
    # and '.../attempt/b_' collapses to 'b' (pattern 'b*').
    pattern = idd.strip(base) + '*'
    for data in find(pattern, base):
        print(data, pattern)
/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/b_
/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/a_
now printing resconstructed
/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/b_test_1.txt b*
/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/b_test_2.txt b*
/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/b_test_3.txt b*
/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/a_test_3.txt *
/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/b_test_1.txt *
/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/a_test_2.txt *
/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/b_test_2.txt *
/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/b_test_3.txt *
/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/a_test_1.txt *
在两种情况下,我都无法重建所需的列表
答案 0 :(得分:0)
这里误解了 strip 的作用:`str.strip(chars)` 把参数当作一个字符集合,从字符串的开头和结尾逐个删除属于该集合的字符,而不是删除整个前缀子串。这里需要的是 `replace`。下面这段代码可以正常工作:
import os, fnmatch
def find(pattern, path):
    """Return the paths of all files under *path* whose name matches *pattern*.

    The tree rooted at *path* is walked with ``os.walk``. Only the bare file
    name (never the directory part) is compared against the shell-style
    *pattern* using ``fnmatch.fnmatch``.

    Args:
        pattern: Shell-style wildcard pattern, e.g. ``'*.txt'``.
        path: Directory in which to start the search.

    Returns:
        A list with ``os.path.join(root, name)`` for every matching file,
        in the order ``os.walk`` yields them. Empty if nothing matches.
    """
    matches = []
    for root, _dirs, files in os.walk(path):
        matches.extend(
            os.path.join(root, name)
            for name in files
            if fnmatch.fnmatch(name, pattern)
        )
    return matches
# Collect every .txt file found below the search root.
#base='/data/data_us4/home/plaquestudy/nhm_processing/plaque_CNNSeg/attempt/'
base = 'attempt/'
ids_train = find('*.txt', base)

# Keep only the part of each path that precedes 'test', de-duplicated.
# The resulting order is arbitrary because it goes through a set.
ids2 = list({idd.split('test')[0] for idd in ids_train})

# I will do some operations on ids2 here which I am skipping for simplicity
for idd in ids2:
    print(idd)

print("now printing resconstructed")
for idd in ids2:
    # replace() removes the literal substring `base`, leaving just the
    # file-name prefix (e.g. 'a_'), which is then turned into a wildcard
    # pattern. Unlike strip(), it does not treat `base` as a character set.
    pattern = idd.replace(base, '') + '*'
    for data in find(pattern, base):
        print(data, pattern)
attempt/a_
attempt/b_
now printing resconstructed
attempt/a_test_3.txt a_*
attempt/a_test_2.txt a_*
attempt/a_test_1.txt a_*
attempt/b_test_1.txt b_*
attempt/b_test_2.txt b_*
attempt/b_test_3.txt b_*