def _field_value(line):
    """Return the value part of a ``key = "value";`` line, or None.

    The value is the text between the first and second ``=`` with every
    ``;`` and ``"`` removed; surrounding whitespace is left untouched.
    None is returned when the line contains no ``=`` at all.
    """
    parts = line.split('=')
    if len(parts) < 2:
        return None
    return parts[1].replace(';', '').replace('"', '')


def prodInfo():
    """Print product type, acquisition and ID number for every file.

    Changes the working directory to ``copyProd`` and walks it recursively.
    Each file is scanned for lines starting with ``prodType``, ``acq`` and
    ``ID_num``; the three extracted values are concatenated and printed,
    one output line per file.  A field that is absent from a file
    contributes an empty string.
    """
    prefixes = ('prodType', 'acq', 'ID_num')
    os.chdir(copyProd)
    for root, dirs, files in os.walk('.'):
        for data in files:
            fullpath = os.path.join(root, data)
            # Start from empty values for every file so that a missing field
            # never reuses the value parsed from a previous file.
            found = dict.fromkeys(prefixes, '')
            with open(fullpath, 'rt') as fp:
                for info in fp:
                    info = info.strip()
                    for prefix in prefixes:
                        if info.startswith(prefix):
                            value = _field_value(info)
                            # A matching line without '=' carries no value.
                            if value is not None:
                                found[prefix] = value
            # Single-argument print() behaves the same on Python 2 and 3.
            print(found['prodType'] + found['acq'] + found['ID_num'])
产生这个结果:
SD Acq645467 356788
SD Acq645467 356788
SD Acq645467 356788
SD Acq645467 356788
SD Acq645467 356788
SD Acq645467 356788
SD Acq645467 356788
SD Acq645467 356788
SD Acq645467 356788
Image Acq645467 356788
Image Acq645467 356788
Image Acq645467 356788
Image Acq645467 356788
SD Acq644869 356849
SD Acq644869 356849
Image Acq644869 356849
SD Acq644247 356851
SD Acq644247 356851
Image Acq644247 356851
我想把这些结果存储起来,并统计每个特定 ID 编号(356788/356849/356851)对应的 'SD' 出现了多少次,以及每个 ID 编号对应的 'Image' 出现了多少次。
结果如下:
9 - SD / 4 - Image(ID 356788)
2 - SD / 1 - Image(ID 356849)
2 - SD / 1 - Image(ID 356851)
我原以为最好把这些条目存储在字典中,但没能成功统计出数量。下面是我用来把信息存入字典的代码:
# Store this file's product type and acquisition under its ID number.
# NOTE(review): assigning to the same info2 key again replaces the earlier
# entry, so the dict keeps only one record per ID and cannot be used to count.
prodHolder[info2] = {'SD/Image': info0, 'Acq' : info1}
# total_Acq is bound to the same dict object as prodHolder (no copy is made).
total_Acq = prodHolder
print prodHolder
结果是:
{' 356788': {'SD/Image': ' SD', 'Acq': ' Acq645467'}} ...
每次运行该函数时,都会输入一组不同的值,从而产生不同的结果。
答案 0 :(得分:0)
所以这里有两个问题。
我使用csv(逗号分隔值)。 Python有一个很好的模块(csv)
您可以修改代码,使其在读取文件的同时(就像现在已经在做的那样),把 info0、info1 和 info2 写入一个 .csv 文件:
def _field_value(line):
    """Return the value part of a ``key = "value";`` line, or None.

    The value is the text between the first and second ``=`` with every
    ``;`` and ``"`` removed; surrounding whitespace is left untouched.
    None is returned when the line contains no ``=`` at all.
    """
    parts = line.split('=')
    if len(parts) < 2:
        return None
    return parts[1].replace(';', '').replace('"', '')


def prodInfo():
    """Print each file's product record and save all records as CSV.

    Changes the working directory to ``copyProd`` and walks it recursively.
    For every file the values of the ``prodType``, ``acq`` and ``ID_num``
    lines are printed (concatenated) and written as one row to
    ``./stack59.write.csv``.  A field that is absent from a file
    contributes an empty string.
    """
    prefixes = ('prodType', 'acq', 'ID_num')
    out_path = './stack59.write.csv'
    os.chdir(copyProd)
    # Open the CSV once for the whole walk: re-opening it with 'w' for every
    # input file would truncate it each time and keep only the last row.
    with open(out_path, 'w') as fw:
        writer = csv.writer(fw)
        for root, dirs, files in os.walk('.'):
            for data in files:
                fullpath = os.path.join(root, data)
                # The output file lives inside the walked directory; do not
                # parse it as if it were one of the inputs.
                if os.path.normpath(fullpath) == os.path.normpath(out_path):
                    continue
                # Fresh values per file so nothing leaks from the previous one.
                found = dict.fromkeys(prefixes, '')
                with open(fullpath, 'r') as fp:
                    for info in fp:
                        info = info.strip()
                        for prefix in prefixes:
                            if info.startswith(prefix):
                                value = _field_value(info)
                                if value is not None:
                                    found[prefix] = value
                row = [found[prefix] for prefix in prefixes]
                # Single-argument print() behaves the same on Python 2 and 3.
                print(''.join(row))
                writer.writerow(row)
这将创建一个名为 stack59.write.csv 的文件,其内容如下所示(即下文中以逗号分隔的数据行):
为此,itertools.groupby可能适合您的需求。你可能也想看看迭代器做了什么(见this,this和this)
首先,我将数据存储到矩阵中:
SD,Acq645467,356788
SD,Acq645467,356788
SD,Acq645467,356788
[ . . . ]
SD,Acq644247,356851
SD,Acq644247,356851
Image,Acq644247,356851
def _field_value(line):
    """Return the value part of a ``key = "value";`` line, or None.

    The value is the text between the first and second ``=`` with every
    ``;`` and ``"`` removed; surrounding whitespace is left untouched.
    None is returned when the line contains no ``=`` at all.
    """
    parts = line.split('=')
    if len(parts) < 2:
        return None
    return parts[1].replace(';', '').replace('"', '')


def prodInfo():
    """Print each file's product record and collect all records in a list.

    Changes the working directory to ``copyProd`` and walks it recursively.
    For every file the values of the ``prodType``, ``acq`` and ``ID_num``
    lines are printed (concatenated) and appended to ``data_matrix`` as
    ``[prodType, acq, ID_num]``.  A field that is absent from a file
    contributes an empty string.

    Returns:
        The ``data_matrix`` list, one three-item row per file, so that the
        caller can group or count the rows afterwards.
    """
    prefixes = ('prodType', 'acq', 'ID_num')
    data_matrix = []
    os.chdir(copyProd)
    for root, dirs, files in os.walk('.'):
        for data in files:
            fullpath = os.path.join(root, data)
            # Fresh values per file so nothing leaks from the previous one.
            found = dict.fromkeys(prefixes, '')
            with open(fullpath, 'rt') as fp:
                for info in fp:
                    info = info.strip()
                    for prefix in prefixes:
                        if info.startswith(prefix):
                            value = _field_value(info)
                            if value is not None:
                                found[prefix] = value
            row = [found[prefix] for prefix in prefixes]
            # Single-argument print() behaves the same on Python 2 and 3.
            print(''.join(row))
            data_matrix.append(row)
    return data_matrix
然后,您可以根据需要对 data_matrix 进行分组。例如,下面的代码先按 ID 编号分组,再按类型分组,并输出每个分组中的条目数量:
# Count the rows of data_matrix per picture id and, within each id, per type.
# Each row is [type, acquisition, id]; one "<id>: <count> <type>" line is
# printed per (id, type) pair, followed by a blank line after every id.

# itertools.groupby only merges *adjacent* rows with equal keys, so the rows
# are sorted by the grouping key first; otherwise an id whose rows are not
# contiguous would be reported several times with partial counts.
rows_by_id = sorted(data_matrix, key=lambda row: row[2])

# First, group by picture id (356788, 356849...), which is
# the third column of the data
for picture_id, id_rows in itertools.groupby(rows_by_id, lambda row: row[2]):
    # Now, within those groups, group by type, the first column
    # of the data (SD, Image...)
    rows_by_type = sorted(id_rows, key=lambda row: row[0])
    for prod_type, type_rows in itertools.groupby(rows_by_type,
                                                  lambda row: row[0]):
        # The group is a one-shot iterator; materialize it to count it.
        print("%s: %s %s" % (picture_id, len(list(type_rows)), prod_type))
    print('')