我一直试图修改一些代码来查看第3列中不同的 uniqueclassindicator:如果第3列是2,就查看同一 uniqueclassindicator 内的上一行,并从该行检索数据。只有当第3列和第6列都是1时,才应该产生我的输出。
from collections import defaultdict
import csv
# you probably can think up better names
# Column names handed to DictReader. Because they are supplied explicitly,
# the file's own header line comes back as an ordinary row (skipped below).
fields = ('TitleA', 'TitleB', 'TitleIndicator', 'TitleRNum', 'TitleC', 'TitleD', 'TitlePNum', 'TitleBF', 'TitleCheck')
# Rows seen so far, as: TitleIndicator value -> {key tuple -> row dict}.
entries = defaultdict(dict)
# NOTE(review): this snippet was pasted without indentation; the nesting below
# is a reconstruction and should be confirmed against the original post.
with open("exampledata.csv", 'rb') as fd:
    reader = csv.DictReader(fd, fields)
    for counter, row in enumerate(reader):
        # counter 0 is the header line, so it is skipped.
        if counter != 0:
            TitleRNum = int(row['TitleRNum'])
            # TitlePNum is blank on some rows; those get "" instead of an int.
            if row['TitlePNum']:
                TitlePNum = int(row['TitlePNum'])
            else:
                TitlePNum = ""
            Check = row['TitleCheck']
            # NOTE(review): Name is assigned but never read in this snippet.
            Name = row['TitleB']
            # NOTE(review): both elements are TitleRNum and TitlePNum is never
            # used; presumably (TitleRNum, TitlePNum) was intended.
            key = (TitleRNum, TitleRNum)
            previous = entries[row['TitleIndicator']]
            # Only rows with a non-empty TitleCheck are considered for output.
            if Check:
                # Scenario 1
                if (1, 1) in previous:
                    # NOTE(review): key[1] comes from int(), so it is never None.
                    if (key[0] == 2 and key[1]>=2) or key[1] is None: # If Rank 2 and Position is Anything
                        if TitleRNum == 2:
                            # NOTE(review): only (n, n) keys are ever stored, so
                            # (2, 1) is never present and this raises KeyError;
                            # the (1, 1) row tested for above looks intended.
                            p = previous[(2, 1)]
                            print '{p[TitleB]} {r[TitleB]} {p[TitleRNum]} {r[TitleRNum]} {p[TitlePNum]} {r[TitlePNum]} {p[TitleBF]} {r[TitleBF]} {p[TitleCheck]} {r[TitleCheck]}'.format(p=p, r=row)
            # remember this row for later rows to match against.
            previous[key] = row
TitleA,TitleB,TitleIndicator,TitleRNum,TitleC,TitleD,TitlePNum,TitleBF,TitleCheck
DataA,Joe,uniqueclassindicator1,1,125,3.659819202,1,984.2,Yes
DataA,Bob,uniqueclassindicator1,2,125,4.212490883,2,994.2,Yes
DataA,Dilon,uniqueclassindicator1,3,125,10.4587985,3,1023.2,Yes
DataA,Jessie,uniqueclassindicator1,4,125,12.68794408,9,,
DataA,Sammy,uniqueclassindicator1,5,125,13.91781792,9,,
DataA,Tommy,uniqueclassindicator1,6,125,15.68662715,9,,
DataA,Bobby,uniqueclassindicator1,7,125,16.45724048,9,,
DataA,Liam,uniqueclassindicator1,8,125,17.80601624,9,,
DataA,George,uniqueclassindicator1,9,125,29.77286312,9,,
DataA,Aaron,uniqueclassindicator1,10,125,34.68115514,9,,
DataA,Tim,uniqueclassindicator2,1,125,4.495485874,3,992.13,E
DataA,Tom,uniqueclassindicator2,2,125,4.899056741,2,951.1,E
DataA,Ryan,uniqueclassindicator2,3,125,6.856404461,1,932.1,E
DataA,Jack,uniqueclassindicator2,4,125,8.773751853,9,,
DataA,Jennifer,uniqueclassindicator2,5,125,9.16233388,9,,
DataA,Sarah,uniqueclassindicator2,6,125,10.16514897,9,,
DataA,Joanne,uniqueclassindicator2,7,125,18.43432845,9,,
DataA,Jess,uniqueclassindicator2,8,125,19.28867787,9,,
DataA,Test1,uniqueclassindicator3,1,125,4.634033328,9,,
DataA,Test2,uniqueclassindicator3,2,125,6.470141577,9,,
DataA,Test3,uniqueclassindicator3,3,125,7.337664044,3,33.59,Yes
DataA,Test4,uniqueclassindicator3,4,125,7.614649866,1,30,D
DataA,Test5,uniqueclassindicator3,5,125,10.02454642,9,,
DataA,Test6,uniqueclassindicator3,6,125,11.95552344,9,,
DataA,Test7,uniqueclassindicator3,7,125,14.36021059,2,33.58,E
DataA,Test8,uniqueclassindicator3,8,125,14.73472778,9,,
DataA,Test9,uniqueclassindicator3,9,125,24.30844993,9,,
DataA,Mark,uniqueclassindicator4,1,125,4.634033328,1,395.1,F
DataA,Jason,uniqueclassindicator4,2,125,6.470141577,9,,
DataA,Dan,uniqueclassindicator4,3,125,7.337664044,9,,
DataA,Tessa,uniqueclassindicator4,4,125,7.614649866,2,395.2,F
DataA,Laura,uniqueclassindicator4,5,125,10.02454642,9,,
DataA,Lizz,uniqueclassindicator4,6,125,11.95552344,9,,
DataA,Hannah,uniqueclassindicator4,7,125,14.36021059,3,395.3,F
DataA,Ian,uniqueclassindicator4,8,125,14.73472778,9,,
DataA,Janet,uniqueclassindicator4,9,125,24.30844993,9,,
Joe,Bob,1,2,1,2,984.2,994.2,Yes,Yes
Mark,Jason,1,2,1,9,395.1,,F,
因此,澄清一下:对于由 TitleIndicator(即 uniqueclassindicator)定义的每个组,如果第6列和第3列都等于1,我希望能够从该组的前两行中提取数据。
如果有人可以告诉我如何修复此代码,我将不胜感激。非常感谢,SMNALLY
答案 0 :(得分:5)
嗯,竞争已经结束,但我仍然想提出我的解决方案。以下是带有详细注释的答案:
# Import "csv.DictReader" and put it in the name "dr".
from csv import DictReader as dr
# These are the columns we will be working with.
cols = "TitleB", "TitleRNum", "TitlePNum", "TitleBF", "TitleCheck"

# This variable holds a previous row that is waiting to be paired with the
# row that follows it. It serves the same purpose as the "entries" dict in
# Sudipta Chatterjee's answer. "None" means that no row is waiting.
mark = None

# Open the CSV file in binary mode (what the Python 2 "csv" module expects).
with open("exampledata.csv", "rb") as f:
    # When no field names are passed, "DictReader" takes the column names
    # from the first line of the file, so the header needs no manual parsing.
    for row in dr(f):
        # "mark" holds a row only if the previous row qualified.
        # The "TitleIndicator" comparison stops a qualifying row that is the
        # last row of its group from being paired with the first row of the
        # next group.
        if mark is not None and mark["TitleIndicator"] == row["TitleIndicator"]:
            # Take the waiting row and the current row, interleave their
            # values column by column, and separate them with commas using
            # "str.join". The parentheses around the single argument are
            # harmless under the Python 2 print statement.
            print(",".join(",".join([mark[c], row[c]]) for c in cols))

        # Remember the current row only if its "TitlePNum" and "TitleRNum"
        # columns are both "1"; otherwise clear "mark". This is a compact
        # statement equivalent to:
        #
        #     if row["TitlePNum"] == row["TitleRNum"] == "1":
        #         mark = row
        #     else:
        #         mark = None
        mark = row if row["TitlePNum"] == row["TitleRNum"] == "1" else None
以下是同一答案的精简写法:
from csv import DictReader as dr
# Columns echoed for each (qualifying row, following row) pair.
cols = "TitleB", "TitleRNum", "TitlePNum", "TitleBF", "TitleCheck"
# Row whose TitlePNum and TitleRNum are both "1", waiting for its successor.
mark = None

with open("exampledata.csv", "rb") as f:
    # DictReader reads the column names from the file's first line itself.
    for row in dr(f):
        # Pair only rows of the same group, so a qualifying row that ends its
        # group is not joined to the first row of the next one.
        if mark is not None and mark["TitleIndicator"] == row["TitleIndicator"]:
            print(",".join(",".join([mark[c], row[c]]) for c in cols))
        mark = row if row["TitlePNum"] == row["TitleRNum"] == "1" else None
输出:
Joe,Bob,1,2,1,2,984.2,994.2,Yes,Yes
Mark,Jason,1,2,1,9,395.1,,F,
正如您所看到的,我的解决方案更小,效率更高。
答案 1 :(得分:2)
这有效:
from collections import defaultdict
import csv
# you probably can think up better names
fields = ('TitleA', 'TitleB', 'TitleIndicator', 'TitleRNum', 'TitleC', 'TitleD', 'TitlePNum', 'TitleBF', 'TitleCheck')
# Columns echoed for every (remembered row, following row) pair.
items_to_print = ('TitleB', 'TitleRNum', 'TitlePNum', 'TitleBF', 'TitleCheck')
# TitleIndicator -> the row of that class whose RNum and PNum are both 1 and
# which is still waiting for the next row of the same class.
entries = {}

with open("exampledata.csv", 'rb') as fd:
    reader = csv.DictReader(fd, fields)
    # Field names are supplied explicitly, so the file's header line comes
    # back as an ordinary row; discard it.
    next(reader, None)
    for row in reader:
        class_id = row['TitleIndicator']
        rnum = int(row['TitleRNum'])
        # A blank TitlePNum can never equal 1, so None stands in for it.
        pnum = int(row['TitlePNum']) if row['TitlePNum'] else None

        # If we have already seen a row with the same class that has 1 at
        # both RNum and PNum, print it next to the current row. pop() also
        # removes the remembered row so that it is used only once.
        previous_row = entries.pop(class_id, None)
        if previous_row is not None:
            print(','.join(
                previous_row[item] + ',' + row[item] for item in items_to_print
            ))

        # If both RNum and PNum are 1, save this row as a candidate for the
        # next row of the same class.
        if rnum == 1 and pnum == 1:
            entries[class_id] = row
答案 2 :(得分:2)
你在帖子的标题中使用了“键” - 所以我在这里提供了一个字典的解决方案:)哇(!)能够做到这一点感觉很好。
from csv import DictReader
# 1) read in the data and group it row-wise in a dictionary where each key
#    is the row's full 'TitleIndicator' value. Using the whole value (rather
#    than only its last character) keeps classes apart once there are ten or
#    more of them, or when the identifiers do not end in a digit.
datagrouped = {}
with open("exampledata.csv", 'rb') as fd:
    for row in DictReader(fd):
        datagrouped.setdefault(row['TitleIndicator'], []).append(row)

# 2) go through each class within the dictionary, then go through each pair
#    of consecutive rows within the class, and print out the pairs that meet
#    the requirements.
# NOTE: a plain dict has no guaranteed iteration order in Python 2, so the
# classes may be printed in any order.
for uniq_class in datagrouped.values():  # class loop
    for prev_row, row in zip(uniq_class, uniq_class[1:]):  # row-pair loop
        cond1 = row['TitleRNum'] == '2'
        cond2 = prev_row['TitleRNum'] == '1' and prev_row['TitlePNum'] == '1'
        # Logical "and" (not bitwise "&") is the right operator for booleans.
        if cond1 and cond2:
            print(["%s" % x for x in prev_row.values()])
            print(["%s" % x for x in row.values()])
当我在与 exampledata.csv 相同的目录中运行这段代码时,得到以下输出:
['DataA', 'Mark', '125', '4.634033328', '1', '395.1', '1', 'F', 'uniqueclassindicator4']
['DataA', 'Jason', '125', '6.470141577', '2', '', '9', '', 'uniqueclassindicator4']
['DataA', 'Joe', '125', '3.659819202', '1', '984.2', '1', 'Yes', 'uniqueclassindicator1']
['DataA', 'Bob', '125', '4.212490883', '2', '994.2', '2', 'Yes', 'uniqueclassindicator1']