我有一个文件,其中每个条目占一个 11 行的块。我想遍历每个块,并从块中的每一行提取数据。我是这样做的:
# Reads input_removed.txt in blocks of 11 lines, builds one dict per block and
# collects the dicts in datalist.
# NOTE(review): `re` is used below but not imported in this snippet --
# presumably imported elsewhere; confirm.
file_removed = open("input_removed.txt")
# NOTE(review): json_result is opened here but never written to in this
# snippet -- presumably datalist is serialized to it later; confirm.
json_result = open("output_json.json", "w+")
datalist = []
while True:
data = {}
# Lines 1-2 of the block: stored as-is, minus the newline.
name = next(file_removed)
name = re.sub("\n", "", name)
data["name"] = name
familyName = next(file_removed)
familyName = re.sub("\n", "", familyName)
data["familyName"] = familyName
# Remaining lines: drop the field label and the whitespace after it, then
# the newline.
wGuideline = next(file_removed)
wGuideline = re.sub("Watering guidelines\s+","", wGuideline)
wGuideline = re.sub("\n", "", wGuideline)
data["water"] = wGuideline
FerLine = next(file_removed)
# NOTE(review): unlike the other label patterns, this one requires a literal
# space after the whitespace run.
FerLine = re.sub("Fertilizer suggestions\s+ ","",FerLine)
FerLine = re.sub("\n", "", FerLine)
data["fertilizer"] = FerLine
MistLine = next(file_removed)
MistLine = re.sub("Mist requirements\s+","",MistLine)
MistLine = re.sub("\n", "", MistLine)
data["mist"] = MistLine
LightLine = next(file_removed)
LightLine = re.sub("Light preferences\s+","", LightLine)
LightLine = re.sub("\n", "", LightLine)
data["light"] = LightLine
TempLine = next(file_removed)
TempLine = re.sub("Temperature preference\s+","",TempLine)
TempLine = re.sub("\n", "", TempLine)
data["temperature"] = TempLine
# Debug output used to watch how the loop advances through the file.
print(TempLine)
phLine = next(file_removed)
phLine = re.sub("pH range\s+", "", phLine)
phLine = re.sub("\n", "", phLine)
data["ph"] = phLine
AcidLine = next(file_removed)
# NOTE(review): both substitutions read TempLine, so the line just read into
# AcidLine is discarded and "acid" ends up holding the temperature value.
AcidLine = re.sub("Acidity preference\s+", "",TempLine)
AcidLine = re.sub("\n", "", TempLine)
data["acid"] = AcidLine
ToxicLine = next(file_removed)
# NOTE(review): both substitutions read AcidLine, so the line just read into
# ToxicLine is discarded and "toxic" ends up equal to "acid".
ToxicLine = re.sub("Toxicity\s+", "",AcidLine)
ToxicLine = re.sub("\n", "", AcidLine)
data["toxic"] = ToxicLine
ClimateLine = next(file_removed)
ClimateLine = re.sub("Climate\s+", "",ClimateLine)
ClimateLine = re.sub("\n", "", ClimateLine)
data["climate"]= ClimateLine
datalist.append(data)
# End-of-file probe: StopIteration here ends the loop.
# NOTE(review): the line returned by this next() is not stored, so when the
# file continues, the first line of the following block is consumed and lost.
try:
next(file_removed)
except StopIteration:
break;
可以看到,我加了 print(TempLine) 来检查我的代码是否正常工作。但是在第一次迭代之后,每次 while 循环都只前进了一行!
有人可以为我解释这种行为吗?
答案 0 :(得分:2)
问题在于,最后那个 try 块中的 next() 会读取下一个块的第一行,但没有把它保存下来,因此该行就丢失了。每次迭代实际读取了 12 行,而不是 11 行,但只处理了其中的 11 行。
试试下面的代码(新增了两行,修改了一行):
import re
# Reads input_removed.txt in blocks of 11 lines, builds one dict per block and
# collects the dicts in datalist.
file_removed = open("input_removed.txt")
# NOTE(review): json_result is opened here but never written to in this
# excerpt -- presumably datalist is serialized to it later; confirm.
json_result = open("output_json.json", "w+")
datalist = []
# Carries the first line of the next block, which the end-of-file probe at the
# bottom of the loop has already consumed. None only before the first block.
name = None # Added
while True:
    data = {}
    if name is None: # Added
        name = next(file_removed)
    # Lines 1-2 of the block are stored as-is, minus the newline.
    name = re.sub("\n", "", name)
    data["name"] = name
    familyName = next(file_removed)
    familyName = re.sub("\n", "", familyName)
    data["familyName"] = familyName
    # Remaining lines: drop the field label and the whitespace after it, then
    # the newline. A file that ends in the middle of a block raises
    # StopIteration from one of these next() calls.
    wGuideline = next(file_removed)
    wGuideline = re.sub(r"Watering guidelines\s+", "", wGuideline)
    wGuideline = re.sub("\n", "", wGuideline)
    data["water"] = wGuideline
    FerLine = next(file_removed)
    # The pattern previously demanded an extra literal space after \s+, so the
    # label was left in place when only one whitespace character followed it.
    FerLine = re.sub(r"Fertilizer suggestions\s+", "", FerLine)
    FerLine = re.sub("\n", "", FerLine)
    data["fertilizer"] = FerLine
    MistLine = next(file_removed)
    MistLine = re.sub(r"Mist requirements\s+", "", MistLine)
    MistLine = re.sub("\n", "", MistLine)
    data["mist"] = MistLine
    LightLine = next(file_removed)
    LightLine = re.sub(r"Light preferences\s+", "", LightLine)
    LightLine = re.sub("\n", "", LightLine)
    data["light"] = LightLine
    TempLine = next(file_removed)
    TempLine = re.sub(r"Temperature preference\s+", "", TempLine)
    TempLine = re.sub("\n", "", TempLine)
    data["temperature"] = TempLine
    # Debug output used to watch how the loop advances through the file.
    print(TempLine)
    phLine = next(file_removed)
    phLine = re.sub(r"pH range\s+", "", phLine)
    phLine = re.sub("\n", "", phLine)
    data["ph"] = phLine
    AcidLine = next(file_removed)
    # Operate on AcidLine itself; the substitutions used to read TempLine,
    # which discarded this line and stored the temperature value as "acid".
    AcidLine = re.sub(r"Acidity preference\s+", "", AcidLine)
    AcidLine = re.sub("\n", "", AcidLine)
    data["acid"] = AcidLine
    ToxicLine = next(file_removed)
    # Operate on ToxicLine itself; the substitutions used to read AcidLine,
    # which discarded this line and stored the "acid" value as "toxic".
    ToxicLine = re.sub(r"Toxicity\s+", "", ToxicLine)
    ToxicLine = re.sub("\n", "", ToxicLine)
    data["toxic"] = ToxicLine
    ClimateLine = next(file_removed)
    ClimateLine = re.sub(r"Climate\s+", "", ClimateLine)
    ClimateLine = re.sub("\n", "", ClimateLine)
    data["climate"] = ClimateLine
    datalist.append(data)
    # End-of-file probe. The line it consumes is the first line of the next
    # block, so keep it in name for the next iteration instead of dropping it.
    try:
        name = next(file_removed) # Changed
    except StopIteration:
        break
这段代码还有其他可以改进的地方,但更多的改动会偏离眼下要解决的问题。