我的代码运行正常,但我不明白为什么把结果输出到 csv 之后,再读取该文件并用 print 打印行数时,得到的行数是预期的两倍。
我似乎无法解释多余的行是从哪里来的。
这是我保存到csv时的代码
# Page that is fetched and scraped; passed as the `url` argument below.
url1 = 'https://yugioh.fandom.com/wiki/Set_Card_Lists:Deck_Build_Pack:_Mystic_Fighters_(OCG-JP)'
# File name for the CSV output.
output_file1_2 = "DBMF - CardList - tr2.csv" #change this to your own file output
def OutputHTMLFileSummary2(url, html_tag, output_file):
    """Fetch a page and write the <td> text of every ``html_tag`` element to a CSV file.

    Args:
        url: Address of the page to download.
        html_tag: Tag name to search for (for example ``"tr"``); each match
            becomes one CSV row.
        output_file: Path of the CSV file to create or overwrite (UTF-8).

    Returns:
        None. The rows are written to ``output_file``; a running counter and
        the collected rows are printed to stdout.
    """
    # Imported here because a bare ``import urllib`` does not guarantee that
    # the ``urllib.request`` submodule has been loaded.
    import urllib.request

    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as response:
        source = response.read()
    soup = bs.BeautifulSoup(source, 'html.parser')
    links = soup.find_all(html_tag)

    array = []
    # The original guarded the append with ``(counter/2) != 0.0``, which is
    # true for every counter >= 1, so every element was always kept. The
    # counter is still printed as a float to keep the console output the same.
    for counter, link in enumerate(links, start=1):
        array.append(
            [cell.text.strip().replace("\xa0\n\t", "") for cell in link.find_all("td")]
        )
        print(float(counter))
    print(array)

    # newline="" stops the csv module's "\r\n" terminator from being
    # translated again on Windows, which would put a blank row after every
    # real row. The ``with`` block guarantees the file is flushed and closed.
    with open(output_file, "w", encoding="utf-8", newline="") as out:
        writer = csv.writer(out)
        for row in array:
            # NOTE(review): each row is written as ONE cell holding the
            # string form of the list, exactly as before. If one column per
            # <td> is wanted, use ``writer.writerow(row)`` instead - confirm.
            writer.writerow([row])
# NOTE(review): `output_file6` is not defined in the visible snippet (only
# `output_file1_2` is) - confirm it is defined elsewhere or rename it.
OutputHTMLFileSummary2(url1,"tr",output_file6)

# Read the file back and count its rows. The csv documentation asks for
# newline="" when opening files for the csv module, and the `with` block
# closes the handle even if reading fails.
with open(output_file6, encoding="utf-8", newline="") as file:
    reader = csv.reader(file)
    lines = sum(1 for _ in reader)
print(lines)
答案 0 :(得分:0)
这段代码在我这里运行正常。我稍微调整了打印输出的方式,结果显示打印出的行数与从文件中读回的行数是一致的。
import bs4 as bs
import urllib
import csv
# Page that is fetched and scraped; passed as the `url` argument below.
url1 = 'https://yugioh.fandom.com/wiki/Set_Card_Lists:Deck_Build_Pack:_Mystic_Fighters_(OCG-JP)'
# CSV file that the scraper writes and that is read back afterwards.
output_file = "DBMF - CardList - tr2.csv" #change this to your own file output
def OutputHTMLFileSummary2(url, html_tag, output_file):
    """Fetch a page and write the <td> text of every ``html_tag`` element to a CSV file.

    Args:
        url: Address of the page to download.
        html_tag: Tag name to search for (for example ``"tr"``); each match
            becomes one CSV row.
        output_file: Path of the CSV file to create or overwrite (UTF-8).

    Returns:
        None. The rows are written to ``output_file``; a running counter and
        an indexed listing of the collected rows are printed to stdout.
    """
    # Imported here because a bare ``import urllib`` does not guarantee that
    # the ``urllib.request`` submodule has been loaded.
    import urllib.request

    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as response:
        source = response.read()
    soup = bs.BeautifulSoup(source, 'html.parser')
    links = soup.find_all(html_tag)

    array = []
    # The original guarded the append with ``(counter/2) != 0.0``, which is
    # true for every counter >= 1, so every element was always kept. The
    # counter is still printed as a float to keep the console output the same.
    for counter, link in enumerate(links, start=1):
        array.append(
            [cell.text.strip().replace("\xa0\n\t", "") for cell in link.find_all("td")]
        )
        print(float(counter))

    for idx, item in enumerate(array):
        print(f'{idx}: {item}')

    # newline="" stops the csv module's "\r\n" terminator from being
    # translated again on Windows, which would put a blank row after every
    # real row.
    with open(output_file, "w", encoding="utf-8", newline="") as src:
        writer = csv.writer(src)
        for row in array:
            # NOTE(review): each row is written as ONE cell holding the
            # string form of the list, exactly as before. If one column per
            # <td> is wanted, use ``writer.writerow(row)`` instead - confirm.
            writer.writerow([row])
OutputHTMLFileSummary2(url1,"tr",output_file)

# Read the file back and print every row with its index. The csv
# documentation asks for newline="" when opening files for the csv module,
# and the `with` block closes the handle even if reading or printing fails.
with open(output_file, encoding="utf-8", newline="") as file:
    reader = csv.reader(file)
    print('\nFrom the file...\n')
    for idx, item in enumerate(reader):
        print(f'{idx}: {item}')