我正在尝试检查一行是否已存在。如果没有,则必须在行中写入内容。我的CSV文件始终为空。
# import libraries
import csv
import urllib2
from bs4 import BeautifulSoup
# integer for first article id
articleid = 4449
articles = 4459
while articleid < articles:
# specify the url and article id
url = 'http://www.bkfrem.dk/default.asp?vis=nyheder&id='+str(articleid)
articleid += 1
# query the website and return the html to the variable
page = urllib2.urlopen(url)
# parse the html using beautiful soup and store in variable soup
soup = BeautifulSoup(page, 'html.parser')
# create CSV file
csvfile = csv.writer(open('news.csv', 'a'))
# take out the <div> of name and get its value and text
title_box = soup.find('h1', attrs={'style': 'margin-bottom:0px'})
title = title_box.text.encode('utf-8').strip()
date_box = soup.find('div', attrs={'style': 'font-style:italic; padding-bottom:10px'})
date = date_box.text.encode('utf-8').strip()
articleText_box = soup.find('div', attrs={'class': 'news'})
articleText = articleText_box.text.encode('utf-8').strip()
# print the data (encoded) to the CSV file
with open('news.csv', 'rb') as csvfileO:
f_reader = csv.reader(csvfileO, delimiter=',')
for row in f_reader:
if articleText not in row:
csvfile.writerow(["Title", "Date", "Text"])
csvfile.writerow((title, date, articleText))
我做错了什么,因为它是空的?
答案 0 :(得分:-1)
for row in f_reader:
if articleText not in
csvfile.writerow(["Title", "Date", "Text"])
csvfile.writerow((title, date, articleText))
您有if articleText not in
不是在什么?你应该让它指向要验证的东西。
if articleText not in "Something":
csvfile.writerow(["Title", "Date", "Text"])
csvfile.writerow((title, date, articleText))