我想从一个网址抓取一些数据,并在 Windows 上将其保存为 CSV 文件。这个脚本在 Ubuntu 上运行正常,在某些 Windows 机器上也能运行,但在我的机器上却不行。以下是我的代码:
import csv
import random
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
def scape_data(url):
    """Download one page and extract the fields that are saved to the CSV.

    Args:
        url: Address of the page to fetch.

    Returns:
        A 5-tuple ``(tagline, rating, what_you_learn, includes_things,
        description)``. ``tagline`` and ``rating`` are the text of the
        matched elements; the remaining three are the matched BeautifulSoup
        elements themselves. A field whose selector matches nothing is
        returned as ``''``.

    Raises:
        requests.RequestException: propagated unchanged if the request fails.
    """
    print(url)
    headers = {
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        # user_agent_list is defined elsewhere in the file (omitted from the
        # posted snippet); one entry is picked per request.
        'User-Agent': random.choice(user_agent_list),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'en-US,en;q=0.8'
    }
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "html.parser")

    # select() returns a list, so indexing [0] raises IndexError when the
    # selector matches nothing. Catch only that, so unrelated errors are not
    # silently turned into empty fields.
    try:
        tagline = soup.select("div.clp-lead__headline")[0].text
    except IndexError:
        tagline = ''

    try:
        rating_box = soup.select(".tooltip-container.tooltip--rate-count-container")[0]
        rating = rating_box.select('span')[0].text
    except IndexError:
        rating = ''

    try:
        what_you_learn = soup.select(".what-you-get")[0]
    except IndexError:
        what_you_learn = ''

    try:
        includes_things = soup.select(".incentives")[0].select(".clp-component-render")[0]
    except IndexError:
        includes_things = ''

    try:
        # The parent of the title element is kept, not the title itself.
        description = soup.select(".description__title")[0].parent
    except IndexError:
        description = ''

    return tagline, rating, what_you_learn, includes_things, description
# 1-based, inclusive positions of the rows in some.csv handled by this run.
FIRST_ROW = 3500
LAST_ROW = 4000

# Both files are opened with newline='' as the csv module requires; without
# it the writer emits an extra '\r' per row on platforms that write '\r\n'.
# The output file is opened once in append mode instead of once per row.
with open('some.csv', newline='', encoding='utf8') as csvfile, \
        open('u_courses.csv', 'a', newline='', encoding='utf-8') as csvfile2:
    courses = csv.reader(csvfile, delimiter=',')
    writer = csv.writer(csvfile2)
    for index, row in enumerate(courses, start=1):
        if index < FIRST_ROW:
            continue
        if index > LAST_ROW:
            break
        # The URL is read from the fourth column; rows that are too short or
        # whose URL is shorter than 4 characters are skipped.
        url = row[3] if len(row) > 3 else ''
        if len(url) < 4:
            continue
        tagline, rating, what_you_learn, includes_things, description = scape_data(url)
        writer.writerow([url, tagline, rating, what_you_learn, includes_things, description])
        # Push each row to disk right away so an interrupted run keeps
        # everything scraped so far.
        csvfile2.flush()
        # Pause between requests.
        time.sleep(3)
以下是我在PowerShell上运行时遇到的错误:
OSError: [Errno 9] Bad file descriptor
During handling of the above exception, another exception occurred:
Traceback(most recent call last):
File ".\some.py", line 117, in < module >
writer.writerow([url, tagline, rating, what_you_learn, includes_things, description])
OSError: [Errno 9] Bad file descriptor
我使用的是 Windows 10 和 Python 3.6.5,Python 本身在我的系统上运行正常。另外,我在贴出的代码中删掉了用户代理列表(user_agent_list),它只是一个浏览器 User-Agent 字符串的列表。
我可能是哪里做错了?