我正在尝试抓取网页,部分代码如下所示,但运行时出现了错误。非常感谢您的帮助。
# -*- coding: utf-8 -*-
import scrapy
from scrapy import cmdline
import re
import pandas as pd
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# browser = webdriver.Chrome(executable_path='/Users/zhen/Downloads/chromedriver')#声明浏览器对象
class GofundSpider(scrapy.Spider):
    """Scrapy spider registered under the name ``gofund``."""

    # Identifier used to select this spider, e.g. ``scrapy crawl gofund``.
    name = 'gofund'
if __name__ == '__main__':
    # Same as running ``scrapy crawl gofund`` from a shell. ``execute`` loads
    # the project settings before crawling, so the settings module of the
    # enclosing Scrapy project must be importable or this call fails with
    # ModuleNotFoundError.
    cmdline.execute('scrapy crawl gofund'.split())
Traceback (most recent call last):
File "gofund.py", line 111, in <module>
cmdline.execute('scrapy crawl gofund'.split())
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/scrapy/cmdline.py", line 113, in execute
settings = get_project_settings()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/scrapy/utils/project.py", line 69, in get_project_settings
settings.setmodule(settings_module_path, priority='project')
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/scrapy/settings/__init__.py", line 287, in setmodule
module = import_module(module)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1014, in _gcd_import
File "<frozen importlib._bootstrap>", line 991, in _find_and_load
File "<frozen importlib._bootstrap>", line 961, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "<frozen importlib._bootstrap>", line 1014, in _gcd_import
File "<frozen importlib._bootstrap>", line 991, in _find_and_load
File "<frozen importlib._bootstrap>", line 973, in _find_and_load_unlocked
ModuleNotFoundError: No module named 'gofundme'
答案 0 :(得分:1)
将 Spider 作为脚本运行的更好方法是使用 CrawlerProcess,而不是 cmdline.execute:
import scrapy
from scrapy.crawler import CrawlerProcess
class GofundSpider(scrapy.Spider):
    """Spider skeleton; add start URLs and parse callbacks below."""

    # Scrapy refuses to instantiate a spider that has no ``name``.
    name = 'gofund'

    # Your spider here
    ...
# Guard the entry point so that importing this module (for example during
# spider discovery) does not start a crawl as a side effect.
if __name__ == '__main__':
    # Settings are passed in directly instead of being loaded from a Scrapy
    # project, so no project settings module has to be importable.
    process = CrawlerProcess(settings={
        'FEED_FORMAT': 'csv',
        # all your settings here
    })
    process.crawl(GofundSpider)
    # Starts the crawl; per the Scrapy docs this call blocks until all
    # scheduled crawls have finished.
    process.start()