So I created this web scraper in Python, and I'm building it in PyCharm with Anaconda. My script was working fine, but somehow I feel like I messed something up on my computer, and now I'm getting this error:

ImportError: cannot import name viewkeys

On top of that, I can't seem to install the module "mechanize" into the Anaconda directory.

Here is my code below. If anyone could help figure this out, that would be great!
from bs4 import BeautifulSoup
import urllib
import numpy as np
#import selenium.webdriver as webdriver
import requests
import mechanize
import urlparse
import lxml
import sys
import re
def get_Sample_URL(Sample_Name):
    br = mechanize.Browser()
    br.open("https://www.lpi.usra.edu/meteor/")
    "SELECTING THE FORM"
    br.select_form(nr=0)
    # Entering in the sample name
    br.form['sea'] = '%s' % Sample_Name
    br.submit()
    Link_List = list()
    for i in br.links():
        # Append the Base URL and the URL of the Sample
        new_url = urlparse.urljoin(i.base_url, i.url)
        # Insert URL's into a list
        Link_List.append(new_url)
    br.close()
    if len(Link_List) > 9999999999:
        # print len(Link_List)
        print ("Can't find data for: "), Sample_Name, "Perhaps try searching this one manually"
        #Manual_Search_File = np.loadtxt("/Users/edenmolina/Desktop/Metiorite_Manual_Search.txt", dtype='string', unpack=True)
        Sample_URL = 0
        return Sample_URL
    # 9 is the number of links for a sample that is not in the online database
    elif len(Link_List) <= 9:
        #print len(Link_List)
        #print ("No results found on database for: "), Sample_Name
        Sample_URL = 0
        return Sample_URL
    else:
        #print len(Link_List)
        #print len(Link_List), Sample_Name
        Sample_URL = Link_List[-4]
        return Sample_URL
"""Get the Classification"""
def get_Classification(URL):
source = urllib.urlopen("%s"%URL).read()
soup = BeautifulSoup(source, "html.parser")
Classification_List = []
for k, td in enumerate(soup.find_all("td", {'class', 'inside'})):
Classification = td.find_all("b")
Classification_List.append(Classification)
#print Classification_List[3]
print (Classification_List[3][1].text)
"Define a fucntion that get the name, weathering, and shock stage of the sample"
def get_Name_Weathering_Shock(url):
"Get the source code from the website"
source = urllib.urlopen("%s" % url).read()
# Convert the data to a Beautiful Soup object
soup = BeautifulSoup(source, "html.parser")
"""
Print out the title of the webpage"""
#print soup.title.string
""""Getting the name"""
Name_List = []
for i, td in enumerate(soup.find_all("th", {'class', 'insidehead'})):
Name = td.find_all("b")
Name_List.append(Name)
print ("Name Check: ", Name_List[0][0].text)
# Get the data in the td subsections from the website
data = soup.find_all('tr')
# Getting the website data
Website_Data = list()
for tr in data:
td = tr.find_all('td')
row = [i.text for i in td]
Website_Data.append(row)
Weathering_Grade = [w for w in Website_Data if "Weathering grade:" in w]
Shock_Stage = [s for s in Website_Data if "Shock stage:" in s]
#Prints out the weathering and shock stage of the sample
try:
print (Weathering_Grade[1][1])
np.savetxt("/Users/edenmolina/Desktop/Meteorite Data/%s.txt" % (Name[0][0].text), Weathering_Grade[1][1])
except:
print ("No Weathering")
try:
print (Shock_Stage[1][1])
except:
print ("No Shock Stage")
def get_Info(url, weatheringOrshock):
    "Get source code of website"
    source = urllib.urlopen("%s" % url).read()
    # Convert the data to a Beautiful Soup object
    soup = BeautifulSoup(source, "html.parser")
    # Collect the text of each table cell, row by row
    rows = soup.find_all('tr')
    data = list()
    for tr in rows:
        td = tr.find_all('td')
        row = [i.text for i in td]
        data.append(row)
    information = [w for w in data if "%s" % weatheringOrshock in w]
    try:
        print (information[1][1])
        return information[1][1]
    except:
        print ("No %s" % weatheringOrshock)
#get_SampleData("https://www.lpi.usra.edu/meteor/metbull.php?sea=NWA+001&sfor=names&ants=&falls=&valids=&stype=contains&lrec=50&map=ge&browse=&country=All&srt=name&categ=All&mblist=All&rect=&phot=&snew=0&pnt=Normal%20table&code=17011")
#AllData("NWA 002")

#LOAD THE SAMPLE NAMES FROM A TEXT FILE#
SampleNames_Text = np.loadtxt("/Users/edenmolina/Desktop/MetioriteNames.txt", delimiter="\n", dtype=np.str)
Number_of_Loops = len(SampleNames_Text)

"""FOR SAVING THE DATA"""
# Iterates through each of the samples in the text file and outputs the name, weathering, and the shock stage (if applicable)
for i in range(Number_of_Loops):
    print (SampleNames_Text[i])
    Sample_URL = get_Sample_URL("%s" % SampleNames_Text[i])
    if Sample_URL == 0 or len(Sample_URL) < 80.0:
        print ("")
    elif len(Sample_URL) < 80.0:
        print ("Try Searching This Manually")
    else:
        Weathering = get_Info(Sample_URL, "Weathering grade:")
        Shock = get_Info(Sample_URL, "Shock stage:")
        Classification = get_Classification(Sample_URL)
        URL = get_Sample_URL("%s" % SampleNames_Text[i])
        print ("\n")
Here is the stack trace of the error:
Traceback (most recent call last):
  File "/Users/edenmolina/PycharmProjects/Meteorite/DataBase_Data_Extractor_V3.py", line 163, in <module>
    Sample_URL = get_Sample_URL("%s" % SampleNames_Text[i])
  File "/Users/edenmolina/PycharmProjects/Meteorite/DataBase_Data_Extractor_V3.py", line 34, in get_Sample_URL
    br.select_form(nr=0)
  File "/Users/edenmolina/Library/Python/2.7/lib/python/site-packages/mechanize/_mechanize.py", line 619, in select_form
    global_form = self._factory.global_form
  File "/Users/edenmolina/Library/Python/2.7/lib/python/site-packages/mechanize/_html.py", line 260, in global_form
    self.forms()
  File "/Users/edenmolina/Library/Python/2.7/lib/python/site-packages/mechanize/_html.py", line 267, in forms
    self._current_forms, self._current_global_form = self._get_forms()
  File "/Users/edenmolina/Library/Python/2.7/lib/python/site-packages/mechanize/_html.py", line 282, in _get_forms
    if self.root is None:
  File "/Users/edenmolina/Library/Python/2.7/lib/python/site-packages/mechanize/_html.py", line 247, in root
    response, verify=False))
  File "/Users/edenmolina/Library/Python/2.7/lib/python/site-packages/mechanize/_html.py", line 145, in content_parser
    from html5lib import parse
  File "/Users/edenmolina/Library/Python/2.7/lib/python/site-packages/html5lib/__init__.py", line 16, in <module>
    from .html5parser import HTMLParser, parse, parseFragment
  File "/Users/edenmolina/Library/Python/2.7/lib/python/site-packages/html5lib/html5parser.py", line 2, in <module>
    from six import with_metaclass, viewkeys, PY3
ImportError: cannot import name viewkeys
Answer 0 (score: 0)
If you are using version control, go back to a previous commit and see whether you can get the application running again. That way you can isolate the change that caused your problem.
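Assuming git, something like this would do it (the commit hash is a placeholder for whichever commit last worked):

git stash              # set aside your current uncommitted changes
git log --oneline      # find a commit from when the scraper still ran
git checkout <commit>  # try the script again at that commit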
A common cause of errors like this in Python is a circular import. I don't see one in your stack trace, but google it anyway and see whether that is what is happening here; a minimal sketch of the pattern follows.
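The module and function names here are invented, not from your project:

# a.py
from b import helper_b

def helper_a():
    return helper_b()

# b.py
from a import helper_a  # runs while a.py is still half-initialized

def helper_b():
    return "b"

import a then fails with "ImportError: cannot import name helper_a", because b.py asks for a name that a.py has not finished defining yet. That is the same class of failure you are seeing.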
Another thing you can and should do is give your Python application its own virtual environment. You can create one with the conda command. Once you have the environment, install all the libraries this application needs into it, then try running the app again.
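For example, something along these lines; the environment name is just a placeholder, and the package list mirrors the imports in your script:

conda create -n meteor python=2.7
source activate meteor
pip install mechanize beautifulsoup4 html5lib six lxml numpy requests

Installing mechanize with pip inside the activated environment should also get around your problem of not being able to install it into the Anaconda directory.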
I see that one of the libraries you are using depends on six. six is used for writing code that is compatible with both Python 2 and Python 3, and viewkeys is one of the names it provides. So you may have changed your interpreter or your environment to one with an outdated six, and that is what is causing this error.
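A quick way to test that is to run the lines below with the same interpreter PyCharm uses; as far as I know viewkeys has been part of six since version 1.4.0, so a False here would point to a stale copy somewhere on your import path:

import six
print(six.__version__)
print(hasattr(six, "viewkeys"))

If it prints False, upgrading six in that environment (pip install --upgrade six) should make the import work again.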
If your app works after this, let me know; I would like to know what caused the error. Thanks.