我正在尝试使用以下代码提取链接
def Soup(htmsrc):
return BeautifulSoup(htmsrc,
'html.parser')
def get_html_sel(url, t=15):
logger.info('Searching: {0}'.format(url))
try:
driver = webdriver.Chrome(chromedriver)
driver.get(url)
time.sleep(t)
htmsrc = driver.page_source
driver.quit()
return (htmsrc)
except NoSuchWindowException:
sys.exit('The window closed unexpectedly.')
def get_filehostlink(url):
for file_hoster_key, file_hoster_value in FILE_HOSTERS.iteritems():
try:
link = '{0}{1}'.format(url,file_hoster_value)
soup = Soup(get_html_sel(link,t=15))
return soup.find('iframe',src=re.compile(file_hoster_key))['src']
except:
traceback.print_exc()
continue
else:
break
get_filehostlink('http://kissasian.com/Drama/Your-Lie-in-April/Movie?id=33186')
使用硒和氯化物完美地工作。
但是,我发现chromedriver很麻烦。所以我决定按如下方式切换到phantomjs:
def get_html_sel(url, t=15):
logger.info('Searching: {0}'.format(url))
try:
driver = webdriver.PhantomJS()
driver.get(url)
time.sleep(t)
htmsrc = driver.page_source
driver.quit()
return (htmsrc)
except:
traceback.print_exc()
然而,当我使用phantomjs时失败了。我在get_filehostlink函数中收到以下错误
Traceback (most recent call last):
in get_filehostlink
return soup.find('iframe',src=re.compile(file_hoster_key))['src']
TypeError: 'NoneType' object has no attribute '__getitem__'
我做错了什么?