我写了一个脚本来测试网站“www.humkinar.com.pk”。这个脚本检查某些特定元素(例如 id 为 cara6、cara7 的元素)是否可用。不使用虚拟显示时脚本工作正常,我能得到完整的页面源代码;但使用虚拟显示时,得到的页面源代码不完整。我使用 pyvirtualdisplay 库创建虚拟显示,并使用 BeautifulSoup 从页面源代码中提取数据。
这是我的代码:
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import urllib2
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from pyvirtualdisplay import Display
import time
def frontendCategoryTester(soup, category, start_range, end_range, id_name):
    """Check that each numbered category link exists and that its image loads.

    For every number in ``range(start_range, end_range)`` the function looks
    up ``<a>`` tags whose ``class`` attribute equals ``id_name + str(number)``.
    When at least one is found, the ``src`` of the first ``<img>`` inside the
    first match is requested with ``urllib2.urlopen``.

    Args:
        soup: Parsed page exposing ``findAll`` (a BeautifulSoup object).
        category: Section name used in the printed and returned text.
        start_range: First element number to test (inclusive).
        end_range: Element number at which to stop (exclusive).
        id_name: Prefix combined with the number to form the class name.

    Returns:
        A string holding one newline-terminated entry per detected problem;
        empty when no problem was found.
    """
    # Single-argument print(...) behaves identically on Python 2 and 3.
    message = ""
    print("Starting "+category+" Test")
    for current_no in range(start_range, end_range):
        element_id = id_name+str(current_no)
        elements = soup.findAll('a', attrs={"class": element_id})
        print("Testing : "+category+" id "+element_id)
        if len(elements) == 0:
            displayError = "Display error: "+category+" id "+element_id
            print(displayError)
            message = message+displayError+"\n"
            continue

        # find() returns None when the anchor holds no <img>; indexing that
        # result used to abort the whole run, so report it as an error instead.
        image = elements[0].find('img')
        imgUrl = image.get('src') if image is not None else None
        if not imgUrl:
            imageError = ("Element Id: "+element_id
                          + ", Image url error: no image source found")
            print(imageError)
            message = message+imageError+"\n"
            continue

        imgUrl = imgUrl.replace('+', '%20')
        try:
            # urlopen raises for unreachable URLs and HTTP error statuses.
            response = urllib2.urlopen(imgUrl)
            # Only reachability matters; release the connection right away.
            response.close()
        except Exception as e:
            # Deliberately broad: any failure is recorded, never fatal.
            imageUrl = "Element Id: "+element_id+", Image url error: "+imgUrl
            print(imageUrl+"\n"+str(e)+"\n")
            message = message+imageUrl+"\n"+str(e)+"\n"
    print("Ending "+category+" Test\n")
    return message
def frontendCarasolTester(soup, category, start_range, end_range, id_name):
    """Check numbered carousel/gallery links and the image of each one.

    For every number in ``range(start_range, end_range)`` the function looks
    up ``<a>`` tags whose ``class`` attribute equals ``id_name + str(number)``
    and classifies the result by the number of matches:

    * no match: a "Display error" is printed and added to the result;
    * exactly two matches: the ``src`` of the first ``<img>`` inside the
      first match is requested with ``urllib2.urlopen``;
    * any other count: an "Animation Problem" line is printed only.

    The missing-element check used to sit inside the "exactly two" branch,
    where it could never run; it is now evaluated first.

    Args:
        soup: Parsed page exposing ``findAll`` (a BeautifulSoup object).
        category: Section name used in the printed and returned text.
        start_range: First element number to test (inclusive).
        end_range: Element number at which to stop (exclusive).
        id_name: Prefix combined with the number to form the class name.

    Returns:
        A string holding one newline-terminated entry per display or image
        error; empty when none was found.
    """
    # Single-argument print(...) behaves identically on Python 2 and 3.
    message = ""
    print("Starting "+category+" Test")
    for current_no in range(start_range, end_range):
        element_id = id_name+str(current_no)
        elements = soup.findAll('a', attrs={"class": element_id})
        print("Testing: "+category+" id "+element_id)
        if len(elements) == 0:
            displayError = "Display error: "+category+" id "+element_id
            print(displayError)
            message = message+displayError+"\n"
            continue
        if len(elements) != 2:
            # NOTE(review): presumably the carousel renders every slide
            # twice; confirm why exactly two matches are expected.
            print(category+" Animation Problem id "+element_id)
            continue

        # find() returns None when the anchor holds no <img>; indexing that
        # result used to abort the whole run, so report it as an error instead.
        image = elements[0].find('img')
        imgUrl = image.get('src') if image is not None else None
        if not imgUrl:
            imageError = ("Element Id: "+element_id
                          + ", Image url error: no image source found")
            print(imageError)
            message = message+imageError+"\n"
            continue

        imgUrl = imgUrl.replace('+', '%20')
        try:
            # urlopen raises for unreachable URLs and HTTP error statuses.
            response = urllib2.urlopen(imgUrl)
            # Only reachability matters; release the connection right away.
            response.close()
        except Exception as e:
            # Deliberately broad: any failure is recorded, never fatal.
            imageUrl = "Element Id: "+element_id+", Image url error: "+imgUrl
            print(imageUrl+"\n"+str(e)+"\n")
            message = message+imageUrl+"\n"+str(e)+"\n"
    print("Ending "+category+" Test\n")
    return message
def frontendTweetTester(soup, category, start_range, end_range, id_name):
    """Check that every numbered tweet link is present in the page.

    For each number in ``range(start_range, end_range)`` and each tweet index
    0, 1 and 2, the function looks up ``<a>`` tags whose ``class`` attribute
    equals ``id_name + str(number) + str(tweet_index)``.

    Missing elements are now added to the returned text. Previously they were
    only printed, so the function always returned an empty string.

    Args:
        soup: Parsed page exposing ``findAll`` (a BeautifulSoup object).
        category: Section name used in the printed and returned text.
        start_range: First element number to test (inclusive).
        end_range: Element number at which to stop (exclusive).
        id_name: Prefix combined with both numbers to form the class name.

    Returns:
        A string holding one newline-terminated "Display error" entry per
        missing element; empty when all elements were found.
    """
    # Single-argument print(...) behaves identically on Python 2 and 3.
    message = ""
    print("Starting "+category+" Test")
    for current_no in range(start_range, end_range):
        for tweet_no in range(0, 3):
            element_id = id_name+str(current_no)+str(tweet_no)
            elements = soup.findAll('a', attrs={"class": element_id})
            print("Testing: "+category+" id "+element_id)
            if len(elements) == 0:
                displayError = "Display error: "+category+" id "+element_id
                print(displayError)
                message = message+displayError+"\n"
    print("Ending "+category+" Test\n")
    return message
main_url = "http://www.humkinar.com.pk"
fobj = open("frontEndSrc.txt", 'w')
print "Starting Script"
try:
#if i remove these two lines code works fine
display = Display(visible=0, size=(1024, 768))
display.start()
browser = webdriver.Firefox()
browser.get(main_url)
except TimeoutException:
print "Time Out Exception"
browser.maximize_window()
for x in range(0, 2800, 5):
browser.execute_script("window.scrollTo(0, "+str(x)+")")
print ".",
print "\n"
time.sleep(5)
html = browser.page_source
soup = BeautifulSoup(html, "html.parser")
fobj.write(soup.prettify().encode('utf8'))
fobj.close()
emailMessage = ""
#Carasol Test
emailMessage = emailMessage + frontendCarasolTester(soup, "carasol", 1, 11, "cara")
emailMessage = emailMessage + frontendCarasolTester(soup, "Gallery", 1, 7, "gallaryItem")
#Categories Test
emailMessage = emailMessage + frontendCategoryTester(soup, "Sports", 1, 5, "sportNewsRow")
emailMessage = emailMessage + frontendCategoryTester(soup, "Books", 1, 5, "bookNewsRow")
emailMessage = emailMessage + frontendCategoryTester(soup, "Health", 1, 5, "healthNewsRow")
emailMessage = emailMessage + frontendCategoryTester(soup, "Science", 1, 5, "sciNewsRow")
emailMessage = emailMessage + frontendCategoryTester(soup, "Entertainment", 1, 5, "entNewsRow")
#Tweets Test
emailMessage = emailMessage + frontendTweetTester(soup, "Tweet", 1, 4, "tweetsNews")
print "\nMoving towards Web Tab:"
print "........................."
webTabMessage = ""
searchArea = browser.find_element_by_id('search')
searchTerm = "اللہ"
searchArea.send_keys(searchTerm.decode('utf8'))
searchArea.submit()
html = browser.page_source
soup = BeautifulSoup(html, "html.parser")
QuranMajeedTranslation = soup.find('div', attrs={"id":"trans"})
try:
q = len(QuranMajeedTranslation)
print "Quran Majeed Translation Succes"
except Exception:
QMError = "Quran Majeed Translation Error"
print QMError
webTabMessage = webTabMessage + QMError
MeaningFlip = soup.find('div', attrs={"class":"flip"})
try:
q = len(MeaningFlip)
print "Meaning Widget Success"
except Exception:
MeanError = "Meaning Widget Error"
print MeanError
webTabMessage = webTabMessage + MeanError
MeaningFlip = soup.find('li', attrs={"class":"bookTest"})
try:
q = len(MeaningFlip)
print "Book Widget Success"
except Exception:
BookError = "Book Widget Error"
print BookError
webTabMessage = webTabMessage + BookError
browser.close()
print emailMessage
print webTabMessage