我在尝试抓取网页上的数据时,会先检查当前的网址,以确保自己位于预期的页面上。但是,脚本最终会抛出错误,而且看起来是在检查 URL 时发生的。我无法弄清原因,错误出现的时机也不固定:有时脚本已经处理了相当多的页面才出错,有时只处理了几页就出错。
Traceback (most recent call last):
File "scrape.py", line 5, in <module>
scraper.start_search("ebook")
File "/home/ubuntu/workspace/scraper/school/scraper.py", line 56, in start_search
self.scrape_item(product_el)
File "/home/ubuntu/workspace/scraper/school/scraper.py", line 97, in scrape_item
if self.driver.current_url.split("/")[3] != "search":
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 493, in current_url
return self.execute(Command.GET_CURRENT_URL)['value']
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 236, in execute
response = self.command_executor.execute(driver_command, params)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/remote_connection.py", line 415, in execute
return self._request(command_info[0], url, body=data)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/remote_connection.py", line 489, in _request
resp = opener.open(request, timeout=self._timeout)
File "/usr/lib/python2.7/urllib2.py", line 404, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 422, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1214, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib/python2.7/urllib2.py", line 1184, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [Errno 111] Connection refused>
看似相关的代码只是:
if self.driver.current_url.split("/")[3] != "search":
time.sleep(random.randint(1, 3))
self.driver.back()
我使用的是Python 2.7,Selenium和PhantomJS。
答案 0 :(得分:0)
我不知道为什么会这样,但我也看到
/**
 * Computes a Canny edge map of {@code image}, choosing the Canny thresholds
 * from the cumulative histogram of the Sobel gradient magnitude.
 *
 * <p>Steps: the image is Gaussian-blurred, the absolute x/y Sobel responses are
 * summed into a gradient image, and a 256-bin histogram of that gradient image
 * is accumulated until more than 60% of the pixels are covered. The bin index
 * where that happens becomes the upper Canny threshold; 60% of it becomes the
 * lower threshold.
 *
 * <p>Side effects: {@code image} is blurred in place, and two debug images
 * ({@code aftersobel(eq).png} and {@code aftersobel.png}) are written under
 * {@code filePath}.
 *
 * @param image single-channel input image; it is modified (blurred) by this call
 *              (NOTE(review): 8-bit depth is presumably required by Canny — confirm)
 * @return a new Mat holding the Canny edge map computed on the blurred image
 */
private static Mat getpartialedge(Mat image) {
    final double nonEdgeRate = 0.6;   // fraction of pixels treated as non-edge
    final double thresholdRate = 0.6; // lower Canny threshold = thresholdRate * upper
    final int bins = 256;
    final int apertureSize = 3;

    // Pixel budget for the cumulative histogram walk below.
    final double w = image.cols();
    final double h = image.rows();
    final float t = (float) (nonEdgeRate * w * h);

    // Blur in place: callers observe the blurred image after this call.
    Imgproc.GaussianBlur(image, image, new Size(5, 5), 2);

    Mat sobel = new Mat();
    Mat sobelx = new Mat();
    Mat sobely = new Mat();
    Mat sobelxabs = new Mat();
    Mat sobelyabs = new Mat();
    Mat equalized = new Mat();
    Mat hist = new Mat();
    try {
        // Gradient magnitude approximation: |dx| + |dy|, both already 8-bit
        // after convertScaleAbs, so no further depth conversion is needed.
        Imgproc.Sobel(image, sobelx, CvType.CV_16S, 1, 0, apertureSize, 1, 0);
        Core.convertScaleAbs(sobelx, sobelxabs);
        Imgproc.Sobel(image, sobely, CvType.CV_16S, 0, 1, apertureSize, 1, 0);
        Core.convertScaleAbs(sobely, sobelyabs);
        Core.addWeighted(sobelxabs, 1, sobelyabs, 1, 0, sobel);

        // Debug dumps of the gradient image (equalized and raw).
        Imgproc.equalizeHist(sobel, equalized);
        Imgcodecs.imwrite(filePath + "aftersobel(eq).png", equalized);
        Imgcodecs.imwrite(filePath + "aftersobel.png", sobel);

        List<Mat> matList = new ArrayList<Mat>();
        matList.add(sobel);
        Imgproc.calcHist(matList, new MatOfInt(0), new Mat(), hist, new MatOfInt(bins), new MatOfFloat(0f, 256f));

        // Read all bin counts in one call instead of one native call per bin.
        float[] counts = new float[bins];
        hist.get(0, 0, counts);

        // First bin at which the cumulative count exceeds the non-edge budget;
        // stays 0 if the budget is never exceeded.
        float bon = 0;
        float accu = 0;
        for (int i = 0; i < bins; i++) {
            accu = accu + counts[i];
            if (accu > t) {
                bon = (float) i;
                break;
            }
        }

        double ut = bon;
        double lt = thresholdRate * bon;
        // Canny overwrites sobel with the edge map, so the gradient image is
        // only needed up to this point.
        Imgproc.Canny(image, sobel, lt, ut);
        return sobel;
    } finally {
        // Free native buffers of the temporaries eagerly; the GC does not
        // account for native Mat memory.
        sobelx.release();
        sobely.release();
        sobelxabs.release();
        sobelyabs.release();
        equalized.release();
        hist.release();
    }
}
不太稳定(时好时坏)。您是否尝试过通过一些异常处理来缓解这种情况?
current_url