以下脚本成功从单个IP(url_ip)中提取正确的信息。但在尝试构建循环过程后,请求调用会因连接错误而失败。 (以下错误) *注意 - 草率代码,所以要注意。
from lxml import html
import requests
import smtplib
# STATIC URL
#TODO PULL A LIST OF IP ADDRESSES AND BUILD THE URL FOR EACH SYSTEM
#IPs = ['192.168.3.152','192.168.3.194']
def crawler(url_ip):
global eqid, counter, serial
print "Starting Crawler Service for: " + url_ip
url = "http://" + url_ip + "/cgi-bin/dynamic/printer/config/reports/deviceinfo.html"
urleqid = "http://" + url_ip + "/cgi-bin/dynamic/topbar.html"
response = requests.get(url)
tree = html.fromstring(response.text)
counter = tree.xpath('//td[contains(p,"Count")]/following-sibling::td/p/text()')
serial = tree.xpath('//td[contains(p, "Serial")]/following-sibling::td/p/text()')
counter = counter[0].split(' ')[3]
serial = serial[0].split(' ')[3]
responseeqid = requests.get(urleqid)
treeequid = html.fromstring(responseeqid.text)
eqid = treeequid.xpath('//descendant-or-self::node()/child::b[contains(., "Location")]/text()')[1].split(' ')[-1]
print " -- equipment id found: " + eqid
print " -- count found: " + counter
print " -- serial found: " + serial
print "Stopping Crawler Service for: " + url_ip
return
def send_mail(eqid,counter,serial):
GMAIL_USERNAME = "removed"
GMAIL_PASSWORD = "removed"
recipient = "removed"
email_subject = "Test"
body_of_email = "Equipment ID = " + eqid + "<br>Total Meter Count = " + counter + "<br>Serial Number = " + serial + "<br><br>"
session = smtplib.SMTP('smtp.gmail.com', 587)
session.ehlo()
session.starttls()
session.login(GMAIL_USERNAME, GMAIL_PASSWORD)
headers = "\r\n".join(["from: " + GMAIL_USERNAME,
"subject: " + email_subject,
"to: " + recipient,
"mime-version: 1.0",
"content-type: text/html"])
# body_of_email can be plain text or html!
content = headers + "\r\n\r\n" + body_of_email
session.sendmail(GMAIL_USERNAME, recipient, content)
return
with open('iplist.txt') as fp:
for line in fp:
crawler(line);
#send_mail(eqid,counter,serial);
错误日志:
Starting Crawler Service for: 192.168.3.152
Traceback (most recent call last):
File "getmeters.py", line 63, in <module>
crawler(ipstring);
File "getmeters.py", line 17, in crawler
response = requests.get(url)
File "/Library/Python/2.7/site-packages/requests/api.py", line 68, in get
return request('get', url, **kwargs)
File "/Library/Python/2.7/site-packages/requests/api.py", line 50, in request
response = session.request(method=method, url=url, **kwargs)
File "/Library/Python/2.7/site-packages/requests/sessions.py", line 464, in request
resp = self.send(prep, **send_kwargs)
File "/Library/Python/2.7/site-packages/requests/sessions.py", line 576, in send
r = adapter.send(request, **kwargs)
File "/Library/Python/2.7/site-packages/requests/adapters.py", line 415, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', gaierror(8, 'nodename nor servname provided, or not known'))
我认为这是由于价值&#34;线&#34;被处理为列表对象而不是字符串,所以我转换为str(行),但也失败了。
答案 0 :(得分:1)
我怀疑你在文件行的末尾有行结尾(\ n),你可能需要将它们去掉。否则您的网址会变成
http://192.168.3.152
/cgi-bin/dynamic/printer/config/reports/deviceinfo.html"
而不是预期的
http://192.168.3.152/cgi-bin/dynamic/printer/config/reports/deviceinfo.html"