我编写了一段简单的 Python 代码:它从 txt 文件中读取域名列表,并根据请求返回的页面内容检查每个域名是否为 WordPress 网站。
代码如下:
import requests
# Loop over the domain list.
# NOTE(review): this is the question's script with its block structure
# restored. The two defects that the traceback and the answers discuss are
# intentionally left in place and only marked below.
with open('domains2') as f:
    for line in f:
        # `line` still ends with '\n'; the traceback reports the host as
        # 'testing.com%0a', i.e. the newline ends up inside the URL.
        domain = line
        source = requests.get(domain)
        # `source` is the object returned by requests.get(), not the page
        # text, so this membership test does not search the HTML
        # (compare `requests.get(domain).text`).
        if "wp-include" in source:
            results = 'Yes'
        else:
            results = 'No'
        print(line , ' : ' , results)
错误如下:
Traceback (most recent call last):
File "./test4.py", line 8, in <module>
source = requests.get(domain)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/adapters.py", line 516, in send
raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='testing.com%0a', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd5a00c4d50>: Failed to establish a new connection: [Errno -2] Name or service not known',))
只有当我不从列表中读取域名、而是手动指定域名,并把 source 设置为如下形式时,代码才能正常运行并得到正确的结果:
source = requests.get(domain).text
答案 0 :(得分:1)
import requests
# Loop over the domain list and report whether each site looks like WordPress.
with open('domains2') as f:
    for line in f:
        # rstrip() removes the trailing '\n' that would otherwise become
        # part of the host name in the request URL.
        domain = line.rstrip()
        if not domain:
            # Skip blank lines instead of requesting an empty URL.
            continue
        # A timeout keeps one unresponsive host from stalling the whole run.
        source = requests.get(domain, timeout=10)
        # .text is the response body as a string; the marker is searched there.
        if "wp-include" in source.text:
            results = 'Yes'
        else:
            results = 'No'
        # Print the stripped domain so the output has no stray line break.
        print(domain, ' : ', results)
`source.text` 获取响应的文本内容,`rstrip()` 删除行尾的换行符 `\n`。
答案 1 :(得分:0)
将域名转换为有效的 URL(供 requests 使用)(Python 3):
#!/usr/bin/env python
import requests
import re
from urllib import parse
def get_domains(file):
    """Read domains from a text file and return them as ``http://www.`` URLs.

    Each non-blank line is stripped, parsed with ``urlparse`` (default scheme
    ``http``), given a ``www.`` prefix when it lacks one, and rebuilt with the
    scheme forced to ``http``.

    Args:
        file: Path of a text file with one domain or URL per line.

    Returns:
        A list of URL strings in the order the lines appear in the file.
    """
    res = []
    with open(file) as f:
        for x in f:
            url = x.strip()
            if not url:
                # A blank line would otherwise turn into the bogus URL
                # 'http://www.'.
                continue
            p = parse.urlparse(url, 'http')
            # For input without '//' urlparse leaves netloc empty and puts
            # the host into .path, so fall back to .path for the host.
            netloc = p.netloc or p.path
            path = p.path if p.netloc else ''
            if not netloc.startswith('www.'):
                netloc = 'www.' + netloc
            # Keep params, query and fragment (fields 3..5) unchanged.
            p = parse.ParseResult('http', netloc, path, *p[3:])
            res.append(p.geturl())
    return res
def is_wordpress(url, timeout=10):
    """Return True when the page at *url* contains the marker 'wp-include'.

    Args:
        url: Full URL to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        True if the response text contains 'wp-include'; False if it does
        not, or if the request fails.
    """
    print(f"getting: {url}")
    try:
        content = requests.get(url, timeout=timeout).text
    except requests.RequestException as exc:
        # One unreachable host must not abort the scan of the remaining
        # domains; report the failure and treat the site as "not WordPress".
        print(f"failed: {url} ({exc})")
        return False
    # A plain substring test is enough; the pattern has no regex syntax.
    return 'wp-include' in content
def main(path='domain.txt'):
    """Check every domain listed in *path* and print the results.

    Args:
        path: Text file handed to ``get_domains``; defaults to 'domain.txt'.

    Prints a dict that maps each URL returned by ``get_domains`` to the
    boolean returned by ``is_wordpress`` for it.
    """
    result = {domain: is_wordpress(domain) for domain in get_domains(path)}
    print(result)


if __name__ == '__main__':
    main()