This Python script makes a GET request to each URL loaded from the websites.txt file. It then checks whether the response contains a KEYWORD; if the keyword is found, the URL is saved to WorkingSites.txt.
Everything works, but it is slow because it only checks one URL at a time. What is the best and easiest way to check 10 URLs simultaneously?
Could you please give an example based on the script below?
Thanks
import requests
import sys

if len(sys.argv) != 2:
    print "\n\033[34;1m[*]\033[0m python " + sys.argv[0] + ' websites.txt '
    sys.exit(0)

targetfile = open(sys.argv[1], 'r')
success = open('WorkingSites.txt', 'a')  # open the output file once, not per URL
text = 'KEYWORD'

while True:
    host = targetfile.readline().replace('\n', '')
    if not host:
        break
    if not host.startswith('http'):
        host = 'http://' + host
    print '\033[34;1m[*]\033[0m Check : ' + host
    try:
        r = requests.request('get', host, timeout=5, headers={
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3163.100 Safari/537.36',
        })
    except requests.exceptions.RequestException:  # catch request errors only, not a bare except
        print '\033[31;1m[-]\033[0m Failed : No Response\n'
        continue
    if text in r.text:
        print '\033[32;1m[+]\033[0m success : ' + host + '\n'
        success.write(host + '\n')
    else:
        print '\033[31;1m[-]\033[0m Failed : ' + host + '\n'

success.close()
print "\033[34;1m[*]\033[0m Output Saved On : WorkingSites.txt"
Answer 0 (score: 0)
import asyncio
import concurrent.futures
from timeit import default_timer

import psutil
import requests

INPUT = 'websites.txt'
OUTPUT = 'WorkingSites.txt'

SUCCESS = open(OUTPUT, 'a')
START_TIME = default_timer()

def fetch(host):
    KEYWORD = 'KEYWORD'
    try:
        with requests.get(host, timeout=5, headers={
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3163.100 Safari/537.36',
        }) as response:
            # log host, CPU %, memory %, and elapsed time for each request
            print('{0} {1} {2} {3}'.format(host, psutil.cpu_percent(),
                                           psutil.virtual_memory()[2],
                                           '{:5.2f}s'.format(default_timer() - START_TIME)))
            if response.status_code == 200 and KEYWORD in response.text:
                SUCCESS.write(host + '\n')
            return response
    except requests.exceptions.RequestException:
        pass

async def get_data_asynchronous():
    with open(INPUT) as fi:
        hosts = fi.read().splitlines()
    # rebuild the list: the original loop assigned to the loop variable,
    # so the 'http://' prefix was silently discarded
    hosts = [h if h.startswith('http') else 'http://' + h for h in hosts]
    # run fetch() in 10 worker threads, driven from the event loop
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        loop = asyncio.get_event_loop()
        futures = [loop.run_in_executor(executor, fetch, host) for host in hosts]
        for response in await asyncio.gather(*futures):
            pass

def main():
    loop = asyncio.get_event_loop()
    loop.run_until_complete(asyncio.ensure_future(get_data_asynchronous()))
    SUCCESS.close()
    print("\033[34;1m[*]\033[0m Output Saved On : " + OUTPUT)

main()
This is my script. It works great for the first URLs, but for whatever reason it then slows down. Is this a problem with my script or with my PC? Could someone test it with the few thousand URLs I uploaded to Pastebin: https://pastebin.com/raw/5wtrpcDQ
Thank you!
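One thing worth ruling out (an assumption on my part, not a confirmed diagnosis): every call to fetch opens a fresh TCP connection, so each of the thousands of URLs pays its own DNS lookup and handshake, and the asyncio layer adds no concurrency beyond what the thread pool already provides. Reusing connections is one candidate fix; since requests.Session is not documented as thread-safe, a common pattern is one Session per worker thread via threading.local. A sketch (get_session is a hypothetical helper; plug the resulting fetch into the same executor.map pattern shown after the question above):

import threading

import requests

thread_local = threading.local()

def get_session():
    # one Session per worker thread; a Session keeps TCP connections alive
    if not hasattr(thread_local, 'session'):
        thread_local.session = requests.Session()
    return thread_local.session

def fetch(host):
    try:
        r = get_session().get(host, timeout=5)
        return host if r.status_code == 200 and 'KEYWORD' in r.text else None
    except requests.exceptions.RequestException:
        return None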