#!/usr/bin/env python
import threading
import urllib, sys, os
import Queue

concurrent = 200
queue = Queue.Queue(concurrent * 2)

try:
    aim = sys.argv[1].lower()
    dic = open(sys.argv[2], 'r')
except:
    print "Usage: %s url wordlist" % sys.argv[0]
    sys.exit(1)

class Scanner(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            self.path = self.queue.get()
            self.geturl = urllib.urlopen(aim + '/' + self.path)
            self.status = self.geturl.getcode()
            self.url = aim + self.path
            self.result = self.url + '=>' + str(self.status)
            print self.result
            self.writeresult(self.result)
            self.queue.task_done()

    def writeresult(self, result):
        fp = open('result.txt', 'a+')
        fp.write(result + '\n')
        fp.close()

def main():
    for i in range(concurrent):
        t = Scanner(queue)
        t.setDaemon(True)
        t.start()
    for path in dic.readlines():
        queue.put(path.strip())
    queue.join()

if __name__ == '__main__':
    main()
This is a Python program that scans a website's directories. The problem is that it does not exit when the scan is finished, not even with Ctrl+C. How can it detect that the scan is complete and exit automatically?
While it is running, it also throws errors like this:
Exception in thread Thread-130:
Traceback (most recent call last):
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/threading.py", line 551, in __bootstrap_inner
self.run()
File "tt.py", line 28, in run
self.geturl = urllib.urlopen(aim+'/'+self.path)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 86, in urlopen
return opener.open(url)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 207, in open
return getattr(self, name)(url)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 344, in open_http
h.endheaders(data)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 954, in endheaders
self._send_output(message_body)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 814, in _send_output
self.send(msg)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 776, in send
self.connect()
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 757, in connect
self.timeout, self.source_address)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 553, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
IOError: [Errno socket error] [Errno 8] nodename nor servname provided, or not known
Answer 0 (score: 0)
As the program stands, it will shut down once all of the threads have finished. But to get rid of all those errors the easy way, go to the run method of your class and, right after the while True: line, wrap everything in a try/except clause like this:
try:
    code
except:
    pass
It is not the cleanest way to do it, but given what you are after it will do the job, and it gets rid of those exceptions, which just mean that some of the URLs timed out.
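For concreteness, here is a minimal sketch of what the question's run method could look like with that change applied (same Python 2 urllib as the original; the broad except is intentional, as suggested above). Keeping task_done() outside the try block matters, because queue.join() in main() only returns once every queued item has been marked done:

    def run(self):
        while True:
            self.path = self.queue.get()
            try:
                # everything that can raise a network error goes inside the try
                self.geturl = urllib.urlopen(aim + '/' + self.path)
                self.status = self.geturl.getcode()
                self.result = aim + '/' + self.path + '=>' + str(self.status)
                print self.result
                self.writeresult(self.result)
            except Exception:
                pass  # e.g. DNS failures or timeouts: just skip this path
            self.queue.task_done()  # always called, so queue.join() can finish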
Answer 1 (score: 0)
I wanted some practice, so I gave this a try and ended up changing quite a lot. Does it get you the full set of results? You will need to replace the hard-coded paths with your original argument reading.
import threading
import urllib
import Queue

concurrent = 5
aim = 'http://edition.cnn.com'
paths = ['2013/10/12/opinion/kazin-tea-party/index.html?hpt=hp_t5',
         '2013/10/11/opinion/opinion-hay-nobel-opcw/index.html?hpt=hp_t5',
         '2013/10/11/opinion/rosin-women-in-charge/index.html?hpt=hp_t5',
         'some invalid path',
         '2013']  # also an invalid path


def main():
    work_q = Queue.Queue()
    result_q = Queue.Queue()

    # start the scanners and the result writer
    scanners = [Scanner(work_q, result_q) for i in range(concurrent)]
    for s in scanners:
        s.start()
    results_file_path = 'results.txt'
    result_writer = ResultWriter(result_q, 'results.txt')
    result_writer.start()

    # send all the work and wait for it to be completed
    for path in paths:
        work_q.put(path.strip())
    work_q.join()

    # tell everyone to stop
    # you could just kill the threads but your writer needs to close the file
    for s in scanners:
        work_q.put(Scanner.STOP_TOKEN)
    result_q.put(ResultWriter.STOP_TOKEN)  # make sure file gets closed

    # wait for everyone to actually stop
    for s in scanners:
        s.join()
    result_writer.join()

    print 'the scan has finished and results are in {}'.format(results_file_path)


class Scanner(threading.Thread):
    STOP_TOKEN = '<<stop>>'

    def __init__(self, work_q, result_q):
        threading.Thread.__init__(self)
        self.work_q = work_q
        self.result_q = result_q

    def run(self):
        while True:
            path = status = None  # reset in case of error
            try:
                try:
                    path = self.work_q.get(timeout=0.00001)
                except Queue.Empty:
                    continue
                if path == self.STOP_TOKEN:
                    break  # stop looking for work
                get_url = urllib.urlopen(aim + '/' + path)
                status = get_url.getcode()
            except Exception as e:
                status = 'unhandled error ({})'.format(e)
            self.result_q.put((path, status))
            self.work_q.task_done()


class ResultWriter(threading.Thread):
    STOP_TOKEN = '<<stop>>'

    def __init__(self, result_q, results_file_path):
        threading.Thread.__init__(self)
        self.result_q = result_q
        self.results_file_path = results_file_path

    def run(self):
        with open(self.results_file_path, 'w') as results_file:
            while True:
                try:
                    result = self.result_q.get(timeout=0.00001)
                except Queue.Empty:
                    continue
                if result == self.STOP_TOKEN:
                    break  # stop looking for results
                path, status = result
                results_file.write('{}=>{}\n'.format(path, status))


if __name__ == '__main__':
    main()
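The STOP_TOKEN sentinels are what let every thread shut down cleanly once the work queue has been drained, which is also what makes the program exit on its own. If you want to drive this version from the command line the way the original script did, one rough sketch for replacing the hard-coded aim and paths near the top is to reuse the question's own argument handling:

import sys

# mirrors the original "url wordlist" interface from the question
try:
    aim = sys.argv[1].lower()
    paths = [line.strip() for line in open(sys.argv[2])]
except (IndexError, IOError):
    print "Usage: %s url wordlist" % sys.argv[0]
    sys.exit(1)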