Python多处理 - 太慢

时间:2015-11-12 22:21:35

标签: python multithreading python-2.7 passwords multiprocessing

我为特定功能构建了一个多处理密码破解程序(使用wordlist),与使用单个进程相比,它减少了所需的时间。

最初的问题是,它会向您显示破解的密码并终止工作人员,但其余的工作人员会继续工作,直到他们用完了哈希的话!不理想。

我向前迈出的新一步是使用Manager.Event()来终止其余的工作人员,这可以按照我的希望(经过一些试验和错误)后完成,但现在应用程序需要的时间远远超过单个进程所需的时间,我确定这必须归功于if内的pwd_find()功能,但我想我会寻求一些建议。

#!/usr/bin/env python

import hashlib, os, time, math
from hashlib import md5
from multiprocessing import Pool, cpu_count, Manager

def screen_clear(): # Small function for clearing the screen on Unix or Windows
    if os.name == 'nt':
        return os.system('cls')
    else:
        return os.system('clear')

cores = cpu_count() # Var containing number of cores (Threads)

screen_clear()

print ""
print "Welcome to the Technicolor md5 cracker"
print ""

user = raw_input("Username: ")
print ""
nonce = raw_input("Nonce: ")
print ""
hash = raw_input("Hash: ")
print ""
file = raw_input("Wordlist: ")
screen_clear()
print "Cracking the password for \"" + user + "\" using " 
time1 = time.time() # Begins the 'Clock' for timing

realm = "Technicolor Gateway" # These 3 variables dont appear to change
qop = "auth"
uri = "/login.lp"

HA2 = md5("GET" + ":" + uri).hexdigest() # This hash doesn't contain any changing variables so doesn't need to be recalculated

file = open(file, 'r') # Opens the wordlist file
wordlist = file.readlines() # This enables us to use len()
length = len(wordlist)

screen_clear()
print "Cracking the password for \"" + user + "\" using " + str(length) + " words"

break_points = []  # List that will have start and stopping points
for i in range(cores):  # Creates start and stopping points based on length of word list
    break_points.append({"start":int(math.ceil((length+0.0)/cores * i)), "stop":int(math.ceil((length+0.0)/cores * (i + 1)))})

def pwd_find(start, stop, event):
    for number in range(start, stop):
        if not event.is_set():
            word = (wordlist[number])
            pwd = word.replace("\n","") # Removes newline character
            HA1 = md5(user + ":" + realm + ":" + pwd).hexdigest()
            hidepw = md5(HA1 + ":" + nonce +":" + "00000001" + ":" + "xyz" + ":" + qop + ":" + HA2).hexdigest()
            if hidepw == hash:
                screen_clear()
                time2 = time.time() # stops the 'Clock'
                timetotal = math.ceil(time2 - time1) # Calculates the time taken
                print "\"" + pwd + "\"" + " = " + hidepw + " (in " + str(timetotal) + " seconds)"
                print ""
                event.set()
                p.terminate
                p.join
        else:
            p.terminate
            p.join

if __name__ == '__main__':  # Added this because the multiprocessor module sometimes acts funny without it.

    p = Pool(cores)  # Number of processes to create.
    m = Manager()
    event = m.Event()
    for i in break_points:  # Cycles though the breakpoints list created above.
        i['event'] = event
        a = p.apply_async(pwd_find, kwds=i, args=tuple())  # This will start the separate processes.
    p.close() # Prevents any more processes being started
    p.join() # Waits for worker process to end

if event.is_set():
    end = raw_input("hit enter to exit")
    file.close() # Closes the wordlist file
    screen_clear()
    exit()
else:
    screen_clear()
    time2 = time.time() # Stops the 'Clock'
    totaltime = math.ceil(time2 - time1) # Calculates the time taken
    print "Sorry your password was not found (in " + str(totaltime) + " seconds) out of " + str(length) + " words"
    print ""
    end = raw_input("hit enter to exit")
    file.close() # Closes the wordlist file
    screen_clear()
    exit()

编辑(适用于@noxdafox):

def finisher(answer):
    if answer:
        p.terminate()
        p.join()
        end = raw_input("hit enter to exit")
        file.close() # Closes the wordlist file
        screen_clear()
        exit()

def pwd_find(start, stop):
    for number in range(start, stop):
        word = (wordlist[number])
        pwd = word.replace("\n","") # Removes newline character
        HA1 = md5(user + ":" + realm + ":" + pwd).hexdigest()
        hidepw = md5(HA1 + ":" + nonce +":" + "00000001" + ":" + "xyz" + ":" + qop + ":" + HA2).hexdigest()
        if hidepw == hash:
            screen_clear()
            time2 = time.time() # stops the 'Clock'
            timetotal = math.ceil(time2 - time1) # Calculates the time taken
            print "\"" + pwd + "\"" + " = " + hidepw + " (in " + str(timetotal) + " seconds)"
            print ""
            return True
        elif hidepw != hash:
            return False

if __name__ == '__main__':  # Added this because the multiprocessor module sometimes acts funny without it.

    p = Pool(cores)  # Number of processes to create.
    for i in break_points:  # Cycles though the breakpoints list created above.
        a = p.apply_async(pwd_find, kwds=i, args=tuple(), callback=finisher)  # This will start the separate processes.
    p.close() # Prevents any more processes being started
    p.join() # Waits for worker process to end

2 个答案:

答案 0 :(得分:1)

我认为你的预感是正确的。您正在检查快速循环内的同步原语。我可能只会检查事件是否经常设置。您可以尝试找到最佳位置,在那里检查它,不要做太多工作,但不要经常让您减慢程序。

答案 1 :(得分:1)

您可以使用Pool原语来解决您的问题。您不需要共享一个事件对象,该对象访问是同步的并且速度很慢。

Here我举例说明如何在给定工人所需结果的情况下终止游泳池。

您可以通过返回特定值来简单地通知池,并在回调中终止池。