I adapted this script from other scripts written by other users on this site. I've done my best to understand it, but I'm stuck. When I try to crack a hash whose charset is only lowercase letters, it works fine. But when I try to crack a hash containing both digits and letters, it doesn't work unless I increase the "spc" variable from 1000000 to 100000000.
import itertools
import math
import string
import multiprocessing
import hashlib
import traceback
import sys

def hashstring(string, algorithm):
    # hash `string` with the named algorithm, return the hex digest
    return hashlib.new(algorithm, string).hexdigest()

def gen_product(prefix, charset, length):
    # yield prefix + every possible `length`-char suffix over `charset`
    for string in itertools.product(charset, repeat=length):
        yield prefix + "".join(string)

def string_generator(prefix, hash, suffix_length, length, charset, hashalg):
    # search one chunk of the keyspace for a preimage of `hash`
    num_done = 0
    if length <= suffix_length:
        assert prefix == ""
        for possible in gen_product("", charset, length):
            if hashstring(possible, hashalg) == hash:
                return possible
    else:
        assert len(prefix) + suffix_length == length
        for possible in gen_product(prefix, charset, suffix_length):
            if hashstring(possible, hashalg) == hash:
                return possible
    return None

def run_string_generator(*args):
    # re-raise with the full traceback, which is otherwise lost in a worker
    try:
        return string_generator(*args)
    except:
        raise Exception("".join(traceback.format_exception(*sys.exc_info())))

def do_work(pool, hash, charset, length, hashalg, spc=100000000):
    n = len(charset)
    suffix_len = int(math.ceil(math.log(spc) / math.log(n)) - 1)
    max_short_len = min(suffix_len, length)
    for length in range(1, max_short_len + 1):
        result = pool.apply_async(run_string_generator, args = ("", hash, suffix_len, length, charset, hashalg))
        if result.get() != None:
            return result.get()
    for length in range(max_short_len + 1, length + 1):
        for prefix in gen_product("", charset, length - suffix_len):
            result = pool.apply_async(run_string_generator, args = (prefix, hash, suffix_len, length, charset, hashalg))
            if result.get() != None:
                return result.get()
    return None

def parallel_bruteforce(hash, charset, length, hashalg="md5", spc=1000000, cores=None):
    pool = multiprocessing.Pool(cores)
    result = do_work(pool, hash, charset, length, hashalg, spc)
    pool.close()
    pool.join()
    return result

if __name__ == "__main__":
    print "Starting..."
    # The hash is an md5 encryption of "test1"
    print parallel_bruteforce("5a105e8b9d40e1329780d62ea2265d8a", string.ascii_lowercase + string.digits, 5, spc=100000000)
Edit: the link to the other post with the original code is https://stackoverflow.com/a/20135250/1769995
Answer 0 (score: 2)
Sorry in advance that I don't have time to explain this now. This is an edit of my earlier answer, which preserves the parallelism and stops all the workers "early" if the hash is cracked. In general you don't want to pass arguments that never vary across calls, so I do more at module level here than you did. The attached code displays:
workers will cycle through the last 3 chars
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
[etc]
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
the plaintext is 'test1'
Here it is. Note that it plays another low-level efficiency trick, by materializing PAIRS, a list of all 2-character strings over the alphabet. That again saves billions of redundant joins over the long haul.
import string
import hashlib
from itertools import product

CHARSET = string.ascii_lowercase + string.digits
MAX_LENGTH = 5
NUM_PROCESSES = None # defaults to all available cores
HASHALG = "md5"
HASH = "5a105e8b9d40e1329780d62ea2265d8a"

PAIRS = ["".join(t) for t in product(CHARSET, repeat=2)]

def make_bases(count):
    bases = [PAIRS] * (count // 2)
    if count & 1:
        bases.insert(0, CHARSET)
    return bases

# string_gen is what the workers run.  Everything else
# runs in the main program.

def string_gen(prefix, suffix_len, length):
    # Generate all strings of length `length` starting with `prefix`.
    # If length > suffix_len, only the last suffix_len characters
    # need to be generated.
    if length <= suffix_len:
        assert prefix == ""
        bases = make_bases(length)
    else:
        assert len(prefix) + suffix_len == length
        bases = make_bases(suffix_len)
    for t in product(*bases):
        result = prefix + "".join(t)
        # do something with result
        if hashlib.new(HASHALG, result).hexdigest() == HASH:
            return result

def record_done(result):
    global all_done, the_secret
    print ".",
    if result is not None:
        print
        the_secret = result
        all_done = True
        pool.close()
        pool.terminate() # stop all workers! we're done

def do_work(pool, strings_per_chunk=1000000):
    global all_done, the_secret
    all_done = False
    the_secret = None
    # What's the most chars we can cycle through without
    # exceeding strings_per_chunk?
    N = len(CHARSET)
    suffix_len = 1
    while N**suffix_len <= strings_per_chunk:
        suffix_len += 1
    suffix_len -= 1
    print "workers will cycle through the last", suffix_len, "chars"
    # There's no point to splitting up very short strings.
    max_short_len = min(suffix_len, MAX_LENGTH)
    for length in range(1, max_short_len + 1):
        pool.apply_async(string_gen, args=("", suffix_len, length),
                         callback=record_done)
        if all_done:
            return
    # And now the longer strings.
    for length in range(max_short_len + 1, MAX_LENGTH + 1):
        for t in product(*make_bases(length - suffix_len)):
            prefix = "".join(t)
            pool.apply_async(string_gen, args=(prefix, suffix_len, length),
                             callback=record_done)
            if all_done:
                return

if __name__ == "__main__":
    import multiprocessing
    pool = multiprocessing.Pool(NUM_PROCESSES)
    do_work(pool)
    pool.close()
    pool.join()
    if the_secret is None:
        print "didn't crack it!"
    else:
        print "the plaintext is", repr(the_secret)
Note: as mentioned above, that code "over-parallelizes" for larger problem sizes and/or a very small strings_per_chunk. The main program can fire off apply_async() calls far faster than the worker processes can deal with them, so the multiprocessing machinery can end up trying to queue billions of work items. Then you may run out of RAM, or other system resources. Of course that's fixable too ;-)

multiprocessing doesn't expose any way to throttle its internal queues, so "a natural" solution is to add a layer with our own queue. This keeps at most 3 pending tasks per processor in multiprocessing's internal task queue, but blocks the main program from generating more prefixes whenever its own queue gets longer than that. Logic is also added to raise an EarlyExit exception when the hash is cracked; that's easier and cleaner than fiddling with a global flag. What follows is intended to replace everything from record_done() down in the code above:
class EarlyExit(Exception):
    def __init__(self, result):
        Exception.__init__(self)
        self.result = result

class CustomDispatcher:
    def __init__(self, pool):
        from collections import deque
        self.pool = pool
        self.q = deque()

    def queue_work(self, *args):
        # NUM_PROCESSES may be None ("use all cores"), so fall back to
        # cpu_count() when computing the backlog limit
        max_pending = (NUM_PROCESSES or multiprocessing.cpu_count()) * 3
        while len(self.q) > max_pending:
            # provided the workers have significant work to do,
            # it will "take a long time" to finish the work
            # already queued.  Rather than swamp the mp machinery
            # with even more pending tasks, wait for some to
            # finish first.
            self.unqueue()
        self.q.append(self.pool.apply_async(string_gen, args))

    def unqueue(self):
        if self.q:
            # note: the main program spends most of its time
            # blocked on the .get(); that's because it can
            # generate prefixes far faster than workers can
            # process them
            result = self.q.popleft().get()
            print ".",
            if result is not None:
                print
                raise EarlyExit(result)

    def drain(self):
        while self.q:
            self.unqueue()

def do_work(dispatch, strings_per_chunk=10000000):
    # What's the most chars we can cycle through without
    # exceeding strings_per_chunk?
    N = len(CHARSET)
    suffix_len = 1
    while N**suffix_len <= strings_per_chunk:
        suffix_len += 1
    suffix_len -= 1
    print "workers will cycle through the last", suffix_len, "chars"
    print "so each dot represents", \
          format(len(CHARSET)**suffix_len, ","), "strings"
    # There's no point to splitting up very short strings.
    max_short_len = min(suffix_len, MAX_LENGTH)
    for length in range(1, max_short_len + 1):
        dispatch.queue_work("", suffix_len, length)
    # And now the longer strings.
    for length in range(max_short_len + 1, MAX_LENGTH + 1):
        for t in product(*make_bases(length - suffix_len)):
            dispatch.queue_work("".join(t), suffix_len, length)
    dispatch.drain() # check remaining tasks for a winner

if __name__ == "__main__":
    import multiprocessing
    pool = multiprocessing.Pool(NUM_PROCESSES)
    dispatch = CustomDispatcher(pool)
    try:
        do_work(dispatch)
    except EarlyExit as e:
        print "the plaintext is", repr(e.result)
    else:
        print "didn't crack it!"
    pool.close()
    pool.terminate() # stop all workers! we're done
    pool.join()
As the size of the alphabet and/or the size of the longest string produced grows, the combinatorial explosion of possibilities may mean you'll wait forever for a result, but at least with this change you won't run out of RAM - and you'll be utilizing all your cores at close to 100% capacity.
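To get a rough feel for that explosion, here's a small illustrative calculation; the 10**8 hashes-per-second rate is an assumption made up for the example, not a measurement:

ALPHABET = 36   # lowercase letters + digits, as in the code above
RATE = 10**8    # assumed hashes per second (hypothetical)

for width in range(5, 11):
    space = ALPHABET ** width
    print width, format(space, ","), "strings ~", space // RATE, "seconds"

At width 10 that's about 3.7e15 strings - more than a year of work even at that optimistic rate.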
Answer 1 (score: 1)
Note that:
result = pool.apply_async(run_string_generator, args = (prefix, hash, suffix_len, length, charset, hashalg))
if result.get() != None:
    return result.get()
destroys all the parallelism. result.get() blocks until the worker process finishes its task, so only one worker can possibly be active at a time. Did you notice that only one of your cores was busy?
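For contrast, here's a minimal sketch of one way to keep the pool busy using the question's own functions: queue a whole batch of tasks before calling any .get(), so the workers always have pending work (do_work_batched is a hypothetical name, not part of the original script):

def do_work_batched(pool, hash, charset, length, hashalg, suffix_len):
    # submit every prefix first; nothing blocks during submission
    pending = []
    for prefix in gen_product("", charset, length - suffix_len):
        pending.append(pool.apply_async(run_string_generator,
            args=(prefix, hash, suffix_len, length, charset, hashalg)))
    # only now collect results; the workers stay busy in the meantime
    for result in pending:
        answer = result.get()
        if answer is not None:
            return answer
    return None

(Answer 0 above goes further: callbacks plus a bounded queue throttle the submission side as well.)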
Answer 2 (score: 0)
The first thing I notice is that spc would be better off with a descriptive name - then we wouldn't need to reverse-engineer its purpose. It is passed from main through parallel_bruteforce into do_work, and is used on only one line:
suffix_len = int(math.ceil(math.log(spc) / math.log(n)) - 1)
That line hints that it relates to a length, but in a rather convoluted mathematical way. n is the size of the charset. The use of logarithms fits computing the length of string required to represent a given number of distinct values with a particular set of digits. For example, log(50000)/log(16) is about 3.9 - telling us we need 4 hexadecimal digits to count to 50000, which matches well with 0xffff == 65535. Dividing logarithms like this converts between logarithm bases; usually only the natural and base-10 logarithms are implemented (log and log10 in Python), but logx(n) = log(n)/log(x), regardless of the base of log.
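As a quick sanity check of that arithmetic (digits_needed is a hypothetical helper written just for illustration):

import math

def digits_needed(values, base):
    # smallest width w such that base**w >= values
    return int(math.ceil(math.log(values) / math.log(base)))

print digits_needed(50000, 16)   # 4: 16**4 == 65536 covers 50000
print digits_needed(65535, 16)   # 4: 0xffff is the largest 4-digit hex value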
With that, we can see that spc most likely relates to the search space - the number of attempts to make. The way it's used here must mean the algorithm makes the specified number of attempts, rather than however many a particular width requires. If you have to raise the width, you need to raise spc to match: spc = len(charset)**width. This naturally exposes the exponential nature of brute forcing wider character sets; it gets slow.
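For example, to make the script search everything up to width 5 over its lowercase+digits charset, spc could be sized like so (a sketch against the question's parallel_bruteforce; `width` is just an illustrative name):

charset = string.ascii_lowercase + string.digits
width = 5
spc = len(charset) ** width   # 36**5 == 60,466,176 attempts
print parallel_bruteforce("5a105e8b9d40e1329780d62ea2265d8a",
                          charset, width, spc=spc)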