Wordlist 生成器:如何按文件大小拆分输出文件?

时间:2011-04-30 23:31:53

标签: python

我想让这个 Python 脚本在输出文件达到某个大小后创建一个新文件,并继续生成单词组合。

# Output file: the generation loop at the end of this script writes every
# word into it, one per line.
f=open('wordlist', 'w')

def xselections(items, n):
    """Yield every length-``n`` selection, with repetition, drawn from ``items``.

    Each selection is yielded as a new list.  Selections come out in the
    order of ``items``, with the leftmost position varying slowest.  When
    ``n`` is 0 a single empty list is yielded; when ``items`` is empty and
    ``n`` is positive nothing is yielded.

    ``n`` is expected to be a non-negative integer: a negative ``n`` never
    reaches the ``n == 0`` base case.
    """
    if n == 0:
        yield []
    else:
        # Iterate the items directly rather than indexing through
        # xrange(len(items)); this reads better and runs unchanged on both
        # Python 2 and Python 3.
        for item in items:
            for rest in xselections(items, n - 1):
                yield [item] + rest

# ASCII code points of the three character classes:
#   Numbers = 48 - 57
#   Capital = 65 - 90
#   Lower   = 97 - 122
numb = range(48,58)
cap = range(65,91)
low = range(97,123)

# Menu option -> character classes making up the alphabet, in output order.
char_classes = {
    1: (numb,),
    2: (cap,),
    3: (low,),
    4: (numb, cap),
    5: (numb, low),
    6: (numb, cap, low),
    7: (cap, low),
}

# Re-prompt until one of the listed options is entered.  Input that is not a
# number counts as an invalid option instead of crashing int().
choice = 0
while choice not in char_classes:
    choice = raw_input('''
    1) Numbers
    2) Capital Letters
    3) Lowercase Letters
    4) Numbers + Capital Letters
    5) Numbers + Lowercase Letters
    6) Numbers + Capital Letters + Lowercase Letters
    7) Capital Letters + Lowercase Letters
    : ''')
    try:
        choice = int(choice)
    except ValueError:
        choice = 0

# Flatten the selected classes into one list of code points ...
poss = []
for char_class in char_classes[choice]:
    poss += char_class

# ... and turn the code points into the one-character strings to combine.
bigList = [chr(code) for code in poss]

MIN = int(raw_input("What is the min size of the word? "))
MAX = int(raw_input("What is the max size of the word? "))

# Write every word of each length from MIN to MAX, one per line.  The output
# file is closed even if generation is interrupted, so buffered words are
# flushed to disk.
try:
    for i in range(MIN, MAX+1):
        for s in xselections(bigList, i):
            f.write(''.join(s) + '\n')
finally:
    f.close()

2 个答案:

答案 0 :(得分:4)

您可以将文件轮转行为封装在一个类中。写入数据时,write 方法会先检查本次写入是否会超出文件大小限制;如果会超出,它就调用 _rotate 方法关闭当前文件并打开一个新文件,同时递增文件名中的序列号:

import os

class LimitWriter(object):
    """Write data to a numbered series of files, each capped at a byte limit.

    Files are named ``<basepath>.000000``, ``<basepath>.000001``, ... and are
    opened in binary mode.  The first file is opened on construction.  Call
    ``close()`` when finished, or use the writer as a context manager, so
    the last file is closed too.
    """

    def __init__(self, basepath, bytelimit):
        """Open the first file of the series.

        basepath  -- path prefix shared by every file in the series.
        bytelimit -- a write that would push the current file past this many
                     bytes goes to a new file instead.
        """
        self._basepath = basepath
        self._bytelimit = bytelimit
        self._sequence = 0      # number used in the next file name
        self._output = None     # currently open file object, if any
        self._bytes = 0         # bytes written to the current file so far
        self._rotate()

    def _rotate(self):
        """Close the current file (if any) and open the next one in sequence."""
        if self._output is not None:
            self._output.close()
        path = '%s.%06d' % (self._basepath, self._sequence)
        self._output = open(path, 'wb')
        self._bytes = 0
        self._sequence += 1

    def write(self, data):
        """Write ``data`` whole, switching to a new file first if it would not fit.

        Text that is not already a byte string is encoded as UTF-8 so that
        the size check counts the bytes that actually reach the file.
        """
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        size = len(data)
        # Rotating an empty file cannot make room for an oversized write; it
        # would only leave a zero-length file behind, so rotate only once the
        # current file already holds something.
        if self._bytes and (self._bytes + size) > self._bytelimit:
            self._rotate()
        self._bytes += size
        self._output.write(data)

    def close(self):
        """Close the current file; calling it again is harmless."""
        if self._output is not None:
            self._output.close()
            self._output = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never suppress an exception raised inside the with-block.
        return False

# Send every generated word through a size-limited writer: 1024 * 1024 bytes
# per output file.
out = LimitWriter('wordlist', 1024 * 1024)

for length in range(MIN, MAX + 1):
    for letters in xselections(bigList, length):
        out.write('%s\n' % ''.join(letters))

会输出一系列大小不超过 1MB 的文件:

1.0M  wordlist.000000
1.0M  wordlist.000001
252K  wordlist.000002

更新 - 下面是一些使用 Python 内置功能的提示,可以帮助您缩短代码并使其更容易理解。我在代码中加入了解释每个部分的注释。

以下是我在下面用到的模块的文档:itertools、string

import itertools
import os
from string import digits, lowercase, uppercase

# PUT LimitWriter CLASS DEFINITION HERE

# Size cap for each output file, in bytes.
LIMIT = 1024 * 1024 * 1

# replace your ranges and large if/else with this
# (each entry matches the menu text below: option 4 is numbers + capitals)
choices = {
    1: digits,
    2: uppercase,
    3: lowercase,
    4: digits + uppercase,
    5: digits + lowercase,
    6: digits + uppercase + lowercase,
    7: uppercase + lowercase
    }

# Re-prompt until one of the listed options is entered.  Input that is not a
# number counts as an invalid option instead of crashing int().
choice = 0
while choice not in choices:
    choice = raw_input('''
    1) Numbers
    2) Capital Letters
    3) Lowercase Letters
    4) Numbers + Capital Letters
    5) Numbers + Lowercase Letters
    6) Numbers + Capital Letters + Lowercase Letters
    7) Capital Letters + Lowercase Letters
    : ''')
    try:
        choice = int(choice)
    except ValueError:
        choice = 0

MIN = int(raw_input("What is the min size of the word? "))
MAX = int(raw_input("What is the max size of the word? "))

# pick one of the sets with the user's choice
chars = choices[choice]

out = LimitWriter('wordlist', LIMIT)

# generate every word of each length from min to max.  product() allows a
# character to repeat within a word (e.g. "aa"), which is what the original
# xselections() produced; permutations() would silently skip those words.
for length in range(MIN, MAX+1):
    for tmp in itertools.product(chars, repeat=length):
        out.write(''.join(tmp) + '\n')

答案 1 :(得分:1)

这是最终可运行的代码。修改函数 generate_wordlist 中的 file_size_limit(可从其中的 mbXY 变量里任选一个)即可设定每个文件的大小上限;当前文件一旦将超过该大小,就会切换到新文件继续写入。此代码已更新为可在 Python 3.2 下运行。

import itertools
import subprocess
import os
from string import digits, ascii_lowercase, ascii_uppercase, punctuation

# Shell command that clears the screen on this platform, chosen once when the
# module is loaded.
_CLEAR_COMMAND = "cls" if os.name == 'nt' else "clear"


def clear_console():
    """Clear the terminal by running the platform's clear command in a shell."""
    subprocess.call(_CLEAR_COMMAND, shell=True)

def generate_phone_numbers(area_code, path='phones.txt', first=2010000,
                           last=9999999):
    """Write every phone number of one area code to a text file, one per line.

    Each line is ``area_code`` followed by a subscriber number.

    area_code -- string prefixed to every number.
    path      -- output file; it is overwritten if it already exists.
    first     -- lowest subscriber number to write.
    last      -- highest subscriber number to write (inclusive).
    """
    # The with-block closes the file, and so flushes it, even if writing fails.
    with open(path, 'w') as f:
        # range() excludes its stop value, hence last + 1: the final number
        # (9999999 by default) must be part of the list.
        for i in range(first, last + 1):
            f.write(area_code + str(i) + '\n')

def generate_wordlist(lst_chars, min_digit, max_digit, lst_name,
                      file_size_limit=None):
    """Write every word over ``lst_chars`` to a series of size-capped files.

    Words of each length from ``min_digit`` to ``max_digit`` (inclusive) are
    produced with ``itertools.product``, so characters may repeat within a
    word, and are written one per line.

    lst_chars       -- characters to build the words from.
    min_digit       -- shortest word length.
    max_digit       -- longest word length.
    lst_name        -- base path of the output files; see ``file_writer``.
    file_size_limit -- size cap per file in bytes.  When omitted, the preset
                       selected below (``mb10``) is used.
    """
    # Preset size caps.  Note that these count one "MB" as 1,024,000 bytes.
    mb1   = 1024000
    mb10  = 1024000 * 10
    mb100 = 1024000 * 100
    mb250 = 1024000 * 250
    mb500 = 1024000 * 500
    gb1   = 1024000 * 1000
    if file_size_limit is None:
        file_size_limit = mb10
    out = file_writer(lst_name, file_size_limit)
    # Close the last output file even if generation is interrupted.
    try:
        for curr_length in range(min_digit, max_digit + 1):
            for curr_digit in itertools.product(lst_chars, repeat=curr_length):
                out.write(''.join(curr_digit) + '\n')
    finally:
        out.close()

class file_writer(object):
    """Write text to a numbered series of files, each capped at a byte limit.

    Files are named ``<basepath>.000000``, ``<basepath>.000001``, ... and are
    opened in binary mode; text is encoded as UTF-8.  The first file is
    opened on construction.  Call ``close()`` when finished, or use the
    writer as a context manager, so the last file is closed too.
    """

    def __init__(self, basepath, bytelimit):
        """Open the first file of the series.

        basepath  -- path prefix shared by every file in the series.
        bytelimit -- a write that would push the current file past this many
                     bytes goes to a new file instead.
        """
        self._basepath = basepath
        self._bytelimit = bytelimit
        self._sequence = 0      # number used in the next file name
        self._output = None     # currently open file object, if any
        self._bytes = 0         # bytes written to the current file so far
        self._rotate()

    def _rotate(self):
        """Close the current file (if any) and open the next one in sequence."""
        if self._output is not None:
            self._output.close()
        path = '%s.%06d' % (self._basepath, self._sequence)
        self._output = open(path, 'wb')
        self._bytes = 0
        self._sequence += 1

    def write(self, data):
        """Write ``data`` whole, switching to a new file first if it would not fit.

        ``data`` may be ``str`` (encoded as UTF-8) or ``bytes``.
        """
        # Encode before measuring: the limit is in bytes, and a non-ASCII
        # character takes more than one byte once encoded.
        if isinstance(data, str):
            data = data.encode("utf-8")
        size = len(data)
        # Rotating an empty file cannot make room for an oversized write; it
        # would only leave a zero-length file behind, so rotate only once the
        # current file already holds something.
        if self._bytes and (self._bytes + size) > self._bytelimit:
            self._rotate()
        self._bytes += size
        self._output.write(data)

    def close(self):
        """Close the current file; calling it again is harmless."""
        if self._output is not None:
            self._output.close()
            self._output = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never suppress an exception raised inside the with-block.
        return False

# Menu options 2-5: option number -> (alphabet, output base name, label used
# in the progress message).  Option 1 (phone numbers) is handled separately.
wordlist_options = {
    2: (digits, 'num', 'numbers'),
    3: (digits + ascii_lowercase, 'num_low', 'numbers & lowercase'),
    4: (digits + ascii_lowercase + ascii_uppercase, 'num_low_upp',
        'numbers, lowercase & uppercase'),
    5: (digits + ascii_lowercase + ascii_uppercase + punctuation,
        'num_low_upp_pun', 'numbers, lowercase, uppercase & punctuation'),
}

choice = 0

# Redraw the menu until one of the listed options is entered.  Input that is
# not a number counts as an invalid option instead of crashing int().
while choice not in range(1, 6):
    clear_console()
    print('')
    print('  wgen - Menu')
    choice = input('''
  1. Phone numbers.
  2. Numbers.
  3. Numbers + Lowercase.
  4. Numbers + Lowercase + Uppercase.
  5. Numbers + Lowercase + Uppercase + Punctuation.

  Enter Option: ''')
    try:
        choice = int(choice)
    except ValueError:
        choice = 0

print('')

if choice == 1:
    area_code = input('''
  Please enter Area Code: ''')
    area_code = str(area_code).strip()
    if len(area_code) == 3 and area_code.isdigit():
        print('')
        print('  Generating phone numbers for area code ' + area_code + '.')
        print('  Please wait...')
        generate_phone_numbers(area_code)
    else:
        # Tell the user why nothing was generated instead of exiting silently.
        print('')
        print('  Area code must be exactly 3 digits. Nothing was generated.')
else:
    # Options 2-5 differ only in alphabet, file name and message label.
    lst_chars, lst_name, label = wordlist_options[choice]
    min_digit = int(input('  Minimum digit? '))
    print('')
    max_digit = int(input('  Maximum digit? '))
    print('')
    print('  Generating ' + label + ' between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
    print('  Please wait...')
    generate_wordlist(lst_chars, min_digit, max_digit, lst_name)