我试图让这个python脚本创建一个新文件,并在达到某个文件大小后继续生成单词组合。
f=open('wordlist', 'w')
def xselections(items, n):
if n==0: yield []
else:
for i in xrange(len(items)):
for ss in xselections(items, n-1):
yield [items[i]]+ss
# Numbers = 48 - 57
# Capital = 65 - 90
# Lower = 97 - 122
numb = range(48,58)
cap = range(65,91)
low = range(97,123)
choice = 0
while int(choice) not in range(1,8):
choice = raw_input('''
1) Numbers
2) Capital Letters
3) Lowercase Letters
4) Numbers + Capital Letters
5) Numbers + Lowercase Letters
6) Numbers + Capital Letters + Lowercase Letters
7) Capital Letters + Lowercase Letters
: ''')
choice = int(choice)
poss = []
if choice == 1:
poss += numb
elif choice == 2:
poss += cap
elif choice == 3:
poss += low
elif choice == 4:
poss += numb
poss += cap
elif choice == 5:
poss += numb
poss += low
elif choice == 6:
poss += numb
poss += cap
poss += low
elif choice == 7:
poss += cap
poss += low
bigList = []
for i in poss:
bigList.append(str(chr(i)))
MIN = raw_input("What is the min size of the word? ")
MIN = int(MIN)
MAX = raw_input("What is the max size of the word? ")
MAX = int(MAX)
for i in range(MIN,MAX+1):
for s in xselections(bigList,i): f.write(''.join(s) + '\n')
答案 0 :(得分:4)
您可以将文件旋转行为封装在类中。当您编写一些数据时,write方法将首先检查写入是否超出文件大小限制;然后它调用rotate方法关闭当前文件并打开一个新文件,递增文件名上的序列号:
import os
class LimitWriter(object):
def __init__(self, basepath, bytelimit):
self._basepath = basepath
self._bytelimit = bytelimit
self._sequence = 0
self._output = None
self._bytes = 0
self._rotate()
def _rotate(self):
if self._output:
self._output.close()
path = '%s.%06d' % (self._basepath, self._sequence)
self._output = open(path, 'wb')
self._bytes = 0
self._sequence += 1
def write(self, data):
size = len(data)
if (self._bytes + size) > self._bytelimit:
self._rotate()
self._bytes += size
self._output.write(data)
out = LimitWriter('wordlist', 1024 * 1024 * 1)
for i in range(MIN,MAX+1):
for s in xselections(bigList,i):
out.write(''.join(s) + '\n')
会输出一系列小于1MB的文件:
1.0M wordlist.000000
1.0M wordlist.000001
252K wordlist.000002
更新 - 有关使用Python的一些内置功能的一些提示,可帮助您缩短代码并使其更容易理解。我已经包含了解释每个部分的评论。
以下是我在下面使用的模块的文档:itertools,string。
import itertools
import os
from string import digits, lowercase, uppercase
# PUT LimitWriter CLASS DEFINITION HERE
LIMIT = 1024 * 1024 * 1
choice = 0
while int(choice) not in range(1,8):
choice = raw_input('''
1) Numbers
2) Capital Letters
3) Lowercase Letters
4) Numbers + Capital Letters
5) Numbers + Lowercase Letters
6) Numbers + Capital Letters + Lowercase Letters
7) Capital Letters + Lowercase Letters
: ''')
MIN = int(raw_input("What is the min size of the word? "))
MAX = int(raw_input("What is the max size of the word? "))
# replace your ranges and large if/else with this
choices = {
1: digits,
2: uppercase,
3: lowercase,
4: uppercase + lowercase,
5: digits + lowercase,
6: digits + uppercase + lowercase,
7: uppercase + lowercase
}
# pick one of the sets with the user's choice
chars = choices[int(choice)]
out = LimitWriter('wordlist', LIMIT)
# generate all permutations of the characters from min to max
for length in range(MIN, MAX+1):
for tmp in itertools.permutations(chars, length):
out.write(''.join(tmp) + '\n')
答案 1 :(得分:1)
这是最终的工作代码。更改函数generate_wordlist中的变量mbXY以确定每个文件的大小上限,只要它大于此大小。此文件已更新为在Python 3.2下运行
import itertools
import subprocess
import os
from string import digits, ascii_lowercase, ascii_uppercase, punctuation
if os.name == 'nt':
def clear_console():
subprocess.call("cls", shell=True)
return
else:
def clear_console():
subprocess.call("clear", shell=True)
return
def generate_phone_numbers(area_code):
f = open('phones.txt', 'w')
for i in range(2010000, 9999999):
f.write(area_code + str(i) + '\n')
def generate_wordlist(lst_chars, min_digit, max_digit, lst_name):
mb1 = 1024000
mb10 = 1024000 * 10
mb100 = 1024000 * 100
mb250 = 1024000 * 250
mb500 = 1024000 * 500
gb1 = 1024000 * 1000
file_size_limit = mb10
out = file_writer(lst_name, file_size_limit)
for curr_length in range(min_digit, max_digit + 1):
for curr_digit in itertools.product(lst_chars, repeat=curr_length):
out.write(''.join(curr_digit) + '\n')
class file_writer(object):
def __init__(self, basepath, bytelimit):
self._basepath = basepath
self._bytelimit = bytelimit
self._sequence = 0
self._output = None
self._bytes = 0
self._rotate()
def _rotate(self):
if self._output:
self._output.close()
path = '%s.%06d' % (self._basepath, self._sequence)
self._output = open(path, 'wb')
self._bytes = 0
self._sequence += 1
def write(self, data):
size = len(data)
if (self._bytes + size) > self._bytelimit:
self._rotate()
self._bytes += size
self._output.write(bytes(data, "utf-8"))
choice = 0
while int(choice) not in range(1,6):
clear_console()
print ('')
print (' wgen - Menu')
choice = input('''
1. Phone numbers.
2. Numbers.
3. Numbers + Lowercase.
4. Numbers + Lowercase + Uppercase.
5. Numbers + Lowercase + Uppercase + Punctuation.
Enter Option: ''')
print ('')
choice = int(choice)
if choice == 1:
area_code = input('''
Please enter Area Code: ''')
area_code = str(area_code)
area_code = area_code.strip()
if len(area_code) == 3:
print ('')
print (' Generating phone numbers for area code ' + area_code + '.')
print (' Please wait...')
generate_phone_numbers(area_code)
if choice == 2:
min_digit = input(' Minimum digit? ')
min_digit = int(min_digit)
print ('')
max_digit = input(' Maximum digit? ')
max_digit = int(max_digit)
lst_chars = digits
lst_name = 'num'
print ('')
print (' Generating numbers between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
print (' Please wait...')
generate_wordlist(lst_chars, min_digit, max_digit, lst_name)
if choice == 3:
min_digit = input(' Minimum digit? ')
min_digit = int(min_digit)
print ('')
max_digit = input(' Maximum digit? ')
max_digit = int(max_digit)
lst_chars = digits + ascii_lowercase
lst_name = 'num_low'
print ('')
print (' Generating numbers & lowercase between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
print (' Please wait...')
generate_wordlist(lst_chars, min_digit, max_digit, lst_name)
if choice == 4:
min_digit = input(' Minimum digit? ')
min_digit = int(min_digit)
print ('')
max_digit = input(' Maximum digit? ')
max_digit = int(max_digit)
lst_chars = digits + ascii_lowercase + ascii_uppercase
lst_name = 'num_low_upp'
print ('')
print (' Generating numbers, lowercase & uppercase between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
print (' Please wait...')
generate_wordlist(lst_chars, min_digit, max_digit, lst_name)
if choice == 5:
min_digit = input(' Minimum digit? ')
min_digit = int(min_digit)
print ('')
max_digit = input(' Maximum digit? ')
max_digit = int(max_digit)
lst_chars = digits + ascii_lowercase + ascii_uppercase + punctuation
lst_name = 'num_low_upp_pun'
print ('')
print (' Generating numbers, lowercase, uppercase & punctuation between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
print (' Please wait...')
generate_wordlist(lst_chars, min_digit, max_digit, lst_name)