createFile():在 'a1.txt' 和 'a2.txt' 中生成随机字符串。findCommon2():查找两个文件中共有的行。目前可以处理 1000 万条数据,但希望能够处理 1 亿条数据。可以使用哈希来实现吗?
import string
import random
import time
import hashlib
def createFile(name, size):
    """Write ``size`` random 10-character alphanumeric lines to ``name``.

    Each line consists of 10 characters drawn (with replacement) from
    ASCII letters and digits, followed by a newline. An existing file at
    ``name`` is overwritten. The elapsed generation time is printed.

    Args:
        name: Path of the file to create or overwrite.
        size: Number of lines to write; zero or a negative value yields
            an empty file.
    """
    # perf_counter is monotonic, so the measured interval cannot be
    # skewed by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    pattern = string.ascii_lowercase + string.digits + string.ascii_uppercase
    # The context manager closes (and flushes) the handle even if a
    # write fails part-way through.
    with open(name, 'w') as out:
        # The generator streams one line at a time, so memory stays flat
        # regardless of ``size``.
        out.writelines(
            ''.join(random.choices(pattern, k=10)) + '\n'
            for _ in range(size)
        )
    stop = time.perf_counter()
    print("Gen Time", stop - start)
def findCommon2(f1, f2):
    """Find the lines that occur in both text files.

    The first file is loaded into a set; the second file is streamed
    against it, so only the first file's distinct lines (plus the
    matches) are held in memory. The elapsed time and the resulting set
    are printed.

    Args:
        f1: Path of the first file (loaded fully into a set).
        f2: Path of the second file (iterated line by line).

    Returns:
        The set of lines, without their trailing newline, present in
        both files.
    """
    # perf_counter is monotonic, which makes it the appropriate clock
    # for measuring an elapsed interval.
    start = time.perf_counter()
    with open(f1, 'r') as file1, open(f2, 'r') as file2:
        # Strip the line terminator before comparing: otherwise a word on
        # the final line of a file lacking a trailing newline ("abc")
        # would never match the same word elsewhere ("abc\n").
        seen = {line.rstrip('\n') for line in file1}
        common = seen.intersection(line.rstrip('\n') for line in file2)
    stop = time.perf_counter()
    print("Comparision Time", stop - start)
    print(common)
    # Returning the set lets callers use the result instead of having to
    # scrape it from stdout.
    return common
# Paths of the two generated word lists that are compared.
f1 = 'a1.txt'
f2 = 'a2.txt'
# Number of random lines written to each file. The original script used
# ``size`` without ever defining it, which raised NameError.
size = 10_000_000

# Only generate and compare when executed as a script, so importing this
# module does not trigger the (slow) file generation.
if __name__ == "__main__":
    createFile(f1, size)
    createFile(f2, size)
    # The comparison function defined in this file is findCommon2; the
    # original call to findCommon referenced a name that does not exist.
    findCommon2(f1, f2)