我的代码目前会解压一个 zip 压缩包，并遍历其中的每个文件。但是，我只需要读取该压缩包中名为“file.txt”的那一个文件。我尝试只从压缩包中读取“file.txt”，结果却处理了其中的所有文件。有什么建议吗？我的代码会把结果输出到一个文本文件；我需要读取的只有“file.txt”，但如下面的实际结果所示，它读取了所有以“.txt”结尾的文件。
import re
import os
from zipfile import ZipFile
def pan():
    """Read the ``.txt`` members of ``input_files.zip``, then report on every
    file found in the ``input_files`` directory.

    The heading and one row per input line are printed and also written to
    ``output2.txt``; the rows themselves are produced by ``output_contents``.

    NOTE(review): this is the listing the question asks about. Its behavior
    is left as posted; only the block structure has been restored. Two
    things in it lead to the unwanted result:

    * ``ZipFile.read(name, pwd=None)`` takes an optional *password* as its
      second argument, so ``'file.txt'`` does not select a member, and the
      bytes returned by ``read`` are discarded anyway.
    * The report loop walks the ``input_files`` directory on disk, not the
      archive, so every file found there is reported.
    """
    print("Extract single file from ZIP")
    with ZipFile('input_files.zip', 'r') as zipObj:
        listOfFileNames = zipObj.namelist()
        for fileName in listOfFileNames:
            if fileName.endswith('.txt'):
                # NOTE(review): the second argument is the archive password,
                # not the name of the member to read.
                zipObj.read(fileName, 'file.txt')

    # NOTE(review): the message strings below are never used in this function.
    comma_string = ', '
    outfile = "output2.txt"  # name of the report file that gets written
    wp_string = " White Space Detected"
    tab_string = " tab detected"
    mc_string = " Missing carriage return"
    ne_string = " No Error"
    baconFile = open(outfile, "wt")
    # The same heading goes to the console and to the report file.
    print('Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n')
    baconFile.write('Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n')
    # NOTE(review): every file in the directory is processed here.
    for filename in os.listdir(os.getcwd() + "/input_files"):
        with open("input_files/" + filename, 'r') as f:
            output_contents(filename, f, baconFile)
    baconFile.close()  # closes the report file
def output_contents(filename, f, baconFile):
    """Print and write one report row for every line of *f*.

    Args:
        filename: Label placed at the start of each row.
        f: Iterable of text lines, for example an open text file. Lines
            should keep their line ending, because the last column reports
            whether the line contains a newline.
        baconFile: Object with a ``write`` method that receives the rows.

    Each row holds, tab separated: the 1-based line number, the number of
    comma-separated fields, the number of digits after the decimal point of
    every number on the line (0 for a number without a fractional part),
    and whether the line contains a space, a tab and a newline. A line
    without a newline is reported as ``False (end of file)``. Rows are
    written with a leading newline rather than a trailing one.
    """
    # The capture group holds only the digits after the decimal point, so
    # findall() yields '' for a number that has none.
    number_pattern = re.compile(r'\d+\.?(\d+)?')
    row_format = "\t{0:<4}\t{1:<17}\t{2:<27}\t{3:17}\t{4:9}\t{5:21}"

    for index, line in enumerate(f, start=1):
        field_count = len(line.split(','))
        decimals = ','.join(
            str(len(digits)) for digits in number_pattern.findall(line))
        carriage_found = True if '\n' in line else "False (end of file)"
        row = filename + row_format.format(
            index, field_count, decimals,
            str(" " in line), str("\t" in line), str(carriage_found))
        # The same row goes to the console and to the report file.
        print(row)
        baconFile.write('\n')
        baconFile.write(row)
if __name__ == '__main__':
    # Run the extraction and report only when executed as a script.
    pan()
预期:
Filename Line number of numbers string separated by a comma white space found tab found carriage return found
file.txt 1 3 0,3,4 False False True
file.txt 2 3 0,0,1 True True True
file.txt 3 3 7,3,0,1 False False True
file.txt 4 1 1 False False True
file.txt 5 3 5,0,1 False False False (end of file)
实际:
Filename Line number of numbers string separated by a comma white space found tab found carriage return found
alot.txt 1 3 0,3,4 False False True
alot.txt 2 3 0,0,1 True True True
alot.txt 3 3 7,3,0,1 False False True
alot.txt 4 1 1 False False True
alot.txt 5 3 5,0,1 False False False (end of file)
file.txt 1 3 0,3,4 False False True
file.txt 2 3 0,0,1 True True True
file.txt 3 3 5,0,1 False False False (end of file)
file_2.txt 1 3 0,3,4 False False True
file_2.txt 2 3 0,0,1 True True True
file_2.txt 3 3 5,0,1 False False False (end of file)
INCOME.txt 1 3 0,3,4 False False True
INCOME.txt 2 3 0,0,1 True True True
INCOME.txt 3 3 5,0,1 False False True
INCOME.txt 4 3 4,3,0 True True True
INCOME.txt 5 3 7,3,0,1 False False True
INCOME.txt 6 1 1 False False False (end of file)
答案 0 :(得分:0)
您并没有提取出任何文件或文件夹，因为您的 pan 函数写得不对。[Python 3.Docs]: zipfile - ZipFile.read(name, pwd=None) 接受 2 个参数（成员名和可选的密码），而您传入的第二个参数（'file.txt'）会被当作密码，这是错误的。
此版本的 pan 会从 .zip 存档中提取所有名为 file.txt 的文件（无论位于存档的哪个目录），并将其保存到当前目录下（保留其在存档中的路径）：
def pan(archive_name="input_files.zip", member_base_name="file.txt"):
    """Extract every archive member whose base name is *member_base_name*.

    Args:
        archive_name: Path of the ZIP archive to open.
        member_base_name: File name to look for. It is compared against the
            last ``/``-separated component of each member name, so matches
            in any directory of the archive are extracted.

    Returns:
        List of the paths created by ``ZipFile.extract``, in archive order.
        Members are extracted under the current working directory and keep
        their path inside the archive.
    """
    print("Extract files by name from ZIP")
    extracted = []
    with ZipFile(archive_name, "r") as zip_file:
        for name in zip_file.namelist():
            # Directory entries end with "/", so their base name is empty
            # and they never match.
            if name.rpartition("/")[2] != member_base_name:
                continue
            print("Handling: {0:s}".format(name))
            extracted.append(zip_file.extract(name))
    return extracted
@ EDIT0 :
获得更多详细信息后,这是 pan 函数的更新版本(我删除了所有未使用的变量):
def pan():
    """Report on the single member ``file.txt`` of ``input_files.zip``.

    The member is read straight from the archive (nothing is extracted to
    disk). The heading is printed and written to ``output2.txt``, and the
    per-line rows are produced by ``output_contents``. If the archive has no
    member named exactly ``file.txt``, only the heading is written.
    """
    print("Extract single file from ZIP")
    archive_name = "input_files.zip"
    file_name = "file.txt"
    lines = []
    with ZipFile(archive_name, "r") as zip_file:
        if file_name in zip_file.namelist():
            # Keep the line endings: output_contents() reports whether each
            # line contains a newline, and split("\n") would both strip them
            # and append a spurious empty last line.
            lines = zip_file.read(file_name).decode().splitlines(keepends=True)

    bacon_file_name = "output2.txt"
    heading = "Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n"
    print(heading)
    with open(bacon_file_name, "wt") as bacon_file:
        bacon_file.write(heading)
        output_contents(file_name, lines, bacon_file)