if __name__ == '__main__':
website_path = GetValidDirectory("Enter the path to the website : ")
websitelink_list = [] #List of tuples containing the source link information
print(websitelink_list)
htmlfiles = [] #List of the html files we analyze on the site
for dirname, subdir_list, file_list in os.walk(website_path):
for file in file_list:
#Check the file extension. Only analyze files that are .html
filename, fileext = os.path.splitext(file)
if fileext == ".html":
relative_file = MakeFullPath(dirname, file)
full_file = os.path.realpath(relative_file)
htmlfiles.append( (full_file, relative_file) )
filelinks = FileLinkTuples(dirname, file)
print(filelinks)
websitelink_list.extend(filelinks)
#Files that do not have a link to them.
功能:
def GetValidDirectory(prompt : str) -> str:
""" Returns a valid directory entered from the user.
Arguments :
prompt : str - The text displayed to the user asking them to enter a directory.
Returns : String of a valid directory that the user entered.
Errors : Displays a warning if the user does not enter a valid directory.
"""
while True:
try:
str_prompt = input(prompt)
prompt_dir = os.path.realpath(str_prompt)
ch_dir = os.chdir(str_prompt)
except IOError:
print("Please enter a valid directory")
continue
return prompt_dir
def MakeFullPath(path : str, filename : str) -> str:
""" combines path and filename for a full filename. If path does not end in \\ then it appends it
Arguments :
path : str, the path to the file
filename : str, the name of the file.
Returns :
path concatenated with filename.
If path does not end with a backslash, then append one if path has data in it.
"""
no_data = []
if os.listdir(path) != no_data:
relative_file = os.path.join(path,filename)
return relative_file
def FileLinkTuples(path : str, filename : str) -> list:
"""Returns a tuple of links from the file
Arguments :
path : str - The path to the filename. Might be relative path.
filename : str - The name of the html file to open and analyze.
Returns : list of tuples.
Each tuple has the 4 values.
HTMLFile - HTML file and path to the source file the link was found in.
fulllinkpath - Full path to the link on the system. Not a relative path.
linkpath - Path and name to the link specified. Relative address.
file exists - Boolean indicating if the file at the full link path exists.
Example
[ (('sample\\index.html', 'C:\\Website Analysis\\downloads.html', 'downloads.html', True)
]
"""
filelink_list = []
a = path.split('\\')
b = os.path.join(a[-1],filename)
c = os.path.realpath(filename)
if os.path.isfile(filename) == "True":
filelink_list.append((b,c,filename,"True"))
return filelink_list
有人可以分析我的代码并告诉我为什么(full_file,relative_file)和filelinks的类型不会附加到主程序中的空列表中?
对于这项任务,我获得了Unittest。这些功能通过了测试,但我不知道为什么主程序中的列表没有更新?
当我打印websitelink_list和htmlfiles时,它们都是空的。
我对def FileLinkTuples做错了什么?特别是
if os.path.isfile(filename) == "True":
filelink_list.append((b,c,filename,"True"))
由于
答案 0 :(得分:0)
据我所知,您的代码至少会为htmlfiles
产生非空的结果。我看到的主要问题是:
if os.path.isfile(filename) == "True":
该函数返回一个布尔值,而不是一个字符串,因此您应该检查布尔值True
,或者甚至完全省略它:
if os.path.isfile(filename):
修复之后,如果您的目录确实包含websitelink_list
个文件,那么您应该期待*.html
中的某些内容。
现在,如果您根本没有在htmlfiles
中看到任何内容(即使目录中包含*.html
个文件),那么它必须是您未在此处显示的其他内容,因为它似乎在我的电脑上正常工作。