Question

if __name__ == '__main__':
website_path = GetValidDirectory("Enter the path to the website : ")

websitelink_list = []   #List of tuples containing the source link information
print(websitelink_list)
htmlfiles = []          #List of the html files we analyze on the site

for dirname, subdir_list, file_list in os.walk(website_path):

    for file in file_list:

        #Check the file extension.  Only analyze files that are .html
        filename, fileext = os.path.splitext(file)
        if fileext == ".html":
            relative_file = MakeFullPath(dirname, file)

            full_file = os.path.realpath(relative_file)

            htmlfiles.append( (full_file, relative_file) )

            filelinks = FileLinkTuples(dirname, file)
            print(filelinks)
            websitelink_list.extend(filelinks)


#Files that do not have a link to them.

功能：

def GetValidDirectory(prompt : str) -> str:
""" Returns a valid directory entered from the user.

     Arguments :
          prompt : str - The text displayed to the user asking them to enter a directory.
     Returns : String of a valid directory that the user entered.
     Errors  : Displays a warning if the user does not enter a valid directory.
"""

while True:
    try:
        str_prompt = input(prompt)
        prompt_dir = os.path.realpath(str_prompt)
        ch_dir = os.chdir(str_prompt)
    except IOError:
        print("Please enter a valid directory")
        continue
    return prompt_dir

def MakeFullPath(path : str, filename : str) -> str:
""" combines path and filename for a full filename.  If path does not end in \\ then it appends it

     Arguments : 
          path : str, the path to the file
          filename : str, the name of the file.
     Returns : 
          path concatenated with filename.  
             If path does not end with a backslash, then append one if path has data in it.

"""
no_data = []
if os.listdir(path) != no_data:
   relative_file = os.path.join(path,filename)
return relative_file


def FileLinkTuples(path : str, filename : str) -> list:
"""Returns a tuple of links from the file 
     Arguments :
          path : str - The path to the filename.  Might be relative path.
          filename : str - The name of the html file to open and analyze.
     Returns : list of tuples.  
          Each tuple has the 4 values. 
               HTMLFile - HTML file and path to the source file the link was found in.
               fulllinkpath - Full path to the link on the system.  Not a relative path.
               linkpath - Path and name to the link specified.  Relative address. 
               file exists - Boolean indicating if the file at the full link path exists.
           Example 
                [ (('sample\\index.html', 'C:\\Website Analysis\\downloads.html', 'downloads.html', True)
                ]

"""

filelink_list = []
a = path.split('\\')
b = os.path.join(a[-1],filename)
c = os.path.realpath(filename)
if os.path.isfile(filename) == "True":
    filelink_list.append((b,c,filename,"True"))
return filelink_list

有人可以分析我的代码并告诉我为什么（full_file，relative_file）和filelinks的类型不会附加到主程序中的空列表中？

对于这项任务，我获得了Unittest。这些功能通过了测试，但我不知道为什么主程序中的列表没有更新？

当我打印websitelink_list和htmlfiles时，它们都是空的。

我对def FileLinkTuples做错了什么？特别是

if os.path.isfile(filename) == "True":
    filelink_list.append((b,c,filename,"True"))

由于

Answer 1

据我所知，您的代码至少会为htmlfiles产生非空的结果。我看到的主要问题是：

if os.path.isfile(filename) == "True":

该函数返回一个布尔值，而不是一个字符串，因此您应该检查布尔值True，或者甚至完全省略它：

if os.path.isfile(filename):

修复之后，如果您的目录确实包含websitelink_list个文件，那么您应该期待*.html中的某些内容。

现在，如果您根本没有在htmlfiles中看到任何内容（即使目录中包含*.html个文件），那么它必须是您未在此处显示的其他内容，因为它似乎在我的电脑上正常工作。

将元组添加到空列表中

1 个答案: