我用Wget下载了网页,想问:“是否可以从本地html文件中获取URL?”
我使用python来分析html文件内容。我想打印所有文件的URL。
我正在尝试为这个程序添加更多功能,所以我想,如果能在每条结果后面打印出对应的URL,用户就可以轻松点击链接打开网页。
这是我的代码:
def search(self):
    """Print the files under the search directory that contain the keyword.

    The keyword is read from ``self.entry.get()`` (presumably a Tkinter
    Entry widget -- confirm against the enclosing class).  Every regular
    file in the hard-coded directory is parsed as HTML, its visible text
    is stripped of ASCII punctuation and split on whitespace, and each
    word is truncated to 14 characters.  Files in which the keyword
    occurs are printed, most frequent first, with their occurrence count.

    NOTE: because words are truncated to 14 characters and compared with
    ``==``, a keyword longer than 14 characters can never match.

    Side effect: a ``.DS_Store`` entry found in the directory is deleted.

    Returns:
        A dict mapping every (truncated) word of the last processed file
        to that file's name; an empty dict if no file was processed.
    """
    keyword = self.entry.get()
    mypath = "/Users/Tsu-AngChou/MasterProject/Practice/try_test/"
    translator = str.maketrans("", "", string.punctuation)

    matches = []  # (filename, keyword frequency) for every matching file
    a = {}  # stays defined even when the directory holds no regular files

    for f in listdir(mypath):
        fullpath = join(mypath, f)
        if f == '.DS_Store':
            # Use the full path: the bare name only resolves when the
            # current working directory happens to be mypath.
            os.remove(fullpath)
            continue
        if not isfile(fullpath):
            continue

        # Context manager guarantees the handle is closed after reading.
        with open(fullpath, 'r', encoding='utf-8') as response:
            html_cont = response.read()

        soup = bs(html_cont, 'html.parser')
        regular_string = soup.get_text()
        words = [item[:14] for item in regular_string.translate(translator).split()]
        a = dict.fromkeys(words, f)

        # Only the keyword's count is needed, so count it directly rather
        # than building a frequency table for every word in the file.
        frequency = words.count(keyword)
        if frequency:
            matches.append((f, frequency))

    # list.sort is stable, so files with equal frequency keep directory
    # order, exactly as the original strict-comparison bubble sort did.
    matches.sort(key=lambda match: match[1], reverse=True)

    for filename, frequency in matches:
        print(keyword, "Filename:", filename, "Frequency:", frequency)
    return a
This is my output; I want each result to be followed by its link.