此代码是从 https://www.youtube.com/watch?v=XhXOsgdC9h0 复制的:
import requests
from bs4 import BeautifulSoup
import operator
def all_words(url):
    """Fetch *url* and hand the words of its result titles to ``clean_up_list``.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    The text of every ``<a class="result-title">`` element is lower-cased and
    split on whitespace. The collected words are then passed to
    ``clean_up_list``; nothing is returned.
    """
    word_list = []
    source_code = requests.get(url).text  # download the page HTML
    soup = BeautifulSoup(source_code, "html.parser")
    for before_text in soup.find_all("a", {"class": "result-title"}):
        content = before_text.string
        # ``.string`` is None when the tag does not wrap exactly one text
        # node; skip such links instead of failing on ``None.lower()``.
        if content is None:
            continue
        word_list.extend(content.lower().split())
    clean_up_list(word_list)
def clean_up_list(word_list):
    """Strip symbols from each word and pass the survivors to ``create_dictionary``.

    Every character listed in ``symbols`` is deleted from each word. Words
    that are empty after stripping are dropped. Each kept word is printed and
    collected, and the collected list is handed to ``create_dictionary``.
    Nothing is returned.
    """
    # The backslash is escaped explicitly: a bare "\-" is an invalid escape
    # sequence (SyntaxWarning on Python 3.12+), although it yields the same
    # two characters, backslash and hyphen.
    symbols = "!@#$%^&*()_+[]\"\\-;',./<>?"
    # Build the deletion table once; translate() removes all symbols in one pass.
    strip_table = str.maketrans("", "", symbols)
    clean_word_list = []
    for word in word_list:
        word = word.translate(strip_table)
        if word:  # only non-empty words are kept
            print(word)
            clean_word_list.append(word)
    create_dictionary(clean_word_list)
def create_dictionary(clean_word_list):
    """Count how often each word occurs and print the counts.

    One ``word count`` line is printed per distinct word, ordered from the
    smallest count to the largest. Words with equal counts keep the order in
    which they first appeared in *clean_word_list*. Nothing is returned.
    """
    from collections import Counter

    word_count = Counter(clean_word_list)
    # sorted() is stable and ascending, so the most frequent word comes last.
    for key, value in sorted(word_count.items(), key=operator.itemgetter(1)):
        print(key, value)
# Script entry point. all_words() ends by calling clean_up_list(), which in
# turn ends by calling create_dictionary(), so this single call runs all three.
all_words("https://cnj.craigslist.org/search/sys")
这段代码只是想统计网站中每个单词出现的频率。我不明白的是:我只调用了函数 all_words,
但实际上代码中的所有函数都被执行了。这是怎么回事?
谢谢!
答案 0 :(得分:1)
您调用了 all_words("https://cnj.craigslist.org/search/sys"),
该函数的末尾调用了 clean_up_list(word_list),
而 clean_up_list 的末尾又调用了 create_dictionary(clean_word_list)。
所以你的 3 个函数都被调用了。