我试图从每个后续分页中抓取评论,但只得到了第一页的评论。虽然每个后续分页的网址都被循环访问到了,但这些页面上的评论并没有被抓取下来。
我导入了所有必要的包。
我的代码是:
# Visit each review page of the hotel, pull the text of every review on the
# page, and reduce it to a list of lowercase, stop-word-free, lemmatized tokens.
#
# NOTE: `reviews` is not created here; it must already exist (e.g.
# `reviews = []`) before this snippet runs. One token list per review is
# appended to it.

# Loop-invariant helpers: build them once instead of once per review/word.
punctuation = set(string.punctuation)
english_stops = set(stopwords.words('english'))
wordnet_lemmatizer = WordNetLemmatizer()

# Reuse a single browser session for every page and always shut it down,
# rather than launching (and leaking) a new Chrome instance per iteration.
browser = webdriver.Chrome()
try:
    for i in range(0, 700, 5):
        # The page offset is spliced into the URL right after "-or".
        url2 = 'https://www.tripadvisor.com/Hotel_Review-g35805-d87576-Reviews-or' + str(i) + '-Travelodge_Hotel_Downtown_Chicago-Chicago_Illinois.html'
        # Open the URL in Chrome.
        browser.get(url2)
        print(url2)
        # Stall after loading so the traffic looks like human browsing.
        time.sleep(5)  # seconds
        html = browser.page_source
        # Parse the rendered page.
        soup = BeautifulSoup(html, "lxml")
        for r in soup.find_all('div', 'reviewSelector'):
            # Skip review containers that have no <p> body to read.
            if r.p is None:
                continue
            # Use one consistent name: the original assigned `Review` but
            # tokenized `review`, so the current page's text was never used.
            review = r.p.text
            # Split the review into word tokens.
            words = word_tokenize(review)
            # Drop punctuation tokens and normalize the rest to lowercase.
            clean_words = [word.lower() for word in words if word not in punctuation]
            # Remove English stop words.
            clean_words = [word for word in clean_words if word not in english_stops]
            # Lemmatize to reduce variation between word forms.
            lemma_list = [wordnet_lemmatizer.lemmatize(word) for word in clean_words]
            # Record this review's tokens.
            reviews.append(lemma_list)
            print(lemma_list)
finally:
    browser.quit()
我使用 Selenium 的 webdriver.Chrome() 打开网页。