我正在使用代码通过网络抓取客户评论。一切都按照我想要的代码工作,但是我无法正确获得评级的类或属性,因此代码始终为Ratings
列返回空白结果。
有人可以帮助我找到正确的属性并修复Ratings
代码行吗?
from bs4 import BeautifulSoup
import requests
import pandas as pd
import json
print ('all imported successfuly')
# Initialize an empty dataframe
df = pd.DataFrame()
for x in range(1, 37):
names = []
headers = []
bodies = []
ratings = []
published = []
updated = []
reported = []
link = (f'https://www.trustpilot.com/review/fabfitfun.com?page={x}')
print (link)
req = requests.get(link)
content = req.content
soup = BeautifulSoup(content, "lxml")
articles = soup.find_all('article', {'class':'review'})
for article in articles:
names.append(article.find('div', attrs={'class': 'consumer-information__name'}).text.strip())
headers.append(article.find('h2', attrs={'class':'review-content__title'}).text.strip())
try:
bodies.append(article.find('p', attrs={'class':'review-content__text'}).text.strip())
except:
bodies.append('')
try:
#ratings.append(article.find('div', attrs={'class':'star-rating star-rating--medium'}).text.strip())
ratings.append(article.find('div', attrs={'class': 'star-rating star-rating--medium'})['alt'])
except:
ratings.append('')
dateElements = article.find('div', attrs={'class':'review-content-header__dates'}).text.strip()
jsonData = json.loads(dateElements)
published.append(jsonData['publishedDate'])
updated.append(jsonData['updatedDate'])
reported.append(jsonData['reportedDate'])
# Create your temporary dataframe of the first iteration, then append that into your "final" dataframe
temp_df = pd.DataFrame({'User Name': names, 'Header': headers, 'Body': bodies, 'Rating': ratings, 'Published Date': published, 'Updated Date':updated, 'Reported Date':reported})
df = df.append(temp_df, sort=False).reset_index(drop=True)
print ('pass1')
df.to_csv('FabfitfunReviews007.csv', index=False, encoding='utf-8')
print ('excel done')
答案 0 :(得分:3)
只需在您的代码中更改此行:
#include <iostream>
#include <fstream>
#include <vector>
#include <fstream>
#include <iterator>
#include <iomanip>
using namespace std;
int main () {
//open file and get size
streampos begin,end;
ifstream myfile ("TRPTRANS.DAT", ios::binary);
begin = myfile.tellg();
myfile.seekg (0, ios::end);
end = myfile.tellg();
int n;
n=(end-begin);
cout << "size is: " << n<<endl;
//read file
vector<char> randomBytes(n);
myfile.read(&randomBytes[0], n);
//display bytes
for (auto& el : randomBytes)
cout << setfill('0') << setw(2) << hex << (0xff & (unsigned int)el);
cout << '\n';
return 0;
}
ratings.append(article.find_all("img", alt=True)[0]["alt"])
然后输出到:
df.Rating
在文章中找到0 1 star: Bad
1 5 stars: Excellent
2 5 stars: Excellent
3 5 stars: Excellent
4 5 stars: Excellent
5 5 stars: Excellent
6 5 stars: Excellent
标签并从其中检索替代文本似乎更容易。