我目前正在使用此代码从网站抓取一组值:
#import libraries
import requests
from bs4 import BeautifulSoup
import numpy as np
# Labels that identify the rating rows we want to read.
RATING_LABELS = ('Excellent', 'Very Good', 'Average', 'Poor', 'Terrible')

# Walk every matching <div> and print the value found next to each rating label.
# NOTE(review): `soup2` is not defined in this snippet; presumably it is a
# BeautifulSoup document parsed earlier -- confirm.
for reviewtype in soup2.findAll('div', {'row_label'}):
    # The original test `'Excellent' or 'Very Good' or ... in reviewtype.text`
    # was always true: a non-empty string literal is truthy on its own, so the
    # `in` check was never reached. Test each label against the row text.
    if any(label in reviewtype.text for label in RATING_LABELS):
        # The value sits in the third <span> that follows the label element.
        reviewbar = reviewtype.findNext('span')
        reviewfill = reviewbar.findNext('span')
        reviewnum = reviewfill.findNext('span')
        # Drop non-ASCII characters and surrounding whitespace, then wrap the
        # text in a list.
        # NOTE(review): under Python 3 `encode` returns bytes, so this `split`
        # would need a bytes separator; the snippet targets Python 2.
        reviewnumlist = reviewnum.text.encode('ascii', 'ignore').strip().split(r'</span>')
        # A parenthesised single argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(np.array(reviewnumlist))
它提取数据如下:
['254']
['30']
['5']
['5']
['2']
我希望它能像这样出现:
['254'], ['30'], ['5'], ['5'], ['2']
但我目前的方法并不奏效。任何见解都会有所帮助。
答案 0 :(得分:0)
先创建一个空列表,在 `for` 循环中把每次得到的相关项目追加到该列表,等循环结束后再统一 `print`,而不是在 `for` 循环内单独打印每个对象。
#import libraries
import requests
from bs4 import BeautifulSoup
import numpy as np
# Labels that identify the rating rows we want to read.
RATING_LABELS = ('Excellent', 'Very Good', 'Average', 'Poor', 'Terrible')

# Collect the value of every rating row, then print them all at once.
# NOTE(review): `soup2` is not defined in this snippet; presumably it is a
# BeautifulSoup document parsed earlier -- confirm.
result = []
for reviewtype in soup2.findAll('div', {'row_label'}):
    # The original test `'Excellent' or 'Very Good' or ... in reviewtype.text`
    # was always true: a non-empty string literal is truthy on its own, so the
    # `in` check was never reached. Test each label against the row text.
    if any(label in reviewtype.text for label in RATING_LABELS):
        # The value sits in the third <span> that follows the label element.
        reviewbar = reviewtype.findNext('span')
        reviewfill = reviewbar.findNext('span')
        reviewnum = reviewfill.findNext('span')
        # Drop non-ASCII characters and surrounding whitespace, then wrap the
        # text in a list.
        # NOTE(review): under Python 3 `encode` returns bytes, so this `split`
        # would need a bytes separator; the snippet targets Python 2.
        reviewnumlist = reviewnum.text.encode('ascii', 'ignore').strip().split(r'</span>')
        # Fixed typo: the original appended the undefined name
        # `revirenumlist`, which raises NameError.
        result.append(reviewnumlist)

# Print once, after the loop, so all rows appear on a single line.
print(result)