我正在学习如何抓取 eBay 的商品列表页。页面上共有 48 个商品框,但其中只有 26 个商品带有评价的 div,因此索引时得到 IndexError: list index out of range。
请问如何跳过这种情况,或者在 item_rating 缺失时用 "N/A" 填充?我试过用 continue,但没能解决。实际上这是一个普遍问题——对于 item_shipping 等其他可能缺失的字段也会发生同样的错误。
已更新
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape an eBay category listing page and collect name/price/rating/location
# for every product box.  Boxes that lack a given field (e.g. only some items
# have a reviews div) yield "N/A" instead of raising IndexError.
URL = 'https://www.ebay.com/b/Makeup-Products/31786/bn_1865570'
response = requests.get(URL)
soup = BeautifulSoup(response.content, 'html.parser')

columns = ["Name", "Price", "Rating", "Location"]

# Product features
main_table = soup.find('ul', attrs={'class': 'b-list__items_nofooter'})
item_boxes = main_table.find_all('div', attrs={'class': 's-item__info clearfix'})


def _text_or_na(tag):
    """Return *tag*'s text with newlines stripped, or 'N/A' when the tag is missing."""
    return tag.text.replace('\n', '') if tag is not None else 'N/A'


rows = []
for item in item_boxes:
    item_name_row = _text_or_na(item.find('h3'))
    item_price_row = _text_or_na(item.find('span', {'class': 's-item__price'}))
    # Only some listings carry a reviews block; guard the outer div before
    # dereferencing its inner rating div.  This is the line that used to raise
    # IndexError via findAll(...)[0].
    reviews = item.find('div', {'class': 's-item__reviews'})
    item_rating_row = _text_or_na(reviews.div if reviews is not None else None)
    item_location_row = _text_or_na(
        item.find('span', {'class': 's-item__location s-item__itemLocation'}))
    rows.append([item_name_row, item_price_row, item_rating_row, item_location_row])

# Build the frame once from the collected rows: DataFrame.append is deprecated
# (removed in pandas 2.0) and per-row appending is quadratic; writing the CSV
# inside the loop rewrote the whole file on every iteration.
df = pd.DataFrame(rows, columns=columns)
df.to_csv('ebay1.csv', index=False)
# The original script also emitted ebay4.csv from a second (broken) pass over
# the same data; keep producing the file for compatibility.
df.to_csv('ebay4.csv', index=False)
答案 0 :(得分:1)
下面的代码可以处理没有评分的商品:评分缺失时该列留空。
import requests
from bs4 import BeautifulSoup
import pandas as pd

# eBay scraper variant (3 columns) that tolerates listings without a rating:
# any missing field is stored as None so it appears blank in the CSV.
URL = 'https://www.ebay.com/b/Makeup-Products/31786/bn_1865570'
response = requests.get(URL)
soup = BeautifulSoup(response.content, 'html.parser')

columns = ['name', "price", "rating"]

# Product features
main_table = soup.find('ul', attrs={'class': 'b-list__items_nofooter'})
item_boxes = main_table.find_all('div', attrs={'class': 's-item__info clearfix'})

rows = []
for item in item_boxes:
    # find() returns None when the tag is absent, so no try/except or [0]
    # indexing is needed — the original findAll(...)[0] raised IndexError
    # for the 22 items without a reviews div.
    name_tag = item.find('h3')
    price_tag = item.find('span', {'class': 's-item__price'})
    reviews = item.find('div', {'class': 's-item__reviews'})
    rating = (reviews.div.text.replace('\n', '')
              if reviews is not None and reviews.div is not None else None)
    rows.append([
        name_tag.text.replace('\n', '') if name_tag is not None else None,
        price_tag.text.replace('\n', '') if price_tag is not None else None,
        rating,
    ])

# One DataFrame construction instead of the deprecated per-row
# DataFrame.append (removed in pandas 2.0); one CSV write after the loop.
df = pd.DataFrame(rows, columns=columns)
df.to_csv('ebay1.csv', index=False)
下面是我自己用过的一个版本(在您的代码基础上修改),用于从 reColorado 网站抓取某个社区的房产数据:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape real-estate listings for one zip code from reColorado and collect
# address/price/status/bedrooms/bathrooms/sqft/courtesy per listing card.
URL = 'https://www.recolorado.com/find-real-estate/80817/1-pg/exclusive-dorder/price-dorder/photo-tab/'
response = requests.get(URL)
soup = BeautifulSoup(response.content, 'html.parser')

columns = ['address', "price", "active", "bedrooms", "bathrooms", "sqft", "courtesy"]

# Product features
main_table = soup.find('div', attrs={'class': 'page--column', 'data-id': 'listing-results'})
item_boxes = main_table.find_all('div', attrs={'class': 'listing--information listing--information__photo'})


def _clean_text(tag):
    """Text of *tag* with CR/LF and spaces removed, or 'N/A' when the tag is missing."""
    if tag is None:
        return "N/A"
    return tag.text.replace('\r', '').replace('\n', '').replace(' ', '')


rows = []
for item in item_boxes:
    # Every field now goes through the same missing-tag guard; the original
    # read price.text/address.text unguarded (AttributeError on None) and
    # duplicated the sqft lookup.
    price_row = _clean_text(item.find(
        'li', attrs={'class': 'listing--detail listing--detail__photo listing--detail__price'}))
    address = item.find('h2', attrs={'class': 'listing--street listing--street__photo'})
    address_row = address.text.replace(', ', '') if address is not None else "N/A"
    active_listing = item.find(
        'div', attrs={'class': 'listing--status listing--status__photo listing--status__Under Contract'})
    active_row = active_listing.text if active_listing is not None else "N/A"
    bedrooms_row = _clean_text(item.find(
        'li', attrs={'class': 'listing--detail listing--detail__photo listing--detail__bedrooms'}))
    # NOTE(review): 'listig--detail__bathrooms' looks like a typo, but it is the
    # class name the site actually served when this was written — kept as-is.
    bathrooms_row = _clean_text(item.find(
        'li', attrs={'class': 'listing--detail listing--detail__photo listig--detail__bathrooms'}))
    sqft_row = _clean_text(item.find(
        'li', attrs={'class': 'listing--detail listing--detail__photo listing--detail__sqft'}))
    courtesy_row = _clean_text(item.find(
        'div', attrs={'class': 'listing--courtesy listing--courtesy__photo show-mobile'}))
    rows.append([address_row, price_row, active_row, bedrooms_row,
                 bathrooms_row, sqft_row, courtesy_row])

# Single DataFrame construction replaces the deprecated per-row
# DataFrame.append (removed in pandas 2.0).
df = pd.DataFrame(rows, columns=columns)
df
#          address                      price     active          bedrooms    bathrooms    sqft      courtesy
#0  6920 South US Highway 85-87        $699,000  N/A             5Bedrooms●  4Bathrooms●  3,978Sqft CourtesyofColdwellBankerResidentialBK
#1  7095 Prado Drive                   $414,900  Under Contract  9Bedrooms●  4Bathrooms●  3,000Sqft CourtesyofKellerWilliamsClientsChoiceRealty
#2  7941 Whistlestop Lane              $399,500  N/A             3Bedrooms●  3Bathrooms●  2,577Sqft CourtesyofRE/MAXRealEstateGroupInc
#3  7287 Van Wyhe Court                $389,900  Under Contract  4Bedrooms●  3Bathrooms●  2,750Sqft CourtesyofPinkRealty
#4  10737 Hidden Prairie Parkway       $369,900  Under Contract  4Bedrooms●  3Bathrooms●  2,761Sqft CourtesyofKellerWilliamsPartnersRealty
#5  7327 Van Wyhe Court                $362,400  N/A             3Bedrooms●  2Bathrooms●  1,640Sqft CourtesyofPinkRealty
#6  7354 Chewy Court                   $359,000  N/A             3Bedrooms●  2Bathrooms●  1,680Sqft CourtesyofRedWhiteAndBlueRealtyGroupInc
#7  238 West Iowa Avenue               $355,000  N/A             N/A         4Bathrooms●  1,440Sqft CourtesyofAllenRealty
#8  8181 Wagon Spoke Trail             $350,000  Under Contract  4Bedrooms●  3Bathrooms●  2,848Sqft CourtesyofKellerWilliamsPremierRealty,LLC
#9  0 Missouri                         $350,000  N/A             N/A         N/A          N/A       CourtesyofRE/MAXNORTHWESTINC
#10 10817 Hidden Prairie Parkway       $340,000  Under Contract  3Bedrooms●  3Bathrooms●  2,761Sqft CourtesyofKellerWilliamsPartnersRealty
#11 8244 Campground Drive              $335,000  Under Contract  4Bedrooms●  3Bathrooms●  2,018Sqft CourtesyofPinkRealty
我会尽快在这里重新尝试抓取一个 eBay 页面;如果您还有其他链接,请在评论中留下,我很乐意看看能否抓取成功。
更新:
在另一页上对此进行了尝试,并且有效
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Same scraper applied to the Eye-Makeup category page; missing name/price
# become "N/A" and a missing rating becomes None (blank in the CSV).
URL = 'https://www.ebay.com/b/Eye-Makeup/172020/bn_1880663'
response = requests.get(URL)
soup = BeautifulSoup(response.content, 'html.parser')

columns = ['name', "price", "rating"]

# Product features
main_table = soup.find('ul', attrs={'class': 'b-list__items_nofooter'})
item_boxes = main_table.find_all('div', attrs={'class': 's-item__info clearfix'})

rows = []
for item in item_boxes:
    # find() returns None for an absent tag, so no bare except / [0] indexing
    # is needed; the original raised IndexError on items without a reviews div.
    name_tag = item.find('h3')
    price_tag = item.find('span', {'class': 's-item__price'})
    reviews = item.find('div', {'class': 's-item__reviews'})
    rows.append([
        name_tag.text.replace('\n', '') if name_tag is not None else "N/A",
        price_tag.text.replace('\n', '') if price_tag is not None else "N/A",
        (reviews.div.text
         if reviews is not None and reviews.div is not None else None),
    ])

# Build once and write once: DataFrame.append is deprecated (removed in
# pandas 2.0) and the original rewrote ebay1.csv on every loop iteration.
df = pd.DataFrame(rows, columns=columns)
df.to_csv('ebay1.csv', index=False)