我想用BeautifulSoup解析HTML表。我的问题是我只得到最后一行的值。 feature_list.append(features)为每一行添加一个字典。最后,feature_list中的每个字典都具有相同的值,但是您可以看到print(features ['Code'])为每一行提供“代码”。
有人可以帮我吗?
from bs4 import BeautifulSoup
from bs4.element import Tag
import numpy as np
import pandas as pd
import requests
def read_item_list_view(url):
html_doc = requests.get(url, timeout=5).content
soup = BeautifulSoup(html_doc, 'html.parser')
#parse generic features and item information
generic_features = {}
generic_features['Titel'] = soup.select_one('.field-name-title').string
generic_features['Image'] = soup.select_one('.field-name-field-immagine-prodotto').a.img['src']
generic_features_list = soup.select_one('.field-group-div')
for feature in generic_features_list.children:
generic_features[feature.div.string[:-2]] = feature.a.string
if soup.select_one('.field-name-field-note-prodotto').contents:
notes = []
for string in soup.select_one('.field-name-field-note-prodotto').stripped_strings:
notes.append(string)
generic_features['Notes'] = notes
#find feature table
table = soup.select_one('.field-name-product-dimensions-press-fitting').find('table')
#read html table with simple header
def read_table_with_simple_header(table, generic_features):
header = []
feature_list = []
features = generic_features
table_header = table.thead
for tr in table_header.contents:
if isinstance(tr, Tag):
for col in tr.children:
if isinstance(col, Tag):
header.append(col.text.strip())
table_content = table.tbody
for tr in table_content.contents:
if isinstance(tr, Tag):
cur_col = 0
for col in tr.children:
if isinstance(col, Tag):
features[header[cur_col]] = col.text.strip()
cur_col += 1
print(features['Code'])
feature_list.append(features)
return feature_list
return read_table_with_simple_header(table, generic_features)
print(read_item_list_view('https://www.vitillo.eu/de/press-fittings/metric-female-24deg-cone-90deg-elbow-l-type.html'))
答案 0 :(得分:0)
我认为问题是因为字典是可变类型,这意味着当您在每个循环上更改“功能”字典时,它每次都会更新相同的“功能”对象,这会影响先前附加的“功能”(即它们都是同一个对象。您只是每次都向其添加新引用)
您要做的是每次都附加一个新的字典对象。
更改
feature_list.append(features)
到
feature_list.append(dict(features))
我想这会解决您的问题。