我正在尝试使用 Python 抓取网页中的 HTML 表格。HTML 页面中有很多表格,但我只想抓取其中某一个表格。我使用 Beautiful Soup 进行网页抓取。
我的代码如下:
# Download the archived price page and parse it with the 'html.parser' backend.
page = get("http://uobgoldprice.com/history/2018/September/10/")
html = BeautifulSoup(page.content, 'html.parser')
# Walk every <tr> and print only those whose complete text equals the
# product name exactly.
for row in html.select('tr'):
    row_text = row.text
    if row_text == "ARGOR CAST BAR":
        print(row_text)
我只需要显示标题为 “Rate as at Monday, 10 September 2018”(截至2018年9月10日星期一的价格)的那个表格。
我该怎么做?
答案 0 :(得分:1)
您需要先找到包含该文本的元素,然后再找到它所属的父级表格(table)元素:
import re
import requests
from bs4 import BeautifulSoup
# Fetch the archived price page and parse it.
page = requests.get("http://uobgoldprice.com/history/2018/September/10/")
html = BeautifulSoup(page.content, 'html.parser')
# Locate the text node carrying the heading. `string=` is the current name
# of the text-matching argument; `text=` is its deprecated spelling.
element = html.find(string=re.compile('Rate as at Monday, 10 September 2018'))
if element is None:
    # find() returns None when nothing matches; report that instead of
    # failing with AttributeError on the next call.
    print('Heading not found on the page')
else:
    # The table we want is the nearest enclosing <table> of that text node
    # (`find_parent` is the non-deprecated name of `findParent`).
    print(element.find_parent('table'))
答案 1 :(得分:1)
from collections import defaultdict
import requests
from bs4 import BeautifulSoup
def get_page_html(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. Without a timeout the request can block
            indefinitely on an unresponsive host.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.HTTPError: If the server replies with an error status
            (raised by ``raise_for_status``).
        requests.Timeout: If the server does not answer within *timeout*.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text
def parse_last_table(html):
    """Parse the last ``<table>`` in *html* into rows grouped by description.

    The first two ``<tr>`` rows of the table are skipped (presumably the
    title/header rows -- confirm against the page). Rows that do not have
    exactly five ``<td>`` cells are ignored. A row whose description cell
    is empty is filed under the description of the previous accepted row.

    Args:
        html: Markup of the page to parse.

    Returns:
        A ``defaultdict(list)`` mapping each description to a list of dicts
        with the keys ``currency``, ``unit``, ``bank_sells`` and
        ``bank_buys`` (all stripped cell text). The mapping is empty when
        the markup contains no table.
    """
    result = defaultdict(list)
    soup = BeautifulSoup(html, 'lxml')
    tables = soup.find_all('table')
    if not tables:
        # No table at all: return the empty mapping instead of failing
        # with IndexError on tables[-1].
        return result

    prev_key = None
    for row in tables[-1].find_all('tr')[2:]:
        cells = [col.text.strip() for col in row.find_all('td')]
        if len(cells) != 5:
            continue  # blank/empty row, or a row with another layout
        description, currency, unit, bank_sells, bank_buys = cells
        # Continuation rows leave the description cell empty; reuse the
        # description of the row above.
        description = description or prev_key
        result[description].append({
            'currency': currency,
            'unit': unit,
            'bank_sells': bank_sells,
            'bank_buys': bank_buys,
        })
        prev_key = description
    return result
输出:
>>> url = 'http://uobgoldprice.com/history/2018/September/10/'
>>> page_html = get_page_html(url)
>>> result = parse_last_table(page_html)
>>> import json; print(json.dumps(result, indent=2))
{
"ARGOR CAST BAR": [
{
"currency": "SGD",
"unit": "100 GM",
"bank_sells": "5,369.00 (+4.00)",
"bank_buys": "5,291.00 (+3.00)"
}
],
"CAST BARS": [
{
"currency": "SGD",
"unit": "1 KILOBAR",
"bank_sells": "53,201.00 (+36.00)",
"bank_buys": "52,933.00 (+36.00)"
}
],
"GOLD CERTIFICATE": [
{
"currency": "SGD",
"unit": "1 KILOCERT",
"bank_sells": "53,201.00 (+36.00)",
"bank_buys": "52,933.00 (+36.00)"
}
],
"GOLD SAVINGS A/C": [
{
"currency": "SGD",
"unit": "1 GM",
"bank_sells": "53.20 (+0.04)",
"bank_buys": "52.94 (+0.04)"
}
],
"GOLD BULLION COINS": [
{
"currency": "SGD",
"unit": "1/20 OZ(GNC,SLC &GML)",
"bank_sells": "131.00",
"bank_buys": "81.00"
},
{
"currency": "SGD",
"unit": "1/10 OZ",
"bank_sells": "211.00 (+1.00)",
"bank_buys": "163.00"
},
{
"currency": "SGD",
"unit": "1/4 OZ",
"bank_sells": "465.00",
"bank_buys": "410.00"
},
{
"currency": "SGD",
"unit": "1/2 OZ",
"bank_sells": "904.00 (+1.00)",
"bank_buys": "822.00 (+1.00)"
},
{
"currency": "SGD",
"unit": "1 OZ",
"bank_sells": "1,726.00 (+1.00)",
"bank_buys": "1,645.00 (+1.00)"
}
],
"PAMP GOLD BARS": [
{
"currency": "SGD",
"unit": "1/2 OZ",
"bank_sells": "876.00",
"bank_buys": "821.00 (+1.00)"
},
{
"currency": "SGD",
"unit": "1 GM",
"bank_sells": "82.00",
"bank_buys": "50.00"
},
{
"currency": "SGD",
"unit": "1 OZ",
"bank_sells": "1,711.00 (+1.00)",
"bank_buys": "1,644.00 (+1.00)"
},
{
"currency": "SGD",
"unit": "2.5 GM",
"bank_sells": "182.00",
"bank_buys": "130.00"
},
{
"currency": "SGD",
"unit": "5 GM",
"bank_sells": "322.00",
"bank_buys": "262.00"
},
{
"currency": "SGD",
"unit": "10 GM",
"bank_sells": "597.00 (+1.00)",
"bank_buys": "527.00 (+1.00)"
},
{
"currency": "SGD",
"unit": "20 GM",
"bank_sells": "1,132.00 (+1.00)",
"bank_buys": "1,056.00 (+1.00)"
},
{
"currency": "SGD",
"unit": "50 GM",
"bank_sells": "2,746.00 (+2.00)",
"bank_buys": "2,644.00 (+2.00)"
},
{
"currency": "SGD",
"unit": "100 GM",
"bank_sells": "5,414.00 (+3.00)",
"bank_buys": "5,291.00 (+3.00)"
}
],
"SILVER PASSBOOK ACCOUNT": [
{
"currency": "SGD",
"unit": "1 OZ",
"bank_sells": "19.86 (+0.09)",
"bank_buys": "19.30 (+0.09)"
}
]
}