我很惭愧地问这个问题。我在使用 replace_with() 时遇到了困难,只是想了解如何更改标签。
这里有个例子:
'use strict'
const async = require('async')
/**
 * Placeholder task (presumably standing in for an API request): schedules a
 * 1000 ms timer, then logs a completion message and calls back with success.
 *
 * @param {Function} callback - Node-style callback, invoked as
 *   `callback(null, 'api_hit')` once the timer fires.
 */
function api_hit(callback) {
  const finish = () => {
    console.log('Completed api_hit');
    callback(null, 'api_hit');
  };

  setTimeout(finish, 1000);
}
/**
 * Placeholder task: schedules a 100 ms timer, then logs a completion message
 * and calls back with success.
 *
 * @param {Function} callback - Node-style callback, invoked as
 *   `callback(null, 'delay')` once the timer fires.
 */
function delay(callback) {
  function onTimeout() {
    console.log('Completed delay');
    callback(null, 'delay');
  }

  setTimeout(onTimeout, 100);
}
/**
 * Placeholder task (presumably standing in for a MySQL check): schedules a
 * 500 ms timer, then logs a completion message and calls back with success.
 *
 * @param {Function} callback - Node-style callback, invoked as
 *   `callback(null, 'mysql_check')` once the timer fires.
 */
function mysql_check(callback) {
  const waitMs = 500;
  const label = 'mysql_check';

  setTimeout(() => {
    console.log('Completed mysql_check');
    callback(null, label);
  }, waitMs);
}
// Tasks to hand to async.series, in this order.
const tasklist = [api_hit, delay, mysql_check];

// Final callback: logs both arguments it receives (error first, then response).
const reportOutcome = (err, response) => {
  console.log(err);
  console.log(response);
};

// Nothing is scheduled when the list is empty.
if (tasklist.length !== 0) {
  async.series(tasklist, reportOutcome);
}
在修正了一些低级错误之后,我得到的只是原始文本,标签并没有被更改:
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
# Fetch the listing page and pick the first element carrying the "list" class.
my_url = "http://example.com/blabla/blublu/tata"
# The with-block closes the response even if read() raises.
with uReq(my_url) as uClient:
    page_html = uClient.read()
page_soup = soup(page_html, "html.parser")
page = page_soup.select_one(".list")
print(page)
if page:
    # Follow the element's href, appended to the site root, to the next page.
    url = page.get("href")
    newUrl = "http://example.com{}".format(url)
    with uReq(newUrl) as Client:
        pageHtml = Client.read()
    pSoup = soup(pageHtml, "html.parser")
    spot = pSoup.select(".view_details > h3")
    spots = spot[0].text
    print(spots)
    contain = pSoup.select(".info > table")
    # Rename every matched <table> to <div> in place by assigning to .name.
    # replace_with("div") would instead swap the whole element for the literal
    # text "div" and leave the detached original unchanged, so the tag itself
    # is never renamed.
    for table in contain:
        table.name = "div"
    contains = contain[0]
    print(contains)
如果有人可以在这黑暗的时刻给我一些光明,那将是一件令人高兴的事。
答案 0 :(得分:0)
尝试以此作为基础,因为它会将标签名 table 更改为 div。
# Minimal demonstration: a tag is renamed by assigning to its .name attribute.
# Imported here so the snippet runs on its own under the name it actually uses.
from bs4 import BeautifulSoup

soup = BeautifulSoup('''<table>
<tr valign="top">
<td>
<div>Lorem ipsum...</div>
</td>
</tr>
</table>''', 'html.parser')
tag = soup.table
tag.name = 'div'
# prettify() emits one tag or text node per line, which is the layout of the
# sample output that follows; plain print(tag) would keep
# "<div>Lorem ipsum...</div>" on a single line.
print(tag.prettify())
输出:
<div>
<tr valign="top">
<td>
<div>
Lorem ipsum...
</div>
</td>
</tr>
</div>