如何使用python获取标签div中的值

时间:2019-07-29 09:30:26

标签: python beautifulsoup tags

我想获取 div 标签中的得分(Score)值。这是我写的函数:

def get_score(text):
    # Asker's original attempt, kept exactly as posted.
    # Parse the incoming markup with the lxml-backed parser.
    soup = BeautifulSoup(text,features="lxml")
    # NOTE(review): `Score` is not defined anywhere in this snippet, so this
    # line would presumably fail with a NameError; it also subscripts
    # `soup.text` instead of searching the parsed tree.
    scores=soup.text[Score]
    # NOTE(review): this filters on an attribute named "Score" whose value is
    # "value", whereas the sample markup carries the score as the text of a
    # plain <div> ("Score : 153780.000000").
    score=scores.find_all(attrs={"Score":"value"})
    return(score) 

a="[<a aria-label=""Signaler le document BDE-BSS MOB ( Application ) (nouvelle fenêtre)"" class=""result-options report-result" "href=""/evaluation?data=eyJmcm9tIjoicmVmZXJlbmNlZF9zaXRlcyIsImRpZCI6MjE1MjIsInB1YmxpY1VybCI6Imh0dHA6XC9cL29yYW5nZWNhcnRvLnNzby5pbmZyYS5mdGdyb3VwXC9iaW5jYXJ0b1wvUGFnZXNcL0NvbXBvbmVudHNcL0NvbXBvbmVudC5hc3B4P2lkPTY2MSZ0YWI9RGVzY3JpcHRpb24iLCJwb3NpdGlvbiI6MSwic2NvcmUiOjE1Mzc4MCwiZGlmZlNjb3JlIjowLCJib29zdGVkIjowLCJ0aGVtZXMiOiJJbmZvcm1hdGlxdWUsIGdyb3VwZSIsInN0YXR1cyI6Im9rIiwiY2xvdWRWaWV3VG90YWxQcm9jZXNzaW5nVGltZSI6MTY4NTMyLCJjbG91ZFZpZXdJc1N1Z2dlc3Rpb25Qcm9wb3NlZCI6MCwicXVlcnlEYXRhIjp7InJkYXRhIjoiYmRlIiwiaWhtIjoiZnIiLCJjbG91ZHZpZXdSZGF0YSI6ImJkZSIsInF1ZXJ5SWQiOiI0NzQ1ZDBiMjE5NjY3ZTJkYzVkN2JkYjFmY2JlMjNhNSJ9LCJxdWVyeU5NYXRjaCI6MzgyMCwicXVlcnlOSGl0cyI6MzAyNCwiaXNOb3RGb3VuZCI6MCwib3JPcGVyYXRvciI6MCwiYmFzaWNhdCI6bnVsbCwicmVmZXJlciI6bnVsbH0=&amp;eval=eyJyZXF1ZXN0IjoiYmRlIiwib3JkZXIiOm51bGwsInJlc3BvbnNlVXJsIjoiaHR0cDpcL1wvb3JhbmdlY2FydG8uc3NvLmluZnJhLmZ0Z3JvdXBcL2JpbmNhcnRvXC9QYWdlc1wvQ29tcG9uZW50c1wvQ29tcG9uZW50LmFzcHg/aWQ9NjYxJnRhYj1EZXNjcmlwdGlvbiIsInJlc3BvbnNlUmFuayI6MSwicmVzcG9uc2VTY29yZSI6IjE1Mzc4MCIsInJlc3BvbnNlVGhlbWVzIjoiSW5mb3JtYXRpcXVlLCBncm91cGUiLCJyZXF1ZXN0UmVzdWx0c0NvdW50IjozMDI0fQ==&amp;typeicon=1&amp;url=https://enquete.orange.com/store/itw/answer/s/hmaw6nljru/k/Qd2BPas?idDeRequete=4745d0b219667e2dc5d7bdb1fcbe23a5&amp;requete=bde&amp;urlResultat=http%3A%2F%2Forangecarto.sso.infra.ftgroup%2Fbincarto%2FPages%2FComponents%2FComponent.aspx%3Fid%3D661%26tab%3DDescription&amp;mail=rym.boukriba%40orange.com&amp;rangURL=1" "target=""blank" "title=""Signaler le document BDE-BSS MOB ( Application ) (nouvelle fenêtre)""><span class=""icon-1013-Reseau""></span></a>], [<div class=""hit-debug-info""><div>Score : 153780.000000</div><div>Term score : 3495</div><div>Date Pallier : 4</div><div>Boost Site : 0</div><div>Boost Type : 10</div><div>Boost Actu : 1</div><div>Thèmes du hit : Informatique, groupe</div></div>]"

我只想得到如下输出:153780.000000

2 个答案:

答案 0 :(得分:0)

由于您的 div 标签上没有任何属性(例如 class 或 id),我无法直接在 soup 中按属性“搜索”该标签。因此,根据您提供的源码,我的做法如下。

page_source = """ a="[<a aria-label=""Signaler le document BDE-BSS MOB ( Application ) (nouvelle fenêtre)"" class=""result-options report-result" "href=""/evaluation?data=eyJmcm9tIjoicmVmZXJlbmNlZF9zaXRlcyIsImRpZCI6MjE1MjIsInB1YmxpY1VybCI6Imh0dHA6XC9cL29yYW5nZWNhcnRvLnNzby5pbmZyYS5mdGdyb3VwXC9iaW5jYXJ0b1wvUGFnZXNcL0NvbXBvbmVudHNcL0NvbXBvbmVudC5hc3B4P2lkPTY2MSZ0YWI9RGVzY3JpcHRpb24iLCJwb3NpdGlvbiI6MSwic2NvcmUiOjE1Mzc4MCwiZGlmZlNjb3JlIjowLCJib29zdGVkIjowLCJ0aGVtZXMiOiJJbmZvcm1hdGlxdWUsIGdyb3VwZSIsInN0YXR1cyI6Im9rIiwiY2xvdWRWaWV3VG90YWxQcm9jZXNzaW5nVGltZSI6MTY4NTMyLCJjbG91ZFZpZXdJc1N1Z2dlc3Rpb25Qcm9wb3NlZCI6MCwicXVlcnlEYXRhIjp7InJkYXRhIjoiYmRlIiwiaWhtIjoiZnIiLCJjbG91ZHZpZXdSZGF0YSI6ImJkZSIsInF1ZXJ5SWQiOiI0NzQ1ZDBiMjE5NjY3ZTJkYzVkN2JkYjFmY2JlMjNhNSJ9LCJxdWVyeU5NYXRjaCI6MzgyMCwicXVlcnlOSGl0cyI6MzAyNCwiaXNOb3RGb3VuZCI6MCwib3JPcGVyYXRvciI6MCwiYmFzaWNhdCI6bnVsbCwicmVmZXJlciI6bnVsbH0=&amp;eval=eyJyZXF1ZXN0IjoiYmRlIiwib3JkZXIiOm51bGwsInJlc3BvbnNlVXJsIjoiaHR0cDpcL1wvb3JhbmdlY2FydG8uc3NvLmluZnJhLmZ0Z3JvdXBcL2JpbmNhcnRvXC9QYWdlc1wvQ29tcG9uZW50c1wvQ29tcG9uZW50LmFzcHg/aWQ9NjYxJnRhYj1EZXNjcmlwdGlvbiIsInJlc3BvbnNlUmFuayI6MSwicmVzcG9uc2VTY29yZSI6IjE1Mzc4MCIsInJlc3BvbnNlVGhlbWVzIjoiSW5mb3JtYXRpcXVlLCBncm91cGUiLCJyZXF1ZXN0UmVzdWx0c0NvdW50IjozMDI0fQ==&amp;typeicon=1&amp;url=https://enquete.orange.com/store/itw/answer/s/hmaw6nljru/k/Qd2BPas?idDeRequete=4745d0b219667e2dc5d7bdb1fcbe23a5&amp;requete=bde&amp;urlResultat=http%3A%2F%2Forangecarto.sso.infra.ftgroup%2Fbincarto%2FPages%2FComponents%2FComponent.aspx%3Fid%3D661%26tab%3DDescription&amp;mail=rym.boukriba%40orange.com&amp;rangURL=1" "target=""blank" "title=""Signaler le document BDE-BSS MOB ( Application ) (nouvelle fenêtre)""><span class=""icon-1013-Reseau""></span></a>], [<div class=""hit-debug-info""><div>Score : 153780.000000</div><div>Term score : 3495</div><div>Date Pallier : 4</div><div>Boost Site : 0</div><div>Boost Type : 10</div><div>Boost Actu : 1</div><div>Thèmes du hit : Informatique, groupe</div></div>]" """

# Parse the sample markup held in ``page_source``.
soup = BeautifulSoup(page_source, 'html.parser')
# The score <div> has no attributes to search by, so walk the <div> elements
# returned by find_all and pick the one whose first child is the
# "Score : <value>" text.
divs = soup.find_all('div', '')
for div in divs:
    if not div.contents:
        # An empty <div></div> has no first child; skip it instead of
        # raising IndexError on ``contents[0]``.
        continue
    first_child = str(div.contents[0])
    if first_child.startswith("Score : "):
        # "Score : 153780.000000" -> ["Score", ":", "153780.000000"]
        print(first_child.split(" ")[2])

输出如下:

153780.000000

答案 1 :(得分:0)

使用 re 模块的正则表达式。

import re
def get_score(text):
    """Return the value of the first ``Score : <value>`` div found in *text*.

    Args:
        text: HTML markup to search.

    Returns:
        The text that follows ``"Score :"`` in the first matching ``<div>``,
        stripped of surrounding whitespace, or ``None`` when no such
        ``<div>`` is found.
    """
    soup = BeautifulSoup(text, features="lxml")
    # ``string=`` is the current spelling of the filter formerly named
    # ``text=``.
    score_tag = soup.find('div', string=re.compile("Score :"))
    if score_tag is None:
        # find() yields None when nothing matches; report that explicitly
        # rather than failing with AttributeError on ``.text``.
        return None
    # Keep only what follows the label: " 153780.000000" -> "153780.000000".
    return score_tag.text.split("Score :")[1].strip()

print(get_score("<a aria-label=""Signaler le document BDE-BSS MOB ( Application ) (nouvelle fenêtre)"" class=""result-options report-result" "href=""/evaluation?data=eyJmcm9tIjoicmVmZXJlbmNlZF9zaXRlcyIsImRpZCI6MjE1MjIsInB1YmxpY1VybCI6Imh0dHA6XC9cL29yYW5nZWNhcnRvLnNzby5pbmZyYS5mdGdyb3VwXC9iaW5jYXJ0b1wvUGFnZXNcL0NvbXBvbmVudHNcL0NvbXBvbmVudC5hc3B4P2lkPTY2MSZ0YWI9RGVzY3JpcHRpb24iLCJwb3NpdGlvbiI6MSwic2NvcmUiOjE1Mzc4MCwiZGlmZlNjb3JlIjowLCJib29zdGVkIjowLCJ0aGVtZXMiOiJJbmZvcm1hdGlxdWUsIGdyb3VwZSIsInN0YXR1cyI6Im9rIiwiY2xvdWRWaWV3VG90YWxQcm9jZXNzaW5nVGltZSI6MTY4NTMyLCJjbG91ZFZpZXdJc1N1Z2dlc3Rpb25Qcm9wb3NlZCI6MCwicXVlcnlEYXRhIjp7InJkYXRhIjoiYmRlIiwiaWhtIjoiZnIiLCJjbG91ZHZpZXdSZGF0YSI6ImJkZSIsInF1ZXJ5SWQiOiI0NzQ1ZDBiMjE5NjY3ZTJkYzVkN2JkYjFmY2JlMjNhNSJ9LCJxdWVyeU5NYXRjaCI6MzgyMCwicXVlcnlOSGl0cyI6MzAyNCwiaXNOb3RGb3VuZCI6MCwib3JPcGVyYXRvciI6MCwiYmFzaWNhdCI6bnVsbCwicmVmZXJlciI6bnVsbH0=&amp;eval=eyJyZXF1ZXN0IjoiYmRlIiwib3JkZXIiOm51bGwsInJlc3BvbnNlVXJsIjoiaHR0cDpcL1wvb3JhbmdlY2FydG8uc3NvLmluZnJhLmZ0Z3JvdXBcL2JpbmNhcnRvXC9QYWdlc1wvQ29tcG9uZW50c1wvQ29tcG9uZW50LmFzcHg/aWQ9NjYxJnRhYj1EZXNjcmlwdGlvbiIsInJlc3BvbnNlUmFuayI6MSwicmVzcG9uc2VTY29yZSI6IjE1Mzc4MCIsInJlc3BvbnNlVGhlbWVzIjoiSW5mb3JtYXRpcXVlLCBncm91cGUiLCJyZXF1ZXN0UmVzdWx0c0NvdW50IjozMDI0fQ==&amp;typeicon=1&amp;url=https://enquete.orange.com/store/itw/answer/s/hmaw6nljru/k/Qd2BPas?idDeRequete=4745d0b219667e2dc5d7bdb1fcbe23a5&amp;requete=bde&amp;urlResultat=http%3A%2F%2Forangecarto.sso.infra.ftgroup%2Fbincarto%2FPages%2FComponents%2FComponent.aspx%3Fid%3D661%26tab%3DDescription&amp;mail=rym.boukriba%40orange.com&amp;rangURL=1" "target=""blank" "title=""Signaler le document BDE-BSS MOB ( Application ) (nouvelle fenêtre)""><span class=""icon-1013-Reseau""></span></a>], [<div class=""hit-debug-info""><div>Score : 153780.000000</div><div>Term score : 3495</div><div>Date Pallier : 4</div><div>Boost Site : 0</div><div>Boost Type : 10</div><div>Boost Actu : 1</div><div>Thèmes du hit : Informatique, groupe</div></div>"))

输出:

153780.000000

编辑:

from bs4 import BeautifulSoup
import re
data='''<a aria-label=""Signaler le document BDE-BSS MOB ( Application ) (nouvelle fenêtre)"" class=""result-options report-result" "href=""/evaluation?data=eyJmcm9tIjoicmVmZXJlbmNlZF9zaXRlcyIsImRpZCI6MjE1MjIsInB1YmxpY1VybCI6Imh0dHA6XC9cL29yYW5nZWNhcnRvLnNzby5pbmZyYS5mdGdyb3VwXC9iaW5jYXJ0b1wvUGFnZXNcL0NvbXBvbmVudHNcL0NvbXBvbmVudC5hc3B4P2lkPTY2MSZ0YWI9RGVzY3JpcHRpb24iLCJwb3NpdGlvbiI6MSwic2NvcmUiOjE1Mzc4MCwiZGlmZlNjb3JlIjowLCJib29zdGVkIjowLCJ0aGVtZXMiOiJJbmZvcm1hdGlxdWUsIGdyb3VwZSIsInN0YXR1cyI6Im9rIiwiY2xvdWRWaWV3VG90YWxQcm9jZXNzaW5nVGltZSI6MTY4NTMyLCJjbG91ZFZpZXdJc1N1Z2dlc3Rpb25Qcm9wb3NlZCI6MCwicXVlcnlEYXRhIjp7InJkYXRhIjoiYmRlIiwiaWhtIjoiZnIiLCJjbG91ZHZpZXdSZGF0YSI6ImJkZSIsInF1ZXJ5SWQiOiI0NzQ1ZDBiMjE5NjY3ZTJkYzVkN2JkYjFmY2JlMjNhNSJ9LCJxdWVyeU5NYXRjaCI6MzgyMCwicXVlcnlOSGl0cyI6MzAyNCwiaXNOb3RGb3VuZCI6MCwib3JPcGVyYXRvciI6MCwiYmFzaWNhdCI6bnVsbCwicmVmZXJlciI6bnVsbH0=&amp;eval=eyJyZXF1ZXN0IjoiYmRlIiwib3JkZXIiOm51bGwsInJlc3BvbnNlVXJsIjoiaHR0cDpcL1wvb3JhbmdlY2FydG8uc3NvLmluZnJhLmZ0Z3JvdXBcL2JpbmNhcnRvXC9QYWdlc1wvQ29tcG9uZW50c1wvQ29tcG9uZW50LmFzcHg/aWQ9NjYxJnRhYj1EZXNjcmlwdGlvbiIsInJlc3BvbnNlUmFuayI6MSwicmVzcG9uc2VTY29yZSI6IjE1Mzc4MCIsInJlc3BvbnNlVGhlbWVzIjoiSW5mb3JtYXRpcXVlLCBncm91cGUiLCJyZXF1ZXN0UmVzdWx0c0NvdW50IjozMDI0fQ==&amp;typeicon=1&amp;url=https://enquete.orange.com/store/itw/answer/s/hmaw6nljru/k/Qd2BPas?idDeRequete=4745d0b219667e2dc5d7bdb1fcbe23a5&amp;requete=bde&amp;urlResultat=http%3A%2F%2Forangecarto.sso.infra.ftgroup%2Fbincarto%2FPages%2FComponents%2FComponent.aspx%3Fid%3D661%26tab%3DDescription&amp;mail=rym.boukriba%40orange.com&amp;rangURL=1" "target=""blank" "title=""Signaler le document BDE-BSS MOB ( Application ) (nouvelle fenêtre)""><span class=""icon-1013-Reseau""></span></a>], [<div class=""hit-debug-info""><div>Score : 153780.000000</div><div>Term score : 3495</div><div>Date Pallier : 4</div><div>Boost Site : 0</div><div>Boost Type : 10</div><div>Boost Actu : 1</div><div>Thèmes du hit : Informatique, groupe</div></div>'''
# Parse the sample markup held in ``data``.
soup = BeautifulSoup(data, features="lxml")
# Collect every <div> matched by the "Score :" pattern; ``string=`` is the
# current spelling of the filter formerly named ``text=``.
score_tags = soup.find_all('div', string=re.compile("Score :"))
# Keep only what follows the label in each match, without surrounding spaces.
scores = [tag.text.split("Score :")[1].strip() for tag in score_tags]
print(scores)