def return_class(count):
    """Return the table-row CSS class name selected by the parity of *count*.

    The mapping is kept exactly as the original code had it: an even
    ``count`` yields ``"smartphone_odd"`` and an odd ``count`` yields
    ``"smartphone_even"``.

    Args:
        count: integer whose parity picks the class name.

    Returns:
        str: ``"smartphone_odd"`` or ``"smartphone_even"``.
    """
    if count % 2 == 0:
        return "smartphone_odd"
    return "smartphone_even"
def processor_list_online():
    """Scrape processor names from the notebookcheck smartphone CPU table.

    Downloads the benchmark list page, visits every ``<tr>`` whose class is
    one of the two values produced by ``return_class`` and collects the text
    of the second ``td.specs`` cell of each such row, in document order.
    The number of rows visited is printed before returning.

    The previous version wrapped the scan in ``while True`` without any exit
    condition, so it never reached ``return`` and kept re-appending the same
    rows; the page is now scanned exactly once.

    Returns:
        list: text of the second ``td.specs`` cell of every matching row that
        has at least two such cells.
    """
    # Function-scope imports keep this block usable on both Python 2 and 3.
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    url = "http://www.notebookcheck.net/Smartphone-Processors-Benchmark-List.149513.0.html"
    # closing() guarantees the HTTP response is released once parsed.
    with closing(urlopen(url)) as htmlfile:
        soup = BeautifulSoup(htmlfile, 'html.parser')

    # Both row classes in one filter: a list matches rows carrying either one.
    row_classes = [return_class(0), return_class(1)]

    processsorList = []
    temp_count = 0
    for row in soup.find_all('tr', attrs={'class': row_classes}):
        specs_cells = row.find_all('td', attrs={'class': 'specs'})
        # Index 1 mirrors the original "temp == 1" check (second specs cell).
        if len(specs_cells) > 1:
            processsorList.append(specs_cells[1].text)
        temp_count += 1

    print(temp_count)
    return processsorList
答案 0 :(得分:2)
您的代码过于复杂,您可以使用 css选择器来查找所需的元素:
from bs4 import BeautifulSoup
import requests
# Name the parser explicitly so the parse tree does not depend on which
# optional parsers happen to be installed (and bs4 emits no parser warning).
soup = BeautifulSoup(
    requests.get("http://www.notebookcheck.net/Smartphone-Processors-Benchmark-List.149513.0.html").content,
    "html.parser",
)
# Every anchor inside a td.specs cell of the table with id sortierbare_tabelle.
procs = [a.text for a in soup.select("#sortierbare_tabelle tr td.specs a")]
print(procs)
所有处理器都列在 id 为 sortierbare_tabelle 的表格中,处理器的名称位于带有 specs 类的 td 内的锚标签(a)中。
这给了你:
[u'Apple A9X', u'Samsung Exynos 8890 Octa', u'HiSilicon Kirin 955', u'HiSilicon Kirin 950', u'Apple A10 Fusion', u'Samsung Exynos 7420 Octa', u'Qualcomm Snapdragon 820 MSM8996', u'Apple A9', u'Intel Atom x7-Z8700', u'Qualcomm Snapdragon 810 MSM8994', u'Qualcomm Snapdragon 652 MSM8976', u'Qualcomm Snapdragon 808 MSM8992', u'Qualcomm Snapdragon 650 MSM8956', u'Intel Atom Z3795', u'Intel Atom Z3785', u'Intel Atom Z3775', u'Intel Atom Z3775D', u'Intel Atom Z3770', u'Intel Atom Z3770D', u'Intel Atom x5-Z8500', u'Nvidia Tegra X1', u'Apple A8X', u'Nvidia Tegra K1 (Denver)', u'Samsung Exynos 5433 Octa', u'Apple A8', u'Nvidia Tegra K1', u'Qualcomm Snapdragon 805 APQ8084', u'Intel Atom Z3580', u'Intel Atom Z3736F', u'Intel Atom Z3736G', u'Intel Atom x5-Z8350', u'Intel Atom x5-Z8300', u'Intel Atom Z3745', u'Intel Atom Z3745D', u'Intel Atom Z3740', u'Intel Atom Z3740D', u'Intel Atom Z3735D', u'Intel Atom Z3735E', u'Intel Atom Z3735F', u'Intel Atom Z3735G', u'Qualcomm Snapdragon 801 MSM8974AC', u'Samsung Exynos 5430 Octa', u'Mediatek MT6595', u'Qualcomm Snapdragon 801 APQ8074AB', u'Qualcomm Snapdragon 801 MSM8974AB', u'Nvidia Tegra 4', u'Intel Atom Z3680', u'Intel Atom Z3680D', u'Qualcomm Snapdragon 801 MSM8974AA', u'HiSilicon Kirin 925', u'Qualcomm Snapdragon 800 MSM8974', u'Samsung Exynos 5420 Octa', u'Qualcomm Snapdragon 625', u'HiSilicon Kirin 650', u'Mediatek Helio P10 MT6755', u'Apple A7', u'Intel Atom Z3570', u'Intel Atom Z3560', u'Samsung Exynos 5410 Octa', u'Intel Atom Z3480', u'Intel Atom x3-C3440', u'Samsung Exynos 5260 Hexa', u'Mediatek MT8135', u'Intel Atom Z3530', u'Samsung Exynos 5250 Dual', u'Samsung Exynos 7580 Octa', u'Qualcomm Snapdragon 617 MSM8952', u'Qualcomm Snapdragon 615 MSM8939', u'Rockchip RK3288', u'Qualcomm Snapdragon 610 MSM8936', u'MediaTek MT8163 V/A 1.5 GHz', u'Mediatek MT6592', u'Qualcomm Snapdragon 600 APQ8064T', u'Samsung Exynos 7578', u'HiSilicon Kirin 910T', u'MediaTek MT8163 V/B 1.3 GHz', u'MediaTek MT8161', u'Intel Atom x3-C3230RK', 
u'Qualcomm Snapdragon 415 MSM8929', u'Intel Atom Z3460', u'Qualcomm Snapdragon S4 Pro APQ8064A', u'Mediatek MT8165', u'Mediatek MT6732', u'Mediatek MT6735', u'Rockchip RK3188', u'Qualcomm Snapdragon 410 MSM8916', u'Qualcomm Snapdragon 410 APQ8016', u'HiSilicon Kirin 910', u'Intel Atom Z2760', u'Apple A6x', u'Intel Atom Z2580', u'Qualcomm Snapdragon S4 Pro MSM8960DT', u'Qualcomm Snapdragon S4 Pro MSM8960T', u'Qualcomm Snapdragon 400 8930AB', u'Qualcomm Snapdragon S4 Plus APQ8060A', u'Qualcomm Snapdragon S4 Plus MSM8960', u'Qualcomm Snapdragon S4 Plus MSM8260A', u'Intel Atom Z2560', u'AMD Z-60', u'AMD Z-01', u'Intel Atom Z560', u'Intel Atom Z550', u'Apple A6', u'Intel Atom x3-C3130', u'Samsung Exynos 4412 Quad', u'NVIDIA Tegra 3', u'Mediatek MT8127', u'Mediatek MT6589T', u'Mediatek MT8389', u'Mediatek MT8125', u'Spreadtrum SC9830A', u'Mediatek MT8121', u'Mediatek MT6582', u'Mediatek MT6582M', u'Mediatek MT6580M', u'Qualcomm Snapdragon 212 APQ8009', u'Qualcomm Snapdragon 400 MSM8926', u'Qualcomm Snapdragon 400 MSM8226', u'Qualcomm Snapdragon 400 APQ8026', u'Mediatek MT6589', u'Qualcomm Snapdragon 200 MSM8212', u'Qualcomm Snapdragon 210 MSM8909', u'Marvell PXA1088', u'Qualcomm Snapdragon S4 Plus MSM8930', u'Intel Atom Z2480', u'Intel Atom Z540', u'Intel Atom Z530', u'Intel Atom Z670', u'Intel Atom Z2460', u'Intel Atom Z520', u'Qualcomm Snapdragon S4 Plus MSM8227', u'Samsung Exynos 4212 1.5 GHz', u'Texas Instruments OMAP 4470', u'Rockchip RK3066 1.5 GHz', u'Qualcomm Snapdragon S4 Play MSM8625Q', u'Qualcomm Snapdragon 200 8225Q', u'Qualcomm Snapdragon S4 Play MSM8225Q', u'MediaTek MT8312', u'Renesas MP5232', u'Broadcom BCM21664T', u'Marvell PXA986', u'Qualcomm Snapdragon S3 MSM8660', u'Qualcomm Snapdragon S3 MSM8260', u'Samsung Exynos 4210 1.4 GHz', u'Texas Instruments OMAP 4460', u'Rockchip RK3168', u'Samsung Exynos 4210 1.2 GHz', u'MediaTek MT8377', u'Broadcom BCM28155', u'Texas Instruments OMAP 4430', u'MediaTek MT6572', u'Spreadtrum SC8830', u'Apple A5x', u'Qualcomm 
Snapdragon S4 Play MSM8225', u'Intel Atom Z2420', u'Apple A5', u'Nvidia Tegra 2 (250)', u'Qualcomm Snapdragon 200 8210', u'MediaTek MT8317T', u'MediaTek MT6577', u'ST-Ericsson NovaThor U8500', u'ST-Ericsson NovaThor U8420', u'Intel Atom Z510', u'Intel Atom Z2000', u'MediaTek MT6575', u'Intel Atom Z500', u'Qualcomm Snapdragon S2 MSM8255', u'AllWinner A10', u'Apple A4', u'AllWinner A13', u'WonderMedia PRIZM WM8950', u'Samsung Hummingbird S5PC110 / Exynos 3110', u'Qualcomm Snapdragon S1 MSM7227A', u'Qualcomm Snapdragon S1 MSM7225A', u'Rockchip RK2918', u'Actions ACT-ATM7029', u'Qualcomm Snapdragon S1 QSD8250', u'Qualcomm Snapdragon S1 MSM7227']
您可能想要查看一个基本的python教程,因为您在代码中执行的很多操作都过于复杂,比如使用processsorList += [j.text]
而不是processsorList.append(j.text)
。另外也建议阅读 PEP 8 style guide(代码风格指南)。
要处理处理器没有链接的情况,只需获取所有 tr,检查它们的 class 是否匹配这两个类之一;先查找锚点,如果不存在,则取第一个 td.specs 的文本:
# Name the parser explicitly so the parse tree does not depend on which
# optional parsers happen to be installed (and bs4 emits no parser warning).
soup = BeautifulSoup(
    requests.get("http://www.notebookcheck.net/Smartphone-Processors-Benchmark-List.149513.0.html").content,
    "html.parser",
)
procs = []
for tr in soup.select("#sortierbare_tabelle tr + tr"):
    # Only rows whose class list is exactly one of the two data-row classes.
    if tr.get("class") in [["smartphone_odd"], ["smartphone_even"]]:
        anc = tr.select_one("td.specs a")
        # Prefer the anchor text; fall back to the first td.specs cell when
        # the row has no link.
        # NOTE(review): the sample output for this snippet contains values
        # such as '\xa0575' for link-less rows, so the first td.specs cell
        # may not hold the processor name - confirm against the page.
        cell = anc if anc is not None else tr.select_one("td.specs")
        # Skip rows with no td.specs at all instead of raising AttributeError.
        if cell is not None:
            procs.append(cell.text)
这给了你:
['Apple A9X', 'Samsung Exynos 8890 Octa', 'HiSilicon Kirin 955', 'HiSilicon Kirin 950', 'Apple A10 Fusion', 'Samsung Exynos 7420 Octa', 'Qualcomm Snapdragon 820 MSM8996', 'Apple A9', 'Intel Atom x7-Z8700', 'Qualcomm Snapdragon 810 MSM8994', 'Qualcomm Snapdragon 652 MSM8976', 'Qualcomm Snapdragon 808 MSM8992', 'Qualcomm Snapdragon 650 MSM8956', 'Intel Atom Z3795', 'Intel Atom Z3785', 'Intel Atom Z3775', 'Intel Atom Z3775D', 'Intel Atom Z3770', 'Intel Atom Z3770D', 'Intel Atom x5-Z8500', 'Nvidia Tegra X1', 'Apple A8X', 'Nvidia Tegra K1 (Denver)', '\xa0575', '\xa0581', 'Samsung Exynos 5433 Octa', 'Apple A8', 'Nvidia Tegra K1', 'Qualcomm Snapdragon 805 APQ8084', 'Intel Atom Z3580', 'Intel Atom Z3736F', 'Intel Atom Z3736G', 'Intel Atom x5-Z8350', 'Intel Atom x5-Z8300', 'Intel Atom Z3745', 'Intel Atom Z3745D', 'Intel Atom Z3740', 'Intel Atom Z3740D', 'Intel Atom Z3735D', 'Intel Atom Z3735E', 'Intel Atom Z3735F', 'Intel Atom Z3735G', 'Qualcomm Snapdragon 801 MSM8974AC', 'Samsung Exynos 5430 Octa', 'Mediatek MT6595', 'Qualcomm Snapdragon 801 APQ8074AB', 'Qualcomm Snapdragon 801 MSM8974AB', 'Nvidia Tegra 4', 'Intel Atom Z3680', 'Intel Atom Z3680D', '\xa0701', 'Qualcomm Snapdragon 801 MSM8974AA', 'HiSilicon Kirin 925', 'Qualcomm Snapdragon 800 MSM8974', 'Samsung Exynos 5420 Octa', '\xa0712', '\xa0717', '\xa0718*', 'Qualcomm Snapdragon 625', '\xa0720', 'HiSilicon Kirin 650', '\xa0722', 'Mediatek Helio P10 MT6755', 'Apple A7', 'Intel Atom Z3570', 'Intel Atom Z3560', 'Samsung Exynos 5410 Octa', 'Intel Atom Z3480', 'Intel Atom x3-C3440', 'Samsung Exynos 5260 Hexa', '\xa0755', 'Mediatek MT8135', 'Intel Atom Z3530', 'Samsung Exynos 5250 Dual', '\xa0779*', '\xa0780', 'Samsung Exynos 7580 Octa', 'Qualcomm Snapdragon 617 MSM8952', '\xa0783', 'Qualcomm Snapdragon 615 MSM8939', '\xa0785*', 'Rockchip RK3288', '\xa0795', 'Qualcomm Snapdragon 610 MSM8936', 'MediaTek MT8163 V/A 1.5 GHz', 'Mediatek MT6592', 'Qualcomm Snapdragon 600 APQ8064T', 'Samsung Exynos 7578', 'HiSilicon Kirin 910T', 
'MediaTek MT8163 V/B 1.3 GHz', 'MediaTek MT8161', 'Intel Atom x3-C3230RK', '\xa0809*', 'Qualcomm Snapdragon 415 MSM8929', 'Intel Atom Z3460', 'Qualcomm Snapdragon S4 Pro APQ8064A', '\xa0830*', 'Mediatek MT8165', 'Mediatek MT6732', 'Mediatek MT6735', 'Rockchip RK3188', 'Qualcomm Snapdragon 410 MSM8916', 'Qualcomm Snapdragon 410 APQ8016', '\xa0852', 'HiSilicon Kirin 910', 'Intel Atom Z2760', '\xa0929', '\xa0930', '\xa0931', 'Apple A6x', 'Intel Atom Z2580', 'Qualcomm Snapdragon S4 Pro MSM8960DT', 'Qualcomm Snapdragon S4 Pro MSM8960T', 'Qualcomm Snapdragon 400 8930AB', 'Qualcomm Snapdragon S4 Plus APQ8060A', 'Qualcomm Snapdragon S4 Plus MSM8960', 'Qualcomm Snapdragon S4 Plus MSM8260A', 'Intel Atom Z2560', 'AMD Z-60', 'AMD Z-01', 'Intel Atom Z560', 'Intel Atom Z550', 'Apple A6', 'Intel Atom x3-C3130', 'Samsung Exynos 4412 Quad', 'NVIDIA Tegra 3', 'Mediatek MT8127', 'Mediatek MT6589T', 'Mediatek MT8389', 'Mediatek MT8125', 'Spreadtrum SC9830A', '\xa01006', '\xa01007', 'Mediatek MT8121', 'Mediatek MT6582', 'Mediatek MT6582M', 'Mediatek MT6580M', '\xa01012', 'Qualcomm Snapdragon 212 APQ8009', 'Qualcomm Snapdragon 400 MSM8926', 'Qualcomm Snapdragon 400 MSM8226', 'Qualcomm Snapdragon 400 APQ8026', 'Mediatek MT6589', 'Qualcomm Snapdragon 200 MSM8212', 'Qualcomm Snapdragon 210 MSM8909', 'Marvell PXA1088', 'Qualcomm Snapdragon S4 Plus MSM8930', '\xa01022', 'Intel Atom Z2480', 'Intel Atom Z540', 'Intel Atom Z530', 'Intel Atom Z670', 'Intel Atom Z2460', 'Intel Atom Z520', 'Qualcomm Snapdragon S4 Plus MSM8227', 'Samsung Exynos 4212 1.5 GHz', 'Texas Instruments OMAP 4470', '\xa01065', 'Rockchip RK3066 1.5 GHz', 'Qualcomm Snapdragon S4 Play MSM8625Q', 'Qualcomm Snapdragon 200 8225Q', 'Qualcomm Snapdragon S4 Play MSM8225Q', 'MediaTek MT8312', 'Renesas MP5232', 'Broadcom BCM21664T', 'Marvell PXA986', 'Qualcomm Snapdragon S3 MSM8660', 'Qualcomm Snapdragon S3 MSM8260', 'Samsung Exynos 4210 1.4 GHz', 'Texas Instruments OMAP 4460', 'Rockchip RK3168', 'Samsung Exynos 4210 1.2 GHz', 
'MediaTek MT8377', 'Broadcom BCM28155', 'Texas Instruments OMAP 4430', 'MediaTek MT6572', 'Spreadtrum SC8830', 'Apple A5x', 'Qualcomm Snapdragon S4 Play MSM8225', 'Intel Atom Z2420', 'Apple A5', 'Nvidia Tegra 2 (250)', 'Qualcomm Snapdragon 200 8210', 'MediaTek MT8317T', 'MediaTek MT6577', 'ST-Ericsson NovaThor U8500', 'ST-Ericsson NovaThor U8420', 'Intel Atom Z510', 'Intel Atom Z2000', 'MediaTek MT6575', 'Intel Atom Z500', 'Qualcomm Snapdragon S2 MSM8255', '\xa01114', 'AllWinner A10', '\xa01116', 'Apple A4', 'AllWinner A13', 'WonderMedia PRIZM WM8950', 'Samsung Hummingbird S5PC110 / Exynos 3110', 'Qualcomm Snapdragon S1 MSM7227A', 'Qualcomm Snapdragon S1 MSM7225A', '\xa01123', '\xa01124', 'Rockchip RK2918', '\xa01126', '\xa01127', '\xa01128', 'Actions ACT-ATM7029', 'Qualcomm Snapdragon S1 QSD8250', '\xa01133*', 'Qualcomm Snapdragon S1 MSM7227']
答案 1 :(得分:-1)
def processor_list_online():
    """Scrape processor names from the notebookcheck smartphone CPU table.

    Downloads the benchmark list page, visits every ``<tr>`` in it and
    collects the text of the second ``td.specs`` cell of each row that has
    one. The number of rows visited is printed before returning.

    The previous version repeated the full scan inside ``while True`` and
    only stopped when the last collected name equalled the hard-coded string
    ``"Qualcomm Snapdragon S1 MSM7227"``; if the page ended with any other
    entry it looped forever while appending duplicates. A single pass returns
    the same list whenever the old loop would have terminated.

    Returns:
        list: text of the second ``td.specs`` cell of every row that has at
        least two such cells, in document order.
    """
    # Function-scope imports keep this block usable on both Python 2 and 3.
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    url = "http://www.notebookcheck.net/Smartphone-Processors-Benchmark-List.149513.0.html"
    # closing() guarantees the HTTP response is released once parsed.
    with closing(urlopen(url)) as htmlfile:
        soup = BeautifulSoup(htmlfile, 'html.parser')

    processsorList = []
    temp_count = 0
    for row in soup.find_all('tr'):
        specs_cells = row.find_all('td', attrs={'class': 'specs'})
        # Index 1 mirrors the original "temp == 1" check (second specs cell).
        if len(specs_cells) > 1:
            processsorList.append(specs_cells[1].text)
        temp_count += 1

    print(temp_count)
    return processsorList