如何使用 Python pandas 按特定表头(TH)列的值从 HTML 表格中筛选行?例如,解析 HTML 页面后,
使用 pandas 代码从下面的表格中选出 Tag 为 2.4.33 的行。
root@1ec99b8b97af:/opt# python lookuptag.py
Id Tag Created Layers Size Delete
0 bb84b573f76 2.4.33 2 years ago 22 179.6 MB Delete
1 bb84b573f76 2.4.33-t2 2 years ago 22 179.6 MB Delete
2 5c97c0e3531 v8-2.4.33 1 year ago 22 180.7 MB Delete
这是我的 Python pandas 代码,我可以用它把 HTML 表格打印出来:
import requests
import pandas as pd
# Page whose HTML tables are parsed below.
url = 'http://docker-registry:8080/repo/tags/httpd'

# Download the raw page body (bytes) before handing it to pandas.
response = requests.get(url)
html = response.content

# Parse every table on the page with the BeautifulSoup-based parser, taking
# row 0 of each table as its header, then keep only the last table parsed.
df_list = pd.read_html(html, flavor='bs4', header=0)
df = df_list[len(df_list) - 1]
print(df)
答案 0 :(得分:0)
def FetchTable(
    context,
    tablexpath,
    url='https://www.espncricinfo.com/table/series/8048/season/2020/indian-premier-league',
):
    """Read the first HTML table found at *url* as a DataFrame of strings.

    Args:
        context: Caller-supplied context object; not used by this function.
        tablexpath: XPath of the wanted table. Accepted for interface
            compatibility but not used: the first table on the page is
            always the one returned.
        url: Page to read tables from. Defaults to the IPL 2020 standings
            page that was previously hard-coded.

    Returns:
        The first table parsed by ``pandas.read_html``, with every cell
        passed through ``str``.
    """
    tables = pd.read_html(url)
    first_table = tables[0]
    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map. The check is made on the class (not the instance) so a
    # table column that happens to be named "map" cannot shadow the method.
    if hasattr(pd.DataFrame, 'map'):
        return first_table.map(str)
    return first_table.applymap(str)
def _strip_spaces(value):
    """Return ``str(value)`` with every space character removed."""
    return str(value).replace(' ', '')


def _find_column_position(source_table, column_name):
    """Return the 0-based position of the first header equal to *column_name*, ignoring spaces."""
    wanted = _strip_spaces(column_name)
    for position, header in enumerate(source_table.columns):
        if _strip_spaces(header) == wanted:
            return position
    raise KeyError(f"column {column_name!r} not found in table")


def LookupValueInColumnTwoKeys(context, source_table, reference_column_1, reference_value_1, reference_column_2, reference_value_2, lookup_column):
    """Return the *lookup_column* cell of the first row matching two keys.

    A row matches when its *reference_column_1* cell contains
    *reference_value_1* and its *reference_column_2* cell contains
    *reference_value_2* (substring match). Spaces are ignored on both sides
    of every comparison, for headers as well as cell values, so a key such
    as "Delhi Capitals" matches a cell holding the same text.

    Args:
        context: Caller-supplied context object; not used by this function.
        source_table: pandas DataFrame to search.
        reference_column_1: Header of the first key column.
        reference_value_1: Text that must occur in the first key column.
        reference_column_2: Header of the second key column.
        reference_value_2: Text that must occur in the second key column.
        lookup_column: Header of the column whose value is returned.

    Returns:
        The cell from *lookup_column* in the first matching row.

    Raises:
        KeyError: If any of the three column headers is not in the table.
        IndexError: If no row matches both keys.
    """
    first_key_position = _find_column_position(source_table, reference_column_1)
    second_key_position = _find_column_position(source_table, reference_column_2)
    lookup_position = _find_column_position(source_table, lookup_column)

    first_wanted = _strip_spaces(reference_value_1)
    second_wanted = _strip_spaces(reference_value_2)

    # Materialise the cell array once instead of on every access.
    for row in source_table.values:
        first_cell = _strip_spaces(row[first_key_position])
        second_cell = _strip_spaces(row[second_key_position])
        if first_wanted in first_cell and second_wanted in second_cell:
            return row[lookup_position]

    raise IndexError(
        f"no row with {reference_column_1!r} containing {reference_value_1!r} "
        f"and {reference_column_2!r} containing {reference_value_2!r}"
    )
Another file:
from behave import *
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from readTableDataFromDB import readTableDataFromDB
from pandacode import WebTableValidationHelper as pandacode
from selenium.webdriver.chrome.options import Options
# Load the standings page through context.driver
# (presumably a Selenium WebDriver, given the imports -- confirm).
context.driver.get("https://www.espncricinfo.com/table/series/8048/season/2020/indian-premier-league")
# Fetch the standings table via the pandacode helper; the XPath names the table element.
matrix = pandacode.FetchTable(context,"//*[@class='table table-sm standings-widget-table text-center mb-0 border-bottom']")
# Compare the table with itself; the result is only used by the disabled print below.
ismatrixequal = pandacode.VerifyTable(context,matrix,matrix)
#print(ismatrixequal)
# Look up the "PT" value for the row whose "TEAM" is "Delhi Capitals" and print it.
lookupvalue = pandacode.LookupValueFromColumnSingleKey(context,matrix,"TEAM", "Delhi Capitals", "PT")
print(lookupvalue)
Another snippet:
def LookupValueFromColumnSingleKey1(context, source_table,reference_column_1, rowName, columnName):
    """Return the *columnName* cell of the first row whose key cell equals *rowName*.

    The key is compared case-insensitively against the cell in
    *reference_column_1* only, so the same text appearing in some other
    column of an earlier row cannot select the wrong row.

    Args:
        context: Caller-supplied context object; not used by this function.
        source_table: pandas DataFrame to search.
        reference_column_1: Header of the column holding the row key.
        rowName: Key text identifying the wanted row (case-insensitive).
        columnName: Header of the column whose value is returned.

    Returns:
        The cell from *columnName* in the first matching row.

    Raises:
        KeyError: If *reference_column_1* or *columnName* is not a header.
        IndexError: If no row has *rowName* in *reference_column_1*.
    """
    headers = list(source_table.columns)
    for required in (reference_column_1, columnName):
        if required not in headers:
            raise KeyError(f"column {required!r} not found in table")

    # list.index returns the first occurrence when headers are duplicated.
    key_position = headers.index(reference_column_1)
    value_position = headers.index(columnName)

    wanted = rowName.lower()
    for row in source_table.values:
        # str() keeps non-string cells (e.g. NaN) from raising on .lower().
        if str(row[key_position]).lower() == wanted:
            return row[value_position]

    raise IndexError(f"no row with {reference_column_1!r} equal to {rowName!r}")