数据=
<div class="dojoxGridView" id="dojox_grid__View_1" role="presentation" style="width: 1900px; height: 721px; left: 1px; top: 0px;" widgetid="dojox_grid__View_1">
<input class="dojoxGridHiddenFocus" dojoattachpoint="hiddenFocusNode" role="presentation" type="checkbox"/>
<input class="dojoxGridHiddenFocus" role="presentation" type="checkbox"/>
<div class="dojoxGridScrollbox" dojoattachpoint="scrollboxNode" role="presentation" style="height: 721px;">
<div class="dojoxGridContent" dojoattachpoint="contentNode" hidefocus="hidefocus" role="presentation" style="height: 504px; width: 1900px;">
<div role="presentation" style="position: absolute; left: 0px; top: 0px;">
<div aria-selected="false" class="dojoxGridRow" role="row" style="">
<table border="0" cellpadding="0" cellspacing="0" class="dojoxGridRowTable" role="presentation" style="width: 1900px;">
<tbody>
<tr>
<td class="dojoxGridCell" idx="0" role="gridcell" style="display:none;width:100px;" tabindex="-1">
78126
</td>
<td class="dojoxGridCell" idx="1" role="gridcell" style="width:10%;" tabindex="-1">
Approved Plan
</td>
<td class="dojoxGridCell" idx="2" role="gridcell" style="width:10%;" tabindex="-1">
G-10
</td>
<td class="dojoxGridCell" idx="3" role="gridcell" style="width:40%;" tabindex="-1">
ROOF PLAN
</td>
</tr>
</tbody>
</table>
</div>
输入=
source = driver.page_source
soup = BeautifulSoup(source, "lxml")
print(soup. prettify())
for article in soup.find_all('div', class_='dojoxGridContent'):
drawing_no = article.find_all('td', class_='dojoxGridCell', idx='3')
# ->need one more line to extract text
print(""drawing_no")
Output =
<td class="dojoxGridCell" idx="3" role="gridcell" style="width:40%;" tabindex="-1">ROOF PLAN</td> ...
我只想提取“屋顶计划”,我应该如何编辑我的代码? 我尝试了drawing_no.text和drawing_no.value,但它说“无属性”。 感谢您的帮助!
答案 0 :(得分:0)
请尝试以下代码。但是通常,如果您传入idx = 3,它将仅返回一个元素。如果要从多个元素中提取文本,则可能需要使用更通用的标识符。
import lxml
from lxml import html
html_string = """
<div class="dojoxGridView" id="dojox_grid__View_1" role="presentation" style="width: 1900px; height: 721px; left: 1px; top: 0px;" widgetid="dojox_grid__View_1">
<input class="dojoxGridHiddenFocus" dojoattachpoint="hiddenFocusNode" role="presentation" type="checkbox"/>
<input class="dojoxGridHiddenFocus" role="presentation" type="checkbox"/>
<div class="dojoxGridScrollbox" dojoattachpoint="scrollboxNode" role="presentation" style="height: 721px;">
<div class="dojoxGridContent" dojoattachpoint="contentNode" hidefocus="hidefocus" role="presentation" style="height: 504px; width: 1900px;">
<div role="presentation" style="position: absolute; left: 0px; top: 0px;">
<div aria-selected="false" class="dojoxGridRow" role="row" style="">
<table border="0" cellpadding="0" cellspacing="0" class="dojoxGridRowTable" role="presentation" style="width: 1900px;">
<tbody>
<tr>
<td class="dojoxGridCell" idx="0" role="gridcell" style="display:none;width:100px;" tabindex="-1">
78126
</td>
<td class="dojoxGridCell" idx="1" role="gridcell" style="width:10%;" tabindex="-1">
Approved Plan
</td>
<td class="dojoxGridCell" idx="2" role="gridcell" style="width:10%;" tabindex="-1">
G-10
</td>
<td class="dojoxGridCell" idx="3" role="gridcell" style="width:40%;" tabindex="-1">
ROOF PLAN
</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
</div>
</div>
"""
tree = html.fromstring(html_string)
ROOFPLAN = tree.xpath('//tbody/tr//td[@idx="3"]/text()')
print(''.join(ROOFPLAN).strip())
答案 1 :(得分:0)
尝试关注代码
source="""<div class="dojoxGridView" id="dojox_grid__View_1" role="presentation" style="width: 1900px; height: 721px; left: 1px; top: 0px;" widgetid="dojox_grid__View_1">
<input class="dojoxGridHiddenFocus" dojoattachpoint="hiddenFocusNode" role="presentation" type="checkbox"/>
<input class="dojoxGridHiddenFocus" role="presentation" type="checkbox"/>
<div class="dojoxGridScrollbox" dojoattachpoint="scrollboxNode" role="presentation" style="height: 721px;">
<div class="dojoxGridContent" dojoattachpoint="contentNode" hidefocus="hidefocus" role="presentation" style="height: 504px; width: 1900px;">
<div role="presentation" style="position: absolute; left: 0px; top: 0px;">
<div aria-selected="false" class="dojoxGridRow" role="row" style="">
<table border="0" cellpadding="0" cellspacing="0" class="dojoxGridRowTable" role="presentation" style="width: 1900px;">
<tbody>
<tr>
<td class="dojoxGridCell" idx="0" role="gridcell" style="display:none;width:100px;" tabindex="-1">
78126
</td>
<td class="dojoxGridCell" idx="1" role="gridcell" style="width:10%;" tabindex="-1">
Approved Plan
</td>
<td class="dojoxGridCell" idx="2" role="gridcell" style="width:10%;" tabindex="-1">
G-10
</td>
<td class="dojoxGridCell" idx="3" role="gridcell" style="width:40%;" tabindex="-1">
ROOF PLAN
</td>
</tr>
</tbody>
</table>
</div>"""
from bs4 import BeautifulSoup
soup = BeautifulSoup(source,"html.parser")
for article in soup.find_all('div', class_='dojoxGridContent'):
drawing_no = article.find('td', class_='dojoxGridCell', idx='3')
if drawing_no:
print(drawing_no.get_text())
答案 2 :(得分:0)
您可以使用 public void CreateTableIfMissing()
{
var info = new StorageInfo(); // initialized with tablename and connectionstring
var storageAccount = CloudStorageAccount.Parse(info.ConnectionString);
var tableClient = storageAccount.CreateCloudTableClient();
var table = tableClient.GetTableReference(info.TableName);
try
{
table.CreateIfNotExists();
var s = DateTime.Now.ToString("o");
var entry = new TableEntity("partkey" + s, "rowkey" + s);
var op = TableOperation.Insert(entry);
table.Execute(op);
}
catch (Exception e)
{
Console.WriteLine(e);
throw;
}
}
属性并按其值进行选择
idx