在第二页上抓取TD

时间:2018-10-18 15:07:08

标签: html vba web-scraping

我是一名业余脚本编写者,正在更新我在2012年使用的一个脚本。该脚本会依次打开列表中的各个页面并提取其中的表格数据。今天我发现它没有提取到任何数据;查看页面源代码后,我注意到源代码中除了一个空的HTML文档之外,还包含第二个HTML文档。

<html> 
<head> 
</head> 
<body bgcolor="#FFFFFF">


</body>
</html>


<HTML><HEAD><META HTTP-EQUIV="Content-Type" CONTENT="text/html;charset=windows-1252"><META NAME="Generator" CONTENT="Microsoft Word 97"><TITLE>Department of Defense Distribution No</TITLE><META NAME="Template" CONTENT="C:\PROGRAM FILES\MICROSOFT OFFICE\OFFICE\html.dot"></HEAD><BODY LINK="#0000ff" VLINK="#800080" background="/Cavs/TenderView/gray.gif" BACKGROUND="#400040"><A NAME="SECA"></A><H1 ALIGN="CENTER"><FONT COLOR="#004000" SIZE="+3">GLOBAL Freight Management</FONT></H1><CENTER><TABLE BGCOLOR="#D9E8D9" WIDTH="90%"><TR><TD WIDTH="550"><FONT COLOR="#000000" SIZE="+1">Department of Defense</TD><TD WIDTH="200" ALIGN="LEFT"><FONT COLOR="#000000" SIZE="+1">Distribution Number: </FONT><FONT SIZE="+1">1525405</TD></TR><TR><TD WIDTH="550"><FONT COLOR="#000000" SIZE="+1">Standard Tender of Freight Services</FONT></TD><TD WIDTH="200" ALIGN="LEFT"><FONT COLOR="#000000" SIZE="+1">Distribution Date: </FONT><FONT SIZE="+1">09/18/2018</TD></TR></TABLE></CENTER><BR><CENTER> <A HREF="javascript:history.back()">Back</A> | <A HREF="#SECA">Sec A</A> | <A HREF="#SECB">Sec B</A> | <A HREF="#SECC">Sec C</A> | <A HREF="#SECE1">Sec E1</A> | <A HREF="#SECF.2">Sec F.2</A></CENTER><HR><CENTER><TABLE BORDER="2" BGCOLOR="#D9E8D9"><TR><TD WIDTH="211" ALIGN="CENTER"><FONT COLOR="#008040" SIZE=+1><B>Section A - Carrier Information</TD></TR></TABLE><P></P><TABLE BGCOLOR="#D9E8D9" WIDTH="90%" BORDER="1"><TR><TD WIDTH="130"><FONT COLOR="#000000"><B>Issuing Carrier</B></FONT></TD><TD WIDTH="450">R & R EXPRESS, INC.</TD></TR><TR><TD><FONT COLOR="#000000"><B>SCAC<B></FONT></TD><TD>HMPP</TD></TR><TR><TD><FONT COLOR="#000000"><B>Street Address</B></FONT></TD><TD>3 Crafton Square</TD></TR><TR><TD><FONT COLOR="#000000"><B>City-St-Zip</B></FONT></TD><TD>Pittsburgh , PA   15205</TD></TR><TR><TD><FONT COLOR="#000000"><B>Telephone</B></FONT></TD><TD>559-667-4026  </TD></TR><TR><TD>&nbsp</TD><TD>559-308-9181  </TD></TR></TABLE><P></P><TABLE BGCOLOR="#D9E8D9" WIDTH="90%" BORDER="1"><TR><TD WIDTH="130"><FONT 
COLOR="#000000"><B>Mode</B></FONT></TD><TD WIDTH="450">B - Truck</TD></TR><TR><TD><FONT COLOR="#000000"><B>Tender No.</B></FONT></TD><TD>000580</TD></TR><TR><TD><FONT COLOR="#000000"><B>Supplement</B></FONT></TD><TD>00</TD></TR><TR><TD><FONT COLOR="#000000"><B>Solicitation</B></FONT></TD><TD>&nbsp</TD></TR><TR><TD><FONT COLOR="#000000"><B>Contract Number</B></FONT></TD><TD>&nbsp</TD></TR><TR><TD><FONT COLOR="#000000"><B>Cancels</B></FONT></TD><TD>&nbsp</TD></TR><TR><TD><FONT COLOR="#000000"><B>Effect of Supplement</B></FONT></TD><TD>&nbsp__&nbsp<FONT COLOR="#000000"> Add</FONT>&nbsp__&nbsp<FONT COLOR="#000000"> Delete</FONT>&nbsp__&nbsp<FONT COLOR="#000000"> Increase</FONT>&nbsp__&nbsp<FONT COLOR="#000000"> Decrease</FONT>&nbsp__&nbsp<FONT COLOR="#000000"> Change</FONT></TD></TR><TR><TD><FONT COLOR="#000000"><B>Nature of Change</B><FONT COLOR="#000000">:</FONT></FONT></TD><TD>&nbsp</TD></TR></TABLE><P></P><TABLE BGCOLOR="#D9E8D9" WIDTH="90%" BORDER="1"><TR><TD ALIGN="CENTER"><FONT COLOR="#000000"><B>Issue Date</B></FONT></TD><TD ALIGN="CENTER"><FONT COLOR="#000000"><B>Effective Date</B></FONT></TD><TD ALIGN="CENTER"><FONT COLOR="#000000"><B>Expiration Date</B></FONT></TD></TR><TR><TD ALIGN="CENTER">09/17/2018</TD><TD ALIGN="CENTER">09/17/2018</TD><TD ALIGN="CENTER">09/17/2020</TD></TR></TABLE><P></P><TABLE BGCOLOR="#D9E8D9" WIDTH="90%" BORDER="1"><TR><TD WIDTH="130"><FONT COLOR="#000000"><B>Application:</B></FONT></TD><TD WIDTH="225">&nbsp&nbsp<B>X&nbsp</B> <FONT COLOR="#000000"> Interstate</TD><TD WIDTH="225"><FONT COLOR="#000000"><B> Intrastate: </B></FONT>Not Applicable</TD></TR><TR><TD><FONT COLOR="#000000"><B>Operating Auth.</B></FONT></TD><TD COLSPAN="2">468429&nbsp</TD></TR><TR><TD><FONT COLOR="#000000"><B>Equipment Types</B></FONT></TD><TD COLSPAN="2">AF3, AZ3, AF6, AG6, A40, AX, AU, QA6</TD></TR><TR><TD><FONT COLOR="#000000"><B>Classification Used</B></FONT></TD><TD COLSPAN="2">&nbsp&nbsp<B>X&nbsp</B><FONT COLOR="#000000"> NMFC</FONT>&nbsp__&nbsp<FONT 
COLOR="#000000"> UFC</FONT>&nbsp&nbsp<B>X&nbsp</B><FONT COLOR="#000000"> DOD Unique</FONT>&nbsp__&nbsp<FONT COLOR="#000000"> STCC</FONT></TD></TR><TR><TD><FONT COLOR="#000000"><B></TABLE><P></P><TABLE BGCOLOR="#D9E8D9" WIDTH="90%" BORDER="1"><TR><TD WIDTH="195" ALIGN="CENTER"><FONT COLOR="#000000"><B>Commodity Number</B></FONT></TD><TD WIDTH="400" ALIGN="CENTER"><FONT COLOR=""><B>Description</B></FONT></TD></TR><TR><TD ALIGN="CENTER">999912-</TD><TD ALIGN="LEFT">FAK (See MFTRP 1C for Cargo Liabil ity)  </TD></TR><TR><TD ALIGN="CENTER">999912-01</TD><TD ALIGN="LEFT">FAK, INCLUDING CRATED HOUSEHOLD GOODS AND UNACCOMPANIED BAGGAGE, DPM SHIPMENTS (See MFTRP 1C for Ca rgo Liability)</TD></TR><TR><TD ALIGN="CENTER">137300-</TD><TD ALIGN="LEFT">Military Impedimenta (camp equipa ge, subsistence stores, medical st ores, emergency ammunition or othe r property of the Unite...</TD></TR><TR><TD ALIGN="CENTER">145995-</TD><TD ALIGN="LEFT">Containers, military projectile,  shipping and storage, cylindrical,  sheet steel, 16 gauge or thicker,  with or without intern...</TD></TR><TR><TD ALIGN="CENTER">146250-</TD><TD ALIGN="LEFT">Guns, NOI (Cannon, Howitzers, or  Mortars):  </TD></TR><TR><TD ALIGN="CENTER">146770-</TD><TD ALIGN="LEFT">Torpedo Tube Mechanism or Parts t hereof, in boxes  </TD></TR><TR><TD ALIGN="CENTER">146790-</TD><TD ALIGN="LEFT">Torpedoes, submarine, without exp losives, dummy, without propelling  devices </TD></TR><TR><TD ALIGN="CENTER">147650-</TD><TD ALIGN="LEFT">Military Equipment, for repair of  pneumatic floats, plywood boats o r pontoons, in boxes </TD></TR><TR><TD ALIGN="CENTER">162813-</TD><TD ALIGN="LEFT">Projectile Or Rocket Parts, Noi:Pl astic or rubber; plastic or rubber  and metal combined, see Note, ite m 162817; plate or shee...</TD></TR><TR><TD ALIGN="CENTER">181160-</TD><TD ALIGN="LEFT">Tanks:Fuel, aircraft, aluminum, ex ternal jettisonable type, in boxes , crates or steel containers: </TD></TR><TR><TD ALIGN="CENTER">184975-</TD><TD 
ALIGN="LEFT">Tools, Or Parts Named:Jacks, hydra ulic, other than cantilever, not w heeled: </TD></TR><TR><TD ALIGN="CENTER">186980-</TD><TD ALIGN="LEFT">Towbars or Towbar Assemblies, air craft ground towing or towing and  steering combined, not self-propel led, with or without wh...</TD></TR><TR><TD ALIGN="CENTER">188560-</TD><TD ALIGN="LEFT">Vehicles, Other Than Self-propelle d:Barrows, Carts, Trucks or Wagons , NOI, hand, in packages, see Note , item 188561, subject ...</TD></TR><TR><TD ALIGN="CENTER">189140-</TD><TD ALIGN="LEFT">Vehicles, Other Than Self-propelle d:Freight Carts, Trucks, Trailers  or Wagons, horse drawn or trailer,  NOI, with or without b...</TD></TR><TR><TD ALIGN="CENTER">189600-</TD><TD ALIGN="LEFT">Vehicles, Other Than Self-propelle d:Trailers, freight, straddle type , NOI, with hydraulic lift, weighi ng each 5,000 pounds or...</TD></TR><TR><TD ALIGN="CENTER">190100-</TD><TD ALIGN="LEFT">VEHICLES, MOTOR, see Notes, items  190122 and 190124:  </TD></TR></TABLE><P><FONT COLOR="#000000">Released Value: Rates quoted are subject to a released value not to exceed </FONT>________<FONT COLOR="#000000"> per pound per article (vehicle) as prepared for shipment, except as provided in Item 190 (Released Value) of the applicable Military Freight Traffic Rules Publication. (NOT APPLICABLE TO GT TENDERS) </FONT></P><P></P><FONT COLOR="#000000">***</FONT> This is a Voluntary, Any Service tender. <FONT COLOR="#000000">***</FONT><P><A NAME="SECB"></A> </P><CENTER> <A HREF="javascript:history.back()">Back</A> | <A HREF="#SECA">Sec A</A> | <A HREF="#SECB">Sec B</A> | <A HREF="#SECC">Sec C</A> | <A HREF="#SECE1">Sec E1</A> | <A HREF="#SECF.2">Sec F.2</A></CENTER><HR><TABLE BORDER="2" BGCOLOR="#D9E8D9"><TR><TD WIDTH="300" ALIGN="CENTER"><FONT COLOR="#008040" SIZE=+1><B>Section B - General Terms and Conditions</B></FONT></TD></TR></TABLE><P></P><P ALIGN="LEFT"><FONT COLOR="#000000">1. 
Governing Publications..............: </FONT>MFTURP-1</P><P ALIGN="LEFT"><FONT COLOR="#000000">2. Combination/Proportional Rates: </FONT></P><P ALIGN="LEFT">&nbsp__&nbsp<FONT COLOR="#000000">  The Rates and Charges in this tender MAY be used as factors in the construction of any COMBINATION of through rates, charges or other provisions</FONT></FONT></P><P ALIGN="LEFT">&nbsp&nbsp<B>X&nbsp</B><FONT COLOR="#000000">  The rates and charges in this tender MAY NOT be used in contruction of COMBINATION rates and charges</FONT></FONT></P><P ALIGN="LEFT">&nbsp&nbsp<B>X&nbsp</B><FONT COLOR="#000000">  The rates in this tender may be used as PROPORTIONAL rates only</FONT></FONT></P><P><A NAME="SECC"></A> </P><CENTER> <A HREF="javascript:history.back()">Back</A> | <A HREF="#SECA">Sec A</A> | <A HREF="#SECB">Sec B</A> | <A HREF="#SECC">Sec C</A> | <A HREF="#SECE1">Sec E1</A> | <A HREF="#SECF.2">Sec F.2</A></CENTER><HR><TABLE BORDER="2" BGCOLOR="#D9E8D9"><TR><TD WIDTH="300" ALIGN="CENTER"><FONT COLOR="#008040" SIZE=+1><B>Section C - Carrier's Offer and Instructions<B></FONT></TD></TR></TABLE><P></P><P ALIGN="LEFT"><FONT COLOR="#000000"> 1. For Questions Concerning Tender Development, Telephone: </FONT>(559)667-4026</P><P ALIGN="LEFT"><FONT COLOR="#000000"> 2. By (Name and Title of Authorized Officer or Agent)............: </FONT>CHRISSY COX - AGENT<P><P ALIGN="LEFT"><FONT COLOR="#000000"> 3. Address: </FONT>1926 S CONYER VISALIA, CA 93277</P><P ALIGN="LEFT"><FONT COLOR="#000000"> 4. 
Date......: </FONT>03/01/2017</P></CENTER><P><A NAME="SECE1"></A></P><CENTER> <A HREF="javascript:history.back()">Back</A> | <A HREF="#SECA">Sec A</A> | <A HREF="#SECB">Sec B</A> | <A HREF="#SECC">Sec C</A> | <A HREF="#SECE1">Sec E1</A> | <A HREF="#SECF.2">Sec F.2</A></CENTER><HR><CENTER><TABLE BORDER="1" BGCOLOR="#FFFF80"><TR><TD WIDTH="300" ALIGN="CENTER">Section E1 - State to State Rate Matrix</TD></TR></TABLE><BR><TABLE WIDTH="50%" BORDER="1"><TR><TR><TD WIDTH="100"><FONT COLOR="#0000FF">Rate Qualifier</TD><TD WIDTH="200">PM Per Mile Per Vehicle Used</TD></TR><TR><TD><FONT COLOR="#0000FF">Between</FONT></TD><TD>&nbsp</TD></TR><TR><TD><FONT COLOR="#0000FF">Minimum Charge</FONT></TD><TD>2200.0</TD></TR><TR><TD><FONT COLOR="#0000FF">Minimum Weight</FONT></TD><TD>&nbsp</TD></TR></TABLE><BR><TABLE WIDTH="90%" BORDER="1"><TR><TD COLSPAN="4" ALIGN="CENTER"><FONT SIZE="+1" COLOR="#0000FF">The folowing Points are excepted from the application of Section E1 of this tender.</FONT></TD></TR><TR><TD WIDTH="100"><FONT COLOR="#0000FF">SPLC</TD><TD WIDTH="200"><FONT COLOR="#0000FF">Location</TD><TD WIDTH="50"><FONT COLOR="#0000FF">O</TD><TD WIDTH="50"><FONT COLOR="#0000FF">D</TD></TR><TR><TD COLSPAN="4">Not Applicable</TD></TR></TABLE><BR><TABLE WIDTH="90%" BORDER="1"><TR><TH COLSPAN="8"><FONT SIZE="+1" COLOR="#0000FF">Sequence 001</FONT></TH></TR><TR><TH><FONT COLOR="#0000FF">To\From</FONT></TH><TH><FONT COLOR="#0000FF">MT</FONT></TH><TH><FONT COLOR="#0000FF">WY</FONT></TH><TH><FONT COLOR="#0000FF">CO</FONT></TH><TH><FONT COLOR="#0000FF">UT</FONT></TH><TH><FONT COLOR="#0000FF">NM</FONT></TH><TH><FONT COLOR="#0000FF">AZ</FONT></TH><TH><FONT COLOR="#0000FF">AK</FONT></TH></TR><TR><TD>ME</TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD></TR><TR><TD>NH</TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     
</TD></TR><TR><TD>VT</TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD></TR><TR><TD>MA</TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD></TR><TR><TD>RI</TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD></TR><TR><TD>CT</TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD></TR><TR><TD>NY</TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD></TR><TR><TD>NJ</TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD></TR><TR><TD>PA</TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD></TR><TR><TD>DE</TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD></TR><TR><TD>MD</TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD></TR><TR><TD>VA</TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD></TR><TR><TD>WV</TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD><TD>3.50     </TD>

您能建议我如何访问第二个HTML文档中的表格数据吗?

这是我一直在使用并致力于更新的代码。

Public Sub ETA()
'Drives an already-open Internet Explorer window whose URL starts with CoreURL.
'For each worksheet row k (2 up to, but not including, LAST_ROW) it:
'  1. clicks the k-th <a> element on the current (list) page,
'  2. writes the text of the first <td> of the page that opens to column Q, row k,
'  3. clicks the second <a> element of that page to navigate back,
'  4. saves ThisWorkbook so progress survives an interruption.
'Requires a project reference to "Microsoft Internet Controls" (SHDocVw).

'Can change the coreURL to be assigned from a worksheet
Const LAST_ROW As Long = 1223   'the loop ends when k reaches this row

Dim sws As SHDocVw.ShellWindows
Dim ieDoc As Object
Dim n As Integer
Dim k As Long
Dim CoreURL As String, wb As Object
Dim appIE As Object, tenderID As String
Dim sup As String, vlu As String
Dim lnk As Object, cell As Object
Dim txt As String

Application.ScreenUpdating = True
'Set main URL to evaluate open IE windows
CoreURL = "https://siteofmychoice"
Set sws = New SHDocVw.ShellWindows
'Cycle through all open IE windows and assign the window whose URL
'matches that decided above to a variable
For n = 0 To sws.Count - 1
    'Item(n) can be Nothing for a window that is closing
    If Not sws.Item(n) Is Nothing Then
        If Left(sws.Item(n).LocationURL, Len(CoreURL)) = CoreURL Then
            Set appIE = sws.Item(n)
            Exit For
        End If
    End If
Next n

'Without this guard the macro died with error 91 when no window matched
If appIE Is Nothing Then
    MsgBox "No open Internet Explorer window starts with " & CoreURL, vbExclamation
    Exit Sub
End If

k = 2 ' first data row
'Workbooks("TENDER UPDATE LIST.xlsm").Activate
Set wb = Workbooks("HMPP Complete Tender List.xlsm").Sheets("Complete")
tenderID = wb.Range("a" & k)
sup = wb.Range("b" & k)
vlu = tenderID & sup & "U"
Debug.Print vlu

Do While k < LAST_ROW

    'Re-read the document on every pass: a reference taken before a navigation
    'keeps pointing at the page that was showing at that time.
    Set ieDoc = appIE.document
    Set lnk = ieDoc.getElementsByTagName("a").Item(k)
    If lnk Is Nothing Then
        Debug.Print "No link #" & k & " on the list page - stopping."
        Exit Do
    End If
    lnk.Click
    WaitForBrowser appIE

    '''In tender Page
    Set ieDoc = appIE.document          'now the tender page, not the list
    Set cell = ieDoc.getElementsByTagName("TD").Item(0)
    If Not cell Is Nothing Then
        'innerText is a DOM property; getAttribute("innertext") is not reliable for it
        txt = cell.innerText
        'Write to the same sheet the input is read from, whatever workbook is active
        wb.Range("q" & k).Value = txt
        Debug.Print txt
    End If
    'NOTE(review): Item(0) is simply the first cell on the page. To pick a specific
    'field, locate the <td> whose text is the label and read the cell after it.

    k = k + 1

    'Second anchor of the tender page is used to go back to the list
    Set lnk = ieDoc.getElementsByTagName("a").Item(1)
    If lnk Is Nothing Then
        Debug.Print "No return link on the tender page - stopping."
        Exit Do
    End If
    lnk.Click
    WaitForBrowser appIE

    ThisWorkbook.Save

Loop
End Sub

Private Sub WaitForBrowser(ByVal browser As Object)
'Yields to the event queue until the browser is neither busy nor still loading.
    Do While browser.Busy Or browser.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop
End Sub

1 个答案:

答案 0 :(得分:0)

我不确定与页面交互和从文件读取是否会有所不同。如果我从文件中读取这段HTML,使用下面的代码可以得到927个td元素。您可以尝试把页面的HTML转存到一个HTMLDocument对象中,这样所有内容最终都应归入同一棵DOM树。

考虑到这段HTML相当混乱,而且页面随时可能发生变化,我对按位置匹配的做法持谨慎态度,但我可以使用下面的语句来获取标书编号(Tender No.):

'Positional lookup of the tender number: table index 3, row index 1, cell index 1 (all 0-based).
Dim tenderTable As Object, tenderRow As Object, tenderCell As Object
Set tenderTable = html.getElementsByTagName("table").Item(3)
Set tenderRow = tenderTable.getElementsByTagName("tr").Item(1)
Set tenderCell = tenderRow.getElementsByTagName("td").Item(1)
Debug.Print tenderCell.innerText

下面我演示了如何循环所有td元素,并根据某个单元格相对于包含文本“Tender No.”的节点的位置来查找标书编号。

VBA(所有td元素):

Option Explicit
Public Sub test()
    'Loads a saved copy of the page, then scans every <td> for the label
    '"Tender No." and prints the label together with the text of the next <td>.
    'Finally prints how many <td> elements were found.
    'Requires a reference to "Microsoft HTML Object Library".
    Dim html As HTMLDocument
    Set html = GetHTMLFileContent("C:\Users\User\Desktop\test.html")

    Dim elements As Object, i As Long
    Set elements = html.querySelectorAll("td")

    'Stop one short of the end: the value is read from item(i + 1), so a label
    'in the very last cell has no value cell to read.
    For i = 0 To elements.Length - 2
        If elements.item(i).innerText = "Tender No." Then
            Debug.Print elements.item(i).innerText & vbTab & elements.item(i + 1).innerText
            Exit For
        End If
    Next i
    Debug.Print "# td elements = " & elements.Length
End Sub

Public Function GetHTMLFileContent(ByVal filePath As String) As HTMLDocument
    'Reads the text file at filePath and returns a new HTMLDocument whose
    'body.innerHTML is set to the file's content (empty string for an empty file).
    '  filePath - path handed to FileSystemObject.OpenTextFile.
    'Requires a reference to "Microsoft HTML Object Library".
    Dim fso As Object, hFile As Object, hString As String, html As HTMLDocument
    Set html = New HTMLDocument
    Set fso = CreateObject("Scripting.FileSystemObject")
    Set hFile = fso.OpenTextFile(filePath)

    'ReadAll fails with "Input past end of file" on an empty file, so only
    'call it when there is something to read. One call returns the whole file.
    If Not hFile.AtEndOfStream Then hString = hFile.ReadAll()
    'Release the file handle explicitly instead of relying on object teardown
    hFile.Close

    html.body.innerHTML = hString
    Set GetHTMLFileContent = html
End Function

参考:

VBE > 工具 > 引用 > 添加对 Microsoft HTML Object Library(Microsoft HTML对象库)的引用