使用Excel VBA屏幕抓取互联网 - 无法引用第二页

时间:2013-04-03 17:18:10

标签: html excel vba web-scraping screen-scraping

我的问题相对简单,令人抓狂。这与我在互联网和StackOverflow上发现的许多问题没有什么不同,但没有任何建议能帮助我解决这个问题....

使用Excel2010我没有将数据输入到单个字段中,提交它(第1部分)并捕获几行数据(第2部分),以列表/表格格式粘贴到Excel中(第30部分 - 并执行999,999次... ......第1部分和第3部分正在运行 - 第2部分拒绝承认新的互联网窗口,并且所有gettagnames和SelectTable变通方法只使用原始URL - 附加是使用Sendkeys的绝望尝试 - 更糟糕的是! - 第一个循环 - 然后绝对没有!

无论如何,代码应该相当简单 - 对于编码顺序中的一些混乱的应用程序 - 这取决于我开始用scalple削减位但是在几个小时的混乱之后使用了hatchets ......

Dim HTMLdoc As HTMLDocument
Dim ie As InternetExplorer

Sub EPF_FSA()

'Application.DisplayAlerts = False
Application.EnableEvents = False

Dim iHTML_Element As IHTMLElement
Dim sURL As String
Dim miss1 As Integer
Dim FrmNo As Long
Dim FrmName As String
Dim Address1 As String
Dim Address2 As String
Dim Address3 As String
Dim Address4 As String
Dim Address5 As String
Dim Address6 As String
Dim Address7 As String
Dim Address8 As String
Dim AnyLuck As String
Dim RowNum As Integer
Dim ColNum As Integer

RowNum = 1
ColNum = 1


FrmNo = 100111

While FrmNo <= 100112
'Do While FrmNo <= 100112
On Error GoTo Err_Clear
sURL = "http://www.fsa.gov.uk/register/epfSearchForm.do"


Set ie = CreateObject("internetexplorer.application")
'Set Ex = CreateObject("MicrosoftExcel.application")
ie.navigate sURL
ie.Visible = True

Do
' Wait till the Browser is loaded
Loop Until ie.readyState = READYSTATE_COMPLETE

Set HTMLdoc = ie.document

HTMLdoc.all.epfref.Value = FrmNo

For Each iHTML_Element In HTMLdoc.getElementsByTagName("input")

If iHTML_Element.Type = "submit" Then miss1 = miss1 + 1
If miss1 = 2 Then iHTML_Element.Click: Exit For
Next

Err_Clear:
If Err <> 0 Then Err.Clear
Resume Next
'PART 2 ********************************************************************
Do
' Wait till the Browser is loaded
Loop Until ie.readyState = READYSTATE_COMPLETE

Call SendKeys("^a")
DoEvents
Call SendKeys("^c")
DoEvents

      ActiveSheet.PasteSpecial Format:="Text", link:=False, DisplayAsIcon:=False
        range("A2").Select

'Copy and select relevant text to sheet 2

  Worksheets("Sheet1").Activate
 FrmName = Cells(39, "A").Value

 Address1 = Cells(59, "A").Value
 Address2 = Cells(60, "A").Value
 Address3 = Cells(61, "A").Value
 Address4 = Cells(62, "A").Value
 Address5 = Cells(63, "A").Value
 Address6 = Cells(64, "A").Value
 Address7 = Cells(65, "A").Value
 Address8 = Cells(66, "A").Value
 AnyLuck = Cells(47, "A").Value


  Worksheets("Sheet2").Activate
 Cells(RowNum, "A").Value = FrmNo
 Cells(RowNum, "B").Value = FrmName
 Cells(RowNum, "C").Value = Address1
 Cells(RowNum, "D").Value = Address2
 Cells(RowNum, "E").Value = Address3
 Cells(RowNum, "F").Value = Address4
 Cells(RowNum, "G").Value = Address5
 Cells(RowNum, "H").Value = Address6
 Cells(RowNum, "I").Value = Address7
 Cells(RowNum, "J").Value = Address8
 Cells(RowNum, "K").Value = AnyLuck
   RowNum = RowNum + 1
'ActiveCell.Offset(1, 0).Select
  Worksheets("Sheet1").Activate
    Cells.Select
    Selection.Delete Shift:=xlUp
    range("A2").Select
'MsgBox (FrmNo & Chr(10) & FrmName)
'Part 3
FrmNo = FrmNo + 1
ie.Quit
ie.Quit
Wend
'Loop

Application.EnableEvents = True

End Sub

1 个答案:

答案 0 :(得分:0)

看起来您可以直接进入结果页面。尝试:

sUrl = "http://www.fsa.gov.uk/register/epfRefSearch.do?epfRef="
sUrl = sUrl & frmNo

然后只需导航到该页面。然后,实际细节位于ID为“box”的div中