所以,我已经建立了一个webscraper( LOTS 的帮助)从特定网站提取信息。在大多数情况下,它工作正常。但是,它偶尔会在我的电子表格中跳过一行。意思是,它转到第10行的链接,但是然后将它抓取的内容粘贴到第11行,跳过第11行链接,然后从第12行正常进行(这些行号只是示例;错误是间歇性的,连续)。另外,如果我将IE.Visibile
设置为 False ,问题会变得更糟,更频繁地发生。
任何人都可以帮我搞清楚这一点,以便刮刀更加一致......?这是我的代码:
Private Sub Wait(ByVal nSec As Long)
nSec = nSec + Timer
While nSec > Timer
DoEvents
Wend
End Sub
Sub Get_Things()
Dim ie As Object
Dim retStr As String
Dim sht As Worksheet
Dim LastRow As Long
Dim rCell As Range
Dim rRng As Range
Dim Count As Long
Dim Status As String
Dim BadCount As Long
Set sht = ThisWorkbook.Worksheets("Spells")
BadCount = 0
LastRow = sht.Cells(sht.Rows.Count, "A").End(xlUp).Row
Set ie = CreateObject("internetexplorer.application")
Set rRng = sht.Range("b4904:b" & LastRow)
Status = "Starting at row "
For Each rCell In rRng.Cells
Count = rCell.Row
Application.StatusBar = BadCount & " dead links so far. " & Status & Count & "of " & LastRow & "."
Wait 1
If rCell = "" Then
With ie
.Navigate rCell.Offset(0, -1).Value
.Visible = False
End With
Do While ie.Busy
DoEvents
Loop
Wait 1
On Error GoTo ErrHandler
rCell.Value = ie.Document.getElementById("content").innerText
rCell.WrapText = False
Status = "This row successfully scraped. Moving on to row "
Application.StatusBar = BadCount & " dead links so far. " & Status & Count + 1 & "of " & LastRow & "."
Status = "Previous row succeded. Now at row "
98 Wait 1
End If
Next rCell
If BadCount > 0 Then
Application.StatusBar = "Macro finshed running with " & BadCount & " errors."
Else
Application.StatusBar = "Finished."
End If
Exit Sub
ErrHandler:
rCell.Value = ""
Status = "Previous row failed. Moving on to row "
BadCount = BadCount + 1
Application.StatusBar = "This row is a dead link. " & BadCount & " dead links so far. Moving on to row " & Count + 1 & "of " & LastRow & "."
Resume 98
End Sub