这是我拼凑/编写以启动公司网站的代码,将下拉菜单调整为“SKU”,从excel中的B列获取SKU并点击提交按钮。然后,它找到第81个TD元素并将内部文本粘贴到我的excel中并移动到下一个sku。
问题在于,由于查询时间的原因,我需要等待4秒才能将数据返回到第81个TD元素。超过4700 skus,这需要很长时间才能完成。我可以将多箱excel应用程序运行到8个运行不同的块,每个增量为6k,以分散工作量,这将我的总运行时间减少到大约8小时而不是47小时。
多年前,我能够在没有IE窗口的情况下完成数千页的抓取数据,但这是因为每个页面都包含我正在指导的html中的数据。在这里,我只有1个网址,这是我对每个sku感兴趣的ASP查询返回结果。
无论如何,为了获得每个sku的预期结果,我不必在IE窗口内运行它,以便每次都必须将数据输入到字段中,从而更快地实现这一点吗?还有关于我需要的数据如何在查询返回结果中?
Enum READYSTATE
READYSTATE_UNINITIALIZED = 0
READYSTATE_LOADING = 1
READYSTATE_LOADED = 2
READYSTATE_INTERACTIVE = 3
READYSTATE_COMPLETE = 4
End Enum
Sub ImportZ2()
Dim ie As Object
Dim frm As Variant
Dim element As Variant
Dim subButton As Variant
Dim typeOption
Dim ws As Worksheet
Dim rowNumber, startRow, endRow, time1, time2, y, thisWebsite
Dim TDelements As IHTMLElementCollection
Dim TDelement As HTMLTableCell
Dim TD As Object
Dim LISTele As Variant
thisWebsite = "http://foo.bar.corporate.report.server"
startRow = 1
endRow = 6000
rowNumber = startRow
Set ws = ThisWorkbook.Worksheets("test")
Set ie = CreateObject("InternetExplorer.Application")
ie.navigate thisWebsite
While ie.READYSTATE <> 4: DoEvents: Wend
ie.Visible = False
Set typeOption = ie.document.getElementById("ctl32_ctl04_ctl03_ddValue")
y = 5
colNumber = 6
rowNumber = rowNumber - 1
Do While rowNumber < endRow
rowNumber = rowNumber + 1
'this conditional allows the program to see if the data has already been
'submitted for this sku, if it has then move on to the next sku, if their
'has been an error or no data entered yet, then it proceeds to submitting
'the query 'for this sku
If Len(ws.Cells(rowNumber, 6).Value) < 30 Then
'These IDs do not change. typeOption is a 2 choice dropdown and is required
'in order to search. frm is the field i enter my excel data into. subButton
'is to execute the search.
Set frm = ie.document.getElementById("ctl32_ctl04_ctl05_txtValue")
Set subButton = ie.document.getElementById("ctl32_ctl04_ctl00")
For Each LISTele In typeOption.getElementsByTagName("option")
If LISTele.innerText = "SKU" Then LISTele.Selected = True: Exit For
Next
frm.Value = ws.Cells(rowNumber, 2).Value
subButton.Click
'each query needs 4 seconds for the data in the 81st TD element to fully
'load so i can put that into excel.
time1 = Now
time2 = Now + TimeValue("0:00:04")
Do Until time1 >= time2
DoEvents
time1 = Now()
Loop
Set TDelements = ie.document.getElementsByTagName("TD")
'query results do not have standard names, ids, or anything for the TD
'fields. each new query has different td information than the one prior.
'example, i search 1 sku and the id for the 81st td = a838hs3bs80, i search
'another sku or even the same one again and the id for the 81st td is now =
'dj38s00283
'the only thing that stays the same is that its the 81st TD element that I
'want the innerText for
For Each TDelement In TDelements
If y < 76 Then
y = y + 1
Else
If y > 76 Then
Exit For
Else
Debug.Print "Processing row: "; rowNumber
ws.Cells(rowNumber, 6).Formula = Application.WorksheetFunction.Clean(TDelement.innerText)
y = y + 1
ws.Cells(rowNumber, 6).Select
End If
End If
Next TDelement
Else
End If
y = 5
Loop
End Sub