Web Scraping Excel初学者问题:需要对象

时间:2013-12-20 05:51:22

标签: excel vba

我正在开发一个webscraper。 代码如下:

'============ Module 1 ======================

' Scrapes job listings from http://www.jobs.com/ into the sheet named "Sheet1".
'
' Writes a header row (Title / Company / Location / Description) to A1:D1,
' prompts for a job type and a zip code, submits them through the site's
' search form in a hidden Internet Explorer instance, and then walks every
' element of the result page: each element whose class name is "Result"
' starts a new output row, and elements classed "Title", "Company",
' "Location" or "Description" fill columns A-D of that row.
' Finally calls Macro1 and closes the browser.
Sub test()

    Dim sht As Worksheet
    Dim objIE As Object
    Dim what As Object
    Dim zipcode As Object
    Dim ele As Object
    Dim RowCount As Long
    Dim myjobtype As String
    Dim myzip As String

    Set sht = Sheets("Sheet1")
    RowCount = 1
    sht.Range("A" & RowCount) = "Title"
    sht.Range("B" & RowCount) = "Company"
    sht.Range("C" & RowCount) = "Location"
    sht.Range("D" & RowCount) = "Description"

    Set objIE = CreateObject("InternetExplorer.Application")

    myjobtype = InputBox("Enter type of job eg. sales, admin  ")
    myzip = InputBox("Enter zipcode of area where you wish to work")

    With objIE
        .Visible = False
        .navigate "http://www.jobs.com/"

        ' Wait until the page has finished loading (readyState 4 = complete).
        Do While .Busy Or .readyState <> 4
            DoEvents
        Loop

        ' Fill in the search form: "q" is the job-type field, "where" the zip code.
        Set what = .document.getElementsByName("q")
        what.Item(0).Value = myjobtype

        Set zipcode = .document.getElementsByName("where")
        zipcode.Item(0).Value = myzip

        '*====================================================*
        ' NOTE(review): this relies on the search button having id "JobsButton";
        ' if the page has no element with that id the call fails with
        ' "Object required" - confirm against the current page markup.
        .document.getElementById("JobsButton").Click
        '*=====================================================*

        ' Wait for the result page to finish loading.
        Do While .Busy Or .readyState <> 4
            DoEvents
        Loop

        For Each ele In .document.all
            Select Case ele.classname
                Case "Result"
                    ' A new result container: move on to the next output row.
                    RowCount = RowCount + 1
                Case "Title"
                    sht.Range("A" & RowCount) = ele.innertext
                Case "Company"
                    sht.Range("B" & RowCount) = ele.innertext
                Case "Location"
                    sht.Range("C" & RowCount) = ele.innertext
                Case "Description"
                    sht.Range("D" & RowCount) = ele.innertext
            End Select
        Next ele
    End With

    Macro1

    ' The browser was started hidden, so close it explicitly; otherwise the
    ' invisible process keeps running after the macro ends.
    objIE.Quit
    Set objIE = Nothing

End Sub

'============ Module 2 ======================

' Formats the imported data in columns A:D of the active sheet:
' auto-fits the column widths, top-aligns the cells and resets orientation,
' indent, shrink-to-fit and reading order, then fixes column D at width 50
' and auto-fits the row heights. Columns A:D are left selected on exit.
Sub Macro1()

' Macro1 Macro
' Formatting imported data

Columns("A:D").Select
Selection.Columns.AutoFit
With Selection
     .VerticalAlignment = xlTop
     .Orientation = 0
     .AddIndent = False
     .IndentLevel = 0
     .ShrinkToFit = False
     .ReadingOrder = xlContext
End With
Range("D1").Select
' Set column D to a fixed width of 50 before the row heights are auto-fitted.
Columns("D:D").ColumnWidth = 50
Columns("A:D").Select
Selection.Rows.AutoFit
End Sub

'============================================================

此代码的问题在于:以前搜索按钮有一个可用于单击的 ID,但现在网站已经把这个 ID 删除了。我尝试用许多示例替换两行 === 之间的内容,但都没有成功;套用本网站上的其他建议时,大多数情况下得到的错误是“需要对象”(Object required)。有人能帮我替换两行 === 之间单击按钮的那部分代码吗?

该网站上按钮的HTML现在更改为

<button type="submit" class="btn">Search Now</button>

=============================================== ============== 这就是代码的工作原理:http://www.youtube.com/watch?v=cSoRVZKRkvY

1 个答案:

答案 0 :(得分:1)

.document.getElementsByTagName("button")(0)应该能够捕获搜索按钮

我还多帮你做了一些。在 .document.getElementsByTagName("button")(0).click 之后使用以下代码:
' Gather every <article> element of the loaded page and copy the inner text
' of its first four child nodes into columns A-D, one row per article,
' starting at row 2.
' NOTE(review): presumably the four child nodes are title, company, location
' and description, in that order - confirm against the page markup.
Set articleList = .document.getElementsByTagName("article")
RowCount = 2
For Each articleNode In articleList
    ' Child node 0..3 maps to worksheet column 1..4 (A..D).
    For fieldIdx = 0 To 3
        sht.Cells(RowCount, fieldIdx + 1) = articleNode.ChildNodes(fieldIdx).innertext
    Next fieldIdx
    RowCount = RowCount + 1
Next articleNode