我想从电子商务网站中删除商品标题,价格,卖家和图片网址。需要将结果复制到A-D列中的活动工作表中。以下代码最初是由@QHarr之一为Amazon开发的。有任何帮助对其进行更新以获得所需结果的帮助吗?
我已将图像包含在结果中。谢谢
Public Sub WriteOutProductInfo()
'VBE>Tools>References> Microsoft HTML Object Library
Dim html As MSHTML.HTMLDocument
Set html = New MSHTML.HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
' change the url for the page of amazon from where to copy data
.Open "GET", "https://www.daraz.lk/catalog/?from=input&q=sarees&ppath=31186:3287", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
html.body.innerHTML = .responseText
End With
' 1. declare additional headers as variable
Dim headers(), titles As Object, prices As Object, original_prices As Object
Dim seller As Object
headers = Array("Title", "Price")
With html
Set titles = .querySelectorAll(".c3gUW0,.c13VH6")
Set prices = .querySelectorAll(".------------")
End With
Dim results(), r As Long, priceInfo As String
ReDim results(1 To titles.Length, 1 To UBound(headers) + 1)
For r = 0 To titles.Length - 1
results(r + 1, 1) = titles.Item(r).innerText
Next
Dim ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End sub
答案 0 :(得分:0)
由于各种原因,您发布的宏有些令人毛骨悚然。使用它来达成您的目标:
Sub WriteOutProductInfo()
'This macro works on the sheet it's startetd from
Dim browser As Object
Dim url As String
Dim nodesAllOffers As Object
Dim nodeOneOffer As Object
Dim currentRow As Long
'Row, for title and price for current offer
currentRow = ActiveSheet.Cells(Rows.Count, 1).End(xlUp).Row + 1
'Make sure that line 2 is the start line if nothing has been entered in the table yet
If currentRow = 1 Then currentRow = 2
'Your sample url
'You can loop over various urls from a table with the following code
'(Loop not included here)
url = "https://www.daraz.lk/catalog/?from=input&q=sarees&ppath=31186:3287"
'Initialize Internet Explorer, set visibility,
'call URL and wait until page is fully loaded
Set browser = CreateObject("internetexplorer.application")
browser.Visible = False
browser.navigate url
Do Until browser.ReadyState = 4: DoEvents: Loop
'Get all html elements with the css class "c3KeDq" in a node collection
'These are all offers of the called page, which contain in each case
'the connected information (title and price)
'
'This is a sample of the html code for one offer
'<div class="c3KeDq">
' <div class="c3vCyH">
' </div>
' <div class="c16H9d">
' <a age="0" href="//www.daraz.lk/products/ladies-office-and-saree-wear-brown-i102604104-s1009196133.html?search=1"
' title="Ladies Office And Saree Wear - Brown">
' Ladies Office And Saree Wear - Brown
' </a>
' </div>
' <div class="c3gUW0">
' <span class="c13VH6">
' Rs. 1,150
' </span>
' </div>
' <div class="c3lr34">
' </div>
' <div class="c15YQ9">
' <span class="c2i43- c1enUu" title="Sri Lanka">
' Sri Lanka
' </span>
' </div>
' <div class="c2attd">
' <div class="c31VUX">
' <button age="0" type="button" class="ant-btn c1xzE_ ant-btn-primary ant-btn-lg">
' ADD TO CART
' </button>
' </div>
' </div>
'</div>
Set nodesAllOffers = browser.document.getElementsByClassName("c3KeDq")
If Not nodesAllOffers Is Nothing Then
'If we got the node collection
'We step through all offers and pull the titel and price out
For Each nodeOneOffer In nodesAllOffers
'Get title
ActiveSheet.Cells(currentRow, 1).Value = getValueByClassName(nodeOneOffer, "c16H9d")
'Get price
ActiveSheet.Cells(currentRow, 2).Value = getValueByClassName(nodeOneOffer, "c13VH6")
'Next offer (row)
currentRow = currentRow + 1
Next nodeOneOffer
End If
'Clean up
browser.Quit
Set browser = Nothing
Set nodesAllOffers = Nothing
Set nodeOneOffer = Nothing
End Sub
此功能:
Private Function getValueByClassName(htmlNode As Object, cssClassName As String) As String
'This function works with a single node
'If htmlNode is a node collection, you will got only the first value
Dim nodeByClassName As Object
Dim resultString As String
'Try to get wanted node
Set nodeByClassName = htmlNode.getElementsByClassName(cssClassName)(0)
If Not nodeByClassName Is Nothing Then
'If we got the class node
'Take innertext
resultString = Trim(nodeByClassName.innertext)
End If
'Clean up
Set nodeByClassName = Nothing
'Return result string
getValueByClassName = resultString
End Function