我创建了一个scraper,用于从网页中解析不同产品的标题,但运行时什么都没得到。我可以看到所需内容是由javascript动态生成的。由于我不想使用任何转换器(converter)来收集这些标题,我希望有人能帮助我使用字典来实现。我最初的尝试是:
Sub RmartData()
    ' Downloads https://redmart.com/bakery with a synchronous GET request, loads the
    ' response text into an HTMLDocument and writes the text of the first <a> found
    ' inside every element of class "description" to column 1 of the active sheet.
    Dim http As New XMLHTTP60, html As New HTMLDocument
    Dim titelem As Object, title As Object, protitle As Object
    Dim x As Long ' Output row counter; declared so the procedure compiles under Option Explicit

    With http
        .Open "GET", "https://redmart.com/bakery", False
        .send
        html.body.innerHTML = .responseText
    End With

    Set titelem = html.getElementsByClassName("description")
    For Each title In titelem
        Set protitle = title.getElementsByTagName("a")
        ' A description block without any link has no item 0; skip it instead of
        ' failing on protitle(0).innerText
        If protitle.Length > 0 Then
            x = x + 1
            Cells(x, 1) = protitle(0).innerText
        End If
    Next title
End Sub
检查元素我看到了:
<div class="description"><!-- react-empty: 288 --><h4 title="Gardenia Hotdog Buns 4's"><a href="/product/gardenia-hotdog-buns-4's-13400">Gardenia Hotdog Buns 4's</a></h4><div><span class="size">220 g</span><a href="#" data-tooltip="Guaranteed fresh for 3+ days, inc. delivery day" class="tag green small tooltip">3D+</a></div><div id="BVRRInlineRating-13400" class="ratings"> <div class="bv-cleanslate bv-cv2-cleanslate"> <div data-bv-v="ratingItem:37" class="bv-shared bv-core-container-134 bv-rating-top-statistic">
<!--[if lt IE 7]> <div class="bv-compat bvie6 bvie-lt8 bvie"> <![endif]--> <!--[if IE 7]> <div class="bv-compat bvie7 bvie-lt8 bvie"> <![endif]--> <!--[if IE 8]> <div class="bv-compat bvie8 bvie"> <![endif]--> <!--[if IE 9]> <div class="bv-compat bvie9 bvie"> <![endif]--> <!--[if gt IE 9]> <!--><div class="bv-compat"> <!--<![endif]--> <div class="bv-inline-rating-container"> <dl class="bv-stars-container" role="presentation"> <dd class="bv-rating-ratio" role="presentation"> <span class="bv-rating-stars-container"> <span class="bv-rating-stars bv-rating-stars-off" aria-hidden="true"> ★★★★★ </span> <span class="bv-rating-stars-on bv-rating-stars" style="width:40% !important;" aria-hidden="true"> ★★★★★ </span> <span class="bv-off-screen">2 out of 5 stars. Read reviews.</span> </span> </dd> <dd class="bv-rating-ratio-count" role="presentation"> <span class="bv-rating-label"> (1) </span> </dd> </dl> </div> </div>
</div> </div></div></div>
在Chrome开发人员工具中,点击“响应”(Response)选项卡,我得到的内容如下:
<!DOCTYPE html><html lang="en" xmlns="http://www.w3.org/1999/xhtml" xmlns:og="http://ogp.me/ns#" xmlns:fb="http://www.facebook.com/2008/fbml" itemscope itemtype="http://schema.org/WebPage"><head><meta charset="utf-8"><title>Online Grocery Shopping - Groceries Delivery Singapore | RedMart</title><meta name="description" content="Singapore's leading online grocery service. Fresh groceries, household essentials and specialty products delivered to your door."><meta name="keywords" content="online groceries, grocery delivery singapore, online grocery shopping, online supermarket"><meta name="p:domain_verify" content="bf4d2b7c4d98534d44b1f2ff3bb02ee7"><meta name="msvalidate.01" content="A8CF80424AEF2B51E2C1313E6EA133BD"/><meta name="google-site-verification" content="AXdSfFmvBak9HqZjP7ihAAN5v2iVUu7BEsbCIcvJgwg"/><meta property="og:type" content="website"><meta property="og:title" content="Online Grocery Shopping - Groceries Delivery Singapore"><meta property="og:description" content="Singapore's leading online grocery service. 
Fresh groceries, household essentials and specialty products delivered to your door."><meta property="og:url" content="https://redmart.com/"><meta property="og:image" content="https://s3-ap-southeast-1.amazonaws.com/media.redmart.com/assets/logo-FB.png"><meta property="og:site_name" content="RedMart"><meta property="fb:app_id" content="219268451429695"><meta name="git:tag" content="" 2="" 13="" 0=""><meta name="git:rev" content="68f1442"><meta name="git:commit_id" content="68f14427b86c2d64a3733140b5c205a013eb00e2"><meta name="git:date" content="2017-05-11 09:57:13 +0800"><meta name="viewport" content="width=1024"><link rel="icon" type="image/png" href="/img/favicon.ca3eb8fb.png"><meta name="apple-itunes-app" content="app-id=606780396, app-argument={{url}}"><meta name="google-play-app" content="app-id=com.redmart.redmart"><link rel="apple-touch-icon" href="http://media.redmart.com/assets/appicon-152x152.png"><link rel="alternate" href="android-app://com.redmart.redmart"/><meta name="apple-mobile-web-app-capable" content="yes"><meta name="mobile-web-app-capable" content="yes"><link rel="stylesheet" href="/styles/app.afc1f1db.css"><link rel="canonical" href="https://redmart.com/"/><style>.spinner{width:80px;height:80px;margin:0;padding: 0;border:1px solid #ee4054;-webkit-border-radius:50%;border-radius:50%;border-top-color:transparent;border-left-color:transparent;-webkit-animation:spin 600ms infinite linear;-moz-animation:spin 600ms infinite linear;-o-animation:spin 600ms infinite linear;-ms-animation:spin 600ms infinite linear;animation:spin 600ms infinite linear}@-moz-keyframes spin{100%{-webkit-transform:rotate(360deg);-moz-transform:rotate(360deg);-o-transform:rotate(360deg);-ms-transform:rotate(360deg);transform:rotate(360deg)}}@-webkit-keyframes spin{100%{-webkit-transform:rotate(360deg);-moz-transform:rotate(360deg);-o-transform:rotate(360deg);-ms-transform:rotate(360deg);transform:rotate(360deg)}}@-o-keyframes 
spin{100%{-webkit-transform:rotate(360deg);-moz-transform:rotate(360deg);-o-transform:rotate(360deg);-ms-transform:rotate(360deg);transform:rotate(360deg)}}@keyframes spin{100%{-webkit-transform:rotate(360deg);-moz-transform:rotate(360deg);-o-transform:rotate(360deg);-ms-transform:rotate(360deg);transform:rotate(360deg)}}</style><body style="background: #ECF0F1"><div id="app" class="firstLoad"><header id="navbar"></header><div id="middle"><aside id="leftSidebar"></aside><section id="layoutContainerWrapper"><section id="stickyNotification"></section><section id="contentBanner"></section><section id="contentTabs"></section><section id="contentWrapper"><section id="contentNavigation"></section><article id="contentSection"></article></section></section><aside id="rightSidebar"></aside></div><footer id="footer"></footer></div><div id="notifications"></div><div id="lightbox" class="hide"></div><div id="modal"></div><div id="tooltips"></div><div id="overlay"></div><div id="offline" class="fullScreen" style="display:none"><div><svg id="logo" viewBox="0 0 1024 237.391"><use xlink:href="#svg-rmlogo"></use></svg><div class="spinner"></div></div></div><script type="text/javascript" src="//display.ugc.bazaarvoice.com/static/redmart/en_SG/bvapi.js"></script><script type="text/javascript" src="//cdn.optimizely.com/js/6758860916.js"></script><script src="/js/loader.8c36ef90.js"></script><svg id="svgtemplater" class="svgtemplater" style="display: none"><g id="svg-icon-cart"><g><path d="M6.334,84.48c62.087,0,124.174,0,186.258,0c7.046,0,7.046-10.924,0-10.924c-62.084,0-124.174,0-186.258,0 C-0.716,73.556-0.716,84.48,6.334,84.48L6.334,84.48z"/></g><g><path d="M1.063,80.473c9.465,29.911,18.931,59.809,28.392,89.715c2.119,6.692,12.668,3.833,10.537-2.901 c-9.465-29.906-18.931-59.814-28.392-89.721C9.479,70.877-1.068,73.734,1.063,80.473L1.063,80.473z"/></g><g><path d="M34.726,174.2c43.357,0,86.718,0,130.075,0c7.042,0,7.042-10.924,0-10.924c-43.357,0-86.718,0-130.075,0 
C27.675,163.276,27.675,174.2,34.726,174.2L34.726,174.2z"/></g><g><path d="M170.059,170.188c9.27-29.906,18.529-59.812,27.799-89.715c2.092-6.747-8.458-9.613-10.532-2.907 c-9.27,29.906-18.529,59.818-27.794,89.721C157.443,174.033,167.984,176.897,170.059,170.188L170.059,170.188z"/></g><g><path d="M114.548,88.85C101.13,68.68,87.711,48.512,74.29,28.341c-3.879-5.834-13.344-0.371-9.429,5.514 c13.415,20.172,26.833,40.338,40.255,60.511C108.995,100.2,118.465,94.737,114.548,88.85L114.548,88.85z"/></g><g><path d="M10.192,82.882c8.33-8.331,16.658-16.659,24.988-24.985c4.984-4.984-2.736-12.71-7.727-7.724 c-8.33,8.328-16.658,16.656-24.988,24.982C-2.515,80.142,5.205,87.869,10.192,82.882L10.192,82.882z"/></g><g><path d="M31.32,59.498c46.187,0,92.368,0,138.56,0c7.042,0,7.042-10.924,0-10.924c-46.188,0-92.373,0-138.56,0 C24.277,48.574,24.277,59.498,31.32,59.498L31.32,59.498z"/></g><g><path d="M166.014,57.897c7.949,7.951,15.9,15.903,23.854,23.851c4.985,4.987,12.705-2.739,7.725-7.724 c-7.95-7.95-15.9-15.903-23.851-23.851C168.753,45.188,161.029,52.914,166.014,57.897L166.014,57.897z"/></g></g><g id="svg-icon-frozen"><title>Page 1</title><desc>Created with Sketch.</desc><defs></defs><g id="Page-1" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd" stroke-linecap="round"><g transform="translate(1.000000, 1.000000)" stroke="#FFFFFF" stroke-width="3.5"><path d="M25.4453,0.75 L25.4453,54.904" id="Stroke-1"></path><polyline id="Stroke-3" points="33.8522 4.1041 25.4152 12.5411 17.0382 4.1651"></polyline><polyline id="Stroke-5" points="33.8522 51.5496 25.4152 43.1126 17.0382 51.4886"></polyline><path d="M1.9959,14.2884 L48.8949,41.3654" id="Stroke-7"></path><polyline id="Stroke-9" points="9.1042 8.6849 12.1922 20.2109 0.7502 23.2769"></polyline><polyline id="Stroke-11" points="50.1932 32.4076 38.6672 35.4956 41.7332 46.9386"></polyline><path d="M48.8946,14.2883 L1.9956,41.3653" id="Stroke-13"></path><polyline id="Stroke-15" points="50.1932 23.246 38.6672 20.158 41.7332 
8.715"></polyline><polyline id="Stroke-17" points="9.1042 46.9688 12.1922 35.4428 0.7502 32.3768"></polyline></g></g></g><g id="svg-icon-leaf"><g id="Side-Nav" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd"><g id="CA-252" transform="translate(-536.000000, -621.000000)"><g id="Group-2" transform="translate(506.000000, 570.000000)"><g id="Group" transform="translate(28.000000, 41.000000)"><g id="Group-5" transform="translate(0.000000, 9.000000)"><rect id="Rectangle-1-Copy-2" x="0" y="0" width="42" height="42"></rect><path d="M21.9947368,7.3162 L21.9947368,16.25 L16.1934316,10.7096 C15.1032,11.4772 14.0726526,12.2828 13.1714211,13.1416 C5.61142105,20.3597 5.72283158,26.2706 8.05847368,29.5595 L2.1,35.25 L2.1,40.95 L11.3331474,32.1321 C14.9241474,33.6179 20.5424211,32.97 27.2389895,26.5746 C35.6803263,18.5148 39.9,1.05 39.9,1.05 C39.9,1.05 30.4460211,3.1457 21.9947368,7.3162" id="Fill-17" fill="#92BC00"></path></g></g></g></g></g></g><g id="svg-icon-notify"><path d="M69.874,2.001c-37.469,0-67.843,30.375-67.843,67.843 c0,37.469,30.374,67.845,67.843,67.845c37.471,0,67.842-30.376,67.842-67.845C137.716,32.375,107.345,2.001,69.874,2.001z M69.874,129.794c-33.108,0-59.947-26.839-59.947-59.95c0-33.109,26.839-59.948,59.947-59.948c33.111,0,59.947,26.839,59.947,59.948 C129.821,102.955,102.985,129.794,69.874,129.794z"/><g><path d="M63.731,96.46c0-3.749,2.603-6.456,6.144-6.456c3.749,0,6.144,2.707,6.144,6.456 c0,3.644-2.395,6.454-6.144,6.454C66.231,102.914,63.731,100.104,63.731,96.46z M66.334,81.571l-1.458-49.978h9.997l-1.458,49.978 H66.334z"/></g></g><g id="svg-icon-otpshield"><image display="none" overflow="visible" enable-background="new " width="56" height="60" xlink:href="8C3C01A583C3DD0C.png" transform="matrix(0.48 0 0 0.48 4.9863 5.3413)"></image><g><g><path fill="#464E55" d="M24.577,3.256c-7.892,0-11.713-2.993-11.725-3.023c-0.061-0.145-0.199-0.244-0.361-0.248 
c0,0-0.004,0-0.008,0c-0.156,0-0.301,0.097-0.361,0.242C12.11,0.259,8.262,3.256,0.397,3.256c-0.22,0-0.396,0.179-0.396,0.395 l2.486,13.421c0.647,5.437,9.438,9.694,9.838,9.873c0.053,0.021,0.104,0.03,0.16,0.03c0.051,0,0.105-0.01,0.156-0.03 c0.402-0.179,9.311-4.438,9.843-9.873l2.483-13.421C24.972,3.435,24.795,3.256,24.577,3.256z M12.361,24.018 c-0.312-0.137-7.114-3.416-7.704-7.725L2.673,5.785c0-0.171,0.138-0.31,0.311-0.31c6.157,0,9.211-2.346,9.218-2.371 c0.049-0.115,0.161-0.19,0.285-0.19c0.001,0,0.001,0,0.002,0l0.006,0.002c0.125,0.005,0.234,0.079,0.278,0.193 c0.011,0.022,3.043,2.366,9.222,2.366c0.17,0,0.307,0.139,0.307,0.31l-1.982,10.509c-0.635,4.186-7.393,7.59-7.705,7.725 c-0.039,0.021-0.084,0.029-0.123,0.029C12.444,24.047,12.403,24.041,12.361,24.018z"/></g><g opacity="0.25"><path fill="#464E55" d="M20.792,13.779l1.508-7.994c0-0.171-0.137-0.31-0.307-0.31c-6.179,0-9.211-2.344-9.222-2.366 c-0.044-0.114-0.153-0.188-0.278-0.193l-0.006-0.002V13.45L20.792,13.779z"/></g><g opacity="0.25"><g><polygon fill="#464E55" points="4.182,13.779 4.185,13.779 2.675,5.785 "/></g><g><path fill="#464E55" d="M4.659,16.293c0,4.303,7.39,7.59,7.706,7.725c0.038,0.021,0.083,0.029,0.122,0.029V13.45l-8.303,0.329 L4.659,16.293z"/></g></g></g></g><g id="svg-icon-question"><path d="M69.874,2.001c-37.469,0-67.843,30.375-67.843,67.843 c0,37.469,30.374,67.845,67.843,67.845c37.471,0,67.842-30.376,67.842-67.845C137.716,32.375,107.345,2.001,69.874,2.001z M69.874,129.794c-33.108,0-59.947-26.839-59.947-59.95c0-33.109,26.839-59.948,59.947-59.948c33.111,0,59.947,26.839,59.947,59.948 C129.821,102.955,102.985,129.794,69.874,129.794z"/><g><path d="M79.238,49.913c0-5.291-3.294-9.682-9.682-9.682c-4.392,0-8.386,2.195-11.479,5.789l-4.692-4.292 c4.194-4.791,9.684-8.285,16.769-8.285c10.282,0,17.071,6.09,17.071,15.972c0,13.077-16.072,18.368-14.475,32.346h-7.186 C63.368,66.186,79.238,60.496,79.238,49.913z M69.355,90.145c3.294,0,5.988,2.597,5.988,6.39c0,3.594-2.694,6.189-5.988,6.189 
c-3.292,0-5.888-2.596-5.888-6.189C63.468,92.741,66.064,90.145,69.355,90.145z"/></g></g><g id="svg-icon-search"><path d="M61.616,7.624c-28.363,0-51.359,22.996-51.359,51.358c0,28.368,22.996,51.358,51.359,51.358 c28.368,0,51.359-22.99,51.359-51.358C112.975,30.62,89.983,7.624,61.616,7.624z M61.616,98.927 c-22.059,0-39.948-17.885-39.948-39.943c0-22.06,17.89-39.948,39.948-39.948s39.944,17.889,39.944,39.948 C101.56,81.042,83.675,98.927,61.616,98.927z"/><path d="M90.777,92.183l4.038-4.038c1.106-1.116,2.923-1.116,4.039,0l36.315,36.315c1.106,1.116,1.106,2.923,0,4.039 l-4.038,4.038c-1.115,1.106-2.923,1.106-4.039,0L90.777,96.221C89.661,95.105,89.661,93.29,90.777,92.183z"/></g><g id="svg-icon-shop-bag"><path d="M26.683,8.706h-5.302C20.572,4.598,18.261,0,14.239,0c-4.038,0-6.344,4.598-7.143,8.706H2.315L0,39.396 h23.885l6.71-3.344L26.683,8.706z M10.603,8.706c0.156-0.824,0.344-1.781,0.551-2.314c0.892-2.227,1.613-2.905,3.085-2.905 c1.475,0,2.193,0.679,3.076,2.906c0.215,0.535,0.406,1.496,0.555,2.313H10.603z M22.421,35.817l0.421-23.713l1.224-0.047 l2.714,21.685L22.421,35.817z"/></g></svg><div id="mainSpinner" class="spinner" style="position:absolute; left:50%; top:50%; margin-top:-40px; margin-left:-40px"></div><script type="application/ld+json">{
"@context": "http://schema.org",
"@type": "WebSite",
"url": "https://redmart.com/",
"potentialAction": {
"@type": "SearchAction",
"target": "https://redmart.com/search/{productSearch}",
"query-input": "required name=productSearch"
}
}</script>
答案 0 :(得分:1)
所提供的链接https://redmart.com/bakery返回的网页源HTML不包含所需的数据,该页面使用AJAX加载内容。网站https://redmart.com有一个可用的API,响应以JSON格式返回。例如在Chrome中浏览该页面,然后打开“开发人员工具”窗口( F12 ),切换到“网络”选项卡,重新加载( F5 )页面并检查记录到的XHR请求。最相关的数据是以下URL返回的JSON字符串:
https://api.redmart.com/v1.5.7/catalog/search?extent=2&pageSize=6&sort=1&category=bakery
您可以使用以下VBA代码检索上述信息。 将JSON.bas模块导入VBA项目以进行JSON处理。
Option Explicit
Sub Scrape_redmart_com()
    ' Queries the redmart.com catalog search API for the "bakery" category, merges
    ' selected properties of every product (plus a few properties of the product set
    ' it belongs to) into one dictionary per product, and writes the combined result
    ' to the first worksheet of this workbook.
    ' Relies on the JSON module (JSON.Parse, JSON.ToArray) being present in the project.

    Dim sResponse As String
    Dim oQuery As Object
    Dim sQuery As String
    Dim sState As String
    Dim vJSON
    Dim aResult()
    Dim aProdSets()
    Dim aProds()
    Dim aRows()
    Dim aHeader()
    Dim j As Long
    Dim i As Long
    Dim u As Long

    ' Set query parameters
    Set oQuery = CreateObject("Scripting.Dictionary")
    With oQuery
        .Add "extent", "2"
        .Add "pageSize", "6"
        .Add "sort", "1"
        .Add "category", "bakery"
    End With
    sQuery = EncodeQueryParams(oQuery)

    ' Retrieve JSON data
    XmlHttpRequest "GET", "https://api.redmart.com/v1.5.7/catalog/search?" & sQuery, "", "", "", sResponse

    ' Parse JSON response
    JSON.Parse sResponse, vJSON, sState
    If sState <> "Object" Then
        MsgBox "Invalid JSON response"
        Exit Sub
    End If
    ' The root object must hold an array of product sets; assigning anything else
    ' to the dynamic array below would raise a type mismatch. Checked before the
    ' sheet is cleared so a bad response does not wipe existing output.
    If Not IsArray(vJSON("productSets")) Then
        MsgBox "Invalid JSON response"
        Exit Sub
    End If

    ' Init output
    ThisWorkbook.Sheets(1).Cells.Delete
    aResult = Array() ' Set ubound to -1

    ' Iterate over product sets
    aProdSets = vJSON("productSets")
    For j = 0 To UBound(aProdSets)
        ' Ignore sets that carry no product array at all
        If IsArray(aProdSets(j)("products")) Then
            aProds = aProdSets(j)("products")
            ' Ignore empty product lists: growing the result by zero elements
            ' would request an upper bound of -1 while the result is still empty
            If UBound(aProds) >= 0 Then
                ' Populate result array
                u = UBound(aResult) + 1
                ReDim Preserve aResult(u + UBound(aProds))
                For i = 0 To UBound(aProds)
                    ' Copy properties from product set into each product
                    Set aResult(u + i) = ExtractKeys( _
                        aProdSets(j), _
                        Array( _
                            "category", _
                            "on_sale_count", _
                            "total"), _
                        aResult(u + i) _
                    )
                    ' Extract selected properties from product
                    Set aResult(u + i) = ExtractKeys( _
                        aProds(i), _
                        Array( _
                            "id", _
                            "title", _
                            "desc", _
                            "details", _
                            "product_life", _
                            "measure", _
                            "pricing", _
                            "sku", _
                            "img"), _
                        aResult(u + i) _
                    )
                    DoEvents
                Next
            End If
        End If
    Next

    ' Nothing was collected, so there is nothing to convert or output
    If UBound(aResult) < 0 Then
        MsgBox "No products found"
        Exit Sub
    End If

    ' Convert combined result to arrays for output
    JSON.ToArray aResult, aRows, aHeader

    ' Output
    With ThisWorkbook.Sheets(1)
        OutputArray .Cells(1, 1), aHeader
        Output2DArray .Cells(2, 1), aRows
        .Columns.AutoFit
    End With
    MsgBox "Completed"
End Sub
Sub XmlHttpRequest(sMethod As String, sUrl As String, arrSetHeaders, sFormData, sRespHeaders As String, sContent As String)
    ' Performs a synchronous HTTP request through MSXML2.XMLHTTP.
    '   sMethod       - HTTP verb passed to Open
    '   sUrl          - request URL
    '   arrSetHeaders - when it is an array, each element is read as a pair:
    '                   element(0) = header name, element(1) = header value;
    '                   any non-array value means "no extra headers"
    '   sFormData     - body passed to send
    '   sRespHeaders  - receives the result of GetAllResponseHeaders
    '   sContent      - receives responseText
    Dim oRequest As Object
    Dim vPair

    ' Alternative transport kept from the original for reference:
    '   Set oRequest = CreateObject("Msxml2.ServerXMLHTTP")
    '   oRequest.SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
    Set oRequest = CreateObject("MSXML2.XMLHTTP")

    oRequest.Open sMethod, sUrl, False
    If IsArray(arrSetHeaders) Then
        For Each vPair In arrSetHeaders
            oRequest.SetRequestHeader vPair(0), vPair(1)
        Next
    End If
    oRequest.send sFormData

    ' Hand the results back through the ByRef parameters
    sRespHeaders = oRequest.GetAllResponseHeaders
    sContent = oRequest.responseText
End Sub
Function EncodeQueryParams(oParams As Object) As String
    ' Builds a URL query string ("k1=v1&k2=v2...") from a dictionary, passing every
    ' key and every value through EncodeUriComponent. An empty dictionary yields "".
    Dim vName
    Dim sName As String
    Dim sValue As String
    Dim sQuery As String

    For Each vName In oParams.Keys()
        ' Copy into String locals so they can be handed to the ByRef String parameter
        sName = vName
        sValue = oParams(vName)
        ' Every pair contains "=", so a non-empty accumulator means a pair precedes this one
        If Len(sQuery) > 0 Then sQuery = sQuery & "&"
        sQuery = sQuery & EncodeUriComponent(sName) & "=" & EncodeUriComponent(sValue)
    Next

    EncodeQueryParams = sQuery
End Function
Function EncodeUriComponent(strText As String) As String
    ' Returns strText encoded by the JScript encodeURIComponent function.
    ' An "htmlfile" document hosts the script; it is created on the first call and
    ' kept in a Static variable so later calls reuse the same script host.
    Static oScriptHost As Object

    If oScriptHost Is Nothing Then
        Set oScriptHost = CreateObject("htmlfile")
        ' Define a thin wrapper function that the VBA side can call by name
        With oScriptHost.parentWindow
            .execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
        End With
    End If

    EncodeUriComponent = oScriptHost.parentWindow.encode(strText)
End Function
Function ExtractKeys(oSource, aKeys, Optional oTarget = Nothing) As Object
    ' Copies the entries named in aKeys from oSource into oTarget and returns oTarget.
    '   oSource - object queried with .Exists(key) and indexed by key
    '   aKeys   - enumerable list of keys to copy; keys absent from oSource are skipped
    '   oTarget - destination; when it is not a "Dictionary" (by TypeName) a new
    '             Scripting.Dictionary is created and stored in it
    Dim vName

    If TypeName(oTarget) <> "Dictionary" Then
        Set oTarget = CreateObject("Scripting.Dictionary")
    End If

    For Each vName In aKeys
        If oSource.Exists(vName) Then
            ' Object values need Set; plain values use ordinary assignment
            If Not IsObject(oSource(vName)) Then
                oTarget(vName) = oSource(vName)
            Else
                Set oTarget(vName) = oSource(vName)
            End If
        End If
    Next

    Set ExtractKeys = oTarget
End Function
Sub OutputArray(oDstRng As Range, aCells As Variant)
    ' Writes a one-dimensional array as a single row starting at oDstRng.
    ' The parent of oDstRng is selected first, and the target cells are given
    ' the "@" number format before the values are assigned.
    Dim nCols As Long
    Dim oRow As Range

    oDstRng.Parent.Select

    nCols = UBound(aCells) - LBound(aCells) + 1
    Set oRow = oDstRng.Resize(1, nCols)
    oRow.NumberFormat = "@"
    oRow.Value = aCells
End Sub
Sub Output2DArray(oDstRng As Range, aCells As Variant)
    ' Writes a two-dimensional array to a block of cells whose top-left corner is
    ' oDstRng. The parent of oDstRng is selected first, and the target cells are
    ' given the "@" number format before the values are assigned.
    Dim nRows As Long
    Dim nCols As Long
    Dim oBlock As Range

    oDstRng.Parent.Select

    nRows = UBound(aCells, 1) - LBound(aCells, 1) + 1
    nCols = UBound(aCells, 2) - LBound(aCells, 2) + 1
    Set oBlock = oDstRng.Resize(nRows, nCols)
    oBlock.NumberFormat = "@"
    oBlock.Value = aCells
End Sub
我的输出如下:
BTW,同样的方法适用于以下答案:1,2,3,4,5,6, 7和8。