使用WebHelpers无法从JSON填充Excel电子表格

时间:2019-05-04 14:18:36

标签: json excel vba web-scraping

我在使用VBA使用WebHelpers从JSON填充表时遇到了麻烦。可以在以下位置访问JSON:http://54.152.85.66:5000/get-product-info。该表非常简单,只有六列,约有8k行。

代码如下:

Sub LoadRLSiteData()
Dim helperData As Object
Dim helperDict As Dictionary
Set helperData = 
WebHelpers.ParseJson(getXMLPage("http://54.152.85.66:5000/get-product-info"))
Debug.Print "helperData has " & helperData.Count & " items"
' HERE YOU SHOULD LOOP OVER helperData AND PUT INTO SHEET "Helper"
End Sub

Function getXMLPage(link) As String
On Error GoTo recovery
Dim retryCount As Integer
retryCount = 0
Dim ie As MSXML2.XMLHTTP60
Set ie = New MSXML2.XMLHTTP60
the_start:
ie.Open "GET", link, False
ie.setRequestHeader "Content-type", "application/json"
ie.send

While ie.readyState <> 4
    DoEvents
Wend

Debug.Print " "
Debug.Print "MSXML HTTP Request to " & link
Debug.Print ie.Status; "XMLHTTP status "; ie.statusText; " at "; Time
getXMLPage = ie.responseText
Exit Function
recovery:
retryCount = retryCount + 1
Debug.Print "Error number: " & Err.Number _
        & " " & Err.Description & " Retry " & retryCount
        Application.StatusBar = "Error number: " & Err.Number _
        & " " & Err.Description & " Retry " & retryCount

If retryCount < 4 Then GoTo the_start Else Exit Function
End Function

这是表格的外观:

DESIRED RESULT

WebHelpers.ParseJson(getXMLPage(“ http://54.152.85.66:5000/get-product-info”))返回一个对象,该对象似乎是九个词典的集合,但是我似乎无法弄清如何访问字典中的项目,因此可以放他们成片。

我根据QHarr的答案修改了代码,如下所示:

Option Explicit
Sub LoadRLSiteData()
Dim newHeaders() As Variant
newHeaders = Array("category", "products_category", "products_master_prod_id", "products_page_name_dub", "products_product_webcat", "products_url")
GetInfo "Helper Sample", "http://54.152.85.66:5000/get-product-info", newHeaders
newHeaders = Array("category", "products_category", "products_master_prod_id", "products_page_name_dub", "products_product_webcat", "products_url")
GetInfo "Images Sample", "http://54.152.85.66:5000/query-missing-images", newHeaders
newHeaders = Array("category", "problem", "url")
GetInfo "Problems Sample", "http://54.152.85.66:5000/get-problems", newHeaders
End Sub
Public Sub GetInfo(mySheet As String, link As String, myHeaders As Variant)
Dim helperData As Object
Dim headers(), item As Object, results(), key As Variant
Dim subItem As Object, r As Long, c As Long, cat As String
Worksheets(mySheet).Activate
Set helperData = WebHelpers.ParseJson(getXMLPage(link))
headers = myHeaders
ReDim results(1 To 100000, 1 To UBound(headers) + 1)
r = 1
Debug.Print "GetInfo unpacking JSON dictionaries"
For Each item In helperData                  'col of dict
    DoEvents
    cat = item("category")
    For Each subItem In item("products")
        c = 2
        results(r, 1) = cat
        For Each key In subItem.Keys
            results(r, c) = subItem(key)
            c = c + 1
        Next
        r = r + 1
    Next
Next
Debug.Print "GetInfo loading values to worksheet"
ActiveSheet.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ActiveSheet.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
Debug.Print "GetInfo finished"
End Sub
Function getXMLPage(link) As String
On Error GoTo recovery
Dim retryCount As Integer
retryCount = 0
Dim ie As MSXML2.XMLHTTP60
Set ie = New MSXML2.XMLHTTP60
the_start:
ie.Open "GET", link, False
ie.setRequestHeader "Content-type", "application/json"
ie.send

While ie.readyState <> 4
    DoEvents
Wend

Debug.Print " "
Debug.Print "MSXML HTTP Request to " & link
Debug.Print ie.Status; "XMLHTTP status "; ie.statusText; " at "; Time
getXMLPage = ie.responseText
Exit Function
recovery:
retryCount = retryCount + 1
Debug.Print "Error number: " & Err.Number _
        & " " & Err.Description & " Retry " & retryCount
        Application.StatusBar = "Error number: " & Err.Number _
        & " " & Err.Description & " Retry " & retryCount

If retryCount < 4 Then GoTo the_start Else Exit Function

End Function

除了具有不同架构的第三个URL(“ get-problems”)外,此解决方案效果很好,但似乎可以从架构中提取标头,而不是对其进行硬编码,并且标题中的变量也是如此对于helperData循环中的每个项目。这样可以使解决方案更干净,更通用。

1 个答案:

答案 0 :(得分:1)

我使用的是不同的json parser,但是这使字典和集合解开了。如果将代码从jsonconverter.bas安装到项目中,请转到VBE>工具>引用>添加对Microsoft脚本运行时的引用。您首先可以从下面的End With

中查看使用方法

[]是用For Each循环并由索引访问的集合; {}是通过密钥访问的字典。

您可以在此处看到一些结构:


VBA:

Option Explicit   
Public Sub GetInfo()
    Dim helperData As Object
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "http://54.152.85.66:5000/get-product-info", False
        .send
        Set helperData = jsonConverter.ParseJson(.responseText)
    End With
    Dim headers(), item As Object, results(), key As Variant
    Dim subItem As Object, r As Long, c As Long, cat As String
    headers = Array("category", "products_category", "products_master_prod_id", "products_page_name_dub", "products_product_webcat", "products_url")
    ReDim results(1 To 100000, 1 To UBound(headers) + 1)
    r = 1
    For Each item In helperData                        'col of dict
        cat = item("category")
        For Each subItem In item("products")
            c = 2
            results(r, 1) = cat
            For Each key In subItem.keys
                results(r, c) = subItem(key)
                c = c + 1
            Next
            r = r + 1
        Next
    Next
    ActiveSheet.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
    ActiveSheet.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

示例输出:

enter image description here


与您的集成后,我会期望像这样:

Option Explicit
Public Sub GetInfo()
    Dim helperData As Object
    Dim headers(), item As Object, results(), key As Variant
    Dim subItem As Object, r As Long, c As Long, cat As String
    Set helperData = WebHelpers.ParseJson(getXMLPage("http://54.152.85.66:5000/get-product-info"))
    headers = Array("category", "products_category", "products_master_prod_id", "products_page_name_dub", "products_product_webcat", "products_url")
    ReDim results(1 To 100000, 1 To UBound(headers) + 1)
    r = 1
    For Each item In helperData                  'col of dict
        cat = item("category")
        For Each subItem In item("products")
            c = 2
            results(r, 1) = cat
            For Each key In subItem.keys
                results(r, c) = subItem(key)
                c = c + 1
            Next
            r = r + 1
        Next
    Next
    ActiveSheet.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
    ActiveSheet.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub