我有一个HTML文档,其中包含Javascript块中的地理信息。它是此网页的源代码:https://energy.ehawaii.gov/epd/public/energy-projects-map.html
这可以被视为地图,也可以视为列表。
我想要实现的是在Excel中使用该列表,但是使用" Latitude"和#34;经度"的字段。 Google地图标记指定了Javascript中的LatLng
。
如何使用VB之类的东西来处理HTML文件的源代码,并组织成一个包含以下字段/列的表:
<a ... title="such and such">
)<p>Technology: Solar</p>
)google.maps.LatLng(latitude, longitude);
所有帮助表示赞赏!
答案 0 :(得分:1)
尝试基于XMLHTTP请求的此VBScript解决方案。只需复制下面的代码,粘贴到文本文件,将其保存为.vbs
并运行它。脚本尚未优化,所有请求都不是异步的,因此我的PC上大约需要40秒才能获取所有数据。
Option Explicit
Dim arrCells(), arrList, arrTmp, sRespHeaders, sRespText, arrSetHeaders, i, j, iTotal, oApp, oWB, oWS, oOutput
' Create output window
Output oOutput
' Get cookies
oOutput.write "Get cookies"
XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-projects-map.html", Array(), sRespHeaders, sRespText
ParseResponse "^Set-(Cookie): ([\S]*?=[\S]*?);[\s\S]*?$", sRespHeaders, arrSetHeaders
' Get project list
oOutput.write "Get project list"
arrList = Array()
XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-projects-list.json?sEcho=2&iColumns=5&sColumns=&iDisplayStart=1&iDisplayLength=0&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&sSearch=&bRegex=false&sSearch_0=&bRegex_0=false&bSearchable_0=true&sSearch_1=&bRegex_1=false&bSearchable_1=true&sSearch_2=&bRegex_2=false&bSearchable_2=true&sSearch_3=&bRegex_3=false&bSearchable_3=true&sSearch_4=&bRegex_4=false&bSearchable_4=true&iSortCol_0=0&sSortDir_0=asc&iSortingCols=1&bSortable_0=true&bSortable_1=true&bSortable_2=true&bSortable_3=true&bSortable_4=true", arrSetHeaders, "", sRespText
ParseProjects sRespText, arrList, iTotal
oOutput.write "Get project list: " & (UBound(arrList) + 1) & " of " & iTotal
' Rearrange to 2-dimensional array, get LatLng
ReDim arrCells(UBound(arrList), 8) ' Name, Technology, Island, Capacity, Location, RID, Type, Lat, Lng
For i = 0 To UBound(arrList)
For j = 0 To 6
arrCells(i, j) = arrList(i)(j)
Next
oOutput.write "Get LatLng: " & (i + 1) & " of " & iTotal
arrTmp = RequestLatLng(arrList(i)(5))
arrCells(i, 7) = arrTmp(0)
arrCells(i, 8) = arrTmp(1)
Next
' Create Excel worksheet, output data
oOutput.write "Export to Excel"
Set oApp = CreateObject("Excel.Application")
oApp.Visible = True
Set oWB = oApp.Workbooks.Add(-4167) ' xlWBATWorksheet
Set oWS = oWB.Worksheets(1)
oWS.Range(oWS.Cells(1, 1), oWS.Cells(UBound(arrCells) + 1, 9)).Value = arrCells
oWS.Columns.AutoFit
oWB.Saved = True
oOutput.write "Completed"
Sub XmlHttpGet(sQuery, arrSetHeaders, sRespHeaders, sRespText)
Dim arrHeader
With CreateObject("MSXML2.ServerXMLHTTP")
.SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
.Open "GET", sQuery, False
For Each arrHeader In arrSetHeaders
.SetRequestHeader arrHeader(0), arrHeader(1)
Next
.Send ""
sRespHeaders = .GetAllResponseHeaders
sRespText = .ResponseText
End With
End Sub
Sub ParseResponse(sPattern, sResponse, aData)
Dim oMatch, aTmp, sSubMatch
aData = Array()
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.Pattern = sPattern
For Each oMatch In .Execute(sResponse)
If oMatch.SubMatches.Count = 1 Then
PushItem aData, oMatch.SubMatches(0)
Else
aTmp = Array()
For Each sSubMatch In oMatch.SubMatches
PushItem aTmp, sSubMatch
Next
PushItem aData, aTmp
End If
Next
End With
End Sub
Sub PushItem(aList, vItem)
ReDim Preserve aList(UBound(aList) + 1)
aList(UBound(aList)) = vItem
End Sub
Sub ParseProjects(sJson, arrProj, iTotalRecords)
Dim i, q
With CreateObject("htmlfile")
With .parentwindow
.execscript ";", "jscript"
.eval ("json = " & sJson & ";")
iTotalRecords = CInt(.json.iTotalRecords)
Do While .json.aaData.Length
ReDim Preserve arrProj(UBound(arrProj) + 1)
With .json.aaData.Shift()
arrProj(UBound(arrProj)) = Array(.Shift(), .Shift(), .Shift(), .Shift(), .Shift(), .Shift(), .Shift())
End With
Loop
End With
End With
End Sub
Function RequestLatLng(sRid)
Dim sRespText, arrTmp, sTmp
XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-project-details.html?rid=" & sRid, Array(), "", sRespText
arrTmp = Split(sRespText, "google.maps.LatLng(")
If UBound(arrTmp) >= 1 Then
sTmp = arrTmp(1)
arrTmp = Split(sTmp, "),")
If UBound(arrTmp) >= 1 Then
RequestLatLng = Split(arrTmp(0), ", ")
Exit Function
End If
End If
RequestLatLng = Array("#", "#")
End Function
Sub Output(oWnd)
Set oWnd = ShowWindow("energy.ehawaii.gov", "", 354, 118)
End Sub
Function ShowWindow(sTitle, sBG, iWidth, iHeight)
Set ShowWindow = CreateWindow()
With ShowWindow
With .document
.title = sTitle
.getElementsByTagName("head")(0).appendChild .createElement("style")
.styleSheets(0).cssText = "* {font: 8pt tahoma; margin: 5px;}"
.body.style.background = "buttonface"
.body.style.backgroundRepeat = "no-repeat"
.body.style.backgroundImage = "url(" & sBG & ")"
.body.innerHTML = ""
End With
.resizeTo .screen.availWidth, .screen.availHeight
.resizeTo iWidth + .screen.availWidth - .document.body.offsetWidth, iHeight + .screen.availHeight - .document.body.offsetHeight
.moveTo CInt((.screen.availWidth - iWidth) / 2), CInt((.screen.availHeight - iHeight) / 2)
.execScript "var handlers, thunks = {body_onunload: function() {handlers.WSHQuit()}};"
Execute "Class clsHandlers: Public Sub WSHQuit(): WScript.Quit: End Sub: End Class"
Set .handlers = New clsHandlers
Set .document.body.onunload = .thunks.body_onunload
.execScript "var write = function(t) {document.body.innerHTML = t};"
End With
End Function
Function CreateWindow()
' source http://forum.script-coding.com/viewtopic.php?pid=75356#p75356
Dim sSignature, oShellWnd, oProc
On Error Resume Next
sSignature = Left(CreateObject("Scriptlet.TypeLib").Guid, 38)
Do
Set oProc = CreateObject("WScript.Shell").Exec("mshta ""about:<head><script>moveTo(-32000,-32000);document.title=' '</script><hta:application id=app border=thick minimizebutton=no maximizebutton=no scroll=no showintaskbar=yes contextmenu=no selection=no innerborder=no icon=""""/><object id='shell' classid='clsid:8856F961-340A-11D0-A96B-00C04FD705A2'><param name=RegisterAsBrowser value=1></object><script>shell.putproperty('" & sSignature & "',document.parentWindow);</script></head>""")
Do
If oProc.Status > 0 Then Exit Do
For Each oShellWnd In CreateObject("Shell.Application").Windows
Set CreateWindow = oShellWnd.GetProperty(sSignature)
If Err.Number = 0 Then Exit Function
Err.Clear
Next
Loop
Loop
End Function
答案 1 :(得分:0)
当然可以。但是在StackOverflow,我们解决的问题不是为现有解决方案编写代码。
通过它你将获得带有拉特和多头的JSON。你可以使用很多解决方案。我亲自为Excel做了一个简单的Scrape HTML Add-In。你需要学习的只是正则表达式。