我要
运行时错误424
第68行(行)
request.Open“ GET”,Url,False
而且我不知道如何解决。
我之前发布的问题; How to scrape specific part of online english dictionary?
我的最终目标是获得这样的结果;
A B
beginning bɪˈɡɪnɪŋ
behalf bɪˈhæf
behave bɪˈheɪv
behaviour bɪˈheɪvjər
belong bɪˈlɔːŋ
below bɪˈloʊ
bird bɜːrd
biscuit ˈbɪskɪt
这是我编写的代码,主要是基于我在互联网上找到的其他人的代码。
' Microsoft ActiveX Data Objects x.x Library
' Microsoft XML, v3.0
' Microsoft VBScript Regular Expressions
Sub ParseHelp()
' Word reference from
Dim Url As String
Url = "https://www.oxfordlearnersdictionaries.com/definition/english/" & Cells(ActiveCell.Row, "B").Value
' Get dictionary's html
Dim Html As String
Html = GetHtml(Url)
' Check error
If InStr(Html, "<TITLE>Not Found</Title>") > 0 Then
MsgBox "404"
Exit Sub
End If
' Extract phonetic alphabet from HTML
Dim wrapPattern As String
wrapPattern = "<span class='name' (.*?)</span>"
Set wrapCollection = FindRegexpMatch(Html, wrapPattern)
' MsgBox StripHtml(CStr(wrapCollection(1)))
' Fill phonetic alphabet into cell
If Not wrapCollection Is Nothing Then
Dim wrap As String
On Error Resume Next
wrap = StripHtml(CStr(wrapCollection(1)))
If Err.Number <> 0 Then
wrap = ""
End If
Cells(ActiveCell.Row, "C").Value = wrap
Else
MsgBox "not found"
End If
End Sub
Public Function StripHtml(Html As String) As String
Dim RegEx As New RegExp
Dim sOut As String
Html = Replace(Html, "</li>", vbNewLine)
Html = Replace(Html, " ", " ")
With RegEx
.Global = True
.IgnoreCase = True
.MultiLine = True
.Pattern = "<[^>]+>"
End With
sOut = RegEx.Replace(Html, "")
StripHtml = sOut
Set RegEx = Nothing
End Function
Public Function GetHtml(Url As String) As String
Dim xmlhttp As Object
Set xmlhttp = CreateObject("MSXML2.serverXMLHTTP")
Dim converter As New ADODB.stream
' Get
request.Open "GET", Url, False
request.send
' raw bytes
converter.Open
converter.Type = adTypeBinary
converter.Write request.responseBody
' read
converter.Position = 0
converter.Type = adTypeText
converter.Charset = "utf-8"
' close
GetHtml = converter.ReadText
converter.Close
End Function
Public Function FindRegexpMatch(txt As String, pat As String) As Collection
Set FindRegexpMatch = New Collection
Dim rx As New RegExp
Dim matcol As MatchCollection
Dim mat As Match
Dim ret As String
Dim delimiter As String
txt = Replace(txt, Chr(10), "")
txt = Replace(txt, Chr(13), "")
rx.Global = True
rx.IgnoreCase = True
rx.MultiLine = True
rx.Pattern = pat
Set matcol = rx.Execute(txt)
'MsgBox "Match:" & matcol.Count
On Error GoTo ErrorHandler
For Each mat In matcol
'FindRegexpMatch.Add mat.SubMatches(0)
FindRegexpMatch.Add mat.Value
Next mat
Set rx = Nothing
' Insert code that might generate an error here
Exit Function
ErrorHandler:
' Insert code to handle the error here
MsgBox "FindRegexpMatch. " & Err.GetException()
Resume Next
End Function
任何帮助将不胜感激。
答案 0 :(得分:1)
以下是如何从A列中读取值并将发音写到B列的示例。它使用css选择器来匹配子节点,然后升级到parentNode以确保获取整个发音。您可以通过多种方式在父节点上进行匹配以获得第二个发音。请注意,我使用一个父节点和Replace
,因为发音可能跨越多个子节点。
如果要进行大量查找,请成为一个很好的网民,并在代码中稍加等待,以免请求引起轰动。
Option Explicit
Public Sub WriteOutPronounciations()
Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
Dim data As String, lastRow As Long, urls()
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row 'you need at least two words in column A or change the redim.
urls = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
ReDim results(1 To UBound(urls))
Set html = New MSHTML.HTMLDocument
With CreateObject("MSXML2.ServerXMLHTTP")
For i = LBound(urls) To UBound(urls)
.Open "GET", "https://www.oxfordlearnersdictionaries.com/definition/english/" & urls(i), False
.send
html.body.innerHTML = .responseText
data = Replace$(Replace$(html.querySelector(".name ~ .wrap").ParentNode.innerText, "/", vbNullString), Chr$(10), Chr$(32))
results(i) = Right$(data, Len(data) - 4)
Next
End With
With ThisWorkbook.Worksheets(1)
.Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
End With
End Sub
必需的参考(VBE>工具>参考):
如果您选择了API route,那么这是一个小例子。您可以使用Prototype帐户在一个月内拨打1000个免费电话。下一个最佳选择,具体取决于您希望拨打多少个电话,看起来像是10,001个电话(一个额外的PAYG通话将价格减半)。 #呼叫将受到单词是头词还是首先需要词缀查询呼叫的影响。您所需的端点结构为GET /entries/{source_lang}/{word_id}?fields=pronunciations
,尽管它似乎并没有大量过滤。您将需要一个json解析器来处理返回的json,例如github.com/VBA-tools/VBA-JSON/blob/master/JsonConverter.bas。从那里下载原始代码,并添加到名为JsonConverter的标准模块中。然后,您需要转到VBE>工具>引用>添加对Microsoft脚本运行时的引用。从复制的代码中删除最上面的属性行。
Option Explicit
Public Sub WriteOutPronounciations()
Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
Dim data As String, lastRow As Long, words()
'If not performing lemmas lookup then must be head word e.g. behave, behalf
Const appId As String = "yourAppId"
Const appKey As String = "yourAppKey"
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row
words = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
ReDim results(1 To UBound(words))
Set html = New MSHTML.HTMLDocument
Dim json As Object
With CreateObject("MSXML2.ServerXMLHTTP")
For i = LBound(words) To UBound(words)
.Open "GET", "https://od-api.oxforddictionaries.com/api/v2/entries/en-us/" & LCase$(words(i)) & "?fields=pronunciations", False
.setRequestHeader "app_id", appId
.setRequestHeader "app_key", appKey
.setRequestHeader "ContentType", "application/json"
.send
Set json = JsonConverter.ParseJson(.responseText)
results(i) = IIf(json("results")(1)("type") = "headword", json("results")(1)("lexicalEntries")(1)("pronunciations")(2)("phoneticSpelling"), "lemmas lookup required")
Set json = Nothing
Next
End With
With ThisWorkbook.Worksheets(1)
.Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
End With
End Sub