我需要获取一些除了body标签之外的任何HTML元素都不包含的文本,但问题是文本被其他标签分解并需要进入单独的单元格。
例如:
<a id="00:00:00" class="ts">[00:00:00]</a> <font class="mn">Name1</font> First bit of text<br/>
<a id="00:00:09" class="ts">[00:00:09]</a> <font class="mn">Name2</font> Second Line of Text<br/>
<a id="00:01:17" class="ts">[00:01:17]</a> <font class="mn">Name3</font> A third line of text<br/>
<a id="00:01:59" class="ts">[00:01:59]</a> <font class="mn">Name4</font> The final line of text<br/>
我能够将时间戳和名称都添加到各自的列中,但我无法弄清楚如何将每行文本放入相应的行中。
到目前为止,这是我的代码:
Dim i As Integer
Dim Timestamp As Object
Dim Name As Object
my_url = "path_to_url.html"
Set html_doc = CreateObject("htmlfile")
Set xml_obj = CreateObject("MSXML2.XMLHTTP")
xml_obj.Open "GET", my_url, False
xml_obj.send
html_doc.body.innerHTML = xml_obj.responseText
Set xml_obj = Nothing
Set Timestamp = html_doc.body.getElementsByTagName("a")
Set Name = html_doc.body.getElementsByTagName("font")
i = 2
For Each itm In Timestamp
If itm.getAttribute("className") = "ts" Then
Cells(i, 1).Value = itm.innerText
i = i + 1
End If
Next
i = 2
For Each itm In Name
If itm.getAttribute("className") = "mn" Then
Cells(i, 2).Value = itm.innerText
i = i + 1
End If
Next
我在考虑以某种方式使用<br/>
并使用LEFT,但我不确定这是否是最佳方法。提前谢谢。
答案 0 :(得分:0)
只要这是响应中唯一的内容而且没有其他部分你可以做这样的事情:
编辑:修订为除了
以外的其他内容Sub Tester()
Const RW_START As Long = 5
Const SPLITTER = "{xxxx}"
Dim i As Integer, html_doc, itm
Dim Timestamp As Object
Dim Name As Object
Dim arr, sep, txt
Set html_doc = CreateObject("htmlfile")
html_doc.body.innerHTML = Range("A1").Value 'for my testing...
Set Timestamp = html_doc.body.getElementsByTagName("a")
Set Name = html_doc.body.getElementsByTagName("font")
i = RW_START
For Each itm In Timestamp
If itm.getAttribute("className") = "ts" Then
Cells(i, 1).Value = itm.innerText
itm.innerText = "" '<<<
i = i + 1
End If
Next
i = RW_START
For Each itm In Name
If itm.getAttribute("className") = "mn" Then
Cells(i, 2).Value = itm.innerText
itm.innerText = IIf(i = RW_START, "", SPLITTER) '<<<
i = i + 1
End If
Next
'get the remaining text and split on newline (<br>)
arr = Split(html_doc.body.innerText, SPLITTER)
i = RW_START
For Each itm In arr
itm = Trim(itm)
'remove trailing vbLf
If Right(itm, 1) = vblf Then itm = Left(itm, Len(itm)-1)
Cells(i, 3).Value = Trim(itm)
i = i + 1
Next
End Sub