如果问题不是很清楚,请道歉。这是一个更彻底的版本:
我有一个包含两列的电子表格:文件路径&amp;文章标题。文件路径包含文章的路径(html文件),其标题是我手动复制并从html文件粘贴到另一列。我需要这样做几百次,所以我很想知道是否有一种方法可以实现自动化。文章标题位于每个html页面的第二个<span>
的第一个<h2>
内。
示例:
单元格A1:F:\ 2003 \ 030714.html
细胞B1:篮子编织的艺术
细胞A2:F:\ 2003 \ 030718.html
细胞B2:为猫做饭
是否有某种魔法可以帮助实现这一目标?如果我可以做一个VLOOKUP
,这将是一块蛋糕,但不幸的是,开始的web开发者和我的中级excel用户都很困惑。
提前致谢!
答案 0 :(得分:1)
选择包含要更新文章标题的文件路径的单元格范围,然后运行此过程。它将检查每个文件是否存在,如果存在,将创建一个文件流对象以打开并读取该文件。它将文章标题作为第一个Span标记之后的第二组H2标记之间的文本返回。不允许检查是否已到达第一个Span标记的末尾。希望这会有所帮助。
Sub UpdateArticleTitle()
Dim rngPath As Range
Dim tsObj As Object, tsFile As Object
Dim strLine As String
Dim bytSpanCount As Byte, bytH2Count As Byte
Dim strArticleTitle As String
' Go throught the range of selected fileds
For Each rngPath In ActiveWindow.RangeSelection
' Continue if the file exists
If Dir(rngPath.Value, vbNormal) <> "" Then
' Initialize the variables
bytSpanCount = 0
bytH2Count = 0
strArticleTitle = ""
' Create a file system object
Set tsObj = CreateObject("Scripting.FileSystemObject")
' Open the HTML file
Set tsFile = tsObj.Opentextfile(rngPath.Value)
Do Until tsFile.AtEndOfStream
' Read the file
strLine = tsFile.ReadLine
' Search for the first occurrence of <span>
If bytSpanCount = 0 Then
If InStr(1, LCase(strLine), "<span>") > 0 Then bytSpanCount = 1
' If <span> has been found, then search for <h2>
ElseIf bytSpanCount = 1 Then
If InStr(1, LCase(strLine), "<h2>") > 0 Then
If bytH2Count = 0 Then
bytH2Count = 1
' The second occurence of <h2> has been reached so extract the Article Title
Else
' Get all lines until the closing </h2> tag is found
Do Until InStr(1, LCase(strLine), "</h2>") > 0
strLine = strLine & tsFile.ReadLine
Loop
' Set the article title
strArticleTitle = Mid(strLine, InStr(1, LCase(strLine), "<h2>") + Len("<h2>"), InStr(1, LCase(strLine), "</h2>") - InStr(1, LCase(strLine), "<h2>") - Len("<h2>"))
' Exit the loop
Exit Do
End If
End If
End If
Loop
' Close the file
tsFile.Close
' Update the article title in the sheet
rngPath.Offset(0, 1).Value = strArticleTitle
Else
' Clear the article title if the file isn't found
rngPath.Offset(0, 1).ClearContents
End If
Next rngPath
Set tsObj = Nothing
Set tsFile = Nothing
End Sub