有没有办法获取HTML并将其导入Excel,以便将其格式化为富文本格式(最好使用VBA)?基本上,当我粘贴到Excel单元格时,我希望将其转为:
<html><p>This is a test. Will this text be <b>bold</b> or <i>italic</i></p></html>
进入这个:
这是一个测试。此文本是粗体还是斜体
答案 0 :(得分:25)
是的,有可能:)事实上,让Internet Explorer为您做脏事;)
已经过测试
我的假设
<强> CODE 强>
Sub Sample()
Dim Ie As Object
Set Ie = CreateObject("InternetExplorer.Application")
With Ie
.Visible = False
.Navigate "about:blank"
.document.body.InnerHTML = Sheets("Sheet1").Range("A1").Value
.document.body.createtextrange.execCommand "Copy"
ActiveSheet.Paste Destination:=Sheets("Sheet1").Range("A1")
.Quit
End With
End Sub
<强>快照强>
HTH
西特
答案 1 :(得分:10)
您可以将HTML代码复制到剪贴板,然后将其特殊粘贴为Unicode文本。 Excel将在单元格中呈现HTML。看看这篇文章http://www.dailydoseofexcel.com/archives/2005/02/23/html-in-cells-ii/
帖子中的相关宏代码:
Private Sub Worksheet_Change(ByVal Target As Range)
Dim objData As DataObject
Dim sHTML As String
Dim sSelAdd As String
Application.EnableEvents = False
If Target.Cells.Count = 1 Then
If LCase(Left(Target.Text, 6)) = "<html>" Then
Set objData = New DataObject
sHTML = Target.Text
objData.SetText sHTML
objData.PutInClipboard
sSelAdd = Selection.Address
Target.Select
Me.PasteSpecial "Unicode Text"
Me.Range(sSelAdd).Select
End If
End If
Application.EnableEvents = True
End Sub
答案 2 :(得分:7)
如果IE示例不起作用,请使用此示例。无论如何,这应该比开始更快 一个IE实例。
这是一个基于
的完整解决方案
http://www.dailydoseofexcel.com/archives/2005/02/23/html-in-cells-ii/
注意,如果你的innerHTML是所有数字,例如'12345',HTML格式 因为它以不同的方式处理数字而不能完全在excel中工作?但添加一个字符,例如 最后的尾随空间,例如。 12345 +“&amp; nbsp;”格式好。
Sub test()
Cells(1, 1).Value = "<HTML>1<font color=blue>a</font>" & _
"23<font color=red>4</font></HTML>"
Dim rng As Range
Set rng = ActiveSheet.Cells(1, 1)
Worksheet_Change rng, ActiveSheet
End Sub
Private Sub Worksheet_Change(ByVal Target As Range, ByVal sht As Worksheet)
Dim objData As DataObject ' Set a reference to MS Forms 2.0
Dim sHTML As String
Dim sSelAdd As String
Application.EnableEvents = False
If Target.Cells.Count = 1 Then
Set objData = New DataObject
sHTML = Target.Text
objData.SetText sHTML
objData.PutInClipboard
Target.Select
sht.PasteSpecial Format:="Unicode Text"
End If
Application.EnableEvents = True
End Sub
答案 3 :(得分:7)
我知道这个帖子很古老,但在分配了innerHTML之后,ExecWB为我工作了:
.ExecWB 17, 0
'Select all contents in browser
.ExecWB 12, 2
'Copy them
然后将内容粘贴到Excel中。由于这些方法容易出现运行时错误,但在调试模式下尝试一次或两次后工作正常,您可能必须告诉Excel如果遇到错误再试一次。我通过将此错误处理程序添加到sub来解决了这个问题,并且工作正常:
Sub ApplyHTML()
On Error GoTo ErrorHandler
...
Exit Sub
ErrorHandler:
Resume
'I.e. re-run the line of code that caused the error
Exit Sub
End Sub
答案 4 :(得分:5)
我遇到了BornToCode在原始解决方案的评论中首次发现的错误。由于不熟悉Excel和VBA,我花了一秒时间才弄清楚如何实现tiQU的解决方案。所以我将其作为“For Dummies”解决方案发布在
之下Sub Sample()
Dim Ie As Object
Set Ie = CreateObject("InternetExplorer.Application")
With Ie
.Visible = False
.Navigate "about:blank"
.document.body.InnerHTML = Sheets("Sheet1").Range("I2").Value
'update to the cell that contains HTML you want converted
.ExecWB 17, 0
'Select all contents in browser
.ExecWB 12, 2
'Copy them
ActiveSheet.Paste Destination:=Sheets("Sheet1").Range("J2")
'update to cell you want converted HTML pasted in
.Quit
End With
End Sub
答案 5 :(得分:0)
好!很滑。
我很失望Excel不允许我们粘贴到合并的单元格中,而且还将包含中断的结果粘贴到“目标”单元格下面的连续行中,因为这意味着它根本不适合我。我尝试了一些调整(取消合并/重新合并等),但随后Excel将任何内容都放到了中断之下,所以这是一个死胡同。
最终,我想出了一个例程,该例程将处理简单的标签,而不使用导致合并字段问题的“本机” Unicode转换器。希望其他人对此有用:
Public Sub AddHTMLFormattedText(rngA As Range, strHTML As String, Optional blnShowBadHTMLWarning As Boolean = False)
' Adds converts text formatted with basic HTML tags to formatted text in an Excel cell
' NOTE: Font Sizes not handled perfectly per HTML standard, but I find this method more useful!
Dim strActualText As String, intSrcPos As Integer, intDestPos As Integer, intDestSrcEquiv() As Integer
Dim varyTags As Variant, varTag As Variant, varEndTag As Variant, blnTagMatch As Boolean
Dim intCtr As Integer
Dim intStartPos As Integer, intEndPos As Integer, intActualStartPos As Integer, intActualEndPos As Integer
Dim intFontSizeStartPos As Integer, intFontSizeEndPos As Integer, intFontSize As Integer
varyTags = Array("<b>", "</b>", "<i>", "</i>", "<u>", "</u>", "<sub>", "</sub>", "<sup>", "</sup>")
' Remove unhandled/unneeded tags, convert <br> and <p> tags to line feeds
strHTML = Trim(strHTML)
strHTML = Replace(strHTML, "<html>", "")
strHTML = Replace(strHTML, "</html>", "")
strHTML = Replace(strHTML, "<p>", "")
While LCase(Right$(strHTML, 4)) = "</p>" Or LCase(Right$(strHTML, 4)) = "<br>"
strHTML = Left$(strHTML, Len(strHTML) - 4)
strHTML = Trim(strHTML)
Wend
strHTML = Replace(strHTML, "<br>", vbLf)
strHTML = Replace(strHTML, "</p>", vbLf)
strHTML = Trim(strHTML)
ReDim intDestSrcEquiv(1 To Len(strHTML))
strActualText = ""
intSrcPos = 1
intDestPos = 1
Do While intSrcPos <= Len(strHTML)
blnTagMatch = False
For Each varTag In varyTags
If LCase(Mid$(strHTML, intSrcPos, Len(varTag))) = varTag Then
blnTagMatch = True
intSrcPos = intSrcPos + Len(varTag)
If intSrcPos > Len(strHTML) Then Exit Do
Exit For
End If
Next
If blnTagMatch = False Then
varTag = "<font size"
If LCase(Mid$(strHTML, intSrcPos, Len(varTag))) = varTag Then
blnTagMatch = True
intEndPos = InStr(intSrcPos, strHTML, ">")
intSrcPos = intEndPos + 1
If intSrcPos > Len(strHTML) Then Exit Do
Else
varTag = "</font>"
If LCase(Mid$(strHTML, intSrcPos, Len(varTag))) = varTag Then
blnTagMatch = True
intSrcPos = intSrcPos + Len(varTag)
If intSrcPos > Len(strHTML) Then Exit Do
End If
End If
End If
If blnTagMatch = False Then
strActualText = strActualText & Mid$(strHTML, intSrcPos, 1)
intDestSrcEquiv(intSrcPos) = intDestPos
intDestPos = intDestPos + 1
intSrcPos = intSrcPos + 1
End If
Loop
' Clear any bold/underline/italic/superscript/subscript formatting from cell
rngA.Font.Bold = False
rngA.Font.Underline = False
rngA.Font.Italic = False
rngA.Font.Subscript = False
rngA.Font.Superscript = False
rngA.Value = strActualText
' Now start applying Formats!"
' Start with Font Size first
intSrcPos = 1
intDestPos = 1
Do While intSrcPos <= Len(strHTML)
varTag = "<font size"
If LCase(Mid$(strHTML, intSrcPos, Len(varTag))) = varTag Then
intFontSizeStartPos = InStr(intSrcPos, strHTML, """") + 1
intFontSizeEndPos = InStr(intFontSizeStartPos, strHTML, """") - 1
If intFontSizeEndPos - intFontSizeStartPos <= 3 And intFontSizeEndPos - intFontSizeStartPos > 0 Then
Debug.Print Mid$(strHTML, intFontSizeStartPos, intFontSizeEndPos - intFontSizeStartPos + 1)
If Mid$(strHTML, intFontSizeStartPos, 1) = "+" Then
intFontSizeStartPos = intFontSizeStartPos + 1
intFontSize = 11 + 2 * Mid$(strHTML, intFontSizeStartPos, intFontSizeEndPos - intFontSizeStartPos + 1)
ElseIf Mid$(strHTML, intFontSizeStartPos, 1) = "-" Then
intFontSizeStartPos = intFontSizeStartPos + 1
intFontSize = 11 - 2 * Mid$(strHTML, intFontSizeStartPos, intFontSizeEndPos - intFontSizeStartPos + 1)
Else
intFontSize = Mid$(strHTML, intFontSizeStartPos, intFontSizeEndPos - intFontSizeStartPos + 1)
End If
Else
' Error!
GoTo HTML_Err
End If
intEndPos = InStr(intSrcPos, strHTML, ">")
intSrcPos = intEndPos + 1
intStartPos = intSrcPos
If intSrcPos > Len(strHTML) Then Exit Do
While intDestSrcEquiv(intStartPos) = 0 And intStartPos < Len(strHTML)
intStartPos = intStartPos + 1
Wend
If intStartPos >= Len(strHTML) Then GoTo HTML_Err ' HTML is bad!
varEndTag = "</font>"
intEndPos = InStr(intSrcPos, LCase(strHTML), varEndTag)
If intEndPos = 0 Then GoTo HTML_Err ' HTML is bad!
While intDestSrcEquiv(intEndPos) = 0 And intEndPos > intSrcPos
intEndPos = intEndPos - 1
Wend
If intEndPos > intSrcPos Then
intActualStartPos = intDestSrcEquiv(intStartPos)
intActualEndPos = intDestSrcEquiv(intEndPos)
rngA.Characters(intActualStartPos, intActualEndPos - intActualStartPos + 1) _
.Font.Size = intFontSize
End If
End If
intSrcPos = intSrcPos + 1
Loop
'Now do remaining tags
intSrcPos = 1
intDestPos = 1
Do While intSrcPos <= Len(strHTML)
If intDestSrcEquiv(intSrcPos) = 0 Then
' This must be a Tag!
For intCtr = 0 To UBound(varyTags) Step 2
varTag = varyTags(intCtr)
intStartPos = intSrcPos + Len(varTag)
While intDestSrcEquiv(intStartPos) = 0 And intStartPos < Len(strHTML)
intStartPos = intStartPos + 1
Wend
If intStartPos >= Len(strHTML) Then GoTo HTML_Err ' HTML is bad!
If LCase(Mid$(strHTML, intSrcPos, Len(varTag))) = varTag Then
varEndTag = varyTags(intCtr + 1)
intEndPos = InStr(intSrcPos, LCase(strHTML), varEndTag)
If intEndPos = 0 Then GoTo HTML_Err ' HTML is bad!
While intDestSrcEquiv(intEndPos) = 0 And intEndPos > intSrcPos
intEndPos = intEndPos - 1
Wend
If intEndPos > intSrcPos Then
intActualStartPos = intDestSrcEquiv(intStartPos)
intActualEndPos = intDestSrcEquiv(intEndPos)
With rngA.Characters(intActualStartPos, intActualEndPos - intActualStartPos + 1).Font
If varTag = "<b>" Then
.Bold = True
ElseIf varTag = "<i>" Then
.Italic = True
ElseIf varTag = "<u>" Then
.Underline = True
ElseIf varTag = "<sup>" Then
.Superscript = True
ElseIf varTag = "<sub>" Then
.Subscript = True
End If
End With
End If
intSrcPos = intSrcPos + Len(varTag) - 1
Exit For
End If
Next
End If
intSrcPos = intSrcPos + 1
intDestPos = intDestPos + 1
Loop
Exit_Sub:
Exit Sub
HTML_Err:
' There was an error with the Tags. Show warning if requested.
If blnShowBadHTMLWarning Then
MsgBox "There was an error with the Tags in the HTML file. Could not apply formatting."
End If
End Sub
请注意,这与标签的嵌套无关,而是仅对每个打开的标签都要求一个关闭标签,并假设最接近开始标签的关闭标签适用于开始标签。正确嵌套的标签可以正常工作,而嵌套不正确的标签将不会被拒绝,并且可能会也可能不会。
答案 6 :(得分:0)
要将HTML / Word放在Excel形状中并在Excel单元格中找到它,
通过这种方式,即使是带有表和其他内容的HTML也不会被拆分成多个单元格。
private void btnPutHTMLIntoExcelShape_Click(object sender, EventArgs e)
{
var fFile = new FileInfo(@"C:\Temp\temp.html");
StreamWriter SW = fFile.CreateText();
SW.Write(hecNote.DocumentHtml);
SW.Close();
Word.Application wrdApplication;
Word.Document wrdDocument;
wrdApplication = new Word.Application();
wrdApplication.Visible = true;
wrdDocument = wrdApplication.Documents.Add(@"C:\Temp\temp.html");
wrdDocument.ActiveWindow.Selection.WholeStory();
wrdDocument.ActiveWindow.Selection.Copy();
Excel.Application excApplication;
Excel.Workbook excWorkbook;
Excel._Worksheet excWorksheet;
Excel.Range excRange = null;
excApplication = new Excel.Application();
excApplication.Visible = true;
excWorkbook = excApplication.Workbooks.Add(Type.Missing);
excWorksheet = (Excel.Worksheet)excWorkbook.Worksheets.get_Item(1);
excWorksheet.Name = "Work";
excRange = excWorksheet.get_Range("A1");
excRange.Select();
excWorksheet.PasteSpecial("Microsoft Word Document Object");
Excel.Shape O = excWorksheet.Shapes.Item(1);
this.Text = $"{O.Height} x {O.Width}";
((Excel.Range)excWorksheet.Rows[1, Type.Missing]).RowHeight = O.Height;
}