我有.doc,.txt,.docx的字节数据,我想将其转换为字符串,我做了以下事情,但没有得到确切的结果:
Public ByteData As Byte() = // my data
Dim str As String = String.Empty
str = System.Text.Encoding.UTF8.GetString(objCandidateInfo.ByteData, 0, objCandidateInfo.ByteData.Length)
str = Convert.ToBase64String(objCandidateInfo.ByteData)
被修改
所以现在我使用Word Application进行转换,这段代码正常运行 这是我的代码
Private Shared ObjwordApp As Word.Application
Private Shared nullobj As Object = System.Reflection.Missing.Value
Private Shared doc As Word.Document
Shared Sub New()
ObjwordApp = New Word.Application()
End Sub
Public Shared Sub InitializeClass()
ObjwordApp.Visible = False
End Sub
Private Shared Sub OpenWordFile(ByVal StrFilePath As Object)
Try
ObjwordApp.Visible = False
Catch ex As Exception
ObjwordApp = New Word.Application()
End Try
Try
doc = ObjwordApp.Documents.Open(StrFilePath, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj)
Catch ex As Exception
CloseWordFile()
ObjwordApp.Visible = False
End Try
End Sub
Private Shared Sub CopyWordContent()
Try
doc.ActiveWindow.Selection.WholeStory()
doc.ActiveWindow.Selection.Copy()
Catch ex As Exception
Clipboard.Clear()
End Try
End Sub
Private Shared Sub CloseWordFile()
Try
doc.Close()
Catch ex As Exception
End Try
End Sub
Public Shared Function ReadWordFile(ByVal StrFilePath As String, ByVal StrDataFormat As String) As String
Dim StrFileContent = String.Empty
If (File.Exists(StrFilePath)) Then
Try
OpenWordFile(StrFilePath)
CopyWordContent()
Catch ex As Exception
Finally
CloseWordFile()
End Try
Try
Dim dataObj As IDataObject = Clipboard.GetDataObject()
If (dataObj.GetDataPresent(StrDataFormat)) Then
StrFileContent = dataObj.GetData(StrDataFormat)
Else
StrFileContent = ""
End If
Clipboard.Clear()
Catch ex As Exception
End Try
End If
Return StrFileContent
End Function
当我将字节数组保存到DB时,我调用下面的函数并将其转换为rtf,它没有转换,当我将调试器附加到它dataObj
时Nothing
代码1
Dim str As String = String.Empty
Try
'str = System.Text.Encoding.UTF8.GetString(objCandidateInfo.ByteData, 0, objCandidateInfo.ByteData.Length)
'str = Convert.ToBase64String(objCandidateInfo.ByteData)
'str = System.Text.Encoding.ASCII.GetString(objCandidateInfo.ByteData, 0, objCandidateInfo.ByteData.Length)
str = ClsDocumentManager.ReadContent(objCandidateInfo.ByteData, DataFormats.Rtf)
Catch ex As Exception
End Try
我以字节和文本格式保存数据db,所以当我从db(我保存的字节值并将其转换为rtf)调用它时,其工作代码是
代码2
rtbAttachment.Rtf = ClsDocumentManager.ReadContent(byteAttachment, DataFormats.Rtf)
这些是ClsDocumentManager
class
Public Shared Function GetRandomNo() As Integer
Dim RandomNo As New Random()
Return RandomNo.Next(Convert.ToInt32(DateTime.Now().Minute.ToString() & DateTime.Now().Second.ToString() & DateTime.Now().Hour.ToString()))
End Function
Public Shared Function ReadContent(ByVal byteArray As Byte(), ByVal StrReadFormat As String) As String
Dim StrFileContent As String = String.Empty
Try
If (Not IsNothing(byteArray)) Then
Dim StrFileName As String = GetRandomNo().ToString() & ".doc"
StrFileName = ClsSingleton.aTempFolderName & StrFileName
If (CreateWordFile(byteArray, StrFileName)) Then
StrFileContent = ClsWordManager.ReadWordFile(StrFileName, StrReadFormat)
If (File.Exists(StrFileName)) Then
File.Delete(StrFileName)
End If
End If
End If
Catch ex As Exception
End Try
Return StrFileContent
End Function
Public Shared Function CreateWordFile(ByVal byteArray As Byte(), ByVal StrFileName As String) As Boolean
Dim boolResult As Boolean = False
Try
If (Not IsNothing(byteArray)) Then
If (Not File.Exists(StrFileName)) Then
Dim objFileStream As New FileStream(StrFileName, FileMode.Create, FileAccess.Write)
objFileStream.Write(byteArray, 0, byteArray.Length)
objFileStream.Close()
boolResult = True
End If
End If
Catch ex As Exception
boolResult = False
End Try
Return boolResult
End Function
调试时错误代码
Dim dataObj As IDataObject = Clipboard.GetDataObject()
If (dataObj.GetDataPresent(StrDataFormat)) Then
StrFileContent = dataObj.GetData(StrDataFormat)
Else
StrFileContent = ""
End If
`dataObj` is `Nothing` only when calling from **Code 1**
已更新
**`ClsDocumentManager`**
Imports System.IO
Public Class ClsDocumentManager
Public Shared Function GetRandomNo() As Integer
Dim RandomNo As New Random()
Return RandomNo.Next(Convert.ToInt32(DateTime.Now().Minute.ToString() & DateTime.Now().Second.ToString() & DateTime.Now().Hour.ToString()))
End Function
Public Shared Function ReadContent(ByVal byteArray As Byte(), ByVal StrReadFormat As String) As String
Dim StrFileContent As String = String.Empty
Try
If (Not IsNothing(byteArray)) Then
Dim StrFileName As String = GetRandomNo().ToString() & ".doc"
StrFileName = ClsSingleton.aTempFolderName & StrFileName
If (CreateWordFile(byteArray, StrFileName)) Then
StrFileContent = ClsWordManager.ReadWordFile(StrFileName, StrReadFormat)
If (File.Exists(StrFileName)) Then
File.Delete(StrFileName)
End If
End If
End If
Catch ex As Exception
End Try
Return StrFileContent
End Function
Public Shared Function CreateWordFile(ByVal byteArray As Byte(), ByVal StrFileName As String) As Boolean
Dim boolResult As Boolean = False
Try
If (Not IsNothing(byteArray)) Then
If (Not File.Exists(StrFileName)) Then
Dim objFileStream As New FileStream(StrFileName, FileMode.Create, FileAccess.Write)
objFileStream.Write(byteArray, 0, byteArray.Length)
objFileStream.Close()
boolResult = True
End If
End If
Catch ex As Exception
boolResult = False
End Try
Return boolResult
End Function
End Class
这是我的ClsWordManager
班级
Imports System.IO
Imports System.Text
Public Class ClsWordManager
Private Shared ObjwordApp As Word.Application
Private Shared nullobj As Object = System.Reflection.Missing.Value
Private Shared doc As Word.Document
Shared Sub New()
ObjwordApp = New Word.Application()
End Sub
Public Shared Sub InitializeClass()
ObjwordApp.Visible = False
End Sub
Private Shared Sub OpenWordFile(ByVal StrFilePath As Object)
Try
ObjwordApp.Visible = False
Catch ex As Exception
ObjwordApp = New Word.Application()
End Try
Try
doc = ObjwordApp.Documents.Open(StrFilePath, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj)
Catch ex As Exception
CloseWordFile()
ObjwordApp.Visible = False
End Try
End Sub
Private Shared Sub CopyWordContent()
Try
doc.ActiveWindow.Selection.WholeStory()
doc.ActiveWindow.Selection.Copy()
Catch ex As Exception
Clipboard.Clear()
End Try
End Sub
Private Shared Sub CloseWordFile()
Try
doc.Close()
Catch ex As Exception
End Try
End Sub
Public Shared Function ReadWordFile(ByVal StrFilePath As String, ByVal StrDataFormat As String) As String
Dim StrFileContent = String.Empty
If (File.Exists(StrFilePath)) Then
Try
OpenWordFile(StrFilePath)
CopyWordContent()
Catch ex As Exception
Finally
CloseWordFile()
End Try
Try
Dim dataObj As IDataObject = Clipboard.GetDataObject()
If (dataObj.GetDataPresent(StrDataFormat)) Then
StrFileContent = dataObj.GetData(StrDataFormat)
Else
StrFileContent = ""
End If
Clipboard.Clear()
Catch ex As Exception
End Try
End If
Return StrFileContent
End Function
End Class
所以现在问题是当我在下面的代码中转换它时:在arguement中查看ByteAttachmets
,它将字节转换为字符串
Public Function UpdateCandidateAttachment(ByVal CandidateID As Integer, ByVal ByteAttachmets As Byte(), ByVal StrExtension As String) As Integer
Dim Result As Integer = -1
Try
Dim objDataLayer As New ClsDataLayer()
Dim str As String = Nothing
Try
'str = System.Text.Encoding.UTF8.GetString(objCandidateInfo.ByteData, 0, objCandidateInfo.ByteData.Length)
'str = Convert.ToBase64String(objCandidateInfo.ByteData)
'str = System.Text.Encoding.ASCII.GetString(objCandidateInfo.ByteData, 0, objCandidateInfo.ByteData.Length)
str = ClsDocumentManager.ReadContent(ByteAttachmets, DataFormats.Rtf)
Catch ex As Exception
End Try
objDataLayer.AddParameter("@CANDIDATE_ID", CandidateID)
objDataLayer.AddParameter("@ATTACHMENT_DATA", ByteAttachmets)
objDataLayer.AddParameter("@CREATED_BY", ClsCommons.IntUserId)
objDataLayer.AddParameter("@EXTENSION", StrExtension)
Result = objDataLayer.ExecuteNonQuery("TR_PROC_UpdateCandidateAttachment")
Catch ex As Exception
MsgBox(ex.Message)
End Try
Return Result
End Function
当我从下面的代码中按属性调用它时:查看objCandidateInfo.ByteData
,它无效。
Public Function AddUpdateCandidate(ByVal objCandidateInfo As ClsCandidateInfo) As Integer
Dim Result As Integer = -1
Try
If (ClsCommons.IsValidEmail(objCandidateInfo.StrEmail)) Then
Dim str As String = Nothing
Try
'str = System.Text.Encoding.UTF8.GetString(objCandidateInfo.ByteData, 0, objCandidateInfo.ByteData.Length)
'str = Convert.ToBase64String(objCandidateInfo.ByteData)
'str = System.Text.Encoding.ASCII.GetString(objCandidateInfo.ByteData, 0, objCandidateInfo.ByteData.Length)
Dim byteAttachment As Byte() = objCandidateInfo.ByteData
str = ClsDocumentManager.ReadContent(byteAttachment, DataFormats.Rtf)
Catch ex As Exception
End Try
Dim objDataLayer As New ClsDataLayer()
objDataLayer.AddParameter("@REQUIREMENT_ID", objCandidateInfo.RequirementId)
objDataLayer.AddParameter("@Candidate_Name", objCandidateInfo.StrCandidateName)
objDataLayer.AddParameter("@Current_Organization", objCandidateInfo.StrCurrentCompany)
objDataLayer.AddParameter("@Current_Designation", objCandidateInfo.StrCurrentDesignation)
If (objCandidateInfo.StrExp.Trim() = "") Then
objDataLayer.AddParameter("@Overall_Exp", DBNull.Value)
Else
Dim DecExp As Decimal = -1
If (Decimal.TryParse(objCandidateInfo.StrExp, DecExp)) Then
objDataLayer.AddParameter("@Overall_Exp", DecExp)
Else
objDataLayer.AddParameter("@Overall_Exp", DBNull.Value)
End If
End If
objDataLayer.AddParameter("@Qualification", objCandidateInfo.StrQualification)
objDataLayer.AddParameter("@Location", objCandidateInfo.StrCurrentLocation)
objDataLayer.AddParameter("@Current_CTC", objCandidateInfo.StrCurrentCTC)
objDataLayer.AddParameter("@Expected_CTC", objCandidateInfo.StrExpectedCTC)
objDataLayer.AddParameter("@Phone_No", objCandidateInfo.StrPhoneNo)
objDataLayer.AddParameter("@Mobile", objCandidateInfo.StrMobile)
objDataLayer.AddParameter("@Notice_Period", objCandidateInfo.StrNoticePeriod)
objDataLayer.AddParameter("@Remarks", objCandidateInfo.StrRemarks)
If (objCandidateInfo.StrYearofExp.Trim() = "") Then
objDataLayer.AddParameter("@Years_of_Experience", DBNull.Value)
Else
Dim DecExp As Decimal = -1
If (Decimal.TryParse(objCandidateInfo.StrYearofExp, DecExp)) Then
objDataLayer.AddParameter("@Years_of_Experience", DecExp)
Else
objDataLayer.AddParameter("@Years_of_Experience", DBNull.Value)
End If
End If
objDataLayer.AddParameter("@Address", objCandidateInfo.StrAddress)
objDataLayer.AddParameter("@Email", objCandidateInfo.StrEmail)
If (objCandidateInfo.intIndustry > 0) Then
objDataLayer.AddParameter("@Industry", objCandidateInfo.intIndustry)
Else
objDataLayer.AddParameter("@Industry", DBNull.Value)
End If
If (objCandidateInfo.intFunctionalArea > 0) Then
objDataLayer.AddParameter("@Functional_Area", objCandidateInfo.intFunctionalArea)
Else
objDataLayer.AddParameter("@Functional_Area", DBNull.Value)
End If
If (objCandidateInfo.StrDob.Trim() = "") Then
objDataLayer.AddParameter("@DOB", DBNull.Value)
Else
Try
objDataLayer.AddParameter("@DOB", Convert.ToDateTime(objCandidateInfo.StrDob))
Catch ex As Exception
objDataLayer.AddParameter("@DOB", DBNull.Value)
End Try
End If
If (objCandidateInfo.intSourceBy > 0) Then
objDataLayer.AddParameter("@Source", objCandidateInfo.intSourceBy)
Else
objDataLayer.AddParameter("@Source", DBNull.Value)
End If
objDataLayer.AddParameter("@SKILL_SET", objCandidateInfo.strSkillSet)
objDataLayer.AddParameter("@ATTACHMENT_DATA", objCandidateInfo.ByteData)
objDataLayer.AddParameter("@EXTENSION", objCandidateInfo.StrExtension)
objDataLayer.AddParameter("@CREATED_BY", ClsCommons.IntUserId)
Result = objDataLayer.ExecuteNonQuery("TR_PROC_AddUpdateFullCandidateData")
Else
MsgBox("Data is not extracted, Some Error Occured, please update your software.")
End If
Catch ex As Exception
MsgBox(ex.Message)
End Try
Return Result
End Function
我希望我清除我的查询
答案 0 :(得分:1)
(经过几次更改后编辑。)
如果您只想获取文件的文本内容,则需要以不同方式处理文本文件和二进制文件。如果输入文件格式是基于文本的(.txt,.htm等),您可以将其视为字符串,尽管您仍然需要知道要使用的编码。
但是,如果输入文件格式是二进制的(如.doc,.docx等),则不能直接将字节数组转换为字符串,因为文件内容不仅仅代表文本 - 字节描述布局,格式化和有关该文件的其他信息。在这种情况下,您需要使用Word或其他一些第三方库来为您处理文件数据。
要使用自动化获取Word文档的内容,只需创建Word.Application
的实例,打开文档,选择其活动窗口中的所有文本,然后使用Selection.Text
属性将文本输入一个字符串。类似的东西:
oDocument.ActiveWindow.Selection.WholeStory()
sText = oDocument.ActiveWindow.Selection.Text
Selection
对象是Word中Range
的实例。这为您提供了文档的简单,无格式内容。您可以将其转换为字节数组或将其用作字符串。要将其转换为字节数组,您需要使用编码,因为必须将内存中的字符转换为字节。
如果要将内容转换为RTF格式,则需要使用第三方工具(或自行实现RTF格式) - RTF不是纯文本格式,结构相当复杂。
您还可以使用Word以RTF格式保存文档 - 查找Document.SaveAs2()
方法来执行此操作。这将文档以RTF格式保存到磁盘。如果您需要在数据库中使用此数据,只需读取.rtf文件(File.ReadAllBytes()
),然后将字节保存到数据库中。