我正在构建Windows窗体C#应用程序。我需要从Excel文档中读取希伯来语文本,然后做一些事情,然后将一些希伯来语文本写入Word文档。目前,我无法从Excel读取希伯来语(将其打印到控制台时出现问号)。我知道这个问题与编码问题有关。
代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;
namespace MLDRApplication
{
    /// <summary>
    /// Reads the customers workbook through Excel COM interop and dumps the
    /// contents of its first two worksheets to the console.
    /// </summary>
    public class ExcelReader
    {
        /// <summary>
        /// Workbook opened when the caller does not supply a path. Kept so that
        /// existing callers that relied on the previously hard-coded location
        /// keep working.
        /// </summary>
        private const string DefaultWorkbookPath =
            @"C:\Users\fares\OneDrive\Documents\customersInitValues.xlsx";

        /// <summary>
        /// Opens the workbook at <paramref name="path"/> and writes every used
        /// cell of sheet 1 (customers) and sheet 2 (laundry units) to the
        /// console, one worksheet row per console line, cells separated by tabs.
        /// </summary>
        /// <param name="path">
        /// Full path of the workbook. When null, empty or whitespace the
        /// default workbook location is used instead.
        /// </param>
        /// <returns>
        /// The list of customers. Population is not implemented yet, so the
        /// list is currently always empty.
        /// </returns>
        public static List<Customer> ExcelReadergetCustomers(string path)
        {
            List<Customer> customersList = new List<Customer>();
            string workbookPath = string.IsNullOrWhiteSpace(path) ? DefaultWorkbookPath : path;

            // Without this, non-ASCII text such as Hebrew is printed as '?'.
            EnsureUnicodeConsoleOutput();

            // Every COM object gets its own local ("never use two dots") so that
            // each one can be released explicitly and EXCEL.EXE does not linger.
            Excel.Application excelApp = null;
            Excel.Workbooks workbooks = null;
            Excel.Workbook excelWorkbook = null;
            Excel.Sheets sheets = null;
            Excel.Worksheet customersWorksheet = null;
            Excel.Worksheet laundryUnitsWorksheet = null;
            Excel.Range customersRange = null;
            Excel.Range laundryUnitsRange = null;

            try
            {
                excelApp = new Excel.Application();
                workbooks = excelApp.Workbooks;
                excelWorkbook = workbooks.Open(workbookPath);
                sheets = excelWorkbook.Sheets;

                // Excel collections are 1-based.
                customersWorksheet = (Excel.Worksheet)sheets[1];
                laundryUnitsWorksheet = (Excel.Worksheet)sheets[2];
                customersRange = customersWorksheet.UsedRange;
                laundryUnitsRange = laundryUnitsWorksheet.UsedRange;

                PrintRange(customersRange);
                PrintRange(laundryUnitsRange);
            }
            finally
            {
                // Release children before parents; runs on failure too, so an
                // exception while reading never leaves Excel running.
                Release(laundryUnitsRange);
                Release(customersRange);
                Release(laundryUnitsWorksheet);
                Release(customersWorksheet);
                Release(sheets);

                try
                {
                    if (excelWorkbook != null)
                    {
                        // Nothing was modified, so close without saving.
                        excelWorkbook.Close(false);
                    }
                }
                finally
                {
                    Release(excelWorkbook);
                    Release(workbooks);

                    try
                    {
                        if (excelApp != null)
                        {
                            excelApp.Quit();
                        }
                    }
                    finally
                    {
                        Release(excelApp);
                    }
                }
            }

            return customersList;
        }

        /// <summary>
        /// Switches the console to UTF-8 output so that Unicode text is not
        /// replaced by question marks.
        /// </summary>
        private static void EnsureUnicodeConsoleOutput()
        {
            try
            {
                if (Console.OutputEncoding.CodePage != Encoding.UTF8.CodePage)
                {
                    // No BOM: avoids a stray preamble when output is redirected.
                    Console.OutputEncoding = new UTF8Encoding(false);
                }
            }
            catch (IOException)
            {
                // Best effort: a process without an attached console (e.g. a
                // Windows Forms app) cannot change the console code page.
                // NOTE(review): the console font must also contain Hebrew
                // glyphs for the text to be displayed correctly.
            }
        }

        /// <summary>
        /// Writes all values of <paramref name="range"/> to the console: a
        /// line break before each row, then every non-empty cell followed by
        /// a tab.
        /// </summary>
        /// <param name="range">The worksheet range to print.</param>
        private static void PrintRange(Excel.Range range)
        {
            // A single bulk read replaces one COM round-trip (and one leaked
            // Range wrapper) per cell.
            object raw = range.Value2;
            object[,] cells = raw as object[,];

            if (cells == null)
            {
                // A one-cell range yields a scalar (or null) instead of an array.
                Console.Write("\r\n");
                if (raw != null)
                {
                    Console.Write(raw.ToString() + "\t");
                }
                return;
            }

            // Arrays returned by Excel are 1-based; use the real bounds.
            int firstRow = cells.GetLowerBound(0);
            int lastRow = cells.GetUpperBound(0);
            int firstCol = cells.GetLowerBound(1);
            int lastCol = cells.GetUpperBound(1);

            for (int row = firstRow; row <= lastRow; row++)
            {
                Console.Write("\r\n");
                for (int col = firstCol; col <= lastCol; col++)
                {
                    object value = cells[row, col];
                    if (value != null)
                    {
                        Console.Write(value.ToString() + "\t");
                    }
                }
            }
        }

        /// <summary>
        /// Releases a COM runtime callable wrapper; null references and
        /// non-COM objects are ignored.
        /// </summary>
        /// <param name="comObject">The COM object to release, or null.</param>
        private static void Release(object comObject)
        {
            if (comObject != null && Marshal.IsComObject(comObject))
            {
                Marshal.ReleaseComObject(comObject);
            }
        }
    }
}
我阅读了这个帖子(here),但仍然不理解这个问题,也没有找到正确的解决方案。
答案 0 :(得分:0)
您的问题不在于读取Excel。
这是因为控制台应用程序的默认编码不支持希伯来语。
在控制台应用程序中,您可以更改编码,在代码开头添加以下行:
Console.OutputEncoding = Encoding.GetEncoding("Windows-1255");
编辑: 显然
您不能在控制台中打印任意Unicode字符,它仅支持当前代码页中可用的字符。不可用的字符将被转换为最接近的等效字符,或者问号。
所以您可以做一个小的“ hack”,只需将默认输出流更改为一个文件:
Console.OutputEncoding = Encoding.GetEncoding("Windows-1255");
,并在运行结束时将所有内容写入其中。
这是一个“快速而粗糙”的解决方案;若需要长期的解决方案,请考虑使用log4net之类的日志记录库。