当我使用OLEDB时,从Excel工作表中读取3200行只需2-3秒。现在我改为OpenXML格式,现在从Excel表读取3200行需要1分钟以上。
以下是我的代码:
public static DataTable ReadExcelFileDOM(string filename)
{
DataTable table;
using (SpreadsheetDocument myDoc = SpreadsheetDocument.Open(filename, true))
{
WorkbookPart workbookPart = myDoc.WorkbookPart;
Sheet worksheet = workbookPart.Workbook.Descendants<Sheet>().First();
WorksheetPart worksheetPart =
(WorksheetPart)(workbookPart.GetPartById(worksheet.Id));
SheetData sheetData =
worksheetPart.Worksheet.Elements<SheetData>().First();
List<List<string>> totalRows = new List<List<string>>();
int maxCol = 0;
foreach (Row r in sheetData.Elements<Row>())
{
// Add the empty row.
string value = null;
while (totalRows.Count < r.RowIndex - 1)
{
List<string> emptyRowValues = new List<string>();
for (int i = 0; i < maxCol; i++)
{
emptyRowValues.Add("");
}
totalRows.Add(emptyRowValues);
}
List<string> tempRowValues = new List<string>();
foreach (Cell c in r.Elements<Cell>())
{
#region get the cell value of c.
if (c != null)
{
value = c.InnerText;
// If the cell represents a numeric value, you are done.
// For dates, this code returns the serialized value that
// represents the date. The code handles strings and Booleans
// individually. For shared strings, the code looks up the
// corresponding value in the shared string table. For Booleans,
// the code converts the value into the words TRUE or FALSE.
if (c.DataType != null)
{
switch (c.DataType.Value)
{
case CellValues.SharedString:
// For shared strings, look up the value in the shared
// strings table.
var stringTable = workbookPart.
GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
// If the shared string table is missing, something is
// wrong. Return the index that you found in the cell.
// Otherwise, look up the correct text in the table.
if (stringTable != null)
{
value = stringTable.SharedStringTable.
ElementAt(int.Parse(value)).InnerText;
}
break;
case CellValues.Boolean:
switch (value)
{
case "0":
value = "FALSE";
break;
default:
value = "TRUE";
break;
}
break;
}
}
Console.Write(value + " ");
}
#endregion
// Add the cell to the row list.
int i = Convert.ToInt32(c.CellReference.ToString().ToCharArray().First() - 'A');
// Add the blank cell in the row.
while (tempRowValues.Count < i)
{
tempRowValues.Add("");
}
tempRowValues.Add(value);
}
// add the row to the totalRows.
maxCol = processList(tempRowValues, totalRows, maxCol);
Console.WriteLine();
}
table = ConvertListListStringToDataTable(totalRows, maxCol);
}
return table;
}
/// <summary>
/// Add each row to the totalRows.
/// </summary>
/// <param name="tempRows"></param>
/// <param name="totalRows"></param>
/// <param name="MaxCol">the max column number in rows of the totalRows</param>
/// <returns></returns>
private static int processList(List<string> tempRows, List<List<string>> totalRows, int MaxCol)
{
if (tempRows.Count > MaxCol)
{
MaxCol = tempRows.Count;
}
totalRows.Add(tempRows);
return MaxCol;
}
private static DataTable ConvertListListStringToDataTable(List<List<string>> totalRows, int maxCol)
{
DataTable table = new DataTable();
for (int i = 0; i < maxCol; i++)
{
table.Columns.Add();
}
foreach (List<string> row in totalRows)
{
while (row.Count < maxCol)
{
row.Add("");
}
table.Rows.Add(row.ToArray());
}
return table;
}
是否有一种有效的方法可以在某处更改此代码,以便读取过程可以更快一些。我如何将其更改为代码以便更快地读取。感谢。
答案 0 :(得分:1)
我尝试了您的代码并注意到在非常简单的示例中我花了大约4秒来完成。
将.xls file
修改为给定的详细信息(列:区域前缀,城市,日期,功能......)并添加 3,600行你的代码最多约10秒。
我认为您应该删除所有Console.WriteLine语句,因为这些语句会降低处理xls file
的速度。删除所有这些 StopWatch显示1.26秒以获得相同的行数。
你可以找到一些原因,即使在SO: Console.WriteLine slow上,console.WriteLine也很慢。在这个问题中有一个答案指向OutputDebugString
...
答案 1 :(得分:0)
我在代码中发现了一些缺点。
您可以尝试我的ExcelDataReader而没有这些缺点。见https://github.com/gSerP1983/OpenXml.Excel.Data
读取DataTable示例:
class Program
{
static void Main(string[] args)
{
var dt = new DataTable();
using (var reader = new ExcelDataReader(@"data.xlsx"))
{
dt.Load(reader);
}
Console.WriteLine("done: " + dt.Rows.Count);
Console.ReadKey();
}
}