我在将大型 Excel 文件加载到 DataTable 时遇到了问题。我尝试过许多第三方库,例如 NPOI 和 EPPlus,但都没能解决我的问题。经过大量搜索,我找到了一个建议:使用 Stream 每次只加载文件的一部分。
但是那篇文章只讨论了概念,我不知道如何每次读取文件的一部分,再把各个部分组合起来并进行解析。
我知道有很多第三方库可以完成这项任务,但我真的想了解最基本的实现方式。有人可以给我一些示例吗?
答案 0 :(得分:1)
如果您的 Excel 文档很大,那么最好使用 Open XML SDK;对于较小的文件,我认为 EPPlus 更适合。
对于 EPPlus,您可以使用以下代码:
/// <summary>
/// Loads the first worksheet of an Excel workbook into a <see cref="DataTable"/> using EPPlus.
/// </summary>
/// <param name="path">Path of the workbook file to read.</param>
/// <returns>
/// A table whose columns are named after the cells of the first sheet row and whose rows
/// hold the values of every following sheet row. The table is empty when the sheet has no cells.
/// </returns>
public DataTable GetDataTableFromExcel(string path)
{
    var tbl = new DataTable();
    using (var pck = new OfficeOpenXml.ExcelPackage())
    {
        // Load through a stream so the file handle is released as soon as loading is done.
        using (var stream = File.OpenRead(path))
        {
            pck.Load(stream);
        }

        // Only the first sheet is read; add code here to read other sheets.
        var ws = pck.Workbook.Worksheets.First();

        // Dimension is null when the sheet contains no cells at all.
        if (ws.Dimension == null)
        {
            return tbl;
        }

        int lastColumn = ws.Dimension.End.Column;
        int lastRow = ws.Dimension.End.Row;

        // The first sheet row supplies the column names. Columns are walked by index
        // (not by enumerating the range, which only yields existing cells) so a blank
        // header cell still produces a column and later cell indexes stay in range.
        // An empty name makes DataTable assign a default one.
        for (int colNum = 1; colNum <= lastColumn; colNum++)
        {
            tbl.Columns.Add(ws.Cells[1, colNum].Text);
        }

        // Data starts on row 2 (row 1 is the header) and includes the last row.
        for (int rowNum = 2; rowNum <= lastRow; rowNum++)
        {
            var wsRow = ws.Cells[rowNum, 1, rowNum, lastColumn];
            DataRow row = tbl.Rows.Add();
            foreach (var cell in wsRow)
            {
                // Evaluate formulas so Value holds the computed result.
                cell.Calculate();
                row[cell.Start.Column - 1] = cell.Value;
            }
        }
    }

    return tbl;
}
对于 Open XML SDK,您可以使用以下代码:
/// <summary>
/// Loads the worksheet named "myFirstSheetname" into a <see cref="DataTable"/> using the Open XML SDK.
/// </summary>
/// <param name="path">Path of the .xlsx file; it is opened read-only.</param>
/// <returns>
/// A table whose columns come from the first sheet row and whose rows hold the values of
/// the remaining sheet rows. The table is empty when the sheet has no rows.
/// </returns>
/// <remarks>
/// Relies on <c>SpreedsheetHelper</c>, which is not shown here; its exact behavior
/// (for example how it treats cells missing from a row) should be confirmed against its source.
/// </remarks>
public DataTable GetDataTableFromExcel(string path)
{
    var dataTable = new DataTable();
    using (SpreadsheetDocument doc = SpreadsheetDocument.Open(path, false))
    {
        // Read data from the first sheet.
        Worksheet worksheet = SpreedsheetHelper.GetWorksheetPart(doc.WorkbookPart, "myFirstSheetname").Worksheet;
        SheetData sheetData = worksheet.GetFirstChild<SheetData>();
        if (sheetData == null)
        {
            return dataTable;
        }

        // Materialize once: the sequence is needed for both the header and the data pass.
        List<Row> rows = sheetData.Descendants<Row>().ToList();
        if (rows.Count == 0)
        {
            return dataTable;
        }

        // The first row defines the columns.
        foreach (Cell headerCell in SpreedsheetHelper.GetRowCells(rows[0]))
        {
            var colname = SpreedsheetHelper.TryGetCellValue(doc, headerCell) ?? "";
            dataTable.Columns.Add(colname, SpreedsheetHelper.GetCellDatatype(headerCell));
        }

        // Every following row is data. The header row is skipped up front rather than being
        // inserted and removed afterwards, so header text is never written into typed columns.
        for (int rowIndex = 1; rowIndex < rows.Count; rowIndex++)
        {
            DataRow dataRow = dataTable.NewRow();
            var cellindex = 0;
            foreach (Cell cell in SpreedsheetHelper.GetRowCells(rows[rowIndex]))
            {
                dataRow[cellindex] = SpreedsheetHelper.TryGetCellValue(doc, cell) ?? "";
                cellindex++;
            }
            dataTable.Rows.Add(dataRow);
        }
    }

    return dataTable;
}
希望这会有所帮助。
答案 1 :(得分:0)
1. 从 NuGet 包管理器安装最新版本的 NPOI。
2. 使用以下方法将 Excel 流读入 DataTable。这是经过充分测试、可以正常工作的代码。传入 Excel 流、目标表以及要读取的工作表数量。
/// <summary>
/// Appends the rows of the first <paramref name="sheetsToRead"/> sheets of an .xlsx workbook
/// to <paramref name="dt"/> using NPOI.
/// </summary>
/// <param name="excelFileStream">Stream containing the .xlsx workbook.</param>
/// <param name="dt">
/// Table that receives the data. When it has no rows yet, one column is added per header
/// cell position of the first sheet's first row.
/// </param>
/// <param name="sheetsToRead">
/// Number of sheets to read, starting at the first one; capped at the number of sheets
/// the workbook actually has.
/// </param>
/// <param name="processingError">Empty on success; the exception message when reading fails.</param>
/// <remarks>
/// The first row of every sheet is treated as a header and skipped. Rows that are missing,
/// have no cells, or contain only blank text are not added. Any exception is rethrown after
/// <paramref name="processingError"/> has been set. The workbook is only read and never
/// written back, so no protection flags are touched.
/// </remarks>
public void ReadFromExcel(Stream excelFileStream, DataTable dt, int sheetsToRead, out string processingError)
{
    processingError = "";
    try
    {
        XSSFWorkbook workbook = new XSSFWorkbook(excelFileStream);
        XSSFSheet sheet = (XSSFSheet)workbook.GetSheetAt(0);
        XSSFRow headerRow = (XSSFRow)sheet.GetRow(0);
        if (headerRow == null)
        {
            throw new InvalidDataException("The first sheet has no header row.");
        }

        // LastCellNum is the index of the last header cell plus one.
        int cellCount = headerRow.LastCellNum;

        if (dt.Rows.Count == 0)
        {
            // Read the first row as the header of the table.
            try
            {
                // Start at 0 (not FirstCellNum): data cells are stored by absolute cell
                // index below, so the table needs one column per cell position. Missing
                // header cells get an empty name, which DataTable replaces with a default.
                for (int j = 0; j < cellCount; j++)
                {
                    var headerCell = headerRow.GetCell(j);
                    string columnName = headerCell == null ? "" : headerCell.StringCellValue;
                    dt.Columns.Add(new DataColumn(columnName));
                }
            }
            catch (Exception ex)
            {
                logger.Error("Error", ex);
                throw;
            }
        }

        // Never ask for more sheets than the workbook contains.
        int sheetCount = Math.Min(sheetsToRead, workbook.NumberOfSheets);
        for (int sheetIndex = 0; sheetIndex < sheetCount; sheetIndex++)
        {
            sheet = (XSSFSheet)workbook.GetSheetAt(sheetIndex);
            if (sheet == null)
            {
                continue;
            }

            // LastRowNum is the index of the last row, so the row count is one more.
            int rowCount = sheet.LastRowNum + 1;

            // Skip the sheet's first row (header) and copy the rest.
            for (int i = sheet.FirstRowNum + 1; i < rowCount; i++)
            {
                XSSFRow row = (XSSFRow)sheet.GetRow(i);

                // GetRow yields null for rows that are not defined in the sheet, and
                // FirstCellNum is negative for a row without cells; neither carries data.
                if (row == null || row.FirstCellNum < 0)
                {
                    continue;
                }

                DataRow dataRow = dt.NewRow();
                bool isBlankRow = true;
                for (int j = row.FirstCellNum; j < cellCount; j++)
                {
                    var cell = row.GetCell(j);
                    if (cell == null)
                    {
                        continue;
                    }

                    string text = cell.ToString();
                    if (!string.IsNullOrWhiteSpace(text))
                    {
                        dataRow[j] = text;
                        isBlankRow = false;
                    }
                }

                if (!isBlankRow)
                {
                    dt.Rows.Add(dataRow);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Report the failure through the out parameter and let the caller see the exception.
        processingError = ex.Message;
        throw;
    }
}
答案 2 :(得分:0)
我有办法解决内存不足的问题。我的代码可以处理 10 万到 20 万行、或约 25MB 的 Excel 文件。
// Stream the first worksheet element by element instead of loading the whole sheet into
// memory. Both the document and the reader are disposed when the block ends.
using (SpreadsheetDocument doc = SpreadsheetDocument.Open(fullFilePath, false)) // open read-only
{
    WorkbookPart workbookPart = doc.WorkbookPart;
    using (OpenXmlReader reader = OpenXmlReader.Create(workbookPart.WorksheetParts.First()))
    {
        bool headerRowPending = true; // the first row encountered is the header row

        while (reader.Read()) // visit every element of the worksheet part
        {
            // Only row elements are of interest; their cells are handled below.
            if (reader.ElementType != typeof(Row))
            {
                continue;
            }

            bool isHeaderRow = headerRowPending;
            headerRowPending = false;

            // Descend into the row. When the row has no children there is nothing to read,
            // and the sibling loop below must not run: it would otherwise step through the
            // following rows as siblings and skip their cells.
            if (!reader.ReadFirstChild())
            {
                continue;
            }

            do
            {
                if (reader.ElementType == typeof(Cell))
                {
                    // Load just this cell into memory.
                    Cell cell = (Cell)reader.LoadCurrentElement();
                    if (isHeaderRow)
                    {
                        // The header row is skipped; read the cell here if header info is needed.
                    }
                    else
                    {
                        // Not every value is stored directly in the cell element;
                        // GetValue resolves those cases.
                        var container = GetValue(workbookPart, cell);
                    }
                }
            } while (reader.ReadNextSibling()); // continue until the last sibling of the row
        }
    }
}
/// <summary>
/// Returns the text of a cell, looking it up in the shared string table when the cell
/// is marked as a shared string.
/// </summary>
/// <param name="workbookPart">Workbook part whose shared string table is consulted.</param>
/// <param name="cell">Cell to read.</param>
/// <returns>The shared string's inner text for shared-string cells; otherwise the cell's own text.</returns>
private string GetValue(WorkbookPart workbookPart, Cell cell)
{
    // Use the value element's text when there is one, otherwise the cell's whole inner text.
    var valueElement = cell.CellValue;
    string rawText;
    if (valueElement != null)
    {
        rawText = valueElement.InnerText;
    }
    else
    {
        rawText = cell.InnerText;
    }

    bool isSharedString = cell.DataType != null && cell.DataType.Value == CellValues.SharedString;
    if (!isSharedString)
    {
        return rawText;
    }

    // For shared strings the raw text is an index into the shared string table.
    int sharedIndex = int.Parse(rawText);
    return workbookPart.SharedStringTablePart.SharedStringTable.ChildElements.GetItem(sharedIndex).InnerText;
}
excel的xml格式如下所示。您可以查看excel的xml格式,然后进一步了解处理大型excel的方法。