我正在使用Microsoft DocumentFormat.OpenXml SDK从excel文件中读取数据。 在这样做时,我会考虑一个单元格是否有空白值(如果是,也可以读取)。
现在,面对其中一个excel工作表的问题,其中workSheet.SheetDimension为null,因此代码抛出异常。
使用的代码:
类OpenXMLHelper { //一个辅助函数,用于使用OpenXML打开Excel文件,并返回包含所有数据的DataTable //工作表 // //使用OLEDB读取Excel数据时遇到了很多问题(例如,ACE驱动程序不再出现在新服务器上, // OLEDB由于安全问题而无法正常工作,并且公然忽略了工作表顶部的空白行,所以这是一个更多 //稳定的数据读取方法 //
public static DataTable ExcelWorksheetToDataTable(string pathFilename)
{
try
{
DataTable dt = new DataTable();
string dimensions = string.Empty;
using (FileStream fs = new FileStream(pathFilename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
{
using (SpreadsheetDocument document = SpreadsheetDocument.Open(fs, false))
{
// Find the sheet with the supplied name, and then use that
// Sheet object to retrieve a reference to the first worksheet.
//Sheet theSheet = document.WorkbookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == worksheetName).FirstOrDefault();
//--Sheet theSheet = document.WorkbookPart.Workbook.Descendants<Sheet>().FirstOrDefault();
//--if (theSheet == null)
//-- throw new Exception("Couldn't find the worksheet: "+ theSheet.Id);
// Retrieve a reference to the worksheet part.
//WorksheetPart wsPart = (WorksheetPart)(document.WorkbookPart.GetPartById(theSheet.Id));
//--WorksheetPart wsPart = (WorksheetPart)(document.WorkbookPart.GetPartById(theSheet.Id));
WorkbookPart workbookPart = document.WorkbookPart;
WorksheetPart wsPart = workbookPart.WorksheetParts.FirstOrDefault();
Worksheet workSheet = wsPart.Worksheet;
dimensions = workSheet.SheetDimension.Reference.InnerText; // Get the dimensions of this worksheet, eg "B2:F4"
int numOfColumns = 0;
int numOfRows = 0;
CalculateDataTableSize(dimensions, ref numOfColumns, ref numOfRows);
//System.Diagnostics.Trace.WriteLine(string.Format("The worksheet \"{0}\" has dimensions \"{1}\", so we need a DataTable of size {2}x{3}.", worksheetName, dimensions, numOfColumns, numOfRows));
SheetData sheetData = workSheet.GetFirstChild<SheetData>();
IEnumerable<Row> rows = sheetData.Descendants<Row>();
string[,] cellValues = new string[numOfColumns, numOfRows];
int colInx = 0;
int rowInx = 0;
string value = "";
SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
// Iterate through each row of OpenXML data, and store each cell's value in the appropriate slot in our [,] string array.
foreach (Row row in rows)
{
for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
{
// *DON'T* assume there's going to be one XML element for each column in each row...
Cell cell = row.Descendants<Cell>().ElementAt(i);
if (cell.CellValue == null || cell.CellReference == null)
continue; // eg when an Excel cell contains a blank string
// Convert this Excel cell's CellAddress into a 0-based offset into our array (eg "G13" -> [6, 12])
colInx = GetColumnIndexByName(cell.CellReference); // eg "C" -> 2 (0-based)
rowInx = GetRowIndexFromCellAddress(cell.CellReference) - 1; // Needs to be 0-based
// Fetch the value in this cell
value = cell.CellValue.InnerXml;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
value = stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
}
cellValues[colInx, rowInx] = value;
}
}
// Copy the array of strings into a DataTable.
// We don't (currently) make any attempt to work out which columns should be numeric, rather than string.
for (int col = 0; col < numOfColumns; col++)
{
//dt.Columns.Add("Column_" + col.ToString());
dt.Columns.Add(cellValues[col, 0]);
}
//foreach (Cell cell in rows.ElementAt(0))
//{
// dt.Columns.Add(GetCellValue(doc, cell));
//}
for (int row = 0; row < numOfRows; row++)
{
DataRow dataRow = dt.NewRow();
for (int col = 0; col < numOfColumns; col++)
{
dataRow.SetField(col, cellValues[col, row]);
}
dt.Rows.Add(dataRow);
}
dt.Rows.RemoveAt(0);
//#if DEBUG
// // Write out the contents of our DataTable to the Output window (for debugging)
// string str = "";
// for (rowInx = 0; rowInx < maxNumOfRows; rowInx++)
// {
// for (colInx = 0; colInx < maxNumOfColumns; colInx++)
// {
// object val = dt.Rows[rowInx].ItemArray[colInx];
// str += (val == null) ? "" : val.ToString();
// str += "\t";
// }
// str += "\n";
// }
// System.Diagnostics.Trace.WriteLine(str);
//#endif
return dt;
}
}
}
catch (Exception ex)
{
return null;
}
}
public static void CalculateDataTableSize(string dimensions, ref int numOfColumns, ref int numOfRows)
{
// How many columns & rows of data does this Worksheet contain ?
// We'll read in the Dimensions string from the Excel file, and calculate the size based on that.
// eg "B1:F4" -> we'll need 6 columns and 4 rows.
//
// (We deliberately ignore the top-left cell address, and just use the bottom-right cell address.)
try
{
string[] parts = dimensions.Split(':'); // eg "B1:F4"
if (parts.Length != 2)
throw new Exception("Couldn't find exactly *two* CellAddresses in the dimension");
numOfColumns = 1 + GetColumnIndexByName(parts[1]); // A=1, B=2, C=3 (1-based value), so F4 would return 6 columns
numOfRows = GetRowIndexFromCellAddress(parts[1]);
}
catch
{
throw new Exception("Could not calculate maximum DataTable size from the worksheet dimension: " + dimensions);
}
}
public static int GetRowIndexFromCellAddress(string cellAddress)
{
// Convert an Excel CellReference column into a 1-based row index
// eg "D42" -> 42
// "F123" -> 123
string rowNumber = System.Text.RegularExpressions.Regex.Replace(cellAddress, "[^0-9 _]", "");
return int.Parse(rowNumber);
}
public static int GetColumnIndexByName(string cellAddress)
{
// Convert an Excel CellReference column into a 0-based column index
// eg "D42" -> 3
// "F123" -> 5
var columnName = System.Text.RegularExpressions.Regex.Replace(cellAddress, "[^A-Z_]", "");
int number = 0, pow = 1;
for (int i = columnName.Length - 1; i >= 0; i--)
{
number += (columnName[i] - 'A' + 1) * pow;
pow *= 26;
}
return number - 1;
}
}[enter image description here][1]
答案 0 :(得分:0)
SheetDimension
部分是可选的(因此您不能总是依赖它是最新的)。请参阅OpenXML规范的以下部分:
18.3.1.35维度(工作表维度)
此元素指定工作表的使用范围。它指定的行和列边界 在工作表中使用了单元格。 这是可选的,不是必需的。 使用的单元格包括具有公式,文本内容和单元格的单元格 格式。格式化整个列时,只有第一个单元格 该列被视为已使用。
因此,没有任何SheetDimension
部分的Excel文件完全有效,因此您不应该依赖它存在于Excel文件中。
因此我建议简单地解析Row
部分中包含的所有SheetData
个元素,并且&#34; count&#34;行数(而不是读取SheetDimensions
部分以获得行数/列数)。这样,您还可以考虑到Excel文件可能在数据之间包含完全空白的行。