我正在使用一个很小的测试电子表格,里面只有一列数字和一列字符串——大规模测试稍后再做。我尝试了几种与下面类似的实现,但没有一种能读出数据。下面的代码主要来自另一个StackOverflow帖子,在那里它似乎可以正常工作——但在我这里不行。我想还是请大家检查/调试/帮忙看看这个版本,因为它的问题可能比我今天自己写的任何版本都要少。
// Streams through the first WorksheetPart of the workbook at fileName with an
// OpenXmlPartReader and writes the "r" attribute of each Row element to the console.
// Pauses for a key press after the row loop and again before returning.
static void ReadExcelFileSAX(string fileName)
{
// NOTE(review): the second argument (true) presumably opens the package as editable,
// although nothing in this method writes to it - confirm against the SDK documentation.
using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(fileName, true))
{
WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
// Takes the first part in the WorksheetParts collection.
// NOTE(review): whether this is the sheet shown first in Excel is not established here - confirm.
WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();
OpenXmlPartReader reader = new OpenXmlPartReader(worksheetPart);
// Declared but never used in this method.
string text;
string rowNum;
while (reader.Read())
{
if (reader.ElementType == typeof(Row))
{
// Positioned on a Row: walk this row and all of its siblings.
do
{
if (reader.HasAttributes)
{
// First(...) throws if the element has attributes but none is named "r".
rowNum = reader.Attributes.First(a => a.LocalName == "r").Value;
Console.Write("rowNum: " + rowNum); //we never even get here, I tested it with a breakpoint
}
} while (reader.ReadNextSibling()); // Skip to the next row
// Wait for a key press before leaving the read loop.
Console.ReadKey();
break; // We just looped through all the rows so no need to continue reading the worksheet
}
// Placeholder: Cell elements are detected but nothing is done with them.
if (reader.ElementType == typeof(Cell))
{
}
// NOTE(review): Skip() followed by the loop's Read() may step over the element that
// Skip() landed on (possibly sheetData, which holds the rows) - verify in the debugger.
if (reader.ElementType != typeof(Worksheet)) // Don't want to skip the contents of the worksheet
reader.Skip(); // Skip contents of any node before finding the first row.
}
// Not reached if an exception is thrown above; the reader is not wrapped in a using block.
reader.Close();
Console.WriteLine();
Console.ReadKey();
}
}
另外顺便问一下,除了使用OpenXml SDK之外,还有没有什么我可能错过的好的替代方案?
答案 0 :(得分:21)
我认为您选错了用来读取行的WorksheetPart。
该行
workbookPart.WorksheetParts.First();
获取的是集合中的第一个WorksheetPart,但它不一定是您在Microsoft Excel中看到的第一个工作表。
因此,请遍历所有WorksheetParts,这样您应该会在控制台窗口中看到一些输出。
/// <summary>
/// Streams through every worksheet part of a workbook and writes the row
/// reference (the "r" attribute) of each row to the console.
/// </summary>
/// <param name="fileName">Path of the .xlsx file to read.</param>
static void ReadExcelFileSAX(string fileName)
{
    // Nothing is written back to the package, so open it read-only.
    using (SpreadsheetDocument spreadsheetDocument =
        SpreadsheetDocument.Open(fileName, false))
    {
        WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;

        // The order of WorksheetParts is not necessarily the tab order shown in
        // Excel, so every part is visited rather than only the first one.
        foreach (WorksheetPart worksheetPart in workbookPart.WorksheetParts)
        {
            // using guarantees the reader is released even if reading throws.
            using (OpenXmlPartReader reader = new OpenXmlPartReader(worksheetPart))
            {
                // Plain Read() is used to reach the first row. Calling Skip() here
                // leaves the reader on the following element, and the loop's next
                // Read() then steps past that element - which can be sheetData,
                // so no row would ever be seen.
                while (reader.Read())
                {
                    if (reader.ElementType != typeof(Row))
                    {
                        continue;
                    }

                    do
                    {
                        if (reader.HasAttributes)
                        {
                            // "r" is optional on a row; do not throw when it is absent.
                            string rowNum = reader.Attributes
                                .Where(a => a.LocalName == "r")
                                .Select(a => a.Value)
                                .FirstOrDefault();
                            if (rowNum != null)
                            {
                                Console.Write("rowNum: " + rowNum);
                            }
                        }
                    } while (reader.ReadNextSibling()); // Move to the next row

                    // All sibling rows have been visited, so there is no need to
                    // keep reading this worksheet.
                    break;
                }
            }
        }
    }
}
要读取所有单元格值,请使用以下函数(省略所有错误处理详细信息):
/// <summary>
/// Streams through every worksheet part of a workbook and writes each cell as
/// "reference: value" to the console, one output line per row. Cells of type
/// SharedString are resolved through the workbook's shared string table.
/// </summary>
/// <param name="fileName">Path of the .xlsx file to read.</param>
static void ReadAllCellValues(string fileName)
{
    using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(fileName, false))
    {
        WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;

        // Materialise the shared strings once. ElementAt on the element sequence
        // walks from the start of the table for every single cell.
        SharedStringItem[] sharedStrings = new SharedStringItem[0];
        if (workbookPart.SharedStringTablePart != null
            && workbookPart.SharedStringTablePart.SharedStringTable != null)
        {
            sharedStrings = workbookPart.SharedStringTablePart.SharedStringTable
                .Elements<SharedStringItem>()
                .ToArray();
        }

        foreach (WorksheetPart worksheetPart in workbookPart.WorksheetParts)
        {
            using (OpenXmlReader reader = OpenXmlReader.Create(worksheetPart))
            {
                while (reader.Read())
                {
                    if (reader.ElementType != typeof(Row) || !reader.IsStartElement)
                    {
                        continue;
                    }

                    // A row without children must not enter the sibling loop:
                    // ReadNextSibling() would then walk over the following rows
                    // and their cells would never be read.
                    if (reader.ReadFirstChild())
                    {
                        do
                        {
                            if (reader.ElementType == typeof(Cell))
                            {
                                Cell c = (Cell)reader.LoadCurrentElement();

                                // A cell may carry only a style and no value element.
                                string rawValue = c.CellValue != null ? c.CellValue.InnerText : null;
                                string cellValue = string.Empty;

                                if (c.DataType != null && c.DataType == CellValues.SharedString)
                                {
                                    int index;
                                    if (rawValue != null
                                        && int.TryParse(
                                            rawValue,
                                            System.Globalization.NumberStyles.Integer,
                                            System.Globalization.CultureInfo.InvariantCulture,
                                            out index)
                                        && index >= 0
                                        && index < sharedStrings.Length)
                                    {
                                        SharedStringItem ssi = sharedStrings[index];
                                        // Rich-text items have no direct Text child;
                                        // fall back to the item's concatenated inner text.
                                        cellValue = ssi.Text != null ? ssi.Text.Text : ssi.InnerText;
                                    }
                                }
                                else if (rawValue != null)
                                {
                                    cellValue = rawValue;
                                }

                                Console.Out.Write("{0}: {1} ", c.CellReference, cellValue);
                            }
                        } while (reader.ReadNextSibling());
                    }

                    Console.Out.WriteLine();
                }
            }
        }
    }
}
在上面的代码中,您会看到数据类型为SharedString的单元格必须通过SharedStringTablePart来处理。
答案 1 :(得分:0)
为了读取空白单元格,我在行读取循环外部声明了一个变量,它在每读取一个单元格后递增;在循环中,我检查当前单元格的列索引是否大于这个变量。如果两者不匹配,我就用想要的值(这里是空字符串)填充缺失的列。这就是我用来让空白单元格对应到正确列的技巧。代码如下:
/// <summary>
/// Reads the visible worksheets of an .xlsx file into the table created by
/// CreateProviderRvenueSharingTable, padding blank cells with empty strings so
/// values stay aligned with their columns. Only columns A..T are imported.
/// </summary>
/// <param name="newFilePath">Path of the .xlsx file to read.</param>
/// <returns>The populated table.</returns>
/// <remarks>
/// Exceptions propagate to the caller unchanged. The former catch block used
/// "throw ex;", which discarded the original stack trace.
/// </remarks>
public static DataTable ReadIntoDatatableFromExcel(string newFilePath)
{
    // Number of columns imported per row (A..T).
    const int columnCount = 20;
    // Reading of a sheet stops once more than this many rows with a blank first cell were seen.
    const int maxEmptyRows = 5;

    /* The table is expected to have 20 columns */
    var dt = CreateProviderRvenueSharingTable();

    /* FileShare.ReadWrite lets the file be read while another process has it open */
    using (Stream stream = new FileStream(newFilePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(stream, false))
    {
        var workbookPart = spreadsheetDocument.WorkbookPart;
        var workbook = workbookPart.Workbook;

        /* Only sheets without a State attribute, i.e. not hidden */
        var sheets = workbook.Descendants<Sheet>().Where(e => e.State == null);
        foreach (var sheet in sheets)
        {
            var worksheetPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id);

            /* Ignore sheets with at most one non-empty row */
            List<Row> rows = worksheetPart.Worksheet.Elements<SheetData>().First().Elements<Row>()
                .Where(r => r.InnerText != string.Empty).ToList();
            if (rows.Count <= 1)
            {
                continue;
            }

            // using releases the reader even when reading throws.
            using (OpenXmlReader reader = OpenXmlReader.Create(worksheetPart))
            {
                int rowHits = 0;
                int emptyRowCount = 0;
                while (reader.Read())
                {
                    if (reader.ElementType == typeof(Row))
                    {
                        /* Skip the header: the first two Row hits are ignored. */
                        // NOTE(review): Read() appears to report both the start and the end tag
                        // of the header row, which would make this skip exactly one row - confirm.
                        if (rowHits < 2)
                        {
                            rowHits++;
                            continue;
                        }

                        reader.ReadFirstChild();
                        DataRow row = dt.NewRow();
                        int column = 0;
                        if (reader.ElementType == typeof(Cell))
                        {
                            do
                            {
                                // Ignore any non-cell sibling instead of failing the cast below.
                                if (reader.ElementType != typeof(Cell))
                                {
                                    continue;
                                }

                                Cell c = (Cell)reader.LoadCurrentElement();

                                /* The reader does not report blank cells, so pad the gap
                                   in front of this cell to keep the columns aligned. */
                                if (column != 0)
                                {
                                    int cellColumnIndex =
                                        ExcelHelper.GetColumnIndexFromName(
                                            ExcelHelper.GetColumnName(c.CellReference));

                                    // Anything to the right of column T is not imported.
                                    if (cellColumnIndex > columnCount)
                                    {
                                        break;
                                    }

                                    // Pad up to, but not including, this cell's own slot.
                                    // Column T itself is included here; the previous
                                    // "< 20" test skipped the padding for it.
                                    while (column < cellColumnIndex - 1)
                                    {
                                        row[column] = string.Empty;
                                        column++;
                                    }
                                }

                                /* A first cell with no type and no value marks an empty row */
                                if (column == 0 && c.DataType == null && c.CellValue == null)
                                {
                                    emptyRowCount++;
                                    break;
                                }

                                row[column] = GetCellValue(c, workbookPart);
                                column++;

                                /* All imported columns are filled */
                                if (column == columnCount)
                                {
                                    break;
                                }
                            } while (reader.ReadNextSibling());
                        }

                        /* Pad trailing blank cells of a row that has data */
                        while (column != 0 && column < columnCount)
                        {
                            row[column] = string.Empty;
                            column++;
                        }

                        if (column == columnCount)
                        {
                            dt.Rows.Add(row);
                        }
                    }

                    /* Stop once enough empty rows below the data have been seen */
                    if (emptyRowCount > maxEmptyRows)
                    {
                        break;
                    }
                }
            }
        }
    }

    return dt;
}
/// <summary>
/// Returns the text of a cell, resolving cells of type SharedString through
/// the workbook's shared string table.
/// </summary>
/// <param name="c">The cell to read.</param>
/// <param name="workbookPart">Workbook part that owns the shared string table.</param>
/// <returns>
/// The cell text, or an empty string when the cell has no value element or the
/// shared string item has no direct text.
/// </returns>
private static string GetCellValue(Cell c, WorkbookPart workbookPart)
{
    // Without a value element there is neither a literal value nor a shared
    // string index to read. The shared-string branch used to dereference
    // c.CellValue unconditionally.
    if (c.CellValue == null)
    {
        return string.Empty;
    }

    string rawValue = c.CellValue.InnerText;

    if (c.DataType != null && c.DataType == CellValues.SharedString)
    {
        // The stored value is an index into the shared string table; it is
        // machine-written, so parse it culture-independently.
        int index = int.Parse(rawValue, System.Globalization.CultureInfo.InvariantCulture);

        SharedStringItem ssi =
            workbookPart.SharedStringTablePart.SharedStringTable
                .Elements<SharedStringItem>()
                .ElementAt(index);

        // NOTE(review): items without a direct Text child (presumably rich text)
        // still yield an empty string, as before - confirm that is intended.
        return ssi.Text != null ? ssi.Text.Text : string.Empty;
    }

    return rawValue;
}
/// <summary>
/// Converts a column name ("A", "AB") or a cell reference ("AB12") into its
/// 1-based column index ("A" = 1, "Z" = 26, "AA" = 27).
/// </summary>
/// <param name="columnNameOrCellReference">
/// Column name or cell reference. Only the ASCII letters A-Z are used, in
/// either case; every other character is ignored.
/// </param>
/// <returns>The 1-based column index, or 0 when the input contains no letters.</returns>
/// <exception cref="ArgumentNullException">The argument is null.</exception>
public static int GetColumnIndexFromName(string columnNameOrCellReference)
{
    if (columnNameOrCellReference == null)
    {
        throw new ArgumentNullException("columnNameOrCellReference");
    }

    int columnIndex = 0;
    int factor = 1;
    for (int pos = columnNameOrCellReference.Length - 1; pos >= 0; pos--) // R to L
    {
        char letter = columnNameOrCellReference[pos];

        // Fold lower case to upper case. Previously 'a' was computed as
        // 'a' - 'A' + 1 = 33 instead of 1.
        if (letter >= 'a' && letter <= 'z')
        {
            letter = (char)(letter - 'a' + 'A');
        }

        // Only A-Z are base-26 digits. Char.IsLetter also accepted non-ASCII
        // letters, which produced meaningless indices.
        if (letter >= 'A' && letter <= 'Z')
        {
            columnIndex += factor * ((letter - 'A') + 1);
            factor *= 26;
        }
    }
    return columnIndex;
}
/// <summary>
/// Extracts the column part of a cell reference, e.g. "AB" from "AB12".
/// </summary>
/// <param name="cellReference">Cell reference such as "A1" or "XFD1048576".</param>
/// <returns>The characters in front of the first numeric character.</returns>
/// <exception cref="ArgumentNullException">The argument is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// No numeric character was found within the first four characters.
/// </exception>
public static string GetColumnName(string cellReference)
{
    if (cellReference == null)
    {
        throw new ArgumentNullException("cellReference");
    }

    // Only positions 0..3 are inspected, as before: column names are at most
    // three letters long (the last column is "XFD").
    const int lastPositionToCheck = 3;

    /* Advance from L to R until a number, then return 0 through previous position.
       The scan is also bounded by the string length: a short reference without a
       digit ("A", "AB") used to index past the end of the string. */
    for (int lastCharPos = 0;
         lastCharPos <= lastPositionToCheck && lastCharPos < cellReference.Length;
         lastCharPos++)
    {
        if (Char.IsNumber(cellReference[lastCharPos]))
        {
            return cellReference.Substring(0, lastCharPos);
        }
    }

    throw new ArgumentOutOfRangeException("cellReference");
}
该代码的特点: 1. 能够读取空白单元格; 2. 数据读取完成后会跳过其后的空行; 3. 从第一个工作表开始按升序读取各工作表; 4. 即使Excel文件正被另一个进程使用,OpenXML仍然可以读取它。