我正在编写一个使用OpenXML来读取excel文件的程序。该文件还包含日期,所以我需要一个识别它们的函数。在stackoverflow上,我找到了这个解决方案:How to distinguish inline numbers from OLE Automation date numbers in OpenXML SpreadSheet?。
我已尝试将其应用于我的代码,但每次出现以下错误时都会:
“OpenXML_Prova.exe中发生了'System.NullReferenceException'类型的未处理异常
附加信息:对象引用未设置为对象的实例。“
在GetDateTimeCellFormats的第一行,其中似乎NumberingFormats始终为null。
以下是完整的代码:
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace OpenXML_Prova
{
class Program
{
DocumentFormat.OpenXml.Spreadsheet.NumberingFormats numb = new NumberingFormats();
static uint[] builtInDateTimeNumberFormatIDs = new uint[] { 14, 15, 16, 17, 18, 19, 20, 21, 22, /*27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57, 58 */};
static Dictionary<uint, NumberingFormat> builtInDateTimeNumberFormats = builtInDateTimeNumberFormatIDs.ToDictionary(id => id, id => new NumberingFormat { NumberFormatId = id });
static Regex dateTimeFormatRegex = new Regex(@"((?=([^[]*\[[^[\]]*\])*([^[]*[ymdhs]+[^\]]*))|.*\[(h|mm|ss)\].*)", RegexOptions.Compiled);
static void Main(string[] args) {
Program prova = new Program();
prova.ReadFile();
}
void ReadFile()
{
var filePath = @"C:\\Users\\m.p\\Desktop\\report_fatturato_brevissimo.xlsx";
using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
{
using (SpreadsheetDocument doc = SpreadsheetDocument.Open(fs, false))
{
WorkbookPart workbookPart = doc.WorkbookPart;
SharedStringTablePart sstpart = workbookPart.GetPartsOfType<SharedStringTablePart>().First();
SharedStringTable sst = sstpart.SharedStringTable;
WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();
Worksheet sheet = worksheetPart.Worksheet;
var cells = sheet.Descendants<Cell>();
var rows = sheet.Descendants<Row>();
Console.WriteLine("Row count = {0}", rows.LongCount());
Console.WriteLine("Cell count = {0}", cells.LongCount());
// Or... via each row
foreach (Row row in rows)
{
foreach (Cell c in row.Elements<Cell>())
{
if ((c.DataType != null) && (c.DataType == CellValues.SharedString))
{
int ssid = int.Parse(c.CellValue.Text);
string str = sst.ChildElements[ssid].InnerText;
Console.Write/*Line*/(/*"Shared string {0}: {1}", ssid, */str + "\t");
}
else
{
if (IsDateTimeCell(workbookPart, c) == true)
{
int ssid = int.Parse(c.CellValue.Text);
DateTime date = DateTime.FromOADate(Double.Parse(c.CellValue.Text));
Console.Write/*Line*/(/*"Shared string {0}: {1}", ssid, */date + "\t");
}
else if (c.CellValue != null)
{
Console.Write/*Line*/(/*"Cell contents: {0}", */c.CellValue.Text + "\t");
}
}
}
Console.WriteLine("");
}
}
}
Console.Read();
}
public static Dictionary<uint, NumberingFormat> GetDateTimeCellFormats(WorkbookPart wbPart)
{
//Error on the following instruction: NumberingFormats seems to be always null
var dateNumberFormats = wbPart.WorkbookStylesPart.Stylesheet.NumberingFormats
.Descendants<NumberingFormat>()
.Where(nf => dateTimeFormatRegex.Match(nf.FormatCode.Value).Success)
.ToDictionary(nf => nf.NumberFormatId.Value);
var cellFormats = wbPart.WorkbookStylesPart.Stylesheet.CellFormats
.Descendants<CellFormat>();
var dateCellFormats = new Dictionary<uint, NumberingFormat>();
uint styleIndex = 0;
foreach (var cellFormat in cellFormats)
{
if (cellFormat.ApplyNumberFormat != null && cellFormat.ApplyNumberFormat.Value)
{
if (dateNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
{
dateCellFormats.Add(styleIndex, dateNumberFormats[cellFormat.NumberFormatId.Value]);
}
else if (builtInDateTimeNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
{
dateCellFormats.Add(styleIndex, builtInDateTimeNumberFormats[cellFormat.NumberFormatId.Value]);
}
}
styleIndex++;
}
return dateCellFormats;
}
//Usage Example
public static bool IsDateTimeCell(WorkbookPart wbPart, Cell cell)
{
if (cell.StyleIndex == null)
return false;
var dateTimeCellFormats = GetDateTimeCellFormats(wbPart);
return dateTimeCellFormats.ContainsKey(cell.StyleIndex);
}
}
}
答案 0 :(得分:0)
OpenXML has 164 built-in formats。默认的DateTime格式是这些内置类型的一部分。
仅当您使用Excel(自定义格式)创建新格式时,这些格式才会包含在NumberingFormats
的{{1}}部分中。
我建议您像下面一样包装代码以检查Stylesheet
,以避免NumberingFormats
NullReferenceException
然后,您需要添加一些代码来检测日期时间格式。 这个SO article tries用于确定可能的日期时间样式。请参阅上面的一些注释和第一个链接,以获取更详细的规范参考。