编辑:
由于内存问题,我现在尝试使用:http://blogs.msdn.com/b/brian_jones/archive/2010/06/22/writing-large-excel-files-with-the-open-xml-sdk.aspx
但仍然遇到一些问题。
String strWham = strExtract + strExtract2012;
System.Data.SqlClient.SqlCommand objCmd = new System.Data.SqlClient.SqlCommand(strWham, objConn);
objCmd.CommandTimeout = 3000;
System.Data.SqlClient.SqlDataReader objReader;
objReader = objCmd.ExecuteReader();
string path = @"\\wsi\userdata\pterrazas\AccountingReports\ExpThrough201212.xlsx";
while (objReader.Read())
{
using (var myDoc = SpreadsheetDocument.Create(path, DocumentFormat.OpenXml.SpreadsheetDocumentType.Workbook))
{
WorkbookPart workbookPart = myDoc.WorkbookPart;
/* The next line causes the error:
** Error: Object Reference not set to an instance of an object**
*/
WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();
string origninalSheetId = workbookPart.GetIdOfPart(worksheetPart);
WorksheetPart replacementPart = workbookPart.AddNewPart<WorksheetPart>();
string replacementPartId = workbookPart.GetIdOfPart(replacementPart);
OpenXmlReader reader = OpenXmlReader.Create(worksheetPart);
OpenXmlWriter writer = OpenXmlWriter.Create(replacementPart);
Row r = new Row();
Cell c = new Cell();
CellFormula f = new CellFormula();
f.CalculateCell = true;
f.Text = "RAND()";
c.Append(f);
CellValue v = new CellValue();
c.Append(v);
int numRows = 1;
int numCols = 1;
while (reader.Read())
{
if (reader.ElementType == typeof(SheetData))
{
if (reader.IsEndElement)
continue;
writer.WriteStartElement(new SheetData());
for (int row = 0; row < numRows; row++)
{
writer.WriteStartElement(r);
for (int col = 0; col < numCols; col++)
{
writer.WriteElement(c);
numCols++;
}
writer.WriteEndElement();
numRows++;
}
writer.WriteEndElement();
}
else
{
if (reader.IsStartElement)
{
writer.WriteStartElement(reader);
}
else if (reader.IsEndElement)
{
writer.WriteEndElement();
}
}
}
reader.Close();
writer.Close();
Sheet sheet = workbookPart.Workbook.Descendants<Sheet>().Where(s => s.Id.Value.Equals(origninalSheetId)).First();
sheet.Id.Value = replacementPartId;
workbookPart.DeletePart(worksheetPart);
}
}
objConn.Close();
}
catch (Exception ex)
无法正常配置以毫无例外地运行!
感谢您的帮助!
答案 0 :(得分:1)
我想到了如何在sql db字符串中使用xmlreader / xmlwriter和sqldatareader,它使用的内存非常少。这使用几乎没有任何内存在10分钟内产生了300,000行xlsx文件!
// 2012年到目前为止
String strExtract2012 = "WHERE client.typecode = 'I' AND Policy.UniqAgency IN(65536,65537,65538,65539,65540) AND Line.ExpirationDate < '1/1/2013' " +
"ORDER BY polagencycode, polbranch, clientlookup, policynumber, lineeff, linetypecode";
String strWham = strExtract + strExtract2012;
System.Data.SqlClient.SqlCommand objCmd = new System.Data.SqlClient.SqlCommand(strWham, objConn);
objCmd.CommandTimeout = 3000;
System.Data.SqlClient.SqlDataReader objReader;
objReader = objCmd.ExecuteReader();
string path = @"\\filepath\ExpThrough201212.xlsx";
string blankpath = @"\\filepath\blank.xlsx"; - put this blank xlsx file in the *filepath*
File.Copy(blankpath, path, true);
if (objReader.Read())
{
using (SpreadsheetDocument myDoc = SpreadsheetDocument.Open(path, true))
{
WorkbookPart workbookPart = myDoc.WorkbookPart;
WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();
string origninalSheetId = workbookPart.GetIdOfPart(worksheetPart);
WorksheetPart replacementPart = workbookPart.AddNewPart<WorksheetPart>();
string replacementPartId = workbookPart.GetIdOfPart(replacementPart);
OpenXmlReader reader = OpenXmlReader.Create(worksheetPart);
OpenXmlWriter writer = OpenXmlWriter.Create(replacementPart);
while (reader.Read())
{
if (reader.ElementType == typeof(SheetData))
{
if (reader.IsEndElement)
continue;
writer.WriteStartElement(new SheetData());
Row rr = new Row();
writer.WriteStartElement(rr);
//Add Header
for (int count = 0; count < objReader.FieldCount; count++)
{
String FieldName = objReader.GetName(count);
DocumentFormat.OpenXml.Spreadsheet.Cell cell = new DocumentFormat.OpenXml.Spreadsheet.Cell();
cell.DataType = DocumentFormat.OpenXml.Spreadsheet.CellValues.String;
cell.CellValue = new DocumentFormat.OpenXml.Spreadsheet.CellValue(objReader.GetName(count));
//headerRow.AppendChild(cell);
writer.WriteElement(cell);
}
writer.WriteEndElement();
//writer.WriteEndElement();
//sheetData.AppendChild(headerRow);
while (objReader.Read())
{
//DocumentFormat.OpenXml.Spreadsheet.Row newRow = new DocumentFormat.OpenXml.Spreadsheet.Row();
Row r = new Row();
writer.WriteStartElement(r);
for (int col = 0; col < objReader.FieldCount; col++)
{
String FieldValue = objReader.GetValue(col).ToString();
//columns.Add(FieldValue);
DocumentFormat.OpenXml.Spreadsheet.Cell cell = new DocumentFormat.OpenXml.Spreadsheet.Cell();
cell.DataType = DocumentFormat.OpenXml.Spreadsheet.CellValues.String;
cell.CellValue = new DocumentFormat.OpenXml.Spreadsheet.CellValue(FieldValue);
//newRow.AppendChild(cell);
writer.WriteElement(cell);
}
//.AppendChild(newRow);
writer.WriteEndElement();
}
writer.WriteEndElement();
}
else
{
if (reader.IsStartElement)
{
writer.WriteStartElement(reader);
}
else if (reader.IsEndElement)
{
writer.WriteEndElement();
}
}
}
reader.Close();
writer.Close();
Sheet sheet = workbookPart.Workbook.Descendants<Sheet>().Where(s => s.Id.Value.Equals(origninalSheetId)).First();
sheet.Id.Value = replacementPartId;
workbookPart.DeletePart(worksheetPart);
}
}
objReader.Close();
答案 1 :(得分:0)
您为objReader
中的每一行创建一个新文档; while (objReader.Read())
行应该在文件创建代码中移动,但这不会导致您当前看到的问题。
您看到的问题是因为您用作基础的示例会读取现有文件(使用SpreadsheetDocument.Open
)并将数据复制到该文件但您正在创建一个全新的文件从头开始(使用SpreadsheetDocument.Create
)。这就是为什么他的文档已经包含workbookpart
但你的文件没有。{1}}。由于您的workbookPart
变量为null
,因此您收到了对象引用错误。
Vincent Tan发表了一篇关于如何使用纯SAX方法here编写大型Excel文件的精彩文章。他提到了使用您提供的链接中的代码可能遇到的一些问题,因此我强烈建议您阅读。
使用 作为基础,您可以通过以下方式获得更多或更少的内容:
String strWham = strExtract + strExtract2012;
System.Data.SqlClient.SqlCommand objCmd = new System.Data.SqlClient.SqlCommand(strWham, objConn);
objCmd.CommandTimeout = 3000;
System.Data.SqlClient.SqlDataReader objReader;
objReader = objCmd.ExecuteReader();
string path = @"\\wsi\userdata\pterrazas\AccountingReports\ExpThrough201212.xlsx";
using (SpreadsheetDocument myDoc = SpreadsheetDocument.Create(path, SpreadsheetDocumentType.Workbook))
{
List<OpenXmlAttribute> xmlAttributes;
OpenXmlWriter writer;
//add a workbookpart manually
myDoc.AddWorkbookPart();
WorksheetPart worksheetpart = myDoc.WorkbookPart.AddNewPart<WorksheetPart>();
//create an XML writer for the worksheetpart
writer = OpenXmlWriter.Create(worksheetpart);
writer.WriteStartElement(new Worksheet());
writer.WriteStartElement(new SheetData());
int rowNumber = 1;
while (objReader.Read())
{
xmlAttributes = new List<OpenXmlAttribute>();
// this is the row index
xmlAttributes.Add(new OpenXmlAttribute("r", null, rowNumber.ToString()));
//write the row start element with the attributes added above
writer.WriteStartElement(new Row(), xmlAttributes);
for (int columnNumber = 1; columnNumber < objReader.FieldCount; columnNumber++)
{
//reset the attributes
xmlAttributes = new List<OpenXmlAttribute>();
// this is the data type ("t"), with CellValues.String ("str")
// you might need to change this depending on your source data
// you might also consider using the Shared Strings table instead
xmlAttributes.Add(new OpenXmlAttribute("t", null, "str"));
//add the cell reference (A1, B1, A2... etc)
xmlAttributes.Add(new OpenXmlAttribute("r", null, GetExcelColumnName(rowNumber, columnNumber)));
//write the start of the cell element with the type and cell reference attributes
writer.WriteStartElement(new Cell(), xmlAttributes);
//write the cell value
writer.WriteElement(new CellValue(objReader[columnNumber].ToString()));
//write the cell end element
writer.WriteEndElement();
}
//write the row end element
writer.WriteEndElement();
rowNumber++;
}
//write the sheetdata end element
writer.WriteEndElement();
//write the worksheet end element
writer.WriteEndElement();
writer.Close();
//create a writer for the workbookpart
writer = OpenXmlWriter.Create(myDoc.WorkbookPart);
//write the start element of a workbook to the workbook part
writer.WriteStartElement(new Workbook());
//write the start element of a sheets item to the workbook part
writer.WriteStartElement(new Sheets());
//write the whole element of a sheet to the workbook part
//note we link it to the id of the worksheetpart populated above
writer.WriteElement(new Sheet()
{
Name = "Sheet1",
SheetId = 1,
Id = myDoc.WorkbookPart.GetIdOfPart(worksheetpart)
});
//write the sheets end element
writer.WriteEndElement();
//write the workbook end element
writer.WriteEndElement();
writer.Close();
myDoc.Close();
}
objConn.Close();
我使用的方法GetExcelColumnName
改编自this StackOverflow问题。
private static string GetExcelColumnName(int rowNumber, int columnNumber)
{
int dividend = columnNumber;
string columnName = String.Empty;
int modulo;
while (dividend > 0)
{
modulo = (dividend - 1) % 26;
columnName = Convert.ToChar(65 + modulo).ToString() + columnName;
dividend = (int)((dividend - modulo) / 26);
}
return columnName + rowNumber.ToString();
}
请注意,根据源数据的类型,您可能希望根据代码中的注释更改输出的类型。您可能还想查看Shared Strings概念,即将字符串写入单独的文件,然后将该字符串的 index 写为单元格值而不是字符串本身。
就编写Excel文件的原始性能而言,我认为以上可能是使用OpenXML SDK的最快方法。然而,这当然是以复杂性为代价的。
我希望有所帮助。