我遇到一个问题,文件生成过程可以按预期工作,但是当我打开excel文件时,它说文件已损坏,需要修复。修复完成后,将打开文件,并且所有数据都在其中。
我收到的错误消息如下:
已删除的记录:/xl/worksheets/sheet1.xml部分中的单元格信息
我的代码如下:
using (var workbookDocument = SpreadsheetDocument.Create(staging, DocumentFormat.OpenXml.SpreadsheetDocumentType.Workbook))
{
var count = query.Count();
var worksheetNumber = 1;
var worksheetCapacity = Convert.ToInt32(100000);
var worksheetCount = Convert.ToInt32(Math.Ceiling(Convert.ToDouble(count) / worksheetCapacity));
var workbookPart = workbookDocument.AddWorkbookPart();
var worksheetInfo = new List<WorksheetData>();
OpenXmlWriter worksheetWriter;
while (worksheetNumber <= worksheetCount)
{
var worksheetLine = 1;
var worksheetName = sheet + "_" + Convert.ToString(worksheetNumber);
var worksheetPart = workbookDocument.WorkbookPart.AddNewPart<WorksheetPart>
var worksheetId = workbookDocument.WorkbookPart.GetIdOfPart(worksheetPart);
var worksheetKey = Convert.ToUInt32(worksheetNumber);
var worksheetAttributes = new List<OpenXmlAttribute>();
worksheetAttributes.Add(new OpenXmlAttribute("r", null, worksheetLine.ToString()));
worksheetInfo.Add(new WorksheetData() { Id = worksheetId, Key = worksheetKey, Name = worksheetName });
worksheetWriter = OpenXmlWriter.Create(worksheetPart);
worksheetWriter.WriteStartElement(new Worksheet());
worksheetWriter.WriteStartElement(new SheetData());
worksheetWriter.WriteStartElement(new Row(), worksheetAttributes);
for (var i = 0; i < headers.Count; i++)
{
var worksheetCell = new DocumentFormat.OpenXml.Spreadsheet.Cell();
var worksheetValue = new DocumentFormat.OpenXml.Spreadsheet.CellValue(headers[i]);
worksheetAttributes.Clear();
worksheetAttributes.Add(new OpenXmlAttribute("t", null, "str"));
worksheetAttributes.Add(new OpenXmlAttribute("r", null, GetColumnReference(worksheetLine, i)));
worksheetWriter.WriteStartElement(worksheetCell, worksheetAttributes);
worksheetWriter.WriteElement(worksheetValue);
worksheetWriter.WriteEndElement();
}
worksheetWriter.WriteEndElement();
worksheetLine++;
var skip = ((worksheetNumber - 1) * worksheetCapacity);
var results = query.SelectProperties(columns).Skip(skip).Take(worksheetCapacity).ToList();
for (var j = 0; j < results.Count; j++)
{
worksheetAttributes.Clear();
worksheetAttributes.Add(new OpenXmlAttribute("r", null, worksheetLine.ToString()));
worksheetWriter.WriteStartElement(new Row());
for (var k = 0; k < columns.Count(); k++)
{
var column = columns[k].Split((".").ToCharArray()).Last();
var value = results[j].GetType().GetField(column).GetValue(results[j]);
var type = value?.GetType().Name;
var text = ExportFormatter.Format(type, value);
worksheetAttributes.Clear();
worksheetAttributes.Add(new OpenXmlAttribute("t", null, "str"));
worksheetAttributes.Add(new OpenXmlAttribute("r", null, GetColumnReference(worksheetLine, j)));
worksheetWriter.WriteStartElement(new Cell());
worksheetWriter.WriteElement(new CellValue(text));
worksheetWriter.WriteEndElement();
}
worksheetWriter.WriteEndElement();
worksheetLine++;
}
worksheetWriter.WriteEndElement();
worksheetWriter.WriteEndElement();
worksheetWriter.Close();
worksheetNumber++;
}
worksheetWriter = OpenXmlWriter.Create(workbookDocument.WorkbookPart);
worksheetWriter.WriteStartElement(new Workbook());
worksheetWriter.WriteStartElement(new Sheets());
for (var i = 0; i < worksheetInfo.Count; i++)
{
worksheetWriter.WriteElement(new Sheet()
{
Name = worksheetInfo[i].Name,
SheetId = worksheetInfo[i].Key,
Id = worksheetInfo[i].Id
});
}
worksheetWriter.WriteEndElement();
worksheetWriter.WriteEndElement();
worksheetWriter.Close();
}
我使用下面的类来跟踪工作表信息:
private class WorksheetData
{
public String Id { get; set; }
public UInt32 Key { get; set; }
public String Name { get; set; }
}
谁能确定为什么会这样? 也许还有多余的结束标记或丢失的标记?
答案 0 :(得分:1)
最后使它起作用,有一些问题。
最终的代码如下:
public static ExportInfo Export<T>(this IQueryable<T> query, String temp, String path, List<ExportField> fields)
{
var worker = new ExportWorker();
return worker.Export<T>(query, temp, path, fields);
}
public static class ExportFormatter
{
public static String Format(String type, Object value)
{
if (value == null)
{
return "";
}
else
{
var text = "";
switch (type)
{
case "Decimal":
var decimalValue = (Decimal)value;
text = decimal.Round(decimalValue, 2, MidpointRounding.AwayFromZero).ToString();
break;
case "DateTimeOffset":
var dateTimeOffset = (DateTimeOffset)value;
text = dateTimeOffset.ToUniversalTime().ToString("MM/dd/yyyy");
break;
case "DateTime":
var dateTime = (DateTime)value;
text = dateTime.ToUniversalTime().ToString("MM/dd/yyyy");
break;
default:
text = Convert.ToString(value);
break;
}
return text;
}
public class ExportWorker
{
String record;
String staging;
String destination;
Thread thread;
Timer timer;
public ExportInfo Export<T>(IQueryable<T> query, String temp, String path, List<ExportField> fields)
{
var selections = from a in fields group a by new { a.Field } into g select new { g.Key.Field, Header = g.Max(x => x.Header) };
var headers = (from a in selections select a.Header).ToList();
var columns = (from a in selections select a.Field).Distinct().ToList();
var entity = query.ElementType.ToString();
var array = entity.Split((".").ToCharArray());
var sheet = array[array.Length - 1];
var key = Guid.NewGuid().ToString().Replace("-", "_");
var name = key + ".xlsx";
var log = key + ".txt";
var timeout = 60 * 60000;
staging = temp + name;
destination = path + name;
record = path + log;
thread = new Thread(
new ThreadStart(() =>
{
try
{
using (var workbookDocument = SpreadsheetDocument.Create(staging, DocumentFormat.OpenXml.SpreadsheetDocumentType.Workbook))
{
var count = query.Count();
var worksheetNumber = 1;
var worksheetCapacity = Convert.ToInt32(100000);
var worksheetCount = Convert.ToInt32(Math.Ceiling(Convert.ToDouble(count) / worksheetCapacity));
var workbookPart = workbookDocument.AddWorkbookPart();
var worksheetInfo = new List<WorksheetData>();
OpenXmlWriter worksheetWriter;
while (worksheetNumber <= worksheetCount)
{
var worksheetLine = 1;
var worksheetThrottle = 0;
var worksheetName = sheet + "_" + Convert.ToString(worksheetNumber);
var worksheetPart = workbookDocument.WorkbookPart.AddNewPart<WorksheetPart>();
var worksheetId = workbookDocument.WorkbookPart.GetIdOfPart(worksheetPart);
var worksheetKey = Convert.ToUInt32(worksheetNumber);
var worksheetAttributes = new List<OpenXmlAttribute>();
worksheetAttributes.Add(new OpenXmlAttribute("r", null, worksheetLine.ToString()));
worksheetInfo.Add(new WorksheetData() { Id = worksheetId, Key = worksheetKey, Name = worksheetName });
worksheetWriter = OpenXmlWriter.Create(worksheetPart);
worksheetWriter.WriteStartElement(new Worksheet());
worksheetWriter.WriteStartElement(new SheetData());
worksheetWriter.WriteStartElement(new Row(), worksheetAttributes);
for (var i = 0; i < headers.Count; i++)
{
var worksheetCell = new Cell();
worksheetCell.DataType = CellValues.String;
worksheetCell.CellValue = new CellValue(headers[i]);
worksheetWriter.WriteElement(worksheetCell);
worksheetAttributes.Clear();
}
worksheetWriter.WriteEndElement();
worksheetLine++;
var skip = ((worksheetNumber - 1) * worksheetCapacity);
var results = query.SelectProperties(columns).Skip(skip).Take(worksheetCapacity).ToList();
for (var j = 0; j < results.Count; j++)
{
if (worksheetThrottle >= 5) { worksheetThrottle = 0; System.Threading.Thread.Sleep(1); }
worksheetAttributes.Clear();
worksheetAttributes.Add(new OpenXmlAttribute("r", null, worksheetLine.ToString()));
worksheetWriter.WriteStartElement(new Row(), worksheetAttributes);
for (var k = 0; k < columns.Count(); k++)
{
var column = columns[k].Split((".").ToCharArray()).Last();
var value = results[j].GetType().GetField(column).GetValue(results[j]);
var type = value?.GetType().Name;
var text = (String)ExportFormatter.Format(type, value);
var worksheetCell = new Cell();
worksheetCell.DataType = CellValues.String;
worksheetCell.CellValue = new CellValue(text);
worksheetWriter.WriteElement(worksheetCell);
worksheetAttributes.Clear();
}
worksheetWriter.WriteEndElement();
worksheetLine++;
worksheetThrottle++;
}
worksheetWriter.WriteEndElement();
worksheetWriter.WriteEndElement();
worksheetWriter.Close();
worksheetNumber++;
}
worksheetWriter = OpenXmlWriter.Create(workbookDocument.WorkbookPart);
worksheetWriter.WriteStartElement(new Workbook());
worksheetWriter.WriteStartElement(new Sheets());
for (var i = 0; i < worksheetInfo.Count; i++)
{
worksheetWriter.WriteElement(new Sheet()
{
Name = worksheetInfo[i].Name,
SheetId = worksheetInfo[i].Key,
Id = worksheetInfo[i].Id
});
}
worksheetWriter.WriteEndElement();
worksheetWriter.WriteEndElement();
worksheetWriter.Close();
}
var logsfile = File.CreateText(record);
var datafile = (new DirectoryInfo(temp)).GetFiles().FirstOrDefault(a => a.Name == name);
datafile.MoveTo(destination);
logsfile.WriteLine("succeeded");
logsfile.Close();
}
catch (Exception ex)
{
try { File.Delete(staging); } catch (Exception) { }
var logsfile = File.CreateText(record);
logsfile.WriteLine("failed");
logsfile.WriteLine(ex.ToString());
logsfile.Close();
}
})
);
thread.Start();
timer = new Timer(Expire, null, timeout, Timeout.Infinite);
return new ExportInfo() { File = destination, Log = record };
}
void Expire(object state)
{
try { File.Delete(staging); } catch (Exception) { }
var logsfile = File.CreateText(record);
logsfile.WriteLine("timeout");
logsfile.Close();
thread.Abort();
}
private class WorksheetData
{
public String Id { get; set; }
public UInt32 Key { get; set; }
public String Name { get; set; }
}
}
进行这些调整后,导出效果很好。 另外,open xml解决了我在内存管理方面遇到的许多问题。 使用上述方法,我可以在大约10分钟的时间内导出3个文件,每个文件包含150万行(40列)。 在导出过程中,CPU利用率从未超过35%,并且从未使用过超过1/10的内存。太棒了...