致所有可能了解 DataSet 和 DataTable,或了解 Accord.NET 提供的 ExcelReader 的论坛读者:
在一个“研究”项目中,我正尝试使用 SVM 分类器来分析 blob 形状,并使用 Excel 文档作为属性数据的中间存储。数据集可能很大,有 250,000 条记录甚至更多(最终的 Excel 文件将达到 100 - 200 MB),但写入必须按每批 2000 条记录分批完成,因为我需要把 blob 的图像文件加载到内存中来计算属性。因此,我是分批把数据追加到 xlsx 文件中的。这需要很长时间,而我不明白为什么。我使用 Accord.NET 网站上的 ExcelReader 示例作为模板。步骤如下:
然后我将新数据附加到工作表中:
foreach (blobProperties r in records)
{
worksheet.Rows.Add(new object[] {
(double)r.area,
(double)r.perimeter,
.......
(bool)r.qualifies});
line++;
}
最后,我创建了修改后的excel文件。旧文件被覆盖了。
bool success = CreateExcelFile.CreateExcelDocument(workbook, xlsFileName);
只要待处理的图像列表中还有剩余的 blob,就会重复上述步骤 1 - 5。
这对较小的数据集(< 10,000 条记录)没有问题,但处理较大的数据集时,需要花费数小时才能完成。对于有 250,000 条记录的数据集,这个循环需要迭代 125 次。
是我的做法不对吗,也就是说,这不是向 Excel 文件追加数据的正确方式?据我所知,对于 DataSet 或 DataTable 类以及 xlsx 文件来说,250,000 条记录、总共 200 MB 的数据并不算多。
/// <summary>
/// Appends a batch of blob properties to one worksheet of the project's Excel workbook,
/// creating the workbook and/or the worksheet when they do not exist yet.
/// </summary>
/// <remarks>
/// When the workbook file already exists, every worksheet in it is loaded into memory,
/// the new rows are added to the target table, and the complete workbook is written out
/// again. Each call therefore re-reads and re-writes all data stored so far, not just
/// the new batch.
/// </remarks>
/// <param name="file">Project file name; every ".blb" in it is replaced by ".xlsx" to get the workbook path.</param>
/// <param name="sheetName">Name of the worksheet to append to.</param>
/// <param name="iSheetNR">Not used by this method.</param>
/// <param name="records">Blob properties to append, one row per record.</param>
/// <exception cref="ArgumentNullException"><paramref name="file"/> or <paramref name="records"/> is null.</exception>
public void write(string file, string sheetName, int iSheetNR, List<blobProperties> records)
{
    if (file == null)
    {
        throw new ArgumentNullException("file");
    }
    if (records == null)
    {
        throw new ArgumentNullException("records");
    }

    string xlsFileName = file.Replace(".blb", ".xlsx");
    DataSet workbook = new DataSet();
    DataTable worksheet = null;

    if (File.Exists(xlsFileName))
    {
        // Load every existing sheet so that none is lost when the file is overwritten below.
        ExcelReader db = new ExcelReader(xlsFileName, true, false);
        foreach (string sn in db.GetWorksheetList())
        {
            DataTable sheet = db.GetWorksheet(sn);
            workbook.Tables.Add(sheet);

            // Remember the table that was loaded for the requested sheet directly,
            // instead of looking it up by table name afterwards.
            if (sn == sheetName)
            {
                worksheet = sheet;
            }
        }
    }

    if (worksheet == null)
    {
        // New file, or existing file without the requested sheet: create the sheet
        // and define its columns (headers).
        worksheet = new DataTable(sheetName);
        workbook.Tables.Add(worksheet);

        string[] decimalColumns =
        {
            "area", "perimeter", "areaRatio", "C", "height", "width",
            "cog.X", "cog.Y", "Mx", "My", "Ix", "Iy"
        };
        foreach (string columnName in decimalColumns)
        {
            worksheet.Columns.Add(columnName, typeof(decimal));
        }
        worksheet.Columns.Add("NrOfFish", typeof(int));
        worksheet.Columns.Add("idNumber", typeof(int));
        worksheet.Columns.Add("qualifies", typeof(bool));
    }

    // Write each blob property to the table - one record per id.
    // NOTE(review): values are passed as double while the columns are declared decimal,
    // so the conversion is left to DataTable; presumably NaN/Infinity would fail - confirm.
    foreach (blobProperties r in records)
    {
        worksheet.Rows.Add(new object[] {
            (double)r.area,
            (double)r.perimeter,
            (double)r.areaRatio,
            (double)r.C,
            (double)r.height,
            (double)r.width,
            (double)r.cog.X,
            (double)r.cog.Y,
            (double)r.Mx,
            (double)r.My,
            (double)r.Ix,
            (double)r.Iy,
            (int)r.NrOfFish,
            (int)r.idNumber,
            (bool)r.qualifies});
    }

    // No forced GC.Collect() here: the runtime reclaims memory on its own schedule.
    bool success = CreateExcelFile.CreateExcelDocument(workbook, xlsFileName);

    // The user notification (MessageBox on success/failure) was disabled in the original;
    // a failed save is only traced so that it does not go completely unnoticed.
    System.Diagnostics.Debug.WriteLineIf(!success, "Excel file was not created: " + xlsFileName);
}
答案 0 :(得分:0)
您可以使用以下方式将DataTable与数据一起填充,然后将其导出到Excel文件。
/// <summary>
/// Converts the given records to a DataTable and exports it under a
/// timestamped file name of the form "records_yyyyMMddHHmmss.xls".
/// </summary>
/// <param name="records">Records to export.</param>
public void Mydata(List<blobProperties> records)
{
    DataTable table = ConvertToDataTable(records);

    // The timestamp is taken after the conversion, at the moment the export starts.
    string stamp = DateTime.Now.ToString("yyyyMMddHHmmss");
    ExportToExcel(table, string.Format("records_{0}.xls", stamp));
}
/// <summary>
/// Builds a DataTable from a list of objects: one column per readable, non-indexed
/// public instance property of <typeparamref name="T"/>, one row per item.
/// </summary>
/// <remarks>
/// The table is named after the type, and the property names become the column headers.
/// Columns are added by name only, so no explicit column data type is set.
/// </remarks>
/// <typeparam name="T">Type whose public instance properties define the columns.</typeparam>
/// <param name="items">Items to convert.</param>
/// <returns>A table with one row per element of <paramref name="items"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
private DataTable ConvertToDataTable<T>(List<T> items)
{
    if (items == null)
    {
        throw new ArgumentNullException("items");
    }

    DataTable dataTable = new DataTable(typeof(T).Name);

    // Keep only properties that can be read without index arguments: calling
    // GetValue(item, null) on an indexer (or a write-only property) throws.
    List<PropertyInfo> columns = new List<PropertyInfo>();
    foreach (PropertyInfo prop in typeof(T).GetProperties(BindingFlags.Public | BindingFlags.Instance))
    {
        if (prop.CanRead && prop.GetIndexParameters().Length == 0)
        {
            columns.Add(prop);
            // Column name = property name.
            dataTable.Columns.Add(prop.Name);
        }
    }

    foreach (T item in items)
    {
        object[] values = new object[columns.Count];
        for (int i = 0; i < columns.Count; i++)
        {
            // Property values go into the row in column order.
            values[i] = columns[i].GetValue(item, null);
        }
        dataTable.Rows.Add(values);
    }

    return dataTable;
}
/// <summary>
/// Renders the table through a GridView and sends the resulting markup to the
/// current HTTP response as a file attachment with content type "application/ms-excel".
/// </summary>
/// <remarks>
/// The payload is the GridView's rendered output, not a native workbook file.
/// The method ends the response, so it must be called during an HTTP request.
/// </remarks>
/// <param name="dataTable">Table to export.</param>
/// <param name="fileName">File name placed in the content-disposition header.</param>
/// <exception cref="InvalidOperationException">There is no current HTTP context.</exception>
private void ExportToExcel(DataTable dataTable, string fileName)
{
    var context = System.Web.HttpContext.Current;
    if (context == null)
    {
        throw new InvalidOperationException("ExportToExcel requires an active HTTP request (HttpContext.Current is null).");
    }
    var response = context.Response;

    // Render first, inside using blocks, so the control and both writers are
    // disposed before the response is ended.
    string rendered;
    using (GridView gridView = new GridView())
    using (StringWriter stringWriter = new StringWriter())
    using (HtmlTextWriter htmlTextWriter = new HtmlTextWriter(stringWriter))
    {
        gridView.DataSource = dataTable;
        gridView.DataBind();
        gridView.RenderControl(htmlTextWriter);
        rendered = stringWriter.ToString();
    }

    response.ClearContent();
    response.Buffer = true;
    response.AddHeader("content-disposition", string.Format("attachment; filename={0}", fileName));
    response.ContentType = "application/ms-excel";
    response.Charset = "";
    response.Output.Write(rendered);
    response.Flush();
    response.End();
}
请注意,属性的名称将是列标题。