作为输入,我有一组excel文件,里面有几个工作表。我需要为每个工作表导出一个csv文件。下面是我的代码,但它很慢。它建立在这个previous post中提出的解决方案的基础之上。请考虑我必须在相当大的.xlsx文件(大约300Mb)上运行它。
问题:有什么办法可以改善这个吗?
void Main()
{
string folder = @"\\PATH_TO_FOLDER\";
var files = Directory.GetFiles(folder, "*.xlsx", SearchOption.TopDirectoryOnly);
foreach (string file in files)
{
ConvertToCsv(file, Directory.GetParent(file) + @"\\output\");
}
}
public static void ConvertToCsv(string file, string targetFolder)
{
FileInfo finfo = new FileInfo(file);
ExcelPackage package = new ExcelPackage(finfo);
// if targetFolder doesn't exist, create it
if (!Directory.Exists(targetFolder)) {
Directory.CreateDirectory(targetFolder);
}
var worksheets = package.Workbook.Worksheets;
int sheetcount = 0;
foreach (ExcelWorksheet worksheet in worksheets)
{
sheetcount++;
var maxColumnNumber = worksheet.Dimension.End.Column;
var currentRow = new List<string>(maxColumnNumber);
var totalRowCount = worksheet.Dimension.End.Row+1;
var currentRowNum = 1;
//No need for a memory buffer, writing directly to a file
//var memory = new MemoryStream();
string file_name = targetFolder + Path.GetFileNameWithoutExtension(file) + "_" + sheetcount + ".csv";
using (var writer = new StreamWriter(file_name, false, Encoding.UTF8))
{
//the rest of the code remains the same
for (int i = 1; i < totalRowCount; i++)
{
i.Dump();
// populate line with semi columns separators
string line = "";
for (int j = 1; j < worksheet.Dimension.End.Column+1; j++)
{
if (worksheet.Cells[i, j].Value != null)
{
string cell = worksheet.Cells[i, j].Value.ToString() + ";";
line += cell;
}
}
// write line
writer.WriteLine(line);
}
}
}
}