我想"清洁" CSV文件:
行或列不是完全空的,例如: """""""""""& #34;"""""""""",& #34;""""&#34 ;, (以行的形式) 要么 """""""""""& #34;"""""""&#34 ;, (以行的形式) OR
"",
"",
"",
"",
"",
"",
"",
(以列形式)
这些行或列可以位于CSV文件中的任何位置。
到目前为止我所拥有的:
private void button1_Click(object sender, EventArgs e)
{
string sourceFile = @"XXXXX.xlsx";
string worksheetName = "Sample";
string targetFile = @"C:\Users\xxxx\xls_test\XXXX.csv";
// Creates the CSV file based on the XLS file
ExcelToCSVCoversion(sourceFile, worksheetName, targetFile);
// Manipulate the CSV: Clean empty rows
DeleteEmptyRoadFromCSV(targetFile);
}
static void ExcelToCSVCoversion(string sourceFile, string worksheetName,
string targetFile)
{
string connectionString = @"Provider =Microsoft.ACE.OLEDB.12.0;Data Source=" + sourceFile
+ @";Extended Properties=""Excel 12.0 Xml;HDR=YES""";
OleDbConnection connection = null;
StreamWriter writer = null;
OleDbCommand command = null;
OleDbDataAdapter dataAdapter = null;
try
{
// Represents an open connection to a data source.
connection = new OleDbConnection(connectionString);
connection.Open();
// Represents a SQL statement or stored procedure to execute
// against a data source.
command = new OleDbCommand("SELECT * FROM [" + worksheetName + "$]",
connection);
// Specifies how a command string is interpreted.
command.CommandType = CommandType.Text;
// Implements a TextWriter for writing characters to the output stream
// in a particular encoding.
writer = new StreamWriter(targetFile);
// Represents a set of data commands and a database connection that are
// used to fill the DataSet and update the data source.
dataAdapter = new OleDbDataAdapter(command);
DataTable dataTable = new DataTable();
dataAdapter.Fill(dataTable);
for (int row = 0; row < dataTable.Rows.Count; row++)
{
string rowString = "";
for (int column = 0; column < dataTable.Columns.Count; column++)
{
rowString += "\"" + dataTable.Rows[row][column].ToString() + "\",";
}
writer.WriteLine(rowString);
}
Console.WriteLine();
Console.WriteLine("The excel file " + sourceFile + " has been converted " +
"into " + targetFile + " (CSV format).");
Console.WriteLine();
}
catch (Exception exception)
{
Console.WriteLine(exception.ToString());
Console.ReadLine();
}
finally
{
if (connection.State == ConnectionState.Open)
{
connection.Close();
}
connection.Dispose();
command.Dispose();
dataAdapter.Dispose();
writer.Close();
writer.Dispose();
}
}
static void DeleteEmptyRoadFromCSV(string fileName)
{
//string nonEmptyLines = @"XXXX.csv";
var nonEmptyLines = File.ReadAllLines(fileName)
.Where(x => !x.Split(',')
.Take(2)
.Any(cell => string.IsNullOrWhiteSpace(cell))
// use `All` if you want to ignore only if both columns are empty.
).ToList();
File.WriteAllLines(fileName, nonEmptyLines);
}
最后,我尝试使用以下创意: Remove Blank rows from csv c#。但我的输出完全没有变化。
欢迎任何帮助!
谢谢。
答案 0 :(得分:1)
您可以在保存csv之前从表中删除列/行。 方法没有经过测试,但你应该得到这个概念。
static void ExcelToCSVCoversion(string sourceFile, string worksheetName,
string targetFile)
{
string connectionString = @"Provider =Microsoft.ACE.OLEDB.12.0;Data Source=" + sourceFile
+ @";Extended Properties=""Excel 12.0 Xml;HDR=YES""";
OleDbConnection connection = null;
StreamWriter writer = null;
OleDbCommand command = null;
OleDbDataAdapter dataAdapter = null;
try
{
// Represents an open connection to a data source.
connection = new OleDbConnection(connectionString);
connection.Open();
// Represents a SQL statement or stored procedure to execute
// against a data source.
command = new OleDbCommand("SELECT * FROM [" + worksheetName + "$]",
connection);
// Specifies how a command string is interpreted.
command.CommandType = CommandType.Text;
// Implements a TextWriter for writing characters to the output stream
// in a particular encoding.
writer = new StreamWriter(targetFile);
// Represents a set of data commands and a database connection that are
// used to fill the DataSet and update the data source.
dataAdapter = new OleDbDataAdapter(command);
DataTable dataTable = new DataTable();
dataAdapter.Fill(dataTable);
var emptyRows =
dataTable.Select()
.Where(
row =>
dataTable.Columns.Cast<DataColumn>()
.All(column => string.IsNullOrEmpty(row[column].ToString()))).ToArray();
Array.ForEach(emptyRows, x => x.Delete());
var emptyColumns =
dataTable.Columns.Cast<DataColumn>()
.Where(column => dataTable.Select().All(row => string.IsNullOrEmpty(row[column].ToString())))
.ToArray();
Array.ForEach(emptyColumns, column => dataTable.Columns.Remove(column));
dataTable.AcceptChanges();
for (int row = 0; row < dataTable.Rows.Count; row++)
{
string rowString = "";
for (int column = 0; column < dataTable.Columns.Count; column++)
{
rowString += "\"" + dataTable.Rows[row][column].ToString() + "\",";
}
writer.WriteLine(rowString);
}
Console.WriteLine();
Console.WriteLine("The excel file " + sourceFile + " has been converted " +
"into " + targetFile + " (CSV format).");
Console.WriteLine();
}
catch (Exception exception)
{
Console.WriteLine(exception.ToString());
Console.ReadLine();
}
finally
{
if (connection.State == ConnectionState.Open)
{
connection.Close();
}
connection.Dispose();
command.Dispose();
dataAdapter.Dispose();
writer.Close();
writer.Dispose();
}
}
答案 1 :(得分:0)
请检查以下查询是否正常。我正在获取所有行:
var nonEmptyLines = File.ReadAllLines(FileName)
.Where(x => !x.Split(',')
.Take(2)
.Any(cell => string.IsNullOrWhiteSpace(cell))
// use `All` if you want to ignore only if both columns are empty.
).ToList();
我认为您可以使用以下内容:
var nonEmptyLines = File.ReadAllLines(File).
SkipWhile(cell=>{var arr=cell.Split(',');if(string.IsNullOrWhiteSpace(cell)){
return true;
}
else
{
return false;
}
});