我使用 ExcelDataReader 来读取 Excel 文件,但正在处理的文件有 16k 列,每列有 1200 条记录。其中只有 52 列有数据,其余列都是空的,所以我想把这些空列以及空行一并去掉。
尝试:
// Build the DataSet while discarding any row whose Depth is 0.
var dataset = reader.AsDataSet(new ExcelDataSetConfiguration
{
    ConfigureDataTable = tableReader => new ExcelDataTableConfiguration
    {
        FilterRow = row =>
        {
            var isDepthZero = row.Depth == 0;
            return !isDepthZero;
        }
    }
});
帮助将不胜感激
答案 0 :(得分:3)
AFAIK,没有任何简单的方法可以过滤空的列或行,我的最佳尝试是:
// ExcelDataTableConfiguration.FilterRow: keep a row only when at least one
// of its cells is non-null and has a non-empty string representation.
FilterRow = rowReader =>
{
    for (var col = 0; col < rowReader.FieldCount; col++)
    {
        var cell = rowReader[col];
        if (cell != null && !string.IsNullOrEmpty(cell.ToString()))
        {
            // First populated cell found - the row is not empty.
            return true;
        }
    }

    // Every cell was null or empty.
    return false;
},
// ExcelDataTableConfiguration.FilterColumn: keep a column only when at least
// one cell below the header row is non-null and non-empty.
// The callback receives the same reader that is driving the load, so the
// reader must be put back on the header row of the same sheet before returning.
FilterColumn = (rowReader, colIndex) =>
{
    var hasData = false;

    // Reset() rewinds to the first worksheet, so remember which sheet is being
    // loaded; otherwise every sheet after the first would be judged (and then
    // continued) using the first sheet's rows.
    var sheetName = rowReader.Name;

    rowReader.Reset();
    while (rowReader.Name != sheetName)
    {
        if (!rowReader.NextResult())
        {
            break;
        }
    }

    // Skip the first row: it holds the column names.
    rowReader.Read();
    while (rowReader.Read())
    {
        var cell = rowReader[colIndex];
        if (cell != null && !string.IsNullOrEmpty(cell.ToString()))
        {
            hasData = true;
            break;
        }
    }

    // Restore the reader: back to the same sheet, positioned on its first row,
    // so the caller can carry on reading as if nothing had happened.
    rowReader.Reset();
    while (rowReader.Name != sheetName)
    {
        if (!rowReader.NextResult())
        {
            break;
        }
    }
    rowReader.Read();

    return hasData;
}
在 FilterRow 或 FilterColumn 中使用 IExcelDataReader 参数是很危险的!我认为主 reader 和每个 rowReader 引用的其实是同一个对象!因此您必须始终清楚自己对它做了什么;如您所见,我加了一个小技巧(先 Reset 再 Read),让 reader 在用完之后恢复到可以继续读取的状态。 -HTH;)
答案 1 :(得分:0)
受 ShA.t's answer 的启发,我实现了让标题行和各列从它们在 Excel 文档中的实际起始位置开始读取。只要知道数据行从哪里开始,剩下的就只需要过滤空列。这是示例代码:
/// <summary>
/// Sample that loads a workbook into a DataSet, treating the first row that
/// contains a non-blank cell as the header row and dropping columns in which
/// no cell from the header row downwards has content.
/// </summary>
public class TrimmedTableSample
{
    #region Properties
    /// <summary>
    /// Number of blank rows skipped by the ReadHeaderRow callback before it
    /// reached the header row of the sheet currently being loaded.
    /// Reset to 0 at the start of every ReadHeaderRow call.
    /// </summary>
    int HeaderRowIndex { get; set; }
    #endregion
    #region Methods
    /// <summary>
    /// Opens the workbook at <paramref name="documentPath"/> read-only and
    /// converts it to a DataSet using the header detection and empty-column
    /// filtering configured below.
    /// </summary>
    /// <param name="documentPath">Path of the Excel document to read.</param>
    public void Read(string documentPath)
    {
        using (var stream = File.Open(documentPath, FileMode.Open, FileAccess.Read))
        using (var reader = ExcelReaderFactory.CreateReader(stream))
        {
            var dataSet = reader.AsDataSet(new ExcelDataSetConfiguration()
            {
                UseColumnDataType = true,
                ConfigureDataTable = (tableReader) => new ExcelDataTableConfiguration()
                {
                    EmptyColumnNamePrefix = "Column ",
                    UseHeaderRow = true,
                    // The callback parameters are named rowReader (not reader) so they
                    // do not clash with the 'reader' local of the enclosing using
                    // statement, which is a compile error before C# 8.
                    ReadHeaderRow = (rowReader) =>
                    {
                        HeaderRowIndex = 0;
                        while (true)
                        {
                            // A row is blank when every cell is null or whitespace.
                            // GetValue + ToString is used instead of GetString so that
                            // numeric or date cells cannot cause a cast failure.
                            var rowIsBlank = true;
                            for (var i = 0; i < rowReader.FieldCount && rowIsBlank; i++)
                            {
                                var cell = rowReader.GetValue(i);
                                rowIsBlank = cell == null || string.IsNullOrWhiteSpace(cell.ToString());
                            }

                            // Stop on the first row with content, or when the sheet
                            // has no further rows to advance to.
                            if (!rowIsBlank || !rowReader.Read())
                            {
                                break;
                            }

                            // Count a skipped row only after actually moving past it.
                            HeaderRowIndex++;
                        }
                    },
                    FilterColumn = (rowReader, index) =>
                    {
                        var hasData = false;
                        string sheet = rowReader.Name;

                        // Start over from the beginning of the workbook and walk
                        // forward to the sheet that is currently being loaded.
                        rowReader.Reset();
                        while (rowReader.Name != sheet)
                        {
                            if (!rowReader.NextResult())
                            {
                                break;
                            }
                        }

                        // Skip the blank rows that precede the header row.
                        for (var skipped = 0; skipped < HeaderRowIndex; skipped++)
                        {
                            rowReader.Read();
                        }

                        // Look for any cell with content in this column. The first row
                        // visited here is the header row itself, so a column that has a
                        // header cell is kept even when nothing is filled in below it.
                        while (rowReader.Read())
                        {
                            var cell = rowReader[index];
                            if (cell != null && !string.IsNullOrEmpty(cell.ToString()))
                            {
                                hasData = true;
                                break;
                            }
                        }

                        // Put the reader back on the header row of the same sheet so the
                        // load can continue by itself: rewind, re-select the sheet, then
                        // read HeaderRowIndex + 1 rows.
                        rowReader.Reset();
                        while (rowReader.Name != sheet)
                        {
                            if (!rowReader.NextResult())
                            {
                                break;
                            }
                        }
                        for (var consumed = 0; consumed <= HeaderRowIndex; consumed++)
                        {
                            rowReader.Read();
                        }

                        // true keeps the column, false drops it.
                        return hasData;
                    }
                }
            });
        }
    }
    #endregion
}
祝你好运!