如何使用NPOI让输出的右边缘不参差不齐(即每行的列数一致)

时间:2016-09-16 18:02:23

标签: c# excel csv xls npoi

我有一个Excel(.xls)文件需要解析为.csv文件。我正在使用C#的NPOI库的最新稳定版本。问题是我得到的CSV是参差不齐的(各行的列数不一致),而不是每行列数统一。

What the data in excel file looks like

输出的csv文件是:

"姓","中间名","名字","电话号码"

"约翰","L","Doe","555-555-5555"

"小","丁","蝙蝠"

"罗杰","D","兔","123-456-7890"

我希望的是在第二个数据行的末尾(在"蝙蝠"之后)添加一个额外的分隔符,如下所示:

"姓","中间名","名字","电话号码"

"约翰","L","Doe","555-555-5555"

"小","丁","蝙蝠",

"罗杰","D","兔","123-456-7890"

这是我的代码:

    /// <summary>
    /// Opens the .xls workbook at <c>InputFileName</c> and converts either every
    /// worksheet (when <c>AllWorksheets</c> is set) or only the first worksheet
    /// by delegating to <c>ParseWorksheet</c>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when every worksheet that was processed reported success;
    /// otherwise <c>false</c>.
    /// </returns>
    public override bool ParseFile()
    {
        // Request read-only access (FileMode.Open alone asks for read/write, which
        // fails on read-only files) and dispose the handle even if parsing throws.
        using (FileStream iFile = new FileStream(InputFileName, FileMode.Open, FileAccess.Read))
        {
            HSSFWorkbook wb = new HSSFWorkbook(iFile);
            bool result = true;

            if (AllWorksheets)
            {
                for (int i = 0; i < wb.NumberOfSheets; i++)
                {
                    // && short-circuits: once one sheet fails, later sheets are not parsed.
                    result = result && ParseWorksheet(wb, i);
                }
            }
            else
            {
                result = ParseWorksheet(wb, 0);
            }

            return result;
        }
    }
    // Quote character handed to FormatValue when output fields are wrapped in quotes.
    protected char c = '"';
    /// <summary>
    /// Optionally surrounds a value with a quote character.
    /// </summary>
    /// <param name="s">The text to format.</param>
    /// <param name="AddQuotes">When <c>true</c>, the text is wrapped in <paramref name="quoteChar"/>.</param>
    /// <param name="quoteChar">The character placed before and after the text.</param>
    /// <returns>The wrapped text, or <paramref name="s"/> unchanged when quoting is off.</returns>
    public static string FormatValue(string s, bool AddQuotes, char quoteChar)
    {
        if (!AddQuotes)
        {
            return s;
        }

        string quote = quoteChar.ToString();
        return string.Concat(quote, s, quote);
    }
    /// <summary>
    /// Writes the rows of one worksheet to <c>OutputFileName</c> as delimited text,
    /// one output line per row index between the sheet's first and last row.
    /// </summary>
    /// <param name="wb">Workbook that contains the worksheet.</param>
    /// <param name="SheetIndex">Index of the worksheet passed to <c>GetSheetAt</c>.</param>
    /// <returns>
    /// <c>false</c> if any cell had a type not handled by the switch below; otherwise <c>true</c>.
    /// </returns>
    private bool ParseWorksheet(HSSFWorkbook wb, int SheetIndex)
    {

        bool result = true;
        HSSFSheet sheet = (HSSFSheet)wb.GetSheetAt(SheetIndex);


        // Returns without writing anything when first and last row numbers are both 0.
        // NOTE(review): a sheet whose only row is row 0 also satisfies this condition - confirm intended.
        if (sheet.FirstRowNum == sheet.LastRowNum && sheet.LastRowNum == 0) return result;

        // Second argument 'true' opens the output file in append mode.
        System.IO.StreamWriter sw = new StreamWriter(OutputFileName, true);

        for (int i = sheet.FirstRowNum; i <= sheet.LastRowNum; i++)
        {
            string OutputRow = String.Empty;
            HSSFRow row = (HSSFRow)sheet.GetRow(i);
            // Column indexes into row.Cells; it advances only when a cell is consumed below.
            int Column = 0;
            // MaxCol is reset for every row, so it never reflects the width of other rows.
            int MaxCol = 0;
            int temp = 0;

            // Every iteration reads the same row.LastCellNum, so MaxCol ends up as this row's own LastCellNum.
            for (int j = 0; j < row.LastCellNum; j++)
            {
                temp = row.LastCellNum;
                if (temp > MaxCol)
                {
                    MaxCol = temp;
                }
            }
            for (int j = 0; j < MaxCol; j++)
            {
                // Only emit the cell's value when its ColumnIndex matches the current column position j.
                if (j == row.Cells[Column].ColumnIndex)
                {
                    switch (row.Cells[Column].CellType)
                    {
                        case NPOI.SS.UserModel.CellType.Boolean:
                            OutputRow += FormatValue(row.Cells[Column].BooleanCellValue.ToString(), AddQuotes, c) + Delimiter.ToString();
                            break;
                        case NPOI.SS.UserModel.CellType.Formula:
                            // Writes the string form of CachedFormulaResultType, not a cell value.
                            OutputRow += FormatValue(row.Cells[Column].CachedFormulaResultType.ToString(), AddQuotes, c) + Delimiter.ToString();
                            break;
                        case NPOI.SS.UserModel.CellType.Numeric:
                            // Date-formatted numeric cells are written as short date strings, others as plain numbers.
                            OutputRow += FormatValue((NPOI.SS.UserModel.DateUtil.IsCellDateFormatted(row.Cells[Column]) ? row.Cells[Column].DateCellValue.ToShortDateString() : row.Cells[Column].NumericCellValue.ToString()), AddQuotes, c) + Delimiter.ToString();
                            break;
                        case NPOI.SS.UserModel.CellType.Blank:
                            OutputRow += Delimiter.ToString();
                            break;
                        case NPOI.SS.UserModel.CellType.String:
                            OutputRow += FormatValue(row.Cells[Column].StringCellValue.ToString().Replace('\n', ' ').TrimEnd(), AddQuotes, c) + Delimiter.ToString();//replace the new line character to space due to formatting issue.
                            break;
                        default:
                            // Unhandled cell type: nothing is appended for this column and the method reports failure.
                            result = false;
                            break;
                    }
                    Column++;
                }
                else
                {
                    // No cell at this column position: emit only the delimiter.
                    OutputRow += Delimiter.ToString();
                }

            }

            // Drops the final character of the row (presumably the trailing delimiter appended above).
            OutputRow = OutputRow.Remove(OutputRow.Length - 1);
            sw.WriteLine(OutputRow);
        }


        sw.Flush();
        sw.Close();

        return result;
    }`

我们非常感谢任何建议。

1 个答案:

答案 0 :(得分:2)

这里有一些问题导致了这个问题。

首先,您对每一行都重新计算了MaxCol。如果您想要一个不参差不齐的右边缘,那么您需要先遍历所有行找出最大的MaxCol,然后再生成输出。

其次,您正在使用row.Cells[]来获取该行的特定单元格,但Cells[]会忽略空单元格。因此,如果行中某处有一个空白单元格,那么其后所有的值都会向左移动,并且数组的长度将小于MaxCol。如果您在至少有一个空单元格的行上访问row.Cells[MaxCol - 1],就会引发异常。此问题的解决方案是使用row.GetCell(index)方法。此方法返回位于列index(从0开始)的单元格,如果该单元格为空,则返回null。它使用起来要简单得多,并且可以让您去掉代码中的特殊逻辑——即把当前单元格的ColumnIndex与循环索引j进行比较,以确保取到的单元格确实位于您期望的列中。

作为一个额外的建议,我建议在内循环内只检索一次当前单元格并将其分配给变量,而不是多次重新检索它。这将使您的代码更高效,更易于阅读。

以下是ParseWorksheet方法的修订代码,其中包含以上所有更改:

/// <summary>
/// Appends one worksheet to <c>OutputFileName</c> as delimited text in which every
/// output line has the same number of fields (the width of the widest row).
/// </summary>
/// <param name="wb">Workbook that contains the worksheet.</param>
/// <param name="SheetIndex">Zero-based index of the worksheet to export.</param>
/// <returns>
/// <c>false</c> if at least one cell had a type that could not be converted to text
/// (its field is written empty so the columns stay aligned); otherwise <c>true</c>.
/// </returns>
private bool ParseWorksheet(HSSFWorkbook wb, int SheetIndex)
{
    bool result = true;
    HSSFSheet sheet = (HSSFSheet)wb.GetSheetAt(SheetIndex);

    // Comparing FirstRowNum/LastRowNum with 0 cannot tell an empty sheet from a
    // sheet whose only row is row 0, so count the physically present rows instead.
    if (sheet.PhysicalNumberOfRows == 0) return result;

    // First pass: find the widest row so every output line can be padded to it.
    int MaxCol = 0;
    for (int i = sheet.FirstRowNum; i <= sheet.LastRowNum; i++)
    {
        HSSFRow row = (HSSFRow)sheet.GetRow(i);
        if (row != null) // GetRow returns null for rows that were never created
        {
            MaxCol = Math.Max(MaxCol, row.LastCellNum);
        }
    }

    // Rows exist but none of them holds a cell: nothing to write.
    if (MaxCol == 0) return result;

    // 'using' flushes and closes the writer even when a cell access throws.
    using (StreamWriter sw = new StreamWriter(OutputFileName, true))
    {
        string[] fields = new string[MaxCol];

        // Second pass: emit exactly MaxCol fields per row.
        for (int i = sheet.FirstRowNum; i <= sheet.LastRowNum; i++)
        {
            HSSFRow row = (HSSFRow)sheet.GetRow(i);

            for (int j = 0; j < MaxCol; j++)
            {
                // GetCell returns null for a missing cell instead of shifting later cells left.
                HSSFCell cell = row == null ? null : (HSSFCell)row.GetCell(j);
                string text = String.Empty;

                if (cell != null && !TryGetCellText(cell, out text))
                {
                    result = false;
                }

                fields[j] = text;
            }

            // Join places delimiters only between fields, so no trailing one has to be trimmed.
            sw.WriteLine(String.Join(Delimiter.ToString(), fields));
        }
    }

    return result;
}

/// <summary>
/// Converts one cell to its output text. Formula cells are rendered from their
/// cached result value rather than from the name of the cached result type.
/// </summary>
/// <param name="cell">The cell to convert; must not be null.</param>
/// <param name="text">Receives the formatted text, or an empty string for blank or unsupported cells.</param>
/// <returns><c>false</c> when the cell type is not supported; otherwise <c>true</c>.</returns>
private bool TryGetCellText(HSSFCell cell, out string text)
{
    NPOI.SS.UserModel.CellType type = cell.CellType;
    if (type == NPOI.SS.UserModel.CellType.Formula)
    {
        type = cell.CachedFormulaResultType;
    }

    switch (type)
    {
        case NPOI.SS.UserModel.CellType.Boolean:
            text = FormatValue(cell.BooleanCellValue.ToString(), AddQuotes, c);
            return true;
        case NPOI.SS.UserModel.CellType.Numeric:
            text = FormatValue(
                NPOI.SS.UserModel.DateUtil.IsCellDateFormatted(cell)
                    ? cell.DateCellValue.ToShortDateString()
                    : cell.NumericCellValue.ToString(),
                AddQuotes, c);
            return true;
        case NPOI.SS.UserModel.CellType.String:
            // Replace embedded new lines with spaces so one cell never spans several output lines.
            text = FormatValue(cell.StringCellValue.ToString().Replace('\n', ' ').TrimEnd(), AddQuotes, c);
            return true;
        case NPOI.SS.UserModel.CellType.Blank:
            text = String.Empty;
            return true;
        default:
            text = String.Empty;
            return false;
    }
}