使用OpenXML将Excel导入DataTable错误地获取数据

时间:2017-09-14 14:57:20

标签: c# .net openxml

我使用OpenXML将excel导入DataTable。合并我的第一行只显示标题信息,第二行是标题行。从excel文件填充DataTable时,我收到的数据不正确。 DataTable正确填充,直到存在任何空白单元格。当有空白单元格时,它们将移动到相邻的数据呈现单元格。为什么会这样?代码中有什么问题?你能帮我用OpenXML以正确的方式获取数据吗?导入数据后,这是我的Excel文件和数据表。

数据表 enter image description here

Import_Data.xlsx enter image description here

我的代码示例如下:

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using System.Collections.Generic;
using System.IO;
using System.Linq;

namespace OpenXMLDemo
{
    class Program
    {
        static void Main(string[] args)
        {
            Program p = new Program();

            var data = p.ExtractExcel(@"C:\TempData\");
        }

        public System.Data.DataTable ExtractExcel(string fullPath)
        {
            var excelFileToImport = Directory.GetFiles(fullPath, "Data_Import.xlsx", SearchOption.AllDirectories);

            //Create a new DataTable.
            System.Data.DataTable dt = new System.Data.DataTable();

            //Open the Excel file in Read Mode using OpenXML
            using (SpreadsheetDocument doc = SpreadsheetDocument.Open(excelFileToImport[0], false))
            {
                WorksheetPart titlesWorksheetPart = GetWorksheetPart(doc.WorkbookPart, "Titles");

                Worksheet titlesWorksheet = titlesWorksheetPart.Worksheet;

                //Fetch all the rows present in the worksheet
                IEnumerable<Row> rows = titlesWorksheet.GetFirstChild<SheetData>().Descendants<Row>();

                //Loop through the Worksheet rows
                foreach (Row row in rows)
                {
                    //Use the first row to add columns to DataTable.
                    if (row.RowIndex.Value == 1)
                    {

                    }
                    else if (row.RowIndex.Value == 2)
                    {
                        foreach (Cell cell in row.Descendants<Cell>())
                        {
                            dt.Columns.Add(GetValue(doc, cell));
                        }
                    }
                    else
                    {
                        //Add rows to DataTable.
                        dt.Rows.Add();
                        int i = 0;
                        foreach (Cell cell in row.Descendants<Cell>())
                        {
                            dt.Rows[dt.Rows.Count - 1][i] = GetValue(doc, cell);
                            i++;
                        }
                    }
                }
            }

            return dt;
        }
        private string GetValue(SpreadsheetDocument doc, Cell cell)
        {
            string value = cell.CellValue.InnerText;
            if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
            {
                return doc.WorkbookPart.SharedStringTablePart.SharedStringTable.ChildElements.GetItem(int.Parse(value)).InnerText;
            }
            return value;
        }

        public WorksheetPart GetWorksheetPart(WorkbookPart workbookPart, string sheetName)
        {
            string relId = workbookPart.Workbook.Descendants<Sheet>().First(s => sheetName.Equals(s.Name)).Id;
            return (WorksheetPart)workbookPart.GetPartById(relId);
        }
    }
}

1 个答案:

答案 0 :(得分:0)

我在PaulF提供的链接的帮助下重写了部分代码。感谢!!!

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;

namespace OpenXMLDemo
{
    class Program
    {
        static void Main(string[] args)
        {
            Program p = new Program();

            System.Data.DataTable data = p.ExtractExcel(@"C:\TempData\");
        }

        public System.Data.DataTable ExtractExcel(string fullPath)
        {
            var excelFileToImport = Directory.GetFiles(fullPath, "Data_Import.xlsx", SearchOption.AllDirectories);

            //Create a new DataTable.
            System.Data.DataTable dt = new System.Data.DataTable();

            //Open the Excel file in Read Mode using OpenXML
            using (SpreadsheetDocument doc = SpreadsheetDocument.Open(excelFileToImport[0], false))
            {
                WorksheetPart titlesWorksheetPart = GetWorksheetPart(doc.WorkbookPart, "Titles");

                Worksheet titlesWorksheet = titlesWorksheetPart.Worksheet;

                //Fetch all the rows present in the worksheet
                IEnumerable<Row> rows = titlesWorksheet.GetFirstChild<SheetData>().Descendants<Row>();

                foreach (Cell cell in rows.ElementAt(1))
                {
                    dt.Columns.Add(GetCellValue(doc, cell)); // this will include 2nd row a header row
                }

                //Loop through the Worksheet rows
                foreach (Row row in rows)
                {
                    if (row.RowIndex.Value > 2) //this will exclude first two rows
                    {
                        System.Data.DataRow tempRow = dt.NewRow();
                        int columnIndex = 0;
                        foreach (Cell cell in row.Descendants<Cell>())
                        {
                            // Gets the column index of the cell with data
                            int cellColumnIndex = (int)GetColumnIndexFromName(GetColumnName(cell.CellReference));
                            cellColumnIndex--; //zero based index
                            if (columnIndex < cellColumnIndex)
                            {
                                do
                                {
                                    tempRow[columnIndex] = ""; //Insert blank data here;
                                    columnIndex++;
                                }
                                while (columnIndex < cellColumnIndex);
                            }
                            tempRow[columnIndex] = GetCellValue(doc, cell);

                            columnIndex++;
                        }
                        dt.Rows.Add(tempRow);
                    }
                }
            }
            return dt;
        }
        public static string GetCellValue(SpreadsheetDocument document, Cell cell)
        {
            SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
            if (cell.CellValue == null)
            {
                return "";
            }
            string value = cell.CellValue.InnerXml;
            if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
            {
                return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
            }
            else
            {
                return value;
            }
        }
        /// <summary>
        /// Given a cell name, parses the specified cell to get the column name.
        /// </summary>
        /// <param name="cellReference">Address of the cell (ie. B2)</param>
        /// <returns>Column Name (ie. B)</returns>
        public static string GetColumnName(string cellReference)
        {
            // Create a regular expression to match the column name portion of the cell name.
            Regex regex = new Regex("[A-Za-z]+");
            Match match = regex.Match(cellReference);
            return match.Value;
        }
        /// <summary>
        /// Given just the column name (no row index), it will return the zero based column index.
        /// Note: This method will only handle columns with a length of up to two (ie. A to Z and AA to ZZ). 
        /// A length of three can be implemented when needed.
        /// </summary>
        /// <param name="columnName">Column Name (ie. A or AB)</param>
        /// <returns>Zero based index if the conversion was successful; otherwise null</returns>
        public static int? GetColumnIndexFromName(string columnName)
        {
            //return columnIndex;
            string name = columnName;
            int number = 0;
            int pow = 1;
            for (int i = name.Length - 1; i >= 0; i--)
            {
                number += (name[i] - 'A' + 1) * pow;
                pow *= 26;
            }
            return number;
        }
        public WorksheetPart GetWorksheetPart(WorkbookPart workbookPart, string sheetName)
        {
            string relId = workbookPart.Workbook.Descendants<Sheet>().First(s => sheetName.Equals(s.Name)).Id;
            return (WorksheetPart)workbookPart.GetPartById(relId);
        }
    }
}