我在C#中处理服务器端的xlsx Excel文件。电子表格中总共有15列(单元格),但某些行的单元格缺少值。第一行是标题行,会正确地包含全部15个单元格;而数据行中有些单元格可能为空,因此Open XML读到的是一组“锯齿状”的单元格:第1行包含完整的15个单元格,第2行可能只有13个单元格,因为其中两个值为空。这是怎么回事?如何正确映射这些数据?现在基本上所有内容都被向左移了,导致我的单元格值对不上列。我错过了什么?看起来空单元格在Open XML里被“折叠”掉了。
// Locate the worksheet part that backs the first sheet listed in the workbook.
WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
IEnumerable<Sheet> sheets = workbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
string relationshipId = sheets.First().Id.Value;
WorksheetPart worksheetPart = (WorksheetPart)workbookPart.GetPartById(relationshipId);

// Collect every Row found under the worksheet's SheetData element.
Worksheet workSheet = worksheetPart.Worksheet;
SheetData sheetData = workSheet.GetFirstChild<SheetData>();
IEnumerable<Row> rows = sheetData.Descendants<Row>();
澄清:换一种方式来问这个问题。如果我想读取Excel文件的内容并放入DataTable,该怎么做?我希望所有数据列都正确对齐。怎样才能做到这一点?
这个问题在这里问得比我更清楚:reading Excel Open XML is ignoring blank cells
答案 0 :(得分:5)
据我所知,你是在逐个迭代一行中的Cell,并假设读到的第一个单元格在A列,第二个在B列,依此类推?
我建议您从单元格的CellReference属性中解析出列位置/引用(可以使用正则表达式):
DocumentFormat.OpenXml.Spreadsheet.Cell currentcell
currentcell.CellReference
CellReference以“A1”表示法为您提供单元格引用。提取其中的列部分(“A”、“B”、“CC”等)。
您必须为一行中的每个单元格执行此操作。然后,如果某一列的单元格缺失,只需填入占位符值即可,比如null或DBNull?
我不知道是否有其他方法可以确定单元格所属的列。
答案 1 :(得分:3)
实现这一点的一种方法是:先找出所有行中最大的列索引,然后把所有缺失的单元格补成空白单元格。这样所有列就都能正确对齐。
下面是一段用来找出最大列索引的代码片段:
// Scan every row and remember the largest zero-based column index seen.
int? biggestColumnIndex = 0;
foreach (Row row in rows)
{
    // Pick the last Cell child explicitly: row.LastChild is not guaranteed to be a Cell,
    // so casting it blindly can throw InvalidCastException.
    Cell lastCell = row.Elements<Cell>().LastOrDefault();
    if (lastCell == null)
    {
        continue;
    }

    // StringValue converts implicitly to string; skip cells that carry no reference.
    string lastReference = lastCell.CellReference;
    if (string.IsNullOrEmpty(lastReference))
    {
        continue;
    }

    // GetColumnIndexFromName is documented to take just the column letters, so strip the row
    // number from the reference (e.g. "N7" -> "N") before converting it.
    int? columnIndex = GetColumnIndexFromName(GetColumnName(lastReference));
    if (columnIndex.HasValue && columnIndex > biggestColumnIndex)
    {
        biggestColumnIndex = columnIndex;
    }
}
/// <summary>
/// Converts a column name into its zero based column index (A = 0, Z = 25, AA = 26, AB = 27, ...).
/// Only the leading run of letters is used, so a full cell reference such as "AB12" is accepted
/// as well and yields the index of column AB. Letters are matched case-insensitively.
/// </summary>
/// <param name="columnName">Column Name (ie. A or AB), optionally followed by a row number</param>
/// <returns>Zero based index if the conversion was successful; otherwise null</returns>
public static int? GetColumnIndexFromName(string columnName)
{
    // Six letters is the most that can be accumulated without overflowing an int.
    const int maxLetters = 6;

    if (string.IsNullOrEmpty(columnName))
    {
        return null;
    }

    int oneBasedIndex = 0;
    int letterCount = 0;
    foreach (char raw in columnName)
    {
        char letter = char.ToUpperInvariant(raw);
        if (letter < 'A' || letter > 'Z')
        {
            // First non-letter (normally the row number) ends the column part.
            break;
        }

        if (++letterCount > maxLetters)
        {
            return null;
        }

        // Column names are a bijective base-26 number: each letter contributes 1..26.
        oneBasedIndex = (oneBasedIndex * 26) + (letter - 'A' + 1);
    }

    if (letterCount == 0)
    {
        return null;
    }

    return oneBasedIndex - 1;
}
获得最大列索引后,调用InsertCellsForCellRange方法,用空白单元格补齐所有缺失的单元格。之后再读取数据,各列就应该对齐了。(所有辅助方法都列在InsertCellsForCellRange方法的下面)
/// <summary>
/// Walks every address inside a rectangular range and creates a cell wherever the
/// worksheet does not already have one.
/// </summary>
/// <param name="startCellReference">Upper left cell of the rectangle</param>
/// <param name="endCellReference">Lower right cell of the rectangle</param>
/// <param name="worksheetPart">Worksheet part to insert cells</param>
public static void InsertCellsForCellRange(string startCellReference, string endCellReference, WorksheetPart worksheetPart)
{
    uint firstRow = GetRowIndex(startCellReference);
    uint lastRow = GetRowIndex(endCellReference);
    string firstColumn = GetColumnName(startCellReference);
    string lastColumn = GetColumnName(endCellReference);

    for (uint rowNumber = firstRow; rowNumber <= lastRow; rowNumber++)
    {
        string rowText = rowNumber.ToString();

        // The address one column past the end column is where the walk across this row stops.
        string stopAddress = IncrementCellReference(lastColumn + rowText, CellReferencePartEnum.Column);

        for (string address = firstColumn + rowText;
             !address.Equals(stopAddress);
             address = IncrementCellReference(address, CellReferencePartEnum.Column))
        {
            // Only addresses with no cell yet need one created.
            if (GetCell(worksheetPart, address) == null)
            {
                InsertCell(GetColumnName(address), GetRowIndex(address), worksheetPart);
            }
        }
    }
}
/// <summary>
/// Extracts the row number from a cell address.
/// </summary>
/// <param name="cellReference">Address of the cell (ie. B2)</param>
/// <returns>Row Index (ie. 2)</returns>
public static uint GetRowIndex(string cellReference)
{
    // The first run of digits in the address is the row number.
    string digits = Regex.Match(cellReference, @"\d+").Value;
    return uint.Parse(digits);
}
/// <summary>
/// Extracts the column letters from a cell address.
/// </summary>
/// <param name="cellReference">Address of the cell (ie. B2)</param>
/// <returns>Column Name (ie. B)</returns>
public static string GetColumnName(string cellReference)
{
    // The first run of ASCII letters in the address is the column name;
    // an address without letters yields an empty string.
    return Regex.Match(cellReference, "[A-Za-z]+").Value;
}
/// <summary>
/// Advances a cell reference (as found in the CellReference property of a Cell) by one
/// column, one row, or both.
/// </summary>
/// <param name="reference">reference string</param>
/// <param name="cellRefPart">indicates what is to be incremented</param>
/// <returns>The advanced reference; the input is returned unchanged when it is null/empty or when nothing is to be incremented</returns>
public static string IncrementCellReference(string reference, CellReferencePartEnum cellRefPart)
{
    if (cellRefPart == CellReferencePartEnum.None || String.IsNullOrEmpty(reference))
    {
        return reference;
    }

    bool advanceColumn = cellRefPart == CellReferencePartEnum.Column || cellRefPart == CellReferencePartEnum.Both;
    bool advanceRow = cellRefPart == CellReferencePartEnum.Row || cellRefPart == CellReferencePartEnum.Both;

    // Splitting on a captured run of capitals yields: [text before, column letters, text after].
    string[] pieces = Regex.Split(reference, "([A-Z]+)");

    if (advanceColumn)
    {
        char[] columnLetters = pieces[1].ToCharArray();
        char pastLastLetter = Letters[Letters.Count - 1];
        bool carry = true;

        // Work from the rightmost letter leftwards for as long as there is a carry.
        for (int position = columnLetters.Length - 1; carry && position >= 0; position--)
        {
            char next = Letters[Letters.IndexOf(columnLetters[position]) + 1];
            if (next == pastLastLetter)
            {
                // Stepped past 'Z': wrap this position to 'A' and keep carrying.
                columnLetters[position] = Letters[0];
            }
            else
            {
                columnLetters[position] = next;
                carry = false;
            }
        }

        string advancedColumn = new String(columnLetters);
        if (carry)
        {
            // Every letter wrapped (e.g. "ZZ"), so the column name grows by one letter.
            advancedColumn += Letters[0];
        }
        pieces[1] = advancedColumn;
    }

    if (advanceRow)
    {
        // The row number is assumed to be present; a reference is invalid without it.
        pieces[2] = (int.Parse(pieces[2]) + 1).ToString();
    }

    return pieces[1] + pieces[2];
}
/// <summary>
/// Looks up the cell stored at a specific address on the worksheet.
/// </summary>
/// <param name="workSheetPart">WorkSheet to search for cell address</param>
/// <param name="cellAddress">Cell Address (ie. B2)</param>
/// <returns>The matching Cell, or null when no cell has that address</returns>
public static Cell GetCell(WorksheetPart workSheetPart, string cellAddress)
{
    var everyCell = workSheetPart.Worksheet.Descendants<Cell>();

    // SingleOrDefault throws if more than one cell carries the same address.
    return everyCell.SingleOrDefault(candidate => cellAddress.Equals(candidate.CellReference));
}
/// <summary>
/// Makes sure a cell exists at the specified colName and rowIndex. A new empty cell is
/// created when the address is vacant; otherwise the existing cell is returned.
/// </summary>
/// <param name="colName">Column Name</param>
/// <param name="rowIndex">Row Index</param>
/// <param name="worksheetPart">Worksheet Part</param>
/// <returns>Inserted Cell</returns>
public static Cell InsertCell(string colName, uint rowIndex, WorksheetPart worksheetPart)
{
    // Supplying no cell tells the four-argument overload not to shift anything.
    Cell noCellSupplied = null;
    return InsertCell(colName, rowIndex, worksheetPart, noCellSupplied);
}
/// <summary>
/// Inserts a cell at the specified colName and rowIndex, creating the row if necessary.
/// <list type="bullet">
/// <item>Address vacant: <paramref name="insertCell"/> (or a new empty cell when it is null) is
/// placed at the address, keeping the row's cells in column order.</item>
/// <item>Address occupied and <paramref name="insertCell"/> is null: the existing cell is returned untouched.</item>
/// <item>Address occupied and <paramref name="insertCell"/> is supplied: the existing cell and every
/// cell to its right are shifted one column to the right, then the new cell takes the address.</item>
/// </list>
/// </summary>
/// <param name="colName">Column Name</param>
/// <param name="rowIndex">Row Index</param>
/// <param name="worksheetPart">Worksheet Part</param>
/// <param name="insertCell">Cell to insert, or null to create an empty cell only when the address is vacant</param>
/// <returns>Inserted Cell (or the existing cell when nothing was inserted)</returns>
public static Cell InsertCell(string colName, uint rowIndex, WorksheetPart worksheetPart, Cell insertCell)
{
    Worksheet worksheet = worksheetPart.Worksheet;
    SheetData sheetData = worksheet.GetFirstChild<SheetData>();
    string insertReference = colName + rowIndex;

    // If the worksheet does not contain a row with the specified row index, insert one.
    Row row = sheetData.Elements<Row>().FirstOrDefault(r => r.RowIndex == rowIndex);
    if (row == null)
    {
        row = new Row() { RowIndex = rowIndex };

        // Rows have to stay in ascending RowIndex order, so place the new row in front of the
        // first row with a higher index instead of always appending it at the end.
        Row followingRow = sheetData.Elements<Row>().FirstOrDefault(r => r.RowIndex > rowIndex);
        if (followingRow != null)
        {
            sheetData.InsertBefore(row, followingRow);
        }
        else
        {
            sheetData.Append(row);
        }
    }

    Cell retCell = row.Elements<Cell>().FirstOrDefault(c => c.CellReference.Value == insertReference);

    if (retCell != null)
    {
        // The address is occupied. Without a cell to insert there is nothing to do and the
        // existing cell is returned as is.
        if (insertCell != null)
        {
            // Collect the occupying cell and everything to its right, compared in spreadsheet
            // column order ("Z" < "AA") rather than plain string order.
            List<Cell> cellsToShift = row.Elements<Cell>()
                .Where(c => CompareColumnNames(StripRowDigits(c.CellReference.Value), colName) >= 0)
                .ToList();

            // Move each of those cells one column to the right. The result of
            // IncrementCellReference must be stored; it does not modify its argument.
            foreach (Cell cell in cellsToShift)
            {
                string shifted = IncrementCellReference(cell.CellReference.Value, CellReferencePartEnum.Column);
                cell.CellReference = new StringValue(shifted);
            }

            // The new cell takes over the requested address, directly in front of the cell
            // that previously occupied it.
            insertCell.CellReference = new StringValue(insertReference);
            retCell = row.InsertBefore(insertCell, retCell);
        }
    }
    else
    {
        // No cell exists at the address. Cells must be in sequential order according to
        // CellReference, so find the first existing cell that belongs to the right of the new one.
        Cell refCell = null;
        foreach (Cell cell in row.Elements<Cell>())
        {
            if (CompareColumnNames(StripRowDigits(cell.CellReference.Value), colName) > 0)
            {
                refCell = cell;
                break;
            }
        }

        // Use the supplied cell if there is one, otherwise create a new empty cell.
        // Either way it must carry the address it is being inserted at.
        retCell = insertCell ?? new Cell();
        retCell.CellReference = new StringValue(insertReference);

        if (refCell != null)
        {
            row.InsertBefore(retCell, refCell);
        }
        else
        {
            row.Append(retCell);
        }
    }

    return retCell;
}

/// <summary>
/// Removes the digits from a cell reference, leaving the column letters (ie. "AB12" becomes "AB").
/// </summary>
private static string StripRowDigits(string cellReference)
{
    return Regex.Replace(cellReference, @"\d", "");
}

/// <summary>
/// Compares two column names in spreadsheet order: a shorter name always sorts first
/// ("Z" before "AA"); names of equal length are compared letter by letter, ignoring case.
/// </summary>
/// <returns>Negative, zero or positive when left sorts before, with or after right</returns>
private static int CompareColumnNames(string left, string right)
{
    if (left.Length != right.Length)
    {
        return left.Length.CompareTo(right.Length);
    }

    return string.Compare(left, right, StringComparison.OrdinalIgnoreCase);
}
// Remaining supporting declarations.

/// <summary>
/// Selects which part of a cell reference IncrementCellReference should advance.
/// </summary>
public enum CellReferencePartEnum
{
    /// <summary>Advance nothing.</summary>
    None = 0,

    /// <summary>Advance the column letters.</summary>
    Column = 1,

    /// <summary>Advance the row number.</summary>
    Row = 2,

    /// <summary>Advance both the column letters and the row number.</summary>
    Both = 3
}
// Column alphabet 'A'..'Z' in order, followed by a single space. The trailing space is what
// IncrementCellReference compares against to detect that a letter has been stepped past 'Z'.
private static List<char> Letters = new List<char>("ABCDEFGHIJKLMNOPQRSTUVWXYZ ");