Node.js - 解析包含合并单元格的 Excel 文件

时间:2016-11-07 21:11:15

标签: node.js excel parsing

我必须解析各种 Excel 文件,其中一些格式非常混乱(我无法控制格式,因为它们是由外部提供的)。其中一些还包含多个工作表。

对于只包含普通数据行的标准文件,我已经成功使用了 xlsx-to-json,但对于其他文件,我到目前为止还没有找到解决方案。下面是一个常见的例子。

----------------------------------------------------------
|              |____normal___|____normal___|____normal___|
|    merged    |____normal___|____normal___|____normal___|
|              |____normal___|____normal___|____normal___|
----------------------------------------------------------

我不知道该从哪里着手研究这个问题,请给些建议。我确信一定有人遇到过这个问题。

2 个答案:

答案 0 :(得分:0)

可以考虑使用 js-xlsx,它的工作表对象(Worksheet Object)提供了以下属性:

  

ws ['!merges']:与工作表中合并的单元格对应的范围对象数组。

答案 1 :(得分:0)

此方法仅在只有一个“数组属性”、且数组中的对象至少具有两个属性时才有效。抱歉我的英语不好,我是台湾人。需要引入的脚本:“file:///C:/Users/frank/Documents/code/excel-to-json/xlsx.full.min.js”

<input type="file" id="fileUpload" accept=".xls,.xlsx" />
<button id="btn" type="button">convert</button>
<script>
  // Limitation: only one "array property" is supported, and its items must have at least two properties.
  // File most recently picked in the #fileUpload input; remains undefined
  // until the user chooses one. Read later by the "convert" button handler.
  let selectedFile;
  document
    .getElementById("fileUpload")
    .addEventListener("change", function (changeEvent) {
      // Only the first chosen file is kept.
      const { files } = changeEvent.target;
      selectedFile = files[0];
    });
  /**
   * Convert one SheetJS worksheet into an array of plain objects.
   *
   * Row 0 holds the property names. A header cell on row 0 that is merged
   * across several columns marks an "array property"; the names of its item
   * properties are then taken from row 1 underneath it, and data starts on
   * row 2. Each vertically merged range that starts on row 2 or later
   * delimits one record: scalar properties are read from the first row of the
   * range, and the array property receives one item per row of the range.
   * When row 0 contains no merged header, every row after row 0 becomes one
   * flat record.
   *
   * Assumes the sheet's used range starts at A1, so that row/column numbers
   * in `!merges` line up with indexes into the extracted rows.
   *
   * @param {object} sheet - A worksheet taken from `workbook.Sheets`.
   * @returns {object[]} The records extracted from the sheet.
   */
  function sheetToRecords(sheet) {
    // Array-of-arrays view of the sheet. `raw: false` keeps the formatted
    // text of each cell and `defval` fills empty cells with "", so rows are
    // dense. Unlike splitting CSV text on "," and "\n", this does not break
    // cells that contain commas, quotes or line breaks.
    const rows = XLSX.utils.sheet_to_json(sheet, {
      header: 1,
      raw: false,
      defval: "",
    });
    const headerRow = rows[0] || [];
    const subHeaderRow = rows[1] || [];
    // `!merges` is absent when the sheet has no merged cells.
    const merges = sheet["!merges"] || [];

    // Property name -> { index, props? }. Blank header cells (for example
    // the covered part of a merged header) are skipped.
    const cols = {};
    headerRow.forEach((cell, index) => {
      if (!cell) return;
      cols[cell] = { index };
    });

    // A merge that starts and ends on row 0 spans several columns: the
    // property whose header sits in its first column is an array property.
    let hasArray = false;
    merges.forEach(({ s, e }) => {
      if (s.r !== 0 || e.r !== 0) return;
      hasArray = true;

      const owner = Object.keys(cols).find((key) => cols[key].index === s.c);
      if (owner === undefined) return;

      const props = {};
      for (let col = s.c; col <= e.c; col += 1) {
        const propName = subHeaderRow[col];
        // Columns without a sub-header name cannot be addressed; skip them.
        if (!propName) continue;
        props[propName] = { col };
      }
      cols[owner].props = props;
    });

    const records = [];

    if (!hasArray) {
      // Plain table: one record per row below the header.
      rows.slice(1).forEach((row) => {
        const record = {};
        Object.keys(cols).forEach((key) => {
          record[key] = row[cols[key].index];
        });
        records.push(record);
      });
      return records;
    }

    // Several columns may be merged over the same rows; emit each row span
    // only once.
    const seenSpans = new Set();
    merges.forEach(({ s, e }) => {
      // Rows 0 and 1 are headers; records start on row 2.
      if (s.r <= 1) return;

      const spanKey = `${s.r}:${e.r}`;
      if (seenSpans.has(spanKey)) return;
      seenSpans.add(spanKey);

      const firstRow = rows[s.r] || [];
      const record = {};
      Object.keys(cols).forEach((key) => {
        const { index, props } = cols[key];
        if (!props) {
          // Scalar property: read it from its own column, not from the
          // column of whichever merge triggered this record.
          record[key] = firstRow[index];
          return;
        }
        // Array property: one item per row covered by the merge.
        const items = [];
        for (let r = s.r; r <= e.r; r += 1) {
          const line = rows[r] || [];
          const item = {};
          Object.keys(props).forEach((propName) => {
            item[propName] = line[props[propName].col];
          });
          items.push(item);
        }
        record[key] = items;
      });
      records.push(record);
    });
    return records;
  }

  /**
   * Append a `<div class="json">` to the page containing the sheet name as a
   * heading and the records as pretty-printed JSON.
   *
   * @param {string} sheetName - Name of the worksheet.
   * @param {object[]} records - Records extracted from that worksheet.
   */
  function renderSheetResult(sheetName, records) {
    const divEL = document.createElement("div");
    divEL.classList.add("json");
    document.body.appendChild(divEL);

    const h2El = document.createElement("h2");
    h2El.innerText = sheetName;
    divEL.appendChild(h2El);

    const preEl = document.createElement("pre");
    // textContent, not innerHTML: cell values come from the uploaded file and
    // must be shown literally rather than parsed as markup.
    preEl.textContent = JSON.stringify(records, null, 2);
    divEL.appendChild(preEl);
  }

  // "convert" button: read the selected workbook and render every sheet.
  document.getElementById("btn").addEventListener("click", () => {
    if (!selectedFile) return;
    console.log("file selected!");

    const fileReader = new FileReader();

    fileReader.onload = (loadEvent) => {
      const workbook = XLSX.read(loadEvent.target.result, {
        type: "binary",
      });
      workbook.SheetNames.forEach((currentSheet) => {
        const result = sheetToRecords(workbook.Sheets[currentSheet]);
        renderSheetResult(currentSheet, result);
        console.log(result);
      });
    };
    // Surface read failures instead of silently doing nothing.
    fileReader.onerror = () => {
      console.error("failed to read file", fileReader.error);
    };
    fileReader.readAsBinaryString(selectedFile);
  });
</script>