从电子表格中的单元格值中解析字符串和对象

时间:2018-02-21 02:05:45

标签: regex google-apps-script split google-sheets

我收到了一份电子表格,其中的值很难处理,我为此费了不少功夫。所需数据位于A列的每个单元格中。单元格的值形如{"p0":70,"u3":71,"s7110":40},t45,{"t78":60,"s3310":15},p37,p36,{"p29":44,"s8110":95},p85,p14,{"s2710":41},u47。这样的值大约有1000个,我必须解析它们:需要从这些值中取出字符串和对象,解析结果的顺序可以忽略。由于无法手动解析,我决定使用脚本。

我尝试使用“split”解析。

// Read column A (A1 down to the sheet's last row) and split every cell on commas.
// Note: a plain comma split also cuts through the {...} objects inside a cell.
var sheet = SpreadsheetApp.getActiveSheet();
var values = sheet.getRange("A1:A" + sheet.getLastRow()).getValues();
// Each entry of `values` is one row; the column-A text is its first element.
var result = values.map(function (row) {
  return row[0].split(",");
});

这样我可以得到字符串t45、p37、p36、p85、p14、u47。但split也会把所有对象拆开。例如,{"p0":70,"u3":71,"s7110":40}会被拆分为{"p0":70、"u3":71和"s7110":40}。有办法解决这个问题吗?

样本值是这样的。每行都在单元格A1,A2,A3,A4,A5中。

{"p0":70,"u3":71,"s7110":40},t45,{"t78":60,"s3310":15},p37,p36,{"p29":44,"s8110":95},p85,p14,{"s2710":41},u47
s6610,{"t25":70,"u8":43,"p35":86},u85,u74,{"s7710":83},{"p70":70,"u67":84},{"u71":43,"s1210":73},{"u45":84,"s710":15},{"u14":79,"p22":45},p31
u73,u12,{"t51":98,"u57":96},u31,p41,s1110,s6610,p55,{"t57":71,"s7510":83,"u62":17},u73
t50,{"t83":22,"p18":76},{"p47":12,"s8710":18,"u11":35},{"t14":74,"u72":51},{"p74":21,"t77":77},{"u62":84,"s3010":11},p81,u36,p67,{"t79":12,"u2":70,"s6010":98}
{"u54":51,"t31":31},t56,s4110,{"s3110":84,"t25":92,"p80":19},s3210,{"p65":54,"s8510":45},{"t73":78,"s6210":11},{"s2110":98,"p11":16},{"p61":55,"t88":75},p38

非常感谢你的时间。对于我不成熟的问题,我很抱歉。

2 个答案:

答案 0 :(得分:1)

B1:

=ARRAYFORMULA(REGEXEXTRACT(A1:A5&",",REGEXREPLACE(REGEXREPLACE(A1:A5&",","{.*?}","($0)"),"([A-Za-z]\d+),","($1),")))

我们用()把除逗号以外的所有对象和字符串分别括起来(作为捕获组),然后再把它们提取出来。

编辑:更简单的锚点:需要作为分隔符的逗号,其后面不会紧跟双引号(")。

=ARRAYFORMULA(SUBSTITUTE(SPLIT(SUBSTITUTE(A1:A5,","&CHAR(34),"§"),","),"§",","&CHAR(34)))


=ARRAYFORMULA(split(REGEXREPLACE(A1:A5,"(,)([^"&CHAR(34)&"])","§$2"),"§"))

答案 1 :(得分:1)

您可以使用RegExp把作为分隔符的字符替换为一个唯一的标记字符串,然后按该标记进行拆分,这样就不会影响其他任何内容。

/**
 * Demo entry point: tokenizes the five sample cell values and logs every
 * token (either a bare string such as `t45` or a `{...}` object literal).
 *
 * @returns {string[]} The tokens, in the order they were logged.
 */
function myFunction() {
  // One entry per spreadsheet cell; joined with a single space, which is the
  // separator the tokenizer treats as a cell boundary.
  var sampleRows = [
    '{"p0":70,"u3":71,"s7110":40},t45,{"t78":60,"s3310":15},p37,p36,{"p29":44,"s8110":95},p85,p14,{"s2710":41},u47',
    's6610,{"t25":70,"u8":43,"p35":86},u85,u74,{"s7710":83},{"p70":70,"u67":84},{"u71":43,"s1210":73},{"u45":84,"s710":15},{"u14":79,"p22":45},p31',
    'u73,u12,{"t51":98,"u57":96},u31,p41,s1110,s6610,p55,{"t57":71,"s7510":83,"u62":17},u73',
    't50,{"t83":22,"p18":76},{"p47":12,"s8710":18,"u11":35},{"t14":74,"u72":51},{"p74":21,"t77":77},{"u62":84,"s3010":11},p81,u36,p67,{"t79":12,"u2":70,"s6010":98}',
    '{"u54":51,"t31":31},t56,s4110,{"s3110":84,"t25":92,"p80":19},s3210,{"p65":54,"s8510":45},{"t73":78,"s6210":11},{"s2110":98,"p11":16},{"p61":55,"t88":75},p38'
  ];
  var myStrng = sampleRows.join(" ");

  var newArray = splitCellValues_(myStrng);

  for (var i = 0; i < newArray.length; i++) {
    Logger.log(newArray[i]);
  }

  return newArray;
}

/**
 * Splits text made of comma/whitespace separated bare strings and flat
 * `{...}` objects into individual tokens, keeping each object intact.
 *
 * Works by placing a sentinel after every `}` and before every `{`
 * (swallowing the adjacent separators), splitting on the sentinel, and then
 * splitting the remaining object-free chunks on commas and whitespace.
 * Nested braces are not supported, and the text must not contain the
 * sentinel `zq^` itself.
 *
 * @param {string} text The raw cell text (several cells may be joined by spaces).
 * @returns {string[]} The tokens in source order; empty tokens are dropped.
 */
function splitCellValues_(text) {
  var SENTINEL = "zq^";

  var marked = String(text)
    .replace(/\}[\s,]*/g, "}" + SENTINEL)   // close of an object + trailing separators
    .replace(/[\s,]*\{/g, SENTINEL + "{");  // leading separators + start of an object

  var chunks = marked.split(SENTINEL);
  var tokens = [];

  for (var i = 0; i < chunks.length; i++) {
    var chunk = chunks[i];

    // After marking, a chunk containing "{" is exactly one whole object.
    if (chunk.indexOf("{") !== -1) {
      tokens.push(chunk);
      continue;
    }

    // Bare strings: split on commas AND whitespace together, so a chunk such
    // as "p31 u73,u12" (end of one cell, start of the next) yields 3 tokens.
    var parts = chunk.split(/[\s,]+/);
    for (var j = 0; j < parts.length; j++) {
      if (parts[j] !== "") {
        tokens.push(parts[j]);
      }
    }
  }

  return tokens;
}