Javascript和正则表达式导入CSV文件

时间:2013-04-26 11:05:03

标签: javascript regex

以下是从CSV导入的一行原始文本:

10,"2013-04-17 16:29:36",out,"BTC sold: [tid:0450750450454505] 0.85985758 BTC at 69.88355 € (0.55% fee)",0.85985758,0

我正在尝试编辑的代码非常简单,来自一个教程页面(主要代码的完整副本位于本页底部):

主要的正则表达式代码部分是:

// Build the CSV-matching pattern from three pieces, in match order:
// the delimiter that precedes a field, a quoted field, or an unquoted field.
var objPattern = (function( strSeparator ){
  // The separator is backslash-escaped once and reused in both places.
  var strEscapedSeparator = "\\" + strSeparator;

  // Group 1: field separator, line break, or start of input.
  var strDelimiters = "(" + strEscapedSeparator + "|\\r?\\n|\\r|^)";

  // Group 2: body of a quoted field; "" inside stands for one quote.
  var strQuotedField = "\"([^\"]*(?:\"\"[^\"]*)*)\"";

  // Group 3: unquoted field, up to the next quote, separator or line break.
  var strStandardField = "([^\"" + strEscapedSeparator + "\\r\\n]*)";

  return new RegExp(
    strDelimiters + "(?:" + strQuotedField + "|" + strStandardField + ")",
    "gi"
  );
})( strDelimiter );

我想要的是:对于开头给出的那行原始文本,删除或忽略方括号 [] 以及圆括号 () 之间的内容,并把单词“at”当作新列的分隔符。这样在导入CSV时,“at”之后的值就会成为新的一列。

等效的正则表达式应为: `\[(.*?)\]|\((.*?)\)` 和 `(\bat\b)`

我尝试了各种选项,例如在正则表达式的分隔符部分添加 `(\bat\b)`,但它不起作用。

非常感谢你的帮助!

整个代码:

/**
 * Adds a "CSV" menu to the active spreadsheet. The menu has a single entry,
 * "Load from CSV file", bound to the function named importFromCSV.
 * Presumably run by the Apps Script onOpen trigger - confirm in the project.
 */
function onOpen() {
  var menuItems = [
    {name: "Load from CSV file", functionName: "importFromCSV"}
  ];
  SpreadsheetApp.getActiveSpreadsheet().addMenu("CSV", menuItems);
}

/**
 * Asks the user for a search term, clears the active sheet, and lists every
 * matching Docs List file as one row of: file name, file type, URL.
 *
 * Row 1 receives the headers; result i (0-based) is written to row i + 2.
 * Returns nothing; all output goes to the active sheet.
 */
function search() {
  // Prompt the user for a search term
  var searchTerm = Browser.inputBox("Enter the string to search for:");

  // Get the active spreadsheet and the active sheet
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sheet = ss.getActiveSheet();

  // Set up the spreadsheet to display the results
  var headers = [["File Name", "File Type", "URL"]];
  sheet.clear();
  sheet.getRange("A1:C1").setValues(headers);

  // Search the files in the user's Docs List for the search term
  var files = DocsList.find(searchTerm);

  // Declared locally: assigning it undeclared would create an implicit
  // global (and throws a ReferenceError in strict mode).
  var urlBase;

  // Loop through the results and display the file name, file type, and URL
  for (var i = 0; i < files.length; i++) {
    var file = files[i];
    var row = i + 2;

    sheet.getRange(row, 1, 1, 1).setValue(file.getName());

    var fileType = file.getType();
    sheet.getRange(row, 2, 1, 1).setValue(fileType);

    // Loose equality is kept on purpose: getType() may return a non-string
    // value that only matches these names after coercion - TODO confirm.
    if (fileType == "document") {
      urlBase = "https://docs.google.com/Doc?docid=";
    }
    else if (fileType == "spreadsheet") {
      urlBase = "https://spreadsheets.google.com/ccc?key=";
    }
    else if (fileType == "presentation") {
      urlBase = "https://docs.google.com/present/view?id=";
    }
    else {
      // Any other type falls back to the generic file viewer URL.
      urlBase = "https://docs.google.com/fileview?id=";
    }
    sheet.getRange(row, 3, 1, 1).setValue(urlBase + file.getId());
  }
}

/**
 * Loads the Docs List file named "history_BTC.csv", parses it as
 * comma-separated text with CSVToArray, and writes each parsed row to the
 * active sheet starting at row 1, column 1.
 *
 * If no file with that name exists, or its content is empty, the user is
 * told so and nothing is written.
 */
function importFromCSV() {
  var fileName = "history_BTC.csv";

  var files = DocsList.getFiles();
  var csvFile = "";

  // Take the content of the first file whose name matches exactly.
  for (var i = 0; i < files.length; i++) {
    if (files[i].getName() == fileName) {
      csvFile = files[i].getContentAsString();
      break;
    }
  }

  // Nothing to parse: stop here instead of handing an empty string to the
  // parser and silently importing nothing.
  if (!csvFile) {
    Browser.msgBox("CSV file \"" + fileName + "\" was not found or is empty.");
    return;
  }

  var csvData = CSVToArray(csvFile, ",");
  var sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();

  for (var row = 0; row < csvData.length; row++) {
    var values = csvData[row];

    // A range needs at least one column, so rows without fields are skipped
    // (presumably getRange rejects a zero-width range - TODO confirm).
    if (values.length === 0) {
      continue;
    }

    // setValues expects a two-dimensional array, hence the wrapping.
    sheet.getRange(row + 1, 1, 1, values.length).setValues([values]);
  }
}

/**
 * Parses a delimited string into an array of rows, each row being an array
 * of field strings.
 *
 * Fields may be wrapped in double quotes; inside a quoted field a doubled
 * quote ("") stands for one literal quote, and delimiters and line breaks
 * are taken literally. Rows are separated by \r\n, \n or \r.
 *
 * @param {string} strData - The delimited text to parse.
 * @param {string} [strDelimiter=","] - Field delimiter (one character);
 *     it is always matched literally.
 * @returns {Array.<Array.<string>>} The parsed rows. An empty input yields
 *     a single row holding one empty field.
 */
function CSVToArray( strData, strDelimiter ){
  // Check to see if the delimiter is defined. If not,
  // then default to comma.
  strDelimiter = (strDelimiter || "," );

  // Escape regex metacharacters so that any delimiter is matched literally.
  // Blindly prefixing a backslash would turn "t" into a tab, for example.
  var strEscapedDelimiter = strDelimiter.replace(
    /[\\^$.*+?()[\]{}|\/-]/g,
    "\\$&"
  );

  // Create a regular expression to parse the CSV values.
  // NOTE: only the field delimiter, line breaks and start-of-input belong
  // in the first group. Anything else matched there is treated as a row
  // break by the loop below, so word-level splitting (for example on
  // " at ") has to be done on strData before parsing, not here.
  var objPattern = new RegExp(
    (
      // Delimiters.
      "(" + strEscapedDelimiter + "|\\r?\\n|\\r|^)" +

      // Quoted fields.
      "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +

      // Standard fields.
      "([^\"" + strEscapedDelimiter + "\\r\\n]*))"
    ),
    "gi"
  );

  // Create an array to hold our data. Give the array
  // a default empty first row.
  var arrData = [[]];

  // Holds the capture groups of the current match.
  var arrMatches = null;

  // Value of the field captured by the current match.
  var strMatchedValue;

  // Keep looping over the regular expression matches
  // until we can no longer find a match.
  while (arrMatches = objPattern.exec( strData )){

    // Get the delimiter that was found ("" at the start of the input).
    var strMatchedDelimiter = arrMatches[ 1 ];

    // Input that begins with a delimiter has an empty field in front of
    // it; the pattern consumes the delimiter directly, so record that
    // field explicitly.
    if (strMatchedDelimiter.length && arrMatches.index === 0){
      arrData[ 0 ].push( "" );
    }

    // A non-empty delimiter that is not the field delimiter is a row
    // delimiter: start a new row.
    if (
      strMatchedDelimiter.length &&
      (strMatchedDelimiter != strDelimiter)
    ){
      arrData.push( [] );
    }

    // Group 2 is set only for quoted fields. Compare against undefined
    // rather than testing truthiness so a quoted empty field ("") is
    // kept as an empty string.
    if (arrMatches[ 2 ] !== undefined){

      // Quoted value: unescape doubled double quotes.
      strMatchedValue = arrMatches[ 2 ].replace( /""/g, "\"" );

    } else {

      // Non-quoted value.
      strMatchedValue = arrMatches[ 3 ];

    }

    // Add the value to the current (last) row.
    arrData[ arrData.length - 1 ].push( strMatchedValue );

    // exec() does not advance lastIndex after a zero-length match, which
    // can happen at the start of the input (empty input, or an
    // unterminated leading quote). Step forward to avoid looping forever.
    if (arrMatches[ 0 ].length === 0){
      objPattern.lastIndex++;
    }
  }

  // Return the parsed data.
  return( arrData );
}

1 个答案:

答案 0 :(得分:0)

首先,将“at”替换为“,”。不需要正则表达式。

然后这是一个简单的替换:

// Turn the first two " at " separators into commas. A string pattern only
// replaces its first occurrence, which is why the call is applied twice.
// The variables are declared so they do not become implicit globals.
var a = myString.replace(" at ", ",");
var b = a.replace(" at ", ",");
// Remove every bracketed "[...]" and parenthesised "(...)" segment.
var c = b.replace(/(\[[^\]]+\]|\([^)]+\))/gi, "");