根据内部的前两个数字对字符串中的行重新排序

时间:2015-12-27 03:09:57

标签: javascript regex sorting replace

我从一个不相关的源中获取了一些字符串data,其中包含(以及其他文本)遵循类似于^INSERT INTO \w VALUES (\d, .*$的模式的行。我想要做的是根据第一行(如果第二个是数字,然后是第二个)列的值对这些行进行排序。

到目前为止,我所使用的代码适用于第一个值有严格增量的行,但是当值重复时,逻辑中断并输出undefined。我还想知道如果在第一个数字重复的情况下我可以根据第二个数字进行排序。

对多个属性进行排序本身对我来说不是问题,问题在于我的对象设置方式(由于每个ID使用1个对象密钥,因此不允许重复ID)。 / p>

var data = 
"--\n\
-- PostgreSQL database dump\n\
--\n\
\n\
SET statement_timeout = 0;\n\
SET lock_timeout = 0;\n\
SET client_encoding = 'UTF8';\n\
SET standard_conforming_strings = on;\n\
SET check_function_bodies = false;\n\
SET client_min_messages = warning;\n\
\n\
-- Working fine\n\
INSERT INTO c VALUES (3, 'c', 45);\n\
INSERT INTO c VALUES (1, 'a', 11);\n\
INSERT INTO c VALUES (5, 'e', 77);\n\
INSERT INTO c VALUES (4, 'd', 76);\n\
INSERT INTO c VALUES (2, 'b', 33);\n\
\n\
--\n\
-- Name: a; Type: TABLE; Schema: public\n\
--\n\
CREATE TABLE a (\n\
    first integer NOT NULL,\n\
    second integer NOT NULL\n\
);\n\
\n\
--\n\
-- Name: a_first_second; Type: CONSTRAINT; Schema: public\n\
--\n\
ALTER TABLE ONLY a\n\
    ADD CONSTRAINT a_first_second PRIMARY KEY (first, second);\n\
\n\
-- Breaks\n\
INSERT INTO a VALUES (1, 1);\n\
INSERT INTO a VALUES (2, 5);\n\
INSERT INTO a VALUES (4, 2);\n\
INSERT INTO a VALUES (5, 6);\n\
INSERT INTO a VALUES (2, 4);\n\
INSERT INTO a VALUES (3, 7);\n\
INSERT INTO a VALUES (4, 6);\n\
INSERT INTO a VALUES (2, 7);\n\
INSERT INTO a VALUES (6, 9);\n\
\n\
-- All good here\n\
INSERT INTO b VALUES (2, 'b', 'Description');\n\
INSERT INTO b VALUES (3, 'c', 'Description\n\
2nd line of description');\n\
INSERT INTO b VALUES (1, 'a', 'Description');\n\
INSERT INTO b VALUES (4, 'd', 'Description\n\
2nd line of description\n\
3rd line of description');\n\
INSERT INTO b VALUES (5, 'e', 'Description');";
/*/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////*/document.body.innerHTML='<code><pre></pre></code>';console = {log:function(input){document.getElementsByTagName('pre')[0].innerHTML+=input+'\n'}};
var groups = {},
    test = /INSERT INTO "?([a-z]+)"?\s*VALUES\s*\((\d+),[\s\S]+?;/g;
data.replace(test,function(row,group,field){
  if (typeof groups[group] !== 'object')
    groups[group] = {};
  groups[group][field] = row;
  return row;
});
var sortedGroupKeys = {},
    groupStep = {};
for (var j = 0, k = Object.keys(groups), l = k.length; j<l; j++){
  var group = k[j];
  sortedGroupKeys[group] = Object.keys(groups[group]).sort(function(a,b){
      return parseInt(a, 10) - parseInt(b, 10);
  });
  groupStep[group] = 0;
}
data = data.replace(test,function(row,group){
  var nextSortedKeyIndex = groupStep[group]++,
      nextSortedKey = sortedGroupKeys[group][nextSortedKeyIndex];

  return groups[group][nextSortedKey];
});
console.log(data);

2 个答案:

答案 0 :(得分:1)

这里的基本概要是:

  • 拆分并迭代表插入块
    • 拆分为&#34;插入命令行&#34;
    • 对行进行排序
    • 加入行
  • 加入块

&#13;
&#13;
var data = 
"-- Working fine\n\
INSERT INTO c VALUES (3, 'c', 45);\n\
INSERT INTO c VALUES (1, 'a', 11);\n\
INSERT INTO c VALUES (5, 'e', 77);\n\
INSERT INTO c VALUES (4, 'd', 76);\n\
INSERT INTO c VALUES (2, 'b', 33);\n\
\n\
-- Breaks (but not really broken anymore)\n\
INSERT INTO a VALUES (1, 1);\n\
INSERT INTO a VALUES (2, 5);\n\
INSERT INTO a VALUES (6, 11);\n\
INSERT INTO a VALUES (6, 20);\n\
INSERT INTO a VALUES (4, 2);\n\
INSERT INTO a VALUES (5, 6);\n\
INSERT INTO a VALUES (2, 4);\n\
INSERT INTO a VALUES (3, 7);\n\
INSERT INTO a VALUES (4, 6);\n\
INSERT INTO a VALUES (2, 7);\n\
INSERT INTO a VALUES (6, 9);\n\
INSERT INTO a VALUES (6, 10);\n\
\n\
-- All good here\n\
INSERT INTO b VALUES (2, 'b', 'Description');\n\
INSERT INTO b VALUES (3, 'c', 'Description\n\
2nd line of description');\n\
INSERT INTO b VALUES (1, 'a', 'Description');\n\
INSERT INTO b VALUES (4, 'd', 'Description\n\
2nd line of description\n\
3rd line of description');\n\
INSERT INTO b VALUES (5, 'e', 'Description');";
/*/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////*/document.body.innerHTML='<code><pre></pre></code>';console = {log:function(input){document.getElementsByTagName('pre')[0].innerHTML+=input+'\n'}};

var blockRegex = /((?:\bINSERT\s+INTO\s+[^\s]+\s+VALUES\s*\((?:[^"')]+|"[^"\\]*(?:\\.[^"\\]*)*"|'[^'\\]*(?:\\.[^'\\]*)*')*\);[\r\n\\]*)+)/i,
    rowsRegex = /(?:\bINSERT\s+INTO\s+[^\s]+\s+VALUES\s*\((?:[^"')]+|"[^"\\]*(?:\\.[^"\\]*)*"|'[^'\\]*(?:\\.[^'\\]*)*')*\);[\r\n\\]*)/ig,
    result = data.split(blockRegex),
    resultRows;

for (var i = 0; i < result.length; i++) {
 resultRows = result[i].match(rowsRegex);
 if (resultRows) {
   resultRows.alphanumSort();
   result[i] = resultRows.join('');
 }
}
 data = result.join('');

console.log(data);
&#13;
<script src="http://www.davekoelle.com/files/alphanum.js"></script>
&#13;
&#13;
&#13;

注意

这使用了自然排序功能:http://www.davekoelle.com/alphanum.html

答案 1 :(得分:0)

在@ Shaunt的建议的基础上,我更改了脚本以使用基于每个表存储的完整语句数组,然后我在自定义排序函数中单独解析。

var parseRow = function(r){
      var match = r.match(/VALUES \((\d+)(?:, (\d+|NULL))?[, )]/);
      if (!match)
        return [];
      return [match[1], match[2]];
    }, 
    test = /INSERT INTO "?([a-z]+)"?\s*VALUES\s*\((\d+),[\s\S]+?;/g,
    Tables = {},
    TableCounters = {};

data.replace(test,function(row,table){
  if (typeof Tables[table] !== 'object')
    Tables[table] = [];
  Tables[table].push(row);
  TableCounters[table] = 0;
  return row;
});
for (var j = 0, k = Object.keys(Tables), l = k.length; j<l; j++){
  var table = k[j];
  Tables[table].sort(function(a,b){
    a = parseRow(a);
    b = parseRow(b);

    var ix = 0;
    if (a[0] === b[0] && a[1] !== 'NULL' && b[1] !== 'NULL')
      ix++;

    a[ix] = parseInt(a[ix], 10);
    b[ix] = parseInt(b[ix], 10);

    return a[ix] > b[ix] ? 1 : (a[ix] < b[ix] ? -1 : 0);
  })
}
data = data.replace(test,function(row,table){
  var nextRowIndex = TableCounters[table]++;

  return Tables[table][nextRowIndex];
});

工作示例:

&#13;
&#13;
var data = 
"--\n\
-- PostgreSQL database dump\n\
--\n\
\n\
SET statement_timeout = 0;\n\
SET lock_timeout = 0;\n\
SET client_encoding = 'UTF8';\n\
SET standard_conforming_strings = on;\n\
SET check_function_bodies = false;\n\
SET client_min_messages = warning;\n\
\n\
-- Working fine\n\
INSERT INTO c VALUES (3, 'c', 45);\n\
INSERT INTO c VALUES (1, 'a', 11);\n\
INSERT INTO c VALUES (5, 'e', 77);\n\
INSERT INTO c VALUES (4, 'd', 76);\n\
INSERT INTO c VALUES (2, 'b', 33);\n\
\n\
--\n\
-- Name: a; Type: TABLE; Schema: public\n\
--\n\
CREATE TABLE a (\n\
    first integer NOT NULL,\n\
    second integer NOT NULL\n\
);\n\
\n\
--\n\
-- Name: a_first_second; Type: CONSTRAINT; Schema: public\n\
--\n\
ALTER TABLE ONLY a\n\
    ADD CONSTRAINT a_first_second PRIMARY KEY (first, second);\n\
\n\
-- Breaks\n\
INSERT INTO a VALUES (1, 1);\n\
INSERT INTO a VALUES (2, 5);\n\
INSERT INTO a VALUES (4, 2);\n\
INSERT INTO a VALUES (5, 6);\n\
INSERT INTO a VALUES (2, 4);\n\
INSERT INTO a VALUES (3, 7);\n\
INSERT INTO a VALUES (4, 6);\n\
INSERT INTO a VALUES (2, 7);\n\
INSERT INTO a VALUES (6, 9);\n\
\n\
-- All good here\n\
INSERT INTO b VALUES (2, 'b', 'Description');\n\
INSERT INTO b VALUES (3, 'c', 'Description\n\
2nd line of description');\n\
INSERT INTO b VALUES (1, 'a', 'Description');\n\
INSERT INTO b VALUES (4, 'd', 'Description\n\
2nd line of description\n\
3rd line of description');\n\
INSERT INTO b VALUES (5, 'e', 'Description');";
/*//////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////*/document.body.innerHTML='<code><pre></pre></code>';console = {log:function(input){document.getElementsByTagName('pre')[0].innerHTML+=input+'\n'}};
var parseRow = function(r){
      var match = r.match(/VALUES \((\d+)(?:, (\d+|NULL))?[, )]/);
      if (!match)
        return [];
      return [match[1], match[2]];
    }, 
    test = /INSERT INTO "?([a-z]+)"?\s*VALUES\s*\((\d+),[\s\S]+?;/g,
    Tables = {},
    TableCounters = {};

data.replace(test,function(row,table){
  if (typeof Tables[table] !== 'object')
    Tables[table] = [];
  Tables[table].push(row);
  TableCounters[table] = 0;
  return row;
});
for (var j = 0, k = Object.keys(Tables), l = k.length; j<l; j++){
  var table = k[j];
  Tables[table].sort(function(a,b){
    a = parseRow(a);
    b = parseRow(b);

    var ix = 0;
    if (a[0] === b[0] && a[1] !== 'NULL' && b[1] !== 'NULL')
      ix++;

    a[ix] = parseInt(a[ix], 10);
    b[ix] = parseInt(b[ix], 10);

    return a[ix] > b[ix] ? 1 : (a[ix] < b[ix] ? -1 : 0);
  })
}
data = data.replace(test,function(row,table){
  var nextRowIndex = TableCounters[table]++;

  return Tables[table][nextRowIndex];
});
console.log(data);
&#13;
&#13;
&#13;