Javascript Regex - 使用replace将结构化字符串解析为对象

时间:2012-11-27 16:44:43

标签: javascript regex parsing


一个想法是使用以函数作为参数的 String.replace,这样在回调函数中就可以得到匹配的所有部分。我目前的测试/示例如下:


    !Norm: DIN 7985;
        M2: 2, 2, 2;
        M10: 20,25;
!Norm: DIN 7985 TX;
    M4: 4,  4    , 4;


var sTmp = "!Norm: DIN 7985;\n    M2: 2, 2, 2;\n    M3:3,3;\n    M10: 20,25;\n     !Norm: DIN 7985 TX;\n    M2: 6,    10    , 16;";

/**
 * Replacement callback for String.prototype.replace that logs every argument
 * it receives and replaces each match with "#".
 *
 * The pattern used below has only two capture groups, so replace() passes
 * (match, group1, group2, offset, wholeString). With this seven-parameter
 * signature that means p3 receives the match offset, p4 receives the whole
 * input string, and `offset` / `string` stay undefined.
 *
 * @param {string} match - the full matched substring
 * @param {string} p1 - capture group 1 (text after "!Norm:")
 * @param {string} p2 - capture group 2 (first "M<n>: ...;" entry)
 * @param {*} p3 - fifth argument supplied by replace() (see note above)
 * @param {*} p4 - sixth argument supplied by replace() (see note above)
 * @param {*} offset - seventh argument supplied by replace()
 * @param {*} string - eighth argument supplied by replace()
 * @returns {string} always "#"
 */
function replacer(match, p1, p2, p3, p4, offset, string){
    console.log("match:", match);
    console.log("p1:", p1);
    console.log("p2:", p2);
    console.log("p3:", p3);
    console.log("p4:", p4);
    console.log("offset:", offset);
    console.log("string:", string);
    return "#";
}

// The result of replace() is intentionally discarded; only the logging
// side effect of the callback is of interest here.
sTmp.replace(/\s*!Norm:\s?(.+);\s+(M\d+:.*\s*;)/g, replacer);

(在 Firebug 中测试) 控制台日志(已缩短):

match: !Norm: DIN 7985; M2: 2, 2, 2;
p1: DIN 7985
p2: M2: 2, 2, 2;
p3: 0
p4: !Norm: DIN 7985; M2: 2, 2, 2; M3:3,3; M10: 20,25; ....
offset: undefined
string: undefined
match: !Norm: DIN 7985 TX; M4: 4, 4 , 4;
p1: DIN 7985 TX
p2: M4: 4, 4 , 4;
p3: 52
p4: !Norm: DIN 7985; M2: 2, 2, 2; M3:3,3; M10: 20,25; !Norm: DIN 7985 TX; M4: 4, 4 , 4;

所以我可以看到这个想法是可行的 - 它匹配到了 !Norm,并且我在子字符串中得到了相应的信息。 现在缺少的是 M3:... 等其余部分。 那么有没有办法让 (M\d+:.*\s*;) 部分一直匹配到下一个 !Norm:,而不是在第一次出现 ; 时就停止? 我认为应该可以用先行断言(lookahead)或类似的方式实现?


    oDataTmp = {
    DIN 7985 :      {
                        M2        : ["2", "2", "2"],
                        M3        : ["3", "3"],
                        M10       : ["20", "25"],
                    },
    DIN 7985 TX :   {
                        M4        : ["4", "4", "4"],
                    }
    };

我知道您可以通过拆分执行此操作,然后逐行解析。 我喜欢完成这项大脑工作的挑战,并了解如何做到这一点: - )

3 个答案:

答案 0 :(得分:2)




  • 第1组:DIN部分。
  • 第2组:当前 !Norm 的所有剩余设置。

此正则表达式并不依赖关键字 Norm,所以它也可以是其他任何单词。如果要捕获这个关键字,只需给第一个 \w+ 加上括号。



/            # start regex
\s*          # match optional whitespace
!\w+:        # match word between '!' and ':'
\s*          # match optional whitespace
([^;]+);     # capture group 1 - match all characters (without ';') up to the next ';'
\s*          # match optional whitespace
(            # start capture group 2
    (?:          # group (non-capture)
        \s*          # match optional whitespace
        [^:!]+:      # match all characters (without ':' and '!') up to the next ':'
        [^;]+;       # match all characters (without ';') up to the next ';'
    )+           # group end; match this group 1 to n times
)            # end capture group 2
/g           # end regex; set g-Flag for global

答案 1 :(得分:1)






/            # start regex
\s*          # match optional whitespace
!Norm:       # match '!Norm:'
\s?          # match optional whitespace
(.+);        # capture group 1 - match all characters (without '\n') up to the next ';'
\s+          # match 1..n whitespaces
(            # start capture group 2
    (?:          # group (non-capture)
        (?!          # negative lookahead
            [!]Norm      # match '!Norm'
        )            # end negative lookahead
        [\s\S]       # match a white space or other than white space character
                     # this group matches a single character as long as it does not start a new !Norm
    )*           # group end; match this group 0..n times
)            # end capture group 2
/g           # end regex; set g-Flag for global

答案 2 :(得分:0)

所以在这里我有一个完整的解决方案 使用的正则表达式来自两个答案的组合:

var sData = "!Norm: DIN 933;\n !Norm: DIN 7985;\n    M2: 2, 2, 2;\n    M3:3,3;\n    M10: 20,25;\n     !Norm: DIN 7985 TX;\n    M2: 6,    10    , 16;";

var oData = {};

/**
 * Replacement callback for the outer (per-norm) replace() call.
 *
 * Parses the settings that belong to one "!Norm: <name>;" section into an
 * object mapping each key (e.g. "M2") to an array of value strings, and
 * stores that object in oData under the norm name.
 *
 * @param {string} match - the full matched section
 * @param {string} sNorm - norm name (text between "!Norm:" and the next ";")
 * @param {string} sScrews - raw text of all "key: v1, v2;" entries of this norm
 * @param {number} offset - offset of the match in the input (unused)
 * @param {string} string - the whole input string (unused)
 * @returns {string} always "#"; the replaced string itself is not used
 */
function replacer(match, sNorm, sScrews, offset, string) {
    var oScrews = {};

    // Inner pass: one match per "key: list;" entry.
    //   group 1 - key, i.e. everything up to ':' without whitespace/':'/';'
    //   group 2 - the list, i.e. everything up to the next ';'
    sScrews.replace(/([^\s:;]+)\s*:\s*([^;]*);/g,
        function(match, sScrewSize, sScrewList, offset, string) {
            // Trim first so stray whitespace around the list does not
            // produce empty entries; an empty list becomes [].
            var sList = sScrewList.trim();

            oScrews[sScrewSize] = sList ? sList.split(/[\s,]+/) : [];

            return "§";
        });

    oData[sNorm] = oScrews;

    return "#";
}

// Parse sData with help of Regex replace.
// Outer pass: one match per "!Norm" section.
//   group 1 - norm name, up to the next ';'
//   group 2 - every following character that does not start a new "!Norm";
//             it may be empty, so a norm without entries still yields {}
sData.replace(/\s*!Norm:\s*([^;]+?)\s*;((?:(?![!]Norm)[\s\S])*)/g, replacer);

console.log("oData: ", oData);


oData = {
    DIN 7985 :      {
                        M10 : ["20", "25"],
                        M2  : ["2", "2", "2"],
                        M3  : ["3", "3"],
                    },
    DIN 7985 TX :   {
                        M4  : ["4", "4", "4"],
                    },
    DIN 933 :       {}
};