创建正则表达式以将html解析为MXML语法

时间:2013-05-20 01:55:00

标签: javascript regex

我在Stack Overflow上搜索了很多,找到了一些很有参考价值的问题,包括:

How to create a Regular Expression for a span attribute?

Javascript regex to replace text div and < >

但事实证明,我还是无法实现我的目标:把每个div标签名替换为它的data-type属性值,并从字符串中删除data-type属性。

这是我目前的尝试。

// Doesn't work across multiple lines; it only gets the first occurrence and nothing more.
// Regex: /\s?data\-type\=(?:['"])?(\d+)(?:['"])?/

// Serialized markup of <body>; `$` is presumably jQuery — confirm.
var source_code = $("body").html();

// Matches every literal "div" substring, wherever it appears in the markup.
var rdiv = /div/gm; // remove divs
// No g flag: only the FIRST data-type attribute is matched; mxml[1] is its captured value.
// match() returns null when nothing matches, in which case mxml[1] below would throw.
var mxml = source_code.match(/\S?data\-type\=(?:['"])?(\w+)(?:['"])?/);
// With the g flag, match() returns every full match as a plain string (no capture groups).
var rattr =source_code.match(/\S?data\-type\=(?:['"])?(\w+)(?:['"])/gm);
// Every "div" is replaced with the same text: 's:' + the first data-type value.
var outra = source_code.replace(rdiv,'s:'+mxml[1]);
// replace() with a string pattern removes only the first occurrence of rattr[0].
var nestr = outra.replace(rattr[0],'');// worked with only first element
console.log(nestr);
console.log(mxml);
console.log(rattr);

在此HTML示例页面

<div id="app" data-type="Application">
    <div data-type="Label"></div>
     <div data-type="Button"></div>
     <div data-type="VBox"></div>
     <div data-type="Group"></div>
</div>

对这个具体问题有什么想法吗?我可能遗漏了什么,但我实在毫无头绪,已经无计可施了。

我已经创建了一个jsFiddle来显示,只需打开浏览器控制台即可看到我的结果。

http://jsfiddle.net/uWCjV/

欢迎直接用jsFiddle作答,或者更好的是,解释一下我的正则表达式为什么会失败。

在得到任何反馈之前,我会继续尝试查看是否可以设法替换该文本。

提前致谢。

1 个答案:

答案 0 :(得分:0)

将标记解析为对象树然后将其转换为MXML可能更容易。

这样的事情:

// Parser input: the serialized markup of <body>. `$` is presumably jQuery — confirm.
var source_code = $("body").html();

// Tokenizer patterns used by parseElm. Every pattern is anchored with ^ so that
// it can only match at the very start of the not-yet-consumed input.
var openStartTagRx = /^\s*<div/i;    // "<div" that begins an opening tag
var closeStartTagRx = /^\s*>/i;      // ">" that ends an opening tag
var closeTagRx = /^\s*<\/div>/i;     // "</div>"

// One quoted attribute, preceded by at least one whitespace character.
// The "m" flag is deliberately NOT set: with it, ^ would also match at the
// start of every later line, so an attribute written on its own line inside a
// child tag would be consumed while the parent's attributes are being parsed.
var attrsRx = new RegExp(
    '^\\s+' +
    '(?:(data-type)|([a-z-]+))' +    // group 1 is "data-type" group 2 is any attribute
    '\\=' +
    '(?:\'|")' +
    '(.*?)' +                        // group 3 is the data-type or attribute value
    '(?:\'|")',
    'i');


/**
 * A node of the parsed element tree.
 *
 * All three fields start out as undefined; `attrs` and `children` are only
 * created the first time something is added to them.
 */
function Thing() {
    var self = this;
    // Create the own properties in a fixed order: type, attrs, children.
    ['type', 'attrs', 'children'].forEach(function (field) {
        self[field] = undefined;
    });
}

/**
 * Stores an attribute on this node, creating the attribute map on first use.
 *
 * @param {string} key - attribute name
 * @param {*} value - attribute value
 */
Thing.prototype.addAttr = function(key, value) {
    if (!this.attrs) {
        this.attrs = {};
    }
    this.attrs[key] = value;
};

/**
 * Appends a child node, creating the children array on first use.
 *
 * @param {*} child - node to append
 */
Thing.prototype.addChild = function(child) {
    if (!this.children) {
        this.children = [];
    }
    this.children.push(child);
};


/**
 * Builds the message for a "malformed source" error.
 *
 * @param {string} expected - description of what the parser was looking for
 * @param {string} str - remaining input; only its first 20 characters are quoted
 * @returns {string} the message, with the input excerpt in double quotes on a second line
 */
function getErrMsg(expected, str) {
    var heading = 'Malformed source, expected: ' + expected;
    var excerpt = str.slice(0, 20);
    return heading + '\n"' + excerpt + '"';
}


/**
 * Parses one <div> element, including any nested <div> children, from the
 * start of `str`.
 *
 * @param {string} str - markup to parse; leading whitespace is allowed
 * @returns {{str: string, elm: Thing}|undefined} the parsed node together with
 *     the input that is left after its closing tag, or undefined when `str`
 *     does not begin with an opening <div
 * @throws {Error} when the opening tag is not terminated by ">" or the
 *     element's closing </div> is missing
 */
function parseElm(str) {
    var node,
        rest,
        attr,
        child;

    // Not positioned on an opening <div: tell the caller there is no element here.
    if (!openStartTagRx.test(str)) {
        return;
    }
    node = new Thing();
    rest = str.replace(openStartTagRx, '');

    // Consume attributes one at a time from the front of the remaining input.
    for (attr = attrsRx.exec(rest); attr; attr = attrsRx.exec(rest)) {
        if (attr[1]) {
            // data-type becomes the node's type instead of a regular attribute
            node.type = attr[3];
        } else {
            node.addAttr(attr[2], attr[3]);
        }
        rest = rest.replace(attrsRx, '');
    }

    // The opening tag must end here.
    if (!closeStartTagRx.test(rest)) {
        throw new Error(getErrMsg('end of opening tag', rest));
    }
    rest = rest.replace(closeStartTagRx, '');

    // Keep parsing child elements until the input no longer starts with one.
    for (child = parseElm(rest); child; child = parseElm(rest)) {
        rest = child.str;
        node.addChild(child.elm);
    }

    // Whatever follows the children has to be this element's closing tag.
    if (!closeTagRx.test(rest)) {
        throw new Error(getErrMsg('closing tag for the element', rest));
    }
    return {
        str: rest.replace(closeTagRx, ''),
        elm: node
    };
}


// Parse the page markup and log the root node of the resulting tree.
// NOTE: parseElm returns undefined when its input does not begin with "<div"
// (optionally preceded by whitespace); in that case reading `.elm` throws a TypeError.
console.log(parseElm(source_code).elm); 

jsFiddle

将您提供的标记解析为以下内容:

{
  "type" : "Application",
  "attrs" : { "id" : "app" },
  "children" : [
    { "type" : "Label" },
    { "type" : "Button" },
    { "type" : "VBox" },
    { "type" : "Group" }
  ]
}

解析过程是递归的,因此嵌套的元素也会被解析。