在JavaScript中构建自定义的简单解析器以进行进一步处理

时间:2018-10-16 23:13:43

标签: javascript parsing recursion

对于我的一个项目,我必须分析一堆简单的html格式的字符串并进一步处理它们。

假设我有一个像这样的字符串:

Hey <font face="Times New Roman" size="14">this text look <i>cool<i> and <b>fancy <i>very fancy<i></b> however </font> I'm ok <u>with</u> it.

我想通过递归函数获得的数组是一个具有各种对象的数组,其子对象的结构如下:

var myArr = myParse(myString);

myArr[0] = {
    "text": "Hey",
}
myArr[0] = {
    "text": "this text look ",
    "face": "Times New Roman",
    "size": 14
}
myArr[1] = {
    "text": "cool",
    "face": "Times New Roman",
    "size": 14,
    "italic": true
}
myArr[2] = {
    "text": " and ",
    "face": "Times New Roman",
    "size": 14
}
myArr[3] = {
    "text": "fancy ",
    "face": "Times New Roman",
    "size": 14,
    "bold": true
}
myArr[4] = {
    "text": "fancy ",
    "face": "Times New Roman",
    "size": 14,
    "bold": true
}
myArr[5] = {
    "text": "very fancy",
    "face": "Times New Roman",
    "size": 14,
    "italic": true,
    "bold": true
}
myArr[6] = {
    "text": " however ",
    "face": "Times New Roman",
    "size": 14
}
myArr[6] = {
    "text": " I'm ok "
}
myArr[7] = {
    "text": "with",
    "underline": true
}
myArr[8] = {
    "text": " it."
}

我试图将其构造为递归函数,但是我不确定如何继续使其完全起作用。.

function myParse(str, arr) {

    if(!arr) arr = [];

    var regex = /<font(.+?)>(.+?)<\/font>|<i>(.+?)<\/i>|<b>(.+?)<\/b>|<u>(.+?)<\/u>/g;
    var match = regex.exec(str);
    while (match != null) {
        for (var i = 0; i < match.length; i++) {

            // this way i can identify with matches:
            // match[1] - font specifics
            // match[2] - font tag content
            // match[3] - italic tag content
            // match[4] - bold tag content
            // match[5] - underline tag content

            var temp_object = {};

            var temp_object.text = matched_text;
            // process here a second regex to obtain font name and size.. like:
            var regex = /face="(.+?)"|size="(.+?)"/g;
            ...
            var temp_object.italic = match[3] ? true : false;
            var temp_object.bold = match[4] ? true : false;
            var temp_object.underline = match[5] ? true : false;

            // at some point i'm pretty sure that i have to..
            // put a marker let's say a
            var marker = Math.floor(Math.random() * 5000).toString();
            str.replace(matched_text, marker)

            // then recurse the parse(str, arr)  
        }
        match = regex.exec(str);
    }

}

0 个答案:

没有答案