Confluence维基标记 - 使用正则表达式进行表行类型确定

时间:2017-01-13 12:41:47

标签: c# regex confluence wiki-markup

confluence wiki v6.0中有两种不同的表类型。

因此,我在根据第一行确定表格类型时遇到了困难(我使用正则表达式 new Regex(@"(\|(\r\n|\r|\n)(.*?)\|)+"); 检测换行符来分割各行,然后使用 Matches 进行拆分,但问题仍然没有解决)。

表格行可能如下所示:

如果标题

  

||标题1 ||标题2 ||标题3 ||

如果常规行

  

|单元格A1 |单元格A2 |单元格A3 |

如果垂直表格行

  

||标题|单元格B2 |单元格B3 |

我尝试使用像^(\|\|.*?\|)这样的表达式,但发现它也适用于标题。

考虑到标题行的标记特点,之后我又尝试了 ^(\|\|.*?\|\|),但如果该行是常规行,这个表达式同样无济于事。

那么,是否可以使用正则表达式来确定行的类型,或者至少识别出垂直表格行?

或者编写一些可以逐步处理行的内容会更好吗?

1 个答案:

答案 0 :(得分:0)

可以不使用正则表达式来实现;用 JavaScript 编写的话,大致如下:

简单字符串扫描程序

/**
 * Minimal forward-only character scanner over a single string.
 *
 * The scanner exposes the character at the current position, lets the caller
 * advance by one character, and collects formatted error messages in
 * `errorList` instead of throwing.
 */
var Scanner = (function(){
    /**
     * @constructor
     * @param {string} text - Text to scan; it is split into UTF-16 code units.
     */
    function Scanner(text){
        this.currentString = text.split('');
        this.position = 0;
        this.errorList = [];
    }

    /**
     * Returns the character at the current position without consuming it.
     *
     * @returns {string|number} The current character, or the sentinel -1 once
     *     the position is at or past the end of the text.
     */
    Scanner.prototype.getChar = function(){
        var me = this;

        if(me.position < me.currentString.length){
            return me.currentString[me.position];
        }

        return -1;
    };

    /**
     * Advances the position by one character. When the end of the text has
     * already been reached the position is left unchanged and an
     * "EOL reached" error is recorded instead.
     */
    Scanner.prototype.nextChar = function(){
        var me = this;

        if(me.position < me.currentString.length){
            me.position++;
            return;
        }

        me.error("EOL reached");
    };

    /**
     * Records an error message, prefixed with the current position.
     *
     * @param {string} errorMsg - Description of the problem.
     */
    Scanner.prototype.error = function(errorMsg){
        var me = this,
            error = "Error at position " + me.position +"\nMessage: "+errorMsg+".\n";

        // push must be called; indexing it (push[error]) silently drops the message.
        me.errorList.push(error);
    };

    return Scanner;

})();

简单解析器

 /**
  * Parser for one row of Confluence wiki table markup.
  *
  * Grammar:
  *
  *     LINE         ::= { CELL }
  *     CELL         ::= '|' CELL1
  *     CELL1        ::= HEADER_CELL | REGULAR_CELL
  *     HEADER_CELL  ::= '|' TEXT
  *     REGULAR_CELL ::= TEXT
  *
  * Uses the `Scanner` and `TableCellType` definitions found elsewhere in
  * this file.
  *
  * @constructor
  */
function RowParser(){
    this.scanner = {};
    this.rawText = "";
    this.cellsData = [];

    return this;
}

RowParser.prototype = {
    /**
     * Parses a row of markup. The recognised cells are stored in
     * `cellsData`; problems are reported through the scanner's error list.
     *
     * @param {string} row - Raw markup of the row.
     */
    parseRow: function(row){
        var me = this;

        me.scanner = new Scanner(row);
        me.rawText = row;
        me.cellsData = [];

        me.proceedNext();
    },

    /**
     * LINE ::= { CELL } - consumes cells while the current character is '|'.
     * Anything other than end of input after the last cell is reported as
     * an error.
     */
    proceedNext: function(){
        var me = this,
            scanner = me.scanner;

        // Plain iteration: each proceedCell call consumes exactly one cell,
        // so stack depth stays constant regardless of the number of cells.
        while(scanner.getChar() === '|'){
            me.proceedCell();
        }

        if(scanner.getChar() !== -1){
            scanner.error("EOL expected, "+ scanner.getChar() +" got");
        }
    },

    /**
     * CELL ::= '|' CELL1 - consumes the leading '|' and dispatches on what
     * follows. Does nothing when the current character is not '|'.
     */
    proceedCell: function(){
        var me = this,
            scanner = me.scanner;

        if(scanner.getChar() === '|'){
            scanner.nextChar();
            me.proceedHeaderCell();
        }
    },

    /**
     * CELL1 ::= HEADER_CELL | REGULAR_CELL - a second '|' marks a header
     * cell, anything else starts a regular cell.
     */
    proceedHeaderCell: function(){
        var me = this,
            scanner = me.scanner;

        if(scanner.getChar() === '|'){
            me.onHeaderCell();
        } else {
            me.onRegularCell();
        }
    },

    /**
     * HEADER_CELL ::= '|' TEXT - consumes the second '|' and reads the text.
     * Records an error when the current character is not '|'.
     */
    onHeaderCell: function(){
        var me = this,
            scanner = me.scanner;

        if(scanner.getChar() === '|'){
            scanner.nextChar();
            me.proceedInnerText(TableCellType.info.Header);
        }else{
            // Report the character actually found at the current position.
            scanner.error("Expected '|' got "+ scanner.getChar() +".");
        }
    },

    /**
     * REGULAR_CELL ::= TEXT - reads the text of a regular cell.
     */
    onRegularCell: function(){
        this.proceedInnerText(TableCellType.info.Regular);
    },

    /**
     * TEXT - collects characters up to (not including) the next '|' or the
     * end of input and appends the resulting cell to `cellsData`. Control
     * then returns to the loop in proceedNext, which picks up the next cell.
     *
     * @param {number} cellType - One of the ids in `TableCellType.info`.
     */
    proceedInnerText: function(cellType){
        var me = this,
            scanner = me.scanner,
            typeData = TableCellType.getValueById(cellType),
            innerText = [];

        while(scanner.getChar() !== '|' && scanner.getChar() !== -1){
            innerText.push(scanner.getChar());
            scanner.nextChar();
        }

        me.cellsData.push({
            typeId: typeData.id,
            type: typeData.name,
            text: innerText.join("")
        });
    },

    /**
     * Returns the parsed cells with their text trimmed, leaving out cells
     * whose raw text is empty.
     *
     * A properly closed row ends with '|', which yields an empty trailing
     * cell. When the last cell is missing or not empty, a "No close tag"
     * error is recorded and nothing is returned.
     *
     * The stored cells are not modified, so repeated calls return equal
     * results.
     *
     * @returns {Array<{typeId: number, type: string, text: string}>|undefined}
     */
    getRowData: function(){
        var me = this,
            scanner = me.scanner,
            data = me.cellsData,
            lastCell = data[data.length - 1],
            result = [],
            cell,
            i;

        if(!lastCell || lastCell.text.length !== 0){
            // Before parseRow the scanner is a bare placeholder object
            // without an error method.
            if(typeof scanner.error === "function"){
                scanner.error("No close tag at row "+ me.rawText +".");
            }
            return;
        }

        for(i = 0; i < data.length; i++){
            cell = data[i];
            if(cell.text.length !== 0){
                // Copy instead of trimming in place to keep cellsData intact.
                result.push({
                    typeId: cell.typeId,
                    type: cell.type,
                    text: cell.text.trim()
                });
            }
        }

        return result;
    }
};
上面代码中用到的单元格类型枚举(TableCellType):

/**
 * Catalogue of table-cell kinds.
 *
 * `info` maps a symbolic kind to its numeric id, `data` holds one
 * descriptor ({ id, name }) per kind, and `getValueById` looks a
 * descriptor up by id.
 */
var TableCellType = {
    info: { Regular: 10, Header: 20 },

    data: [
        { id: 10, name: "regular" },
        { id: 20, name: "header" }
    ],

    /**
     * Finds the descriptor whose id is strictly equal to the given one.
     *
     * @param {number} id - Numeric id to look for.
     * @returns {{id: number, name: string}|undefined} The first matching
     *     descriptor from `data`, or undefined when there is none.
     */
    getValueById: function(id){
        var entries = this.data,
            idx;

        for(idx = 0; idx < entries.length; idx++){
            if(entries[idx].id === id){
                return entries[idx];
            }
        }

        return undefined;
    }
};

用法:

// Example: parse a piece of table markup and collect the recognised cells.
var rowParser = new RowParser();
// NOTE(review): this sample holds two table rows joined by "\n" and is handed
// to parseRow as one string - confirm whether input should be split per line first.
var row = "||AAA||BBB||CCC||\n|Hi|all|people!|";
rowParser.parseRow(row);
// getRowData() yields the parsed cells, or undefined when the row is reported
// as having no close tag.
var result = rowParser.getRowData();