正则表达式元数据(Metadata)

时间:2015-07-29 11:30:36

标签: java regex

我想检查正则表达式以识别它包含的匹配组。以下是我想要的API类型的示例:

// Hypothetical API sketch: getGroups(...) and the Group type used below are NOT provided by
// java.util.regex.Pattern; this only illustrates the kind of interface being asked for.
String pattern = "^My name is \"([^\"]*)\" and I am (\d*) years old$"
Pattern p = Pattern.compile(pattern)

Group g1 = p.getGroups(0); // Represents the name group
g1.getStartPosition(); // should yield the group's start position in the regex string, e.g. 14
g1.getEndPosition();   // and its end position, e.g. 21

Group g2 = p.getGroups(1); // Represents the age group
g2.getStartPosition(); // e.g. 34
g2.getEndPosition();   // e.g. 39

这不是java标准java.util.regex.Pattern提供的,但我想知道是否有任何现有的开源库可以让我以这种方式检查正则表达式?

我不想自己动手实现(即用java.lang.String API去逐段拆解正则表达式字符串),因为那样会特别麻烦。

1 个答案:

答案 0 :(得分:0)

这不是一个专业的API,但我建议您试试下面这个类。我是把它当作练习来写的,它提供了几个与Matcher类似的方法,例如:group(int group)、start(int group)、end(int group)和groupCount()。它很容易使用。

import java.util.ArrayList;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Inspects the <em>source text</em> of a regular expression and reports where each of its
 * capturing groups is written, mirroring the {@code start/end/group/groupCount} vocabulary of
 * {@code java.util.regex.Matcher}.
 *
 * <p>Group {@code 0} always denotes the whole expression. Groups {@code 1..groupCount()} are
 * numbered by the position of their opening parenthesis, left to right.
 *
 * <p>The scanner skips escaped characters ({@code \(}), {@code \Q...\E} quotations and character
 * classes ({@code [(]}), and it treats {@code (?:...)}, look-around, atomic groups and inline
 * flags as non-capturing. Named groups {@code (?<name>...)} are capturing. Unbalanced
 * parentheses are ignored rather than reported. Comments enabled through the {@code COMMENTS}
 * flag are not recognised.
 */
public class Metamatcher {
    private final String pattern;
    /** starts[g] is the index of the opening parenthesis of group g (0 for the whole pattern). */
    private final int[] starts;
    /** ends[g] is the index just past the closing parenthesis of group g. */
    private final int[] ends;

    /**
     * @param pattern regular expression source to inspect
     * @throws NullPointerException if {@code pattern} is null
     */
    public Metamatcher(String pattern){
        if (pattern == null) {
            throw new NullPointerException("pattern must not be null");
        }
        this.pattern = pattern;

        // The map is keyed by opening index, so iterating it yields groups in numbering order.
        TreeMap<Integer,Integer> groups = getGroups();
        starts = new int[groups.size() + 1];
        ends = new int[groups.size() + 1];
        starts[0] = 0;
        ends[0] = pattern.length();
        int g = 1;
        for (Integer s : groups.keySet()) {
            starts[g++] = s;
        }
        g = 1;
        for (Integer e : groups.values()) {
            ends[g++] = e;
        }
    }

    /**
     * @param group ordinal number of group (0 is the whole pattern)
     * @return starting index of the fragment of the pattern which captures the group
     * @throws IndexOutOfBoundsException if there is no such group
     */
    public int start(int group){
        checkGroup(group);
        return starts[group];
    }

    /**
     * @param group ordinal number of group (0 is the whole pattern)
     * @return index just past the end of the fragment of the pattern which captures the group
     * @throws IndexOutOfBoundsException if there is no such group
     */
    public int end(int group){
        checkGroup(group);
        return ends[group];
    }

    /**
     * @param group ordinal number of group (0 is the whole pattern)
     * @return fragment of the regular expression which captures the given group,
     *         including its enclosing parentheses
     * @throws IndexOutOfBoundsException if there is no such group
     */
    public String group(int group){
        return pattern.substring(start(group), end(group));
    }

    /**
     * @return number of capturing groups within the regular expression (group 0 is not counted)
     */
    public int groupCount(){
        return starts.length - 1;
    }

    /**
     * @return a multi-line summary: the group count followed by one
     *         {@code group(i) start-end<TAB>fragment} line per group, group 0 included
     */
    @Override
    public String toString(){
        StringBuilder result = new StringBuilder();
        result.append("Groups count: ")
                .append(groupCount())
                .append("\n");
        for(int i = 0; i <= groupCount(); i++){
            result.append("group(")
                    .append(i).append(") ")
                    .append(start(i))
                    .append("-")
                    .append(end(i))
                    .append("\t")
                    .append(group(i))
                    .append("\n");
        }
        return result.toString();
    }

    /** Rejects group numbers outside {@code 0..groupCount()}. */
    private void checkGroup(int group){
        if (group < 0 || group >= starts.length) {
            throw new IndexOutOfBoundsException("No group " + group);
        }
    }

    /**
     * Scans the pattern once, left to right, pairing parentheses with an explicit stack.
     *
     * @return map from the opening index of every closed capturing group to the index just past
     *         its closing parenthesis
     */
    private TreeMap<Integer,Integer> getGroups(){
        final int length = pattern.length();
        // Parallel stacks describing the currently unclosed '(' characters.
        int[] openAt = new int[length];
        boolean[] capturing = new boolean[length];
        int depth = 0;
        TreeMap<Integer,Integer> closed = new TreeMap<Integer,Integer>();

        int i = 0;
        while (i < length) {
            char c = pattern.charAt(i);
            if (c == '\\') {
                // An escaped character (or a whole \Q...\E quotation) can never open or close a group.
                i = skipEscape(i);
            } else if (c == '[') {
                // Parentheses inside a character class are literals.
                i = skipCharacterClass(i);
            } else {
                if (c == '(') {
                    openAt[depth] = i;
                    capturing[depth] = isCapturingGroup(i);
                    depth++;
                } else if (c == ')' && depth > 0) {
                    depth--;
                    if (capturing[depth]) {
                        closed.put(openAt[depth], i + 1);
                    }
                }
                i++;
            }
        }
        return closed;
    }

    /**
     * @param open index of an unescaped {@code '('} in the pattern
     * @return true if the parenthesis opens a capturing (plain or named) group
     */
    private boolean isCapturingGroup(int open){
        int next = open + 1;
        if (next >= pattern.length() || pattern.charAt(next) != '?') {
            return true;
        }
        // "(?<name>" is a named capturing group, while "(?<=" and "(?<!" are look-behinds.
        if (pattern.startsWith("?<", next) && next + 2 < pattern.length()) {
            char afterAngle = pattern.charAt(next + 2);
            return afterAngle != '=' && afterAngle != '!';
        }
        // Every other "(?" construct: (?:...), (?=...), (?!...), (?>...), (?i), (?i:...).
        return false;
    }

    /**
     * @param backslash index of a {@code '\'} in the pattern
     * @return index of the first character after the escape; for {@code \Q} this is the index
     *         after the matching {@code \E}, or the pattern length if the quotation is unterminated
     */
    private int skipEscape(int backslash){
        if (pattern.startsWith("\\Q", backslash)) {
            int close = pattern.indexOf("\\E", backslash + 2);
            return close < 0 ? pattern.length() : close + 2;
        }
        return Math.min(backslash + 2, pattern.length());
    }

    /**
     * @param open index of the {@code '['} which starts a character class
     * @return index of the first character after the class (nested classes included), or the
     *         pattern length if the class is unterminated
     */
    private int skipCharacterClass(int open){
        final int length = pattern.length();
        int depth = 0;
        int i = open;
        while (i < length) {
            char c = pattern.charAt(i);
            if (c == '\\') {
                i = skipEscape(i);
            } else if (c == '[') {
                depth++;
                i++;
                if (i < length && pattern.charAt(i) == '^') {
                    i++;
                }
                // A ']' which is the very first member of a class is a literal, not the terminator.
                if (i < length && pattern.charAt(i) == ']') {
                    i++;
                }
            } else if (c == ']') {
                depth--;
                i++;
                if (depth == 0) {
                    return i;
                }
            } else {
                i++;
            }
        }
        return length;
    }
}

我用各种不同的输入对它进行了测试,并将输出与regex101等工具的结果做了比较,对我来说它工作正常。