Java:电话号码的筛选与“精英”号码的归类

时间:2016-04-05 09:29:00

标签: java regex performance filter phone-number

任务是把大量电话号码(总共约 20 万个)划分为“精英”号码和“高级”号码。“精英”表示号码非常漂亮且昂贵,“高级”表示号码较为漂亮。

我的解决方案可以工作,但速度很慢。处理 20 万个号码大约需要 40 分钟!问题在于我必须根据掩码生成数千个正则表达式,然后用这数千个正则表达式去匹配数千个号码!

模式的名称形如 patternX、patternXY、patternAB、patternABC、patternXAB、patternXABC、patternXYZ、patternXYAB,例如:

// Each mask is a '|'-separated list of anchored regular-expression templates.
// The letters X, Y, Z (and A, B) are placeholders: the matching methods replace
// them with concrete digits before compiling the result as a regex, so every
// other character ('^', '\\d+', '.', '$') keeps its normal regex meaning.
super.patternXYZ = "^\\d+XXYYZZ$|^\\d+ZZXYXY$|^\\d+YXXYYZZ..$";
super.patternXYAB = "^\\d+ABXXYY$|^\\d+ABXYXY$";

其中每个字母都代表一个数字掩码:XXYY 匹配 4488 或 9933(X ≠ Y),AABB 匹配 3344 或 7788 这样的连续数字序列(A + 1 = B)。

通过以下方式进行匹配:

/**
 * Collects every entry of {@code numbers} that matches at least one pattern
 * family into {@code result}.
 *
 * @return the {@code result} set after all numbers have been examined
 */
@Override
public Set<String> performCollect() {
    for (String number : numbers) {
        // The checks are chained with else-if, so the next pattern family is
        // only tried when the previous one did not match.
        if (isPatternXMatches(number)) {
            result.add(number);
        } else if (isPatternXYMatches(number)) {
            result.add(number);
        } 
        // Elided in the original post; presumably the remaining pattern
        // families follow the same else-if shape.
        ...
    }
    return result;
}

每次匹配时都会先生成正则表达式,然后再执行匹配:

/**
 * Checks {@code number} against the {@code patternXYZ} template for every
 * assignment of the digits 0-9 to the placeholders X, Y and Z.
 *
 * <p>Each of the 1000 assignments yields its own regular expression, which is
 * compiled and applied with {@code find()}; the first hit ends the search.
 *
 * @param number the number to test
 * @return {@code true} if any substituted pattern is found in {@code number}
 */
protected boolean isPatternXYZMatches(String number) {
    for (int x = 0; x <= 9; x++) {
        final String withX = patternXYZ.replace("X", String.valueOf(x));
        for (int y = 0; y <= 9; y++) {
            final String withXy = withX.replace("Y", String.valueOf(y));
            for (int z = 0; z <= 9; z++) {
                final String regex = withXy.replace("Z", String.valueOf(z));
                if (Pattern.compile(regex).matcher(number).find()) {
                    return true;
                }
            }
        }
    }
    return false;
}

/**
 * Checks {@code number} against the {@code patternXYAB} template for every
 * assignment of the digits 0-9 to X and Y combined with every consecutive
 * digit pair (A, B) where {@code B == A + 1}, i.e. (0,1) through (8,9).
 *
 * <p>Every combination is turned into a regular expression, compiled, and
 * applied with {@code find()}; the first hit ends the search.
 *
 * @param number the number to test
 * @return {@code true} if any substituted pattern is found in {@code number}
 */
protected boolean isPatternXYABMatches(String number) {
    for (int x = 0; x <= 9; x++) {
        final String withX = patternXYAB.replace("X", String.valueOf(x));
        for (int y = 0; y <= 9; y++) {
            final String withXy = withX.replace("Y", String.valueOf(y));
            for (int a = 0; a < 9; a++) {
                final int b = a + 1;
                // The placeholders are distinct letters and the replacements are
                // digits, so the substitution order does not affect the result.
                final String regex = withXy
                        .replace("A", String.valueOf(a))
                        .replace("B", String.valueOf(b));
                if (Pattern.compile(regex).matcher(number).find()) {
                    return true;
                }
            }
        }
    }
    return false;
}

问题:有没有人知道或者可以建议更好更快的解决方案?

1 个答案:

答案 0 :(得分:0)

我用自定义匹配器替换了正则表达式,现在处理 20 万个号码只需 5 秒,而不是 40 分钟!

/**
 * Adds every entry of {@code numbers} accepted by
 * {@link #isNumberMatches(String)} to {@code result}.
 *
 * @return the {@code result} set after all numbers have been examined
 */
public Set<String> performCollect() {
    for (String candidate : numbers) {
        if (!isNumberMatches(candidate)) {
            continue;
        }
        result.add(candidate);
    }

    return result;
}

/**
 * Tells whether {@code number} matches at least one entry of {@code patterns}.
 *
 * <p>A single {@code NumberMatcher} is built for the number and reused for
 * every pattern; iteration stops at the first match.
 *
 * @param number the number to test
 * @return {@code true} as soon as one pattern matches, {@code false} if none does
 */
protected boolean isNumberMatches(String number) {
    final NumberMatcher matcher = new NumberMatcher(number, offset);
    boolean matched = false;
    for (NumberPattern candidate : patterns) {
        if (matcher.processMatch(candidate)) {
            matched = true;
            break;
        }
    }
    return matched;
}

...

/**
 * Mutable holder for a mask, stored as its individual characters, together
 * with an associated weight.
 */
public class NumberPattern {

    /** Characters of the mask, in the order they appear in the mask string. */
    private char[] mask;

    /** Weight associated with the mask. */
    private Integer weight;

    /**
     * Creates a pattern from a mask string.
     *
     * @param mask   the mask; it is split into its characters
     * @param weight the weight to associate with the mask
     */
    public NumberPattern(String mask, Integer weight) {
        this.mask = mask.toCharArray();
        this.weight = weight;
    }

    /**
     * @return the mask characters; this is the internal array, not a copy
     */
    public char[] getMaskChars() {
        return this.mask;
    }

    /**
     * @return the weight associated with the mask
     */
    public Integer getWeight() {
        return this.weight;
    }

    /**
     * Replaces the mask characters.
     *
     * @param maskChars the new mask characters; the array is stored as-is, not copied
     */
    public void setMaskChars(char[] maskChars) {
        this.mask = maskChars;
    }

    /**
     * Replaces the weight.
     *
     * @param weight the new weight
     */
    public void setWeight(Integer weight) {
        this.weight = weight;
    }
}

...

public class NumberMatcher {

    private char[] numberChars;
    private int uniqueChars = 0;

    public NumberMatcher(String number, int offset) {
        numberChars = number.toCharArray();
        List<Character> chars = new ArrayList<>();

        for (Character ch : number.substring(offset).toCharArray()) {
            if (!chars.contains(ch)) {
                uniqueChars++;
                chars.add(ch);
            }
        }
    }

    public boolean processMatch(NumberPattern pattern) {
        if (pattern.getWeight() < uniqueChars) {
            return false;
        }

        Character X = null;
        Character Y = null;
        Character Z = null;
        Character A = null;
        Character B = null;
        Character C = null;
        Character D = null;

        final char[] patternChars = pattern.getMaskChars();
        int patternIndex = patternChars.length;
        int numberIndex = numberChars.length;

        while (patternIndex > 0) {
            patternIndex--;
            numberIndex--;
            char numberChar = numberChars[numberIndex];
            char patternChar = patternChars[patternIndex];

            switch (patternChar) {
                case 'A':
                    if (A == null) {
                        A = numberChar;
                        B = (char) (A + 1);
                        C = (char) (B + 1);
                        D = (char) (C + 1);
                    } else if (!A.equals(numberChar)) {
                        return false;
                    }
                    break;
                case 'B':
                    if (B == null) {
                        B = numberChar;
                        A = (char) (B - 1);
                        C = (char) (B + 1);
                        D = (char) (C + 1);
                    } else if (!B.equals(numberChar)) {
                        return false;
                    }
                    break;
                case 'C':
                    if (C == null) {
                        C = numberChar;
                        B = (char) (C - 1);
                        A = (char) (B - 1);
                        D = (char) (C + 1);
                    } else if (!C.equals(numberChar)) {
                        return false;
                    }
                    break;
                case 'D':
                    if (D == null) {
                        D = numberChar;
                        C = (char) (D - 1);
                        B = (char) (C - 1);
                        A = (char) (B - 1);
                    } else if (!D.equals(numberChar)) {
                        return false;
                    }
                    break;
                case 'X':
                    if (X == null) {
                        X = numberChar;
                    } else if (!X.equals(numberChar)) {
                        return false;
                    }
                    break;
                case 'Y':
                    if (Y == null) {
                        Y = numberChar;
                    } else if (!Y.equals(numberChar)) {
                        return false;
                    }
                    break;
                case 'Z':
                    if (Z == null) {
                        Z = numberChar;
                    } else if (!Z.equals(numberChar)) {
                        return false;
                    }
                    break;
                case '.':
                    break;
                case '0':
                    if (numberChar != '0') {
                        return false;
                    }
                    break;
            }
        }

        return true;
    }
}