我正在努力跳过某个unicode代码点范围的字符。我正在使用Java正则表达式,它不匹配范围内的所有代码点。贝娄是我的测试代码。我真的很感激任何投入。感谢
@Test
public void test() {
// to find and skip control chars in codepoint range U+0080—U+009F
Pattern skipCodePointsPattern = Pattern.compile("[\u0080 - \u009F]");
// works
Matcher matcher = skipCodePointsPattern.matcher("ab\u0080");
assertTrue(matcher.find());
assertTrue(matcher.start() == 2);
//works
matcher = skipCodePointsPattern.matcher("ab\u009F");
assertTrue(matcher.find());
assertTrue(matcher.start() == 2);
//does not work
matcher = skipCodePointsPattern.matcher("ab\u0081");
assertTrue(matcher.find());
//also does not work for code points U+0082, U+0083 etc..
}