我正在尝试编写一个程序来识别给定输入文本中的文本模式。例如,我的文本将是这样的:
This is a test xa .. blah blah
This is a test xd .. blah blah..
This is a test x3 .. blah blah..
This is a test xa .. blah blah
This is a test xd .. blah blah..
This is a test x3 .. blah blah..
This is a test xa .. blah blah
This is a test xd .. blah blah..
This is a test x3 .. blah blah..
This is a test bc .. blah blah..
This is a test some more useless text..
This is a test x3 .. blah blah..
This is a test some more useless text..
This is a test xa .. blah blah
This is a test some more useless text..
我需要在交替出现的行中找到形如 x'数字/字母' 的序列,
该序列随后会反复重复。因此,在上述情况下,序列是 xa, xd, x3,
并且它重复了 3 次。而在另一种情况下,序列可能是 x1, x2, x3, x4,
重复 5 次。使用正则表达式可以解决这个问题吗?如果我编写 Java 程序,我该如何高效地检测这个序列?
答案 0 :(得分:2)
答案 1 :(得分:2)
我知道为没有付出努力的人(你甚至没有回答我关于要求的问题)开发解决方案是一种糟糕的 Stack Overflow 风格。尽管如此,下面还是给出一个解决方案:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
/**
 * Command-line entry point: reads a text file line by line and hands every
 * line to a {@link LineParser}, then tells the parser the input is complete.
 */
public class RepeatingPatternMain {
    LineParser parser = new LineParser();

    /**
     * Feeds every line of the given file to the parser and then signals the
     * end of input.
     *
     * @param fileName path of the file to read
     * @throws IOException if the file cannot be opened or read
     */
    public RepeatingPatternMain(String fileName) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            for (String current = reader.readLine(); current != null; current = reader.readLine()) {
                parser.acceptLine(current);
            }
        }
        // The reader is closed at this point; let the parser process what is left.
        parser.done();
    }

    /**
     * Expects exactly one argument, the file to analyze; otherwise prints a
     * usage hint.
     *
     * @param args command-line arguments
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 1) {
            System.out.println("Usage: java RepeatingPatternMain <file>");
            return;
        }
        new RepeatingPatternMain(args[0]);
    }
}
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Classifies input lines by whether they contain an {@code x} followed by a
 * lowercase letter or digit, and forwards them to a
 * {@link RepeatingPatternRecognizer}.
 */
public class LineParser {
    // The leading ".*" is greedy, so the captured character belongs to the
    // last "x[0-9a-z]" occurrence in the line.
    Pattern xPat = Pattern.compile(".*x([0-9a-z]).*");
    RepeatingPatternRecognizer rpr = new RepeatingPatternRecognizer();

    /**
     * Routes one input line to the recognizer, together with the character
     * following the {@code x} when the line has one.
     *
     * @param line a single line of input
     */
    public void acceptLine(String line) {
        Matcher xMatcher = xPat.matcher(line);
        if (!xMatcher.matches()) {
            rpr.lineWithoutX(line);
            return;
        }
        String captured = xMatcher.group(1);
        assert captured.length() == 1 : captured;
        rpr.lineWithX(captured.charAt(0), line);
    }

    /** Signals the end of input to the recognizer. */
    public void done() {
        rpr.finish();
    }
}
import java.util.ArrayList;
import java.util.List;
/**
 * Collects consecutive "x" lines and, whenever the run is interrupted (or the
 * input ends), searches the collected run for a sequence of x-characters that
 * repeats back to back at least {@code minRepeats} times. Each pattern found
 * is printed to {@code System.out}.
 */
public class RepeatingPatternRecognizer {
    /** Minimum number of consecutive repetitions for a sequence to be reported. */
    private static final int minRepeats = 3;

    /** The current run of consecutive x-lines awaiting analysis. */
    List<LineForAnalysis> lines = new ArrayList<LineForAnalysis>();

    /**
     * Adds an x-line to the current run.
     *
     * @param charAfterX the character that follows the {@code x} in the line
     * @param line       the complete line text, kept for reporting
     */
    public void lineWithX(char charAfterX, String line) {
        lines.add(new LineForAnalysis(charAfterX, line));
    }

    /** An x-line together with the character that identifies it. */
    static class LineForAnalysis {
        final char charAfterX;
        final String line;

        public LineForAnalysis(char charAfterX, String line) {
            this.charAfterX = charAfterX;
            this.line = line;
        }
    }

    /**
     * A line without an x ends the current run: the run is analyzed and
     * discarded.
     *
     * @param line the interrupting line (its content is not used)
     */
    public void lineWithoutX(String line) {
        analyzeAndClear();
    }

    /** End of input: analyzes and discards whatever run is still pending. */
    public void finish() {
        analyzeAndClear();
    }

    /**
     * Scans the pending run from left to right, reporting every repeating
     * pattern found, then empties the run.
     */
    private void analyzeAndClear() {
        int start = 0;
        while (start < lines.size()) {
            int end = reportPatternAt(start);
            // Resume right after a reported pattern; otherwise try the next index.
            start = (end < 0) ? start + 1 : end;
        }
        lines.clear();
    }

    /**
     * Looks for a repeating pattern that begins at {@code start}; prints it if
     * one is found.
     *
     * @param start index in {@code lines} where the candidate pattern begins
     * @return the index just past the last complete repetition, or -1 when no
     *         pattern with at least {@code minRepeats} repetitions starts here
     */
    private int reportPatternAt(int start) {
        char first = lines.get(start).charAfterX;
        for (int next = start + 1; next < lines.size(); next++) {
            if (lines.get(next).charAfterX != first) {
                continue;
            }
            // The first character recurs at 'next', so the distance is a candidate period.
            int patternLength = next - start;
            int scan = next + 1;
            while (scan < lines.size()
                    && lines.get(scan).charAfterX == lines.get(scan - patternLength).charAfterX) {
                scan++;
            }
            int repeatedPatternLength = scan - start;
            // ">=" so that exactly minRepeats repetitions qualify; a strict ">"
            // would demand at least one extra line beyond minRepeats full cycles.
            if (repeatedPatternLength >= minRepeats * patternLength) {
                // Integer division drops a trailing partial repetition.
                int repeats = repeatedPatternLength / patternLength;
                // a more elaborate solution may return the repeating pattern to the caller
                System.out.println("Found a pattern repeated " + repeats + " times");
                int repeatEndIndex = start + repeats * patternLength;
                for (int i = start; i < repeatEndIndex; i++) {
                    System.out.println(lines.get(i).line);
                }
                System.out.println();
                return repeatEndIndex;
            }
        }
        return -1;
    }
}