我是 Java 正则表达式(regex)的新手。我有一个包含不同节点的 XML 文件,文件内容如下:
<Node id="855"/>PROFILE<Node id="862"/>:<Node id="863"/>
<Node id="864"/>8<Node id="865"/> <Node id="866"/>years<Node id="871"/> <Node id="872"/>IT<Node id="874"/> <Node id="875"/>industry<Node id="883"/> <Node id="884"/>experience<Node id="894"/> <Node id="895"/>in<Node id="897"/> <Node id="898"/>web<Node id="901"/> <Node id="902"/>based<Node id="907"/> <Node id="908"/>applications<Node id="920"/> <Node id="921"/>that<Node id="925"/> <Node id="926"/>involved<Node id="934"/> <Node id="935"/>extensive<Node id="944"/> <Node id="945"/>development<Node id="956"/> <Node id="957"/>work<Node id="961"/> <Node id="962"/>in<Node id="964"/> <Node id="965"/>Java<Node id="969"/>/<Node id="970"/>J<Node id="971"/>2<Node id="972"/>EE<Node id="974"/>,<Node id="975"/>Jquery<Node id="981"/>,<Node id="982"/>Jqgrid<Node id="988"/>,<Node id="989"/>Ajax<Node id="993"/>.<Node id="994"/>
<Node id="995"/>Good<Node id="999"/> <Node id="1000"/>experience<Node id="1010"/> <Node id="1011"/>in<Node id="1013"/> <Node id="1014"/>agile<Node id="1019"/> <Node id="1020"/>methodology<Node id="1031"/> <Node id="1032"/>.<Node id="1033"/>
我有一个字符串,需要在上面的节点数据中匹配它。该字符串是:
PROFILE:
8 years IT industry experience in web based applications that involved extensive development work in Java/J2EE,Jquery,Jqgrid,Ajax.
所以,
// Looks for the tokens in `elements` inside the node-annotated text loaded from `filePath`.
// NOTE: this snippet is truncated in the post - the for loop and the method body are
// never closed in the visible excerpt.
private void parseXml(ArrayList<String> elements, String filePath) {
// Becomes true after the first token has been handled, so the regex search below
// runs for the first element only.
boolean flag = false;
// Not used anywhere in the visible part of the snippet.
String nextId = "0";
// getTextWithNodesDataFromXml is not shown here; presumably it returns the
// "<Node id=.../>text" markup quoted above - confirm against its definition.
String xmlData = getTextWithNodesDataFromXml(filePath);
for (String s : elements) {
System.out.println(s);
// token and id are declared but not used in the visible part of the snippet.
String token;
int id;
String regex = "";
if (flag == false) {
// Matches one node tag immediately followed by the current token; group 1 is the
// node id, group 2 the token text.
// NOTE(review): s is concatenated unescaped, so regex metacharacters inside a token
// (for example ".") are interpreted as regex syntax; Pattern.quote(s) would match it literally.
regex = "<Node id=\"([0-9]+)\"\\/>(" + s + ")";
flag = true;
Pattern pattern1 = Pattern.compile(regex);
Matcher matcher1 = pattern1.matcher(xmlData);
// find() reports only the first occurrence; further occurrences of the token are not examined.
if (matcher1.find()) {
System.out.println("match found -->" + s);
}
}
所以,第一个参数是一个 ArrayList,其中包含待匹配字符串的各个标记(token),第二个参数是文件的路径。xmlData 就是我之前提到的节点数据,我需要在其中进行匹配。那么,如果我发现 PROFILE 匹配了三次,该如何检查整个字符串?我必须在这些节点中精确匹配整个字符串,我怎样才能做到这一点?
答案 0 :(得分:0)
我建议比较可比较的东西:你展示的XML是句子的标记化。您尝试将其与整个String进行比较。
如果将XML转换为String数组,并用 String.split("\\s+") 对以 PROFILE 开头的句子进行分词,则必须比较两个String数组。
也许,你不想要完全匹配?
在这种情况下,您必须计算相似度百分比 p,确定一个阈值 t,并仅保留 p > t 的 PROFILE 候选。
答案 1 :(得分:0)
我对问题的要求做了一些假设,并按照我的理解尽力写了一个可行的解决方案。
为了省事,我"作弊"了:没有真正读取文件,而是直接把 XML 内容作为字符串传入。
package stacktest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Finds a tokenised sentence inside node-annotated text of the form
 * {@code <Node id="N"/>token<Node id="M"/>token...}.
 *
 * <p>The text is cut into sections, each starting at a {@code PROFILE :} header and
 * running up to the next header (or the end of the text). A section matches only when
 * its tokens are exactly the expected tokens: same count, same order, same text.
 */
public class StringMatch
{
    /** Header that opens a section: a node tag, "PROFILE", a node tag, ":" and the next "<". */
    private static final Pattern PROFILE_HEADER =
        Pattern.compile("<Node id=\"[0-9]+\"\\/>PROFILE<Node id=\"[0-9]+\"\\/>:<");

    /** One node tag followed by its non-empty text; group 1 is the id, group 2 the text. */
    private static final Pattern NODE_TOKEN =
        Pattern.compile("<Node id=\"([0-9]+)\"\\/>([^<]+)");

    /**
     * Returns the node-annotated text to search.
     *
     * <p>This is a stand-in: instead of reading a file it returns its argument, so
     * callers pass the markup itself rather than a path.
     *
     * @param filePath the markup to search (not actually a path)
     * @return {@code filePath}, unchanged
     */
    private String getTextWithNodesDataFromXml(String filePath)
    {
        return filePath;
    }

    /**
     * Searches the PROFILE sections of the markup for one whose tokens equal {@code elements}.
     *
     * @param elements the expected tokens, in order
     * @param filePath passed to {@link #getTextWithNodesDataFromXml(String)}
     * @return a two-element array holding the node id of the first and of the last token
     *         of the first matching section, or {@code null} when no section matches
     */
    private int [] findMatches(List<String> elements, String filePath)
    {
        String xmlData = getTextWithNodesDataFromXml(filePath);
        for (String section : splitIntoProfileSections(xmlData))
        {
            int[] idRange = matchSection(elements, section);
            if (idRange != null)
            {
                return idRange;
            }
        }
        return null;
    }

    /**
     * Cuts the markup at every PROFILE header. Text before the first header is dropped;
     * the last section runs to the end of the input.
     */
    private static List<String> splitIntoProfileSections(String xmlData)
    {
        List<String> sections = new ArrayList<String>();
        Matcher header = PROFILE_HEADER.matcher(xmlData);
        int sectionStart = -1; // -1 until the first header has been seen
        while (header.find())
        {
            if (sectionStart >= 0)
            {
                sections.add(xmlData.substring(sectionStart, header.start()));
            }
            sectionStart = header.start();
        }
        if (sectionStart >= 0)
        {
            sections.add(xmlData.substring(sectionStart));
        }
        return sections;
    }

    /**
     * Compares the tokens of one section with the expected tokens while scanning, and
     * gives up at the first difference.
     *
     * @return the first and last node id of the section, or {@code null} on any mismatch
     */
    private static int[] matchSection(List<String> elements, String section)
    {
        Matcher node = NODE_TOKEN.matcher(section);
        String firstId = null;
        String lastId = null;
        int tokenIndex = 0;
        while (node.find())
        {
            // group(2) is never null, so calling equals on it also tolerates null elements.
            if (tokenIndex >= elements.size() || !node.group(2).equals(elements.get(tokenIndex)))
            {
                return null;
            }
            lastId = node.group(1);
            if (firstId == null)
            {
                firstId = lastId;
            }
            tokenIndex++;
        }
        // A section with fewer tokens than expected (or none at all) is not a match.
        if (firstId == null || tokenIndex != elements.size())
        {
            return null;
        }
        return new int[] { Integer.parseInt(firstId), Integer.parseInt(lastId) };
    }

    /**
     * Runs three sample searches and prints their results.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        // The profile sentence the expected tokens describe, ending at node 994.
        String profile = "<Node id=\"855\"/>PROFILE<Node id=\"862\"/>:<Node id=\"863\"/>\n"
            + "<Node id=\"864\"/>8<Node id=\"865\"/> <Node id=\"866\"/>years<Node id=\"871\"/> "
            + "<Node id=\"872\"/>IT<Node id=\"874\"/> <Node id=\"875\"/>industry<Node id=\"883\"/> "
            + "<Node id=\"884\"/>experience<Node id=\"894\"/> <Node id=\"895\"/>in<Node id=\"897\"/> "
            + "<Node id=\"898\"/>web<Node id=\"901\"/> <Node id=\"902\"/>based<Node id=\"907\"/> "
            + "<Node id=\"908\"/>applications<Node id=\"920\"/> <Node id=\"921\"/>that<Node id=\"925\"/> "
            + "<Node id=\"926\"/>involved<Node id=\"934\"/> <Node id=\"935\"/>extensive<Node id=\"944\"/> "
            + "<Node id=\"945\"/>development<Node id=\"956\"/> <Node id=\"957\"/>work<Node id=\"961\"/> "
            + "<Node id=\"962\"/>in<Node id=\"964\"/> <Node id=\"965\"/>Java<Node id=\"969\"/>/"
            + "<Node id=\"970\"/>J<Node id=\"971\"/>2<Node id=\"972\"/>EE<Node id=\"974\"/>,"
            + "<Node id=\"975\"/>Jquery<Node id=\"981\"/>,<Node id=\"982\"/>Jqgrid<Node id=\"988\"/>,"
            + "<Node id=\"989\"/>Ajax<Node id=\"993\"/>.<Node id=\"994\"/>";

        // Case 1: extra tokens follow the sentence inside the same section, so it must not match.
        String nodes = profile + "\n"
            + "<Node id=\"995\"/>Good<Node id=\"999\"/> <Node id=\"1000\"/>experience<Node id=\"1010\"/>";

        // Case 2: the section is exactly the sentence.
        String nodes2 = profile;

        // Case 3: the sentence sits between two other PROFILE sections that must be skipped.
        String nodes3 = "<Node id=\"1\"/>PROFILE<Node id=\"2\"/>:<Node id=\"3\"/>This<Node id=\"4\"/>is<Node id=\"5\"/>not<Node id=\"6\"/>the<Node id=\"7\"/>Profile<Node id=\"8\"/>\n"
            + profile
            + "PROFILE<Node id=\"1021\"/>:<Node id=\"1022\"/>This<Node id=\"1023\"/>is<Node id=\"1024\"/>not<Node id=\"1025\"/>the<Node id=\"1026\"/>Profile<Node id=\"1027\"/>\n";

        String[] el = { "PROFILE", ":", "\n",
            "8", " ", "years", " ", "IT", " ", "industry", " ", "experience", " ", "in", " ", "web",
            " ", "based", " ", "applications", " ", "that", " ", "involved", " ", "extensive", " ",
            "development", " ", "work", " ", "in", " ", "Java", "/", "J", "2", "EE", ",", "Jquery",
            ",", "Jqgrid", ",", "Ajax", "."
        };
        List<String> elements = Arrays.asList(el);

        StringMatch sm = new StringMatch();
        printTest(sm.findMatches(elements, nodes));
        printTest(sm.findMatches(elements, nodes2));
        printTest(sm.findMatches(elements, nodes3));
    }

    /**
     * Prints one search result followed by a separator line.
     *
     * @param vals the first and last node id of the match, or {@code null} for no match
     */
    private static void printTest(int[] vals)
    {
        if (vals == null)
        {
            System.out.println("no match");
        }
        else
        {
            System.out.println("found match from id: " + vals[0] + " to " + vals[1]);
        }
        System.out.println("--------------------------------");
    }
}
程序对该方法进行了三次测试调用,输出如下:
no match
--------------------------------
found match from id: 855 to 993
--------------------------------
found match from id: 855 to 993
--------------------------------