我想要一个正则表达式,从下面的字符串中提取出 _A_、12345、Non_Literal_Left 和 Non_Literal_Right:
... ${_A_}, ${12345}, '${Literal}' $yada yada'$[]' '${Non_Literal_Left} ${Non_Literal_Right}'
我目前能做到的最接近的写法如下,但它还无法排除用单引号括起来的字面量:
// Find every ${name} placeholder in s, where name is one or more word characters (\w+).
// This pattern does not look at surrounding quotes, so '${Literal}' is matched as well.
Matcher matcher = Pattern.compile("\\$\\{(\\w+)\\}").matcher(s);
while (matcher.find()) {
// Group 1 is the text between the braces.
result.add(matcher.group(1));
}
这样可以得到我想要的所有内容,但还会多出 Literal,而我并不希望匹配它。
提前致谢...
答案 0 :(得分:3)
你可以直接使用否定的后行断言(negative lookbehind):
"(?<!')\\$\\{(\\w+)\\}"
这样,只有当 $ 前面不是 ' 时才会匹配。
正如 Matt Ball 在评论中提到的,在末尾再加一个否定的先行断言(negative lookahead)可能更合理:
"(?<!')\\$\\{(\\w+)\\}(?!')"
不过,只有当 ' 的使用无效或不配对时(例如 ${Literal}'),两者才会有区别(在这种情况下,我的第一个正则表达式仍会匹配 Literal,而后者不会)。
答案 1 :(得分:1)
写这个还挺有意思的。
不过直觉告诉我,用正则表达式会更简洁一些。
/**
 * Utility class for parsing record field parameters in properties.
 *
 * @author Ryan
 *
 */
public static class PropertyParser {
    /**
     * Stores the results of parsing a property: the names of the field
     * parameters that were found and the contents of the single-quoted
     * literals that were found.
     *
     * @author Ryan
     *
     */
    public static class ParsedParameters {
        private final Set<String> literals;
        private final Set<String> parameters;

        /** Creates an empty result; instances are only created by the parser. */
        private ParsedParameters() {
            this.parameters = new HashSet<String>();
            this.literals = new HashSet<String>();
        }

        /**
         * Adds a literal property value to this object.
         *
         * @param string The literal property value to add to this object.
         */
        private void addLiteral(String string) {
            this.literals.add(string);
        }

        /**
         * Adds a parameter name to this object.
         *
         * @param string The parameter name to add to this object.
         */
        private void addParameter(String string) {
            this.parameters.add(string);
        }

        /**
         * Returns the set of literals stored in this object.
         *
         * @return The set of literals stored in this object.
         */
        public Set<String> getLiterals() {
            return this.literals;
        }

        /**
         * Returns the set of parameters stored in this object.
         *
         * @return The set of parameters stored in this object.
         */
        public Set<String> getParameters() {
            return this.parameters;
        }
    }

    /** Message template for the exception raised on an illegal field character. */
    private static final String BAD_FIELD_CHAR =
            "Illegal character detected for field parameter: %c";

    /**
     * Extracts placeholder field name parameters from the input string.
     * <p>
     * Single quotes can be used to avoid the parser interpreting the ${...}
     * as a field parameter.
     * <p>
     * For example, the parser would not detect any field parameters in the following string:
     * <p>
     * #!/bin/bash<br>
     * # Echos the first argument<br>
     * echo '${1}'<br>
     * <p>
     * The {@code PropertySubstitutor} is responsible for removing the single quotes
     * surrounding the parameter when substituting the actual property value(s).
     * <p>
     * <b>Quoting</b>
     * <p>
     * A placeholder is recorded as a literal only when a single quote immediately
     * precedes its <code>$</code> and another immediately follows its closing brace.
     * A quote on one side only leaves it a parameter. A <code>'$</code> that is not
     * directly followed by <code>{</code> has no effect on later placeholders.
     * <p>
     * <b>Nested Parameters</b>
     * <p>
     * This parser itself will only parse the inner-most parameter or literal.
     * <p>
     * For example, ${Some${Value}} would actually be treated as a legal string, with
     * 'Value' as the only field parameter extracted. During runtime substitution,
     * this would result in ${Somebody} if the record value for the field "Value" was "body".
     * <p>
     * Theoretically, this parser could then be ran again to extract this generated parameter.
     *
     * @param string The property to parse for field parameters; may be null or empty,
     *        in which case an empty result is returned.
     * @return An object containing the parsed parameters and literal values.
     * @throws IllegalArgumentException If the property contains parameter syntax
     *         (i.e. ${text}) but contains illegal characters for the field.
     *         <p>
     *         Allowed characters for field names are alpha-numeric and underscores.
     */
    public static ParsedParameters parseParametersAndLiterals(String string)
            throws IllegalArgumentException {
        ParsedParameters result = new ParsedParameters();
        if ((string == null) || string.isEmpty()) {
            return result;
        }
        // Text collected for the placeholder currently being read.
        StringBuilder param = null;
        // Last character seen inside the current placeholder that is not legal in a field name.
        Character badChar = null;
        char c;
        // Previous character; '^' is a start value that matches none of the checks below.
        char p = '^';
        // Set when the current character finishes a placeholder.
        boolean close = false;
        // True while the placeholder being read was introduced by '$ (quote on the left).
        boolean lQuote = false;
        // True for exactly one iteration after '$ set lQuote, so the next character
        // can confirm that a '{' really follows.
        boolean quotePending = false;
        // True between "${" and the end of the placeholder.
        boolean open = false;
        int l = string.length();
        for (int i = 0; i < l; ++i) {
            c = string.charAt(i);
            if (quotePending && (c != '{')) {
                // The '$ just seen did not start a ${...}. Without this reset the
                // left-quote state would leak into the next, unrelated placeholder
                // and turn e.g. the ${a}' in "'$5 ${a}'" into a literal.
                lQuote = false;
            }
            quotePending = false;
            if (!lQuote && (p == '\'') && (c == '$')) {
                lQuote = true;
                quotePending = true;
            } else if ((p == '$') && (c == '{')) {
                // "${" always (re)starts a placeholder, discarding any unfinished outer one.
                param = new StringBuilder();
                open = true;
                badChar = null;
            } else if (open
                    && (((c == '}') && (!lQuote || ((1 + i) == l))) || (lQuote && (p == '}')))) {
                // Unquoted placeholders end at '}'. Left-quoted ones end one character
                // later (or at end of input) so that character can be checked for the
                // closing quote.
                open = false;
                close = true;
            } else if (open) {
                boolean validCharacter = Character.isLetterOrDigit(c) || (c == '_');
                // A left-quoted placeholder may turn out to be a literal, so keep
                // every character except '}' in that case.
                if (validCharacter || (lQuote && (c != '}'))) {
                    param.append(c);
                }
                if (!validCharacter && (c != '}')) {
                    badChar = c;
                }
            }
            if (close) {
                // Illegal characters are tolerated only in a fully quoted '${...}' literal.
                if ((badChar != null) && !(lQuote && (p == '}') && (c == '\''))) {
                    throw new IllegalArgumentException(String.format(BAD_FIELD_CHAR, badChar));
                } else if (c != '\'') {
                    // No closing quote: a parameter. Empty names (i.e. "${}") are ignored.
                    if (param.length() > 0) {
                        result.addParameter(param.toString());
                    }
                } else {
                    result.addLiteral(param.toString());
                }
                lQuote = false;
                close = false;
                badChar = null;
            }
            p = c;
        }
        return result;
    }
}
当然还有测试。
/**
 * JUnit tests for PropertyParser.parseParametersAndLiterals.
 * <p>
 * Each parse replaces the params and literals fields with the sets returned
 * by the parser, and the assertion helpers below inspect those fields.
 */
public class TestPropertyParser {
// Literals returned by the most recent parse (empty set before the first parse).
private Set<String> literals;
// Parameters returned by the most recent parse (empty set before the first parse).
private Set<String> params;
/** Asserts that the most recent parse produced the given literal. */
private void assertLiteralsContains(String string) {
assertTrue(this.literals.contains(string));
}
/** Asserts that the most recent parse produced the given parameter name. */
private void assertParamsContains(String string) {
assertTrue(this.params.contains(string));
}
/**
 * Asserts the sizes of the current parameter and literal sets.
 * A null expected size means the corresponding set itself must be null.
 */
private void assertResultSizes(Integer paramSize, Integer literalSize) {
if (paramSize != null) {
assertNotNull(this.params);
assertEquals((int) paramSize, this.params.size());
} else {
assertNull(this.params);
}
if (literalSize != null) {
assertNotNull(this.literals);
assertEquals((int) literalSize, this.literals.size());
} else {
assertNull(this.literals);
}
}
/** Parses the string and stores the returned sets in the fields used by the assertions. */
private void parseAndSet(String stringToParse) {
ParsedParameters result = PropertyParser.parseParametersAndLiterals(stringToParse);
this.literals = result.getLiterals();
this.params = result.getParameters();
}
/** Starts every test with empty, non-null result sets. */
@Before
public void setup() {
this.params = new HashSet<String>();
this.literals = new HashSet<String>();
}
/** A space inside a placeholder with a quote on the left only must be rejected. */
@Test(expected = IllegalArgumentException.class)
public void testParserInvalidParameterQuoteLeft() {
parseAndSet("'${Invalid Parameter}");
}
/** A space inside a placeholder with a quote on the right only must be rejected. */
@Test(expected = IllegalArgumentException.class)
public void testParserInvalidParameterQuoteRight() {
parseAndSet("${Invalid Parameter}'");
}
/** A space inside an unquoted placeholder must be rejected. */
@Test(expected = IllegalArgumentException.class)
public void testParserInvalidParameterSpaces() {
parseAndSet(" ${Invalid Parameter}");
}
/**
 * Runs a sequence of inputs that must parse without an exception and checks
 * the parameters and literals produced by each one.
 */
@Test
public void testParserValidStrings() {
// Initialization condition.
assertResultSizes(0, 0);
// Null string.
parseAndSet(null);
assertResultSizes(0, 0);
// Empty string.
parseAndSet(new String());
assertResultSizes(0, 0);
// Single parameter.
parseAndSet("... ${_A_}, $yada yada'$[]' '${");
assertResultSizes(1, 0);
assertParamsContains("_A_");
// Many parameters and one literal.
parseAndSet("... ${_A_}, ${12345}, '${Literal}''${Non_Literal_Left} ${Non_Literal_Right}' ");
assertResultSizes(4, 1);
assertParamsContains("_A_");
assertParamsContains("12345");
assertParamsContains("Non_Literal_Left");
assertParamsContains("Non_Literal_Right");
assertLiteralsContains("Literal");
// Nested literal and odd bracket placements.
parseAndSet("''${Totally}''$}{$'${Single}");
assertResultSizes(1, 1);
assertParamsContains("Single");
assertLiteralsContains("Totally");
// Subset of ASCII characters.
parseAndSet("`1234567890-=qwertyuiop[]\\asdfghjkl;'zxcvbnm,./!@#$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:\"ZXCVBNM<>?");
assertResultSizes(0, 0);
// Characters that are illegal in a parameter are accepted inside a fully quoted literal.
parseAndSet("'${This literal is completely valid}'");
assertResultSizes(0, 1);
assertLiteralsContains("This literal is completely valid");
// Test incomplete literal, no closure.
parseAndSet("'${This literal is never closed");
assertResultSizes(0, 0);
// Test incomplete parameter from left.
parseAndSet("${Never_Closed");
assertResultSizes(0, 0);
// And again... with a parameter at the end.
parseAndSet("${Never_Closed${But_This_Is}");
assertResultSizes(1, 0);
assertParamsContains("But_This_Is");
// Empty parameter.
parseAndSet("${}");
assertResultSizes(0, 0);
// Restarting a new parameter within an already open parameter.
parseAndSet("${Perfectly valid${a}");
assertResultSizes(1, 0);
assertParamsContains("a");
// Same, with a quote before the outer placeholder and before the inner one, none after.
parseAndSet("'${Perfectly valid'${a}");
assertResultSizes(1, 0);
assertParamsContains("a");
// Same, with a quote before the inner placeholder only.
parseAndSet("${Perfectly valid'${a}");
assertResultSizes(1, 0);
assertParamsContains("a");
// Same, with a quote after the inner placeholder only.
parseAndSet("${Perfectly valid${a}'");
assertResultSizes(1, 0);
assertParamsContains("a");
// Same, with quotes on both sides of the inner placeholder: it becomes a literal.
parseAndSet("${Perfectly valid'${a}'");
assertResultSizes(0, 1);
assertLiteralsContains("a");
// Variation of the above with spaces.
parseAndSet(" ${ Perfectly valid${a} ");
assertResultSizes(1, 0);
assertParamsContains("a");
// TODO Determine what the desired behavior is for nested literals and parameters.
// Test nested parameter in literal.
parseAndSet("'${Nested ${Parameter}}'");
assertResultSizes(1, 0);
assertParamsContains("Parameter");
// Nested parameter.
parseAndSet("${Nested_${Parameter}}'");
assertResultSizes(1, 0);
assertParamsContains("Parameter");
// Literal nested in a parameter.
parseAndSet(" ${Nested'${Literal}'}");
assertResultSizes(0, 1);
assertLiteralsContains("Literal");
}
}