我有一个用 .jjt 文件编写的解析器,它与 this 非常相似。在我的情况下,唯一的变化是插入了我自己的表达式求值方法。目前,解析 1 个表达式大约需要 1 毫秒,我需要提高这个解析器的性能。我使用 VisualVM 进行性能分析,发现代码总共运行了 44.5 秒(包括将文件的各行读入 ArrayList<String>,并使用我的解析器对 93 个表达式求值,表达式中的参数值来自该 ArrayList<String> 中的文件行),其中约 43 秒花在 parseStream 方法中。
我已经参考 this link 来改进我的解析器,也尝试过将 ERROR_REPORTING 选项设置为 FALSE,但没有帮助。
编辑1:
这是 parser.jjt 文件。
我用一个包含 1000 行的文件对应用程序进行了性能分析,占用时间最多的方法是 Start()。
// Generator options for this JJTree/JavaCC grammar.
options {
JAVA_UNICODE_ESCAPE = true;
MULTI = true;
VISITOR = true;
VISITOR_EXCEPTION = "ParseException";
// With this set, productions are expected to build a tree node only where the
// grammar says so explicitly (the #Start, #FunNode, #VarNode and #Constant
// annotations further down) -- see the JJTree reference for the exact rules.
NODE_DEFAULT_VOID = true;
// NODE_PACKAGE = "org.nfunk.jep.parser";
// BUILD_NODE_FILES=false;
STATIC = false;
// Debugging switches, all left disabled.
// DEBUG_TOKEN_MANAGER = true;
// DEBUG_PARSER = true;
// DEBUG_LOOKAHEAD = true;
}
/***************************************************************
PARSER BEGIN
***************************************************************/
PARSER_BEGIN(Parser)
package org.nfunk.jep;
import java.util.Vector;
import org.nfunk.jep.function.*;
import org.nfunk.jep.type.*;
/**
 * Hand-written part of the generated expression parser: entry points that
 * (re)initialise the parser on a character stream, run the {@code Start}
 * production and hand back the resulting tree, plus the string-literal
 * unescaping helper used by the grammar actions.
 */
public class Parser {
    /** Letters that may follow a backslash inside a string literal. */
    private static final String ESCAPE_CODES = "tnrbf\\\"'";

    /** Characters the escapes stand for, index-aligned with {@link #ESCAPE_CODES}. */
    private static final String ESCAPE_VALUES = "\t\n\r\b\f\\\"'";

    private JEP jep;
    private SymbolTable symTab;
    private OperatorSet opSet;
    private int initialTokenManagerState = DEFAULT;

    /**
     * Parses a single expression from the given stream.
     *
     * @param stream source of the expression text
     * @param jep_in JEP instance supplying the symbol table, operator set,
     *               function table and error list used by the grammar actions
     * @return the first child of the Start node, i.e. the expression's root node
     * @throws ParseException if the input is syntactically invalid or contains
     *                        no expression at all
     */
    public Node parseStream(java.io.Reader stream, JEP jep_in)
            throws ParseException {
        restart(stream, jep_in);
        // NOTE(review): presumably a no-op unless the parser is generated with
        // DEBUG_PARSER (commented out in the options) -- confirm against the
        // generated Parser.java before relying on it.
        enable_tracing();
        // Parse the expression and return its root node.
        Node node = Start();
        if (node == null) {
            throw new ParseException("No expression entered");
        }
        return node.jjtGetChild(0);
    }

    /**
     * Restarts the parse with the given stream: re-initialises the generated
     * parser, switches the token manager to the configured initial lexical
     * state and caches the symbol table and operator set of {@code jep_in}.
     *
     * @param stream source of the expression text
     * @param jep_in JEP instance to parse for
     * @since 2.3.0 beta 1
     */
    public void restart(java.io.Reader stream, JEP jep_in)
    {
        ReInit(stream);
        this.token_source.SwitchTo(initialTokenManagerState);
        jep = jep_in;
        symTab = jep.getSymbolTable();
        opSet = jep.getOperatorSet();
    }

    /**
     * Continues parsing without re-initialising the stream.
     * Allows reentrancy of the parser so that strings like
     * "x=1; y=2; z=3;" can be parsed.
     * When a semicolon is encountered parsing finishes, leaving the rest of
     * the string unparsed. Parsing can be resumed from the current position
     * by calling this method again.
     * For example
     * <pre>
     * XJep j = new XJep();
     * Parser parse = j.getParse();
     * StringReader sr = new StringReader("x=1; y=2; z=3;");
     * parse.restart(sr,j);
     * Node node;
     * try {
     * while((node = j.continueParse())!=null) {
     * j.println(node);
     * } }catch(ParseException e) {}
     * </pre>
     *
     * @return the root node of the next expression, or {@code null} when the
     *         Start production found no expression
     * @throws ParseException if the remaining input is syntactically invalid
     */
    public Node continueParse() throws ParseException
    {
        ASTStart node = Start();
        if (node == null) {
            return null;
        }
        return node.jjtGetChild(0);
    }

    /** Appends a message to the error list of the current JEP instance. */
    private void addToErrorList(String errorStr) {
        jep.errorList.addElement(errorStr);
    }

    /**
     * Sets the initial state that the token manager is in.
     * Can be used to change how x.x is interpreted, either as a single
     * identifier (DEFAULT) or as x &lt;DOT&gt; x (NO_DOT_IN_IDENTIFIERS).
     *
     * @param state the state to be in. Currently the only legal values are
     *              DEFAULT and NO_DOT_IN_IDENTIFIERS
     */
    public void setInitialTokenManagerState(int state)
    {
        initialTokenManagerState = state;
    }

    /**
     * Translates all escape sequences to characters. Inspired by Rob Millar's
     * unescape() method in rcm.util.Str from the Web Sphinx project.
     *
     * <p>Unknown escapes are copied through unchanged. A lone backslash at the
     * very end of the input is also copied through once; the earlier
     * implementation emitted the text preceding it twice in that case.
     *
     * @param inputStr String containing escape characters.
     * @return String with all escape sequences replaced.
     */
    private String replaceEscape(String inputStr) {
        int i = inputStr.indexOf('\\');
        if (i == -1) {
            // Common case: nothing to unescape, so no copy is needed.
            return inputStr;
        }
        int len = inputStr.length();
        int p = 0; // start of the text not yet copied to the output
        StringBuilder output = new StringBuilder(len);
        // Stop at a trailing backslash: it has no metacharacter to combine with.
        while (i != -1 && i + 1 < len) {
            output.append(inputStr, p, i);
            char metac = inputStr.charAt(i + 1);
            int k = ESCAPE_CODES.indexOf(metac);
            if (k == -1) {
                // Unknown metacharacter: leave the sequence as found. This
                // should be unreachable when called from the grammar, because
                // STRING_LITERAL only accepts the known escapes.
                output.append('\\').append(metac);
            } else {
                output.append(ESCAPE_VALUES.charAt(k));
            }
            // Skip over both the escape character and the metacharacter.
            p = i + 2;
            i = inputStr.indexOf('\\', p);
        }
        // Add the rest of the input (including a trailing lone backslash).
        if (p < len) {
            output.append(inputStr, p, len);
        }
        return output.toString();
    }
}
PARSER_END(Parser)
/***************************************************************
SKIP
***************************************************************/
// Input discarded in every lexical state (<*>): blanks, tabs, line breaks and comments.
<*> SKIP :
{
" "
| "\t"
| "\n"
| "\r"
// Single-line comment. The pattern requires a closing line terminator, so a
// "//" comment running to the end of input without one is not matched by it.
| <"//" (~["\n","\r"])* ("\n"|"\r"|"\r\n")>
// Block comment, from "/*" up to the first "*/".
| <"/*" (~["*"])* "*" (~["/"] (~["*"])* "*")* "/">
}
/***************************************************************
TOKENS
***************************************************************/
// Literal tokens, recognised in every lexical state. Names prefixed with '#'
// (DECIMAL_LITERAL, EXPONENT) are helper definitions used inside other tokens.
<*> TOKEN : /* LITERALS */
{
// Unsigned run of decimal digits.
< INTEGER_LITERAL:
<DECIMAL_LITERAL>
>
|
< #DECIMAL_LITERAL: ["0"-"9"] (["0"-"9"])* >
|
// Three accepted shapes: "1." / "1.5" / "1.5e3", ".5" / ".5e3", and "1e3".
< FLOATING_POINT_LITERAL:
(["0"-"9"])+ "." (["0"-"9"])* (<EXPONENT>)?
| "." (["0"-"9"])+ (<EXPONENT>)?
| (["0"-"9"])+ <EXPONENT>
>
|
< #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
|
// Double-quoted string on a single line. A backslash may only be followed by
// one of n t b r f \ ' " -- the same set replaceEscape() translates.
< STRING_LITERAL:
"\""
( (~["\"","\\","\n","\r"])
| ("\\" ["n","t","b","r","f","\\","'","\""] )
)*
"\""
>
}
/* IDENTIFIERS
Letters before version 2.22
< #LETTER: ["_","a"-"z","A"-"Z"] >
In Ver 2.3.0.1 presence of . in an identifier is switchable.
In the DEFAULT lexical state identifiers can contain a .
In the NO_DOT_IN_IDENTIFIERS state identifiers cannot contain a .
the state can be set by using
Parser.setInitialTokenManagerState
*/
// Identifiers in the DEFAULT lexical state: a letter followed by any mix of
// letters, digits and '.' characters (so "x.y" is one identifier here).
// The token name is spelled INDENTIFIER1 and is referenced under that name by
// the parser productions, so the spelling has to stay as it is.
<DEFAULT> TOKEN:
{
<INDENTIFIER1: <LETTER1>(<LETTER1>|<DIGIT1>|".")*>
|
< #LETTER1:
[
"\u0024", // $
"\u0041"-"\u005a", // A - Z
"\u005f", // _
"\u0061"-"\u007a", // a - z
"\u00c0"-"\u00d6", // Upper case symbols of Latin-1 Supplement
"\u00d8"-"\u00f6", // Lower case symbols of Latin-1 Supplement
"\u00f8"-"\u00ff", // More lower case symbols of Latin-1 Supplement
"\u0100"-"\u1fff", // Many languages (including Greek)
"\u3040"-"\u318f", // Hiragana, Katakana, Bopomofo, Hangul Compatibility Jamo
"\u3300"-"\u337f", // CJK Compatibility
"\u3400"-"\u3d2d", // CJK Unified Ideographs Extension A
"\u4e00"-"\u9fff", // CJK Unified Ideographs
"\uf900"-"\ufaff" // CJK Compatibility Ideographs
]
>
|
< #DIGIT1: ["0"-"9"] >
}
// Identifiers in the NO_DOT_IN_IDENTIFIERS lexical state: same letter and
// digit sets as INDENTIFIER1, but '.' is not part of an identifier, so "x.y"
// is tokenised as identifier, <DOT>, identifier.
<NO_DOT_IN_IDENTIFIERS> TOKEN:
{
<INDENTIFIER2: <LETTER2>(<LETTER2>|<DIGIT2>)*>
|
< #LETTER2:
[
"\u0024", // $
"\u0041"-"\u005a", // A - Z
"\u005f", // _
"\u0061"-"\u007a", // a - z
"\u00c0"-"\u00d6", // Upper case symbols of Latin-1 Supplement
"\u00d8"-"\u00f6", // Lower case symbols of Latin-1 Supplement
"\u00f8"-"\u00ff", // More lower case symbols of Latin-1 Supplement
"\u0100"-"\u1fff", // Many languages (including Greek)
"\u3040"-"\u318f", // Hiragana, Katakana, Bopomofo, Hangul Compatibility Jamo
"\u3300"-"\u337f", // CJK Compatibility
"\u3400"-"\u3d2d", // CJK Unified Ideographs Extension A
"\u4e00"-"\u9fff", // CJK Unified Ideographs
"\uf900"-"\ufaff" // CJK Compatibility Ideographs
]
>
|
< #DIGIT2: ["0"-"9"] >
}
/* OPERATORS */
// Operator and punctuation tokens, recognised in every lexical state.
// Several tokens share a prefix ("=" / "==", "<" / "<=", "!" / "!=", "^" / "^^").
<*> TOKEN:
{
< ASSIGN:"=" > // rjm
| < SEMI: ";" > // rjm
| < COMMA: "," > // rjm
| < GT: ">" >
| < LT: "<" >
| < EQ: "==" >
| < LE: "<=" >
| < GE: ">=" >
| < NE: "!=" >
| < AND: "&&" >
| < OR: "||" >
| < PLUS: "+" >
| < MINUS:"-" >
| < MUL: "*" >
| < DOT: "." > // rjm
| < DIV: "/" >
| < MOD: "%" >
| < NOT: "!" >
| < POWER:"^" >
| < CROSS:"^^" > // rjm
| < LSQ: "[" > // rjm
| < RSQ: "]" > // rjm
| < LRND: "(" > // rjm
| < RRND: ")" > // rjm
| < COLON: ":" > // rjm
}
/***************************************************************
GRAMMAR START
***************************************************************/
// Grammar entry point. Parses one expression terminated by end of input or
// ';' and returns the Start node that wraps it. For an empty statement (only
// <EOF> or ';') it returns null and leaves error reporting to the caller.
ASTStart Start() #Start :
{
}
{
Expression() ( <EOF> | <SEMI> ) { return jjtThis; }
| ( <EOF> | <SEMI> )
{
// njf - The next line is commented out in 2.3.0 since
// two "No expression entered" errors are reported
// in EvaluatorVisitor and Console (one from here
// the other from ParseStream() )
// Decided to just return null, and handle the error
// in ParseStream.
// addToErrorList("No expression entered");
return null;
}
}
// Expressions can be like
// x=3
// x=y=3 parsed as x=(y=3)
//
// Chooses between an assignment and a plain right-hand expression.
// NOTE(review): the syntactic LOOKAHEAD below scans a complete LValue (which
// may be an array access with its whole bracketed list) before each expression
// is actually parsed; this speculative scan is a plausible contributor to the
// time profiled in Start() -- confirm with DEBUG_LOOKAHEAD or a profiler.
void Expression() : {}
{
LOOKAHEAD(LValue() <ASSIGN>) // need to prevent javacc warning with left recursion
AssignExpression() // rjm changes from OrExpression
|
RightExpression()
}
// Assignment: LValue '=' Expression, built as a two-child FunNode carrying the
// assign operator. The right-hand side is itself an Expression, which is what
// makes x=y=3 group as x=(y=3). Throws ParseException (after the whole
// assignment has been parsed) when the JEP instance does not allow assignment.
void AssignExpression() : {} // rjm addition
{
( LValue() <ASSIGN> Expression()
{
if (!jep.getAllowAssignment()) throw new ParseException(
"Syntax Error (assignment not enabled)");
jjtThis.setOperator(opSet.getAssign());
}
)
#FunNode(2)
}
// Any expression that is not an assignment; simply delegates to the lowest
// precedence level, OrExpression.
void RightExpression() :
{
}
{
OrExpression()
}
// Logical OR level: AndExpression ( '||' AndExpression )*.
// Each '||' wraps the operands parsed so far and the new right operand in a
// two-child FunNode, so chains group to the left.
void OrExpression() :
{
}
{
AndExpression()
(
( <OR> AndExpression()
{
jjtThis.setOperator(opSet.getOr());
}
) #FunNode(2)
)*
}
// Logical AND level: EqualExpression ( '&&' EqualExpression )*.
// Each '&&' produces a two-child FunNode; chains group to the left.
void AndExpression() :
{
}
{
EqualExpression()
(
( <AND> EqualExpression()
{
jjtThis.setOperator(opSet.getAnd());
}
) #FunNode(2)
)*
}
// Equality level: RelationalExpression ( ('!=' | '==') RelationalExpression )*.
// Each operator produces a two-child FunNode; chains group to the left.
void EqualExpression() :
{
}
{
RelationalExpression()
(
( <NE> RelationalExpression()
{
jjtThis.setOperator(opSet.getNE());
}
) #FunNode(2)
|
( <EQ> RelationalExpression()
{
jjtThis.setOperator(opSet.getEQ());
}
) #FunNode(2)
)*
}
// Relational level: AdditiveExpression ( ('<' | '>' | '<=' | '>=') AdditiveExpression )*.
// Each operator produces a two-child FunNode; chains group to the left.
void RelationalExpression() :
{
}
{
AdditiveExpression()
(
( <LT> AdditiveExpression()
{
jjtThis.setOperator(opSet.getLT());
}
) #FunNode(2)
|
( <GT> AdditiveExpression()
{
jjtThis.setOperator(opSet.getGT());
}
) #FunNode(2)
|
( <LE> AdditiveExpression()
{
jjtThis.setOperator(opSet.getLE());
}
) #FunNode(2)
|
( <GE> AdditiveExpression()
{
jjtThis.setOperator(opSet.getGE());
}
) #FunNode(2)
)*
}
// Additive level: MultiplicativeExpression ( ('+' | '-') MultiplicativeExpression )*.
// Each operator produces a two-child FunNode; chains group to the left.
void AdditiveExpression() :
{
}
{
MultiplicativeExpression()
(
( <PLUS> MultiplicativeExpression()
{
jjtThis.setOperator(opSet.getAdd());
}
) #FunNode(2)
|
( <MINUS> MultiplicativeExpression()
{
jjtThis.setOperator(opSet.getSubtract());
}
) #FunNode(2)
)*
}
// Multiplicative level: a UnaryExpression followed by any number of
//   - an adjacent PowerExpression with no operator (implicit multiplication),
//   - '*', '.', '^^', '/' or '%' followed by a UnaryExpression.
// Each alternative produces a two-child FunNode; chains group to the left.
void MultiplicativeExpression() :
{
}
{
UnaryExpression()
(
(
// Implicit multiplication, e.g. "2 x". The operand is parsed first; the
// ParseException is thrown afterwards if the feature is switched off.
PowerExpression()
{
if (!jep.implicitMul) throw new ParseException(
"Syntax Error (implicit multiplication not enabled)");
jjtThis.setOperator(opSet.getMultiply());
}
) #FunNode(2)
|
( <MUL> UnaryExpression()
{
jjtThis.setOperator(opSet.getMultiply());
}
) #FunNode(2)
|
( <DOT> UnaryExpression()
{
jjtThis.setOperator(opSet.getDot());
}
) #FunNode(2)
|
( <CROSS> UnaryExpression()
{
jjtThis.setOperator(opSet.getCross());
}
) #FunNode(2)
|
( <DIV> UnaryExpression()
{
jjtThis.setOperator(opSet.getDivide());
}
) #FunNode(2)
|
( <MOD> UnaryExpression()
{
jjtThis.setOperator(opSet.getMod());
}
) #FunNode(2)
)*
}
// Unary level: prefix '+', '-' or '!' applied to a UnaryExpression, or a
// PowerExpression. Unary plus carries no node annotation, so it adds nothing
// to the tree; '-' and '!' each produce a one-child FunNode.
void UnaryExpression() :
{
}
{
( <PLUS> UnaryExpression())
|
( <MINUS> UnaryExpression()
{
jjtThis.setOperator(opSet.getUMinus());
}
) #FunNode(1)
|
( <NOT> UnaryExpression()
{
jjtThis.setOperator(opSet.getNot());
}
) #FunNode(1)
|
PowerExpression()
}
// Power level: a primary optionally followed by '^' and a UnaryExpression.
// The exponent is a full UnaryExpression, so it may carry its own sign
// (a^-b) and a further power (a^b^c parses as a^(b^c)).
void PowerExpression() :
{
}
{
UnaryExpressionNotPlusMinus()
[
( <POWER> UnaryExpression()
{
jjtThis.setOperator(opSet.getPower());
}
) #FunNode(2)
]
}
// Primary expression. Alternatives are tried in this order:
//   1. a string or numeric constant,
//   2. an array access (identifier followed by a bracketed list), selected by
//      a syntactic lookahead over the whole ArrayAccess,
//   3. a function call, selected when the next token is an identifier that is
//      a key of jep.funTab,
//   4. a variable,
//   5. a parenthesised expression,
//   6. a list expression.
// The previously declared locals identString and type were never used and
// have been removed.
// NOTE(review): alternative 2 speculatively scans an entire ArrayAccess for
// every primary that is not a constant; worth checking when profiling.
void UnaryExpressionNotPlusMinus() :
{
}
{
    AnyConstant()
|
    LOOKAHEAD(ArrayAccess())
    ArrayAccess()
|
    LOOKAHEAD({ (getToken(1).kind == INDENTIFIER1 || getToken(1).kind == INDENTIFIER2) &&
        jep.funTab.containsKey(getToken(1).image) })
    Function()
|
    Variable()
|
    <LRND> Expression() <RRND>
|
    // LOOKAHEAD(<LSQ> Expression() <COLON>)
    // RangeExpression()
    // |
    ListExpression()
}
// Bracketed list: '[' Expression ( ',' Expression )* ']'.
// Builds a FunNode whose operator is opSet.getList(); the operator is set in
// the declarations section, i.e. before the elements are parsed.
void ListExpression() #FunNode:
{
jjtThis.setOperator(opSet.getList());
}
{
<LSQ> Expression() ( <COMMA> Expression() )* <RSQ>
}
/*
void RangeExpression() #FunNode:
{
jjtThis.setOperator(opSet.getRange());
}
{
<LSQ> Expression() ( <COLON> Expression() )+ <RSQ>
}
*/
// Assignment target: an array access if the upcoming tokens form a complete
// ArrayAccess, otherwise a plain variable.
void LValue() :
{
}
{
LOOKAHEAD(ArrayAccess())
ArrayAccess()
| Variable()
}
// Element access: a variable immediately followed by a bracketed list,
// combined into a two-child FunNode carrying opSet.getElement().
void ArrayAccess() :
{
}
{
Variable() ListExpression()
{
jjtThis.setOperator(opSet.getElement());
} #FunNode(2)
}
// Variable reference, built as a VarNode.
// If the identifier is already a key of the symbol table the node is bound to
// that variable. Otherwise, when jep.allowUndeclared is set, the variable is
// obtained via makeVarIfNeeded; when it is not set, an "Unrecognized symbol"
// message is added to the error list and the node is left without a variable.
void Variable() :
{
String identString = "";
}
{
(identString = Identifier()
{
if (symTab.containsKey(identString)) {
jjtThis.setVar(symTab.getVar(identString));
} else {
if (jep.allowUndeclared) {
jjtThis.setVar(symTab.makeVarIfNeeded(identString));
} else {
addToErrorList("Unrecognized symbol \"" + identString +"\"");
}
}
}
) #VarNode
}
// Function call: Identifier '(' ArgumentList ')', built as a FunNode.
// The identifier is looked up in jep.funTab once (the earlier version did a
// containsKey plus two get calls for the same key). When it is found, the
// node is bound to that function and its declared parameter count is passed
// on to ArgumentList for validation; otherwise an "Unrecognized function"
// message is added to the error list and the argument list is still parsed
// with a required count of 0.
void Function() :
{
    int reqArguments = 0;
    String identString = "";
}
{
    ( identString = Identifier()
        {
            PostfixMathCommandI command =
                (PostfixMathCommandI) jep.funTab.get(identString);
            if (command != null) {
                // Set number of required arguments
                reqArguments = command.getNumberOfParameters();
                jjtThis.setFunction(identString, command);
            } else {
                addToErrorList("!!! Unrecognized function \"" + identString +"\"");
            }
        }
        <LRND> ArgumentList(reqArguments, identString) <RRND>
    ) #FunNode
}
// Optional comma-separated list of argument expressions, followed by a check
// of how many were supplied.
//   reqArguments - parameter count reported by the function; the value -1
//                  delegates the check to the function's own
//                  checkNumberOfParameters(count)
//   functionName - name used for the funTab lookup and in error messages
// A mismatch is recorded in the error list; it does not abort the parse.
void ArgumentList(int reqArguments, String functionName) :
{
int count = 0;
String errorStr = "";
}
{
[
Expression() { count++; }
(
<COMMA>
Expression() { count++; }
)*
]
{
if(reqArguments == -1) {
if(!((PostfixMathCommandI)jep.funTab.get(functionName)).checkNumberOfParameters(count))
{
errorStr = "Function \"" + functionName +"\" illegal number of arguments " + count;
addToErrorList(errorStr);
}
}
else if (reqArguments != count) {
errorStr = "Function \"" + functionName +"\" requires "
+ reqArguments + " parameter";
// pluralise "parameter" for every count except exactly one
if (reqArguments!=1) errorStr += "s";
addToErrorList(errorStr);
}
}
}
// Consumes one identifier token of either lexical state (INDENTIFIER1 or
// INDENTIFIER2) and returns its text.
String Identifier() :
{
Token t;
}
{
( t = <INDENTIFIER1> | t = <INDENTIFIER2> ) { return t.image; }
}
// Literal constant, built as a Constant node: either a string literal (quotes
// stripped, escape sequences translated) or a numeric constant.
void AnyConstant() #Constant:
{
Token t;
Object value;
}
{
t=<STRING_LITERAL> {
// strip away double quotes at end of string
String temp = (t.image).substring(1,t.image.length()-1);
// replace escape characters
temp = replaceEscape(temp);
jjtThis.setValue(temp);
}
|
value = RealConstant() {
jjtThis.setValue(value);
// The commented-out lines below belong to a disabled Array() alternative.
// }
// |
// value = Array() {
// jjtThis.setValue(value);
}
}
/*
Vector Array() :
{
Object value;
Vector result = new Vector();
}
{
<LSQ>
value = RealConstant()
{
result.addElement(value);
}
(
<COMMA>
value = RealConstant()
{
result.addElement(value);
}
)*
<RSQ>
{
return result;
}
}
*/
// Numeric literal: consumes an integer or floating point token and converts
// its text with the JEP number factory. If the conversion throws, a
// "Can't parse" message is added to the error list and null is returned.
Object RealConstant() :
{
Token t;
Object value;
}
{
(t=<INTEGER_LITERAL> | t=<FLOATING_POINT_LITERAL>)
{
try {
value = jep.getNumberFactory().createNumber(t.image);
} catch (Exception e) {
value = null;
addToErrorList("Can't parse \"" + t.image + "\"");
}
return value;
}
}
答案 0 :(得分:1)
进行性能分析时应该先定位瓶颈:parseStream 花费的时间最多,但具体是其中哪个函数呢?有些分析器会以图形方式用红色标出这些关键热点。
引起我注意的是 replaceEscape,可以改写如下:
private static final String METACHARS = "tnrbf\\\"'";
private static final String CHARS = "\t\n\r\b\f\\\"'";
private String replaceEscape(String inputStr) {
int i = inputStr.indexOf('\\');
if (i == -1) { // 1. Heuristic strings without backslash
return inputStr;
}
int len = inputStr.length();
int p = 0;
StringBuilder output = new StringBuilder(); // 2. Faster StringBuilder
while (i != -1) {
if (i + 1 == len) break;
if (p < i) output.append(inputStr.substring(p, i));
p = i + 1;
char metac = inputStr.charAt(i+1);
// find the index of the metac
int k = METACHARS.indexOf(metac);
if (k != -1) {
// its corresponding true char
metac = CHARS.charAt(k));
++p; // Start copying after metachar
}
output.append(metac);
}
// add the end of the input string to the output
if (p < len)
output.append(inputStr.substring(p));
return output.toString();
}