我想知道如何使用 ParseTreeListener 和自定义监听器" MyRListener"来确定变量或函数的范围。它扩展了 RBaseListener 。我的想法是在代码文件中创建所有变量和函数的列表并保留结构,这意味着我想将子函数或变量添加为相应函数的子代(所以我保持一种层次结构)。
换句话说,我需要知道退出函数的时间。在其他一些ANTLR4语法中有一个用于退出函数的Listener事件,但遗憾的是不在R语法中(https://github.com/antlr/grammars-v4/tree/master/r)。
如果退出某个函数,我已经尝试使用 invokingState 作为指示符,但它似乎不适用于所有类型的R脚本,因为它已经失败.. < / p>
MyRListener.java :
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.Stack;
import model.code.antlr4.generated.RBaseListener;
import model.code.antlr4.generated.RParser;
import model.code.elements.FunctionElement;
public class MyRListener extends RBaseListener {
static class FunctionPair {
private FunctionElement functionElement;
private int invokingState;
public FunctionPair(FunctionElement functionElement, int invokingState) {
this.functionElement = functionElement;
this.invokingState = invokingState;
}
public FunctionElement getFunctionElement() {
return functionElement;
}
public int getInvokingState() {
return invokingState;
}
@Override
public String toString() {
return "FunctionPair [functionElement=" + functionElement + ", invokingState=" + invokingState + "]";
}
}
// https://cran.r-project.org/doc/FAQ/R-FAQ.html#What-are-valid-names_003f
private final String pattern_valid_name = "[a-zA-Z0-9._]*";
// https://stat.ethz.ch/R-manual/R-devel/library/base/html/Reserved.html
private final Set<String> reserved_words = new HashSet<String>(
Arrays.asList(new String[] { "if", "else", "repeat", "while", "function", "for", "in", "next", "break" }));
private Stack<FunctionPair> functionStack;
public MyRListener() {
this.functionStack = new Stack<FunctionPair>();
}
public void enterExpr(RParser.ExprContext ctx) {
String txt = ctx.getText().trim();
if (!(ctx.getChildCount() > 2))
return;
String name = ctx.getChild(0).getText().replace("\"", "");
String operator = ctx.getChild(1).getText().trim();
if (!reserved_words.contains(name) && name.matches(pattern_valid_name)
&& (txt.contains("<-function") || txt.contains("=function"))) {
System.out.println("Function:" + name);
FunctionElement parent = functionStack.isEmpty() ? null : functionStack.peek().functionElement;
FunctionElement func = new FunctionElement(name, parent);
FunctionPair fpair = new FunctionPair(func, ctx.invokingState);
if (!functionStack.isEmpty())
functionStack.peek().functionElement.addSubFunction(func);
functionStack.push(fpair);
} else if (!reserved_words.contains(name) && name.matches(pattern_valid_name) && txt.contains("<-")) {
if (operator.equals("=") || operator.equals("<-"))
System.out.println("Variable:" + name);
} else {
}
}
public void exitExpr(RParser.ExprContext ctx) {
if (functionStack.peek().getInvokingState() == ctx.invokingState) {
System.out.println("__Function " + functionStack.peek().getFunctionElement() + " ends here!");
System.out.print("__Children: ");
functionStack.peek().getFunctionElement().printChildren();
// cm is a list which is not in the minimal example for stackoverflow
this.cm.addCodeElement(functionStack.pop().functionElement);
}
}
public void enterForm(RParser.FormContext ctx) {
String name = ctx.getChild(0).getText().replace("\"", "");
//System.out.println("Parameter: " + name);
}
}
R.g4 :
grammar R;
prog: ( expr (';'|NL)
| NL
)*
EOF
;
/*
expr_or_assign
: expr ('<-'|'='|'<<-') expr_or_assign
| expr
;
*/
expr: expr '[[' sublist ']' ']' // '[[' follows R's yacc grammar
| expr '[' sublist ']'
| expr ('::'|':::') expr
| expr ('$'|'@') expr
| <assoc=right> expr '^' expr
| ('-'|'+') expr
| expr ':' expr
| expr USER_OP expr // anything wrappedin %: '%' .* '%'
| expr ('*'|'/') expr
| expr ('+'|'-') expr
| expr ('>'|'>='|'<'|'<='|'=='|'!=') expr
| '!' expr
| expr ('&'|'&&') expr
| expr ('|'|'||') expr
| '~' expr
| expr '~' expr
| expr ('<-'|'<<-'|'='|'->'|'->>'|':=') expr
| 'function' '(' formlist? ')' expr // define function
| expr '(' sublist ')' // call function
| '{' exprlist '}' // compound statement
| 'if' '(' expr ')' expr
| 'if' '(' expr ')' expr 'else' expr
| 'for' '(' ID 'in' expr ')' expr
| 'while' '(' expr ')' expr
| 'repeat' expr
| '?' expr // get help on expr, usually string or ID
| 'next'
| 'break'
| '(' expr ')'
| ID
| STRING
| HEX
| INT
| FLOAT
| COMPLEX
| 'NULL'
| 'NA'
| 'Inf'
| 'NaN'
| 'TRUE'
| 'FALSE'
;
exprlist
: expr ((';'|NL) expr?)*
|
;
formlist : form (',' form)* ;
form: ID
| ID '=' expr
| '...'
;
sublist : sub (',' sub)* ;
sub : expr
| ID '='
| ID '=' expr
| STRING '='
| STRING '=' expr
| 'NULL' '='
| 'NULL' '=' expr
| '...'
|
;
HEX : '0' ('x'|'X') HEXDIGIT+ [Ll]? ;
INT : DIGIT+ [Ll]? ;
fragment
HEXDIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
FLOAT: DIGIT+ '.' DIGIT* EXP? [Ll]?
| DIGIT+ EXP? [Ll]?
| '.' DIGIT+ EXP? [Ll]?
;
fragment
DIGIT: '0'..'9' ;
fragment
EXP : ('E' | 'e') ('+' | '-')? INT ;
COMPLEX
: INT 'i'
| FLOAT 'i'
;
STRING
: '"' ( ESC | ~[\\"] )*? '"'
| '\'' ( ESC | ~[\\'] )*? '\''
| '`' ( ESC | ~[\\'] )*? '`'
;
fragment
ESC : '\\' [abtnfrv"'\\]
| UNICODE_ESCAPE
| HEX_ESCAPE
| OCTAL_ESCAPE
;
fragment
UNICODE_ESCAPE
: '\\' 'u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
| '\\' 'u' '{' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT '}'
;
fragment
OCTAL_ESCAPE
: '\\' [0-3] [0-7] [0-7]
| '\\' [0-7] [0-7]
| '\\' [0-7]
;
fragment
HEX_ESCAPE
: '\\' HEXDIGIT HEXDIGIT?
;
ID : '.' (LETTER|'_'|'.') (LETTER|DIGIT|'_'|'.')*
| LETTER (LETTER|DIGIT|'_'|'.')*
;
fragment LETTER : [a-zA-Z] ;
USER_OP : '%' .*? '%' ;
COMMENT : '#' .*? '\r'? '\n' -> type(NL) ;
// Match both UNIX and Windows newlines
NL : '\r'? '\n' ;
WS : [ \t\u000C]+ -> skip ;
运行ParseTreeListener的代码:
ANTLRInputStream input = new ANTLRInputStream(fileInputStream);
Lexer lexer = new RLexer(input);
CommonTokenStream tokens = new CommonTokenStream(lexer);
RFilter filter = new RFilter(tokens);
filter.stream();
tokens.reset();
Parser parser = new RParser(tokens);
ParseTree tree = ((RParser) parser).prog();
ParserTreeListener listener = new MyRListener(codeModel);
输入(chron.R)
"chron" <-
function(dates. = NULL, times. = NULL,
format = c(dates = "m/d/y", times = "h:m:s"),
out.format, origin.)
{
if(is.null(format))
format <- c(dates = "m/d/y", times = "h:m:s")
if(missing(out.format)){
if(is.character(format))
out.format <- format
else
stop('must specify the "out.format" argument')
}
given <- c(dates = !missing(dates.), times = !missing(times.))
if(is.null(default.origin <- getOption("chron.origin")))
default.origin <- c(month = 1, day = 1, year = 1970)
if(all(!given))
## dates and times missing
return(structure(numeric(0),
format = format, origin = default.origin,
class = c("chron", "dates", "times")))
if(inherits(dates., "dates")) {
if(missing(origin.))
origin. <- origin(dates.)
else origin(dates.) <- origin.
}
else if(missing(origin.))
origin. <- default.origin
if(given["dates"] && !given["times"]) {
## presumably only dates
if(missing(format) && inherits(dates., "dates"))
format <- attr(dates., "format")
fmt <- switch(mode(format),
character = ,
list = format[[1]],
name = ,
"function" = format,
NULL = c(dates = "m/d/y"),
stop("unrecognized format"))
dts <- convert.dates(dates., format = fmt, origin. = origin.)
tms <- dts - floor(dts)
## if dates include fractions of days create a full chron
if(!all(is.na(tms)) && any(tms[!is.na(tms)] != 0))
return(chron(dates. = floor(dts), times. = tms, format
= format, out.format = out.format, origin. =
origin.))
ofmt <- switch(mode(out.format),
character = ,
list = out.format[[1]],
name = ,
"function" = out.format,
NULL = c(dates = "m/d/y"),
stop("invalid output format"))
attr(dts, "format") <- ofmt
attr(dts, "origin") <- origin.
class(dts) <- c("dates", "times")
names(dts) <- names(dates.)
return(dts)
}
if(given["times"] && !given["dates"]) {
## only times
if(missing(format) && inherits(times., "times")) {
format <- attr(times., "format")
if(!is.name(format))
format <- rev(format)[[1]]
}
fmt <- switch(mode(format),
character = ,
list = rev(format)[[1]],
name = ,
"function" = format,
NULL = c(times = "h:m:s"),
stop("invalid times input format"))
tms <- convert.times(times., fmt)
ofmt <- switch(mode(out.format),
character = ,
list = rev(out.format)[[1]],
name = ,
"function" = out.format,
NULL = c(dates = "m/d/y"),
stop("invalid times output format"))
attr(tms, "format") <- ofmt
class(tms) <- "times"
names(tms) <- names(times.)
return(tms)
}
## both dates and times
if(length(times.) != length(dates.)) {
if(length(times.) == 1)
times. <- rep.int(times., length(dates.))
else if(length(dates.) == 1)
dates. <- rep.int(dates., length(times.))
else
stop(paste(deparse(substitute(dates.)), "and",
deparse(substitute(times.)), "must have equal lengths"))
}
if(missing(format)) {
if(is.null(fmt.d <- attr(dates., "format")))
fmt.d <- format[1]
if(is.null(fmt.t <- attr(times., "format")))
fmt.t <- format[2]
if(mode(fmt.d) == "character" && mode(fmt.t) == "character")
format <- structure(c(fmt.d, fmt.t),
names = c("dates", "times"))
else {
fmt.d <- if(is.name(fmt.d)) fmt.d else fmt.d[[1]]
fmt.t <- if(is.name(fmt.t)) fmt.t else rev(fmt.t)[[1]]
format <- list(dates = fmt.d, times = fmt.t)
}
}
if(any(length(format) != 2, length(out.format) != 2))
stop("misspecified chron format(s) length")
if(all(mode(format) != c("character", "list")))
stop("misspecified input format(s)")
if(all(mode(out.format) != c("list", "character")))
stop("misspecified output format(s)")
dts <- convert.dates(dates., format = format[[1]], origin. = origin.)
tms <- convert.times(times., format = format[[2]])
x <- unclass(dts) + unclass(tms)
attr(x, "format") <- out.format
attr(x, "origin") <- origin.
class(x) <- c("chron", "dates", "times")
nms <- paste(names(dates.), names(times.))
if(length(nms) && any(nms != ""))
names(x) <- nms
return(x)
}
as.chron <- function(x, ...) UseMethod("as.chron")
as.chron.default <- function (x, format, ...)
{
if(inherits(x, "chron"))
return(x)
if(is.numeric(x)) {
if (missing(format) || is.null(format)) return(chron(x, ...))
else return(as.chron(as.POSIXct(format(x, scientific = FALSE),
tz = "GMT", format = format),
...))
}
if (is.character(x)) {
if (missing(format) || is.null(format)) {
out <- suppressWarnings(try(chron(x, ...), silent = TRUE))
## If this fails, try Date or datetime.
if(inherits(out, "try-error")) {
xx <- sub("T", " ", x)
out <- if(!any(grepl(" ", x, fixed = TRUE)))
as.chron(as.Date(xx), ...)
else
as.chron(as.POSIXct(xx, tz = "GMT"), ...)
}
} else {
out <- as.chron(as.POSIXct(x, format = format, tz = "GMT"),
...)
}
return(out)
}
stop("'x' cannot be coerced to a chron object")
}
as.chron.POSIXt <- function(x, offset = 0, tz = "GMT", ...)
{
## offset is in hours relative to GMT
if(!inherits(x, "POSIXt")) stop("wrong method")
x <- as.numeric(as.POSIXct(as.character(x, tz = tz), tz = "GMT")) +
60 * round(60 * offset)
tm <- x %% 86400
# if(any(tm != 0))
chron(dates. = x %/% 86400, times. = tm / 86400, ...)
# else
# chron(dates. = x %/% 86400, ...)
}
as.chron.Date <- function(x, ...)
{
chron(unclass(x), ...)
}
asChronYearFreq <-
function(x, frac = 0, holidays = FALSE, frequency, ...)
{
stopifnot(isTRUE((12 / frequency) %% 1 == 0))
x <- unclass(x)
year <- floor(x + 0.001)
month <- floor(12 * (x - year) + 1 + 0.5 + 0.001)
dd.start <- as.Date(paste(year, month, 1, sep = "-"))
nd <- 32 * 12 / frequency
dd.end <- dd.start + nd - as.numeric(format(dd.start + nd, "%d"))
if(identical(holidays, FALSE))
chron(((1 - frac) * as.numeric(dd.start) +
frac * as.numeric(dd.end)),
...)
else
chron(sapply(seq_along(x), function(i) {
s <- unclass(seq(dd.start[i], dd.end[i], by = "days"))
h <- if(isTRUE(holidays)) is.holiday(s) else is.holiday(s, holidays)
ss <- s[!is.weekend(s) & !h]
quantile(ss, probs = frac, names = FALSE)
}), ...)
}
as.chron.yearmon <-
function(x, frac = 0, holidays = FALSE, ...)
{
asChronYearFreq(x, frac = frac, holidays = holidays,
frequency = 12, ...)
}
as.chron.yearqtr <-
function(x, frac = 0, holidays = FALSE, ...)
{
asChronYearFreq(x, frac = frac, holidays = holidays,
frequency = 4, ...)
}
as.chron.ts <-
function(x, frac = 0, holidays = FALSE, ...)
{
asChronYearFreq(time(x), frac = frac, holidays = holidays,
frequency = frequency(x), ...)
}
as.chron.factor <- function(x, ...)
{
as.chron(as.character(x), ...)
}
"is.chron" <-
function(x)
inherits(x, "chron")
as.data.frame.chron <- as.data.frame.vector
"convert.chron" <-
function(x, format = c(dates = "m/d/y", times = "h:m:s"), origin.,
sep = " ", enclose = c("(", ")"), ...)
{
if(is.null(x) || !as.logical(length(x)))
return(numeric(length = 0))
if(is.numeric(x))
return(x)
if(!is.character(x) && all(!is.na(x)))
stop(paste("objects", deparse(substitute(x)),
"must be numeric or character"))
if(length(format) != 2)
stop("format must have length==2")
if(missing(origin.)
&& is.null(origin. <- getOption("chron.origin")))
origin. <- c(month = 1, day = 1, year = 1970)
if(any(enclose != ""))
x <- substring(x, first = 2, last = nchar(x) - 1)
str <- unpaste(x, sep = sep)
dts <- convert.dates(str[[1]], format = format[[1]],
origin. = origin., ...)
tms <- convert.times(str[[2]], format = format[[2]], ...)
dts + tms
}
"format.chron" <-
function(x, format = att$format, origin. = att$origin, sep = " ",
simplify, enclosed = c("(", ")"), ...)
{
att <- attributes(x)
if(length(format) == 1L) {
if(!nzchar(format))
format <- "%Y-%m-%d %H:%M:%S"
return(format(as.POSIXct(x), format = format, tz = "GMT"))
}
if(missing(simplify))
if(is.null(simplify <- getOption("chron.simplify")))
simplify <- FALSE
dts <- format.dates(x, format[[1]], origin. = origin., simplify =
simplify)
tms <- format.times(x - floor(x), format[[2]], simplify = simplify)
x <- paste(enclosed[1], dts, sep, tms, enclosed[2], sep = "")
## output is a character object w.o class
att$class <- att$format <- att$origin <- NULL
attributes(x) <- att
x
}
"new.chron" <-
function(x, new.origin = c(1, 1, 1970),
shift = julian(new.origin[1], new.origin[2], new.origin[3],
c(0, 0, 0)))
{
cl <- class(x)
class(x) <- NULL # get rid of "delim" attribute
del <- attr(x, "delim")
attr(x, "delim") <- NULL # map formats
format <- attr(x, "format")
format[1] <- switch(format[1],
abb.usa = paste("m", "d", "y", sep = del[1]),
abb.world = paste("d", "m", "y", sep = del[1]),
abb.ansi = "ymd",
full.usa = "month day year",
full.world = "day month year",
full.ansi = "year month year",
format[1])
if(length(format) == 2)
format[2] <- switch(format[2],
military = "h:m:s",
format[2])
attr(x, "format") <- format
orig <- attr(x, "origin")
if(is.null(orig)) {
x <- x - shift
attr(x, "origin") <- new.origin
}
## (update origin after we assign the proper class!)
## deal with times as attributes
tms <- attr(x, "times")
if(!is.null(tms)) {
if(all(tms[!is.na(tms)] >= 1))
tms <- tms/(24 * 3600)
x <- x + tms
class(x) <- c("chron", "dates", "times")
}
else class(x) <- c("dates", "times")
x
}
print.chron <-
function(x, digits = NULL, quote = FALSE, prefix = "", sep = " ",
enclosed = c("(", ")"), simplify, ...)
{
if(!as.logical(length(x))) {
cat("chron(0)\n")
return(invisible(x))
}
if(missing(simplify) &&
is.null(simplify <- getOption("chron.simplify")))
simplify <- FALSE
xo <- x
x <- format.chron(x, sep = sep, enclosed = enclosed, simplify =
simplify)
print.default(x, quote = quote)
invisible(xo)
}
unique.chron <-
function(x, incomparables = FALSE, ...)
x[!duplicated(x, incomparables, ...)]
xtfrm.chron <-
function(x)
as.numeric(x)
pretty.chron <-
function(x, ...)
{
if(!inherits(x, "times"))
x <- chron(x)
x <- as.POSIXct(x)
attr(x, "tzone") <- "GMT"
ans <- pretty(x, ...)
structure(as.chron(ans), labels = attr(ans, "labels"))
}
来自ANTLR的错误输出:
line 11:12 no viable alternative at input 'if(missing(out.format)){\nif(is.character(format))\nout.format<-format\nelse\n'
line 8:27 missing {';', NL} at '{'
line 11:12 extraneous input '\n' expecting {'-', '+', '!', '~', 'function', '(', '{', 'if', 'for', 'while', 'repeat', '?', 'next', 'break', 'NULL', 'NA', 'Inf', 'NaN', 'TRUE', 'FALSE', HEX, INT, FLOAT, COMPLEX, STRING, ID}
line 93:12 no viable alternative at input 'if(length(times.)!=length(dates.)){\nif(length(times.)==1)\ntimes.<-rep.int(times.,length(dates.))\nelseif(length(dates.)==1)\ndates.<-rep.int(dates.,length(times.))\nelse\n'
line 96:4 extraneous input '}' expecting {<EOF>, '-', '+', '!', '~', 'function', '(', '{', 'if', 'for', 'while', 'repeat', '?', 'next', 'break', 'NULL', 'NA', 'Inf', 'NaN', 'TRUE', 'FALSE', HEX, INT, FLOAT, COMPLEX, STRING, ID, NL}
line 127:0 extraneous input '}' expecting {<EOF>, '-', '+', '!', '~', 'function', '(', '{', 'if', 'for', 'while', 'repeat', '?', 'next', 'break', 'NULL', 'NA', 'Inf', 'NaN', 'TRUE', 'FALSE', HEX, INT, FLOAT, COMPLEX, STRING, ID, NL}
line 148:20 no viable alternative at input 'if(is.character(x)){\nif(missing(format)||is.null(format)){\nout<-suppressWarnings(try(chron(x,...),silent=TRUE))\n## If this fails, try Date or datetime.\nif(inherits(out,"try-error")){\nxx<-sub("T"," ",x)\nout<-if(!any(grepl(" ",x,fixed=TRUE)))\nas.chron(as.Date(xx),...)\nelse\n'
line 140:25 missing {';', NL} at '{'
line 148:20 no viable alternative at input 'if(missing(format)||is.null(format)){\nout<-suppressWarnings(try(chron(x,...),silent=TRUE))\n## If this fails, try Date or datetime.\nif(inherits(out,"try-error")){\nxx<-sub("T"," ",x)\nout<-if(!any(grepl(" ",x,fixed=TRUE)))\nas.chron(as.Date(xx),...)\nelse\n'
line 148:20 no viable alternative at input 'if(inherits(out,"try-error")){\nxx<-sub("T"," ",x)\nout<-if(!any(grepl(" ",x,fixed=TRUE)))\nas.chron(as.Date(xx),...)\nelse\n'
line 148:20 extraneous input '\n' expecting {'-', '+', '!', '~', 'function', '(', '{', 'if', 'for', 'while', 'repeat', '?', 'next', 'break', 'NULL', 'NA', 'Inf', 'NaN', 'TRUE', 'FALSE', HEX, INT, FLOAT, COMPLEX, STRING, ID}
line 151:8 extraneous input '}' expecting {<EOF>, '-', '+', '!', '~', 'function', '(', '{', 'if', 'for', 'while', 'repeat', '?', 'next', 'break', 'NULL', 'NA', 'Inf', 'NaN', 'TRUE', 'FALSE', HEX, INT, FLOAT, COMPLEX, STRING, ID, NL}
line 156:4 extraneous input '}' expecting {<EOF>, '-', '+', '!', '~', 'function', '(', '{', 'if', 'for', 'while', 'repeat', '?', 'next', 'break', 'NULL', 'NA', 'Inf', 'NaN', 'TRUE', 'FALSE', HEX, INT, FLOAT, COMPLEX, STRING, ID, NL}
line 158:0 extraneous input '}' expecting {<EOF>, '-', '+', '!', '~', 'function', '(', '{', 'if', 'for', 'while', 'repeat', '?', 'next', 'break', 'NULL', 'NA', 'Inf', 'NaN', 'TRUE', 'FALSE', HEX, INT, FLOAT, COMPLEX, STRING, ID, NL}
line 190:8 extraneous input '\n' expecting {'-', '+', '!', '~', 'function', '(', '{', 'if', 'for', 'while', 'repeat', '?', 'next', 'break', 'NULL', 'NA', 'Inf', 'NaN', 'TRUE', 'FALSE', HEX, INT, FLOAT, COMPLEX, STRING, ID}