开发人员创建了一个非常简单的程序:
var a = 6;
var b = 7
console.log(a * b);
我想确保开发人员使用分号,因为我不相信所有开发人员都了解全部的ASI(自动分号插入)规则。由于我还会添加其他代码质量检查,因此我想使用Esprima生成用于检查的AST。当使用Esprima在线解析器(启用"基于行和列的位置"选项)解析上面的简单程序时,会生成以下结构:
{
"loc": {
"start": {
"line": 1,
"column": 0
},
"end": {
"line": 3,
"column": 19
}
},
"type": "Program",
"body": [
{
"loc": {
"start": {
"line": 1,
"column": 0
},
"end": {
"line": 1,
"column": 10
}
},
"type": "VariableDeclaration",
"declarations": [
{
"loc": {
"start": {
"line": 1,
"column": 4
},
"end": {
"line": 1,
"column": 9
}
},
"type": "VariableDeclarator",
"id": {
"loc": {
"start": {
"line": 1,
"column": 4
},
"end": {
"line": 1,
"column": 5
}
},
"type": "Identifier",
"name": "a"
},
"init": {
"loc": {
"start": {
"line": 1,
"column": 8
},
"end": {
"line": 1,
"column": 9
}
},
"type": "Literal",
"value": 6,
"raw": "6"
}
}
],
"kind": "var"
},
{
"loc": {
"start": {
"line": 2,
"column": 0
},
"end": {
"line": 3,
"column": 0
}
},
"type": "VariableDeclaration",
"declarations": [
{
"loc": {
"start": {
"line": 2,
"column": 4
},
"end": {
"line": 2,
"column": 9
}
},
"type": "VariableDeclarator",
"id": {
"loc": {
"start": {
"line": 2,
"column": 4
},
"end": {
"line": 2,
"column": 5
}
},
"type": "Identifier",
"name": "b"
},
"init": {
"loc": {
"start": {
"line": 2,
"column": 8
},
"end": {
"line": 2,
"column": 9
}
},
"type": "Literal",
"value": 7,
"raw": "7"
}
}
],
"kind": "var"
},
{
"loc": {
"start": {
"line": 3,
"column": 0
},
"end": {
"line": 3,
"column": 19
}
},
"type": "ExpressionStatement",
"expression": {
"loc": {
"start": {
"line": 3,
"column": 0
},
"end": {
"line": 3,
"column": 18
}
},
"type": "CallExpression",
"callee": {
"loc": {
"start": {
"line": 3,
"column": 0
},
"end": {
"line": 3,
"column": 11
}
},
"type": "MemberExpression",
"computed": false,
"object": {
"loc": {
"start": {
"line": 3,
"column": 0
},
"end": {
"line": 3,
"column": 7
}
},
"type": "Identifier",
"name": "console"
},
"property": {
"loc": {
"start": {
"line": 3,
"column": 8
},
"end": {
"line": 3,
"column": 11
}
},
"type": "Identifier",
"name": "log"
}
},
"arguments": [
{
"loc": {
"start": {
"line": 3,
"column": 12
},
"end": {
"line": 3,
"column": 17
}
},
"type": "BinaryExpression",
"operator": "*",
"left": {
"loc": {
"start": {
"line": 3,
"column": 12
},
"end": {
"line": 3,
"column": 13
}
},
"type": "Identifier",
"name": "a"
},
"right": {
"loc": {
"start": {
"line": 3,
"column": 16
},
"end": {
"line": 3,
"column": 17
}
},
"type": "Identifier",
"name": "b"
}
}
]
}
}
]
}
我该如何检查是否使用了分号?我可以推断出第二行可能没有使用分号,因为AST中的第二个VariableDeclaration显示它在{line: 3, column: 0}处结束(见上面的AST输出)。
是否有其他基于Esprima的工具可以完成这种检查?另外,如何检查行结尾是\r\n还是\n?还是说Esprima并不适合完成这项任务?
我把这个问题告诉了一位同事,他说我可能需要的是解析树,这样我就可以得到一个令牌(token)列表。这解决了我的部分问题。以下是Esprima提供的令牌:
[
{
"type": "Keyword",
"value": "var"
},
{
"type": "Identifier",
"value": "a"
},
{
"type": "Punctuator",
"value": "="
},
{
"type": "Numeric",
"value": "6"
},
{
"type": "Punctuator",
"value": ";"
},
{
"type": "Keyword",
"value": "var"
},
{
"type": "Identifier",
"value": "b"
},
{
"type": "Punctuator",
"value": "="
},
{
"type": "Numeric",
"value": "7"
},
{
"type": "Identifier",
"value": "console"
},
{
"type": "Punctuator",
"value": "."
},
{
"type": "Identifier",
"value": "log"
},
{
"type": "Punctuator",
"value": "("
},
{
"type": "Identifier",
"value": "a"
},
{
"type": "Punctuator",
"value": "*"
},
{
"type": "Identifier",
"value": "b"
},
{
"type": "Punctuator",
"value": ")"
},
{
"type": "Punctuator",
"value": ";"
}
]
现在我需要弄清楚如何将这个令牌列表与AST结合使用,从而判断出第2行末尾应该有一个分号。
答案 0 :(得分:1)
要捕获JavaScript解释器不会报告的逻辑或约定错误(例如要求始终用分号终止语句),您应该编写自己的状态机来对语法建模。以下是针对您提供的示例,用CoffeeScript + Node.js实现的一种方法:
esprima = require 'esprima'
# --- Predicate builders -----------------------------------------------------
# Each builder returns a one-argument function that yields a boolean-like
# answer about the value (or token) it is handed.

# Lifts a predicate over strings into a predicate over a token's `type`.
p_type = (is_valid) ->
  (tok) -> is_valid tok.type

# Lifts a predicate over strings into a predicate over a token's `value`.
p_value = (is_valid) ->
  (tok) -> is_valid tok.value

# Matches exactly one target (strict equality).
p_is = (target) ->
  (candidate) -> candidate is target

# Matches any one of the listed targets.
p_in = (targets...) ->
  (candidate) -> candidate in targets

# Predicate that accepts everything.
p_tautology = -> true
# Combines any number of predicates with logical OR.
#
# fs...   - predicate functions, each called with the single `value` argument.
# Returns - a predicate that evaluates `fs` left to right and stops at the
#           first truthy result, which it returns. When every predicate is
#           falsy it returns the last result; with no predicates at all it
#           returns false (the identity of OR), so an empty disjunction
#           accepts nothing.
p_disjoin = (fs...) -> (value) ->
  result = false
  for f in fs
    result = f(value)
    return result if result
  result
# Combines any number of predicates with logical AND.
#
# With no predicates the result is `p_tautology`. Otherwise the returned
# predicate runs `fs` left to right, stops at the first falsy result and
# returns it; if none is falsy it returns the last predicate's result.
p_conjoin = (fs...) ->
  return p_tautology if fs.length is 0
  (value) ->
    outcome = undefined
    for check in fs
      outcome = check(value)
      break unless outcome
    outcome
# --- Token accessors and label selectors -------------------------------------

# Reads a token's `type` field.
f_type = (token) ->
  {type} = token
  type

# Reads a token's `value` field.
f_value = (token) ->
  {value} = token
  value

# Builds a function that ignores its arguments and always yields `value`.
f_constant = (value) ->
  -> value

# Returns its argument unchanged.
f_identity = (x) -> x

# Wraps `fn` so that it is called with the token only.
f_token = (fn) ->
  (token) -> fn token

# Builds a selector that looks up `dispatch(token)` in the `transition` table.
f_transition = (dispatch, transition) ->
  (token) ->
    key = dispatch token
    transition[key]

# Builds a selector that falls back to `default_value` whenever
# `transition_fn(token)` is falsy.
f_default = (default_value, transition_fn) ->
  (token) ->
    label = transition_fn token
    if label then label else default_value
# Renders a value as a short human-readable string for error messages.
#
# value   - anything.
# Returns - 'null' / 'undefined' for those two values; strings wrapped in
#           double quotes (no escaping); array-likes (a non-negative numeric
#           `length`) as '[a, b, ...]'; plain objects as '{key: value, ...}'
#           over their own enumerable keys; everything else via its own
#           `toString()`.
to_string = (value) ->
  if value is null
    'null'
  else if value is undefined
    'undefined'
  else if typeof value is 'string'
    '"' + value + '"'
  else if typeof value is 'function'
    # Functions carry a numeric `length` (their arity); without this branch
    # they would be mistaken for array-likes and printed as '[undefined, ...]'.
    value.toString()
  else if typeof value.length is 'number' and value.length >= 0
    '[' + (to_string(e) for e in value).join(', ') + ']'
  else if typeof value is 'object'
    # Objects with the default toString, or with no toString at all (e.g.
    # created by Object.create(null)), are listed key by key.
    if typeof value.toString isnt 'function' or value.toString is Object::toString
      '{' + ("#{k}: #{to_string v}" for own k, v of value).join(', ') + '}'
    else
      value.toString()
  else
    value.toString()
# Builds the `handle_error` callback used by the states below: it reports that
# `expected` should have followed the previous token, then exits with status 1.
#
# expected - text inserted after "Expected " in the message.
# Returns  - a function (tokens, index) where `index` is the position of the
#            offending token; it reads `loc.end` of the token before it, so
#            tokens must carry location info and `index` must be > 0.
expected_after = (expected) -> (tokens, index) ->
  [prev_token, curr_token] = tokens.slice(index - 1, index + 1)
  {line, column} = prev_token.loc.end
  process.stderr.write(
    "[Error] line #{line}, column #{1 + column}: Expected #{expected} " +
    "after #{to_string prev_token.value}, but received " +
    "#{to_string curr_token.value}\n")
  process.exit(1)

# State machine for the two statement shapes of the sample program:
#   var <id> [= <literal-or-id>] {, <id> [= ...]} ;
#   <id> . <id> ( <id> * <id> ) ;
#
# Every state is an object with:
#   is_valid(token)            - does this state accept the token?
#   next_label(token)          - key into `transition` for the following state
#   handle_error(tokens, index) - report a rejected token and exit
#   transition                 - label -> thunk returning the following state
# States are created lazily through thunks so that the graph can refer back
# to `root` once a statement has been terminated.
root =
  # A statement starts with the keyword `var` or with an identifier.
  is_valid: p_disjoin(p_conjoin(p_type(p_is 'Keyword'), p_value(p_is 'var')), p_type(p_is 'Identifier'))
  next_label: f_transition f_type, {'Keyword': 'variable_declaration', 'Identifier': 'identifier'}
  handle_error: (tokens, index) ->
    if index > 0
      expected_after('variable declaration')(tokens, index)
    else
      # Very first token: there is no previous token to point at.
      curr_token = tokens[index]
      {line, column} = curr_token.loc.start
      process.stderr.write(
        "[Error] line #{line}, column #{1 + column}: Expected variable " +
        "declaration but received #{to_string curr_token.value}\n")
      process.exit(1)
  transition:
    # --- <id> . <id> ( <id> * <id> ) ; ---------------------------------------
    # After the leading identifier: expects '.'.
    identifier: () ->
      is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_in '.')
      next_label: f_transition f_value, {'.': 'membership'}
      handle_error: expected_after "'.'"
      transition:
        # After '.': expects the member name.
        membership: () ->
          is_valid: p_type(p_is 'Identifier')
          next_label: f_constant 'invocation'
          handle_error: expected_after 'an identifier'
          transition:
            # After the member name: expects '('.
            invocation: () ->
              is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_is '(')
              next_label: f_constant 'identifier'
              handle_error: expected_after "'('"
              transition:
                # After '(': expects the left operand.
                identifier: () ->
                  is_valid: p_type(p_in 'Identifier')
                  next_label: f_constant 'punctuator'
                  handle_error: expected_after 'an identifier'
                  transition:
                    # After the left operand: expects the operator '*'.
                    punctuator: () ->
                      is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_in '*')
                      next_label: f_transition f_value, {'*': 'identifier'}
                      handle_error: expected_after 'a binary operator'
                      transition:
                        # After '*': expects the right operand.
                        identifier: () ->
                          is_valid: p_conjoin p_type(p_is 'Identifier')
                          next_label: f_constant 'punctuator'
                          handle_error: expected_after 'an identifier'
                          transition:
                            # After the right operand: expects ')'.
                            punctuator: () ->
                              is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_is ')')
                              next_label: f_constant 'punctuator'
                              handle_error: expected_after "')'"
                              transition:
                                # After ')': expects the terminating ';'.
                                # This must be a real predicate (p_conjoin):
                                # a constant here would accept any token and
                                # then fail on a missing transition instead
                                # of reporting the missing semicolon.
                                punctuator: () ->
                                  is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_is ';')
                                  next_label: f_transition f_value, {';': 'terminator'}
                                  handle_error: expected_after "';'"
                                  transition:
                                    terminator: () -> root
    # --- var <id> [= <value>] {, <id> [= <value>]} ; --------------------------
    # After `var` (or after ','): expects the declared name.
    variable_declaration: () ->
      is_valid: p_type(p_is 'Identifier')
      next_label: f_constant 'punctuator'
      handle_error: expected_after 'an identifier'
      transition:
        # After the declared name: expects '=', ',' or ';'.
        punctuator: () ->
          is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_in '=', ',', ';')
          next_label: f_transition f_value, {'=': 'assignment', ',': 'separator', ';': 'terminator'}
          handle_error: expected_after "'=', ',', or ';'"
          transition:
            # After '=': expects a literal or an identifier.
            assignment: () ->
              is_valid: p_type(p_in 'Boolean', 'Identifier', 'Null', 'Numeric', 'String', 'RegularExpression')
              next_label: f_constant 'punctuator'
              handle_error: expected_after 'a literal or an identifier'
              transition:
                # After the initializer: expects ',' or ';'. Only tokens that
                # have an entry in next_label are accepted, so anything else
                # is reported as an error rather than hitting a missing
                # transition.
                punctuator: () ->
                  is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_in ',', ';')
                  next_label: f_transition f_value, {',': 'identifier', ';': 'terminator'}
                  handle_error: expected_after "',' or ';'"
                  transition:
                    identifier: () -> root.transition.variable_declaration()
                    terminator: () -> root
            separator: () -> root.transition.variable_declaration()
            terminator: () -> root
# Runs a token list through the state machine and reports the first violation.
#
# tokens - array of tokens carrying `loc` information (the end-of-input
#          report reads `loc.end` of the last token).
# start  - initial state; defaults to `root`. A state provides `is_valid`,
#          `next_label`, `handle_error` and a `transition` table of thunks.
#
# A token is rejected through the current state's `handle_error` when the
# state does not accept it, or when the state accepts it but has no
# transition for the label it selects (so a gap in the table is reported
# instead of raising a TypeError). If the input ends while a statement is
# still open, i.e. the machine has not returned to `start`, that is reported
# as well and the process exits with status 1.
# Returns nothing.
lint = (tokens, start = root) ->
  state = start
  for token, index in tokens
    advance = if state.is_valid(token) then state.transition[state.next_label token] else null
    if typeof advance is 'function'
      state = advance()
    else
      state.handle_error(tokens, index)
  if state isnt start
    # The state only changes inside the loop, so tokens is non-empty here.
    last_token = tokens[tokens.length - 1]
    {line, column} = last_token.loc.end
    process.stderr.write(
      "[Error] line #{line}, column #{1 + column}: Unexpected end of input " +
      "after #{JSON.stringify last_token.value}; the statement is not terminated\n")
    process.exit(1)
  return
# Sample program from the question; its second line deliberately lacks the
# terminating semicolon.
text = '''
var a = 6;
var b = 7
console.log(a * b);
'''
# Tokenize with location info, since the error handlers read each token's `loc`.
tokens = esprima.tokenize(text, {loc: true})
lint(tokens)