解析SQL语法,设计模式

时间:2012-04-30 07:26:07

标签: java sql design-patterns antlr abstract-syntax-tree

我正在尝试使用模拟sql语法来构建一个简单的sql,类似于键值存储的接口。 这些值基本上是POJO

一个例子是

select A.B.C from OBJ_POOL where A.B.X = 45 AND A.B.Y > '88' AND A.B.Z != 'abc';

OBJ_POOL只是同一类POJO的列表。在这个例子中,A将是基类。

Class A
    Class B
        String C
        Integer X
        String Y
        String Z

现在A.B.C等同于A.getB()。getC()

我使用Antlr来解析上面的语句来获取AST,然后hoping使用Apache BeanUtils来反射获取/设置字段名称。

我写了构建AST的语法 AST for the above statment 现在我面临两个问题

  1. 如何为where子句实现访问者? A.B.X = 45表示所有对象的字段X为45,如何进行过滤是否有任何好方法可以做到这一点?
  2. 有没有办法遍历生成的AST,而不会使用自定义逻辑(存储访问,属性getter / setter等)混淆访问者代码。
  3. 第二个问题更令人担忧,因为声明可能会有很多内容。

    简而言之,任何建议/链接/设计模式都能很好地解析sql select statment的一小部分,我们将不胜感激

    由于

1 个答案:

答案 0 :(得分:19)

你可以这样做,就像我在my blog posts中所展示的那样(因为我知道你读过这些,我不会详细介绍)。在这种情况下,唯一的区别是每个数据行都有自己的范围。传递此范围的一种简单方法是将其作为eval(...)方法的参数提供。

以下是如何实施此功能的快速演示。请注意,我根据我的博客文章快速攻击了这一点:并非所有功能都可用(请参阅许多TODO,并且其中也可能存在(小)错误。使用风险自负!) 。

除了ANTLR v3.3之外,此演示还需要以下3个文件:

Select.g

grammar Select;

options {
  output=AST;
}

tokens {
  // imaginary tokens
  ROOT;
  ATTR_LIST;
  UNARY_MINUS;

  // literal tokens
  Eq     = '=';
  NEq    = '!=';
  LT     = '<';
  LTEq   = '<=';
  GT     = '>';
  GTEq   = '>=';
  Minus  = '-';
  Not    = '!';
  Select = 'select';
  From   = 'from';
  Where  = 'where';
  And    = 'AND';
  Or     = 'OR';
}

parse
 : select_stat EOF -> ^(ROOT select_stat)
 ;

select_stat
 : Select attr_list From Id where_stat ';' -> ^(Select attr_list Id where_stat)
 ;

attr_list
 : Id (',' Id)* -> ^(ATTR_LIST Id+)
 ;

where_stat
 : Where expr -> expr
 |            -> ^(Eq Int["1"] Int["1"]) 
                 // no 'where', insert '1=1' which is always true
 ;

expr
 : or_expr
 ;

or_expr
 : and_expr (Or^ and_expr)*
 ;

and_expr
 : eq_expr (And^ eq_expr)*
 ;

eq_expr
 : rel_expr ((Eq | NEq)^ rel_expr)*
 ;

rel_expr
 : unary_expr ((LT | LTEq | GT | GTEq)^ unary_expr)?
 ;

unary_expr
 : Minus atom -> ^(UNARY_MINUS atom)
 | Not atom   -> ^(Not atom)
 | atom
 ;

atom
 : Str
 | Int
 | Id
 | '(' expr ')' -> expr
 ;

Id    : ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | Digit)*;
Str   : '\'' ('\'\'' | ~('\'' | '\r' | '\n'))* '\'' 
        {
         // strip the surrounding quotes and replace '' with '
         setText($text.substring(1, $text.length() - 1).replace("''", "'"));
        }
      ;
Int   : Digit+;
Space : (' ' | '\t' | '\r' | '\n') {skip();};

fragment Digit : '0'..'9';

SelectWalker.g

tree grammar SelectWalker;

options {
  tokenVocab=Select;
  ASTLabelType=CommonTree;
}

@header {
  import java.util.List;
  import java.util.Map;
  import java.util.Set;
}

@members {

  private Map<String, List<B>> dataPool;

  public SelectWalker(CommonTreeNodeStream nodes, Map<String, List<B>> data) {
    super(nodes);
    dataPool = data;
  }
}

query returns [List<List<Object>> result]
 : ^(ROOT select_stat) {$result = (List<List<Object>>)$select_stat.node.eval(null);}
 ;

select_stat returns [Node node]
 : ^(Select attr_list Id expr) 
    {$node = new SelectNode($attr_list.attributes, dataPool.get($Id.text), $expr.node);}
 ;

attr_list returns [List<String> attributes]
@init{$attributes = new ArrayList<String>();}
 : ^(ATTR_LIST (Id {$attributes.add($Id.text);})+)
 ;

expr returns [Node node]
 : ^(Or a=expr b=expr)   {$node = null; /* TODO */}
 | ^(And a=expr b=expr)  {$node = new AndNode($a.node, $b.node);}
 | ^(Eq a=expr b=expr)   {$node = new EqNode($a.node, $b.node);}
 | ^(NEq a=expr b=expr)  {$node = new NEqNode($a.node, $b.node);}
 | ^(LT a=expr b=expr)   {$node = null; /* TODO */}
 | ^(LTEq a=expr b=expr) {$node = null; /* TODO */}
 | ^(GT a=expr b=expr)   {$node = new GTNode($a.node, $b.node);}
 | ^(GTEq a=expr b=expr) {$node = null; /* TODO */}
 | ^(UNARY_MINUS a=expr) {$node = null; /* TODO */}
 | ^(Not a=expr)         {$node = null; /* TODO */}
 | Str                   {$node = new AtomNode($Str.text);}
 | Int                   {$node = new AtomNode(Integer.valueOf($Int.text));}
 | Id                    {$node = new IdNode($Id.text);}
 ;

Main.java

(是的,将所有这些Java类放在同一个文件中:Main.java

import org.antlr.runtime.*;
import org.antlr.runtime.tree.*;
import org.antlr.stringtemplate.*;
import java.util.*;

public class Main {

  static Map<String, List<B>> getData() {
    Map<String, List<B>> map = new HashMap<String, List<B>>();
    List<B> data = new ArrayList<B>();
    data.add(new B("id_1", 345, "89", "abd"));
    data.add(new B("id_2", 45, "89", "abd"));
    data.add(new B("id_3", 1, "89", "abd"));
    data.add(new B("id_4", 45, "8", "abd"));
    data.add(new B("id_5", 45, "89", "abc"));
    data.add(new B("id_6", 45, "99", "abC"));
    map.put("poolX", data);
    return map;
  }

  public static void main(String[] args) throws Exception {
    String src = "select C, Y from poolX where X = 45 AND Y > '88' AND Z != 'abc';";
    SelectLexer lexer = new SelectLexer(new ANTLRStringStream(src));
    SelectParser parser = new SelectParser(new CommonTokenStream(lexer));
    CommonTree tree = (CommonTree)parser.parse().getTree();  
    SelectWalker walker = new SelectWalker(new CommonTreeNodeStream(tree), getData());  
    List<List<Object>> result = walker.query();
    for(List<Object> row : result) {
      System.out.println(row);
    }
  }
}

class B {

  String C;
  Integer X;
  String Y;
  String Z;

  B(String c, Integer x, String y, String z) {
    C = c;
    X = x;
    Y = y;
    Z = z;
  }

  Object getAttribute(String attribute) {
    if(attribute.equals("C")) return C;
    if(attribute.equals("X")) return X;
    if(attribute.equals("Y")) return Y;
    if(attribute.equals("Z")) return Z;
    throw new RuntimeException("Unknown attribute: B." + attribute);
    // or use your Apache Bean-util API, or even reflection here instead of the above...
  }
}

interface Node {
  Object eval(B b);
}

class AtomNode implements Node {

  final Object value;

  AtomNode(Object v) {
    value = v;
  }

  public Object eval(B b) {
    return value;
  }
}

abstract class BinNode implements Node {

  final Node left;
  final Node right;

  BinNode(Node l, Node r) {
    left = l;
    right = r;
  }

  public abstract Object eval(B b);
}

class AndNode extends BinNode {

  AndNode(Node l, Node r) {
    super(l, r);
  }

  @Override
  public Object eval(B b) {
    return (Boolean)super.left.eval(b) && (Boolean)super.right.eval(b);
  }
}

class EqNode extends BinNode {

  EqNode(Node l, Node r) {
    super(l, r);
  }

  @Override
  public Object eval(B b) {
    return super.left.eval(b).equals(super.right.eval(b));
  }
}

class NEqNode extends BinNode {

  NEqNode(Node l, Node r) {
    super(l, r);
  }

  @Override
  public Object eval(B b) {
    return !super.left.eval(b).equals(super.right.eval(b));
  }
}

class GTNode extends BinNode {

  GTNode(Node l, Node r) {
    super(l, r);
  }

  @Override
  public Object eval(B b) {
    return ((Comparable)super.left.eval(b)).compareTo((Comparable)super.right.eval(b)) > 0;
  }
}

class IdNode implements Node {

  final String id;

  IdNode(String i) {
    id = i;
  }

  @Override
  public Object eval(B b) {
    return b.getAttribute(id);
  }
}

class SelectNode implements Node {

  final List<String> attributes;
  final List<B> data;
  final Node expression;

  SelectNode(List<String> a, List<B> d, Node e) {
    attributes = a;
    data = d;
    expression = e;
  }

  @Override
  public Object eval(B ignored) {
    List<List<Object>> result = new ArrayList<List<Object>>();
    for(B b : data) {
      if((Boolean)expression.eval(b)) {
        // 'b' passed, check which attributes to include
        List<Object> row = new ArrayList<Object>();
        for(String attr : attributes) {
          row.add(b.getAttribute(attr));
        }
        result.add(row);
      }
    }
    return result;
  }
}

如果您现在生成词法分析器,解析器和树步行器并运行Main类:

java -cp antlr-3.3.jar org.antlr.Tool Select.g 
java -cp antlr-3.3.jar org.antlr.Tool SelectWalker.g 
javac -cp antlr-3.3.jar *.java
java -cp .:antlr-3.3.jar Main

您将看到查询的输出:

select C, Y from poolX where X = 45 AND Y > '88' AND Z != 'abc';

输入:

C           X       Y       Z
"id_1"      345     "89"    "abd"
"id_2"      45      "89"    "abd"
"id_3"      1       "89"    "abd"
"id_4       45      "8"     "abd"
"id_5"      45      "89"    "abc"
"id_6"      45      "99"    "abC"

是:

[id_2, 89]
[id_6, 99]

请注意,如果省略where语句,则会自动插入表达式1 = 1,从而导致查询:

select C, Y from poolX;

打印以下内容:

[id_1, 89]
[id_2, 89]
[id_3, 89]
[id_4, 8]
[id_5, 89]
[id_6, 99]