将Java`Phank.isJavaIdentifierStart`移植到javascript / typescript

时间:2017-03-28 01:17:26

标签: javascript java typescript

我正在努力从java移植ANTLR4语法 - >打字稿。

语法的一部分使用java特有的功能:

JavaLetter
    :   [a-zA-Z$_] // these are the "java letters" below 0xFF
    |   // covers all characters above 0xFF which are not a surrogate
        ~[\u0000-\u00FF\uD800-\uDBFF]
        {Character.isJavaIdentifierStart(_input.LA(-1))}?
    ;

我需要将Character.isJavaIdentifierStart方法移植到javascript / typescript。

最简单的移植方法是什么?我查看source implementation并提供直接端口似乎很多工作(甚至不确定它在技术上是否合法?)。

我认为简单的正则表达式可以提供相同的功能(可能会牺牲性能)?

1 个答案:

答案 0 :(得分:0)

可以只是转储字符集,然后将其放入合适的搜索结构中以进行快速查找(搜索树)。

这是转储程序:

import java.util.*;

/*
 * Print numeric ranges [LOW,HIGH] for the characters for which Java says that:
 * Character.isJavaIdentifierStart(i) is true ... if "--start" is passed to main.
 * Character.isJavaIdentifierPart(i)  is true ... if "--part"  is passed to main.
 * Those that are "part" but not "start" i.e. those that cannot be used as first chars if "--notstart"  is passed to main.
 */

class CharRangePrinter {

   // The "Range" expresses that a value may be
   // between [low,high]: low <= x <= high.
   // A range of length 1 is of course [limit,limit].

   private static class Range {

      final int low,high;

      public Range(int low,int high) {
         assert(low<=high);
         this.low = low;
         this.high = high;
      }

      public int length() {
         return high-low+1;
      }

      public String toString() {
         int length = high-low+1;
         return "[" + low + "," + high + "]"; 
      }
   }

   // simple option handling

   private final static String want_start_str    = "--start";
   private final static String want_part_str     = "--part";
   private final static String want_nostart_str  = "--nostart";

   // printing; 3 whitespaces at the start of each line
   // and a line contains max 78 characters (excluding the \n)

   private final static int indents     = 3;
   private final static int width_limit = 78;

   private static void printRanges(List<Range> ranges) {
      StringBuilder buf = new StringBuilder();
      for (int i=0;i<ranges.size();i++) {
         if (i>0) { buf.append(","); }
         String hold = buf.toString();
         Range range = ranges.get(i);
         buf.append(range);
         if (buf.length()+indents>width_limit) {
            outLine(hold);
            buf=new StringBuilder();
            buf.append(range);
         }
      }
      outLine(buf.toString());
   }

   private static void outLine(String x) {
      for (int i=0;i<indents;i++) { out(" "); }
      out(x);
      out("\n");
   }

   private static void out(String x) {
      System.out.print(x);
   }

   private static enum WantWhat {start,part,nostart};

   private static WantWhat handleArgv(String[] argv) {
      boolean good_argv = true;
      WantWhat want = null;
      if (argv.length!=1) {
         good_argv = false;
      }
      if (good_argv) {   
         String arg = argv[0];
         if (want_start_str.equals(arg)) {
            want = WantWhat.start;   
         }
         if (want_part_str.equals(arg)) {
            want = WantWhat.part;
         }
         if (want_nostart_str.equals(arg)) {
            want = WantWhat.nostart;
         }
         good_argv = (want != null);
      }
      if (!good_argv) {
         System.err.println("Pass " + want_start_str + " to print codepoints for Character.isJavaIdentifierStart()");
         System.err.println("Pass " + want_part_str  + " to print codepoints for Character.isJavaIdentifierPart()");
         System.err.println("Pass " + want_nostart_str  + " to print codepoints that can be 'part' but cannot be 'start'");
         System.exit(1);
      }      
      return want;
   }

   public static void main(String[] argv) {

      WantWhat want = handleArgv(argv); assert(want!=null);

      int start=0;
      int end=-1;

      List<Range> ranges = new LinkedList<Range>();

      for (int i=0;i<0xFFFF;i++) {
         boolean is;
         switch (want) {
         case start:
            is = Character.isJavaIdentifierStart(i);
            break;
         case part:
            is = Character.isJavaIdentifierPart(i);
            break;
         case nostart:
            is = Character.isJavaIdentifierPart(i) && !Character.isJavaIdentifierStart(i);
            break;
         default:
            throw new IllegalStateException("WTF");
         }
         if (is) {
            if (end < 0) {
               // not currently doing a run; start run
               start = i;
               end   = i;
            }  
            else {
               // extend current run 
               end = i;
            }
         }
         else {
            if (end < 0) {
               // not currently doing a run; do nothing
            }
            else {
               // close current run
               int length = end-start+1; 
               ranges.add(new Range(start,end)); 
               end=-1;
            }
         }
      }
      printRanges(ranges);
   }
}