编译器类型推广

时间:2018-04-04 06:00:41

标签: casting compiler-construction type-promotion

在申请工作之前,我正在编写一个编译器作为练习,并且我自己工作但是在大学课程网站之后(因此我已经毕业了,这不是功课)。

目前,我不确定如何使用post-fix表示法解决类型转换。如果我有:

"1.0 plus 2 equals" + 1.0 + 2

1+2/(3-4)

然后在修复后的表示法中,我可以获得

STR[1,0]:"1.0 plus 2 equals"
DBL[1,22]:1.0
INT[1,28]:2
ADD[1,26]:+
ADD[1,20]:+

INT[0,0]:1
INT[0,2]:2
INT[0,5]:3
INT[0,7]:4
SUB[0,6]:-
DIV[0,3]:/
ADD[0,1]:+

我不确定如何评估第一个。目前我假设所有内容(错误地)都是双打并且已经获得此代码(在Compiler.java下):

static Object evaluate(List<Token> tokens)
{
    Stack<Double> stack = new Stack<>();

    for (Token token : tokens)
    {
        if (token.isEX())
            stack.push(Double.parseDouble(token.getLexeme()));
        else
        {
            Double a = stack.pop();
            Double b = stack.pop();

            switch (token.getType())
            {
                case ADD:
                    stack.push(b+a);
                    break;
                case SUB:
                    stack.push(b-a);
                    break;
                case MUL:
                    stack.push(b*a);
                    break;
                case DIV:
                    stack.push(b/a);
                    break;
                case MOD:
                    stack.push(b%a);
                    break;
            }
        }
    }
    return stack.pop();
}

我可以正确地将第二个评估为-1,但这有两个明显的问题,它在不需要时提升整数,并且它不适用于字符串连接。

直接回答会很好,但会有一些提示/网站或指南反而非常感激。

以下是用于理解此代码的文件(尽管它对许多其他自制编译器来说是通用的):

Compiler.java:

import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Compiler
{
    /**
     * Converts code string to direct stack of tokens
     *
     * @param line source of code
     * @param lineNum line number
     * @return stack of tokens in line order
     */
    static Stack<Token> tokenize(String line, int lineNum)
    {
        Stack<Token> tokens = new Stack<>();

        Pattern p = Pattern.compile(Token.regex);
        Matcher m = p.matcher(line);

        while (m.find())
            tokens.push(new Token(m.group(), lineNum, m.start()));

        return tokens;
    }

    /**
     * Formats stack of tokens to postfix notation (Shunting-Yard Algorithm)
     *
     * @param tokens to be added.
     * @return stack tokens in postfix notation
     */
    static Stack<Token> format(Stack<Token> tokens)
    {
        Stack<Token> out = new Stack<>();
        Stack<Token> tmp = new Stack<>();

        for (Token token : tokens)
        {
            Token.Type type = token.getType();

            if (type.rank > 0)  // if token is operation
            {
                while (!tmp.isEmpty() && tmp.peek().getType().rank > type.rank)
                    out.add(tmp.pop());
                tmp.push(token);
            }
            else if (type == Token.Type.LPR)
                tmp.push(token);
            else if (type == Token.Type.RPR)
            {
                while (tmp.peek().getType() != Token.Type.LPR)
                    out.add(tmp.pop());
                tmp.pop();
            }
            else
                out.add(token);
        }

        while (!tmp.isEmpty())
            out.add(tmp.pop());

        return out;
    }

    static Object evaluate(List<Token> tokens)
    {
        Stack<Double> stack = new Stack<>();

        for (Token token : tokens)
        {
            if (token.isEX())
                stack.push(Double.parseDouble(token.getLexeme()));
            else
            {
                // TODO: don't just assume it's double here?
                Double a = stack.pop();
                Double b = stack.pop();

                // TODO: only allow add if either token is a string
                switch (token.getType())
                {
                    case ADD:
                        stack.push(b+a);
                        break;
                    case SUB:
                        stack.push(b-a);
                        break;
                    case MUL:
                        stack.push(b*a);
                        break;
                    case DIV:
                        stack.push(b/a);
                        break;
                    case MOD:
                        stack.push(b%a);
                        break;
                }
            }
        }
        return stack.pop();
    }
}

Token.java:

public class Token
{
    enum Type
    {
        LSH(3, "<<"),       // Left shift
        RSH(3, ">>"),       // Right shift
        MUL(2, "\\*"),      // Multiply
        DIV(2, "/"),        // Divide
        MOD(2, "/"),        // Modulo
        ADD(1, "\\+"),      // Add
        SUB(1, "-"),        // Subtract

        DBL("\\d+\\.\\d*"), // Double
        INT("\\d+"),        // Integer
        STR("\".*\""),      // String
        LPR("\\("),         // Left parenthesis
        RPR("\\)"),         // Right parenthesis
        ;
        //REF(0, "\\w+(\\.\\w+)+"),

        final String symbol;
        final int rank;

        Type(String symbol)
        {
            this.symbol = symbol;
            this.rank = 0;
        }

        Type(int rank, String symbol)
        {
            this.symbol = symbol;
            this.rank = rank;
        }
    }

    /**
     * Regular expression for expressions and operations
     */
    final static String regex;

    static
    {
        StringBuilder sb = new StringBuilder();
        Type[] t = Type.values();
        for (int i = 0; i < t.length; i++)
        {
            if (i != 0)
                sb.append('|');
            sb.append(t[i].symbol);
        }
        regex = sb.toString();
    }

    /**
     * Text literal
     */
    private String lexeme;

    /**
     * Expression/operation type
     */
    private Type type;

    /**
     * line number
     */
    private int lineNum;

    /**
     * character number
     */
    private int posNum;

    /**
     * Creates a token with debugging information
     *
     * @param lexeme Text literal
     * @param lineNum Line number
     * @param posNum Character number
     * @return Token with processed type of lexeme
     */
    Token(String lexeme, int lineNum, int posNum)
    {
        this.lexeme = lexeme;
        for (Type type : Type.values())
            if (lexeme.matches(type.symbol))
            {
                this.type = type;
                break;
            }
        this.lineNum = lineNum;
        this.posNum = posNum;
    }

    boolean isEX()
    {
        return type.rank == 0;
    }

    boolean isOP()
    {
        return type.rank > 0;
    }

    String getLexeme()
    {
        return lexeme;
    }

    Type getType()
    {
        return type;
    }

    public int getLineNum()
    {
        return lineNum;
    }

    public int getPosNum()
    {
        return posNum;
    }

    @Override
    public String toString()
    {
        return String.format("%s[%d,%d]:%s", type, lineNum, posNum, lexeme);
    }
}

Driver.java(对于一些简单的测试用例):

import java.util.Stack;

public class Driver
{
    public static void main(String[] args)
    {
        String simpleTest = "1+2/(3-4)";
        Stack<Token> tokens;
        tokens = Compiler.format(Compiler.tokenize(simpleTest, 0));
        System.out.println("Simple Test");
        tokens.forEach(System.out::println);
        System.out.println(Compiler.evaluate(tokens));

        String hardTest = "\"1.0 plus 2 equals\" + 1.0 + 2";
        tokens = Compiler.format(Compiler.tokenize(hardTest, 1));
        System.out.println("\nHard Test");
        tokens.forEach(System.out::println);
    }
}

0 个答案:

没有答案