是否有任何库可以解析JVM字节码并为每条指令提供偏移量?

时间:2013-12-19 23:53:07

标签: java parsing bytecode

假设你有一个这样的类:

package org.example.foo;
/** Minimal example class; its {@code foo} method is the one whose bytecode is discussed. */
public class Foo {
    // Instance field that foo adds to its argument.
    private int y;

    // more code here

    /**
     * Returns the sum of the argument and the field {@code y}.
     *
     * @param x the value to add to {@code y}
     * @return {@code x + y}
     */
    public int foo(int x) {
        return x + y;
    }
}

凭记忆来说,foo方法的字节码大致应该是这样的:

ILOAD_1
ALOAD_0
GETFIELD org/example/foo/Foo.y : I
IADD
IRETURN

是否有任何库可以把.class文件解析成这样的指令,并告诉我每条指令位于.class文件中的哪个偏移量?我已经使用过ObjectWeb的ASM来解析和生成.class文件,但它并没有开箱即用地提供这些信息。

2 个答案:

答案 0 :(得分:1)

出于好奇,我查看了ASM的源码。下面是读取并访问指令的循环。偏移量保存在局部变量offset中,但由于某种原因并没有传递给访问者。因此,如果你想使用ASM,似乎唯一的选择就是自己修改代码,或者使用丑陋的反射技巧(hack)。如果您想修改ASM,只需把这里所有的visit*Insn调用改为同时传入offset变量即可。

// Decodes the bytecode in b between codeStart and codeEnd, one instruction per
// iteration, and reports each decoded instruction to the visitor mv.
// u is the read cursor into b; every switch case advances it past the
// instruction it has just decoded.
u = codeStart;
while (u < codeEnd) {
    // Offset of the current instruction relative to codeStart. It is used below
    // to index labels and to match frames and annotations, but it is not passed
    // to any of the mv.visit*Insn calls.
    int offset = u - codeStart;

    // visits the label and line number for this offset, if any
    Label l = labels[offset];
    if (l != null) {
        mv.visitLabel(l);
        if ((context.flags & SKIP_DEBUG) == 0 && l.line > 0) {
            mv.visitLineNumber(l.line, l);
        }
    }

    // visits the frame for this offset, if any
    while (FRAMES && frame != null
            && (frame.offset == offset || frame.offset == -1)) {
        // if there is a frame for this offset, makes the visitor visit
        // it, and reads the next frame if there is one.
        if (frame.offset != -1) {
            if (!zip || unzip) {
                mv.visitFrame(Opcodes.F_NEW, frame.localCount,
                        frame.local, frame.stackCount, frame.stack);
            } else {
                mv.visitFrame(frame.mode, frame.localDiff, frame.local,
                        frame.stackCount, frame.stack);
            }
        }
        if (frameCount > 0) {
            stackMap = readFrame(stackMap, zip, unzip, frame);
            --frameCount;
        } else {
            frame = null;
        }
    }

    // visits the instruction at this offset
    // (& 0xFF converts the signed byte into an unsigned opcode value; the
    // ClassWriter.TYPE table then selects how the operands are decoded)
    int opcode = b[u] & 0xFF;
    switch (ClassWriter.TYPE[opcode]) {
    case ClassWriter.NOARG_INSN:
        mv.visitInsn(opcode);
        u += 1;
        break;
    case ClassWriter.IMPLVAR_INSN:
        // Rebases the opcode on ISTORE_0 / ILOAD_0, then reports it as a
        // generic var instruction: (opcode >> 2) picks the base opcode and
        // (opcode & 0x3) is the variable index 0..3.
        if (opcode > Opcodes.ISTORE) {
            opcode -= 59; // ISTORE_0
            mv.visitVarInsn(Opcodes.ISTORE + (opcode >> 2),
                    opcode & 0x3);
        } else {
            opcode -= 26; // ILOAD_0
            mv.visitVarInsn(Opcodes.ILOAD + (opcode >> 2), opcode & 0x3);
        }
        u += 1;
        break;
    case ClassWriter.LABEL_INSN:
        // The jump target is the current offset plus the short operand.
        mv.visitJumpInsn(opcode, labels[offset + readShort(u + 1)]);
        u += 3;
        break;
    case ClassWriter.LABELW_INSN:
        // Same as above with an int operand.
        // NOTE(review): "opcode - 33" presumably maps the wide jump opcodes onto
        // their 16-bit counterparts (JVM spec: goto_w = 200, goto = 167) - confirm.
        mv.visitJumpInsn(opcode - 33, labels[offset + readInt(u + 1)]);
        u += 5;
        break;
    case ClassWriter.WIDE_INSN:
        // The opcode actually being widened is the byte after the prefix;
        // its operands are read as 16-bit values.
        opcode = b[u + 1] & 0xFF;
        if (opcode == Opcodes.IINC) {
            mv.visitIincInsn(readUnsignedShort(u + 2), readShort(u + 4));
            u += 6;
        } else {
            mv.visitVarInsn(opcode, readUnsignedShort(u + 2));
            u += 4;
        }
        break;
    case ClassWriter.TABL_INSN: {
        // skips 0 to 3 padding bytes
        // (4 - (offset & 3) is in 1..4: the opcode byte plus the padding, so
        // u lands on a multiple of 4 relative to codeStart)
        u = u + 4 - (offset & 3);
        // reads instruction
        int label = offset + readInt(u);
        int min = readInt(u + 4);
        int max = readInt(u + 8);
        Label[] table = new Label[max - min + 1];
        u += 12;
        for (int i = 0; i < table.length; ++i) {
            table[i] = labels[offset + readInt(u)];
            u += 4;
        }
        mv.visitTableSwitchInsn(min, max, labels[label], table);
        break;
    }
    case ClassWriter.LOOK_INSN: {
        // skips 0 to 3 padding bytes
        // (same alignment computation as in the TABL_INSN case)
        u = u + 4 - (offset & 3);
        // reads instruction
        int label = offset + readInt(u);
        int len = readInt(u + 4);
        int[] keys = new int[len];
        Label[] values = new Label[len];
        u += 8;
        // Each entry is an int key followed by an int jump delta.
        for (int i = 0; i < len; ++i) {
            keys[i] = readInt(u);
            values[i] = labels[offset + readInt(u + 4)];
            u += 8;
        }
        mv.visitLookupSwitchInsn(labels[label], keys, values);
        break;
    }
    case ClassWriter.VAR_INSN:
        // One-byte operand, read as unsigned.
        mv.visitVarInsn(opcode, b[u + 1] & 0xFF);
        u += 2;
        break;
    case ClassWriter.SBYTE_INSN:
        // No mask here: the operand byte keeps its sign.
        mv.visitIntInsn(opcode, b[u + 1]);
        u += 2;
        break;
    case ClassWriter.SHORT_INSN:
        mv.visitIntInsn(opcode, readShort(u + 1));
        u += 3;
        break;
    case ClassWriter.LDC_INSN:
        // Constant index is a single unsigned byte.
        mv.visitLdcInsn(readConst(b[u + 1] & 0xFF, c));
        u += 2;
        break;
    case ClassWriter.LDCW_INSN:
        // Constant index is an unsigned short.
        mv.visitLdcInsn(readConst(readUnsignedShort(u + 1), c));
        u += 3;
        break;
    case ClassWriter.FIELDORMETH_INSN:
    case ClassWriter.ITFMETH_INSN: {
        // Resolves the referenced item through the items table, then reads
        // the owner class, the name and the descriptor from it.
        int cpIndex = items[readUnsignedShort(u + 1)];
        boolean itf = b[cpIndex - 1] == ClassWriter.IMETH;
        String iowner = readClass(cpIndex, c);
        cpIndex = items[readUnsignedShort(cpIndex + 2)];
        String iname = readUTF8(cpIndex, c);
        String idesc = readUTF8(cpIndex + 2, c);
        // Opcodes below INVOKEVIRTUAL are reported as field instructions,
        // the others as method instructions.
        if (opcode < Opcodes.INVOKEVIRTUAL) {
            mv.visitFieldInsn(opcode, iowner, iname, idesc);
        } else {
            mv.visitMethodInsn(opcode, iowner, iname, idesc, itf);
        }
        // INVOKEINTERFACE is two bytes longer than the other instructions
        // handled by this case.
        if (opcode == Opcodes.INVOKEINTERFACE) {
            u += 5;
        } else {
            u += 3;
        }
        break;
    }
    case ClassWriter.INDYMETH_INSN: {
        // Reads the bootstrap method handle and its arguments from
        // context.bootstrapMethods, then the call site name and descriptor.
        int cpIndex = items[readUnsignedShort(u + 1)];
        int bsmIndex = context.bootstrapMethods[readUnsignedShort(cpIndex)];
        Handle bsm = (Handle) readConst(readUnsignedShort(bsmIndex), c);
        int bsmArgCount = readUnsignedShort(bsmIndex + 2);
        Object[] bsmArgs = new Object[bsmArgCount];
        bsmIndex += 4;
        for (int i = 0; i < bsmArgCount; i++) {
            bsmArgs[i] = readConst(readUnsignedShort(bsmIndex), c);
            bsmIndex += 2;
        }
        cpIndex = items[readUnsignedShort(cpIndex + 2)];
        String iname = readUTF8(cpIndex, c);
        String idesc = readUTF8(cpIndex + 2, c);
        mv.visitInvokeDynamicInsn(iname, idesc, bsm, bsmArgs);
        u += 5;
        break;
    }
    case ClassWriter.TYPE_INSN:
        mv.visitTypeInsn(opcode, readClass(u + 1, c));
        u += 3;
        break;
    case ClassWriter.IINC_INSN:
        // Variable index is read as unsigned, the increment byte keeps its sign.
        mv.visitIincInsn(b[u + 1] & 0xFF, b[u + 2]);
        u += 3;
        break;
    // case MANA_INSN:
    default:
        // Class reference followed by one unsigned byte (the dimension count).
        mv.visitMultiANewArrayInsn(readClass(u + 1, c), b[u + 3] & 0xFF);
        u += 4;
        break;
    }

    // visit the instruction annotations, if any
    // (the first loop passes true as the last argument of visitInsnAnnotation,
    // the second loop passes false; both advance their cursor until the next
    // annotation offset is past the current instruction offset)
    while (tanns != null && tann < tanns.length && ntoff <= offset) {
        if (ntoff == offset) {
            int v = readAnnotationTarget(context, tanns[tann]);
            readAnnotationValues(v + 2, c, true,
                    mv.visitInsnAnnotation(context.typeRef,
                            context.typePath, readUTF8(v, c), true));
        }
        ntoff = ++tann >= tanns.length || readByte(tanns[tann]) < 0x43 ? -1
                : readUnsignedShort(tanns[tann] + 1);
    }
    while (itanns != null && itann < itanns.length && nitoff <= offset) {
        if (nitoff == offset) {
            int v = readAnnotationTarget(context, itanns[itann]);
            readAnnotationValues(v + 2, c, true,
                    mv.visitInsnAnnotation(context.typeRef,
                            context.typePath, readUTF8(v, c), false));
        }
        nitoff = ++itann >= itanns.length
                || readByte(itanns[itann]) < 0x43 ? -1
                : readUnsignedShort(itanns[itann] + 1);
    }
}

答案 1 :(得分:0)

您可以使用java.lang.instrument来获取类文件的字节,并使用ASM来解析它。还有其他库,如Javassist,可以帮助您在更高的抽象级别上完成这件事。ASM是一个底层的字节码插桩(instrumentation)库。

您说已经使用过ASM,但它并没有“开箱即用地提供这些信息”。这具体是指什么?