Question

我正在尝试使用C / C ++构建一个快速的JIT。它应该将我的字节码转换为IA32。是的，我知道libjit和类似的，但我相信它们并不比这简单。我以为我找到了一种更快的方法来构建指令，但我错了 - 传统的开关/案例方法比我的快2倍。我的方法是复制整个块然后填写模板。我知道现代编译器上的switch / case会创建一个跳转表，所以我没有跳转表实现。

我使用了一个50mb的文件，其中包含循环到基准的代码[]的内容。我的配置是：i7,8gb ram，使用VS2010编译。

这是我的代码：

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <time.h>

//unsigned char code[] = {1,2,3,4,4};

void mycopy(unsigned char* to, unsigned char* from, int size) {
    int i = 0;
    while(i < size) {
        /*if (i < size-3) {
            if (*(unsigned int*) &from[i] == 0xFFFFFFFF) {
                *(unsigned int*) &to[i] = 0xC3C3C3C3;
                i += 4;
                continue;
            }
        }*/

        to[i] = from[i];
        i++;
    } 
}

void translateA(unsigned char* code, unsigned char* output, int size) {

    unsigned char A[] = { 3, 1, 1, 1 }; // { size, <bytes...> }
    unsigned char B[] = { 2, 2, 2 };
    unsigned char C[] = { 8, 3, 3, 3, 3, 0xFF, 0xFF, 0xFF, 0xFF };
    unsigned char D[] = { 1, 4 };

    void* templat[] = { &A, &B, &C, &D };

    int i = 0;
    int total = 0;
    while(i < size) {
        int op_index = (int) code[i] - 1;
        unsigned char* instr_buffer = (unsigned char*) templat[op_index];
        int size = (int) instr_buffer[0];
        instr_buffer++;
        mycopy(output+total, instr_buffer,size);
        total += size;
        i++;
    }
}




void translateB(unsigned char* code, unsigned char* output, int size) {
    for(int i = 0; i < size; i++) {
        switch(code[i]) {
            case 1:
                output[0] = 1;
                output[1] = 1;
                output[2] = 1;
                output += 3;
                break;
            case 2:
                output[0] = 2;
                output[1] = 2;
                output += 2;
                break;
            case 3: 
                output[0] = 3;
                output[1] = 3;
                output[2] = 3;
                output[3] = 3;
                output[4] = 0xC3;
                output[5] = 0xC3;
                output[6] = 0xC3;
                output[7] = 0xC3;               
                output += 8;            
                break;
            case 4:
                output[0] = 4;
                output++;
                break;
        }
    }
}

int main(int argc, char* argv[]) {
    // load the 'code' to an array
    FILE* f = fopen("testops.bin", "r+");

    fseek(f, 0, SEEK_END);
    long fsize = ftell(f);
    fseek(f, 0, SEEK_SET);

    unsigned char* code = (unsigned char*) malloc(fsize);
    fread(code, fsize, 1, f);
    fclose(f);

    unsigned char* output = (unsigned char*) malloc(fsize*10);
    memset(output, 0x7A, fsize*10);

    // benchmark it
    time_t start = clock();

    // Replace with translateB. It's ~2x faster
    translateA(code, output, fsize);

    printf("\nelapsed %fs\n\n", (float) (clock()-start) / 1000); 

    printf("OUTPUT: ");
    for(int i=0;i<1024;i++) {
        if (output[i] == 0x7A) break;
        printf("%X", output[i]); 
    }

    printf("\n");

    system("PAUSE");
    return 0;
}

编辑：我的问题是，为什么切换代码更快？有什么我做错了吗？

Answer 1

构建然后反汇编您的程序，您将得到您的回复。

由于编译器优化，切换案例更有效，因为它会像你已经说过的那样创建一个跳转表。

您的方法进行更多的地址查找，并且每个字节码还有一个函数调用和循环。

此外，为什么使用while来表示你知道的迭代次数的循环？编译器可能对for循环进行了额外的优化。

有4个案例切换，我不确定你的编译器会创建一个跳转表，所以真实的例子可能会更快。

简单的字节码转换器

1 个答案: