clang + llvm为JIT编译函数提供了一个错误的函数指针

时间:2015-05-10 16:57:15

标签: c++ clang llvm jit

我正在尝试使用clang + llvm 3.6来编译几个C函数(每个函数最终都可以非常大)。

不幸的是,LLVM提供的函数指针使程序成为SEGFAULT。

到目前为止,我有以下代码:

#include <iostream>

#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Basic/DiagnosticOptions.h>
#include <clang/Basic/TargetInfo.h>
#include <clang/Basic/SourceManager.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Frontend/CompilerInvocation.h>
#include <clang/Frontend/FrontendDiagnostic.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/Frontend/Utils.h>
#include <clang/Parse/ParseAST.h>
#include <clang/Lex/Preprocessor.h>

#include <llvm/Analysis/Passes.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/ExecutionEngine/MCJIT.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/IR/Verifier.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/Bitcode/ReaderWriter.h>
#include <llvm/Support/ManagedStatic.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_os_ostream.h>
#include <llvm/Linker/Linker.h>

int main(int argc, char *argv[]) {

    using namespace llvm;
    using namespace clang;

    static const char* clangArgv [] = {"program", "-x", "c", "string-input"};
    static const int clangArgc = sizeof (clangArgv) / sizeof (clangArgv[0]);

    // C functions to be compiled (they could eventually be extremely large)
    std::map<std::string, std::string> func2Source;
    func2Source["getOne"] = "int getOne() {return 1;}";
    func2Source["getTwo"] = "int getTwo() {return 2;}";

    llvm::InitializeAllTargets();
    llvm::InitializeAllAsmPrinters();

    std::unique_ptr<llvm::Linker> linker;
    std::unique_ptr<llvm::LLVMContext> context(new llvm::LLVMContext());
    std::unique_ptr<llvm::Module> module;

    /**
    * add each C function to the same module
    */
    for (const auto& p : func2Source) {
        const std::string& source = p.second;

        IntrusiveRefCntPtr<DiagnosticOptions> diagOpts = new DiagnosticOptions();
        TextDiagnosticPrinter *diagClient = new TextDiagnosticPrinter(llvm::errs(), &*diagOpts); // will be owned by diags
        IntrusiveRefCntPtr<DiagnosticIDs> diagID(new DiagnosticIDs());
        IntrusiveRefCntPtr<DiagnosticsEngine> diags(new DiagnosticsEngine(diagID, &*diagOpts, diagClient));

        ArrayRef<const char *> args(clangArgv + 1, // skip program name
                                    clangArgc - 1);
        std::unique_ptr<CompilerInvocation> invocation(createInvocationFromCommandLine(args, diags));
        if (invocation.get() == nullptr) {
            std::cerr << "Failed to create compiler invocation" << std::endl;
            exit(1);
        }

        CompilerInvocation::setLangDefaults(*invocation->getLangOpts(), IK_C,
                                            LangStandard::lang_unspecified);
        invocation->getFrontendOpts().DisableFree = false; // make sure we free memory (by default it does not)

        // Create a compiler instance to handle the actual work.
        CompilerInstance compiler;
        compiler.setInvocation(invocation.release());

        // Create the compilers actual diagnostics engine.
        compiler.createDiagnostics(); //compiler.createDiagnostics(argc, const_cast<char**> (argv));
        if (!compiler.hasDiagnostics()) {
            std::cerr << "No diagnostics" << std::endl;
            exit(1);
        }

        // Create memory buffer with source text
        std::unique_ptr<llvm::MemoryBuffer> buffer = llvm::MemoryBuffer::getMemBufferCopy(source, "SIMPLE_BUFFER");
        if (buffer.get() == nullptr) {
            std::cerr << "Failed to create memory buffer" << std::endl;
            exit(1);
        }

        // Remap auxiliary name "string-input" to memory buffer
        PreprocessorOptions& po = compiler.getInvocation().getPreprocessorOpts();
        po.addRemappedFile("string-input", buffer.release());

        // Create and execute the frontend to generate an LLVM bitcode module.
        clang::EmitLLVMOnlyAction action(context.get());
        if (!compiler.ExecuteAction(action)) {
            std::cerr << "Failed to emit LLVM bitcode" << std::endl;
            exit(1);
        }

        std::unique_ptr<llvm::Module> module1 = action.takeModule();
        if (module1.get() == nullptr) {
            std::cerr << "No module" << std::endl;
            exit(1);
        }

        if (linker.get() == nullptr) {
            module.reset(module1.release());
            linker.reset(new llvm::Linker(module.get()));
        } else {
            if (linker->linkInModule(module1.release())) {
                std::cerr << "LLVM failed to link module" << std::endl;
                exit(1);
            }
        }
    }

    llvm::InitializeNativeTarget();


    llvm::Module* m = module.get();

    std::string errStr;
    std::unique_ptr<llvm::ExecutionEngine> executionEngine(EngineBuilder(std::move(module))
                                                        .setErrorStr(&errStr)
                                                        .setEngineKind(EngineKind::JIT)
                                                        .setMCJITMemoryManager(std::unique_ptr<SectionMemoryManager>(new SectionMemoryManager()))
                                                        .setVerifyModules(true)
                                                        .create());
    if (!executionEngine.get()) {
        std::cerr << "Could not create ExecutionEngine: " + errStr << std::endl;
        exit(1);
    }

    executionEngine->finalizeObject();

    /**
    * Lets try to use each function
    */
    for (const auto& p : func2Source) {
        const std::string& funcName = p.first;

        llvm::Function* func = m->getFunction(funcName);
        if (func == nullptr) {
            std::cerr << "Unable to find function '" << funcName << "' in LLVM module" << std::endl;
            exit(1);
        }

        // Validate the generated code, checking for consistency.
        llvm::raw_os_ostream os(std::cerr);
        bool failed = llvm::verifyFunction(*func, &os);
        if (failed) {
            std::cerr << "Failed to verify function '" << funcName << "' in LLVM module" << std::endl;
            exit(1);
        }

#if 1
        func->dump(); // Dump the function for exposition purposes.

        // JIT the function, returning a function pointer.
        void *fPtr = executionEngine->getPointerToFunction(func); ///// BAD function pointer!!!!

        // Cast it to the right type (takes no arguments, returns a double) so we
        // can call it as a native function.
        int (*funcPtr)();
        *(int **) (&funcPtr) = *(int **) fPtr;
        int v = (*funcPtr)();

        std::cout << "return: " << v << std::endl;

#else // THIS DOES NOT WORK EITHER:
        // JIT the function, returning a function pointer.
        uint64_t fPtr = executionEngine->getFunctionAddress(funcName); ///// BAD function pointer!!!!
        if (fPtr == 0) {
            std::cerr << "Unable to find function '" << funcName << "' in LLVM module" << std::endl;
            exit(1);
        }

        int (*funcPtr)();
        *(int **) (&funcPtr) = *(int **) fPtr;
        int v = (*funcPtr)();

        std::cout << "return: " << v << std::endl;
#endif
    }
}

任何人都可以帮我解决这个问题吗?

(我在linux-ubuntu 15.04中运行它)

1 个答案:

答案 0 :(得分:7)

这项任务非常混乱:

*(int **) (&funcPtr) = *(int **) fPtr;

它不仅违反严格别名来编写int*,然后将其用作下一行的函数指针,但数据指针通常不足以容纳整个代码指针。

安全方法是

memcpy(funcPtr, fPtr, sizeof funcPtr);

funcPtr = reinterpret_cast<decltype(funcPtr)>(fPtr);