在x86-64 / Windows

时间:2018-05-29 02:33:33

标签: c windows assembly x86-64

我正在为x86-64实现自己的光纤库。部分原因是缺乏跨平台的标准上下文切换(GCC / Linux具有makecontext,其中void * s为varargs,而Windows的光纤API为1 void * arg)以及API设计和实现中的学习练习。在我的API中,一个协程函数有两个参数:一个协程上下文和一个void *参数,所以我正在学习它是如何工作的。我将从调用API开始,即C。

struct win64_mcontext {
  U64 rdi, rsi, rbx, rbp, r12, r13, r14, r15;
  U64 rax, rsp, rip;
  U64 rcx, rdx, r8, r9;
};

struct coroutine {
  struct win64_mcontext caller;
  struct win64_mcontext callee;
  U32 state;
};

void coprepare(struct coroutine **co,
           void *stack, U64 stack_size, cofunc_t func)
{
  *co = malloc(sizeof **co); /* TODO: replace with something cheaper */
  _coprepare(&(*co)->caller, &(*co)->callee, stack, stack_size, func);
}

void coenter(struct coroutine *co, void *enter_arg)
{
   _coenter(&co->caller, &co->callee, enter_arg);
}

void coyield(struct coroutine *co, void *yield_arg)
{
  _coyield(&co->callee, &co->caller, yield_arg);
}

int  coresume(struct coroutine *co)
{
  _coresume(&co->caller, &co->callee);
  return 0; /* punt this for now */
}

这是驱动整个事物的组件。 _coenter,_coyield和_coresume都实现为jmp __cotransfer

;;; _coprepare(struct win64_mcontext *old, struct win64_mcontext *new,
;;;            void *stack, U64 stack_size,
;;;            cofunc_t func);
;;; RCX     -> old
;;; RDX     -> new
;;; R8      -> stack
;;; R9      -> stack_size
;;; RSP + ? -> func
_coprepare proc
    ;; save non-volatile GPRs in 'old'
    mov [RCX + OFF_RSI], RSI
    mov [RCX + OFF_RDI], RDI
    mov [RCX + OFF_RBP], RBP
    mov [RCX + OFF_RBX], RBX
    mov [RCX + OFF_R12], R12
    mov [RCX + OFF_R13], R13
    mov [RCX + OFF_R14], R14
    mov [RCX + OFF_R15], R15

    ;; save stack frame info in 'old'
    mov R10, RSP
    mov R11, OFFSET _coyield

    mov [RCX + OFF_RSP], R10
    mov [RCX + OFF_RIP], R11

    ;; init non-volatile GPRs in 'new'
    lea R10, [R8 + R9]       ; new RSP, = stack + stack_size
    lea R11, [RBP - 32]  ; load func

    xor EAX, EAX
    mov [RDX + OFF_RSI], RAX
    mov [RDX + OFF_RDI], RAX
    mov [RDX + OFF_RBX], RAX
    mov [RDX + OFF_RBP], R10
    mov [RDX + OFF_R12], RAX
    mov [RDX + OFF_R13], RAX
    mov [RDX + OFF_R14], RAX
    mov [RDX + OFF_R15], RAX

    mov [RDX + OFF_RSP], R10
    mov [RDX + OFF_RIP], R11

    ret
_coprepare endp

;;; __cotransfer(struct win64_context *old, struct win64_mcontext *new, void *trans_arg);
;;; RCX : old
;;; RDX : new
;;; R8  : trans_arg
__cotransfer proc
    ;; save non-volatile GPRs
    mov [RCX + OFF_RSI], RSI
    mov [RCX + OFF_RDI], RDI
    mov [RCX + OFF_RBX], RBX
    mov [RCX + OFF_RBP], RBP
    mov [RCX + OFF_R12], R12
    mov [RCX + OFF_R13], R13
    mov [RCX + OFF_R14], R14
    mov [RCX + OFF_R15], R15

    ;; save argument GPRs
    mov [RCX + OFF_RCX], RCX
    mov [RCX + OFF_RDX], RDX
    mov [RCX + OFF_R8], R8
    mov [RCX + OFF_R9], R9

    ;; save stack frame info
    lea R10, [RSP - 8]  ; save SP, exclude IP
    lea R11, [RSP]      ; save IP

    mov [RCX + OFF_RSP], R10
    mov [RCX + OFF_RIP], R11

    ;; switch stacks
    mov RAX, RSP
    mov RSP, [RDX + OFF_RSP]
    mov [RCX + OFF_RSP], RAX

    ;; load non-volatile GPRs
    mov RSI, [RDX + OFF_RSI]
    mov RDI, [RDX + OFF_RDI]
    mov RBX, [RDX + OFF_RBX]
    mov RBP, [RDX + OFF_RBP]
    mov R12, [RDX + OFF_R12]
    mov R13, [RDX + OFF_R13]
    mov R14, [RDX + OFF_R14]
    mov R15, [RDX + OFF_R15]

    ;; load argument registers
    mov R10, RCX
    mov R11, RDX

    mov RCX, [R11 + OFF_RCX]
    mov RDX, [R11 + OFF_RDX]
    mov R8,  [R11 + OFF_R8]
    mov R9,  [R11 + OFF_R9]

    ; push new return address
    mov RAX, [R11 + OFF_RIP]
    push RAX        
    ret ; jump to new return address
__cotransfer endp

我错过了什么吗?它总是在__cotransfer的某个地方崩溃。我不知道在调试过程中我到底在哪里,所以我必须做错事,比如破坏BP或IP或SP。我丢失了堆栈,因为我切换了它,MSVC无法弄清楚我们现在的位置。我很迷茫,我需要有经验的人提供帮助。

1 个答案:

答案 0 :(得分:1)

按设计我们需要5个例程

  • 用于将当前线程转换为光纤(为光纤分配光纤上下文) 当前线程)并将光纤转换回线程(释放此上下文)
  • 用于创建和删除新光纤上下文
  • 和所选光纤的开关上下文的单个例程

在光纤上下文中我们需要将光纤的当前堆栈指针和指针保存到它分配的堆栈中(当我们决定删除光纤时,它是免费的)。从Windows视图 - 我们还必须为每个光纤都有自己的NT_TIB结构,并在切换光纤上下文时切换StackBaseStackLimit等。否则将不会工作异常句柄和堆栈中的额外分配位置(保留转换为已提交内存和移动防护页面)。结果NT_TIB还需要保存在光纤环境中。光纤寄存器我们可以直接保存在堆栈中。

Windows的最小实现(当然在这里存在就绪实现)看起来像:

c / c ++ part:

typedef struct _INITIAL_TEB
{
    PVOID OldStackBase;
    PVOID OldStackLimit;
    PVOID StackBase;
    PVOID StackLimit;
    PVOID StackAllocationBase;
} INITIAL_TEB, *PINITIAL_TEB;

extern "C"
NTSYSAPI 
NTSTATUS 
NTAPI RtlFreeUserStack  (   _In_ PVOID      AllocationBase  );

extern "C"
NTSYSAPI 
NTSTATUS 
NTAPI   
RtlCreateUserStack (
                    _In_opt_ SIZE_T CommittedStackSize, 
                    _In_opt_ SIZE_T MaximumStackSize, 
                    _In_opt_ ULONG_PTR ZeroBits, 
                    _In_ SIZE_T PageSize, 
                    _In_ ULONG_PTR ReserveAlignment, 
                    _Out_ PINITIAL_TEB InitialTeb);

struct FIBER_CONTEXT
{
    NT_TIB Tib;
    PVOID StackPointer;
    PVOID StackAllocationBase;
};

extern "C"
{
    void __cdecl FiberStart();
    void __fastcall SwitchToContext(FIBER_CONTEXT* ctx);
}

FIBER_CONTEXT* MyConvertThreadToFiber()
{
    if (FIBER_CONTEXT* ctx = new FIBER_CONTEXT)
    {
        ((NT_TIB*)NtCurrentTeb())->FiberData = ctx;
        return ctx;
    }

    return 0;
}

void MyConvertFiberToThread()
{
    if (FIBER_CONTEXT* ctx = (FIBER_CONTEXT*)((NT_TIB*)NtCurrentTeb())->FiberData)
    {
        delete ctx;
        ((NT_TIB*)NtCurrentTeb())->FiberData = 0;
    }
}

FIBER_CONTEXT* WINAPI MyCreateFiber(
                          __in      SIZE_T dwStackSize,
                          __in      PFIBER_START_ROUTINE lpStartAddress,
                          __in_opt  PVOID lpParameter
                          )
{
    INITIAL_TEB InitialTeb;
    NTSTATUS status = RtlCreateUserStack(0, dwStackSize, 0, 0x1000, 0x10000, &InitialTeb);

    if (0 <= status)
    {
        if (FIBER_CONTEXT* ctx = new FIBER_CONTEXT)
        {
            ctx->StackAllocationBase = InitialTeb.StackAllocationBase;
            NT_TIB* Tib = ((NT_TIB*)NtCurrentTeb());

            ctx->Tib.ArbitraryUserPointer = 0;
            ctx->Tib.ExceptionList = 0;
            ctx->Tib.FiberData = ctx;
            ctx->Tib.StackBase = InitialTeb.StackBase;
            ctx->Tib.StackLimit = InitialTeb.StackLimit;
            ctx->Tib.SubSystemTib = Tib->SubSystemTib;
            ctx->Tib.Self = Tib->Self;

            void** StackBase = (void**)InitialTeb.StackBase;
            ctx->StackPointer = StackBase - (4 + 1 + 8);
            StackBase[-3] = lpStartAddress;
            StackBase[-4] = lpParameter;
            StackBase[-5] = FiberStart;
            return ctx;
        }
        RtlFreeUserStack(InitialTeb.StackAllocationBase);
    }

    return 0;
}

VOID WINAPI MyDeleteFiber(FIBER_CONTEXT* ctx)
{
    RtlFreeUserStack(ctx->StackAllocationBase);
    delete ctx;
}

asm(对于x64)实现部分:

NT_TIB STRUCT
    ExceptionList DQ ?
    StackBase DQ ?
    StackLimit DQ ?
    SubSystemTib DQ ?
    FiberData DQ ?
    ArbitraryUserPointer DQ ?
    Self DQ ?
NT_TIB ENDS

FIBER_CONTEXT STRUCT
    Tib NT_TIB <?>
    StackPointer DQ ?
FIBER_CONTEXT ENDS

extern __imp_ExitThread:QWORD

_TEXT segment 'CODE'

FiberStart proc
    mov     rcx,[rsp]
    call    qword ptr [rsp + 8]
    mov     ecx,eax
    call    [__imp_ExitThread]
FiberStart endp

SwitchToContext proc
    push    r15
    push    r14
    push    r13
    push    r12
    push    rsi
    push    rdi
    push    rbx
    push    rbp

    mov     rax,gs:[NT_TIB.Self]    ; rax -> NT_TIB

    mov     rdx,[rax + NT_TIB.FiberData]    ; current fiber data

    mov     [rdx + FIBER_CONTEXT.StackPointer],rsp  ; save current rsp
    mov     rsp,[rcx + FIBER_CONTEXT.StackPointer]  ; set new rsp

    ; save NT_TIB
    lea     rdi,[rdx + FIBER_CONTEXT.Tib]
    mov     rsi,rax
    mov     rdx,rcx
    mov     rcx, SIZEOF NT_TIB / SIZEOF QWORD
    rep     movsq

    ; set NT_TIB
    mov     rdi,rax
    lea     rsi,[rdx + FIBER_CONTEXT.Tib]
    mov     rcx, SIZEOF NT_TIB / SIZEOF QWORD
    rep     movsq

    pop     rbp
    pop     rbx
    pop     rdi
    pop     rsi
    pop     r12
    pop     r13
    pop     r14
    pop     r15
    ret
SwitchToContext endp

_TEXT ENDS

END

和使用示例:

struct FCTX 
{
    FIBER_CONTEXT* MainFiber, *WorkFiber;
    PCSTR sz;
};

void WINAPI FiberProc(FCTX* ctx)
{
    for (;;)
    {
        DbgPrint("%s\n", ctx->sz);
        SwitchToContext(ctx->MainFiber);
    }
}

void test()
{
    FCTX ctx;
    if (ctx.MainFiber = MyConvertThreadToFiber())
    {
        if (ctx.WorkFiber = MyCreateFiber(0, (PFIBER_START_ROUTINE)FiberProc, &ctx))
        {
            ctx.sz = "task #1";
            SwitchToContext(ctx.WorkFiber);
            ctx.sz = "task #2";
            SwitchToContext(ctx.WorkFiber);
            MyDeleteFiber(ctx.WorkFiber);
        }
        MyConvertFiberToThread();
    }
}