Question

我想解决一个有趣的学术问题：

在 C 代码中，我试图在运行时动态重新绑定符号，就像 Facebook 的 fishhook 仓库重新绑定函数符号那样。我主要针对的是 Mach-O 可执行文件的 __DATA.__la_symbol_ptr 节中引用的符号。使用 fishhook 的实现时，您需要提供用于替换原始函数的新函数、一个指明要替换哪个函数的字符串，以及一个全局函数指针，用来保存被替换掉的原始函数。

例如，以下代码取自 fishhook 仓库的 README（自述文件）……

/* Signature shared by close and its replacement. */
typedef int close_fn(int);

/* Receives the address of the original close once the symbol is rebound. */
static close_fn *orig_close;

/*
 * Replacement for close: forwards the descriptor to the saved original
 * implementation and returns its result unchanged.
 */
int my_close(int fd)
{
    int result = (*orig_close)(fd);
    return result;
}

...然后在main

/* Rebind the symbol named "close" to my_close. The third field is the address
 * of the global function pointer orig_close; per the description of fishhook,
 * that pointer is used to reach the original function afterwards. The final
 * argument is the number of entries in the rebinding array (one here). */
rebind_symbols((struct rebinding[1]){{"close", my_close, (void *)&orig_close}}, 1);

这很棒，但我希望能够把 my_close 与 close 完全互换：模块中所有对 close 的调用都转到 my_close，而所有对 my_close 的调用都转到 close。例如，我希望我的实现看起来像下面这样，而不是借助一个指向原始 close 的全局函数指针：

/* Desired form of the hook: the inner call is written as my_close so that,
 * once the symbols have been swapped, it would reach the original close.
 * As compiled, the call is a direct call to this same function (it does not
 * go through a symbol stub), so it simply recurses into itself. */
int my_close(int fd) {
    return my_close(fd);
}

不幸的是，由于此符号是在同一模块中被引用的，因此对它的调用是直接调用，而不是通过符号存根进行的。下面是从 main 调用此函数时的汇编代码：

0x100001e00 <+0>:  push   rbp
0x100001e01 <+1>:  mov    rbp, rsp
0x100001e04 <+4>:  sub    rsp, 0x20
0x100001e08 <+8>:  xor    eax, eax
0x100001e0a <+10>: mov    dword ptr [rbp - 0x4], 0x0
0x100001e11 <+17>: mov    dword ptr [rbp - 0x8], edi
0x100001e14 <+20>: mov    qword ptr [rbp - 0x10], rsi
0x100001e18 <+24>: mov    edi, eax
0x100001e1a <+26>: call   0x100001da0               ; my_close at main.m:42
0x100001e1f <+31>: xor    edi, edi
0x100001e21 <+33>: mov    dword ptr [rbp - 0x14], eax
0x100001e24 <+36>: mov    eax, edi
0x100001e26 <+38>: add    rsp, 0x20
0x100001e2a <+42>: pop    rbp
0x100001e2b <+43>: ret

好的，这很容易修复：我可以使用汇编器指令将函数标记为弱符号，并使用 weakref 让编译器不再警告可能的栈溢出（无限递归）。将 my_close 更改为：

/* f is declared as a weak reference (alias) to the symbol "my_close", so the
 * body below can call my_close without naming it directly. */
static int f(int) __attribute__ ((weakref ("my_close")));

/* Weak definition of my_close. According to the disassembly in the text, calls
 * from main now go through the symbol stub, but the call made from inside this
 * function is still emitted as a direct call. */
__attribute__((weak))
int my_close(int fd) {
    return f(fd); /* resolves to my_close via the weakref alias */
}

这样在 main 中就会生成以下汇编代码：

0x100001df0 <+0>:  push   rbp
0x100001df1 <+1>:  mov    rbp, rsp
0x100001df4 <+4>:  sub    rsp, 0x20
0x100001df8 <+8>:  xor    eax, eax
0x100001dfa <+10>: mov    dword ptr [rbp - 0x4], 0x0
0x100001e01 <+17>: mov    dword ptr [rbp - 0x8], edi
0x100001e04 <+20>: mov    qword ptr [rbp - 0x10], rsi
0x100001e08 <+24>: mov    edi, eax
0x100001e0a <+26>: call   0x100001e5e               ; symbol stub for: my_close
0x100001e0f <+31>: xor    edi, edi
0x100001e11 <+33>: mov    dword ptr [rbp - 0x14], eax
0x100001e14 <+36>: mov    eax, edi
0x100001e16 <+38>: add    rsp, 0x20
0x100001e1a <+42>: pop    rbp
0x100001e1b <+43>: ret

所以这就是我卡住的地方：在 my_close 内部引用 my_close 时，总是会生成直接调用。例如，下面是 my_close 的汇编代码：

0x100001dd0 <+0>:  push   rbp
0x100001dd1 <+1>:  mov    rbp, rsp
0x100001dd4 <+4>:  sub    rsp, 0x10
0x100001dd8 <+8>:  mov    dword ptr [rbp - 0x4], edi
0x100001ddb <+11>: mov    edi, dword ptr [rbp - 0x4]
0x100001dde <+14>: call   0x100001dd0               ; <+0> at main.m:44
0x100001de3 <+19>: add    rsp, 0x10
0x100001de7 <+23>: pop    rbp
0x100001de8 <+24>: ret

有没有什么（被我漏掉的）汇编器指令，可以让 my_close 内部对 my_close 的调用也被当作存根调用？是的，我知道可以用 dlsym 取得原始函数，但我很固执 :]

Answer 1

正如我在评论中所说，下面是一个可能的实现，其中 my_close 被声明为一个全局函数指针。

这种方法意味着对符号的每次引用实际上都是对全局变量（函数指针）的引用，可以随时轻松更新。

将以下内容放在 my_close.h 中：

/*
 * Global function pointer through which calls to my_close are routed.
 * NOTE(review): as declared, `volatile` qualifies the int return type rather
 * than the pointer object; `int (*volatile my_close)(int)` would make the
 * pointer itself volatile. Any such change has to be made together with the
 * definition of my_close in the accompanying .c file.
 */
extern volatile int (*my_close)(int);

/* Redirects my_close to func. */
void set_my_close(int (*func)(int));

并将下面的 my_close.c 添加到您的项目中（大部分未经测试）：

/*
 * Definition of the my_close dispatch pointer and its setter.
 *
 * EXCHANGE(ptr, value) atomically stores `value` into `*ptr` and yields the
 * previous contents. It is mapped onto whichever compiler builtin is available.
 */
#if defined(__unix__) || defined(__APPLE__) || defined(__linux__)
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#endif /* __unix__ || __APPLE__ || __linux__ */

/* Select the correct compiler builtin method. */
#if defined(__has_builtin)

#if __has_builtin(__atomic_exchange_n)
#define EXCHANGE(...) __atomic_exchange_n(__VA_ARGS__, __ATOMIC_ACQ_REL)

#elif __has_builtin(__sync_swap)
#define EXCHANGE(...) __sync_swap(__VA_ARGS__)

#else
#error Required builtin "__sync_swap" or "__atomic_exchange_n" missing from compiler.
#endif /* __has_builtin(__atomic_exchange_n) */

#elif __GNUC__ > 3
/*
 * Compilers without __has_builtin (e.g. older GCC): use the legacy __sync
 * exchange. __sync_fetch_and_or would OR the new pointer into the old one
 * instead of replacing it. __sync_lock_test_and_set is only an acquire
 * barrier, so a full barrier is issued first.
 */
#define EXCHANGE(...) (__sync_synchronize(), __sync_lock_test_and_set(__VA_ARGS__))

#else
#error Required builtin "__sync_swap" or "__atomic_exchange_n" not found.
#endif /* defined(__has_builtin) */

/*
 * The dispatch pointer itself; starts out NULL (static storage).
 * NOTE(review): `volatile` here qualifies the int return type, not the pointer
 * object. The spelling is kept so the definition stays compatible with the
 * extern declaration in the header.
 */
volatile int (*my_close)(int);

/*
 * Atomically redirects my_close to `func`. The previous target returned by
 * the exchange is discarded.
 */
void set_my_close(int (*func)(int)) { EXCHANGE(&my_close, func); }

这样可以轻松更新my_close并将其动态路由到不同的函数。

...

使用C11 Atomic操作（据称更具可移植性）的替代选项可能需要my_close.h包含在任何调用my_close的源文件中（以确保调用atomic_load）

以下是未经测试的：

在 my_close.h 中：

#include <stdatomic.h>

/*
 * Dispatch pointer for my_close; the definition lives in my_close.c.
 * NOTE(review): as declared, `volatile _Atomic` qualify the int return type
 * rather than the pointer object; `int (*_Atomic my_close)(int)` would make
 * the pointer itself atomic. That change has to be made here and in the
 * definition in my_close.c at the same time.
 */
extern volatile _Atomic int (*my_close)(int);

/*
 * Redirects my_close to `func`.
 * Declared `static inline`: a plain `inline` definition in a header provides
 * no external definition, so a non-inlined call would fail to link unless
 * some translation unit also declared the function `extern inline`.
 */
static inline void set_my_close(int (*func)(int)) { atomic_store(&my_close, func); }

/* Route every call written as my_close(fd) through an atomic load of the pointer. */
#define my_close(fd) ((atomic_load(&my_close))( fd ))

在 my_close.c 中：

#include "my_close.h"

/* Drop the function-like macro from the header so the name my_close below
 * refers to the object being defined. */
#undef my_close

/* Definition of the dispatch pointer declared extern in my_close.h.
 * NOTE(review): the qualifiers bind to the int return type, not the pointer;
 * keep this spelling in sync with the header declaration if it is changed. */
volatile _Atomic int (*my_close)(int);

我手边没有电脑，没有运行或测试过上述任何代码，因此请仅将其视为一个大纲。

Answer 2

对于dyld导入的函数，可以在运行时替换被调用的地址。以下代码依赖于dyld存根地址位于__DATA段的开头，后跟全局变量。这里的代码片段从全局变量地址开始执行向后搜索。

/* Holds the address of the real strlen once main() has patched the stub slot.
 * Being a global, it is also the starting point of the backward search.
 * (This snippet needs <string.h> for strlen and size_t.) */
size_t (*orgStrlenPtr)(const char *s);

/* Replacement installed in place of strlen; forwards to the saved original. */
size_t myStrlen(const char *s)
{
    return orgStrlenPtr(s);
}

/*
 * Searches backward from the address of orgStrlenPtr for a pointer-sized slot
 * whose value equals the address of strlen, saves that value in orgStrlenPtr
 * and overwrites the slot with myStrlen.
 *
 * The search relies on the memory layout described in the accompanying text
 * (stub pointers placed before the globals) and has no lower bound; it is a
 * demonstration, not portable C.
 */
int main(int argc, const char * argv[]) {
    (void)argc;
    (void)argv;

    /* Walk in pointer-sized steps: both the global and the slots being
     * searched are pointer-aligned, so every read stays aligned. */
    void **slot = (void **)&orgStrlenPtr;
    while (*slot != (void *)strlen) {
        slot--;
    }

    /* Explicit casts: object and function pointers are not implicitly
     * convertible in ISO C. */
    orgStrlenPtr = (size_t (*)(const char *))*slot;
    *slot = (void *)myStrlen;
    return 0;
}

功能设置，标记 C 函数始终使用符号存根而不是直接调用

2 个答案: