寻找重定位的来源

时间:2013-09-28 12:34:48

标签: c++ c linux shared-libraries relocation

使用 Ulrich Drepper 的 relinfo.pl 脚本,可以轻松统计 DSO 的重定位数量,但它不适用于 .o 文件。

假设我有一个大型共享库,并且对它的重定位数量不满意。有没有办法找出这些重定位来自哪里(符号,或者至少是哪个 .o 文件),以便检查它们是否属于易于修复的类型(例如:const char *str = "Hello World"; -> const char str[] = "Hello World";)?

2 个答案:

答案 0 :(得分:12)

简答:改用 objdump 或 readelf。

详细回答:让我们看一个实际的例子 example.c:

#include <stdio.h>

/*
 * File-scope strings covering the combinations of linkage (static vs.
 * external), storage form (array vs. pointer) and const qualification.
 * The placement notes refer to the unoptimized x86-64 GCC object file whose
 * objdump listings appear later in this article: the array forms need no
 * data relocation, while each pointer form needs one R_X86_64_64 relocation
 * to reach its string literal.
 */
static const char global1[] = "static const char []";              /* array, .rodata, local symbol */
static const char *global2 = "static const char *";                /* mutable pointer, .data, relocated */
static const char *const global3 = "static const char *const";     /* const pointer, .rodata, relocated */
const char global4[] = "const char []";                            /* array, .rodata, global symbol */
const char *global5 = "const char *";                              /* mutable pointer, .data, relocated */
const char *const global6 = "const char *const";                   /* const pointer, .rodata, relocated */
char global7[] = "char []";                                        /* writable array, .data */
char *global8 = "char *";                                          /* mutable pointer, .data, relocated */
char *const global9 = "char *const";                               /* const pointer, .rodata, relocated */

/*
 * Declares block-scope counterparts of the nine file-scope strings, then
 * prints all eighteen strings, one per line, under the headings "Global:"
 * and "Local:". Always returns 0.
 */
int main(void)
{
    /* Static storage duration: these three appear in the object file's
     * symbol table (as local1.NNNN etc. in the listing in this article). */
    static const char local1[] = "static const char []";
    static const char *local2 = "static const char *";
    static const char *const local3 = "static const char *const";
    /* Automatic storage duration: per the article, these six get no
     * symbol-table entries of their own. */
    const char local4[] = "const char []";
    const char *local5 = "const char *";
    const char *const local6 = "const char *const";
    char local7[] = "char []";
    char *local8 = "char *";
    char *const local9 = "char *const";

    /* NOTE(review): the article's listings show puts/putchar symbols,
     * presumably GCC's substitutes for the constant-string printf calls. */
    printf("Global:\n");
    /* The format string "\t%s\n" is repeated in each of the 18 calls below;
     * the article identifies it as the target of 18 relocations. */
    printf("\t%s\n", global1);
    printf("\t%s\n", global2);
    printf("\t%s\n", global3);
    printf("\t%s\n", global4);
    printf("\t%s\n", global5);
    printf("\t%s\n", global6);
    printf("\t%s\n", global7);
    printf("\t%s\n", global8);
    printf("\t%s\n", global9);
    printf("\n");
    printf("Local:\n");
    printf("\t%s\n", local1);
    printf("\t%s\n", local2);
    printf("\t%s\n", local3);
    printf("\t%s\n", local4);
    printf("\t%s\n", local5);
    printf("\t%s\n", local6);
    printf("\t%s\n", local7);
    printf("\t%s\n", local8);
    printf("\t%s\n", local9);

    return 0;
}

您可以使用例如以下命令将其编译为目标文件:

gcc -W -Wall -c example.c

并使用以下命令将其编译为可执行文件:

gcc -W -Wall example.c -o example

您可以使用objdump -tr example.o转储(非动态)目标文件的符号和重定位信息,或objdump -TtRr example转储可执行文件(和动态目标文件)的相同信息。使用

objdump -t example.o

在x86-64上我得到了

example.o:     file format elf64-x86-64

SYMBOL TABLE:
0000000000000000 l    df *ABS*  0000000000000000 example.c
0000000000000000 l    d  .text  0000000000000000 .text
0000000000000000 l    d  .data  0000000000000000 .data
0000000000000000 l    d  .bss   0000000000000000 .bss
0000000000000000 l    d  .rodata    0000000000000000 .rodata
0000000000000000 l     O .rodata    0000000000000015 global1
0000000000000000 l     O .data  0000000000000008 global2
0000000000000048 l     O .rodata    0000000000000008 global3
00000000000000c0 l     O .rodata    0000000000000015 local1.2053
0000000000000020 l     O .data  0000000000000008 local2.2054
00000000000000d8 l     O .rodata    0000000000000008 local3.2055
0000000000000000 l    d  .note.GNU-stack    0000000000000000 .note.GNU-stack
0000000000000000 l    d  .eh_frame  0000000000000000 .eh_frame
0000000000000000 l    d  .comment   0000000000000000 .comment
0000000000000050 g     O .rodata    000000000000000e global4
0000000000000008 g     O .data  0000000000000008 global5
0000000000000080 g     O .rodata    0000000000000008 global6
0000000000000010 g     O .data  0000000000000008 global7
0000000000000018 g     O .data  0000000000000008 global8
00000000000000a0 g     O .rodata    0000000000000008 global9
0000000000000000 g     F .text  000000000000027a main
0000000000000000         *UND*  0000000000000000 puts
0000000000000000         *UND*  0000000000000000 printf
0000000000000000         *UND*  0000000000000000 putchar
0000000000000000         *UND*  0000000000000000 __stack_chk_fail

输出格式在 man 1 objdump 的 -t 选项下有说明。请注意,第二“列”实际上是固定宽度的:七个字符宽,描述对象的类型。第三列是节名称:*UND* 表示未定义,.text 表示代码,.rodata 表示只读(不可变)数据,.data 表示已初始化的可变数据,.bss 表示未初始化的可变数据,等等。

从上面的符号表可以看到,local4、local5、local6、local7、local8 和 local9 这些变量实际上根本没有在符号表中获得条目。这是因为它们是 main() 的局部变量。它们引用的字符串内容存储在 .data 或 .rodata 中(或在运行时动态构建),具体取决于编译器认为哪种方式最好。

接下来让我们看一下重定位记录。使用

objdump -r example.o

我得到了

example.o:     file format elf64-x86-64

RELOCATION RECORDS FOR [.text]:
OFFSET           TYPE              VALUE
0000000000000037 R_X86_64_32S      .rodata+0x000000000000005e
0000000000000040 R_X86_64_32S      .rodata+0x000000000000006b
0000000000000059 R_X86_64_32S      .rodata+0x0000000000000088
0000000000000062 R_X86_64_32S      .rodata+0x000000000000008f
0000000000000067 R_X86_64_32       .rodata+0x00000000000000a8
000000000000006c R_X86_64_PC32     puts-0x0000000000000004
0000000000000071 R_X86_64_32       .rodata+0x00000000000000b0
0000000000000076 R_X86_64_32       .rodata
0000000000000083 R_X86_64_PC32     printf-0x0000000000000004
000000000000008a R_X86_64_PC32     .data-0x0000000000000004
000000000000008f R_X86_64_32       .rodata+0x00000000000000b0
000000000000009f R_X86_64_PC32     printf-0x0000000000000004
00000000000000a6 R_X86_64_PC32     .rodata+0x0000000000000044
00000000000000ab R_X86_64_32       .rodata+0x00000000000000b0
00000000000000bb R_X86_64_PC32     printf-0x0000000000000004
00000000000000c0 R_X86_64_32       .rodata+0x00000000000000b0
00000000000000c5 R_X86_64_32       global4
00000000000000d2 R_X86_64_PC32     printf-0x0000000000000004
00000000000000d9 R_X86_64_PC32     global5-0x0000000000000004
00000000000000de R_X86_64_32       .rodata+0x00000000000000b0
00000000000000ee R_X86_64_PC32     printf-0x0000000000000004
00000000000000f5 R_X86_64_PC32     global6-0x0000000000000004
00000000000000fa R_X86_64_32       .rodata+0x00000000000000b0
000000000000010a R_X86_64_PC32     printf-0x0000000000000004
000000000000010f R_X86_64_32       .rodata+0x00000000000000b0
0000000000000114 R_X86_64_32       global7
0000000000000121 R_X86_64_PC32     printf-0x0000000000000004
0000000000000128 R_X86_64_PC32     global8-0x0000000000000004
000000000000012d R_X86_64_32       .rodata+0x00000000000000b0
000000000000013d R_X86_64_PC32     printf-0x0000000000000004
0000000000000144 R_X86_64_PC32     global9-0x0000000000000004
0000000000000149 R_X86_64_32       .rodata+0x00000000000000b0
0000000000000159 R_X86_64_PC32     printf-0x0000000000000004
0000000000000163 R_X86_64_PC32     putchar-0x0000000000000004
0000000000000168 R_X86_64_32       .rodata+0x00000000000000b5
000000000000016d R_X86_64_PC32     puts-0x0000000000000004
0000000000000172 R_X86_64_32       .rodata+0x00000000000000b0
0000000000000177 R_X86_64_32       .rodata+0x00000000000000c0
0000000000000184 R_X86_64_PC32     printf-0x0000000000000004
000000000000018b R_X86_64_PC32     .data+0x000000000000001c
0000000000000190 R_X86_64_32       .rodata+0x00000000000000b0
00000000000001a0 R_X86_64_PC32     printf-0x0000000000000004
00000000000001a7 R_X86_64_PC32     .rodata+0x00000000000000d4
00000000000001ac R_X86_64_32       .rodata+0x00000000000000b0
00000000000001bc R_X86_64_PC32     printf-0x0000000000000004
00000000000001c1 R_X86_64_32       .rodata+0x00000000000000b0
00000000000001d6 R_X86_64_PC32     printf-0x0000000000000004
00000000000001db R_X86_64_32       .rodata+0x00000000000000b0
00000000000001ef R_X86_64_PC32     printf-0x0000000000000004
00000000000001f4 R_X86_64_32       .rodata+0x00000000000000b0
0000000000000209 R_X86_64_PC32     printf-0x0000000000000004
000000000000020e R_X86_64_32       .rodata+0x00000000000000b0
0000000000000223 R_X86_64_PC32     printf-0x0000000000000004
0000000000000228 R_X86_64_32       .rodata+0x00000000000000b0
000000000000023d R_X86_64_PC32     printf-0x0000000000000004
0000000000000242 R_X86_64_32       .rodata+0x00000000000000b0
0000000000000257 R_X86_64_PC32     printf-0x0000000000000004
0000000000000271 R_X86_64_PC32     __stack_chk_fail-0x0000000000000004

RELOCATION RECORDS FOR [.data]:
OFFSET           TYPE              VALUE
0000000000000000 R_X86_64_64       .rodata+0x0000000000000015
0000000000000008 R_X86_64_64       .rodata+0x000000000000005e
0000000000000018 R_X86_64_64       .rodata+0x0000000000000088
0000000000000020 R_X86_64_64       .rodata+0x0000000000000015

RELOCATION RECORDS FOR [.rodata]:
OFFSET           TYPE              VALUE
0000000000000048 R_X86_64_64       .rodata+0x0000000000000029
0000000000000080 R_X86_64_64       .rodata+0x000000000000006b
00000000000000a0 R_X86_64_64       .rodata+0x000000000000008f
00000000000000d8 R_X86_64_64       .rodata+0x0000000000000029

RELOCATION RECORDS FOR [.eh_frame]:
OFFSET           TYPE              VALUE
0000000000000020 R_X86_64_PC32     .text

重定位记录按其重定位所在的节进行分组。由于字符串内容位于 .rodata 或 .data 节中,我们可以只关注 VALUE 以 .data 或 .rodata 开头的重定位。(可变字符串,如 char global7[] = "char []";,存储在 .data 中;不可变字符串和字符串字面量存储在 .rodata 中。)

如果在启用调试符号的情况下编译代码,就更容易确定哪个变量引用了哪个字符串;不过我可能只会直接查看每个重定位值(目标)处的实际内容,以确定哪些对不可变字符串的引用需要修复。

命令组合

objdump -r example.o | awk '($3 ~ /^\..*\+/) { t = $3; sub(/\+/, " ", t); n[t]++ } END { for (r in n) printf "%d %s\n", n[r], r }' | sort -g

将输出每个目标的重定位数,然后是目标节,然后是该节中的目标偏移量,并按重定位次数排序,在重定位中出现次数最多的目标排在最后。也就是说,上面输出的最后几行才是您需要关注的。对我来说,我得到

1 .rodata
1 .rodata 0x0000000000000044
1 .rodata 0x00000000000000a8
1 .rodata 0x00000000000000b5
1 .rodata 0x00000000000000c0
1 .rodata 0x00000000000000d4
2 .rodata 0x0000000000000015
2 .rodata 0x0000000000000029
2 .rodata 0x000000000000005e
2 .rodata 0x000000000000006b
2 .rodata 0x0000000000000088
2 .rodata 0x000000000000008f
18 .rodata 0x00000000000000b0

如果我添加优化(gcc -W -Wall -O3 -fomit-frame-pointer -c example.c),结果为

1 .rodata 0x0000000000000020
1 .rodata 0x0000000000000040
1 .rodata.str1.1
1 .rodata.str1.1 0x0000000000000058
2 .rodata.str1.1 0x000000000000000d
2 .rodata.str1.1 0x0000000000000021
2 .rodata.str1.1 0x000000000000005f
2 .rodata.str1.1 0x000000000000006c
3 .rodata.str1.1 0x000000000000003a
3 .rodata.str1.1 0x000000000000004c
18 .rodata.str1.1 0x0000000000000008

这表明编译器选项确实有很大的影响,但无论如何都有一个目标被使用了 18 次:.rodata 节偏移 0xb0 处(如果在编译时启用优化,则为 .rodata.str1.1 节偏移 0x8 处)。

这就是 "\t%s\n" 字符串字面量。

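将原始程序修改为

    char *local8 = "char *";
    char *const local9 = "char *const";

    const char *const fmt = "\t%s\n";

    printf("Global:\n");
    printf(fmt, global1);
    printf(fmt, global2);

等等,用不可变的字符串指针 fmt 替换格式字符串,就完全消除了这 18 个重定位。(当然,您也可以使用等效的 const char fmt[] = "\t%s\n";。)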

上述分析表明,至少对于 GCC-4.6.3,大多数可避免的重定位是由(重复使用的)字符串字面量引起的。用 const char 数组(const char fmt[] = "\t%s\n";)或指向 const char 的 const 指针(const char *const fmt = "\t%s\n";)替换它们——两种情况都会把内容放到只读的 .rodata 节中,并且指针/数组引用本身也是不可变的——在我看来是一种有效而安全的策略。

此外,将字符串文字转换为不可变字符串指针或字符数组完全是源级别的任务。也就是说,如果使用上述方法转换所有字符串文字,则每个字符串文字至少可以消除一次重定位。

事实上,我看不出对象级分析能对您有多大帮助。当然,它确实能告诉您所做的修改是否减少了所需的重定位次数。

上面的 awk 片段可以扩展为一个脚本,用于输出具有正偏移的动态引用所对应的字符串常量:

#!/bin/bash
if [ $# -ne 1 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
    exec >&2
    echo ""
    echo "Usage: %s [ -h | --help ]"
    echo "       %s object.o"
    echo ""
    exit 1
fi

export LANG=C LC_ALL=C

objdump -wr "$1" | awk '
    BEGIN {
        RS = "[\t\v\f ]*[\r\n][\t\n\v\f\r ]*"
        FS = "[\t\v\f ]+"
    }

    $1 ~ /^[0-9A-Fa-f]+/ {
        n[$3]++
    }

    END {
        for (s in n)
            printf "%d %s\n", n[s], s
    }
' | sort -g | gawk -v filename="$1" '
    BEGIN {
        RS = "[\t\v\f ]*[\r\n][\t\n\v\f\r ]*"
        FS = "[\t\v\f ]+"

        cmd = "objdump --file-offsets -ws " filename

        while ((cmd | getline) > 0)
            if ($3 == "section") {
                s = $4
                sub(/:$/, "", s)
                o = $NF
                sub(/\)$/, "", o)
                start[s] = strtonum(o)
            }

        close(cmd)
    }

    {
        if ($2 ~ /\..*\+/) {
            s = $2
            o = $2
            sub(/\+.*$/, "", s)
            sub(/^[^\+]*\+/, "", o)
            o = strtonum(o) + start[s]
            cmd = "dd if=\"" filename "\" of=/dev/stdout bs=1 skip=" o " count=256"
            OLDRS = RS
            RS = "\0"
            cmd | getline hex
            close(cmd)
            RS = OLDRS
            gsub(/\\/, "\\\\", hex)
            gsub(/\t/, "\\t", hex)
            gsub(/\n/, "\\n", hex)
            gsub(/\r/, "\\r", hex)
            gsub(/\"/, "\\\"", hex)
            if (hex ~ /[\x00-\x1F\x7F-\x9F\xFE\xFF]/ || length(hex) < 1)
                printf "%s\n", $0
            else
                printf "%s = \"%s\"\n", $0, hex
        } else
            print $0
    }
'

这个脚本有点粗糙,只是匆忙拼凑出来的,所以我不知道它的可移植性如何。在我的机器上,对于我试过的几个测试用例,它确实能找到字符串字面量;您应该根据自己的需要重写它。甚至可以使用支持 ELF 的真正的编程语言来直接检查目标文件。

对于上面显示的示例程序(在应用我建议的、用于减少重定位数的修改之前),在没有优化的情况下编译,上面的脚本会为每个重定位目标输出一行:重定位次数、目标,以及(当目标处的内容可以读作字符串时)“= "字符串内容"”。(原回答在此处列出的具体输出在本文中已丢失。)

最后,您可能会注意到,使用指向 printf() 的函数指针而不是直接调用 printf(),可以再减少示例代码中的 18 次重定位,但我认为这样做是错误的。

对于代码,您想要重定位,因为间接函数调用(通过函数指针调用)比直接调用慢得多。简单地说,这些重定位使函数和子程序调用更快,所以你绝对想要保留它们。

抱歉,回答很长;希望对您有所帮助。还有问题吗?

答案 1 :(得分:2)

基于 Nominal Animal 的答案(我仍需要完全消化它),我写出了以下简单的 shell 脚本,它似乎可以找出我称之为“易于修复”的那类重定位:

# For every object file matched by the glob, list the objdump lines that
# mention the .data.rel.ro.local section and print them under the file name.
for i in path/to/*.o ; do
    # Dump symbols and relocations (objdump errors are discarded) and keep
    # lines where ".data.rel.ro.local" is followed by a character other than
    # ']', '+' or '-'.
    # NOTE(review): presumably this excludes the "RELOCATION RECORDS FOR [...]"
    # headers and relocation values of the form section+offset - confirm.
    REL="$(objdump -TtRr "$i" 2>/dev/null | grep '.data.rel.ro.local[^]+-]')"
    # Only report files that produced at least one matching line.
    if [ -n "$REL" ]; then
        echo "$(basename "$i"):"
        # c++filt demangles any C++ symbol names in the matched lines.
        echo "$REL" | c++filt
        echo
    fi
done

示例输出(对于QtGui库):

qimagereader.o:
0000000000000000 l     O .data.rel.ro.local     00000000000000c0 _qt_BuiltInFormats
0000000000000000 l    d  .data.rel.ro.local     0000000000000000 .data.rel.ro.local

qopenglengineshadermanager.o:
0000000000000000 l     O .data.rel.ro.local     0000000000000090 QOpenGLEngineShaderManager::getUniformLocation(QOpenGLEngineShaderManager::Uniform)::uniformNames
0000000000000000 l    d  .data.rel.ro.local     0000000000000000 .data.rel.ro.local

qopenglpaintengine.o:
0000000000000000 l     O .data.rel.ro.local     0000000000000020 vtable for (anonymous namespace)::QOpenGLStaticTextUserData
0000000000000000 l    d  .data.rel.ro.local     0000000000000000 .data.rel.ro.local

qtexthtmlparser.o:
0000000000000000 l     O .data.rel.ro.local     00000000000003b0 elements
0000000000000000 l    d  .data.rel.ro.local     0000000000000000 .data.rel.ro.local

在源文件中查找这些符号,通常要么能直接找到修复方法,要么会发现它们不易修复。

但是我想,一旦我把 .data.rel.ro.local 中可修复的条目都处理完,我就必须回头重新研究 Nominal Animal 的答案了……