Question

在查看了this answer之后，我确信relist()应该可以在以下示例中重建列表：

#!/usr/bin/env python3
import re
import sys
import argparse
import subprocess


def read_u64(binary):
    return sum(binary[i] * 256 ** i for i in range(8))


def distance_u32(start, end):
    assert abs(end - start) < 2 ** 31
    diff = end - start
    if diff < 0:
        return 2 ** 32 + diff
    else:
        return diff


def to_u32(x):
    assert 0 <= x < 2 ** 32
    return bytes((x // (256 ** i)) % 256 for i in range(4))


class GotInstruction:
    def __init__(self, lines, symbol_address, symbol_offset):
        self.address = int(lines[0].split(":")[0].strip(), 16)
        self.offset = symbol_offset + (self.address - symbol_address)
        self.got_offset = int(lines[0].split("(File Offset: ")[1].strip().strip(")"), 16)
        self.got_offset = self.got_offset % 0x200000  # No idea why the offset is actually wrong
        self.bytes = []
        for line in lines:
            self.bytes += [int(x, 16) for x in line.split("\t")[1].split()]


class TextDump:
    symbol_regex = re.compile(r"^([0-9,a-f]{16}) <(.*)> \(File Offset: 0x([0-9,a-f]*)\):")

    def __init__(self, binary_path):
        self.got_instructions = []
        objdump_output = subprocess.check_output(["objdump", "-Fdj", ".text", "-M", "intel", binary_path])
        lines = objdump_output.decode("utf-8").split("\n")
        current_symbol_address = 0
        current_symbol_offset = 0
        for line_group in self.group_lines(lines):
            match = self.symbol_regex.match(line_group[0])
            if match is not None:
                current_symbol_address = int(match.group(1), 16)
                current_symbol_offset = int(match.group(3), 16)
            elif "_GLOBAL_OFFSET_TABLE_" in line_group[0]:
                instruction = GotInstruction(line_group, current_symbol_address, current_symbol_offset)
                self.got_instructions.append(instruction)

    @staticmethod
    def group_lines(lines):
        if not lines:
            return
        line_group = [lines[0]]
        for line in lines[1:]:
            if line.count("\t") == 1:  # this line continues the previous one
                line_group.append(line)
            else:
                yield line_group
                line_group = [line]
        yield line_group

    def __iter__(self):
        return iter(self.got_instructions)


def read_binary_file(path):
    try:
        with open(path, "rb") as f:
            return f.read()
    except (IOError, OSError) as exc:
        print(f"Failed to open {path}: {exc.strerror}")
        sys.exit(1)


def write_binary_file(path, content):
    try:
        with open(path, "wb") as f:
            f.write(content)
    except (IOError, OSError) as exc:
        print(f"Failed to open {path}: {exc.strerror}")
        sys.exit(1)


def patch_got_reference(instruction, binary_content):
    got_data = read_u64(binary_content[instruction.got_offset:])
    code = instruction.bytes
    if code[0] == 0xff:
        assert len(code) == 6
        relative_address = distance_u32(instruction.address, got_data)
        if code[1] == 0x15:  # call QWORD PTR [rip+...]
            patch = b"\xe8" + to_u32(relative_address - 5) + b"\x90"
        elif code[1] == 0x25:  # jmp QWORD PTR [rip+...]
            patch = b"\xe9" + to_u32(relative_address - 5) + b"\x90"
        else:
            raise ValueError(f"unknown machine code: {code}")
    elif code[:3] == [0x48, 0x83, 0x3d]:  # cmp QWORD PTR [rip+...],<BYTE>
        assert len(code) == 8
        if got_data == code[7]:
            patch = b"\x48\x39\xc0" + b"\x90" * 5  # cmp rax,rax
        elif got_data > code[7]:
            patch = b"\x48\x83\xfc\x00" + b"\x90" * 3  # cmp rsp,0x0
        else:
            patch = b"\x50\x31\xc0\x83\xf8\x01\x90"
            # push rax
            # xor eax,eax
            # cmp eax,0x1
            # pop rax
    elif code[:3] == [0x48, 0x3b, 0x1d]:  # cmp rbx,QWORD PTR [rip+...]
        assert len(code) == 7
        patch = b"\x81\xfb" + to_u32(got_data) + b"\x90"  # cmp ebx,<DWORD>
    else:
        raise ValueError(f"unknown machine code: {code}")
    return dict(offset=instruction.offset, data=patch)


def make_got_patches(binary_path, binary_content):
    patches = []
    text_dump = TextDump(binary_path)
    for instruction in text_dump.got_instructions:
        patches.append(patch_got_reference(instruction, binary_content))
    return patches


def apply_patches(binary_content, patches):
    for patch in patches:
        offset = patch["offset"]
        data = patch["data"]
        binary_content = binary_content[:offset] + data + binary_content[offset + len(data):]
    return binary_content


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("binary_path", help="Path to ELF binary")
    parser.add_argument("-o", "--output", help="Output file path", required=True)
    args = parser.parse_args()
    binary_content = read_binary_file(args.binary_path)
    patches = make_got_patches(args.binary_path, binary_content)
    patched_content = apply_patches(binary_content, patches)
    write_binary_file(args.output, patched_content)


if __name__ == "__main__":
    main()

$ cargo build --release --target x86_64-unknown-linux-musl
$ ./resolve_got.py target/x86_64-unknown-linux-musl/release/hello -o hello_no_got
$ objcopy -R.got hello_no_got
$ readelf -e hello_no_got | grep .got
$ ./hello_no_got
Hello, world!

a <- c("AA01_01", "AA01_03", "AA01_04", "AA01_06", "AA01_08", "AA01_11", "AA01_12", "AA01_13", "AA01_14", "AA01_16", "AA01_19", "AA01_20",
       "AA02_03", "AA02_04", "AA02_05", "AA02_06", "AA02_07", "AA02_08", "AA02_09", "AA02_13", "AA02_17", "AA02_19", "AA02_20",
       "AA03_05", "AA03_09", "AA03_10", "AA03_12", "AA03_16", "AA03_20",
       "AA04_01", "AA04_02", "AA04_03", "AA04_10", "AA04_11", "AA04_14", "AA04_16"
)
b <- list(
  b1 = c("AA01_01", "AA01_02", "AA01_03", "AA01_04", "AA01_05", "AA01_06", "AA01_07", "AA01_08", "AA01_09", "AA01_10",
         "AA01_11", "AA01_12", "AA01_13", "AA01_14", "AA01_15", "AA01_16", "AA01_17", "AA01_18", "AA01_19", "AA01_20"),
  b2 = c("AA02_01", "AA02_02", "AA02_03", "AA02_04", "AA02_05", "AA02_06", "AA02_07", "AA02_08", "AA02_09", "AA02_10",
         "AA02_11", "AA02_12", "AA02_13", "AA02_14", "AA02_15", "AA02_16", "AA02_17", "AA02_18", "AA02_19", "AA02_20"),
  b3 = c("AA03_01", "AA03_02", "AA03_03", "AA03_04", "AA03_05", "AA03_06", "AA03_07", "AA03_08", "AA03_09", "AA03_10",
         "AA03_11", "AA03_12", "AA03_13", "AA03_14", "AA03_15", "AA03_16", "AA03_17", "AA03_18", "AA03_19", "AA03_20"),
  b4 = c("AA04_01", "AA04_02", "AA04_03", "AA04_04", "AA04_05", "AA04_06", "AA04_07", "AA04_08", "AA04_09", "AA04_10",
         "AA04_11", "AA04_12", "AA04_13", "AA04_14", "AA04_15", "AA04_16", "AA04_17", "AA04_18", "AA04_19", "AA04_20")
)
newList <- relist(flesh = a, skeleton = b)

根据?relist()，这将导致newList$b1包含以AA01_开头的所有字符，newList$b2应包含AA02_，依此类推...但是我得到的是：

为什么会发生这种情况，我该如何继续基于模型列表从向量创建列表？

编辑：如@OganM在评论中所述，relist()在更改列表后需要相同的结构。因此，我改一下我的问题：如何基于另一个列表的结构从向量创建列表？我更喜欢一种适用于嵌套列表的解决方案。

Answer 1

lapply(b, function(x) a[a %in% x])
#$b1
# [1] "AA01_01" "AA01_03" "AA01_04" "AA01_06" "AA01_08" "AA01_11" "AA01_12" "AA01_13"
# [9] "AA01_14" "AA01_16" "AA01_19" "AA01_20"

#$b2
# [1] "AA02_03" "AA02_04" "AA02_05" "AA02_06" "AA02_07" "AA02_08" "AA02_09" "AA02_13"
# [9] "AA02_17" "AA02_19" "AA02_20"

#$b3
#[1] "AA03_05" "AA03_09" "AA03_10" "AA03_12" "AA03_16" "AA03_20"

#$b4
#[1] "AA04_01" "AA04_02" "AA04_03" "AA04_10" "AA04_11" "AA04_14" "AA04_16"

嵌套列表可能需要递归

# Recursive function
# Rebuild a (possibly nested) list from a vector, using `l` as the template.
#
# Every non-list element of `l` is replaced by the elements of `vect` that
# also occur in that element; nested lists are processed recursively, so the
# result has the same nesting and names as `l`.
#
# Args:
#   l:    template list (may contain further lists at any depth).
#   vect: vector whose values are distributed over the template.
#
# Returns:
#   A list shaped like `l`; each leaf holds the matching values of `vect`,
#   in the order they appear in `vect` (an empty vector when nothing matches).
foo = function(l, vect) {
    for (i in seq_along(l)) {
        # is.list() always yields a single TRUE/FALSE. Comparing class() with
        # "list" yields a length > 1 condition for objects with several
        # classes, which is an error inside `if` on R >= 4.2.
        l[[i]] = if (is.list(l[[i]])) {
            Recall(l[[i]], vect)
        } else {
            vect[ vect %in% l[[i]] ]
        }
    }
    return(l)
}

#DATA (nested list)
a = c("a", "b", "c", "d", "e", "f")
b = list(b1 = c("a", "b", "g", "h"),
         b2 = list(b21 = c("a", "d", "y"),
                   b22 = "f"))

# Usage
foo(b, a)
#> $b1
#> [1] "a" "b"
#> 
#> $b2
#> $b2$b21
#> [1] "a" "d"
#> 
#> $b2$b22
#> [1] "f"

由reprex package（v0.3.0）于2019-09-02创建

Answer 2

如果您尝试在a中找到b中存在的元素，则可以使用intersect

lapply(b, intersect, a)


#$b1
# [1] "AA01_01" "AA01_03" "AA01_04" "AA01_06" "AA01_08" "AA01_11" "AA01_12" 
#     "AA01_13" "AA01_14" "AA01_16" "AA01_19" "AA01_20"

#$b2
# [1] "AA02_03" "AA02_04" "AA02_05" "AA02_06" "AA02_07" "AA02_08" "AA02_09"  
#     "AA02_13" "AA02_17" "AA02_19" "AA02_20"

#$b3
#[1] "AA03_05" "AA03_09" "AA03_10" "AA03_12" "AA03_16" "AA03_20"

#$b4
#[1] "AA04_01" "AA04_02" "AA04_03" "AA04_10" "AA04_11" "AA04_14" "AA04_16"

根据给定的结构/列表从向量创建列表

2 个答案: