Question

我正在尝试找出唯一的十六进制“字符串”（？），也就是在整个文件中只出现一次的十六进制片段。

示例输入：

F6 03 04 AA F3 03 03 AA F7 03 00 AA F4 02 40 F9 F3 03 03 AA F7 03 00 AA F4 02 40 F9
E8 51 00 F0 08 B1 2A 91 E0 03 14 AA E1 03 02 AA 08 B1 2A 91 E0 03 14 AA E1 03 02 AA
E2 03 08 AA 2F 01 00 94 F5 03 00 AA 75 01 00 B4 2F 01 00 94 F5 03 00 AA 75 01 00 B4
E0 03 17 AA E1 03 15 AA E2 03 16 AA 66 01 00 94 E1 03 15 AA E2 03 16 AA 66 01 00 94
F6 03 00 AA F6 00 00 34 E0 03 14 AA E1 03 15 AA F6 00 00 34 E0 03 14 AA E1 03 15 AA
F9 04 00 94 05 00 00 14 F6 03 1A 32 03 00 00 14 05 00 00 14 F6 03 1A 32 03 00 00 14
16 00 80 52 75 02 00 F9 E0 03 16 AA FD 7B 43 A9 75 02 00 F9 E0 03 16 AA FD 7B 43 A9
F4 4F 42 A9 F6 57 41 A9 F8 5F C4 A8 C0 03 5F D6 F6 57 41 A9 F8 5F C4 A8 C0 03 5F D6

输出：

AA F6 00 00 34 E0 03 14 AA E1 03 15 AA F6 00 00 34 E0 03 14

*这不是实际的输出，我只是随手挑了一个我能找到的唯一序列。本质上，我希望程序能找到一个长度为 20 的唯一十六进制片段，该片段不会在代码的任何其他位置出现。

该序列之所以是唯一的，是因为它不会在文件中的其他任何位置出现。我认为 20 这个长度比较合理，可以更容易地找到唯一的序列，因为在普通文件中，较短的序列（例如 AA F6，甚至 AA E2 03 16 AA 66）会出现多次。

关于我该如何做的任何建议？

Answer 1

这看起来像是一个作业问题。无论如何，这很有趣。该代码应该可以工作：

''' Someone's homework assignment on Stackoverflow '''

import argparse

# Sentinel position returned by find_window() and find_unique() when a
# search comes up empty.
NOT_FOUND = -1

# Debug verbosity level: 0 is silent, increasing values are more verbose.
# main() overwrites this from the --debug command line option.
print_debug = 0

def data_string_to_bytes(data_string):
    '''
    Convert packed string of nybble data to a list of byte values.
    There is expected to be no whitespace intermingled within.
    The number of nybbles must be even.

    :data_string: Hex digits with no separators, e.g. 'F603'.
    :returns: A list of integers in the range 0..255, one per digit pair.
    :raises ValueError: If the string has an odd number of characters or
                        contains a character that is not a hex digit.
    '''
    if len(data_string) % 2 != 0:
        raise ValueError('Attempting to parse byte string with odd length {}'
                         .format(len(data_string)))

    byte_list = []
    for index in range(0, len(data_string), 2):
        # Each character is one nybble (4 bits), so the leading digit of a
        # pair is shifted up by 4 bits, not 8, before merging the low one.
        high_nybble = int(data_string[index], 16)
        low_nybble = int(data_string[index + 1], 16)
        byte_list.append((high_nybble << 4) | low_nybble)

    return byte_list


def read_hex_file(file_name):
    '''
    Read a file of hex values separated by whitespace.

    Each whitespace delimited token of at most two characters is parsed as a
    single byte. A longer token is treated as a packed run of hex digits and
    is expanded with data_string_to_bytes(). Blank and whitespace-only lines
    contribute nothing.

    :file_name: Path of the text file to read.
    :returns: A list of integer byte values in file order.
    :raises ValueError: If a token is not valid hex.
    '''
    read_data = []
    with open(file_name, 'r') as file:
        for data_line in file:
            # split() yields no tokens for an empty line, so blank lines are
            # skipped instead of being indexed into.
            for data_string in data_line.split():
                if len(data_string) > 2:
                    # Not a single byte: a packed string of nybbles.
                    read_data.extend(data_string_to_bytes(data_string))
                else:
                    read_data.append(int(data_string, 16))

    return read_data


def data_to_string(data):
    '''
    Render a list of integers as hex text.
    Every value becomes at least two lowercase hex digits followed by a
    single space, so a non-empty result ends with a trailing space.
    '''
    return ''.join('%0.2x ' % data_value for data_value in data)


def find_window(data, window):
    '''
    Locate the first occurrence of the list window inside the list data.
    Returns the starting index of the match, or NOT_FOUND when window does
    not occur in data.
    '''
    span = len(window)

    if print_debug > 1:
        print("find_window: data: {}  w: {}".format(data_to_string(data),
                                                    data_to_string(window)))

    # Slide over every start index that still leaves room for a full window.
    for start in range((len(data) - span) + 1):
        candidate = data[start:start + span]
        if candidate != window:
            continue
        if print_debug > 0:
            print("Found check @   {:4} : {}".format(start,
                                                     data_to_string(candidate)))
        return start

    return NOT_FOUND


def find_unique(data, window_len):
    '''
    Find the first window of data that occurs exactly once in the whole list.

    A window is compared against every other window of the same length, both
    before and after it and including overlapping ones. Checking only the
    data that follows a window is not enough: the last copy of a repeated
    window has no later match and would wrongly be reported as unique.

    :data: A list of hashable data values in which to search.
    :window_len: The length of the window to search within the data.
    :returns: The position of the first unique window, or NOT_FOUND when no
              window is unique, when window_len is not positive, or when
              data holds fewer than window_len values.
    '''
    # Imported here so the function does not depend on the file's import list.
    from collections import defaultdict

    # A window must contain at least one value to identify anything.
    if window_len <= 0:
        return NOT_FOUND

    # Stop searching when there are less than window_len data values remaining.
    pos_end = (len(data) - window_len) + 1

    # Index every window (as a hashable tuple) by all of its start positions.
    # One pass replaces rescanning the remaining data for each window.
    window_positions = defaultdict(list)
    for pos in range(pos_end):
        window_positions[tuple(data[pos:pos + window_len])].append(pos)

    for pos in range(pos_end):
        window = data[pos:pos + window_len]
        if print_debug > 0:
            print("Checking window[{:4}]: {} ".format(pos,
                                                      data_to_string(window)))
        starts = window_positions[tuple(window)]
        # Only one recorded start position means no other copy exists
        # anywhere in the data, so the window is unique.
        if len(starts) == 1:
            return pos

        if print_debug > 0:
            # Report another location of the same window for debug.
            found_pos = next(start for start in starts if start != pos)
            match = data[found_pos:found_pos + window_len]
            print("Found match @   {:4} : {}".format(found_pos,
                                                     data_to_string(match)))
    return NOT_FOUND


def main():
    '''
    Command line entry point: parse the options, load the hex file and
    report the position of a unique window, if any.
    '''
    global print_debug

    arg_parser = argparse.ArgumentParser(description='Search a file for a repeating sequence')

    # (short flag, long flag, type, default, help) for every supported option.
    option_table = (
        ('-f', '--file', str, 'test_data.txt', "the file to read"),
        ('-l', '--wlen', int, 20, "the window length"),
        ('-d', '--debug', int, 0, "debugging print level"),
    )
    for short_flag, long_flag, value_type, fallback, help_text in option_table:
        arg_parser.add_argument(short_flag, long_flag, type=value_type,
                                default=fallback, help=help_text)

    options = arg_parser.parse_args()
    print_debug = options.debug
    print("file: '{}', window_len: {}, debug={}".format(options.file, options.wlen, print_debug))

    data = read_hex_file(options.file)
    if print_debug > 0:
        print("Read {} bytes from file '{}'".format(len(data), options.file))

    unique_pos = find_unique(data, options.wlen)
    if unique_pos != NOT_FOUND:
        report = "Unique window of length {} found at pos = {}".format(options.wlen,
                                                                       unique_pos)
    else:
        report = "No unique windows of length {} found".format(options.wlen)
    print(report)


# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

Answer 2

如果您的数据文件不太大，此解决方案可能会起作用：

pandas

# Format data as hex bytes separated by space regardless of how the file looks:
data = '''F6 03 04 AA F3 03 03 AA F7 03 00 AA F4 02 40 F9 F3 03 03 AA F7 03 00 AA F4 02 40 F9
E8 51 00 F0 08 B1 2A 91 E0 03 14 AA E1 03 02 AA 08 B1 2A 91 E0 03 14 AA E1 03 02 AA
E2 03 08 AA 2F 01 00 94 F5 03 00 AA 75 01 00 B4 2F 01 00 94 F5 03 00 AA 75 01 00 B4
E0 03 17 AA E1 03 15 AA E2 03 16 AA 66 01 00 94 E1 03 15 AA E2 03 16 AA 66 01 00 94
F6 03 00 AA F6 00 00 34 E0 03 14 AA E1 03 15 AA F6 00 00 34 E0 03 14 AA E1 03 15 AA
F9 04 00 94 05 00 00 14 F6 03 1A 32 03 00 00 14 05 00 00 14 F6 03 1A 32 03 00 00 14
16 00 80 52 75 02 00 F9 E0 03 16 AA FD 7B 43 A9 75 02 00 F9 E0 03 16 AA FD 7B 43 A9
F4 4F 42 A9 F6 57 41 A9 F8 5F C4 A8 C0 03 5F D6 F6 57 41 A9 F8 5F C4 A8 C0 03 5F D6'''
data = data.replace('\n', ' ')  # Replacing newlines with <space>

unique_length = 2  # Unique sequence length in no. of hex bytes:
data_length = (len(data)+1) // 3  # Data length in hex bytes
unique_list = []  # List to contain unique sequences

# Loop through data and extract every possible unique string of unique_length hex bytes
for x in range(data_length-unique_length+1):
    test_string = data[3*x : 3*x+3*unique_length-1]
    if data.count(test_string) == 1:  # Is there just the one?
        unique_list.append(test_string)

print(unique_list)

根据您的选择调整 unique_length 的值。

查找唯一的十六进制序列

2 个答案: