Question

首先让我说，我可能正在尝试使用一个糟糕的数据结构。

我试图从一个很大的文本转储中提取信息，但似乎始终无法把数据正确地整理出来。数据的格式如下所示，但实际要长得多。

我想我最终可以把它变成一个数据结构，比如..

r1_list = [
    r01: [77.7,76.0,76.0,76.0],
    r04: [69.5,4,4,5],
]
r2_list = [
    r02: [1,2,3,4],
    r04: [3,4,4,5],
]

然后我可以遍历列表，并检查每个设备的值的平均值等。

这是我一直在尝试的

import re

# Accumulators for the parsed readings, one list per device type.
r1_list, r2_list = [], []

# Parser state, all starting out as False until the first matching line
# of each kind is seen.
current_device = device_type = current_reading = False


def matchr1(line):
    """Return the device id from an ``r1 <device>`` header line.

    The line must consist of the literal ``r1``, one whitespace character,
    and an id of the form ``r`` followed by digits. A single trailing
    newline is tolerated because ``$`` also matches just before it.

    Returns the device id string (e.g. ``"r01"``), or ``False`` when the
    line is not an ``r1`` header.
    """
    header = re.match(r'^(r1)\s(r\d+)$', line)
    if not header:
        return False
    # Group 1 is the "r1" tag itself; group 2 is the device id.
    return header.group(2)

def matchr2(line):
    """Return the device id from an ``r2 <device>`` header line.

    The line must consist of the literal ``r2``, one whitespace character,
    and an id of the form ``r`` followed by digits. A single trailing
    newline is tolerated because ``$`` also matches just before it.

    Returns the device id string (e.g. ``"r02"``), or ``False`` when the
    line is not an ``r2`` header.
    """
    header = re.match(r'^(r2)\s(r\d+)$', line)
    if not header:
        return False
    # Group 1 is the "r2" tag itself; group 2 is the device id.
    return header.group(2)

def matchReading(line):
    """Return the reading value from a ``<digits> <decimal>`` data line.

    The line must consist of a run of digits, one whitespace character,
    and a decimal number with digits on both sides of the point
    (e.g. ``"2020 77.7"``). A single trailing newline is tolerated because
    ``$`` also matches just before it.

    Returns the decimal part as a string (e.g. ``"77.7"``), or ``False``
    when the line is not a reading.
    """
    # The decimal point is escaped: an unescaped "." matches any character,
    # which would accept malformed values such as "77x7" or "7777".
    matched = re.match(r'^(\d+)\s(\d+\.\d+)$', line)
    if not matched:
        return False
    # Group 1 is the leading number; group 2 is the reading itself.
    return matched.group(2)





# Walk the dump line by line. Header lines ("r1 rNN" / "r2 rNN") select the
# active device and its type; every reading line is then appended to the
# list for that type as its own single-entry dict {device: [reading]}.
with open("data.txt") as f:
    for line in f:
        r1_device = matchr1(line)
        if r1_device:
            current_device, device_type = r1_device, "r1"

        r2_device = matchr2(line)
        if r2_device:
            current_device, device_type = r2_device, "r2"

        # matchReading gives the reading text, or False for any other line.
        current_reading = matchReading(line)
        if not current_reading:
            continue

        if device_type == "r1":
            r1_list.append({current_device: [current_reading]})

        if device_type == "r2":
            r2_list.append({current_device: [current_reading]})

        # Clear the reading so it is not carried into the next line.
        current_reading = False

print(r1_list)
print(r2_list)

我实际得到的结果

[{'r01': ['77.7']}, {'r01': ['76.0']}, {'r01': ['77.7']}, {'r04': ['74.6']}, {'r04': ['75.6']}, {'r04': ['75.8']}]
[{'r02': ['74.7']}, {'r02': ['74.0']}, {'r02': ['76.7']}, {'r03': ['74.2']}, {'r03': ['74.1']}, {'r03': ['76.8']}]

Answer 1

这里有两个单独的步骤：

查看以“r”开头的行，确定其后的数据应当插入到数据结构中的哪个位置。
查看其他行并将它们插入到数据结构中。

这是我想出的：

#!/usr/bin/env python

data = """r1 r01
2020 77.7
2020 76.0
2020 77.7
r2 r02
2020 74.7
2020 74.0
2020 76.7
r2 r03
2020 74.2
2020 74.1
2020 76.8
r1 r04
2020 74.6
2020 75.6
2020 75.8"""

# Nested mapping built below: {group: {device: [values, ...]}}.
result = {}

for line in data.splitlines():
    fields = line.split()
    if line.startswith("r"):
        # Header line "<group> <device>": look up (creating on first
        # sight) the list that the following value lines belong to.
        group, device = fields
        devices = result.setdefault(group, {})
        dest = devices.setdefault(device, [])
    else:
        # Value line: the first field is ignored, the second is stored
        # (as a string) in the list chosen by the most recent header.
        _, value = fields
        dest.append(value)

expected = {
    "r1": {
        "r01": ["77.7", "76.0", "77.7"],
        "r04": ["74.6", "75.6", "75.8"],
    },
    "r2": {
        "r02": ["74.7", "74.0", "76.7"],
        "r03": ["74.2", "74.1", "76.8"],
    },
}
assert result == expected

创建一个字典列表，其中值是一个列表

1 个答案: