根据它们之间的距离对线进行分组

时间:2018-12-19 20:39:14

标签: python-3.x

我有一个字典列表,其中每个字典代表一条线。有什么好方法可以根据各条线 y1 坐标之间的距离对它们进行分组?

# Sample input: one dict per line, with integer coordinates stored under the
# keys x1, y1, x2 and y2 (presumably the two endpoints of the line -- confirm
# against whatever produces these dicts).
horizontal_lines = [
    # 13 entries whose y1 lies between 2787 and 2797.
    {'x1': 2257, 'y1': 2787, 'x2': 2419, 'y2': 2787},
    {'x1': 2256, 'y1': 2788, 'x2': 2460, 'y2': 2788},
    {'x1': 2256, 'y1': 2789, 'x2': 2460, 'y2': 2789},
    {'x1': 2256, 'y1': 2790, 'x2': 2460, 'y2': 2790},
    {'x1': 2256, 'y1': 2791, 'x2': 2459, 'y2': 2798},
    {'x1': 2265, 'y1': 2791, 'x2': 2460, 'y2': 2791},
    {'x1': 2409, 'y1': 2792, 'x2': 2460, 'y2': 2796},
    {'x1': 2351, 'y1': 2792, 'x2': 2454, 'y2': 2794},
    {'x1': 2352, 'y1': 2794, 'x2': 2431, 'y2': 2794},
    {'x1': 2317, 'y1': 2795, 'x2': 2431, 'y2': 2798},
    {'x1': 2255, 'y1': 2795, 'x2': 2316, 'y2': 2795},
    {'x1': 2255, 'y1': 2796, 'x2': 2316, 'y2': 2797},
    {'x1': 2339, 'y1': 2797, 'x2': 2395, 'y2': 2798},

    # 14 entries whose y1 lies between 125 and 134.
    {'x1': 1892, 'y1': 125, 'x2': 2124, 'y2': 125},
    {'x1': 2127, 'y1': 126, 'x2': 2200, 'y2': 127},
    {'x1': 1981, 'y1': 126, 'x2': 2126, 'y2': 126},
    {'x1': 2004, 'y1': 127, 'x2': 2183, 'y2': 127},
    {'x1': 2116, 'y1': 128, 'x2': 2350, 'y2': 128},
    {'x1': 2439, 'y1': 129, 'x2': 2648, 'y2': 129},
    {'x1': 2285, 'y1': 130, 'x2': 2395, 'y2': 131},
    {'x1': 2339, 'y1': 130, 'x2': 2761, 'y2': 130},
    {'x1': 2396, 'y1': 131, 'x2': 2801, 'y2': 131},
    {'x1': 3003, 'y1': 132, 'x2': 3137, 'y2': 132},
    {'x1': 2567, 'y1': 132, 'x2': 2842, 'y2': 132},
    {'x1': 2969, 'y1': 133, 'x2': 3138, 'y2': 133},
    {'x1': 2607, 'y1': 133, 'x2': 2842, 'y2': 133},
    {'x1': 2915, 'y1': 134, 'x2': 3208, 'y2': 134},
]

我想把 y1 之间的距离小于等于 10 的所有元素归为一组。在本例中,这意味着前 13 条线为一组,后 14 条线为另一组。

所需结果:

# Desired result: the input lines split into two groups, each holding the
# lines whose y1 values are close together (13 lines around y1 = 2787..2797
# and 14 lines around y1 = 125..134).
groups = [
    [
        {'x1': 2257, 'y1': 2787, 'x2': 2419, 'y2': 2787},
        {'x1': 2256, 'y1': 2788, 'x2': 2460, 'y2': 2788},
        {'x1': 2256, 'y1': 2789, 'x2': 2460, 'y2': 2789},
        {'x1': 2256, 'y1': 2790, 'x2': 2460, 'y2': 2790},
        {'x1': 2256, 'y1': 2791, 'x2': 2459, 'y2': 2798},
        {'x1': 2265, 'y1': 2791, 'x2': 2460, 'y2': 2791},
        {'x1': 2409, 'y1': 2792, 'x2': 2460, 'y2': 2796},
        {'x1': 2351, 'y1': 2792, 'x2': 2454, 'y2': 2794},
        {'x1': 2352, 'y1': 2794, 'x2': 2431, 'y2': 2794},
        {'x1': 2317, 'y1': 2795, 'x2': 2431, 'y2': 2798},
        {'x1': 2255, 'y1': 2795, 'x2': 2316, 'y2': 2795},
        {'x1': 2255, 'y1': 2796, 'x2': 2316, 'y2': 2797},
        {'x1': 2339, 'y1': 2797, 'x2': 2395, 'y2': 2798},
    ],
    [
        {'x1': 1892, 'y1': 125, 'x2': 2124, 'y2': 125},
        {'x1': 2127, 'y1': 126, 'x2': 2200, 'y2': 127},
        {'x1': 1981, 'y1': 126, 'x2': 2126, 'y2': 126},
        {'x1': 2004, 'y1': 127, 'x2': 2183, 'y2': 127},
        {'x1': 2116, 'y1': 128, 'x2': 2350, 'y2': 128},
        {'x1': 2439, 'y1': 129, 'x2': 2648, 'y2': 129},
        {'x1': 2285, 'y1': 130, 'x2': 2395, 'y2': 131},
        {'x1': 2339, 'y1': 130, 'x2': 2761, 'y2': 130},
        {'x1': 2396, 'y1': 131, 'x2': 2801, 'y2': 131},
        {'x1': 3003, 'y1': 132, 'x2': 3137, 'y2': 132},
        {'x1': 2567, 'y1': 132, 'x2': 2842, 'y2': 132},
        {'x1': 2969, 'y1': 133, 'x2': 3138, 'y2': 133},
        {'x1': 2607, 'y1': 133, 'x2': 2842, 'y2': 133},
        {'x1': 2915, 'y1': 134, 'x2': 3208, 'y2': 134},
    ],
]

1 个答案:

答案 0 :(得分:0)

所以我认为这可能对您有用:

先按 y1 值对列表排序:

# Sort a copy by y1 (ascending) so horizontal_lines keeps its original order.
new_lines = list(horizontal_lines)
new_lines.sort(key=lambda line: line['y1'])


[
    {"x1": 1892, "y1": 125, "x2": 2124, "y2": 125},
    {"x1": 2127, "y1": 126, "x2": 2200, "y2": 127},
    {"x1": 1981, "y1": 126, "x2": 2126, "y2": 126},
    {"x1": 2004, "y1": 127, "x2": 2183, "y2": 127},
    {"x1": 2116, "y1": 128, "x2": 2350, "y2": 128},
    {"x1": 2439, "y1": 129, "x2": 2648, "y2": 129},
    {"x1": 2285, "y1": 130, "x2": 2395, "y2": 131},
    {"x1": 2339, "y1": 130, "x2": 2761, "y2": 130},
    {"x1": 2396, "y1": 131, "x2": 2801, "y2": 131},
    {"x1": 3003, "y1": 132, "x2": 3137, "y2": 132},
    {"x1": 2567, "y1": 132, "x2": 2842, "y2": 132},
    {"x1": 2969, "y1": 133, "x2": 3138, "y2": 133},
    {"x1": 2607, "y1": 133, "x2": 2842, "y2": 133},
    {"x1": 2915, "y1": 134, "x2": 3208, "y2": 134},
    {"x1": 2257, "y1": 2787, "x2": 2419, "y2": 2787},
    {"x1": 2256, "y1": 2788, "x2": 2460, "y2": 2788},
    {"x1": 2256, "y1": 2789, "x2": 2460, "y2": 2789},
    {"x1": 2256, "y1": 2790, "x2": 2460, "y2": 2790},
    {"x1": 2256, "y1": 2791, "x2": 2459, "y2": 2798},
    {"x1": 2265, "y1": 2791, "x2": 2460, "y2": 2791},
    {"x1": 2409, "y1": 2792, "x2": 2460, "y2": 2796},
    {"x1": 2351, "y1": 2792, "x2": 2454, "y2": 2794},
    {"x1": 2352, "y1": 2794, "x2": 2431, "y2": 2794},
    {"x1": 2317, "y1": 2795, "x2": 2431, "y2": 2798},
    {"x1": 2255, "y1": 2795, "x2": 2316, "y2": 2795},
    {"x1": 2255, "y1": 2796, "x2": 2316, "y2": 2797},
    {"x1": 2339, "y1": 2797, "x2": 2395, "y2": 2798},
]

接下来有几种方法可以把排序后的列表拆分成组,下面是其中一种:

def get_index(new_list, delta):
    """Split lines that are already sorted by ``y1`` into groups of neighbours.

    Only adjacent entries are compared: a new group starts whenever the gap
    between one entry's ``y1`` and the next entry's ``y1`` is larger than
    ``delta``.  A group can therefore span more than ``delta`` from its
    first to its last entry, as long as every consecutive gap is small.

    Args:
        new_list: Sequence of mappings that each have a ``"y1"`` key, in
            ascending ``y1`` order.  The input is not sorted here, so
            unsorted input yields meaningless groups.
        delta: Largest ``y1`` gap between two neighbours that still keeps
            them in the same group.

    Returns:
        A list of slices of ``new_list``, one per group, in input order.
        An empty input yields an empty list.
    """
    if not new_list:
        # An empty input has no groups at all, not a single empty group.
        return []

    # Positions at which a new group begins: every index whose entry lies
    # more than ``delta`` above the entry just before it.
    cuts = [
        position
        for position, (previous, current) in enumerate(
            zip(new_list, new_list[1:]), start=1
        )
        if previous["y1"] + delta < current["y1"]
    ]

    # Each group runs from one cut (or the start) to the next cut (or the end).
    starts = [0, *cuts]
    stops = [*cuts, len(new_list)]
    return [new_list[start:stop] for start, stop in zip(starts, stops)]
  

# Group the sorted lines, allowing a y1 gap of at most 10 between neighbours.
get_index(new_lines, delta=10)

[
    [
        {"x1": 1892, "y1": 125, "x2": 2124, "y2": 125},
        {"x1": 2127, "y1": 126, "x2": 2200, "y2": 127},
        {"x1": 1981, "y1": 126, "x2": 2126, "y2": 126},
        {"x1": 2004, "y1": 127, "x2": 2183, "y2": 127},
        {"x1": 2116, "y1": 128, "x2": 2350, "y2": 128},
        {"x1": 2439, "y1": 129, "x2": 2648, "y2": 129},
        {"x1": 2285, "y1": 130, "x2": 2395, "y2": 131},
        {"x1": 2339, "y1": 130, "x2": 2761, "y2": 130},
        {"x1": 2396, "y1": 131, "x2": 2801, "y2": 131},
        {"x1": 3003, "y1": 132, "x2": 3137, "y2": 132},
        {"x1": 2567, "y1": 132, "x2": 2842, "y2": 132},
        {"x1": 2969, "y1": 133, "x2": 3138, "y2": 133},
        {"x1": 2607, "y1": 133, "x2": 2842, "y2": 133},
        {"x1": 2915, "y1": 134, "x2": 3208, "y2": 134},
    ],
    [
        {"x1": 2257, "y1": 2787, "x2": 2419, "y2": 2787},
        {"x1": 2256, "y1": 2788, "x2": 2460, "y2": 2788},
        {"x1": 2256, "y1": 2789, "x2": 2460, "y2": 2789},
        {"x1": 2256, "y1": 2790, "x2": 2460, "y2": 2790},
        {"x1": 2256, "y1": 2791, "x2": 2459, "y2": 2798},
        {"x1": 2265, "y1": 2791, "x2": 2460, "y2": 2791},
        {"x1": 2409, "y1": 2792, "x2": 2460, "y2": 2796},
        {"x1": 2351, "y1": 2792, "x2": 2454, "y2": 2794},
        {"x1": 2352, "y1": 2794, "x2": 2431, "y2": 2794},
        {"x1": 2317, "y1": 2795, "x2": 2431, "y2": 2798},
        {"x1": 2255, "y1": 2795, "x2": 2316, "y2": 2795},
        {"x1": 2255, "y1": 2796, "x2": 2316, "y2": 2797},
        {"x1": 2339, "y1": 2797, "x2": 2395, "y2": 2798},
    ],
]