我有下面这个字典列表,其中每个字典代表一条线。有什么好方法可以根据 y1 坐标之间的距离对这些线进行分组?
# Sample input: one dict per line, keyed by x1/y1/x2/y2 (presumably the line's two endpoints).
# Note: the list is not sorted by y1 -- the y1 ~ 2787 cluster comes before the y1 ~ 125 cluster.
horizontal_lines = [
{'x1': 2257, 'y1': 2787, 'x2': 2419, 'y2': 2787},
{'x1': 2256, 'y1': 2788, 'x2': 2460, 'y2': 2788},
{'x1': 2256, 'y1': 2789, 'x2': 2460, 'y2': 2789},
{'x1': 2256, 'y1': 2790, 'x2': 2460, 'y2': 2790},
{'x1': 2256, 'y1': 2791, 'x2': 2459, 'y2': 2798},
{'x1': 2265, 'y1': 2791, 'x2': 2460, 'y2': 2791},
{'x1': 2409, 'y1': 2792, 'x2': 2460, 'y2': 2796},
{'x1': 2351, 'y1': 2792, 'x2': 2454, 'y2': 2794},
{'x1': 2352, 'y1': 2794, 'x2': 2431, 'y2': 2794},
{'x1': 2317, 'y1': 2795, 'x2': 2431, 'y2': 2798},
{'x1': 2255, 'y1': 2795, 'x2': 2316, 'y2': 2795},
{'x1': 2255, 'y1': 2796, 'x2': 2316, 'y2': 2797},
{'x1': 2339, 'y1': 2797, 'x2': 2395, 'y2': 2798},
{'x1': 1892, 'y1': 125, 'x2': 2124, 'y2': 125},
{'x1': 2127, 'y1': 126, 'x2': 2200, 'y2': 127},
{'x1': 1981, 'y1': 126, 'x2': 2126, 'y2': 126},
{'x1': 2004, 'y1': 127, 'x2': 2183, 'y2': 127},
{'x1': 2116, 'y1': 128, 'x2': 2350, 'y2': 128},
{'x1': 2439, 'y1': 129, 'x2': 2648, 'y2': 129},
{'x1': 2285, 'y1': 130, 'x2': 2395, 'y2': 131},
{'x1': 2339, 'y1': 130, 'x2': 2761, 'y2': 130},
{'x1': 2396, 'y1': 131, 'x2': 2801, 'y2': 131},
{'x1': 3003, 'y1': 132, 'x2': 3137, 'y2': 132},
{'x1': 2567, 'y1': 132, 'x2': 2842, 'y2': 132},
{'x1': 2969, 'y1': 133, 'x2': 3138, 'y2': 133},
{'x1': 2607, 'y1': 133, 'x2': 2842, 'y2': 133},
{'x1': 2915, 'y1': 134, 'x2': 3208, 'y2': 134},
]
我想把 y1 值相差不超过 10 的所有元素归为一组。对于上面的数据,这意味着前 13 条线为一组,后 14 条线为另一组。
所需结果:
# Desired result: the input lines split into two groups, one per cluster of
# nearby y1 values (13 lines around y1 = 2787-2797, 14 lines around y1 = 125-134).
groups = [
    [
        {'x1': 2257, 'y1': 2787, 'x2': 2419, 'y2': 2787},
        {'x1': 2256, 'y1': 2788, 'x2': 2460, 'y2': 2788},
        {'x1': 2256, 'y1': 2789, 'x2': 2460, 'y2': 2789},
        {'x1': 2256, 'y1': 2790, 'x2': 2460, 'y2': 2790},
        {'x1': 2256, 'y1': 2791, 'x2': 2459, 'y2': 2798},
        {'x1': 2265, 'y1': 2791, 'x2': 2460, 'y2': 2791},
        {'x1': 2409, 'y1': 2792, 'x2': 2460, 'y2': 2796},
        {'x1': 2351, 'y1': 2792, 'x2': 2454, 'y2': 2794},
        {'x1': 2352, 'y1': 2794, 'x2': 2431, 'y2': 2794},
        {'x1': 2317, 'y1': 2795, 'x2': 2431, 'y2': 2798},
        {'x1': 2255, 'y1': 2795, 'x2': 2316, 'y2': 2795},
        {'x1': 2255, 'y1': 2796, 'x2': 2316, 'y2': 2797},
        {'x1': 2339, 'y1': 2797, 'x2': 2395, 'y2': 2798},
    ],
    [
        # The comma after this first entry was missing, which made the literal a SyntaxError.
        {'x1': 1892, 'y1': 125, 'x2': 2124, 'y2': 125},
        {'x1': 2127, 'y1': 126, 'x2': 2200, 'y2': 127},
        {'x1': 1981, 'y1': 126, 'x2': 2126, 'y2': 126},
        {'x1': 2004, 'y1': 127, 'x2': 2183, 'y2': 127},
        {'x1': 2116, 'y1': 128, 'x2': 2350, 'y2': 128},
        {'x1': 2439, 'y1': 129, 'x2': 2648, 'y2': 129},
        {'x1': 2285, 'y1': 130, 'x2': 2395, 'y2': 131},
        {'x1': 2339, 'y1': 130, 'x2': 2761, 'y2': 130},
        {'x1': 2396, 'y1': 131, 'x2': 2801, 'y2': 131},
        {'x1': 3003, 'y1': 132, 'x2': 3137, 'y2': 132},
        {'x1': 2567, 'y1': 132, 'x2': 2842, 'y2': 132},
        {'x1': 2969, 'y1': 133, 'x2': 3138, 'y2': 133},
        {'x1': 2607, 'y1': 133, 'x2': 2842, 'y2': 133},
        {'x1': 2915, 'y1': 134, 'x2': 3208, 'y2': 134},
    ],
]
答案 0 :(得分:0)
所以我认为这可能对您有用:
先按 y1 值排序:
# Order the lines by ascending y1; sorted() is stable, so equal y1 values keep their input order.
new_lines = sorted(horizontal_lines, key=lambda line: line['y1'])
[
{"x1": 1892, "y1": 125, "x2": 2124, "y2": 125},
{"x1": 2127, "y1": 126, "x2": 2200, "y2": 127},
{"x1": 1981, "y1": 126, "x2": 2126, "y2": 126},
{"x1": 2004, "y1": 127, "x2": 2183, "y2": 127},
{"x1": 2116, "y1": 128, "x2": 2350, "y2": 128},
{"x1": 2439, "y1": 129, "x2": 2648, "y2": 129},
{"x1": 2285, "y1": 130, "x2": 2395, "y2": 131},
{"x1": 2339, "y1": 130, "x2": 2761, "y2": 130},
{"x1": 2396, "y1": 131, "x2": 2801, "y2": 131},
{"x1": 3003, "y1": 132, "x2": 3137, "y2": 132},
{"x1": 2567, "y1": 132, "x2": 2842, "y2": 132},
{"x1": 2969, "y1": 133, "x2": 3138, "y2": 133},
{"x1": 2607, "y1": 133, "x2": 2842, "y2": 133},
{"x1": 2915, "y1": 134, "x2": 3208, "y2": 134},
{"x1": 2257, "y1": 2787, "x2": 2419, "y2": 2787},
{"x1": 2256, "y1": 2788, "x2": 2460, "y2": 2788},
{"x1": 2256, "y1": 2789, "x2": 2460, "y2": 2789},
{"x1": 2256, "y1": 2790, "x2": 2460, "y2": 2790},
{"x1": 2256, "y1": 2791, "x2": 2459, "y2": 2798},
{"x1": 2265, "y1": 2791, "x2": 2460, "y2": 2791},
{"x1": 2409, "y1": 2792, "x2": 2460, "y2": 2796},
{"x1": 2351, "y1": 2792, "x2": 2454, "y2": 2794},
{"x1": 2352, "y1": 2794, "x2": 2431, "y2": 2794},
{"x1": 2317, "y1": 2795, "x2": 2431, "y2": 2798},
{"x1": 2255, "y1": 2795, "x2": 2316, "y2": 2795},
{"x1": 2255, "y1": 2796, "x2": 2316, "y2": 2797},
{"x1": 2339, "y1": 2797, "x2": 2395, "y2": 2798},
]
然后有多种方法可以将排序后的列表拆分成组,例如在相邻两项的 y1 之差大于 delta 的位置断开:
def get_index(new_list, delta):
    """Split a list of line dicts into groups of neighbouring ``y1`` values.

    Adjacent entries stay in the same group while the next entry's ``y1`` is
    at most ``delta`` greater than the current one; a larger jump starts a new
    group. Only adjacent entries are compared, so ``new_list`` should already
    be sorted by ``y1`` in ascending order.

    Args:
        new_list: List of mappings that each have a numeric ``"y1"`` key.
        delta: Largest gap between adjacent ``y1`` values that still keeps
            two entries in the same group.

    Returns:
        A list of groups, each a list of the original dicts in input order.
        An empty input yields an empty list.
    """
    if not new_list:
        # Nothing to group; avoid reporting a single empty group.
        return []

    groups = [[new_list[0]]]
    # Walk adjacent pairs and open a new group wherever the gap exceeds delta.
    for previous, current in zip(new_list, new_list[1:]):
        if previous["y1"] + delta < current["y1"]:
            groups.append([current])
        else:
            groups[-1].append(current)
    return groups
get_index(new_lines,10)
[
[
{"x1": 1892, "y1": 125, "x2": 2124, "y2": 125},
{"x1": 2127, "y1": 126, "x2": 2200, "y2": 127},
{"x1": 1981, "y1": 126, "x2": 2126, "y2": 126},
{"x1": 2004, "y1": 127, "x2": 2183, "y2": 127},
{"x1": 2116, "y1": 128, "x2": 2350, "y2": 128},
{"x1": 2439, "y1": 129, "x2": 2648, "y2": 129},
{"x1": 2285, "y1": 130, "x2": 2395, "y2": 131},
{"x1": 2339, "y1": 130, "x2": 2761, "y2": 130},
{"x1": 2396, "y1": 131, "x2": 2801, "y2": 131},
{"x1": 3003, "y1": 132, "x2": 3137, "y2": 132},
{"x1": 2567, "y1": 132, "x2": 2842, "y2": 132},
{"x1": 2969, "y1": 133, "x2": 3138, "y2": 133},
{"x1": 2607, "y1": 133, "x2": 2842, "y2": 133},
{"x1": 2915, "y1": 134, "x2": 3208, "y2": 134},
],
[
{"x1": 2257, "y1": 2787, "x2": 2419, "y2": 2787},
{"x1": 2256, "y1": 2788, "x2": 2460, "y2": 2788},
{"x1": 2256, "y1": 2789, "x2": 2460, "y2": 2789},
{"x1": 2256, "y1": 2790, "x2": 2460, "y2": 2790},
{"x1": 2256, "y1": 2791, "x2": 2459, "y2": 2798},
{"x1": 2265, "y1": 2791, "x2": 2460, "y2": 2791},
{"x1": 2409, "y1": 2792, "x2": 2460, "y2": 2796},
{"x1": 2351, "y1": 2792, "x2": 2454, "y2": 2794},
{"x1": 2352, "y1": 2794, "x2": 2431, "y2": 2794},
{"x1": 2317, "y1": 2795, "x2": 2431, "y2": 2798},
{"x1": 2255, "y1": 2795, "x2": 2316, "y2": 2795},
{"x1": 2255, "y1": 2796, "x2": 2316, "y2": 2797},
{"x1": 2339, "y1": 2797, "x2": 2395, "y2": 2798},
],
]