Question

我有一些嵌套在“列表的列表”中的字典，例如：

# Sample input: two groups of records. Each record holds a 'the_geom' tuple
# plus value lists stored under integer keys.
X = [
    [
        {'the_geom': (999, 999), 1: [111, 112, 113], 2: [121, 122, 123]},
        {'the_geom': (998, 998), 1: [211, 212, 213], 2: [221, 222, 223]},
    ],
    [
        {
            'the_geom': (997, 997),
            1: [1111, 1112, 1113, 1114],
            2: [1121, 1122, 1123, 1124],
        },
        {'the_geom': (996, 996), 1: [1211, 1212, 1213], 2: [2211, 2212, 2213]},
    ],
]

我想找到一个能得到如下结果的函数：

# Expected result: per record, the n-th items of the integer-keyed lists are
# grouped together under 'values'.
XX = [
    [
        {
            'the_geom': (999, 999),
            'values': [[111, 121], [112, 122], [113, 123]],
        },
        {
            'the_geom': (998, 998),
            'values': [[211, 221], [212, 222], [213, 223]],
        },
    ],
    [
        {
            'the_geom': (997, 997),
            'values': [[1111, 1121], [1112, 1122], [1113, 1123], [1114, 1124]],
        },
        {
            'the_geom': (996, 996),
            'values': [[1211, 2211], [1212, 2212], [1213, 2213]],
        },
    ],
]

我该怎么做？

Answer 1

你可以这样做：

# Rebuild every record of X: keep 'the_geom' and transpose all remaining
# value lists so the n-th items of each list end up together in one row.
new_x = []
for group in X:
    converted_group = []
    for record in group:
        columns = [values for key, values in record.items() if key != 'the_geom']
        rows = [list(row) for row in zip(*columns)]
        converted_group.append({'the_geom': record['the_geom'], 'values': rows})
    new_x.append(converted_group)

Answer 2

它可以写成一行，但按照与原问题相同的结构来排版会更容易理解。它也能处理内部字典中含有更多键（例如 1 和 2 之外的键）的情况。

# kmeans clustering algorithm
def kmeans(data, k, c=None):
    """Run k-means clustering on *data* and print a summary of the result.

    Args:
        data: Sequence of data points.
        k: Number of clusters.
        c: Optional initial centroids, one per cluster. When omitted or
            empty, ``k`` points are drawn from *data* by
            ``randomize_centroids``.

    Returns:
        None. The number of instances, the number of iterations, the final
        centroids and the members of every cluster are printed.

    Raises:
        ValueError: If *c* is non-empty but does not hold exactly ``k``
            centroids.
    """
    if c is not None and len(c) > 0:
        if len(c) != k:
            raise ValueError(
                "expected %d initial centroids, got %d" % (k, len(c))
            )
        # Convert to plain Python numbers/lists so the list equality test in
        # has_converged never has to take the truth value of a NumPy array.
        centroids = np.asarray(c).tolist()
    else:
        centroids = randomize_centroids(data, [], k)
    old_centroids = [[] for _ in range(k)]
    # Bound before the loop so the report below still works when the loop
    # body never executes (e.g. k == 0).
    clusters = [[] for _ in range(k)]
    iterations = 0
    while not has_converged(centroids, old_centroids, iterations):
        iterations += 1
        # assign data points to clusters
        clusters = euclidean_dist(data, centroids, [[] for _ in range(k)])
        # recalculate centroids
        for index, cluster in enumerate(clusters):
            old_centroids[index] = centroids[index]
            centroids[index] = np.mean(cluster, axis=0).tolist()
    print("The total number of data instances is: " + str(len(data)))
    print("The total number of iterations necessary is: " + str(iterations))
    print("The means of each cluster are: " + str(centroids))
    print("The clusters are as follows:")
    for cluster in clusters:
        print("Cluster with a size of " + str(len(cluster)) + " starts here:")
        print(np.array(cluster).tolist())
        print("Cluster ends here.")
    return

# Assigns every data point to the cluster of its nearest centroid,
# using the euclidean distance.
def euclidean_dist(data, centroids, clusters):
    """Distribute *data* over *clusters* by nearest centroid.

    Args:
        data: Sequence of data points (numbers, lists or arrays).
        centroids: Sequence of centroids; ``centroids[i]`` belongs to
            ``clusters[i]``.
        clusters: List of lists, one per centroid. It is extended in place.

    Returns:
        The same *clusters* object. Any cluster that received no point is
        given one randomly chosen data point so that no cluster is empty.
    """
    for instance in data:
        # asarray lets plain Python lists be subtracted as vectors.
        point = np.asarray(instance)
        # min() returns the first index on ties, so the earliest centroid
        # wins when two are equally close.
        mu_index = min(
            range(len(centroids)),
            key=lambda i: np.linalg.norm(point - np.asarray(centroids[i])),
        )
        clusters[mu_index].append(instance)
    # If any cluster is empty then assign one point from the data set
    # randomly so as to not have empty clusters and 0 means.
    for cluster in clusters:
        if not cluster:
            # A scalar index is required: a size-1 index array cannot be used
            # to subscript a Python list.
            cluster.append(data[int(np.random.randint(0, len(data)))])
    return clusters

# randomize initial centroids
def randomize_centroids(data, centroids, k):
    """Append ``k`` randomly chosen data points to *centroids*.

    Args:
        data: Non-empty sequence of data points to draw from.
        centroids: List that receives the chosen points; mutated in place.
        k: Number of points to draw. Draws are independent, so the same
            point may be picked more than once.

    Returns:
        The same *centroids* list.
    """
    for _ in range(k):
        # Use a scalar index: a size-1 index array cannot subscript a list.
        centroids.append(data[int(np.random.randint(0, len(data)))])
    return centroids

# Decide whether the clustering loop should stop.
def has_converged(centroids, old_centroids, iterations):
    """Report whether iteration should stop.

    Stops once more than 1000 iterations have run; otherwise the result is
    the outcome of comparing the previous centroids with the current ones.
    """
    MAX_ITERATIONS = 1000
    return iterations > MAX_ITERATIONS or old_centroids == centroids

###############################################################################
# STARTING COMPUTATION                                                        #
###############################################################################
# Two one-dimensional sample data sets; each one is clustered on its own.
A = [1.1, 1.02, 2.3, 10, 10.01, 10.1, 12, 16, 18, 18]
B = [1.01, 1.02, 1.001, 1.03, 2.10, 2.94, 3.01, 8.99]
T = [A, B]
k = 3
for t in T:
    # Take the first k entries of a random permutation as initial centroids
    # (tied to k instead of a hard-coded 3).
    cent = np.random.permutation(t)[:k]
    # kmeans prints its own report and returns None, so its result is not
    # printed; print("") emits the blank separator line on Python 2 and 3.
    kmeans(t, k, cent)
    print("")

Answer 3

您可以将 zip 与列表推导式（list comprehension）一起使用：

# Transpose the integer-keyed lists of every record in X into rows.
# Assumes each record's keys are 'the_geom' plus the integers 1..n.
# Loop variables are named `group`/`record` so the builtins `list` and
# `dict` are not shadowed.
[
    [
        {
            'the_geom': record['the_geom'],
            # zip alone yields tuples (and is a lazy iterator on Python 3);
            # build real lists so the result has the list-of-lists shape.
            'values': [
                list(row)
                for row in zip(*[record[key] for key in range(1, len(record))])
            ],
        }
        for record in group
    ]
    for group in X
]

Answer 4

丑陋的变量名称，但它有效：

required

Answer 5

// Folds a flat array of alternating values into objects with the keys
// "name" and "frequency", collecting the finished objects in `reduced`.
//
// Callback parameters: a = accumulator (starts as `false`), b = current
// element, i = index, c = the array (unused). r and l are extra parameters
// used as locals: r is the object being filled, l holds the two key names.
//
// Per element:
//   r = a || {}            reuse the pending object, or start a new one when
//                          the accumulator is falsy
//   r[l[i%2]] = ...        even index -> "name", odd index -> "frequency";
//                          values for which isNaN() is true are trimmed
//   a ? (push, []._) : r   if an object was already pending, it is now
//                          complete: push it and return `[]._` (undefined)
//                          so the next element starts fresh; otherwise hand
//                          the half-filled object on as the accumulator
notes_array = ["C", 1046.50]
reduced=[], notes_array.reduce((a,b,i,c,r,l=["name","frequency"])=>( r=a||{},r[l[i%2]]=isNaN(b)?b.trim():b,a?(reduced.push(r),[]._):r ),false);

输出：

# Pair the n-th items of the lists stored under keys 1 and 2 of every record.
# zip() follows the actual list lengths, so records with more (or fewer) than
# three items are handled instead of being truncated or raising IndexError.
res1 = [
    [
        {'the_geom': y['the_geom'], 'values': [[a, b] for a, b in zip(y[1], y[2])]}
        for y in z
    ]
    for z in X
]

在字典中列出不同列表的第n项

5 个答案: