我正在对三维数组形式的数据运行 k-means 聚类算法。数据如下所示(1910–2000 年间每年有一个 51×200 的数组,其中 51 行对应美国各州);下面是 1916 年的示例,仅列出前三行(即 3×200):
Year 1916
AK 0.1216 0.0811 0.0541 0.0405 0.0541 0.0000 0.0473 0.0000 0.0541 0.0000 0.0338 0.0338 0.0473 0.0338 0.0000 0.0405 0.0000 0.0338 0.0338 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0338 0.0000 0.0405 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0405 0.0000 0.0000 0.0000 0.0405 0.0338 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0338 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0338 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0338 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
AL 0.0837 0.0109 0.0113 0.0160 0.0198 0.0158 0.0067 0.0139 0.0121 0.0082 0.0102 0.0083 0.0094 0.0046 0.0031 0.0096 0.0074 0.0164 0.0129 0.0041 0.0042 0.0120 0.0232 0.0100 0.0115 0.0071 0.0024 0.0062 0.0087 0.0095 0.0088 0.0043 0.0078 0.0024 0.0051 0.0375 0.0030 0.0080 0.0101 0.0024 0.0110 0.0064 0.0104 0.0028 0.0019 0.0066 0.0013 0.0052 0.0095 0.0044 0.0048 0.0119 0.0018 0.0081 0.0071 0.0124 0.0044 0.0086 0.0033 0.0022 0.0070 0.0018 0.0025 0.0070 0.0034 0.0075 0.0004 0.0103 0.0008 0.0063 0.0112 0.0006 0.0204 0.0012 0.0022 0.0004 0.0032 0.0025 0.0015 0.0012 0.0073 0.0030 0.0016 0.0045 0.0044 0.0004 0.0028 0.0139 0.0017 0.0026 0.0038 0.0049 0.0167 0.0013 0.0004 0.0009 0.0028 0.0005 0.0050 0.0020 0.0000 0.0033 0.0086 0.0018 0.0012 0.0008 0.0060 0.0035 0.0059 0.0059 0.0008 0.0029 0.0007 0.0052 0.0079 0.0079 0.0028 0.0003 0.0027 0.0066 0.0036 0.0044 0.0032 0.0036 0.0024 0.0000 0.0076 0.0004 0.0068 0.0020 0.0002 0.0004 0.0034 0.0007 0.0011 0.0029 0.0061 0.0003 0.0068 0.0008 0.0066 0.0006 0.0029 0.0004 0.0009 0.0006 0.0036 0.0004 0.0015 0.0007 0.0005 0.0004 0.0019 0.0031 0.0019 0.0060 0.0059 0.0011 0.0000 0.0005 0.0006 0.0016 0.0018 0.0054 0.0014 0.0000 0.0008 0.0010 0.0024 0.0013 0.0040 0.0005 0.0005 0.0004 0.0000 0.0000 0.0037 0.0005 0.0000 0.0015 0.0060 0.0000 0.0000 0.0002 0.0023 0.0000 0.0034 0.0000 0.0000 0.0011 0.0047 0.0002 0.0040 0.0000 0.0018 0.0052 0.0052 0.0004 0.0025 0.0000
AR 0.0703 0.0193 0.0205 0.0120 0.0232 0.0227 0.0089 0.0110 0.0120 0.0100 0.0082 0.0117 0.0103 0.0061 0.0032 0.0093 0.0113 0.0103 0.0115 0.0040 0.0042 0.0155 0.0245 0.0111 0.0096 0.0065 0.0012 0.0090 0.0155 0.0110 0.0196 0.0059 0.0150 0.0047 0.0042 0.0132 0.0037 0.0087 0.0074 0.0024 0.0079 0.0072 0.0130 0.0026 0.0014 0.0045 0.0018 0.0081 0.0075 0.0045 0.0036 0.0069 0.0009 0.0068 0.0066 0.0095 0.0062 0.0076 0.0042 0.0030 0.0059 0.0022 0.0030 0.0052 0.0068 0.0079 0.0005 0.0094 0.0030 0.0062 0.0076 0.0011 0.0133 0.0017 0.0037 0.0015 0.0048 0.0031 0.0020 0.0010 0.0047 0.0054 0.0024 0.0042 0.0061 0.0018 0.0021 0.0097 0.0032 0.0084 0.0124 0.0047 0.0075 0.0039 0.0000 0.0042 0.0040 0.0008 0.0068 0.0018 0.0000 0.0035 0.0037 0.0030 0.0011 0.0005 0.0047 0.0072 0.0013 0.0048 0.0023 0.0087 0.0008 0.0038 0.0044 0.0045 0.0025 0.0007 0.0033 0.0028 0.0039 0.0024 0.0029 0.0033 0.0030 0.0000 0.0046 0.0011 0.0043 0.0028 0.0006 0.0019 0.0024 0.0009 0.0004 0.0045 0.0035 0.0024 0.0022 0.0007 0.0036 0.0012 0.0025 0.0000 0.0025 0.0005 0.0038 0.0000 0.0019 0.0012 0.0008 0.0017 0.0023 0.0036 0.0023 0.0036 0.0040 0.0005 0.0005 0.0014 0.0005 0.0005 0.0008 0.0044 0.0018 0.0000 0.0005 0.0016 0.0024 0.0030 0.0024 0.0006 0.0000 0.0008 0.0000 0.0000 0.0048 0.0022 0.0000 0.0039 0.0058 0.0000 0.0000 0.0000 0.0037 0.0000 0.0022 0.0012 0.0000 0.0012 0.0025 0.0000 0.0061 0.0000 0.0029 0.0023 0.0037 0.0000 0.0032 0.0000
我目前对每一年的 50×200 数组分别单独运行该算法。
我的问题是
Q1:逐年单独运行非常麻烦。有没有办法一次性对多个年份运行算法,并把结果绘制成 3D 图?我希望第三个维度是年份,并用一张 3D 图展示所有数据的聚类结果。
Q2:如何一次性读入带年份的全部数据,然后分别得到每一年的结果?目前我不得不把每一年的数据拆分成单独的文本文件,这很麻烦。
我的代码:
from __future__ import division
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
import numpy
from numpy.random import *
import numpy as np
import random
from functools import partial
from sklearn.metrics import pairwise_distances_argmin_min
from scipy.spatial.distance import pdist,squareform
import pandas as pd
from functools import partial
from matplotlib import pyplot
def read_from_file(filename):
    """Read a whitespace-separated data file into a list of float rows.

    The first token of every line is treated as a row label and dropped;
    the remaining tokens are converted to floats.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one list of floats per non-blank line, in file order.

    Raises:
        ValueError: If a token after the label cannot be parsed as a float.
    """
    data = []
    with open(filename) as f:
        for line in f:
            fields = line.split()
            # A blank (or whitespace-only) line would otherwise become an
            # empty row and make the resulting matrix ragged.
            if not fields:
                continue
            data.append([float(x) for x in fields[1:]])
    return data
def main(filename="Data_Fcopy.txt", n_clusters=9):
    """Cluster the rows of a data file with k-means and print the assignment.

    Prints, in order: the fitted estimator, the label array produced by the
    fit, and a one-column table holding the 1-based index of the nearest
    cluster centre for every input row.

    Args:
        filename: Path of the data file handed to ``read_from_file``.
        n_clusters: Number of clusters to fit.
    """
    import sys  # local import: only needed for the print threshold below

    # Recent NumPy releases reject NaN as a print threshold; sys.maxsize is
    # the documented way to disable summarisation. It is set once, before
    # anything is printed, so it actually applies to the output below.
    numpy.set_printoptions(threshold=sys.maxsize)

    # A 2-D float array; ragged rows fail here instead of deep inside the fit.
    data = np.asarray(read_from_file(filename), dtype=float)

    km = KMeans(n_clusters=n_clusters, init='random').fit(data)
    print(km)
    centers = km.cluster_centers_
    labels = km.labels_
    print(labels)

    # One vectorised call over all rows; the helper expects 2-D input, so
    # feeding it one 1-D row at a time is both slower and rejected by
    # current scikit-learn versions.
    closest, _ = pairwise_distances_argmin_min(data, centers)

    # The original spelled the column as the adjacent literals 'x' 'center',
    # which Python concatenates to 'xcenter'; written out explicitly here so
    # the printed header is unchanged.
    S = pd.DataFrame(closest + 1, columns=['xcenter'])
    print(S.to_string())
# Run the clustering only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()