编辑:有点不好意思——我用错了分隔符。如果浪费了大家的时间,非常抱歉!代码现在可以运行了,但运行时间很长,所以我可能过几个小时再回来。谢谢大家!
我正在尝试使用在线资源 http://alexminnaar.com/time-series-classification-and-clustering-with-python.html 中的代码,并将其应用于 csv 文件中的时间序列数据。错误出现在代码对“s1”调用 enumerate 的时候。我知道浮点数不能被枚举,但我不太确定该如何解决这个问题,也不清楚代码在这里究竟需要枚举什么。我确信我的数据集格式与他的一致,因为他的数据集就放在 GitHub 仓库里。
谢谢您能给我的任何帮助!
编辑:我忘了包含 DTW 函数,现已添加在下面。
函数DTWDistance
def DTWDistance(s1, s2, w):
    """Return the windowed dynamic time warping distance between two sequences.

    The warping path is restricted to a band around the diagonal: ``s1[i]``
    may only be aligned with ``s2[j]`` when ``abs(i - j) <= w``.

    Args:
        s1: First numeric sequence (must support ``len`` and indexing).
        s2: Second numeric sequence.
        w: Half-width of the warping band.  It is widened to at least
            ``abs(len(s1) - len(s2))`` so the last elements can be aligned.

    Returns:
        The square root of the smallest accumulated squared difference over
        all admissible warping paths, or ``inf`` when no path exists.
    """
    n, m = len(s1), len(s2)
    w = max(w, abs(n - m))
    inf = float('inf')

    # Sparse cost table: only cells inside the band are stored, every other
    # cell reads as infinity.  (-1, -1) is the zero-cost start of every path.
    cost = {(-1, -1): 0}

    for i in range(n):
        # The band is inclusive on both sides; range() is half-open, hence
        # the +1.  Without it j == i + w is skipped, which makes the band
        # asymmetric and leaves it empty for w == 0.
        for j in range(max(0, i - w), min(m, i + w + 1)):
            dist = (s1[i] - s2[j]) ** 2
            cost[(i, j)] = dist + min(
                cost.get((i - 1, j), inf),
                cost.get((i, j - 1), inf),
                cost.get((i - 1, j - 1), inf),
            )

    return math.sqrt(cost.get((n - 1, m - 1), inf))
函数LB_Keogh
def LB_Keogh(s1, s2, r):
    """Return the LB_Keogh bound of ``s1`` against the envelope of ``s2``.

    For every position ``ind`` of ``s1`` the minimum and maximum of ``s2``
    over the indices ``ind - r`` .. ``ind + r`` (clipped to the bounds of
    ``s2``) form an envelope.  Values of ``s1`` outside that envelope
    contribute their squared distance to the nearer envelope edge.

    Args:
        s1: Numeric sequence that is compared against the envelope.
        s2: Numeric, sliceable sequence the envelope is built from.
        r: Reach of the envelope on each side of the current index.

    Returns:
        The square root of the accumulated squared excursions.

    Raises:
        ValueError: If the window around some index of ``s1`` contains no
            element of ``s2`` (``min`` of an empty sequence), for example
            when ``s2`` is empty.
    """
    LB_sum = 0
    for ind, value in enumerate(s1):
        # Slices are half-open, so +1 is needed to include index ind + r.
        # Without it the window is lopsided and empty for r == 0.
        window = s2[max(ind - r, 0):ind + r + 1]
        lower_bound = min(window)
        upper_bound = max(window)

        if value > upper_bound:
            LB_sum += (value - upper_bound) ** 2
        elif value < lower_bound:
            LB_sum += (value - lower_bound) ** 2

    return math.sqrt(LB_sum)
函数k_means_clust
def k_means_clust(data, num_clust, num_iter, w=5):
    """Cluster time series with k-means, using windowed DTW as the distance.

    Args:
        data: Sequence of numeric series, one series per item.  The series
            are expected to have the same length, because the centroid
            update averages them position by position.
        num_clust: Number of clusters; that many items of ``data`` are
            sampled at random as the initial centroids.
        num_iter: Number of assignment/update rounds to run.
        w: Window size handed to both ``LB_Keogh`` and ``DTWDistance``.

    Returns:
        The list of ``num_clust`` centroids after the last round.
    """
    centroids = random.sample(data, num_clust)

    for iteration in range(1, num_iter + 1):
        print(iteration)  # progress indicator, one line per round
        assignments = {}

        # Assign every series to its nearest centroid.
        for ind, series in enumerate(data):
            min_dist = float('inf')
            closest_clust = None
            for c_ind, centroid in enumerate(centroids):
                # Only run the expensive DTW when the cheap lower bound
                # says this centroid could beat the best one so far.  The
                # bound must use the same window as the DTW call.
                if LB_Keogh(series, centroid, w) < min_dist:
                    cur_dist = DTWDistance(series, centroid, w)
                    if cur_dist < min_dist:
                        min_dist = cur_dist
                        closest_clust = c_ind

            if closest_clust is None:
                # No centroid is at a finite distance; the series cannot be
                # assigned, so leave it out of this round.
                continue
            # setdefault + append also records the first member of a
            # cluster, which a plain "create empty list" branch would drop.
            assignments.setdefault(closest_clust, []).append(ind)

        # Recalculate each non-empty cluster's centroid as the mean of its
        # members; clusters without members keep their previous centroid.
        for key, members in assignments.items():
            # zip(*) walks the member series position by position; it stops
            # at the shortest series if their lengths differ.
            columns = zip(*(data[k] for k in members))
            centroids[key] = [sum(column) / len(members) for column in columns]

    return centroids
数据:
# Load the time-series data from disk ('filepath' is a placeholder path).
# NOTE(review): no `delimiter` is passed, so genfromtxt splits fields on
# whitespace.  A comma-separated row would then be read as a single field and
# the result would be 1-D instead of one row per series — confirm the file's
# separator and pass `delimiter=` explicitly if it is not whitespace.
time_series=np.genfromtxt('filepath')
调用函数
# Cluster the loaded series: 4 clusters, 10 iterations, window size w=4.
# list() turns the array into a list of its first-axis items; this assumes
# time_series is 2-D so each item is one series — a 1-D array would yield
# scalars instead (TODO confirm the loaded shape).
centroids=k_means_clust(list(time_series),4,10,4)
我得到的错误是:
--------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-50-909588871671> in <module>()
6
7
----> 8 centroids=k_means_clust(list(df),4,10,4)
9
10
<ipython-input-49-f91bbc3a5a04> in k_means_clust(data, num_clust, num_iter, w)
11 closest_clust=None
12 for c_ind,j in enumerate(centroids):
---> 13 if LB_Keogh(i,j,5)<min_dist:
14 cur_dist=DTWDistance(i,j,w)
15 if cur_dist<min_dist:
<ipython-input-48-b5b8e44a435a> in LB_Keogh(s1, s2, r)
1 def LB_Keogh(s1,s2,r):
2 LB_sum=0
----> 3 for ind,i in enumerate(s1):
4
5 lower_bound=min(s2[(ind-r if ind-r>=0 else 0):(ind+r)])
TypeError: 'numpy.float64' object is not iterable