我有一个程序从命令行获取csv的数据文件,以及clutering的整数值(这是从头开始的K-Means程序)。我一直在遇到这些错误:
Traceback (most recent call last):
File "NewK.py", line 73, in <module>
kMeans(centroids,dataPoints)
File "NewK.py", line 53, in kMeans
CentroidMeans.append(np.mean(dataPoints[t2 == [x]], axis=0))
File "C:\Python27\lib\site-packages\numpy\core\fromnumeric.py", line 2942, in
mean
out=out, **kwargs)
File "C:\Python27\lib\site-packages\numpy\core\_methods.py", line 68, in _mean
ret, rcount, out=ret, casting='unsafe', subok=False)
KeyboardInterrupt
和这个&#34;空片的平均值:&#34;
C:\Python27>python NewK.py iris.txt 3
Points in data set: 150
Dimensionality of Data: (150, 4)
C:\Python27\lib\site-packages\numpy\core\_methods.py:59: RuntimeWarning: Mean of
empty slice.
warnings.warn("Mean of empty slice.", RuntimeWarning)
C:\Python27\lib\site-packages\numpy\core\_methods.py:68: RuntimeWarning: invalid
value encountered in true_divide
ret, rcount, out=ret, casting='unsafe', subok=False)
每次都不会发生,但确实经常发生。处理这个问题的最佳方法是什么?是的,这是作业:)
Python代码:
import numpy as np
from pprint import pprint
import random
import sys
import warnings
arglist = sys.argv
NoOfCentroids = int(arglist[2])
dataPointsFromFile = np.array(np.loadtxt(sys.argv[1], delimiter = ','))
dataRange = ([])
with open(arglist[1]) as f:
print 'Points in data set: ',sum(1 for _ in f)
dataRange.append(round(np.amin(dataPointsFromFile),1))
dataRange.append(round(np.amax(dataPointsFromFile),1))
dataRange = np.asarray(dataRange)
dataPoints = np.array(dataPointsFromFile)
print 'Dimensionality of Data: ', dataPoints.shape
randomCentroids = []
data = ([])
templist = []
i = 0
while i<NoOfCentroids:
for j in range(len(dataPointsFromFile[1,:])):
cat = round(random.uniform(np.amin(dataPointsFromFile),np.amax(dataPointsFromFile)),1)
templist.append(cat)
print '\n'
randomCentroids.append(templist)
templist = []
i = i+1
centroids = np.asarray(randomCentroids)
def kMeans(array1, array2):
ConvergenceCounter = 1
keepGoing = True
StartingCentroids = np.copy(centroids)
while keepGoing:
#--------------Find The new means---------#
t0 = StartingCentroids[None, :, :] - dataPoints[:, None, :]
t1 = np.linalg.norm(t0, axis=-1)
t2 = np.argmin(t1, axis=-1)
#------Push the new means to a new array for comparison---------#
CentroidMeans = []
for x in xrange(len(StartingCentroids)):
CentroidMeans.append(np.mean(dataPoints[t2 == [x]], axis=0))
#--------Convert to a numpy array--------#
NewMeans = np.asarray(CentroidMeans)
#------Compare the New Means with the Starting Means------#
if np.array_equal(NewMeans,StartingCentroids):
print ('Convergence has been reached after {} moves'.format(ConvergenceCounter))
print ('Starting Centroids:\n{}'.format(centroids))
print ('Final Means:\n{}'.format(NewMeans))
print ('Final Cluster assignments: {}'.format(t2))
for x in xrange(len(StartingCentroids)):
print ('Cluster {}:\n'.format(x)), dataPoints[t2 == [x]]
for x in xrange(len(StartingCentroids)):
print ('Size of Cluster {}:'.format(x)), len(dataPoints[t2 == [x]])
keepGoing = False
else:
ConvergenceCounter = ConvergenceCounter +1
StartingCentroids =np.copy(NewMeans)
kMeans(centroids, dataPoints)