我有以下代码:
# Imports
library(cluster)
# Read input dataset from CSV file
input_dataset <- read.csv("C:\\Users\\sw029693\\Desktop\\Overtime_work_hrs_analytics\\input_dataset.csv", header = TRUE)
# Find K
wss <- (nrow(input_dataset$shift_length_avg)-1)*sum(apply(matrix(input_dataset$shift_length_avg,nrow = length(input_dataset$shift_length_avg),ncol = 1),2,var))
for (i in 2:15) {
kmeans(matrix(input_dataset$shift_length_avg,nrow = length(input_dataset$shift_length_avg),ncol = 1), centers=i)
# wss[i] <- sum(kmeans(input_dataset$shift_length_avg, centers=i)$withinss)
}
我收到以下消息:
Error in kmeans(matrix(input_dataset$shift_length_avg, nrow = length(input_dataset$shift_length_avg), :
more cluster centers than distinct data points.
我不确定如何解释此消息。有什么想法吗?
数据如下:
dput(matrix(input_dataset$shift_length_avg,nrow = length(input_dataset$shift_length_avg),ncol = 1), file = "",
control = c("keepNA", "keepInteger", "showAttributes"))
12L, .........., 9L, 9L, 12L, 11L, 14L, 7L, 10L, 13L, 14L, 8L, 8L, 8L, 12L,
13L, 15L, 8L, 12L, 9L, 13L, 13L, 8L, 8L, 12L, 12L, 13L, 13L,
13L, 16L, 13L, 14L, 14L, 14L, 13L, 13L, 14L, 7L, 13L, 13L, 14L,
9L, 13L, 13L, 11L, 9L, 12L, 8L, 12L, 14L, 14L, 13L, 13L, 13L,
9L, 15L, 13L, 13L, 9L, 9L, 14L, 8L, 7L, 14L, 7L, 10L, 14L, 13L,
7L, 9L, 13L, 13L, 7L, 13L, 9L, 14L, 14L, 10L, 13L, 14L, 7L, 13L,
13L, 13L, 13L, 7L, 9L, 13L, 12L, 7L, 13L, 11L, 9L, 10L, 13L,
9L, 11L, 10L, 11L, 10L, 11L, 9L, 11L, 13L, 11L, 13L, 13L, 7L,
8L, 9L, 8L, 9L, 6L, 8L, 13L, 6L, 9L, 13L, 13L, 9L, 7L, 12L, 13L,
13L, 7L, 7L, 10L, 10L, 10L, 8L, 8L, 8L, 8L, 10L, 8L, 7L, 7L,
14L, 10L, 13L, 6L, 13L, 8L, 12L, 14L, 13L, 11L, 8L, 10L, 8L,
8L, 8L, 11L, 11L, 11L, 13L, 9L, 8L, 12L, 11L, 9L, 15L, 15L, 9L,
13L, 6L, 9L, 9L, 12L, 7L, 9L, 12L, 13L, 13L), .Dim = c(116735L,
1L)