我的数据框为fllow:
.config
我打算在我的数据框上应用k-means算法:
PickUP <- data.frame(pickuplong = c(-73.93909 ,-73.94189 ,-73.93754,-73.91638,-73.92792 ,-73.88634), pickuplat =c(40.84408,40.83841,40.85311,40.84966,40.86284,40.85628))
现在我尝试计算每个群集的质心,如下所示:
library(maps)
library(pamr)
library(ggplot2)
library(rgdal)
library(png)
library(grid)
p <- as.data.frame(project(cbind(mdt$Pickup_longitude, mdt$Pickup_latitude),
proj = "+init=ESRI:54030"))
names(p) <- c("long.rob", "lat.rob")
mdt2 <- cbind(mdt, p)
theme_map <- list(theme(panel.grid.minor = element_blank(), panel.grid.major
= element_blank(),panel.background = element_blank(),
plot.background = element_rect(fill = "#e6e8ed"), panel.border
= element_blank(), axis.line = element_blank(), axis.text.x =
element_blank(),axis.text.y = element_blank(), axis.ticks =
element_blank(), axis.title.x = element_blank(), axis.title.y =
element_blank()))
# Spherical coordinates in radians
mdt2$long.rad <- mdt2$Pickup_longitude * (2 * pi)/360
mdt2$lat.rad <- (mdt2$Pickup_latitude * 2) * (2 * pi)/360
R <- (6378 + 6356)/2
# Cartesian coordinates
mdt2$x = R * cos(mdt2$lat.rad) * cos(mdt2$long.rad)
mdt2$y = R * cos(mdt2$lat.rad) * sin(mdt2$long.rad)
mdt2$z = R * sin(mdt2$lat.rad)
# Perform kmeans with k = 1:10
matrix <- mdt2[, colnames(mdt2) %in% c("x", "y", "z")]
models <- list()
chs <- NULL
for (c in seq(2, 10, 1)) {
model <- kmeans(x = matrix, center = c, nstart = 50)
models <- append(models, list(model))
support = c
ch = (sum(model$betweenss)/(c - 1))/(sum(model$withinss)/(sum(model$size)
- c))
chs = rbind(chs, data.frame(support, ch)) }
# Plot the CH index
ggplot(data = chs) + geom_line(aes(x = support, y = ch)) +
ggtitle("Identifying the optimal number of clusters") +
xlab("Number of clusters") + ylab("CH Index")
# Select the model with the highest CH index
best = models[[which.max(chs$ch)]]
best_k = length(best$size)
mdt2$cluster = best$cluster
# Plot the clusters from the best model
county_df <- map_data("county")
ny <- subset(county_df, region=="new york")
ggplot(ny) + geom_point(data=mdt2, aes(x=long.rob, y=lat.rob, color=
as.factor(cluster)))+
guides(fill= FALSE) + theme(axis.text.x=element_text(angle=90, hjust=1))+
scale_colour_discrete(name = "clusters") + xlab("Pickup longitude")+
ylab("Pickup latitude")
但每次我得到不同的结果。我的错在哪里?