尝试使用 pam(cluster 包)对数据进行聚类时出现 R 错误

时间:2016-04-03 10:53:39

标签: r cluster-analysis k-means pam

我正在尝试对一个经过预处理的数据集运行 k-means 聚类(分类变量已转换为虚拟变量、NA 值已清理等)。这是数据的摘录(head() 的输出):

dput(head(clustering.set.in))
structure(list(activity_type = c(1, 1, 1, 1, 1, 1), app_id.PXkw7OJ1se = c(0, 
1, 1, 1, 1, 0), app_id.PXszbKVa5M = c(0, 0, 0, 0, 0, 0), app_id.PXw3GFQKBm = c(1, 
0, 0, 0, 0, 0), browser_version = c(48, 42, 9, 9, 48, 44), continent.AS = c(0, 
1, 1, 0, 0, 0), continent.EU = c(0, 0, 0, 0, 1, 0), continent.SA = c(0, 
0, 0, 0, 0, 0), f_activex = c(1, 1, 1, 1, 1, 1), f_atob = c(2, 
2, 2, 2, 2, 2), f_audio = c(2, 2, 2, 2, 2, 2), f_battery = c(2, 
2, 1, 1, 2, 2), f_bind = c(2, 2, 2, 2, 2, 2), f_flash = c(1, 
2, 2, 2, 2, 2), f_getComputedStyle = c(2, 2, 2, 2, 2, 2), f_matchSelector = c(2, 
2, 2, 2, 2, 2), f_mimeTypes = c(2, 2, 2, 2, 2, 2), f_mimeTypesLength = c(0, 
8, 11, 55, 7, 8), f_navigationTiming = c(2, 2, 1, 2, 2, 2), f_orientationEvents = c(2, 
1, 1, 1, 1, 1), f_plugins = c(2, 2, 2, 2, 2, 2), f_pluginsLength = c(0, 
6, 6, 15, 5, 6), f_raf = c(2, 2, 2, 2, 2, 2), f_resourceTiming = c(2, 
2, 1, 1, 2, 2), f_sse = c(2, 2, 2, 2, 2, 2), f_webgl = c(1, 2, 
2, 2, 2, 1), f_websql = c(1, 2, 2, 2, 2, 2), f_xdr = c(1, 1, 
1, 1, 1, 1), n_appCodeName = c(2, 2, 2, 2, 2, 2), n_doNotTrack = c(2, 
2, 1, 2, 2, 2), n_geolocation = c(2, 2, 2, 2, 2, 2), n_mimeTypes = c(2, 
2, 2, 2, 2, 2), n_platform.iPhone = c(0, 0, 0, 0, 0, 0), n_platform.Linux.armv7l = c(1, 
0, 0, 0, 0, 0), n_platform.MacIntel = c(0, 0, 1, 1, 0, 0), n_platform.Win32 = c(0, 
1, 0, 0, 1, 0), n_plugins = c(2, 2, 2, 2, 2, 2), n_product.Sub20030107 = c(1, 
1, 1, 1, 1, 1), n_product.Sub20100101 = c(0, 0, 0, 0, 0, 0), 
    n_product.Submissing = c(0, 0, 0, 0, 0, 0), os_family.Android = c(1, 
    0, 0, 0, 0, 0), os_family.iOS = c(0, 0, 0, 0, 0, 0), os_family.Mac.OS.X = c(0, 
    0, 1, 1, 0, 0), os_family.Windows = c(0, 1, 0, 0, 1, 0), 
    os_version = c(6, 8.1, 10, 10, 7, 0), site_history_length = c(31, 
    1, 1, 1, 1, 1), w_chrome...loadTimes....csi....app....webstore....runtime.. = c(0, 
    1, 0, 0, 1, 0), w_chrome...loadTimes....csi.. = c(0, 0, 0, 
    0, 0, 0), w_chrome... = c(1, 0, 1, 1, 0, 0), window_dimensions = c(2, 
    1, 2, 2, 2, 2), window_history = c(50, 1, 1, 1, 1, 3)), .Names = c("activity_type", 
"app_id.PXkw7OJ1se", "app_id.PXszbKVa5M", "app_id.PXw3GFQKBm", 
"browser_version", "continent.AS", "continent.EU", "continent.SA", 
"f_activex", "f_atob", "f_audio", "f_battery", "f_bind", "f_flash", 
"f_getComputedStyle", "f_matchSelector", "f_mimeTypes", "f_mimeTypesLength", 
"f_navigationTiming", "f_orientationEvents", "f_plugins", "f_pluginsLength", 
"f_raf", "f_resourceTiming", "f_sse", "f_webgl", "f_websql", 
"f_xdr", "n_appCodeName", "n_doNotTrack", "n_geolocation", "n_mimeTypes", 
"n_platform.iPhone", "n_platform.Linux.armv7l", "n_platform.MacIntel", 
"n_platform.Win32", "n_plugins", "n_product.Sub20030107", "n_product.Sub20100101", 
"n_product.Submissing", "os_family.Android", "os_family.iOS", 
"os_family.Mac.OS.X", "os_family.Windows", "os_version", "site_history_length", 
"w_chrome...loadTimes....csi....app....webstore....runtime..", 
"w_chrome...loadTimes....csi..", "w_chrome...", "window_dimensions", 
"window_history"), row.names = c(NA, 6L), class = "data.frame")

我正在尝试对这个数据集进行 k-means 式的聚类(使用 pam,k = 2),并收到如下错误消息:

Error in pam(clustering.set.in, k) : 
  negative length vectors are not allowed

我的代码行:

pam(clustering.set.in, 2)

有什么建议吗?

1 个答案:

答案 0 :(得分:0)

事实证明,有一列中存在 NA 值。用以下代码处理掉这些 NA 值(将其替换为 1)之后:

new.data[is.na(new.data)] <- 1

现在似乎工作正常