我发现了一个数据集,它会导致 R 的 CLARA 算法失败(即进入一个看似无限的循环)。为什么会失败?
我觉得奇怪,一方面是因为这个数据集非常平淡(基本上只是一个正态变量加上一个虚拟编码的分类变量),另一方面是因为几乎任何对数据集或对 cluster::clara 调用的改动都能避免这个问题。
有人知道原因吗?很抱歉,我没有提供生成此数据集所用的随机种子和程序;遗憾的是,生成它的代码是一组相当庞大的程序,并且使用了并行随机数生成器(通过 rlecuyer 包)。我无法在普通 PC 上重现生成过程,因此下面直接给出原始数据集。该错误在不同的操作系统和不同的 R 版本上都能重现。示例 sessionInfo() 输出见下文。
一个可能的线索是:当连续随机变量(第一列)中没有真正的分组结构时,算法似乎更容易失败。即便如此,我也是经过数千次迭代才找到一个会导致失败的数据集。
# Self-contained example: everything needed to reproduce the problem is in
# this script (the data are embedded below as a literal).
# The clara() function used further down is called as cluster::clara in the
# accompanying text, so the cluster package is attached here.
library(cluster)
# Raw data as one flat numeric vector, written four values per line
# (350 lines, 1400 values in total). The first value on each line is a
# real-valued measurement; the remaining three values appear to be 0/1
# indicator (dummy) columns. The vector is reshaped into a 4-column matrix,
# row by row, in the analysis step that follows.
strangeData <- c(-0.866285627488327,1,0,0
,-0.510951849076121,0,0,1
,-1.17167514108559,0,1,0
,-0.376762528272389,1,0,0
,0.78146409125879,0,1,0
,1.61782054548358,0,0,1
,1.13870520606875,1,0,0
,-1.13486644861899,0,0,1
,-0.206927567036607,0,0,0
,-0.207869481212349,0,0,1
,-0.744628008025243,0,1,0
,0.731433838031448,0,0,1
,1.62303204478421,0,1,0
,0.336307634488861,1,0,0
,1.81605021862297,1,0,0
,-0.750411588420933,1,0,0
,-0.0238025825151884,0,1,0
,-1.15317047678752,0,0,1
,-1.16535479706466,0,1,0
,-0.902957290471365,0,1,0
,0.757365905566296,0,0,0
,0.327956357831982,1,0,0
,-1.21209036793528,1,0,0
,-0.827813655803123,0,0,1
,1.29462065083151,1,0,0
,-1.18875004333554,0,0,1
,0.30436829237833,1,0,0
,0.686761122942272,0,0,1
,0.348318524543705,0,0,0
,0.00772114748248065,0,1,0
,-0.0654531320746511,0,1,0
,-0.871825468631758,1,0,0
,-0.24260740899128,1,0,0
,1.6316283625827,0,0,0
,-0.157380038359798,1,0,0
,-1.13594228151195,0,0,1
,0.397846678544776,0,0,0
,-1.79295787039272,0,1,0
,0.659894781274625,0,0,0
,-0.170073590508157,0,0,0
,-0.813943074670405,0,0,0
,-0.382343008302335,0,1,0
,1.00965255945214,1,0,0
,-0.325407591983216,0,0,0
,-0.192348595055317,0,1,0
,-0.776963720822561,0,0,1
,0.412818265336597,0,0,0
,-0.372215722029941,0,1,0
,-0.626596060936548,1,0,0
,1.77998358890641,0,0,1
,1.28471804407001,1,0,0
,2.30521501151449,0,0,1
,-0.0434857353479769,0,1,0
,0.129337095832911,0,0,0
,0.490060486314237,0,0,0
,0.0219239584969476,0,1,0
,2.16246213170809,1,0,0
,-1.6463259663705,0,1,0
,-1.25945578842163,0,1,0
,-0.0508902150466008,1,0,0
,-0.761232947984076,0,0,1
,-1.0844017226841,1,0,0
,0.768163697492781,1,0,0
,0.0512967198361093,0,0,0
,0.496505407614941,0,1,0
,-0.875095658442669,0,0,0
,0.356165420230564,0,1,0
,-1.48736915282316,0,0,0
,0.605835916959207,1,0,0
,0.150248856364222,0,0,1
,0.436072890825432,0,0,0
,-1.2477307042077,0,1,0
,-0.925825618397317,0,0,1
,-0.865719152703743,0,0,0
,1.49904943876406,0,0,0
,-2.000710979317,0,0,0
,-0.592352714624327,0,0,0
,0.393289445874399,0,1,0
,-0.394123590756249,1,0,0
,-0.352673107317825,0,0,1
,0.695357507195699,0,0,1
,-0.0040347449129765,0,1,0
,-0.127791150003591,0,0,1
,0.474647838412314,1,0,0
,0.621985643440749,0,1,0
,-0.0963944386034655,0,0,0
,-0.0769711593827088,0,0,1
,1.52062637216907,0,1,0
,-0.0171131048538696,0,0,1
,-0.614781568866571,0,0,1
,-0.428078937440796,0,0,1
,1.46996872384325,0,0,1
,1.33611500009041,0,1,0
,0.0849546480618341,1,0,0
,1.16189138672992,0,1,0
,-0.0892214257894575,0,0,1
,0.639365524696794,0,0,1
,0.601135082783273,1,0,0
,0.507344807255368,1,0,0
,-0.933432615481704,0,1,0
,1.0605270726388,1,0,0
,-0.846884297959019,0,1,0
,0.069757909351883,0,0,0
,0.971079516592758,0,0,1
,2.13895542286282,0,1,0
,-1.07521573310271,0,0,0
,-0.578762770935955,0,0,1
,0.138245534658141,0,1,0
,0.472499258515774,0,1,0
,-0.955048693548485,0,1,0
,-0.541270774805467,0,0,0
,-0.965725893217509,0,1,0
,1.23937418822633,0,0,0
,0.572605101793754,0,0,1
,1.25059599135706,0,0,0
,0.224476582002008,0,1,0
,-0.0466083676818374,1,0,0
,1.58857119911245,0,0,1
,-1.85121285531852,1,0,0
,0.506446004536966,0,0,0
,0.00529547247104709,0,1,0
,-0.67761610257275,0,1,0
,-0.050520398113479,1,0,0
,1.09977836234059,0,0,0
,-0.729074108182476,0,0,0
,-0.0290602916500079,0,0,0
,0.610785941871284,1,0,0
,0.224899327520079,1,0,0
,1.01797352389614,0,0,0
,0.258960174013477,1,0,0
,-0.0173109255243736,1,0,0
,1.15675575673647,0,0,0
,-1.40075651100777,0,1,0
,1.29973838021484,0,0,0
,-2.19964416776033,0,1,0
,1.43949364450352,0,0,1
,1.07342747391928,0,0,1
,-1.20048803661996,0,0,0
,-0.346455976764648,0,0,0
,-0.480774942257163,0,1,0
,-0.743609323884268,0,1,0
,1.54622738113784,0,1,0
,-0.403898403935946,0,0,0
,-0.833433145942438,1,0,0
,-0.219164470718774,1,0,0
,0.12696814119287,1,0,0
,-0.322315905835392,0,0,0
,-0.350597318362615,1,0,0
,-0.657947808709103,0,0,1
,-1.3599769112632,0,1,0
,-0.101630823838928,0,0,0
,-1.34660046284197,1,0,0
,-2.42751597395552,0,1,0
,-0.890211205480196,1,0,0
,-1.24558895213474,0,0,1
,0.18700333641652,1,0,0
,2.5082251629743,1,0,0
,1.33292648093108,1,0,0
,-0.38816796929655,0,0,1
,-0.340299350153559,0,0,1
,-0.360408688017778,0,0,1
,0.184527132500589,0,0,1
,-0.945284123204619,0,0,0
,-0.447915791428278,0,0,0
,0.960837246768647,0,1,0
,0.261390181284487,0,0,1
,1.99391928572872,1,0,0
,1.63328530659267,0,1,0
,0.841678645192176,0,1,0
,0.807348533253569,0,1,0
,-1.69473655701435,0,0,0
,-2.95460714558617,1,0,0
,-1.64969453370585,0,0,1
,-0.484414672869815,0,1,0
,-0.526272870810604,0,0,0
,0.051830062343814,0,0,1
,1.26507516617792,1,0,0
,-1.41617967114385,0,0,0
,0.236754759425436,0,0,0
,0.904212403266786,0,0,1
,2.42448681816761,0,0,1
,-1.57101731154911,1,0,0
,-0.471525015273919,0,0,1
,-0.777691881154585,0,0,0
,-0.33500864971305,1,0,0
,-0.804758127002811,1,0,0
,1.31836909690498,0,0,1
,0.0609487604963864,0,0,0
,-0.443936034513707,0,0,0
,0.740834846236723,0,0,1
,-2.14041576208016,0,0,0
,-0.650625084741614,0,0,1
,-0.314809179050786,0,0,0
,0.623191053756259,0,0,0
,-0.861527811786575,0,0,0
,-0.495712634432739,0,0,0
,-1.15138427720019,0,0,0
,-0.0368657006311513,0,0,1
,0.808099625439217,0,1,0
,2.04358789173449,0,1,0
,-0.361230312246911,0,1,0
,-1.43757174215673,0,0,1
,-0.0126368333853388,0,0,1
,-0.55062321407905,0,0,1
,-0.598669467196556,0,0,0
,0.553538760296604,0,1,0
,0.0331404550587805,0,1,0
,-1.01641207011743,0,0,0
,-0.969763233966749,1,0,0
,-0.115985687817581,0,0,0
,-1.44923317467671,0,1,0
,0.58359088336307,0,0,1
,1.02177931523912,0,0,0
,-0.772903258762401,0,1,0
,-0.833203951806085,1,0,0
,-0.121756851474623,0,0,1
,0.333735580183243,0,0,0
,0.841447311750965,0,0,1
,-0.202542681685737,1,0,0
,-0.363835977102042,0,0,1
,-0.763208653073085,0,0,0
,-0.404233770949925,0,0,0
,-0.626967850505697,0,0,1
,1.51902583641424,0,0,1
,0.152334670353581,1,0,0
,-0.37793809368425,0,1,0
,0.958745025661511,1,0,0
,-1.44235110382376,0,1,0
,0.0234173973335753,0,0,1
,0.381965794662393,0,0,0
,-0.987421186811441,1,0,0
,0.680391323949574,1,0,0
,-0.200195019135693,1,0,0
,-1.11687467522201,0,0,1
,0.484415933230296,0,0,0
,0.66465930418491,1,0,0
,0.135520835347015,0,0,0
,-0.0135948380436641,0,0,0
,-0.174610953673981,1,0,0
,-0.385258324842704,0,0,1
,0.0736764406605341,0,1,0
,0.433497723474607,0,0,1
,-1.57962309060262,0,0,1
,0.630090656840841,1,0,0
,0.973500666410068,0,1,0
,-0.509883991863271,0,1,0
,0.776678864294335,1,0,0
,1.06439033722933,0,0,1
,0.631572825803999,0,0,1
,0.736226086691134,0,0,0
,-1.23321773011943,1,0,0
,-0.388575379622945,0,1,0
,1.0632151634506,0,0,1
,1.05814098386583,0,0,0
,0.408184424450004,0,0,1
,0.531436889048738,1,0,0
,2.20381966762526,0,0,1
,2.11422588577572,0,0,1
,-0.531704962557588,0,0,0
,1.34561800389927,0,0,1
,0.273769623933743,1,0,0
,-0.372910934670834,0,0,1
,-0.470566520010902,1,0,0
,-0.75477217389578,0,1,0
,-0.501842228377673,0,0,1
,-1.25532930322808,0,0,0
,-0.286477761094775,1,0,0
,-0.823694457831787,0,0,1
,0.797314566796799,0,0,1
,-0.0600523761224243,0,1,0
,0.657605378335186,0,0,0
,-0.725821254759635,0,1,0
,-1.1218762657447,0,1,0
,1.02390098776472,1,0,0
,-0.125900354616813,0,1,0
,-0.110600677983877,0,0,1
,0.362848077657443,0,1,0
,1.75245676080733,0,0,1
,-1.53945786644643,0,0,0
,-1.69041842719508,1,0,0
,2.21366606351434,0,1,0
,1.59672297659057,0,0,1
,1.36855862766991,0,0,0
,-0.59109080681349,1,0,0
,0.344628944020705,0,0,0
,-0.547730633367544,1,0,0
,3.28229260418702,0,1,0
,0.186377905391717,0,0,1
,-0.85647024545773,0,0,0
,-2.10613283819929,1,0,0
,1.0659329233981,1,0,0
,0.197594622321815,0,0,0
,-1.5165240972921,0,0,1
,-1.10653359569001,0,0,0
,-0.702450947236347,0,1,0
,0.561612881169714,0,1,0
,0.0618497778342936,0,1,0
,-1.61352989112514,0,0,1
,-0.380008609813976,0,1,0
,0.485668785864403,1,0,0
,1.44073309607887,0,0,0
,-0.631502388683978,0,1,0
,0.924636979461146,0,1,0
,-0.385053889933482,1,0,0
,1.7335479754834,1,0,0
,0.804525293681982,0,0,1
,-0.991585017557505,0,1,0
,1.35969572742296,0,1,0
,0.76841232389423,0,1,0
,-0.133117031661697,1,0,0
,-1.57093944605932,1,0,0
,0.0463315954106568,1,0,0
,0.0400112040904778,0,1,0
,0.542965613556832,0,0,1
,-0.951132065936325,0,1,0
,1.0236860490532,1,0,0
,-1.65166967479243,1,0,0
,-0.31690124841045,1,0,0
,-0.236230990447918,1,0,0
,2.15914099741492,1,0,0
,1.31763979031855,0,0,1
,-0.607279994041944,0,0,1
,1.13453737158436,0,0,1
,0.6701910306058,0,1,0
,-0.564864937395887,1,0,0
,0.926205103954728,1,0,0
,-0.980200111995638,0,1,0
,0.437252455381278,0,1,0
,-1.13416837126914,0,1,0
,0.780011292450117,0,0,0
,0.558179248192722,0,0,1
,-0.788560632585296,0,0,1
,1.42188103521719,1,0,0
,-1.25403745185199,0,1,0
,-0.961543192416867,0,0,1
,0.272005872845942,0,0,0
,-0.3754160451173,1,0,0
,-1.40457905909558,0,0,0
,0.0564752608649083,0,1,0
,0.0717952713169958,1,0,0
,1.27361011457374,0,1,0
,-1.13881319792865,0,0,1
,-1.43642545765708,0,0,0
,1.56419289350468,0,0,0
,-0.539863901494306,1,0,0
,-0.649284486134111,0,0,1
,-0.816485163999317,0,0,0
,-0.586168990002293,0,0,0
,1.33236676326001,0,0,0
,0.897539764461409,1,0,0)
下面是分析部分:
# Reshape the flat vector into a 4-column matrix, filling it row by row so
# that each group of four consecutive values becomes one observation.
strangeData <- matrix(strangeData, byrow = TRUE, ncol = 4)

# Reported behaviour: with exactly the settings below, clara() does not
# return (apparent infinite loop). According to the author, any single one of
# the following changes makes the call complete normally:
#   - remove any column (and probably any row) of the data
#   - decrease `samples` below 42
#   - set `sampsize` to 199 or 201
#   - change `metric` to "euclidean"
#   - set `pamLike` to FALSE
# TRUE is spelled out instead of T because T is an ordinary, reassignable
# variable, whereas TRUE is a reserved constant.
resClaraManDum <- clara(
  x = strangeData,
  k = 2,
  metric = "manhattan",
  samples = 42,
  sampsize = 200,
  pamLike = TRUE
)
我的系统的相关信息如下:
sessionInfo()
R version 3.0.2 (2013-09-25)
Platform: x86_64-w64-mingw32/x64 (64-bit)
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] cluster_1.14.4
loaded via a namespace (and not attached):
[1] tools_3.0.2