ade4中的图集聚类分析

时间:2014-02-18 14:50:12

标签: r cluster-analysis

我想对我的数据进行聚类分析:

library(cluster)
library(ade4)
data=read.table("L5.txt",header=T,row.names=1,dec=".",sep="\t")
JSD <- function(x,y) sqrt(0.5 * KLD(x, (x+y)/2) + 0.5 * KLD(y, (x+y)/2))
KLD <- function(x,y) sum(x * log(x/y))
dist.JSD <- function(inMatrix, pseudocount=0.000001, ...) {
KLD <- function(x,y) sum(x *log(x/y))
JSD<- function(x,y) sqrt(0.5 * KLD(x, (x+y)/2) + 0.5 * KLD(y, (x+y)/2))
matrixColSize <- length(colnames(inMatrix))
matrixRowSize <- length(rownames(inMatrix))
colnames <- colnames(inMatrix)
resultsMatrix <- matrix(0, matrixColSize, matrixColSize)

inMatrix = apply(inMatrix,1:2,function(x) ifelse (x==0,pseudocount,x))

for(i in 1:matrixColSize) {
for(j in 1:matrixColSize) { 
resultsMatrix[i,j]=JSD(as.vector(inMatrix[,i]),
as.vector(inMatrix[,j]))
}
}
colnames -> colnames(resultsMatrix) -> rownames(resultsMatrix)
as.dist(resultsMatrix)->resultsMatrix
attr(resultsMatrix, "method") <- "dist"
return(resultsMatrix) 
}

data.dist=dist.JSD(data)

pam.clustering=function(x,k) { # x is a distance matrix and k the number of clusters
                      require(cluster)
                      cluster = as.vector(pam(as.dist(x), k, diss=TRUE)$clustering)
                      return(cluster)
                     }

我发现我的数据的最佳簇数是2,所以我做了:

data.cluster=pam.clustering(data.dist,k=2)

obs.pca=dudi.pca(data.frame(t(data)), scannf=F, nf=10)
obs.bet=bca(obs.pca, fac=as.factor(data.cluster), scannf=F, nf=k-1)

但是当我尝试使用sclass绘制结果时,我收到了一个错误:

s.class(obs.bet$ls, fac=as.factor(data.cluster), grid=F)

Errore in `[.data.frame`(dfxy, , yax) : undefined columns selected

相反,如果我使用k = 3,它就有效......

有谁知道问题出在哪里?

我的数据如下:

structure(list(X12.BA.V = c(0, 0, 0.33506013, 0, 0, 0.032747456, 
0.06919519, 0.119148936, 0.010545791, 0.024051804, 0.001480111, 
0.000370028, 0.000555042, 0.023311748, 0.008510638, 0.000740056, 
0.018871415, 0.000185014, 0, 0.175763182, 0.000370028, 0.02146161, 
0, 0, 0.007955597, 0, 0, 0, 0), X37.TO.O = c(0.000718219, 0.066076131, 
0.391429255, 0, 0.000239406, 0.001197031, 0.024658846, 0.015800814, 
0.002394063, 0.005266938, 0, 0, 0.000478813, 0.000239406, 0.036868566, 
0, 0.068470194, 0.001197031, 0.000478813, 0.001675844, 0.004788125, 
0.002394063, 0.000239406, 0, 0.016519033, 0, 0, 0.005027532, 
0), X29.TO.VG = c(0.0004004, 0.008808809, 0.181581582, 0.017817818, 
0.036036036, 0.025425425, 0.05985986, 0.052252252, 0.011211211, 
0.038038038, 0.003603604, 0, 0.037037037, 0.084684685, 0.009009009, 
0.005405405, 0.057057057, 0.0004004, 0, 0.18018018, 0.003403403, 
0.032432432, 0, 0, 0.017417417, 0, 0, 0, 0), X04.BA.V = c(0, 
0.001625135, 0.084507042, 0.000812568, 0.018418202, 0.023564464, 
0.06744312, 0.127843987, 0.005417118, 0.10780065, 0.000812568, 
0, 0.023293608, 0.02383532, 0.00595883, 0.003521127, 0.086403034, 
0.000270856, 0.000812568, 0.111863489, 0.004062839, 0.066901408, 
0, 0, 0.020855905, 0, 0, 0, 0), X31.TO.O = c(0.000243309, 0.03406326, 
0.16836983, 0, 0, 0.04379562, 0.105109489, 0.198296837, 0.032360097, 
0.020681265, 0.008759124, 0, 0.000243309, 0.018978102, 0.021167883, 
0.000729927, 0.01946472, 0.000243309, 0.000973236, 0.191240876, 
0.006812652, 0, 0, 0, 0, 0, 0, 0.00243309, 0), X01.BA.V = c(0.001183992, 
0.010419133, 0.058962823, 0.007340753, 0.320625148, 0.008524745, 
0.062988397, 0.072933933, 0.010419133, 0.092825006, 0, 0, 0.007340753, 
0.014681506, 0.008761544, 0.001183992, 0.041676533, 0, 0, 0.041676533, 
0.002131186, 0.016812692, 0, 0, 0.063698792, 0, 0, 0, 0), X01.TO.VG = c(0, 
0.000596125, 0.250074516, 0, 0, 0.015499255, 0.039940387, 0.015797317, 
0.001788376, 0.030104322, 0, 0, 0.037555887, 0.030700447, 0.00119225, 
0.003874814, 0.030700447, 0, 0.005067064, 0.093591654, 0.010730253, 
0.246199702, 0, 0, 0.007153502, 0, 0, 0.001788376, 0), X19.BO.VG = c(0, 
0.017184785, 0.002896312, 0, 0.343309519, 0.009654373, 0.033790307, 
0.034562657, 0.005213362, 0.019694922, 0.00077235, 0.000579262, 
0.001737787, 0.007530411, 0.009847461, 0.001930875, 0.087854798, 
0, 0.000579262, 0.067966789, 0.049623479, 0.01602626, 0, 0, 0.001351612, 
0, 0.0015447, 0, 0.000193087), X32.BO.VG = c(0, 0.004503437, 
0.219720313, 0, 0, 0.031050012, 0.061863001, 0.105001185, 0.006399621, 
0.028205736, 0.000711069, 0.001185115, 0.029153828, 0.059492771, 
0.005925575, 0.003318322, 0.050485897, 0, 0.00237023, 0.118748519, 
0.00474046, 0.039819863, 0, 0, 0, 0, 0, 0.004029391, 0.000237023
), X27.TO.V = c(0.00033428, 0.020892529, 0.397793749, 0.00701989, 
0, 0.006852749, 0.015544041, 0.021226809, 0.002172823, 0.009861274, 
0.001169982, 0, 0.004178506, 0.052816313, 0.001002841, 0.000501421, 
0.055824837, 0.00033428, 0.011031255, 0.047634966, 0.00718703, 
0.026408156, 0, 0, 0.001002841, 0, 0, 0.00016714, 0), X24.TO.V = c(0, 
0.140040241, 0.151911469, 0.006841046, 0, 0.007243461, 0.053521127, 
0.01167002, 0.003420523, 0.037826962, 0.000603622, 0.000201207, 
0.006841046, 0.015694165, 0.003822938, 0.003219316, 0.06861167, 
0.000603622, 0.004024145, 0.041448692, 0.010663984, 0.029376258, 
0, 0, 0.001810865, 0, 0, 0, 0), X26.TO.VG = c(0.000502428, 0.058951599, 
0.173840228, 0, 0.0294758, 0.004186903, 0.008876235, 0.008206331, 
0.001507285, 0.015910233, 0.000502428, 0.000167476, 0.00301457, 
0.002344666, 0.003349523, 0.004689332, 0.094289064, 0.001674761, 
0.038184559, 0.030983085, 0.049572936, 0.019762184, 0.000502428, 
0, 0, 0, 0, 0, 0), X06.BO.O = c(0.000533476, 0.001066951, 0.113630301, 
0, 0.000266738, 0.029607895, 0.044278474, 0.12590024, 0.005601494, 
0.035742865, 0.004001067, 0, 0, 0.144305148, 0.022939451, 0.002934116, 
0.093091491, 0.000266738, 0, 0.155241398, 0.002133902, 0.038943718, 
0.001333689, 0, 0.025873566, 0, 0, 0, 0.012803414), X15.BO.VG = c(0, 
0, 0.327540107, 0, 0.000891266, 0.012477718, 0.076871658, 0.169340463, 
0.005347594, 0.071301248, 0.000222816, 0, 0.000222816, 0.00802139, 
0.014483066, 0.000222816, 0.009581105, 0.000668449, 0, 0.084224599, 
0.001336898, 0.000445633, 0, 0, 0.028743316, 0, 0, 0.007575758, 
0)), .Names = c("X12.BA.V", 
"X37.TO.O", "X29.TO.VG", "X04.BA.V", "X31.TO.O", "X01.BA.V", 
"X01.TO.VG", "X19.BO.VG", "X32.BO.VG", "X27.TO.V", "X24.TO.V", 
"X26.TO.VG", "X06.BO.O", "X15.BO.VG", "X25.BO.VG"), class = "data.frame", row.names =        c("Bacteroidia;Other;Other", 
"Bacteroidia;f__;g__", "Bacteroidaceae;g__Bacteroides", "Prevotellaceae;g__", 
"Prevotellaceae;g__Prevotella", "Lachnospiraceae;Other", "Lachnospiraceae;g__", 
"Lachnospiraceae;g__Blautia", "Lachnospiraceae;g__Clostridium", 
"Lachnospiraceae;g__Coprococcus", "Lachnospiraceae;g__Eubacterium", 
"Lachnospiraceae;g__Lachnobacterium", "Lachnospiraceae;g__Lachnospira", 
"Lachnospiraceae;g__Roseburia", "Lachnospiraceae;g__Ruminococcus", 
"Ruminococcaceae;Other", "Ruminococcaceae;g__", "Ruminococcaceae;g__Clostridium", 
"Ruminococcaceae;g__Eubacterium", "Ruminococcaceae;g__Faecalibacterium", 
"Ruminococcaceae;g__Oscillospira", "Ruminococcaceae;g__Ruminococcus", 
"Veillonellaceae;g__", "Veillonellaceae;g__Acidaminococcus", 
"Veillonellaceae;g__Dialister", "Veillonellaceae;g__Megamonas", 
"Veillonellaceae;g__Megasphaera", "Veillonellaceae;g__Phascolarctobacterium", 
"Veillonellaceae;g__Veillonella"))

0 个答案:

没有答案