Factoextra - 更改省略号和变量的线宽

时间:2016-05-09 11:05:46

标签: r pca

我目前使用factomineRfactoextra个套件制作pca。

我的数据虹膜代码示例:

library(FactoMineR) 
library(factoextra)

data(iris)
res.pca<-PCA(iris , scale.unit=TRUE, ncp=2, quali.sup=c(5), graph =  FALSE)

fviz_pca_biplot(res.pca, label="var", habillage=5,
                addEllipses=TRUE) + theme_minimal()

http://i.stack.imgur.com/s7JO4.png

我想改变椭圆周围线条的宽度,并改变变量宽度的宽度。我尝试了几种方法,但我无法想象如何做我想做的事。

有什么想法吗?

1 个答案:

答案 0 :(得分:0)

我会创建所需功能的副本并更改其中的代码。具体来说,要增加省略号的宽度,可以在size=..命令的调用中添加ggplot2::stat_ellipse

my_fviz_pca_biplot <- function (X, axes = c(1, 2), geom = c("point", "text"), label = "all", 
                                invisible = "none", labelsize = 4, pointsize = 2, habillage = "none", 
                                addEllipses = FALSE, ellipse.level = 0.95, col.ind = "black", 
                                col.ind.sup = "blue", alpha.ind = 1, col.var = "steelblue", 
                                alpha.var = 1, col.quanti.sup = "blue", col.circle = "grey70", 
                                repel = FALSE, axes.linetype = "dashed", select.var = list(name = NULL, 
                                                                                           cos2 = NULL, contrib = NULL), select.ind = list(name = NULL, 
                                                                                                                                           cos2 = NULL, contrib = NULL), title = "Biplot of variables and individuals", 
                                jitter = list(what = "label", width = NULL, height = NULL), 
                                ...) 
{
  if (is.null(jitter$what)) 
    jitter$what <- "label"
  if (length(axes) != 2) 
    stop("axes should be of length 2")
  scale.unit <- .get_scale_unit(X)
  var <- facto_summarize(X, element = "var", result = c("coord", 
                                                        "contrib", "cos2"), axes = axes)
  colnames(var)[2:3] <- c("x", "y")
  var.all <- var
  if (!is.null(select.var)) 
    var <- .select(var, select.var)
  lab <- .label(label)
  hide <- .hide(invisible)
  alpha.limits <- NULL
  if (alpha.var %in% c("cos2", "contrib", "coord", "x", "y")) 
    alpha.limits = range(var.all[, alpha.var])
  pca.ind <- get_pca_ind(X)
  ind <- data.frame(pca.ind$coord[, axes, drop = FALSE])
  colnames(ind) <- c("x", "y")
  r <- min((max(ind[, "x"]) - min(ind[, "x"])/(max(var[, "x"]) - 
                                                 min(var[, "x"]))), (max(ind[, "y"]) - min(ind[, "y"])/(max(var[, 
                                                                                                                "y"]) - min(var[, "y"]))))
  var[, c("x", "y")] <- var[, c("x", "y")] * r * 0.7
  p <- my_fviz_pca_ind(X, axes = axes, geom = geom, repel = repel, 
                       label = label, invisible = invisible, labelsize = labelsize, 
                       pointsize = pointsize, axes.linetype = axes.linetype, 
                       col.ind = col.ind, col.ind.sup = col.ind.sup, alpha.ind = alpha.ind, 
                       habillage = habillage, addEllipses = addEllipses, ellipse.level = ellipse.level, 
                       select.ind = select.ind, jitter = jitter)
  if (!hide$var) {
    p <- .ggscatter(p = p, data = var, x = "x", y = "y", 
                    col = col.var, alpha = alpha.var, alpha.limits = alpha.limits, 
                    geom = c("arrow", "text"), repel = repel, lab = lab$var, 
                    labelsize = labelsize, jitter = jitter)
  }
  if (inherits(X, "PCA") & !hide$quanti) {
    quanti_sup <- .get_supp(X, element = "quanti", axes = axes, 
                            select = select.var)
    if (!is.null(quanti_sup)) 
      colnames(quanti_sup)[2:3] <- c("x", "y")
    if (!is.null(quanti_sup)) {
      p <- fviz_add(p, df = quanti_sup[, 2:3, drop = FALSE] * 
                      r * 0.7, geom = c("arrow", "text"), color = col.quanti.sup, 
                    linetype = 2, labelsize = labelsize, addlabel = (lab$quanti), 
                    jitter = jitter)
    }
  }
  title2 <- title
  p + labs(title = title2)
}

environment(my_fviz_pca_biplot) <-  environment(fviz_pca_biplot)

my_fviz_pca_ind <- function (X, axes = c(1, 2), geom = c("point", "text"), repel = FALSE, 
                             label = "all", invisible = "none", labelsize = 4, pointsize = 2, 
                             habillage = "none", addEllipses = FALSE, ellipse.level = 0.95, 
                             ellipse.type = "norm", ellipse.alpha = 0.1, col.ind = "black", 
                             col.ind.sup = "blue", alpha.ind = 1, select.ind = list(name = NULL, 
                                                                                    cos2 = NULL, contrib = NULL), jitter = list(what = "label", 
                                                                                                                                width = NULL, height = NULL), title = "Individuals factor map - PCA", 
                             axes.linetype = "dashed", ...) 
{
  if (length(intersect(geom, c("point", "text", "arrow"))) == 
      0) 
    stop("The specified value(s) for the argument geom are not allowed ")
  if (length(axes) != 2) 
    stop("axes should be of length 2")
  if (is.null(jitter$what)) 
    jitter$what <- "label"
  ind <- facto_summarize(X, element = "ind", result = c("coord", 
                                                        "contrib", "cos2"), axes = axes)
  colnames(ind)[2:3] <- c("x", "y")
  ind.all <- ind
  if (!is.null(select.ind)) 
    ind <- .select(ind, select.ind)
  lab <- .label(label)
  hide <- .hide(invisible)
  alpha.limits <- NULL
  if (alpha.ind %in% c("cos2", "contrib", "coord", "x", "y")) 
    alpha.limits = range(ind.all[, alpha.ind])
  if (habillage[1] == "none") {
    p <- ggplot()
    if (hide$ind) 
      p <- ggplot() + geom_blank(data = ind, aes_string("x", 
                                                        "y"))
    else p <- .ggscatter(data = ind, x = "x", y = "y", col = col.ind, 
                         alpha = alpha.ind, repel = repel, alpha.limits = alpha.limits, 
                         shape = 19, geom = geom, lab = lab$ind, labelsize = labelsize, 
                         pointsize = pointsize, jitter = jitter)
  }
  else {
    p <- ggplot()
    if (hide$ind & hide$quali) 
      p <- ggplot() + geom_blank(data = ind, aes_string("x", 
                                                        "y"))
    if (inherits(X, "PCA") & length(habillage) == 1) {
      data <- X$call$X
      if (is.numeric(habillage)) 
        name.quali <- colnames(data)[habillage]
      else name.quali <- habillage
      ind <- cbind.data.frame(data[rownames(ind), name.quali], 
                              ind)
      colnames(ind)[1] <- name.quali
      ind[, 1] <- as.factor(ind[, 1])
    }
    else {
      if (nrow(ind) != length(habillage)) 
        stop("The number of active individuals used in the PCA is different ", 
             "from the length of the factor habillage. Please, remove the supplementary ", 
             "individuals in the variable habillage.")
      name.quali <- "Groups"
      ind <- cbind.data.frame(Groups = habillage, ind)
      ind[, 1] <- as.factor(ind[, 1])
    }
    if (!hide$ind) {
      label_coord <- ind
      if (jitter$what %in% c("both", "b")) {
        label_coord <- ind <- .jitter(ind, jitter)
      }
      else if (jitter$what %in% c("point", "p")) {
        ind <- .jitter(ind, jitter)
      }
      else if (jitter$what %in% c("label", "l")) {
        label_coord <- .jitter(label_coord, jitter)
      }
      if ("point" %in% geom) 
        p <- p + geom_point(data = ind, aes_string("x", 
                                                   "y", color = name.quali, shape = name.quali), 
                            size = pointsize)
      if (lab$ind & "text" %in% geom) {
        if (repel) 
          p <- p + ggrepel::geom_text_repel(data = label_coord, 
                                            aes_string("x", "y", label = "name", color = name.quali, 
                                                       shape = name.quali), size = labelsize)
        else p <- p + geom_text(data = label_coord, aes_string("x", 
                                                               "y", label = "name", color = name.quali, shape = name.quali), 
                                size = labelsize, vjust = -0.7)
      }
    }
    if (!hide$quali) {
      coord_quali.sup <- .get_coord_quali(ind$x, ind$y, 
                                          groups = ind[, 1])
      coord_quali.sup <- cbind.data.frame(name = rownames(coord_quali.sup), 
                                          coord_quali.sup)
      colnames(coord_quali.sup)[1] <- name.quali
      coord_quali.sup[, 1] <- as.factor(coord_quali.sup[, 
                                                        1])
      if ("point" %in% geom) {
        p <- p + geom_point(data = coord_quali.sup, aes_string("x", 
                                                               "y", color = name.quali, shape = name.quali), 
                            size = pointsize * 2)
      }
      if (lab$quali & "text" %in% geom) {
        if (repel) 
          p <- p + ggrepel::geom_text_repel(data = coord_quali.sup, 
                                            aes_string("x", "y", color = name.quali), 
                                            label = rownames(coord_quali.sup), size = labelsize)
        else p <- p + geom_text(data = coord_quali.sup, 
                                aes_string("x", "y", color = name.quali), label = rownames(coord_quali.sup), 
                                size = labelsize, vjust = -1)
      }
    }
    if (addEllipses) {
      if (ellipse.type == "convex") {
        frame.data <- .cluster_chull(ind[, c("x", "y")], 
                                     ind[, name.quali])
        colnames(frame.data)[which(colnames(frame.data) == 
                                     "cluster")] <- name.quali
        mapping = aes_string(x = "x", y = "y", colour = name.quali, 
                             fill = name.quali, group = name.quali)
        p <- p + ggplot2::geom_polygon(data = frame.data, 
                                       mapping = mapping, alpha = ellipse.alpha)
      }
      else if (ellipse.type %in% c("t", "norm", "euclid")) {
        mapping = aes_string(x = "x", y = "y", colour = name.quali, 
                             group = name.quali, fill = name.quali)
        p <- p + ggplot2::stat_ellipse(mapping = mapping, 
                                       data = ind, level = ellipse.level, type = ellipse.type, 
                                       alpha = ellipse.alpha, geom = "polygon", size=5)
      }
    }
  }
  if (inherits(X, "PCA") & !hide$ind.sup) {
    ind_sup <- .get_supp(X, element = "ind.sup", axes = axes, 
                         select = select.ind)
    if (!is.null(ind_sup)) 
      colnames(ind_sup)[2:3] <- c("x", "y")
    if (!is.null(ind_sup)) {
      p <- fviz_add(p, df = ind_sup[, 2:3, drop = FALSE], 
                    geom = geom, color = col.ind.sup, shape = 19, 
                    pointsize = pointsize, labelsize = labelsize, 
                    addlabel = (lab$ind.sup & "text" %in% geom), 
                    jitter = jitter)
    }
  }
  title2 <- title
  p <- .fviz_finish(p, X, axes, axes.linetype) + labs(title = title2)
  p
}

environment(my_fviz_pca_ind) <-  environment(fviz_pca_ind)

然后使用新功能。

my_fviz_pca_biplot(res.pca, label="var", habillage=5,
                                  addEllipses=TRUE) + theme_minimal()