许多因素中的重复级别已被弃用"用ggplot2警告

时间:2015-05-03 01:01:17

标签: r ggplot2

我已经创建了一个函数,可以同时绘制多因素分析的负载,也就是当它们的变量没有完全重叠(或根本不重叠)时。它工作正常,除了它产生了许多因素已被弃用的重复级别"警告,我不明白为什么。

代码允许应该是可重现的。

library(devtools)
source_url("https://raw.githubusercontent.com/Deleetdk/psych2/master/psych2.R")

loadings.plot2 = function(fa.objects, fa.names="") {
  fa.num = length(fa.objects) #number of fas

  if (fa.names=="") {
    fa.names = str_c("fa.", 1:fa.num)
  }
  if (length(fa.names) != fa.num) {
    stop("Names vector does not match the number of factor analyses.")
  }

  #merge into df
  d = data.frame() #to merge into
  for (fa.idx in 1:fa.num) { #loop over fa objects
    loads = fa.objects[[fa.idx]]$loadings
    rnames = rownames(loads)
    loads = as.data.frame(as.vector(loads))
    rownames(loads) = rnames
    colnames(loads) = fa.names[fa.idx]

    d = merge.datasets(d, loads, 1)
  }


  #reshape to long form
  d2 = reshape(d,
               varying = 1:fa.num,
               direction="long",
               ids = rownames(d))
  d2$time = as.factor(d2$time)
  d2$id = as.factor(d2$id)
  print(d2)
  print(levels(d2$time))
  print(levels(d2$id))

  #plot
  g = ggplot(reorder_by(id, ~ fa, d2), aes(x=fa, y=id, color=time)) +
      geom_point() +
      xlab("Loading") + ylab("Indicator") +
      scale_color_discrete(name="Analysis",
                           labels=fa.names)

  return(g)
}

fa1 = fa(iris[-5])
fa2 = fa(iris[-c(1:50),-5])
fa3 = fa(ability)
fa4 = fa(ability[1:50,])

loadings.plot2(list(fa1))
loadings.plot2(list(fa1,fa2))
loadings.plot2(list(fa1,fa2,fa3))
loadings.plot2(list(fa1,fa2,fa3,fa4))

The example above, with 4 factor analyses

绘制不同数量的因子会产生不同数量的错误。

我尝试在将变量as.factor提供给ggplot之前设置它们,但它并没有改变任何内容。

有什么想法吗?也许与reorder_by()有关?需要此函数来对data.frame进行排序,否则ggplot会按字母顺序对它们进行排序,这是没用的。

1 个答案:

答案 0 :(得分:2)

如评论中所述,此警告是由reorder_by()函数引起的,但仅与ggplot2一起使用。具体而言,这些级别由于某种原因重复:

#> levels(d2$id)
[1] "Sepal.Width"  "Sepal.Width"  "Sepal.Length" "Sepal.Length" "Petal.Width"  "Petal.Width"  "Petal.Length"
[8] "Petal.Length"

ggplot2不喜欢重复级别,因此会发出警告。

如果有人感兴趣,我会编写新代码来自行重新调整以避免此问题并避免依赖plotflow包。

新功能是:

#' Plot multiple factor loadings in one plot.
#'
#' Returns a ggplot2 plot with sorted loadings colored by the analysis they belong to. Supports reversing óf any factors that are reversed. Dodges to avoid overplotting. Only works for factor analyses with 1 factor solutions!
#' @param fa_objects (list of fa-class objects) Factor analyses objects from the fa() function from the \code{\link{psych}} package.
#' @param fa_labels (chr vector) Names of the analyses. Defaults to fa.1, fa.2, etc..
#' @param reverse_vector (num vector) Vector of numbers to use for reversing factors. Use e.g. c(1, -1) to reverse the second factor. Defaults not reversing.
#' @param reorder (chr scalar or NA) Which factor analysis to order the loadings by. Can be integers that reprensent each factor analysis. Can also be "mean", "median" to use the means and medians of the loadings. Use "all" for the old method. Default = "mean".
#' @export
#' @examples
#' library(psych)
#' plot_loadings_multi(fa(iris[-5])) #extract a factor and reverse
plot_loadings_multi = function (fa_objects, fa_labels, reverse_vector = NA, reorder = "mean") {
  library("stringr")
  library("ggplot2")
  library("plyr")

  fa_num = length(fa_objects)
  fa_names = str_c("fa.", 1:fa_num)
  if (!is.list(fa_objects)) {
    stop("fa_objects parameter is not a list.")
  }
  if (class(fa_objects) %in% c("psych", "fa")) {
    fa_objects = list(fa_objects)
    fa_num = length(fa_objects)
    fa_names = str_c("fa.", 1:fa_num)
  }
  if (missing("fa_labels")) {
    if (!is.null(names(fa_objects))) {
      fa_labels = names(fa_objects)
    }
    else {
      fa_labels = fa_names
    }
  }
  if (length(fa_labels) != fa_num) {
    stop("Factor analysis labels length is not identical to number of analyses.")
  }
  if (all(is.na(reverse_vector))) {
    reverse_vector = rep(1, fa_num)
  }
  else if (length(reverse_vector) != fa_num) {
    stop("Length of reversing vector does not match number of factor analyses.")
  }
  d = data.frame()
  for (fa.idx in 1:fa_num) {
    loads = fa_objects[[fa.idx]]$loadings * reverse_vector[fa.idx]
    rnames = rownames(loads)
    loads = as.data.frame(as.vector(loads))
    rownames(loads) = rnames
    colnames(loads) = fa_names[fa.idx]
    suppressor({
      d = merge_datasets(d, loads, 1)
    })
  }
  d2 = reshape(d, varying = 1:fa_num, direction = "long", ids = rownames(d))
  d2$time = as.factor(d2$time)
  d2$id = as.factor(d2$id)
  colnames(d2)[2] = "fa"

  #reorder factor?
  if (!is.na(reorder)) {
    if (reorder == "all") {
      library("plotflow")

      silence({
        d2 = reorder_by(id, ~fa, d2)
      })
    } else if (reorder == "mean") {
      v_aggregate_values = daply(d2, .(id), function(x) {
        mean(x$fa)
      })

      #re-level
      d2$id = factor(d2$id, levels = names(sort(v_aggregate_values, decreasing = F)))

    } else if (reorder == "median") {
      v_aggregate_values = daply(d2, .(id), function(x) {
        median(x$fa)
      })

      #re-level
      d2$id = factor(d2$id, levels = names(sort(v_aggregate_values, decreasing = F)))

    } else {
      d2_sub = d2[d2$time == reorder, ] #subset the analysis whose loading is to be used for the reorder
      silence({
        d2_sub = reorder_by(id, ~fa, d2_sub)
      })

      library(gdata)
      d2$id = reorder.factor(d2$id, new.order = levels(d2_sub$id))
    }
  }

  #plot
  g = ggplot(d2, aes(x = id, y = fa, color = time, group = time)) +
    geom_point(position = position_dodge(width = 0.5)) +
    ylab("Loading") + xlab("Indicator") + scale_color_discrete(name = "Analysis",
                                                               labels = fa_labels) + coord_flip()
  return(g)
}

library(psych)
fa_1 = fa(iris[-5])
fa_2 = fa(iris[1:125, -5])
plot_loadings_multi(list(fa_1, fa_2), reorder = "mean")

在没有警告的情况下生成以下图:

enter image description here

代码来自my personal package