为什么当分组变量中含有 NA 时,summary() 会失败?

时间:2016-07-28 03:58:01

标签: r

已在下方更新

当我尝试在我创建的 likert 对象 faceLikertG 上运行 summary() 时,收到错误 “replacement has 4 rows, data has 2”(替换有 4 行,数据有 2 行)。该对象的 results 如下:

#  Group              Item    angry  neutral    happy
#1  boys             child 19.54023 48.27586 32.18391
#2  boys primary caregiver 27.58621 36.78161 35.63218
#3 girls             child 35.18519 31.48148 33.33333
#4 girls primary caregiver 33.94495 32.11009 33.94495
#5  <NA>             child  0.00000  0.00000  0.00000
#6  <NA> primary caregiver  0.00000  0.00000  0.00000

我认为this issue可能有关系,但我不确定。

# Reproducible dump of the likert object on which summary() fails.
# It is a list with class "likert" containing:
#   results  - one row per Group x Item with the percentage of each level
#              (angry / neutral / happy); the last two rows have Group = NA
#              and all-zero percentages
#   items    - data frame with one factor column per item (`child`,
#              `primary caregiver`), levels angry / neutral / happy, some NA
#   grouping - factor with levels boys / girls; it contains one NA
#   factors = NULL, nlevels = 3L, levels = c("angry", "neutral", "happy")
faceLikertG <- structure(list(results = structure(list(Group = structure(c(1L, 
1L, 2L, 2L, NA, NA), .Label = c("boys", "girls"), class = "factor"), 
    Item = structure(c(1L, 2L, 1L, 2L, 1L, 2L), .Label = c("child", 
    "primary caregiver"), class = "factor"), angry = c(19.5402298850575, 
    27.5862068965517, 35.1851851851852, 33.9449541284404, 0, 
    0), neutral = c(48.2758620689655, 36.7816091954023, 31.4814814814815, 
    32.1100917431193, 0, 0), happy = c(32.183908045977, 35.632183908046, 
    33.3333333333333, 33.9449541284404, 0, 0)), .Names = c("Group", 
"Item", "angry", "neutral", "happy"), row.names = c(NA, -6L), class = "data.frame"), 
    items = structure(list(child = structure(c(2L, 2L, 1L, 2L, 
    3L, 3L, 2L, 3L, 3L, 2L, 3L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 1L, 
    1L, 1L, 1L, 3L, 2L, 2L, 3L, NA, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 
    3L, 3L, 2L, 3L, 3L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 1L, 
    1L, 2L, 1L, 2L, 2L, 3L, 1L, 1L, 1L, 2L, 3L, 2L, 2L, 1L, 3L, 
    2L, 3L, 3L, 3L, 2L, 1L, 2L, 3L, 1L, 3L, 2L, 2L, 1L, 2L, 1L, 
    2L, 3L, 2L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 
    1L, 3L, 3L, 2L, 3L, 1L, 2L, 2L, 3L, 2L, 1L, 2L, 3L, 1L, 2L, 
    1L, 2L, 1L, NA, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 3L, 3L, 2L, 
    2L, 2L, 3L, 1L, 1L, 2L, 3L, 1L, 3L, 2L, 1L, 1L, 3L, 3L, 3L, 
    3L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 3L, 1L, 2L, 2L, 3L, 
    2L, NA, 2L, 3L, 2L, 1L, 2L, 2L, 1L, 1L, 3L, 1L, 3L, 2L, 2L, 
    2L, 2L, 1L, 2L, 2L, 3L, 2L, 3L, 1L, 3L, 2L, 1L, 2L, NA, 2L, 
    1L, 2L, 1L, 1L, 3L, 2L, 3L, 3L, 2L, 1L, 2L, 2L, 2L, 1L, 2L
    ), .Label = c("angry", "neutral", "happy"), class = "factor"), 
        `primary caregiver` = structure(c(3L, 1L, 1L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 1L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 1L, 
        1L, 2L, 3L, 1L, 1L, 2L, 2L, NA, 1L, 3L, 3L, 1L, 2L, 1L, 
        2L, 3L, 3L, 2L, 2L, 3L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 
        3L, 1L, 2L, 1L, 1L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 1L, 3L, 
        2L, 1L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 3L, 1L, 3L, 3L, 2L, 
        3L, 3L, 2L, 1L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 
        1L, 1L, 1L, 3L, 3L, 2L, 2L, 1L, 3L, 3L, 1L, 1L, 2L, 1L, 
        1L, 1L, 2L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 
        2L, 2L, 3L, 1L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 1L, 3L, 
        2L, 3L, 1L, 2L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 
        2L, 2L, 1L, 3L, 3L, 1L, 3L, 1L, 1L, 3L, NA, 1L, 1L, 2L, 
        2L, 2L, 2L, 1L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 1L, 1L, 2L, 
        1L, 3L, 2L, 3L, 1L, 2L, 2L, 1L, 2L, NA, 1L, 1L, 3L, 2L, 
        2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 2L), .Label = c("angry", 
        "neutral", "happy"), class = "factor")), .Names = c("child", 
    "primary caregiver"), row.names = c("1", "61", "121", "181", 
    "241", "301", "361", "421", "481", "541", "601", "661", "721", 
    "781", "841", "901", "961", "1024", "1084", "1144", "1204", 
    "1324", "1387", "1450", "1510", "1570", "1630", "1690", "1750", 
    "1810", "1870", "1993", "2053", "2113", "2173", "2233", "2296", 
    "2356", "2419", "2479", "2542", "2602", "2662", "2722", "2782", 
    "2842", "2902", "2962", "3022", "3082", "3142", "3202", "3262", 
    "3322", "3382", "3442", "3502", "3562", "3622", "3682", "3742", 
    "3802", "3862", "3922", "3982", "4042", "4102", "4162", "4282", 
    "4342", "4405", "4465", "4525", "4588", "4648", "4708", "4768", 
    "4828", "4888", "4948", "5008", "5068", "5128", "5188", "5248", 
    "5308", "5368", "5428", "5488", "5548", "5608", "5668", "5728", 
    "5788", "5848", "5908", "5968", "6028", "6088", "6148", "6208", 
    "6268", "6328", "6388", "6448", "6508", "6568", "6628", "6688", 
    "6748", "6808", "6871", "6931", "6991", "7051", "7111", "7177", 
    "7237", "7303", "7363", "7423", "7483", "7543", "7603", "7666", 
    "7726", "7789", "7849", "7909", "7969", "8029", "8089", "8149", 
    "8209", "8269", "8329", "8389", "8449", "8509", "8575", "8635", 
    "8695", "8755", "8815", "8875", "8935", "8995", "9055", "9115", 
    "9175", "9235", "9295", "9355", "9415", "9475", "NA", "9537", 
    "9597", "9657", "9717", "9777", "9837", "9897", "9960", "10020", 
    "10080", "10140", "10200", "10260", "10320", "10380", "10440", 
    "10500", "10560", "10620", "10680", "10743", "10803", "10866", 
    "10926", "10986", "11046", "11106", "11166", "11226", "11286", 
    "11346", "11406", "11466", "11526", "11586", "11646", "11706", 
    "11766", "11826", "11886", "11946", "12006", "12066"), class = "data.frame"), 
    grouping = structure(c(2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 
    1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 
    1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 
    1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 
    2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 
    2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 
    1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 
    1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 
    2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, NA, 1L, 2L, 2L, 
    2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 
    2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 
    1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L), .Label = c("boys", 
    "girls"), class = "factor"), factors = NULL, nlevels = 3L, 
    levels = c("angry", "neutral", "happy")), .Names = c("results", 
"items", "grouping", "factors", "nlevels", "levels"), class = "likert")

会话:

#> sessionInfo()
#R version 3.3.0 (2016-05-03)
#Platform: x86_64-apple-darwin13.4.0 (64-bit)
#Running under: OS X 10.11.5 (El Capitan)

#locale:
#[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

#attached base packages:
#[1] parallel  stats     graphics  grDevices utils     datasets  methods  
#[8] base     

#other attached packages:
# [1] likert_1.3.3 ......

更新

这是一个更好的示例,可以重现该问题。该错误是由分组变量中的 NA 触发的。我不确定这是一个 bug,还是应该由我自己通过只保留分组变量非缺失的完整观测(complete cases)来规避的问题。

library(likert)
library(plyr)

# Minimal reproduction: two binary items (v1, v2) plus a grouping variable
# that contains one missing value.
df <- data.frame(
  v1 = c(1, 0, 1, 0, 1, 1, 0, 1),
  v2 = c(1, NA, 1, 0, 1, 1, 0, 1),
  grp = c("a", "b", "a", NA, "a", "b", "a", "b")
)

# Recode both item columns as factors labelled "incorrect" / "correct".
item_cols <- c("v1", "v2")
df[item_cols] <- lapply(df[item_cols], function(item) {
  factor(item, levels = c(0, 1), labels = c("incorrect", "correct"))
})

# Grouping factor restricted to the levels "a" and "b"; the NA entry stays NA.
df[["grp"]] <- factor(
  df[["grp"]],
  levels = c("a", "b"),
  labels = c("a", "b")
)

dfLikert <- likert(df[item_cols], grouping = df[["grp"]])

# This is the call that raises the error described in the question.
summary(dfLikert)

# Grouped plot with custom colours, axis text and strip text.
plot(
  dfLikert,
  low.color = "blue",
  high.color = "green",
  neutral.color = "grey90",
  text.size = 1.75
) +
  theme(
    axis.text.y = element_text(colour = "darkgrey", size = 10),
    strip.text = element_text(colour = "black", size = 10)
  )

1 个答案:

答案 0 :(得分:1)

问题在于,当你调用 summary 时,实际被调用的是 summary.likert;正如你所说,summary.likert 无法处理 NA,而 summary.likert.hackr 则可以。

pacman::p_load(likert)
library(plyr)

# Same toy data as in the question: two binary items and a grouping variable
# with one missing value.
df <- data.frame(
  v1 = c(1, 0, 1, 0, 1, 1, 0, 1),
  v2 = c(1, NA, 1, 0, 1, 1, 0, 1),
  grp = c("a", "b", "a", NA, "a", "b", "a", "b")
)

# Items become "incorrect" / "correct" factors; grp keeps the levels a and b.
df[1:2] <- lapply(
  df[1:2],
  factor,
  levels = c(0, 1),
  labels = c("incorrect", "correct")
)
df[["grp"]] <- factor(
  df[["grp"]],
  levels = c("a", "b"),
  labels = c("a", "b")
)

# Add NA as an explicit factor level to every column (v1, v2 and grp).
df[] <- lapply(df, addNA)

dfLikert <- likert(df[1:2], grouping = df[["grp"]])

# Load the patched summary function from GitHub and use it in place of
# summary().
source("https://raw.githubusercontent.com/hack-r/summary.likert.hackr/master/summary.likert.hackr.R")
summary.likert.hackr(dfLikert)
  Group Item      low neutral      high     mean        sd
1     a   v1 25.00000       0  75.00000 1.750000 0.5000000
2     a   v2 25.00000       0  75.00000 1.750000 0.5000000
3     b   v1 33.33333       0  66.66667 1.666667 0.5773503
4     b   v2  0.00000       0 100.00000 2.000000 0.0000000
5  <NA>   v1  0.00000       0   0.00000       NA        NA
6  <NA>   v2  0.00000       0   0.00000       NA        NA
>

我修改的函数中,关键部分如下(这是新版本):

# For every non-missing group, fill the low / neutral / high percentages and
# the per-item mean and sd into results2.
for (g in unique(results$Group[!is.na(results$Group)])) {
  # Rows of results2 that belong to this group.
  dest_rows <- which(results2$Group == g)
  # Matching rows of results. `== g` is NA where Group is NA; the is.na()
  # guard turns those NAs into FALSE so no all-NA rows are selected.
  src_rows <- results$Group == g & !(is.na(results$Group))

  # Level columns in results are offset by 2 from the level indices.
  low_part <- results[src_rows, lowrange + 2]
  results2[dest_rows, ]$low <- if (length(lowrange) == 1) {
    low_part
  } else {
    apply(low_part, 1, sum)
  }

  high_part <- results[src_rows, highrange + 2]
  results2[dest_rows, ]$high <- if (length(highrange) == 1) {
    high_part
  } else {
    apply(high_part, 1, sum)
  }

  # A neutral column is filled only when low and high ranges are not adjacent.
  if (lowrange[length(lowrange)] + 1 != highrange[1]) {
    results2[dest_rows, ]$neutral <- results[src_rows, highrange[1] + 1]
  }

  # Mean and sd of the numeric level codes, per item, within this group.
  group_members <- which(object$grouping == g)
  for (i in names(items)) {
    cell <- which(results2$Group == g & results2$Item == i)
    scores <- as.numeric(items[group_members, i])
    results2[cell, 'mean'] <- mean(scores, na.rm = TRUE)
    results2[cell, 'sd'] <- sd(scores, na.rm = TRUE)
  }
}

这样就可以正常工作了。既然我已经在 unique 语句中过滤掉了 NA,后面添加的那些 is.na 检查现在也许可以删掉;不过保留它们也没有任何坏处。