如何正确地从 ICC 的输出创建数据框

时间:2012-07-31 13:47:21

标签: r dataframe icc

我正在尝试获取 ICC 计算的输出并据此创建一个数据框(请参阅下面的示例数据和代码)。查看新数据框时,一切看起来都很完美;但如果用 str() 检查,就会发现所有变量都是因子:我以为是计算结果的那些数值,其实只是因子水平(标签),而实际存储的数据是各因子水平对应的整数。这一点尤其成问题,因为我还想对这个新数据框做进一步的处理。

我对R比较陌生,我无法弄清楚如何修改我的代码来解决这个问题。

数据集:

# Example data in dput() form: a data frame with 17 rows (one per
# Individual_ID), a Region label ("South" or "North") and nine numeric
# columns P1..P9.
# NOTE(review): P1..P9 are presumably the delta-13C measurements mentioned in
# the analysis code - confirm against the data source.
WW_Wing_13C_Summary <- structure(list(Individual_ID = c("WW_08A_02", "WW_08A_03",
"WW_08A_04", "WW_08A_05", "WW_08A_06", "WW_08A_08", "WW_08A_09", "WW_08A_13", 
"WW_08B_02", "WW_08G_01", "WW_08G_02", "WW_08G_05", "WW_08G_07", 
"WW_08I_01", "WW_08I_03", "WW_08I_07", "WW_08I_12"), Region = c("South", 
"South", "South", "South", "South", "South", "South", "South", 
"South", "North", "North", "North", "North", "North", "North", 
"North", "North"), P1 = c(-18.67, -20.06, -16.54, -20.33, -21.28, 
-23.86, -21.3, -21.34, -20.87, -20.32, -19.35, -21.2, -21.61, 
-18.3, -22.3, -21.6, -24.12), P2 = c(-19.16, -20.3, -15.6, -20.28, 
-21.24, -23.95, -21.44, -24.13, -20.95, -20.02, -19.38, -21.29, 
-21.42, -18.53, -22.2, -21.77, -24.08), P3 = c(-20.38, -21.21, 
-16.61, -20.58, -21.22, -24, -21.49, -23.03, -20.76, -19.92, 
-19.7, -21.85, -21.84, -19.55, -22.18, -22.17, -24), P4 = c(-20.96, 
-22.9, -19.65, -20.8, -21.2, -24.16, -21.49, -21.77, -20.9, -20.05, 
-19.94, -22.22, -21.68, -20.18, -22.14, -22.21, -24.2), P5 = c(-21.61, 
-22.87, -20.98, -21.24, -21.47, -24.93, -21.1, -21.4, -21.02, 
-20.23, -20.43, -22.34, -21.79, -20.96, -21.55, -22.24, -24.16
), P6 = c(-21.65, -21.13, -21.18, -20.94, -21.23, -24.93, -20.84, 
-21.57, -20.84, -20.73, -20.08, -22.42, -21.49, -21.08, -20.85, 
-22.47, -22.87), P7 = c(-21.31, -20.68, -21.7, -20.54, -21.89, 
-24.48, -20.78, -21.45, -21.11, -20.91, -20.81, -22.69, -21.88, 
-21.5, -23.1, -22.19, -22.51), P8 = c(-20.8, -20.58, -21.18, 
-21.04, -21.89, -24.17, -21.58, -21.32, -20.64, -19.87, -20.9, 
-22.75, -21.62, -17.42, -20.75, -21.89, -22.12), P9 = c(-21.28, 
-20.69, -21.33, -20.42, -21.6, -23.1, -20.76, -21.59, -20.11, 
-19.58, -19.24, -22.73, -21.54, -13.18, -20.9, -21.89, -22.3)), 
.Names = c("Individual_ID", "Region", "P1", "P2", "P3", "P4", "P5",
"P6", "P7", "P8", "P9"), class = "data.frame", row.names = c(NA, -17L))

代码:

## split the complete dataset by regions
## (result: a named list holding one data frame per distinct Region value)
WW_Wing_13C_Summary_Region <- split(WW_Wing_13C_Summary, WW_Wing_13C_Summary$Region)

library(psych)
# calculate ICC for delta-13C and create a data frame with results for each region
# Steps, innermost first:
#   1. ICC() is run on columns 3 to 11 (P1..P9) of each region's data frame.
#   2. Only the "results" and "summary" elements of its output are kept, and
#      unlist() flattens them into one atomic vector per region.
#   3. sapply() binds those vectors as columns; t() turns regions into rows.
#   4. data.frame() wraps the matrix and eight entries are picked by position.
# NOTE(review): the positions 49, 52, ... index into the flattened vector, so
# they depend on the exact layout of the ICC() output - confirm which
# statistics they refer to before reusing them.
WW_Wing_13C_ICC <- data.frame(t(sapply(WW_Wing_13C_Summary_Region,
    function(temp) unlist(ICC(temp[ , c(3:11)])[c("results", "summary")]))))[ , c(49, 52, 55, 58, 61, 9, 39, 45)]

# View results
WW_Wing_13C_ICC

# View structure of dataframe
# (str() reports the class of every column, unlike the printed table above)
str(WW_Wing_13C_ICC)

1 个答案:

答案 0 :(得分:2)

这是因为你在这里写的这个函数:

# The call from the question, quoted unchanged. unlist() flattens the selected
# ICC() output into a single atomic vector per region, so every value in that
# vector ends up with one common type.
WW_Wing_13C_ICC <- data.frame(t(sapply(WW_Wing_13C_Summary_Region,
    function(temp) unlist(ICC(temp[ , c(3:11)])[c("results", "summary")]))))[ , c(49, 52, 55, 58, 61, 9, 39, 45)]

……(这段代码我可能会稍微整理一下)问题在于你使用了 unlist。这样做会把字符型和数值型的内容混在一起,R 会自动把所有内容都转换为字符型。之后你再按位置取出所需的那些元素,它们看起来像数字,实际上却是字符/因子。

有两种解决方法:

**1. 重写上面的匿名函数,只提取你需要的内容,并确保所有内容都是数值型**

例如:

# Extract, for one region's data frame, only the pieces of the ICC() output
# that are needed, keeping them as data frames so that nothing is flattened
# into a single character vector.
#
# temp: a data frame whose columns 3 to 11 hold the values passed to ICC().
# Returns a list with two one-row data frames:
#   sum - the first row of the first table stored in the "summary" element
#   res - row 3 of the "results" table, restricted to the columns
#         "ICC", "lower bound" and "upper bound"
SUM <- function(temp) {
    icc_out <- ICC(temp[, 3:11])
    summary_tab <- icc_out[["summary"]][[1]]
    results_tab <- icc_out[["results"]]
    sumR1 <- data.frame(summary_tab, check.names = FALSE)[1, ]
    resR3 <- results_tab[3, c("ICC", "lower bound", "upper bound")]
    list(sum = sumR1, res = resR3)
}

# Flatten one level only, so the list alternates sum, res, sum, res, ...
# (one pair per region) while each element stays a data frame.
sumres <- unlist(lapply(WW_Wing_13C_Summary_Region, SUM), recursive = FALSE)

# c(TRUE, FALSE) is recycled: it picks the "sum" elements, c(FALSE, TRUE) the
# "res" elements. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
DF <- data.frame(do.call(rbind, sumres[c(TRUE, FALSE)]),
    do.call(rbind, sumres[c(FALSE, TRUE)]), check.names = FALSE)

# Label the rows with the region names taken from the split list itself;
# lapply() preserves its order, and this also works when a region name
# contains a "." (splitting the generated row names on "." would not).
rownames(DF) <- names(WW_Wing_13C_Summary_Region)
DF

**2. 手动更改类:**

例如:

# Convert every column to numeric, column by column.
# - as.character() first makes a factor column yield its labels rather than
#   its internal integer codes.
# - Assigning into NEW[] keeps the data frame's shape, column names and row
#   names; apply(..., 2, as.numeric) would drop the row names and collapse a
#   one-row data frame to a plain vector.
NEW <- WW_Wing_13C_ICC
NEW[] <- lapply(NEW, function(col) as.numeric(as.character(col)))
str(NEW)