我的数据是一个由 table 对象组成的列表,记录了 6 个类别(random_sequence_generation 等)中 "H"、"L" 和 "U" 的出现频数。
我想生成一个数据框,其中每一行给出该类别中观察到的 "H"、"L" 和 "U" 的百分比,即:
random_sequence_generation 6.7 63.3 30.0
allocation_concealment 0.0 43.3 56.7
... ...
给出数据:
# Example input: a named list holding one frequency table per category.
# A table only lists the levels that were actually observed, so some
# categories lack "H", "L" or "U".
dat <- local({
  # Wrap named integer counts as a one-dimensional "table" whose single
  # dimension carries an empty name.
  as_counts <- function(counts) {
    structure(
      counts,
      .Dim = length(counts),
      .Dimnames = structure(list(names(counts)), .Names = ""),
      class = "table"
    )
  }

  list(
    random_sequence_generation = as_counts(c(H = 2L, L = 19L, U = 9L)),
    allocation_concealment = as_counts(c(L = 13L, U = 17L)),
    blinding_of_participants = as_counts(c(H = 30L)),
    blinding_of_personnel = as_counts(c(H = 28L, U = 2L)),
    blinding_of_outcome_assessor = as_counts(c(H = 17L, L = 8L, U = 5L)),
    incomplete_outcome_data = as_counts(c(H = 10L, L = 20L))
  )
})
答案 0 :(得分:4)
使用 `sapply`:
# Build a matrix with one row per element of `dat` and one column per level
# in `val`, holding the percentage of observations at each level.
val <- c("H", "L", "U")
t(sapply(dat, function(x) {
  # Start from zeros so that a level absent from this table reports 0.
  tot <- setNames(numeric(length(val)), val)
  # match() maps each observed level to its slot in `val`; multiplying by 100
  # turns the proportion into the requested percentage.
  tot[match(names(x), val)] <- 100 * as.vector(x) / sum(x)
  tot
}))
# H L U
#random_sequence_generation 6.66667 63.3333 30.00000
#allocation_concealment 0.00000 43.3333 56.66667
#blinding_of_participants 100.00000 0.0000 0.00000
#blinding_of_personnel 93.33333 0.0000 6.66667
#blinding_of_outcome_assessor 56.66667 26.6667 16.66667
#incomplete_outcome_data 33.33333 66.6667 0.00000
我们首先创建一个长度(`length`)为 3 的零向量,再用 `match` 按名称找到各取值对应的位置,然后把 `x` 除以 `sum(x)` 得到的占比赋给这些位置。感谢 @Rohit 指出之前方法中的问题。
答案 1 :(得分:4)
# Example input: a named list holding one frequency table per category.
# A table only lists the levels that were actually observed, so some
# categories lack "H", "L" or "U".
dat <- local({
  # Wrap named integer counts as a one-dimensional "table" whose single
  # dimension carries an empty name.
  as_counts <- function(counts) {
    structure(
      counts,
      .Dim = length(counts),
      .Dimnames = structure(list(names(counts)), .Names = ""),
      class = "table"
    )
  }

  list(
    random_sequence_generation = as_counts(c(H = 2L, L = 19L, U = 9L)),
    allocation_concealment = as_counts(c(L = 13L, U = 17L)),
    blinding_of_participants = as_counts(c(H = 30L)),
    blinding_of_personnel = as_counts(c(H = 28L, U = 2L)),
    blinding_of_outcome_assessor = as_counts(c(H = 17L, L = 8L, U = 5L)),
    incomplete_outcome_data = as_counts(c(H = 10L, L = 20L))
  )
})
# Convert every element of `mydata` to a data frame and stack the pieces
# into a single data frame with plyr::ldply().
#
# mydata: a list whose elements data.frame() can convert.
# Returns the combined data frame produced by plyr::ldply().
flatten_list <- function(mydata) {
  # The argument stays named `x`: data.frame() can derive a column name from
  # the expression it is given, so renaming it could change the output.
  to_frame <- function(x) data.frame(x, stringsAsFactors = FALSE)
  frames <- lapply(mydata, to_frame)
  plyr::ldply(frames)
}
# Turn every table into proportions and stack them into one long data frame,
# then pivot it wide: one row per `.id`, one Freq.* column per `Var1` value.
# Levels that a category never observed have no long-format row and
# therefore come out as NA.
res <- flatten_list(lapply(dat, prop.table))
reshape(res, direction = "wide", idvar = ".id", timevar = "Var1")
这是输出,我不确定它是否正确。是这样吗?
.id Freq.H Freq.L Freq.U
1 random_sequence_generation 0.06666667 0.6333333 0.30000000
4 allocation_concealment NA 0.4333333 0.56666667
6 blinding_of_participants 1.00000000 NA NA
7 blinding_of_personnel 0.93333333 NA 0.06666667
9 blinding_of_outcome_assessor 0.56666667 0.2666667 0.16666667
12 incomplete_outcome_data 0.33333333 0.6666667 NA
答案 2 :(得分:1)
这是一种使用 `melt`/`acast` 的方法:
library(reshape2)
# Melt the list of tables to long form, cast it to an L1-by-Var1 matrix with
# 0 for combinations that never occur, then convert each row to percentages.
# acast() takes its aggregation function as `fun.aggregate`; `FUN` is not one
# of its arguments and would only be passed along through `...`.
100 * prop.table(
  acast(melt(dat), L1 ~ Var1, fun.aggregate = sum, fill = 0),
  1
)
# H L U
#allocation_concealment 0.000000 43.33333 56.666667
#blinding_of_outcome_assessor 56.666667 26.66667 16.666667
#blinding_of_participants 100.000000 0.00000 0.000000
#blinding_of_personnel 93.333333 0.00000 6.666667
#incomplete_outcome_data 33.333333 66.66667 0.000000
#random_sequence_generation 6.666667 63.33333 30.000000
或者使用 `tidyverse` 的方案:
library(tidyverse)
# Same percentages with the tidyverse: turn each table into name/value rows,
# stack them with the list names in `grp`, rescale `value` within each group
# to percentages, and spread the names into columns with 0 for absent ones.
# The result is still grouped by `grp`.
dat %>%
  map(enframe) %>%
  bind_rows(.id = "grp") %>%
  group_by(grp) %>%
  mutate(value = 100 * value / sum(value)) %>%
  spread(key = name, value = value, fill = 0)
# A tibble: 6 x 4
# Groups: grp [6]
# grp H L U
# <chr> <dbl> <dbl> <dbl>
#1 allocation_concealment 0 43.3 56.7
#2 blinding_of_outcome_assessor 56.7 26.7 16.7
#3 blinding_of_participants 100 0 0
#4 blinding_of_personnel 93.3 0 6.67
#5 incomplete_outcome_data 33.3 66.7 0
#6 random_sequence_generation 6.67 63.3 30