我有一些数据,记录了各个农场小母牛的妊娠检查结果。
例如:
Farm Breed Pregnant Empty Total ADG
1 J 5 3 8 12
2 F 2 1 3 10
3 J 3 0 3 11
我希望把 Pregnant(怀孕)和 Empty(空怀)两列的计数展开成二进制输出(每头牛一行,怀孕为 1,空怀为 0),其他列的值则按行重复。
例如:
Farm Breed Pregnant ADG
1 J 1 12
1 J 1 12
1 J 1 12
1 J 1 12
1 J 1 12
1 J 0 12
1 J 0 12
1 J 0 12
2 F 1 10
2 F 1 10
2 F 0 10
我一直在尝试使用 reshape 相关的代码,但始终没有弄明白,想知道是否有人可以帮助我。
答案 0 :(得分:2)
我建议将“data.table”包与 rep 结合使用。您希望按照 Pregnant 和 Empty 两列中给出的数字,分别重复 1 和 0 这两个值,因此可以很容易地为此写出一条 rep 语句。默认情况下,各行会按照您用于分组的键(即 by 中的列)自动“扩展”,以容纳新生成的数据。
假设您的原始数据是一个名为“mydf”的 data.frame,请尝试:
library(data.table)
# Within each (Farm, Breed, ADG) group, emit a 1 for every pregnant animal
# followed by a 0 for every empty one; the grouping columns are repeated
# automatically to match the length of the result.
as.data.table(mydf)[
  ,
  .(Pregnant = rep(c(1, 0), times = c(Pregnant, Empty))),
  by = .(Farm, Breed, ADG)
]
# Farm Breed ADG Pregnant
# 1: 1 J 12 1
# 2: 1 J 12 1
# 3: 1 J 12 1
# 4: 1 J 12 1
# 5: 1 J 12 1
# 6: 1 J 12 0
# 7: 1 J 12 0
# 8: 1 J 12 0
# 9: 2 F 10 1
# 10: 2 F 10 1
# 11: 2 F 10 0
# 12: 3 J 11 1
# 13: 3 J 11 1
# 14: 3 J 11 1
我能想到的最接近的基础 R 方法是:先“扩展”源数据集的相关列(很简单,使用 rep 即可),然后使用 apply 获取二进制的“怀孕”结果,像这样:
# Repeat each source row once per animal and keep only the columns that are
# simply carried along. The repeat count is Pregnant + Empty so that it always
# matches the number of 1/0 flags generated below.
# The flags are built row by row with Map(), which always returns a list.
# apply() would simplify its result to a matrix whenever every farm has the
# same number of animals, and cbind() would then fail on mismatched rows.
cbind(
  mydf[rep(rownames(mydf), mydf$Pregnant + mydf$Empty),
       c("Farm", "Breed", "ADG")],
  Pregnant = unlist(Map(
    function(preg, empty) rep(c(1, 0), c(preg, empty)),
    mydf$Pregnant, mydf$Empty
  ))
)
# Farm Breed ADG Pregnant
# 1 1 J 12 1
# 1.1 1 J 12 1
# 1.2 1 J 12 1
# 1.3 1 J 12 1
# 1.4 1 J 12 1
# 1.5 1 J 12 0
# 1.6 1 J 12 0
# 1.7 1 J 12 0
# 2 2 F 10 1
# 2.1 2 F 10 1
# 2.2 2 F 10 0
# 3 3 J 11 1
# 3.1 3 J 11 1
# 3.2 3 J 11 1
答案 1 :(得分:0)
以下是不言自明的:
# Process one row at a time: repeat the (Farm, Breed, ADG) values together
# with an outcome flag, then reshape the flattened result into a 4-row matrix
# (one column per animal). m1 repeats each row `Pregnant` times with flag 1;
# m2 repeats each row `Empty` times with flag 0.
# NOTE(review): assumes ddf has the column layout shown in the question
# (Farm, Breed, Pregnant, Empty, Total, ADG) -- confirm.
# Because the selected columns include the character column Breed, every
# value ends up as a character string (see the quoted output below).
> m1 = matrix(unlist(apply(ddf[,c(1,2,3,6)], 1, function(x) rep(c(x[c(1,2,4)],1), x[3]))), nrow=4)
> m2 = matrix(unlist(apply(ddf[,c(1,2,4,6)], 1, function(x) rep(c(x[c(1,2,4)],0), x[3]))), nrow=4)
>
>
> m1
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] "1" "1" "1" "1" "1" "2" "2" "3" "3" "3"
[2,] "J" "J" "J" "J" "J" "F" "F" "J" "J" "J"
[3,] "12" "12" "12" "12" "12" "10" "10" "11" "11" "11"
[4,] "1" "1" "1" "1" "1" "1" "1" "1" "1" "1"
> m2
[,1] [,2] [,3] [,4]
[1,] "1" "1" "1" "2"
[2,] "J" "J" "J" "F"
[3,] "12" "12" "12" "10"
[4,] "0" "0" "0" "0"
# Bind the two matrices side by side, transpose so that each animal becomes
# one row, and label the four resulting columns:
> mm = t(cbind(m1, m2))
> colnames(mm) = c('Farm','Breed','ADG','Preg_empty')
> mm
Farm Breed ADG Preg_empty
[1,] "1" "J" "12" "1"
[2,] "1" "J" "12" "1"
[3,] "1" "J" "12" "1"
[4,] "1" "J" "12" "1"
[5,] "1" "J" "12" "1"
[6,] "2" "F" "10" "1"
[7,] "2" "F" "10" "1"
[8,] "3" "J" "11" "1"
[9,] "3" "J" "11" "1"
[10,] "3" "J" "11" "1"
[11,] "1" "J" "12" "0"
[12,] "1" "J" "12" "0"
[13,] "1" "J" "12" "0"
[14,] "2" "F" "10" "0"
答案 2 :(得分:0)
使用dplyr
library(dplyr)
library(tidyr)
# Reshape the two count columns to long form (one row per farm and outcome),
# repeat every row `Val` times, then code the outcome as 1/0.
mydf %>%
  pivot_longer(c(Pregnant, Empty), names_to = "Var", values_to = "Val") %>%
  uncount(Val) %>%
  # Compare against the label explicitly. Indexing c(1, 0) by factor level
  # depends on level order; with a character key the levels sort as
  # "Empty" < "Pregnant", which would invert the coding.
  mutate(Pregnant = as.numeric(Var == "Pregnant")) %>%
  select(Farm, Breed, ADG, Pregnant) %>%
  arrange(Farm, Breed, desc(Pregnant)) %>%
  # Return a plain data.frame, like the input.
  as.data.frame()
给出结果
# Farm Breed ADG Pregnant
#1 1 J 12 1
#2 1 J 12 1
#3 1 J 12 1
#4 1 J 12 1
#5 1 J 12 1
#6 1 J 12 0
#7 1 J 12 0
#8 1 J 12 0
#9 2 F 10 1
#10 2 F 10 1
#11 2 F 10 0
#12 3 J 11 1
#13 3 J 11 1
#14 3 J 11 1
# Sample data from the question, one row per farm. All numeric columns are
# stored as integers and Breed as character.
mydf <- data.frame(
  Farm = 1:3,
  Breed = c("J", "F", "J"),
  Pregnant = c(5L, 2L, 3L),
  Empty = c(3L, 1L, 0L),
  Total = c(8L, 3L, 3L),
  ADG = c(12L, 10L, 11L),
  stringsAsFactors = FALSE
)
答案 3 :(得分:-6)
尝试melt
# library() stops with an error when reshape2 is missing, whereas require()
# only returns FALSE and lets the next line fail with a confusing message.
library(reshape2)
# `id.vars` is spelled out in full instead of relying on partial matching.
# NOTE(review): assuming `data` has the question's column layout, only ADG is
# melted here; this call does not by itself repeat rows per animal.
newdata <- melt(data[c(1:3, 6)], id.vars = c("Farm", "Breed", "Pregnant"))