我正在尝试根据另一个df的百分比将一个df的值存储到新的数据框中。
df1
"seq" "loc" "ball" "square" "triangle"
1 abcd b 65 12 9
2 abcd a 0 75 0
3 ght5 a 5 10 25
4 dfsf b 12 9 75
df2
"Type" "n" "n/2" "1/n"
1 ball 10 50 10
2 square 9 55.5 11.1
3 triangle 6 50 16.6
我想按“Type”为每个“seq”和“loc”记录结果,使我能够知道该值是高于(>=)该 Type 的“n/2”值,还是低于(<=)该 Type 的“1/n”值。
output df3
"seq" "loc" "ball" "square" "triangle"
1 abcd b True NA False
2 abcd a False True False
3 ght5 a False False NA
4 dfsf b NA False True
我用 True / NA / False 分别表示 >= / 介于两者之间 / <=,但具体用什么标记都可以,我只需要一种能区分这两种情况的方法。最终,我将选出在某一个“Type”中为 True、而在其他所有“Type”中均为 False 的“seq”和“loc”。(在输出示例中,最终唯一需要报告的是“square”的“abcd”“a”。)
在我输入时,我想我可以先将 df1 复制为 df3,然后用新值替换其中的百分比。这似乎比选出所有内容再构建一张新表更容易?但之后该怎么做,我就不确定了。
编辑:目前正在尝试下面的代码,但一直没能把值存入数据框。
# Asker's attempt: recode every Type column of df1 into "1"/"2"/"3".
# Codes: "1" if a value is below the Type's `1/n`, "3" if it is above the
# Type's `n/2`, and "2" otherwise.
# Start from a copy of df1 so the other columns are carried over unchanged.
df3 <- df1
# Loop over the rows of df2 (one row per Type).
# NOTE(review): the right-hand side of the assignment never uses `i` -- on
# every pass the outer sapply() recomputes the codes for ALL Types (index y)
# and the whole result is then assigned to the single column df2$Type[i].
# It also reads from df3 while df3 is being overwritten. This looks like the
# reason no usable value ends up stored -- confirm.
for(i in seq_len(nrow(df2)))
{
df3[, df2$Type[i]] <-
sapply(1:length(df2$Type),
function(y) sapply(df3[, df2$Type][,y],
function(x) ifelse(x < df2[y, c("1/n")], "1",
ifelse(x > df2[y, c("n/2")], "3", "2")) ))
}
编辑:(添加 df1 和 df2 的 dput(head()) 输出)
df1
structure(list(Seq = structure(c(1L, 2L, 2L, 3L, 3L), .Label =
c("AAAAAACCAGTCCCAGTTCGGATTG",
"AAAAAACCAGTCTCAGTTCGGATTG", "AAAAAACCGGTCACAGTTCAGATTG"), class =
"factor"),
loc = structure(c(2L, 1L, 2L, 1L, 2L), .Label = c("b",
"t"), class = "factor"), Ball = c(0, 0, 0, 0, 0), Cat = c(0,
0, 0, 16.6666666666667, 16.6666666666667), Square = c(0,
0, 0, 0, 0), Water = c(0, 0, 0, 33.3333333333333, 33.3333333333333
)), row.names = c(NA, -5L), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), vars = c("Seq", "loc"), drop = TRUE, indices =
list(
0L, 1L, 2L, 3L, 4L), group_sizes = c(1L, 1L, 1L, 1L, 1L),
biggest_group_size = 1L, labels = structure(list(
Seq = structure(c(1L, 2L, 2L, 3L, 3L), .Label =
c("AAAAAACCAGTCCCAGTTCGGATTG",
"AAAAAACCAGTCTCAGTTCGGATTG", "AAAAAACCGGTCACAGTTCAGATTG"), class =
"factor"),
loc = structure(c(2L, 1L, 2L, 1L, 2L), .Label = c("b",
"t"), class = "factor")), row.names = c(NA, -5L), class = "data.frame",
vars = c("Seq",
"loc"), drop = TRUE))
df2
dput(head(df2))
structure(list(Type = c("Ball", "Cat", "Square", "Water"), n = c(4L,
6L, 3L, 6L), `n/2` = c(50, 50, 66.6666666666667, 50), `1/n` = c(25,
16.6666666666667, 33.3333333333333, 16.6666666666667)), row.names = c(NA,
-4L), class = c("tbl_df", "tbl", "data.frame"))
>
答案 0 :(得分:1)
您提供的 dput:
# df1, rebuilt from the dput() output in the question: a grouped tibble
# (class "grouped_df", grouping vars Seq and loc) with factor columns Seq/loc
# and one numeric column per Type (Ball, Cat, Square, Water).
# NOTE(review): the vars/indices/group_sizes/labels attributes look like the
# grouped_df layout of an older dplyr release -- confirm it loads as expected
# with the installed dplyr version.
df1 <- structure(list(Seq = structure(c(1L, 2L, 2L, 3L, 3L), .Label =
c("AAAAAACCAGTCCCAGTTCGGATTG",
"AAAAAACCAGTCTCAGTTCGGATTG", "AAAAAACCGGTCACAGTTCAGATTG"), class =
"factor"),
loc = structure(c(2L, 1L, 2L, 1L, 2L), .Label = c("b",
"t"), class = "factor"), Ball = c(0, 0, 0, 0, 0), Cat = c(0,
0, 0, 16.6666666666667, 16.6666666666667), Square = c(0,
0, 0, 0, 0), Water = c(0, 0, 0, 33.3333333333333, 33.3333333333333
)), row.names = c(NA, -5L), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), vars = c("Seq", "loc"), drop = TRUE, indices =
list(
0L, 1L, 2L, 3L, 4L), group_sizes = c(1L, 1L, 1L, 1L, 1L),
biggest_group_size = 1L, labels = structure(list(
Seq = structure(c(1L, 2L, 2L, 3L, 3L), .Label =
c("AAAAAACCAGTCCCAGTTCGGATTG",
"AAAAAACCAGTCTCAGTTCGGATTG", "AAAAAACCGGTCACAGTTCAGATTG"), class =
"factor"),
loc = structure(c(2L, 1L, 2L, 1L, 2L), .Label = c("b",
"t"), class = "factor")), row.names = c(NA, -5L), class = "data.frame",
vars = c("Seq",
"loc"), drop = TRUE))
# df2, rebuilt from the dput() output in the question: a plain tibble with one
# row per Type and the threshold columns n, `n/2` and `1/n`.
# The `1/n` values equal 100 / n, i.e. they are on a percentage scale.
df2 <- structure(list(Type = c("Ball", "Cat", "Square", "Water"), n = c(4L,
6L, 3L, 6L), `n/2` = c(50, 50, 66.6666666666667, 50), `1/n` = c(25,
16.6666666666667, 33.3333333333333, 16.6666666666667)), row.names = c(NA,
-4L), class = c("tbl_df", "tbl", "data.frame"))
由于某种我不清楚的原因(但与数据框对象的 class 有关),之前的代码无法正常工作。在这里,我先转换为 data.frame,之后 sapply 代码就可以正常工作了。答案:
library(tidyverse)

# Drop the grouped_df/tbl_df classes so that subsetting follows base R rules.
df1 <- as.data.frame(df1)
df2 <- as.data.frame(df2)

# Recode each Type column of df1 against that Type's two thresholds in df2:
#   "1" = below `1/n`, "3" = above `n/2`, "2" = in between (bounds included).
# ifelse() is already vectorised, so each column is coded in one call instead
# of element by element. Map() walks the rows of df2 in parallel and, because
# its first argument is a character vector, names the resulting list after
# df2$Type -- so the list converts straight into correctly named columns,
# even when df1 has a single row.
coded <- Map(
  function(type, lower, upper) {
    values <- df1[[type]]
    ifelse(values < lower, "1", ifelse(values > upper, "3", "2"))
  },
  df2$Type, df2[["1/n"]], df2[["n/2"]]
)

# as_tibble() replaces the deprecated as_data_frame(); Seq and loc are put
# back in front of the coded columns.
df3 <- as_tibble(coded) %>%
  add_column(Seq = df1[["Seq"]], loc = df1[["loc"]], .before = 1)
df3
# A tibble: 5 x 6
Seq loc Ball Cat Square Water
<fct> <fct> <chr> <chr> <chr> <chr>
1 AAAAAACCAGTCCCAGTTCGGATTG t 1 1 1 1
2 AAAAAACCAGTCTCAGTTCGGATTG b 1 1 1 1
3 AAAAAACCAGTCTCAGTTCGGATTG t 1 1 1 1
4 AAAAAACCGGTCACAGTTCAGATTG b 1 2 1 2
5 AAAAAACCGGTCACAGTTCAGATTG t 1 2 1 2