我有一个数据框,其中某些列包含许多重复的值。我想新建一列,为感兴趣的列中的每个唯一值分配一个新的编号。我已经在 Stack Overflow 上查找过与聚合相关的问题,但还没有找到我想要的答案。
dput(head(示例)) 的输出如下(仅包含前 6 行),其后是完整数据框(共 10 行)的打印结果。
# Output of dput(head(...)): the first six rows of the example data frame.
# miR and mRNA are factors, stored as integer codes plus a .Label vector; the
# mRNA labels "TLN2" and "VAV3" are declared but not used by these six rows.
structure(list(avecor = c(-0.929199786400515, -0.729228501795928,
-0.431983639087243, -0.55088842103792, -0.978422379116014, -0.627856061946295
), miR = structure(c(9L, 5L, 6L, 2L, 8L, 4L), .Label = c("hsa-miR-107",
"hsa-miR-193a-3p", "hsa-miR-28-5p", "hsa-miR-331-3p", "hsa-miR-362-3p",
"hsa-miR-362-5p", "hsa-miR-429", "hsa-miR-590-5p", "hsa-miR-630"
), class = "factor"), mRNA = structure(c(1L, 2L, 2L, 3L, 3L,
4L), .Label = c("IGF1R", "PRKCA", "TESK2", "THBS1", "TLN2", "VAV3"
), class = "factor")), row.names = c("hsa-miR-630:IGF1R", "hsa-miR-362-3p:PRKCA",
"hsa-miR-362-5p:PRKCA", "hsa-miR-193a-3p:TESK2", "hsa-miR-590-5p:TESK2",
"hsa-miR-331-3p:THBS1"), class = "data.frame")
avecor miR mRNA
hsa-miR-630:IGF1R -0.9291998 hsa-miR-630 IGF1R
hsa-miR-362-3p:PRKCA -0.7292285 hsa-miR-362-3p PRKCA
hsa-miR-362-5p:PRKCA -0.4319836 hsa-miR-362-5p PRKCA
hsa-miR-193a-3p:TESK2 -0.5508884 hsa-miR-193a-3p TESK2
hsa-miR-590-5p:TESK2 -0.9784224 hsa-miR-590-5p TESK2
hsa-miR-331-3p:THBS1 -0.6278561 hsa-miR-331-3p THBS1
hsa-miR-28-5p:TLN2 -0.9988643 hsa-miR-28-5p TLN2
hsa-miR-331-3p:TLN2 -0.8773624 hsa-miR-331-3p TLN2
hsa-miR-429:TLN2 -0.9901250 hsa-miR-429 TLN2
hsa-miR-107:VAV3 -0.7713383 hsa-miR-107 VAV3
如果将其应用于 mRNA 列,则理想的输出应为:
avecor miR mRNA UniquemRNA
hsa-miR-630:IGF1R -0.9291998 hsa-miR-630 IGF1R 1
hsa-miR-362-3p:PRKCA -0.7292285 hsa-miR-362-3p PRKCA 2
hsa-miR-362-5p:PRKCA -0.4319836 hsa-miR-362-5p PRKCA 2
hsa-miR-193a-3p:TESK2 -0.5508884 hsa-miR-193a-3p TESK2 3
hsa-miR-590-5p:TESK2 -0.9784224 hsa-miR-590-5p TESK2 3
hsa-miR-331-3p:THBS1 -0.6278561 hsa-miR-331-3p THBS1 4
hsa-miR-28-5p:TLN2 -0.9988643 hsa-miR-28-5p TLN2 5
hsa-miR-331-3p:TLN2 -0.8773624 hsa-miR-331-3p TLN2 5
hsa-miR-429:TLN2 -0.9901250 hsa-miR-429 TLN2 5
hsa-miR-107:VAV3 -0.7713383 hsa-miR-107 VAV3 6
任何帮助将不胜感激。
答案 0 :(得分:1)
如果我理解正确,那么在将 mRNA 创建为 factor 时,您实际上已经得到了该列所需的信息。
如果这确实是您想要的,可以将 factor 重新编码为 numeric 值,但这只是复制了已经存在的信息。
您可以这样做:
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Rebuild the six example rows from the dput() output and add UniquemRNA,
# the numeric code of each row's mRNA factor level.
mutate(
  structure(
    list(
      avecor = c(
        -0.929199786400515, -0.729228501795928, -0.431983639087243,
        -0.55088842103792, -0.978422379116014, -0.627856061946295
      ),
      miR = structure(
        c(9L, 5L, 6L, 2L, 8L, 4L),
        .Label = c(
          "hsa-miR-107", "hsa-miR-193a-3p", "hsa-miR-28-5p",
          "hsa-miR-331-3p", "hsa-miR-362-3p", "hsa-miR-362-5p",
          "hsa-miR-429", "hsa-miR-590-5p", "hsa-miR-630"
        ),
        class = "factor"
      ),
      mRNA = structure(
        c(1L, 2L, 2L, 3L, 3L, 4L),
        .Label = c("IGF1R", "PRKCA", "TESK2", "THBS1", "TLN2", "VAV3"),
        class = "factor"
      )
    ),
    row.names = c(
      "hsa-miR-630:IGF1R", "hsa-miR-362-3p:PRKCA", "hsa-miR-362-5p:PRKCA",
      "hsa-miR-193a-3p:TESK2", "hsa-miR-590-5p:TESK2", "hsa-miR-331-3p:THBS1"
    ),
    class = "data.frame"
  ),
  UniquemRNA = as.numeric(mRNA)
)
#> avecor miR mRNA UniquemRNA
#> 1 -0.9291998 hsa-miR-630 IGF1R 1
#> 2 -0.7292285 hsa-miR-362-3p PRKCA 2
#> 3 -0.4319836 hsa-miR-362-5p PRKCA 2
#> 4 -0.5508884 hsa-miR-193a-3p TESK2 3
#> 5 -0.9784224 hsa-miR-590-5p TESK2 3
#> 6 -0.6278561 hsa-miR-331-3p THBS1 4
答案 1 :(得分:1)
我使用R基础包。
# Rebuild the six-row example data frame from the question's dput() output.
df <- structure(list(avecor = c(-0.929199786400515, -0.729228501795928,
-0.431983639087243, -0.55088842103792, -0.978422379116014, -0.627856061946295
), miR = structure(c(9L, 5L, 6L, 2L, 8L, 4L), .Label = c("hsa-miR-107",
"hsa-miR-193a-3p", "hsa-miR-28-5p", "hsa-miR-331-3p", "hsa-miR-362-3p",
"hsa-miR-362-5p", "hsa-miR-429", "hsa-miR-590-5p", "hsa-miR-630"
), class = "factor"), mRNA = structure(c(1L, 2L, 2L, 3L, 3L,
4L), .Label = c("IGF1R", "PRKCA", "TESK2", "THBS1", "TLN2", "VAV3"
), class = "factor")), row.names = c("hsa-miR-630:IGF1R", "hsa-miR-362-3p:PRKCA",
"hsa-miR-362-5p:PRKCA", "hsa-miR-193a-3p:TESK2", "hsa-miR-590-5p:TESK2",
"hsa-miR-331-3p:THBS1"), class = "data.frame")

# Give every distinct mRNA one integer id: the index of its factor level.
# Building the ids with rep() over table(df$mRNA) emits them grouped by level,
# which only lines up with the rows when df is already sorted by mRNA. Reading
# the factor codes directly is row-aligned for any row order and needs no loop.
# as.factor() leaves a factor untouched and lets a character column work too.
UniquemRNA <- as.integer(as.factor(df$mRNA))
UniquemRNA

# Attach the ids as a new column and print the result.
df$UniquemRNA <- UniquemRNA
df