我正在尝试根据另一列中的数值创建一个因子列。这是我的数据的一个子集:
> dput(sample)
structure(list(ID = c(1683L, 1684L, 1684L, 1684L, 1684L, 1685L,
1685L, 1685L, 1685L, 1686L, 1686L, 1686L, 1686L, 30759L, 30759L,
30759L, 30759L, 30760L, 30760L, 30760L, 30760L), Month = structure(c(2L,
2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L,
2L, 3L, 1L, 2L), .Label = c("Jun", "Jul", "Aug"), class = "factor"),
Year = c(2018, 2017, 2017, 2018, 2018, 2017, 2017, 2018,
2018, 2017, 2017, 2018, 2018, 2017, 2017, 2018, 2018, 2017,
2017, 2018, 2018), Homerange = c(NA, 27.2850594918174, NA,
NA, NA, NA, 30.52684873837, NA, NA, NA, 30.7069481409563,
10.625864752589, 29.2661529202662, 32.3278427642325, NA,
NA, NA, NA, 33.8586876862157, NA, NA)), out.attrs = list(
dim = c(58L, 4L, 2L), dimnames = list(Var1 = c("Var1= 1657",
"Var1= 1658", "Var1= 1659", "Var1= 1660", "Var1= 1661", "Var1= 1662",
"Var1= 1663", "Var1= 1664", "Var1= 1666", "Var1= 1667", "Var1= 1668",
"Var1= 1669", "Var1= 1670", "Var1= 1671", "Var1= 1672", "Var1= 1673",
"Var1= 1674", "Var1= 1675", "Var1= 1676", "Var1= 1678", "Var1= 1679",
"Var1= 1680", "Var1= 1681", "Var1= 1682", "Var1= 1683", "Var1= 1684",
"Var1= 1685", "Var1= 1686", "Var1=30759", "Var1=30760", "Var1=30761",
"Var1=30762", "Var1=30763", "Var1=30764", "Var1=30765", "Var1=30766",
"Var1=30767", "Var1=30768", "Var1=30769", "Var1=30770", "Var1=30771",
"Var1=30772", "Var1=30773", "Var1=30774", "Var1=30775", "Var1=30776",
"Var1=30777", "Var1=30778", "Var1=30779", "Var1=30780", "Var1=30781",
"Var1=30782", "Var1=30783", "Var1=30784", "Var1=30785", "Var1=30786",
"Var1=30787", "Var1=30788"), Var2 = c("Var2=Jun", "Var2=Jul",
"Var2=Aug", "Var2=Sep"), Var3 = c("Var3=2017", "Var3=2018"
))), row.names = c(315L, 84L, 142L, 258L, 316L, 85L, 143L,
259L, 317L, 86L, 144L, 260L, 318L, 87L, 145L, 261L, 319L, 88L,
146L, 262L, 320L), class = "data.frame")
数值列“ID”的取值范围为 1659-1685 和 30759-30788。我想创建一个因子列“Type”,它有两个水平:“V13”对应 ID 1659-1685,“V16”对应 ID 30759-30788。我知道我以前做过,但不知为何想不起来具体怎么做了。感谢您的帮助!
答案 0 :(得分:2)
假设您是有意将 ID 1686 排除在这两个范围之外的,可以尝试以下方法:
library(dplyr)
library(forcats)
# Label each row by ID range: 1659-1685 -> "V13", 30759-30788 -> "V16".
# between() is inclusive on both ends; IDs matching neither range (e.g. 1686)
# fall through case_when() and become NA.
# Levels are set explicitly so the factor always has both levels in the same
# order, regardless of which group happens to appear first in the data.
df %>%
  mutate(
    type = factor(
      case_when(
        between(ID, 1659, 1685) ~ "V13",
        between(ID, 30759, 30788) ~ "V16"
      ),
      levels = c("V13", "V16")
    )
  )
# A tibble: 21 x 5
ID Month Year Homerange type
<int> <fct> <dbl> <dbl> <fct>
1 1683 Jul 2018 NA V13
2 1684 Jul 2017 27.3 V13
3 1684 Aug 2017 NA V13
4 1684 Jun 2018 NA V13
5 1684 Jul 2018 NA V13
6 1685 Jul 2017 NA V13
7 1685 Aug 2017 30.5 V13
8 1685 Jun 2018 NA V13
9 1685 Jul 2018 NA V13
10 1686 Jul 2017 NA NA
11 1686 Aug 2017 30.7 NA
12 1686 Jun 2018 10.6 NA
13 1686 Jul 2018 29.3 NA
14 30759 Jul 2017 32.3 V16
15 30759 Aug 2017 NA V16
16 30759 Jun 2018 NA V16
17 30759 Jul 2018 NA V16
18 30760 Jul 2017 NA V16
19 30760 Aug 2017 33.9 V16
20 30760 Jun 2018 NA V16
21 30760 Jul 2018 NA V16
答案 1 :(得分:2)
一个简单直接的 base R 解决方案是使用 cut。
# Bin ID with cut(): [1659, 1685] -> "V13", (30758, 30788] -> "V16".
# The middle bin (1685, 30758] covers every ID lying between the two ranges
# (not only 1686); it gets a placeholder label that the outer factor() drops,
# so those rows become NA and the result has exactly two levels.
# IDs below 1659 or above 30788 are already NA from cut().
# Assumes integer IDs, so (30758, 30788] is equivalent to 30759-30788.
transform(
  sample,
  Type2 = factor(
    cut(ID,
        breaks = c(1659, 1685, 30758, 30788),
        labels = c("V13", "gap", "V16"),
        include.lowest = TRUE),
    levels = c("V13", "V16")
  )
)
或者使用效率更高的 data.table::inrange(感谢 @camille 的建议):
library(data.table)
# Add a factor column `Type` from the ID range each row falls into.
# %inrange% (data.table) tests inclusive membership in [lower, upper].
# The two ranges are disjoint, so the order of the checks does not matter;
# IDs in neither range end up as NA.
sample <- transform(
  sample,
  Type = factor(
    ifelse(
      ID %inrange% c(30759, 30788),
      "V16",
      ifelse(ID %inrange% c(1659, 1685), "V13", NA)
    )
  )
)
结果如下:
str(sample)
# 'data.frame': 21 obs. of 5 variables:
# $ ID : int 1683 1684 1684 1684 1684 1685 1685 1685 1685 1686 ...
# $ Month : Factor w/ 3 levels "Jun","Jul","Aug": 2 2 3 1 2 2 3 1 2 2 ...
# $ Year : num 2018 2017 2017 2018 2018 ...
# $ Homerange: num NA 27.3 NA NA NA ...
# $ Type : Factor w/ 2 levels "V13","V16": 1 1 1 1 1 1 1 1 1 NA ...
Develop--BranchA--CommitA1--CommitA2
\
BranchB--CommitB1--CommitB2
Develop
\
Branch2--CommitA1--CommitA2--CommitB1--CommitB2