我有一个数据集,其中“年龄”的值具有不同的单位(天,月,年)。我想将其值基于天和月的行转换为年。我怎么能用R做呢? 如果号码后面没有字母,则单位为年。 如果号码后面有'D',那么单位为天(例如10D表示10天) 如果号码后面有“M”,则单位为月(例如5M表示5个月)。
Age <- c("33","32","44","54M","67M","34D","33D","44","77","88M","49 D","55D","11M")
ID <- c(1,2,3,4,5,6,7,8,9,10,11,12,13)
Data <- data.frame(ID,Age)
> Data
ID Age
1 1 33
2 2 32
3 3 44
4 4 54M
5 5 67M
6 6 34D
7 7 33D
8 8 44
9 9 77
10 10 88M
11 11 49 D
12 12 55D
13 13 11M
答案 0 :(得分:5)
这里是基地R的快捷方式:
Data$units = ifelse(grepl("M", Data$Age), "month", ifelse(grepl("D", Data$Age), "day", "year"))
Data$value = as.numeric(gsub(pattern = "[^0-9]", replacement = "", Data$Age))
Data$result = with(Data,
ifelse(units == "year", value,
ifelse(units == "month", value / 12, value / 365.25)))
Data
# ID Age units value result
# 1 1 33 year 33 33.00000000
# 2 2 32 year 32 32.00000000
# 3 3 44 year 44 44.00000000
# 4 4 54M month 54 4.50000000
# 5 5 67M month 67 5.58333333
# 6 6 34D day 34 0.09308693
# 7 7 33D day 33 0.09034908
# 8 8 44 year 44 44.00000000
# 9 9 77 year 77 77.00000000
# 10 10 88M month 88 7.33333333
# 11 11 49 D day 49 0.13415469
# 12 12 55D day 55 0.15058179
# 13 13 11M month 11 0.91666667
答案 1 :(得分:4)
这是使用 tidyverse 工具的另一种选择:
library(dplyr)
library(stringr)
Data %>%
mutate(Unit = str_extract(string = Age,pattern = "[DM]"),
Unit = if_else(is.na(Unit),'Y',Unit),
Age = as.numeric(gsub(pattern = "[MD]","",Age))) %>%
mutate(AgeYears = Age / c('Y' = 1,'M' = 12,'D' = 365)[Unit])
ID Age Unit AgeYears
1 1 33 Y 33.00000000
2 2 32 Y 32.00000000
3 3 44 Y 44.00000000
4 4 54 M 4.50000000
5 5 67 M 5.58333333
6 6 34 D 0.09315068
7 7 33 D 0.09041096
8 8 44 Y 44.00000000
9 9 77 Y 77.00000000
10 10 88 M 7.33333333
11 11 49 D 0.13424658
12 12 55 D 0.15068493
13 13 11 M 0.91666667
答案 2 :(得分:1)
#baseR
Age <-c("33","32","44","54M","67M","34D","33D","44","77","88M","49 D","55D","11M")
AgeNum<- as.numeric(sub("\\s*\\D$","",Age))
Age[grepl("M$",Age)] <- AgeNum[grepl("M$",Age)]/12
Age[grepl("D$",Age)] <- AgeNum[grepl("D$",Age)]/365
Age <- as.numeric(Age)
结果:
> Age
[1] 33.00000000 32.00000000 44.00000000 4.50000000 5.58333333 0.09315068 0.09041096 44.00000000
[9] 77.00000000 7.33333333 0.13424658 0.15068493 0.91666667
>
答案 3 :(得分:0)
此外,使用data.table
进一步解决方案:
> library(data.table)
> dt <- data.table(ID, Age)
> dt[, Unit := ifelse(grepl("D$", Age), "D", ifelse(grepl("M$", Age), "M", "Y"))][
, Age := as.integer(gsub("M|D", "", Age))]
> dt[, Age_in_years := ifelse(Unit == "Y", Age,
ifelse(Unit == "M", Age / 12, Age / 365.25))][]
ID Age Unit Age_in_years
1: 1 33 Y 33.00000000
2: 2 32 Y 32.00000000
3: 3 44 Y 44.00000000
4: 4 54 M 4.50000000
5: 5 67 M 5.58333333
6: 6 34 D 0.09308693
7: 7 33 D 0.09034908
8: 8 44 Y 44.00000000
9: 9 77 Y 77.00000000
10: 10 88 M 7.33333333
11: 11 49 D 0.13415469
12: 12 55 D 0.15058179
13: 13 11 M 0.91666667