根据特定条件拥有数据集的子集

时间:2018-04-09 15:07:40

标签: r dataset

我有一个数据集,其中“年龄”的值具有不同的单位(天,月,年)。我想将其值基于天和月的行转换为年。我怎么能用R做呢? 如果号码后面没有字母,则单位为年。 如果号码后面有'D',那么单位为天(例如10D表示10天) 如果号码后面有“M”,则单位为月(例如5M表示5个月)。

Age <- c("33","32","44","54M","67M","34D","33D","44","77","88M","49 D","55D","11M")
ID <- c(1,2,3,4,5,6,7,8,9,10,11,12,13)
Data <- data.frame(ID,Age)

> Data
   ID  Age
1   1   33
2   2   32
3   3   44
4   4  54M
5   5  67M
6   6  34D
7   7  33D
8   8   44
9   9   77
10 10  88M
11 11 49 D
12 12  55D
13 13  11M

4 个答案:

答案 0 :(得分:5)

这里是基地R的快捷方式:

Data$units = ifelse(grepl("M", Data$Age), "month", ifelse(grepl("D", Data$Age), "day", "year"))
Data$value = as.numeric(gsub(pattern = "[^0-9]", replacement = "", Data$Age))
Data$result = with(Data,
    ifelse(units == "year", value, 
        ifelse(units == "month", value / 12, value / 365.25)))
Data
#    ID  Age units value      result
# 1   1   33  year    33 33.00000000
# 2   2   32  year    32 32.00000000
# 3   3   44  year    44 44.00000000
# 4   4  54M month    54  4.50000000
# 5   5  67M month    67  5.58333333
# 6   6  34D   day    34  0.09308693
# 7   7  33D   day    33  0.09034908
# 8   8   44  year    44 44.00000000
# 9   9   77  year    77 77.00000000
# 10 10  88M month    88  7.33333333
# 11 11 49 D   day    49  0.13415469
# 12 12  55D   day    55  0.15058179
# 13 13  11M month    11  0.91666667

答案 1 :(得分:4)

这是使用 tidyverse 工具的另一种选择:

library(dplyr)
library(stringr)

Data %>% 
    mutate(Unit = str_extract(string = Age,pattern = "[DM]"),
                 Unit = if_else(is.na(Unit),'Y',Unit),
                 Age = as.numeric(gsub(pattern = "[MD]","",Age))) %>%
    mutate(AgeYears = Age / c('Y' = 1,'M' = 12,'D' = 365)[Unit])

   ID Age Unit    AgeYears
1   1  33    Y 33.00000000
2   2  32    Y 32.00000000
3   3  44    Y 44.00000000
4   4  54    M  4.50000000
5   5  67    M  5.58333333
6   6  34    D  0.09315068
7   7  33    D  0.09041096
8   8  44    Y 44.00000000
9   9  77    Y 77.00000000
10 10  88    M  7.33333333
11 11  49    D  0.13424658
12 12  55    D  0.15068493
13 13  11    M  0.91666667

答案 2 :(得分:1)

#baseR

Age   <-c("33","32","44","54M","67M","34D","33D","44","77","88M","49 D","55D","11M")
AgeNum<- as.numeric(sub("\\s*\\D$","",Age))

Age[grepl("M$",Age)] <- AgeNum[grepl("M$",Age)]/12
Age[grepl("D$",Age)] <- AgeNum[grepl("D$",Age)]/365

Age <- as.numeric(Age)

结果:

> Age
 [1] 33.00000000 32.00000000 44.00000000  4.50000000  5.58333333  0.09315068  0.09041096 44.00000000
 [9] 77.00000000  7.33333333  0.13424658  0.15068493  0.91666667
> 

答案 3 :(得分:0)

此外,使用data.table进一步解决方案:

> library(data.table)
> dt <- data.table(ID, Age)

> dt[, Unit := ifelse(grepl("D$", Age), "D", ifelse(grepl("M$", Age), "M", "Y"))][
, Age := as.integer(gsub("M|D", "", Age))]

> dt[, Age_in_years := ifelse(Unit == "Y", Age, 
ifelse(Unit == "M", Age / 12, Age / 365.25))][]

    ID Age Unit Age_in_years
 1:  1  33    Y  33.00000000
 2:  2  32    Y  32.00000000
 3:  3  44    Y  44.00000000
 4:  4  54    M   4.50000000
 5:  5  67    M   5.58333333
 6:  6  34    D   0.09308693
 7:  7  33    D   0.09034908
 8:  8  44    Y  44.00000000
 9:  9  77    Y  77.00000000
10: 10  88    M   7.33333333
11: 11  49    D   0.13415469
12: 12  55    D   0.15058179
13: 13  11    M   0.91666667