我有一个名为 tt 的数据框。我想创建一个年龄分层表,分别统计年龄大于 0 且小于 65、小于 70、小于 75 的人数。
这是 Alzheimer 项目的数据,其中发病年龄(AGE_AT_ONSET)、最近一次就诊年龄(AGE_LAST_VISIT)和死亡年龄(AGE_AT_DEATH)这几列里含有 -9 和 888 这样的取值。-9 和 888 表示年龄未知。
因此,我编写了下面这段代码来进行分层分析,但是我不确定如何把范围限定为“大于 0 岁且小于 65 岁 / 小于 70 岁 / 小于 75 岁”。我的代码现在还会把 -9 这类值也计入统计。有办法防止这种情况吗?
# Asker's attempt: for every STATUS x ETHNICITY group, count the rows whose
# onset age falls below each cutoff.
# AGE_AT_ONSET is stored as a factor, so it is converted through
# as.character() before as.numeric() to obtain the labelled value.
# The comparisons have no lower bound, so the -9 "unknown" code also
# satisfies every `<` test and is counted.
# NOTE(review): the posted sample of `tt` has no STATUS column -- presumably
# it was dropped from the sample; confirm against the full data.
tt %>%
  group_by(STATUS, ETHNICITY) %>%
  summarise(
    "<65" = sum(as.numeric(as.character(AGE_AT_ONSET)) < 65, na.rm = TRUE),
    "<70" = sum(as.numeric(as.character(AGE_AT_ONSET)) < 70, na.rm = TRUE),
    "<75" = sum(as.numeric(as.character(AGE_AT_ONSET)) < 75, na.rm = TRUE)
  )
tt <- structure(list(IID = structure(c(`5068` = 80L, `15562` = 58L,
`8939` = 52L, `17602` = 34L, `3173` = 40L, `12591` = 30L, `17391` = 97L,
`8241` = 93L, `9746` = 10L, `9673` = 7L, `16594` = 29L, `16911` = 60L,
`4796` = 18L, `6598` = 12L, `11462` = 26L, `16425` = 17L, `12698` = 37L,
`17118` = 81L, `1501` = 76L, `13294` = 92L, `8072` = 84L, `11642` = 46L,
`4164` = 85L, `9035` = 62L, `16691` = 35L, `16002` = 86L, `3915` = 21L,
`7409` = 54L, `9759` = 11L, `6130` = 6L, `15153` = 23L, `13539` = 100L,
`13262` = 87L, `742` = 28L, `17592` = 33L, `16812` = 53L, `213` = 66L,
`11963` = 77L, `12093` = 89L, `11910` = 68L, `15813` = 73L, `1104` = 51L,
`1966` = 95L, `5589` = 61L, `8860` = 41L, `482` = 16L, `3967` = 55L,
`5869` = 1L, `12435` = 20L, `11675` = 50L, `16701` = 36L, `5893` = 2L,
`16880` = 57L, `13290` = 90L, `1097` = 49L, `1476` = 71L, `9100` = 67L,
`6220` = 8L, `15393` = 42L, `16631` = 31L, `9641` = 4L, `13485` = 99L,
`1028` = 44L, `8200` = 91L, `12190` = 94L, `5581` = 19L, `7266` = 43L,
`12254` = 98L, `15763` = 69L, `17764` = 79L, `16239` = 96L, `7548` = 59L,
`12037` = 83L, `7813` = 70L, `12943` = 63L, `17748` = 75L, `12703` = 38L,
`11964` = 78L, `14018` = 45L, `1769` = 88L, `13713` = 22L, `13100` = 74L,
`13866` = 32L, `2527` = 25L, `2281` = 15L, `4463` = 39L, `5815` = 14L,
`14040` = 47L, `16560` = 24L, `12887` = 56L, `11167` = 13L, `6123` = 5L,
`5668` = 48L, `3036` = 82L, `7622` = 65L, `11470` = 27L, `4770` = 64L,
`17050` = 72L, `6295` = 9L, `9575` = 3L), .Label = c("08AD09051_NACC295883",
"08AD10766_NACC977458", "08AD9133", "09AD14006", "09AD14313_NACC904765",
"09AD14360_NACC785663", "09AD14874", "09AD14943_NACC009736",
"09AD15417_NACC169039", "09AD15778", "09AD15810", "09AD17022_NACC426380",
"25795", "NACC026302", "NACC026743", "NACC044624", "NACC062886",
"NACC083669", "NACC088187", "NACC094571", "NACC107551", "NACC134929",
"NACC178119", "NACC178349", "NACC183751", "NACC186606", "NACC192719",
"NACC193548", "NACC209758", "NACC224665", "NACC243923", "NACC246256",
"NACC261383", "NACC283729", "NACC298544", "NACC305567", "NACC310219",
"NACC310896", "NACC312856", "NACC336802", "NACC342957", "NACC350799",
"NACC351234_09AD13080", "NACC355338", "NACC355951", "NACC361682",
"NACC369873", "NACC397276", "NACC402765", "NACC403144", "NACC407162",
"NACC412031", "NACC413408", "NACC422516_08AD10849", "NACC436908",
"NACC465387", "NACC472288", "NACC479723", "NACC485644_08AD8204",
"NACC504120", "NACC508353", "NACC509594", "NACC510498", "NACC519864",
"NACC521718_08AD9198", "NACC559675", "NACC585997", "NACC605438",
"NACC612578", "NACC619036_09AD14621", "NACC621261", "NACC634809",
"NACC635885", "NACC639654", "NACC640099", "NACC642393", "NACC660918",
"NACC660981", "NACC684037", "NACC690933", "NACC695603", "NACC703758",
"NACC740374", "NACC744168_08AD7716", "NACC766835", "NACC769330",
"NACC775129", "NACC792439", "NACC796641", "NACC805995", "NACC806269_09AD13056",
"NACC809589", "NACC824113_08AD9038", "NACC884140", "NACC916661",
"NACC921664", "NACC926195", "NACC929277", "NACC959601", "NACC992086"
), class = "factor"), AGE_AT_ONSET = structure(c(`5068` = 4L,
`15562` = 16L, `8939` = 24L, `17602` = NA, `3173` = 24L, `12591` = NA,
`17391` = 15L, `8241` = 13L, `9746` = 18L, `9673` = NA, `16594` = 20L,
`16911` = NA, `4796` = NA, `6598` = NA, `11462` = 20L, `16425` = NA,
`12698` = NA, `17118` = NA, `1501` = 5L, `13294` = NA, `8072` = 11L,
`11642` = NA, `4164` = 25L, `9035` = NA, `16691` = NA, `16002` = NA,
`3915` = NA, `7409` = 21L, `9759` = 14L, `6130` = NA, `15153` = NA,
`13539` = NA, `13262` = NA, `742` = 26L, `17592` = 28L, `16812` = 9L,
`213` = 14L, `11963` = NA, `12093` = NA, `11910` = NA, `15813` = 10L,
`1104` = NA, `1966` = NA, `5589` = 16L, `8860` = 8L, `482` = NA,
`3967` = 7L, `5869` = NA, `12435` = NA, `11675` = NA, `16701` = 19L,
`5893` = NA, `16880` = 22L, `13290` = NA, `1097` = NA, `1476` = 7L,
`9100` = 22L, `6220` = NA, `15393` = NA, `16631` = NA, `9641` = NA,
`13485` = NA, `1028` = NA, `8200` = NA, `12190` = NA, `5581` = NA,
`7266` = 17L, `12254` = 17L, `15763` = NA, `17764` = 6L, `16239` = NA,
`7548` = 14L, `12037` = 27L, `7813` = 26L, `12943` = NA, `17748` = NA,
`12703` = NA, `11964` = 20L, `14018` = 23L, `1769` = 25L, `13713` = NA,
`13100` = NA, `13866` = NA, `2527` = 12L, `2281` = NA, `4463` = 1L,
`5815` = 3L, `14040` = NA, `16560` = NA, `12887` = 14L, `11167` = NA,
`6123` = NA, `5668` = 5L, `3036` = 2L, `7622` = 7L, `11470` = NA,
`4770` = 17L, `17050` = 15L, `6295` = NA, `9575` = 19L), .Label = c("44",
"52", "56", "58", "60", "61", "888", "64", "65", "66", "67", "69",
"70", "71", "72", "-9", "74", "75", "76", "77", "78", "79", "80",
"81", "82", "83", "88", "90"), class = "factor"), AGE_LAST_VISIT = structure(c(`5068` = 8L,
`15562` = 18L, `8939` = 24L, `17602` = 16L, `3173` = 21L, `12591` = NA,
`17391` = 17L, `8241` = NA, `9746` = NA, `9673` = NA, `16594` = 25L,
`16911` = 4L, `4796` = 5L, `6598` = NA, `11462` = 21L, `16425` = 10L,
`12698` = 25L, `17118` = 12L, `1501` = 7L, `13294` = 9L, `8072` = NA,
`11642` = NA, `4164` = 21L, `9035` = 21L, `16691` = 3L, `16002` = 14L,
`3915` = 13L, `7409` = NA, `9759` = NA, `6130` = 25L, `15153` = 22L,
`13539` = NA, `13262` = 24L, `742` = 26L, `17592` = 30L, `16812` = 9L,
`213` = 11L, `11963` = NA, `12093` = NA, `11910` = NA, `15813` = 10L,
`1104` = 24L, `1966` = 14L, `5589` = 18L, `8860` = 23L, `482` = 15L,
`3967` = 7L, `5869` = NA, `12435` = 6L, `11675` = NA, `16701` = 25L,
`5893` = NA, `16880` = 20L, `13290` = NA, `1097` = 8L, `1476` = 5L,
`9100` = 28L, `6220` = 21L, `15393` = 17L, `16631` = 9L, `9641` = 24L,
`13485` = NA, `1028` = 7L, `8200` = NA, `12190` = 8L, `5581` = 15L,
`7266` = NA, `12254` = 19L, `15763` = 7L, `17764` = 6L, `16239` = 11L,
`7548` = NA, `12037` = 29L, `7813` = NA, `12943` = NA, `17748` = 23L,
`12703` = 27L, `11964` = 23L, `14018` = 26L, `1769` = 24L, `13713` = 13L,
`13100` = 20L, `13866` = NA, `2527` = 13L, `2281` = 21L, `4463` = 4L,
`5815` = 3L, `14040` = 2L, `16560` = 14L, `12887` = 24L, `11167` = NA,
`6123` = NA, `5668` = 12L, `3036` = 1L, `7622` = NA, `11470` = 18L,
`4770` = 18L, `17050` = 18L, `6295` = NA, `9575` = NA), .Label = c("59",
"60", "61", "62", "64", "65", "67", "68", "69", "70", "71", "72",
"-9", "74", "-9", "76", "77", "79", "80", "81", "82", "83", "84",
"85", "86", "89", "91", "92", "93", "94"), class = "factor"),
AGE_AT_DEATH = structure(c(`5068` = 2L, `15562` = NA, `8939` = NA,
`17602` = NA, `3173` = NA, `12591` = NA, `17391` = NA, `8241` = 10L,
`9746` = 9L, `9673` = NA, `16594` = NA, `16911` = NA, `4796` = NA,
`6598` = NA, `11462` = NA, `16425` = NA, `12698` = NA, `17118` = NA,
`1501` = NA, `13294` = NA, `8072` = 6L, `11642` = NA, `4164` = NA,
`9035` = NA, `16691` = NA, `16002` = NA, `3915` = NA, `7409` = 16L,
`9759` = 8L, `6130` = NA, `15153` = NA, `13539` = NA, `13262` = NA,
`742` = 14L, `17592` = NA, `16812` = NA, `213` = NA, `11963` = NA,
`12093` = NA, `11910` = NA, `15813` = NA, `1104` = NA, `1966` = NA,
`5589` = NA, `8860` = NA, `482` = NA, `3967` = NA, `5869` = NA,
`12435` = NA, `11675` = NA, `16701` = NA, `5893` = 16L, `16880` = NA,
`13290` = NA, `1097` = NA, `1476` = 1L, `9100` = NA, `6220` = NA,
`15393` = NA, `16631` = NA, `9641` = NA, `13485` = NA, `1028` = NA,
`8200` = NA, `12190` = NA, `5581` = NA, `7266` = 11L, `12254` = NA,
`15763` = NA, `17764` = 3L, `16239` = NA, `7548` = 6L, `12037` = 15L,
`7813` = 13L, `12943` = NA, `17748` = NA, `12703` = NA, `11964` = NA,
`14018` = NA, `1769` = 12L, `13713` = NA, `13100` = NA, `13866` = NA,
`2527` = 5L, `2281` = NA, `4463` = NA, `5815` = NA, `14040` = NA,
`16560` = NA, `12887` = NA, `11167` = NA, `6123` = NA, `5668` = NA,
`3036` = NA, `7622` = 4L, `11470` = NA, `4770` = NA, `17050` = NA,
`6295` = NA, `9575` = 7L), .Label = c("66", "70", "71", "73",
"74", "75", "77", "79", "82", "83", "85", "86", "88", "90",
"93", "94"), class = "factor"), ETHNICITY = structure(c(`5068` = 4L,
`15562` = 4L, `8939` = 4L, `17602` = 3L, `3173` = 4L, `12591` = 4L,
`17391` = 4L, `8241` = 4L, `9746` = 4L, `9673` = 4L, `16594` = 4L,
`16911` = 4L, `4796` = 4L, `6598` = 4L, `11462` = 4L, `16425` = 4L,
`12698` = 4L, `17118` = 4L, `1501` = 4L, `13294` = 4L, `8072` = 4L,
`11642` = 4L, `4164` = 1L, `9035` = 4L, `16691` = 4L, `16002` = 4L,
`3915` = 2L, `7409` = 4L, `9759` = 4L, `6130` = 4L, `15153` = 4L,
`13539` = 4L, `13262` = 4L, `742` = 4L, `17592` = 3L, `16812` = 4L,
`213` = 1L, `11963` = 4L, `12093` = 4L, `11910` = 4L, `15813` = 4L,
`1104` = 4L, `1966` = 4L, `5589` = 1L, `8860` = 4L, `482` = 4L,
`3967` = 4L, `5869` = 4L, `12435` = 4L, `11675` = 4L, `16701` = 4L,
`5893` = 4L, `16880` = 4L, `13290` = 4L, `1097` = 4L, `1476` = 4L,
`9100` = 4L, `6220` = 4L, `15393` = 4L, `16631` = 4L, `9641` = 4L,
`13485` = 4L, `1028` = 4L, `8200` = 4L, `12190` = 4L, `5581` = 4L,
`7266` = 4L, `12254` = 4L, `15763` = 4L, `17764` = 3L, `16239` = 4L,
`7548` = 4L, `12037` = 4L, `7813` = 4L, `12943` = 4L, `17748` = 3L,
`12703` = 4L, `11964` = 4L, `14018` = 4L, `1769` = 4L, `13713` = 4L,
`13100` = 4L, `13866` = 4L, `2527` = 4L, `2281` = 2L, `4463` = 4L,
`5815` = 4L, `14040` = 4L, `16560` = 4L, `12887` = 4L, `11167` = 4L,
`6123` = 4L, `5668` = 4L, `3036` = 4L, `7622` = 4L, `11470` = 4L,
`4770` = 2L, `17050` = 4L, `6295` = 4L, `9575` = 4L), .Label = c("AA",
"Asian", "Hispanic", "NHW"), class = "factor")), class = "data.frame", row.names = c(NA,
-100L))
答案 0 :(得分:2)
您可以将不需要计数的值设置为 NA:
library(dplyr)
# Recode the "unknown age" codes (-9, 888) to NA in every column whose name
# starts with "AGE", turn those factor columns into numeric ages, then count
# the onset ages below each cutoff per ethnicity.
tt %>%
  mutate(
    across(
      starts_with("AGE"),
      function(age) {
        # %in% matches the factor by its labels, so the sentinel codes are
        # blanked out before the conversion.
        age[age %in% c(-9, 888)] <- NA
        # factor -> character -> numeric yields the labelled age, not the
        # factor's internal integer code.
        as.numeric(as.character(age))
      }
    )
  ) %>%
  group_by(ETHNICITY) %>%
  summarise(
    "<65" = sum(AGE_AT_ONSET < 65, na.rm = TRUE),
    "<70" = sum(AGE_AT_ONSET < 70, na.rm = TRUE),
    "<75" = sum(AGE_AT_ONSET < 75, na.rm = TRUE)
  )
# ETHNICITY `<65` `<70` `<75`
# <fct> <int> <int> <int>
#1 AA 0 0 1
#2 Asian 0 0 1
#3 Hispanic 1 1 1
#4 NHW 7 11 19