# Reproducible sample data (dput() output): 5 rows and 27 columns.
# Columns 1-5 are ID, TVTProcedureStartDate, DCDate, CE_EventOccurred and
# CE_EventDate; columns 6-27 are 0/1 integer indicators whose names end in
# "(In Hospital)".
# The object has class "grouped_df" with the grouping attribute
# vars = "NCDRPatientID" and nested `labels`/`indices` attributes that list
# 53 patient IDs, although the column list itself has no NCDRPatientID column.
# NOTE(review): this looks like a subset of a larger grouped tibble;
# presumably ungroup() is needed before row-wise summaries — confirm against
# the dplyr version in use.
events <- structure(list(ID = c(3049951, 3085397, 3204081, 3262134,
3467254), TVTProcedureStartDate = structure(c(16210, 16238, 16322,
16420, 16546), class = "Date"), DCDate = structure(c(16213, 16250,
16326, 16426, 16560), class = "Date"), CE_EventOccurred = c(0L,
0L, 0L, 0L, 0L), CE_EventDate = c(0L, 0L, 0L, 0L, 0L), `Annular Dissection (In Hospital)` = c(0L,
0L, 0L, 0L, 0L), `Aortic Dissection (In Hospital)` = c(0L, 0L,
0L, 1L, 0L), `Atrial Fibrillation (In Hospital)` = c(0L, 1L,
0L, 0L, 1L), `Bleeding at Access Site (In Hospital)` = c(0L,
0L, 0L, 0L, 0L), `Cardiac Arrest (In Hospital)` = c(1L, 0L, 0L,
0L, 0L), `Conduction/Native Pacer Disturbance Req ICD (In Hospital)` = c(0L,
0L, 1L, 0L, 0L), `Conduction/Native Pacer Disturbance Req Pacer (In Hospital)` = c(0L,
0L, 0L, 0L, 0L), `Endocarditis (In Hospital)` = c(0L, 0L, 0L,
0L, 0L), `GI Bleed (In Hospital)` = c(0L, 0L, 0L, 0L, 0L), `Hematoma at Access Site (In Hospital)` = c(0L,
0L, 0L, 0L, 0L), `Ischemic Stroke (In Hospital)` = c(0L, 0L,
0L, 0L, 0L), `Major Vascular Complications (In Hospital)` = c(0L,
0L, 0L, 0L, 0L), `Minor Vascular Complication (In Hospital)` = c(0L,
0L, 0L, 0L, 0L), `Mitral Leaflet Injury - detected during surgery (In Hospital)` = c(0L,
0L, 0L, 0L, 0L), `Mitral Subvalvular Injury -detected during surgery (In Hospital)` = c(0L,
0L, 0L, 0L, 0L), `New Requirement for Dialysis (In Hospital)` = c(0L,
0L, 0L, 0L, 0L), `Other Bleed (In Hospital)` = c(0L, 0L, 0L,
0L, 0L), `Perforation with or w/o Tamponade (In Hospital)` = c(1L,
0L, 0L, 0L, 0L), `Retroperitoneal Bleeding (In Hospital)` = c(0L,
0L, 0L, 0L, 0L), `Single Leaflet Device Attachment (In Hospital)` = c(0L,
0L, 0L, 0L, 0L), `Unplanned Other Cardiac Surgery or Intervention (In Hospital)` = c(0L,
0L, 0L, 0L, 0L), `Unplanned Vascular Surgery or Intervention (In Hospital)` = c(0L,
0L, 0L, 1L, 0L)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L), vars = "NCDRPatientID", labels = structure(list(
NCDRPatientID = c(3049951, 3085397, 3204081, 3262134, 3467254
)), class = "data.frame", row.names = c(NA, -5L), vars = "NCDRPatientID", labels = structure(list(
NCDRPatientID = c(3049951, 3085397, 3204081, 3262134, 3467254,
3467324, 3510387, 3586037, 3661089, 3668621, 3679485, 3737916,
3738064, 3960141, 4006862, 4018241, 4019056, 4025174, 4027490,
4050900, 4051101, 4096816, 4097119, 4097146, 4097180, 4098426,
4106410, 4109968, 4147466, 4198427, 4198450, 4198458, 4204554,
4208053, 4213116, 4218802, 4218854, 4223378, 4223415, 4243959,
4316979, 4341660, 4348676, 4413567, 4419513, 4421948, 4422768,
4426483, 4430159, 4431211, 4433156, 4433406, 4433988)), class = "data.frame", row.names = c(NA,
-53L), vars = "NCDRPatientID", labels = structure(list(NCDRPatientID = c(3049951,
3085397, 3204081, 3262134, 3467254, 3467324, 3510387, 3586037,
3661089, 3668621, 3679485, 3737916, 3738064, 3960141, 4006862,
4018241, 4019056, 4025174, 4027490, 4050900, 4051101, 4096816,
4097119, 4097146, 4097180, 4098426, 4106410, 4109968, 4147466,
4198427, 4198450, 4198458, 4204554, 4208053, 4213116, 4218802,
4218854, 4223378, 4223415, 4243959, 4316979, 4341660, 4348676,
4413567, 4419513, 4421948, 4422768, 4426483, 4430159, 4431211,
4433156, 4433406, 4433988)), class = "data.frame", row.names = c(NA,
-53L), vars = "NCDRPatientID", drop = TRUE), indices = list(0L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10:12, 13L, 14L, 15L,
16:17, 18L, 19:21, 22L, 23L, 24L, 25:26, 27L, 28L, 29:30,
31L, 32:33, 34L, 35:38, 39L, 40:41, 42L, 43L, 44L, 45L, 46L,
47L, 48:50, 51:53, 54L, 55L, 56L, 57L, 58L, 59:60, 61L, 62L,
63:64, 65:66, 67:68, 69L, 70L, 71:72, 73L), drop = TRUE, group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 2L, 1L, 3L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 4L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 2L, 1L), biggest_group_size = 4L), indices = list(0L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 52L), drop = TRUE, group_sizes = c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), biggest_group_size = 1L), indices = list(0L, 1L, 2L, 3L, 4L), drop = TRUE, group_sizes = c(1L,
1L, 1L, 1L, 1L), biggest_group_size = 1L)
根据这些数据,我需要创建一个新列:如果任何以 (In Hospital) 结尾的列包含 1,则该列的值为 1,否则为 0。
我尝试了多种操作,但不起作用或显示错误
Error in mutate_impl(.data, dots) : Evaluation error: NA/NaN argument.
# Attempt 1: sum columns 6 onward inside mutate().
# NOTE(review): `n` is not assigned a number anywhere in this snippet; if it
# resolves to a function (e.g. dplyr's n), `6:n` cannot be evaluated, which is
# presumably the source of the "NA/NaN argument" error quoted above.
event %>% mutate(TR = rowSums(select_(.,6:n)))
Error in mutate_impl(.data, dots) : Column `TR` must be length 1 (the group size), not 53
# Attempt 2: index the piped data frame by column position.
# NOTE(review): `.` is the whole piped data frame, so rowSums() yields one
# value per row of the full data; on a grouped data frame mutate() expects a
# result sized to each group, which presumably explains the "must be length 1
# (the group size), not 53" error quoted above.
event %>% mutate(TR = rowSums(.[6:ncol(.)]))
我还尝试了其他一些变体,想看看能否弄明白问题所在,但总是遇到类似的错误和问题。
我尝试的另一种写法如下,它似乎可以按行求和,但即使这样做,结果中也会把 ID 加进去:
# Attempt 3: select columns 6-27 and sum each row outside mutate().
# NOTE(review): if `event` is still grouped, select() presumably keeps the
# grouping column, so its values would be included in the row sums; calling
# ungroup() first should avoid that — verify.
event %>% select(6:27) %>% rowSums()
但是,它把每一行的 ID 也加到了第 6 到第 27 列的 1 和 0 之和里。不知道为什么会这样。
我希望得到的结果是包含相同数据的数据框,并额外增加一列:如果第 6 到第 27 列中的任何一列包含 1,则该列为 1,否则为 0。
答案 0 :(得分:2)
这不是dplyr方式,但也可以:
# Flag rows that have at least one in-hospital event, using base R only.
# Indicator columns are located by the "In Hospital" text in their names.
in_hospital_cols <- grep("In Hospital", colnames(events))
# drop = FALSE keeps the selection two-dimensional even when only one column
# matches, so rowSums() never receives a bare vector.
event_count <- rowSums(events[, in_hospital_cols, drop = FALSE])
# Start every row at 0, then set 1 where the row sum is at least 1.
# Rows whose sum is NA stay 0, as in the original two-step assignment.
events$new_col <- 0
events$new_col[event_count >= 1] <- 1
答案 1 :(得分:2)
在给出解决方案之前,我先运行以下代码取消数据的分组。
# Load dplyr, then remove the grouping from `events` so that later row-wise
# calculations operate on the whole table.
library(dplyr)
events <- ungroup(events)
此解决方案的思路是:使用 rowSums 将所选列中的所有数字按行相加,判断总和是否大于 0,然后将得到的逻辑向量转换为整数向量(取值为 1 或 0)。
有许多选择列的方法。我们可以根据列号进行选择。
# Pick the indicator columns by position (6 through 27), add them up row by
# row, and store "sum > 0" as an integer 1/0 flag in `Col`.
events2 <- mutate(
  events,
  Col = as.integer(rowSums(select(events, 6:27)) > 0)
)
events2$Col
# [1] 1 1 1 1 1
我们可以使用ends_with
。
# Pick the indicator columns whose names end with "(In Hospital)", add them
# up row by row, and store "sum > 0" as an integer 1/0 flag in `Col`.
events2 <- mutate(
  events,
  Col = as.integer(
    rowSums(select(events, ends_with("(In Hospital)"))) > 0
  )
)
events2$Col
# [1] 1 1 1 1 1
我们可以使用 matches。正则表达式 \\(In Hospital\\)$ 匹配以 "(In Hospital)" 结尾的列名。
# Pick the indicator columns with a regular expression anchored at the end of
# the name, add them up row by row, and store "sum > 0" as an integer 1/0
# flag in `Col`.
events2 <- mutate(
  events,
  Col = as.integer(
    rowSums(select(events, matches("\\(In Hospital\\)$"))) > 0
  )
)
events2$Col
# [1] 1 1 1 1 1
我们可以使用contains
,但是请注意,目标字符串不必在列名的末尾。
# Pick the indicator columns whose names contain "(In Hospital)" anywhere,
# add them up row by row, and store "sum > 0" as an integer 1/0 flag in `Col`.
events2 <- mutate(
  events,
  Col = as.integer(
    rowSums(select(events, contains("(In Hospital)"))) > 0
  )
)
events2$Col
# [1] 1 1 1 1 1
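由于目标列中的数字均为 1 或 0,因此我们也可以将 apply 与 max 一起使用来获取每行的最大值:只要该行中有任何一个 1,最大值就是 1,否则为 0。上面展示的使用 select 函数选择列的所有方式在这里同样适用。下面我只演示其中一种写法(原文此处的代码在转载时丢失,以下示例是按上文写法重建的):
events2 <- events %>% mutate(Col = apply(select(., ends_with("(In Hospital)")), 1, max))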
答案 2 :(得分:2)
使用基础 R 的 apply() 的解决方案:
# Base R: locate the in-hospital indicator columns, ignoring letter case.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
cols <- grep("in hospital", colnames(events), ignore.case = TRUE)
# For each row, return 1 if any selected column equals 1, otherwise 0
# (NA when the comparison is undetermined because of missing values).
# drop = FALSE keeps the selection two-dimensional even when only one column
# matches, which apply() requires; as.numeric() on the single any() result
# replaces the vectorised ifelse() that was being used on a scalar.
apply(events[, cols, drop = FALSE], 1, function(x) as.numeric(any(x == 1)))
# [1] 1 1 1 1 1