我有一个包含三列的数据框:
structure(list(introduction = structure(c(1522175035.901, 1520961618.904,
1520978326.696, 1520965207.553, 1523918791.109, 1522173494.567,
1521218841.268, 1522170731.4, 1521042736.772, 1522701901.544,
1521145185.29, 1522706004.953, 1521150062.849, 1521047868.109,
1520974602.5, 1521163554.751, 1524613269.765, 1521592575.346,
1521061958.284, 1523045730.56, 1521219389.032, 1521828244.272,
1521996291.152, 1522067323.354, 1525645608.604, 1521567579.468,
1521567534.356, 1522426237.303, 1521567542.767, 1521567641.958,
1521567555.865, 1521573065.994, 1521584733.425, 1522103377.939,
1521821245.259, 1521829777.22, 1521829835.529, 1521980738.646,
1522170449.584, 1522103997.818, 1522104390.669, 1522165861.644,
1522171832.317, 1522171870.626, 1522171875.693, 1522171866.211,
1522171933.098, 1522171855.451, 1522180873.26, 1522181644.842,
1522333570.248, 1523525628.687, 1522487903.13, 1522078502.859,
1522318895.073, 1522319216.787, 1522335495.615, 1522342148.729,
1522342669.515, 1522692396.853, 1522832258.045, 1522860054.396,
1523908701.209, 1522942128.02, 1523562329.681, 1523562640.799,
1523569245.794, 1524162373.274, 1524599570.324, 1524249914.822,
1524256541.826, 1525558540.165, 1524266810.439, 1525620261.23,
1524520384.02, 1524680108.876, 1524678310.567, 1524768468.141,
1524769276.356, 1524774051.792, 1524853389.661, 1525620789.277,
1525626502.563, 1525649018.551, 1526072632.14, 1526484415.769,
1526413818.926, 1526416221.779, 1527373743.794, 1527200067.957,
1527642278.236, 1527719425.739, 1528335439.152), class = c("POSIXct",
"POSIXt")), demoChat = structure(c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1521996371.505,
1522089543.872, NA, 1521567907.885, 1521567794.224, NA, 1521567970.389,
1521568055.997, 1521567858.716, NA, NA, NA, 1521821640.607, 1521830168.929,
1521830168.182, 1521980950.771, NA, 1522104226.885, 1522104740.322,
NA, 1522172233.515, 1522172208.375, 1522172220.053, 1522172210.93,
1522172211.298, 1522172241.241, 1522338283.99, NA, NA, 1523526243.697,
1522488263.46, NA, 1522318901.563, NA, 1522335710.692, 1522342944.839,
NA, 1522692960.938, NA, 1522860380.603, NA, 1522942437.122, NA,
NA, 1523569647.089, NA, NA, 1524503493.328, NA, 1525668259.52,
1524266917.649, 1525620673.917, 1526306725.36, NA, NA, 1524769206.836,
1524769284.502, 1524774356.781, 1524853879.681, NA, NA, 1525649108.505,
NA, 1526484828.946, 1526413874.213, 1526416344.729, 1528407164.373,
1527200222.721, 1527642486.482, 1527719632.36, NA), class = c("POSIXct",
"POSIXt")), demoChatSkipped = structure(c(1522175035.901, 1520961618.904,
1520978326.696, 1520965207.553, 1523918791.109, 1522173494.567,
1521218841.268, 1522170731.4, 1521042736.772, 1522701901.544,
1521145185.29, 1522706004.953, 1521150062.849, 1521047868.109,
1520974602.5, 1521163554.751, 1524613269.765, 1521592575.346,
1521061958.284, 1523045730.56, 1521219389.032, 1521828244.272,
NA, NA, NA, NA, NA, 1522426243.108, NA, NA, NA, 1521573070.218,
1523474984.126, 1522103382.51, NA, NA, NA, NA, 1522170464.136,
NA, NA, 1522165866.172, NA, NA, NA, NA, NA, NA, 1522180954.076,
1522181651.368, 1522333575.922, NA, NA, 1522078502.859, NA, 1522319239.67,
NA, NA, 1522342677.026, NA, 1522832258.045, NA, 1523908713.755,
NA, 1523562337.321, 1523562650.926, NA, 1524162384.352, 1524599577.553,
NA, 1524256546.09, 1525652931.413, NA, NA, 1524520476.233, NA,
1524678321.292, NA, NA, NA, NA, 1525620799.957, 1525626519.183,
NA, 1526072640.635, NA, NA, NA, 1527373758.872, NA, NA, NA, 1528335444.431
), class = c("POSIXct", "POSIXt"))), row.names = c(NA, -93L), class = "data.frame")
仅使用第二列和第三列,我想计算有多少行同时满足is.na(demoChat)
和!is.na(demoChatSkipped)
,以及反过来的情况各有多少行。
为了更清楚,我想知道(不是R格式):
1. demoChat != NA && demoChatSkipped == NA
2. demoChat == NA && demoChatSkipped != NA
以R格式:
!is.na(demoChat) && is.na(demoChatSkipped)
is.na(demoChat) && !is.na(demoChatSkipped)
我希望最好的方法是使用table()
;但是,当我测试这两个值时,我没有得到我期望获得的结果,这是与每个语句相关的TRUE
和FALSE
值的表。
这是怎么做到的?
答案 0 :(得分:1)
dplyr解决方案:
library(dplyr)
# Keep only the rows where demoChat is missing but demoChatSkipped is not,
# then count them. Note the magrittr pipe is `%>%`; a bare `%>` is a syntax
# error.
df %>%
  filter(is.na(demoChat) & !is.na(demoChatSkipped)) %>%
  tally()
甚至:
# One-step variant: tally() sums its weight argument, and each TRUE in the
# logical mask counts as 1, so the result is the number of matching rows.
tally(df, wt = is.na(demoChat) & !is.na(demoChatSkipped))
n
1 45
答案 1 :(得分:1)
当我尝试使用多个输入创建新的分类变量时,我喜欢运行ifelse循环。对于您的数据(假设您将其保存为x),您可以这样做:
library(tidyverse)
# Label every row by which of the two columns is missing. The elementwise `&`
# is required here because the conditions are whole columns, not scalars.
# mutate() returns a new data frame, so the result has to be assigned back to
# x; otherwise x$NA_check does not exist when table() is called.
x <- mutate(
  x,
  NA_check = ifelse(
    is.na(demoChat) & !is.na(demoChatSkipped),
    'demo NA, Chat not NA',
    ifelse(
      !is.na(demoChat) & is.na(demoChatSkipped),
      'demo not NA, Chat NA',
      'some other thing happend or more ifelse loops'
    )
  )
)
# Count how many rows fall into each label.
table(x$NA_check)
demo NA, Chat not NA
45
demo not NA, Chat NA
42
some other thing happend or more ifelse loops
6
答案 2 :(得分:0)
您可以在所需条件的逻辑矩阵上使用rowSums
,然后table
结果。
# Two logical columns: "demoChat is NA" and "demoChatSkipped is not NA".
na_flags <- cbind(is.na(df[[2]]), !is.na(df[[3]]))
# Row sum 2: both flags TRUE; row sum 0: both flags FALSE (i.e. demoChat is
# not NA and demoChatSkipped is NA); row sum 1: exactly one flag TRUE.
table(rowSums(na_flags))
# 0 1 2
#42 6 45
您想要的是与条目2
和0
对应的值:
条目2(45行)对应is.na(df[[2]])
且!is.na(df[[3]])
;条目0(42行)对应!is.na(df[[2]])
且is.na(df[[3]])
。