我有下表(称为火车)(实际上更大)
UNSPSC adaptor alert bact blood collection packet patient ultrasoft whit
514415 1 0 1 0 0 0 0 1 0
514415 0 0 0 1 1 0 0 1 0
514415 0 0 1 0 0 0 0 1 0
514415 0 0 0 0 0 0 0 1 0
514415 1 0 1 0 0 0 0 1 0
514415 0 0 0 0 0 0 0 1 0
422018 1 0 1 0 0 0 0 1 0
422018 0 0 0 0 0 0 0 1 0
422018 0 0 0 1 0 0 0 1 0
411011 0 0 0 0 0 0 0 1 0
以及下面的表格,我称之为协会:
lhd rhs
blood collection
adaptor bact
[...]
我想计算每列唯一UNSPSC的数量,其中lhs和rhs的关联表中每个记录的值等于1。 像:
采血1 适配器bact 2此代码仅用于一个学期。
apply(train[,-1], 2, function(x) length(unique(substr(train$UNSPSC,1,4)[x == 1])))
答案 0 :(得分:3)
您可以迭代trains
而不是迭代associations
,并使用subset
(x
行第1列和第2列等于1
的子集,unique
,length
功能
使用x
函数调用行get
中的列。
train$lhd <- 1
train$rhs <- 1
apply(associations, 1, function(x)
length(unique(subset(train, get(x[1]) == 1 & get(x[2]) == 1)$UNSPSC))
)
# [1] 3 1 2
数据(train
):
structure(list(UNSPSC = c(514415L, 514415L, 514415L, 514415L,
514415L, 514415L, 422018L, 422018L, 422018L, 411011L), adaptor = c(1L,
0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L), alert = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L), bact = c(1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L), blood = c(0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L
), collection = c(0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), packet = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), patient = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), ultrasoft = c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), whit = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), lhd = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), rhs = c(1, 1,
1, 1, 1, 1, 1, 1, 1, 1)), .Names = c("UNSPSC", "adaptor", "alert",
"bact", "blood", "collection", "packet", "patient", "ultrasoft",
"whit", "lhd", "rhs"), row.names = c(NA, -10L), class = "data.frame")
数据(associations
):
structure(list(V1 = c("lhd", "blood", "adaptor"), V2 = c("rhs",
"collection", "bact")), .Names = c("V1", "V2"), row.names = c(NA,
-3L), class = "data.frame")
答案 1 :(得分:2)
tidyverse
的类似选项(来自@PoGibas帖子的数据)将pmap
应用于'关联'数据以循环列filter
'列''这些列都是1,pull
'UNSCPSC'列,并获取length
个unique
元素(n_distinct
)
library(tidyverse)
pmap_int(associations, ~ train %>%
filter(!! rlang::sym(.x) == 1, !! rlang::sym(.y) == 1) %>%
pull(UNSPSC) %>%
n_distinct)
#[1] 3 1 2