假设我有以下数据集:
/**
 * Checks whether an element's content overflows its visible box; the two
 * placeholder comments mark where a "read more" button would be toggled.
 *
 * @param {Object} element - Object exposing offsetHeight, scrollHeight,
 *   offsetWidth and scrollWidth (typically a DOM element).
 */
function showReadMoreButton(element) {
  var overflowsVertically = element.offsetHeight < element.scrollHeight;
  // The horizontal comparison is only evaluated when there is no vertical overflow.
  var hasOverflow = overflowsVertically ||
    element.offsetWidth < element.scrollWidth;

  if (!hasOverflow) {
    // the content fits inside the element: no button needed
    return;
  }

  // the content overflows the element: show the "read more" button here
}
Example call:
// Look up the element by its id, then run the overflow check on it.
var elementToCheck = document.getElementById('someElementToCheck');
showReadMoreButton(elementToCheck);
// Call showReadMoreButton() again after page load, on window resize, and after any function that changes the content of the overflowing element.
我想将其转换为邻接矩阵形式的数据集,其中行和列均为 person_id,每个单元格的值是对应两个人共同出现的 event_id 的总数。
我试图做这样的事情:
# Reproducible toy data: 1000 rows pairing a random two-letter event id with a
# person id drawn from the whole numbers 0 to 499.
set.seed(42)
test <- data.frame(
  event_id = stringi::stri_rand_strings(
    n = 1000, length = 2, pattern = "[A-Z]"
  ),
  person_id = floor(runif(n = 1000, min = 0, max = 500))
)
>head(test)
event_id person_id
1 EP 438
2 IX 227
3 AV 212
4 GX 469
5 QF 193
6 MM 222
但是在尝试将其转换为邻接矩阵并然后计算非零值(不是对角项)的总数时,
# Attempt: reshape to long form, then cast into a person-by-person table of
# distinct event counts.
# melt() keeps event_id as the id column and stores the values of the remaining
# column (person_id) under the name invitee_id.
# NOTE: the cast formula puts invitee_id on BOTH sides, so every row can only
# land in a cell where the row id equals the column id; that is why only the
# diagonal ends up non-zero.
adjacency_df <- test %>%
select('event_id', 'person_id') %>%
melt('event_id', value.name = 'invitee_id') %>%
dcast(invitee_id~invitee_id, fun.aggregate = n_distinct, value.var = 'event_id')
我得到所有非对角线值均为零。
# Convert the cast result to a numeric matrix, dropping the first (id) column.
# lapply() + matrix() keeps the shape stable: sapply() on `adjacency_df[, -1]`
# silently collapses to a plain vector when there is only one row or only one
# remaining column, which then yields a matrix of the wrong shape.
adjacency_matrix <- matrix(
  as.numeric(unlist(
    lapply(adjacency_df[, -1, drop = FALSE], as.numeric),
    use.names = FALSE
  )),
  nrow = nrow(adjacency_df),
  ncol = ncol(adjacency_df) - 1L,
  dimnames = list(NULL, names(adjacency_df)[-1])
)
# Label the rows with the same ids as the columns (assumes a square matrix).
rownames(adjacency_matrix) <- colnames(adjacency_matrix)
# TRUE when every off-diagonal entry is zero, i.e. only the diagonal is non-zero.
all(adjacency_matrix[lower.tri(adjacency_matrix)] == 0, adjacency_matrix[upper.tri(adjacency_matrix)] == 0)
最有效的方法是什么(请注意数据集包含200万个观测值)?
我尝试了评论区中建议的方法,并在我的实际数据集上收到以下错误:
> all(adjacency_matrix[lower.tri(adjacency_matrix)] == 0, adjacency_matrix[upper.tri(adjacency_matrix)] == 0)
[1] TRUE
所以我需要更好的方法
答案 0 :(得分:2)
由于矩阵大小似乎是问题所在,因此可以使用 Matrix 包提供的 crossprod 版本,如下所示:
library(Matrix)

# Build a sparse event-by-person incidence matrix. Each id column is converted
# to a factor once, so the index codes and the dimnames share the same levels.
mat <- with(test, {
  event_f <- factor(event_id)
  person_f <- factor(person_id)
  sparseMatrix(
    i = as.numeric(event_f),
    j = as.numeric(person_f),
    dimnames = list(levels(event_f), levels(person_f))
  )
})
# crossprod(mat) is t(mat) %*% mat: a person-by-person matrix.
crossprod(mat)
Matrix 程序包会创建稀疏矩阵,因此应该能够处理多得多的单元格。
答案 1 :(得分:1)
不确定使用 crossprod 能否解决您的错误,但可以这样尝试。使用上面的数据:
library(dplyr)

# Reproducible toy data: 1000 random two-letter event ids, each paired with a
# person id drawn from the whole numbers 0 to 499.
set.seed(42)
test <- data.frame(
  event_id = stringi::stri_rand_strings(
    n = 1000, length = 2, pattern = "[A-Z]"
  ),
  person_id = floor(runif(n = 1000, min = 0, max = 500))
)
按 event_id 分组,并据此创建列联表:
# Cross-tabulate the two columns into an event_id x person_id contingency table
# of row counts.
# NOTE(review): group_by() does not appear to change what table() returns
# here; confirm before relying on it.
# NOTE(review): an (event_id, person_id) pair that occurs in several rows is
# counted several times; consider de-duplicating first if each shared event
# should count only once.
out <- test %>%
group_by(event_id) %>%
table()
将上一步得到的表格作为 crossprod 的输入:
# crossprod(out) computes t(out) %*% out, giving a person_id x person_id matrix.
x <- crossprod(out)
看看大矩阵的一小部分:
> x[1:20, 1:20]
person_id
person_id 0 2 3 4 5 6 9 10 11 12 13 14 15 16 17 18 19 20 21 23
0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 0 5 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
3 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 3 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
5 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0
10 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0
13 0 0 0 1 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
16 0 1 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0
17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0
19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0
20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0
21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0
23 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3
这是否接近您期望的输出?在大矩阵上很难判断结果是否正确——不妨看看下面这个较小的示例数据集:
{
  # Small reproducible example: ten people, each assigned one of three events.
  set.seed(42)
  test <- data.frame(
    event_id = sample(x = c("AB", "LM", "YZ"), size = 10, replace = TRUE),
    person_id = seq_len(10)
  )

  # Event-by-person contingency table, then its cross-product.
  out <- test %>% group_by(event_id) %>% table()
  x <- crossprod(out)

  # Show the contingency table first; the block's value is the
  # person-by-person matrix.
  print(out)
  x
}
person_id
event_id 1 2 3 4 5 6 7 8 9 10
AB 0 0 1 0 0 0 0 1 0 0
LM 0 0 0 0 1 1 0 0 1 0
YZ 1 1 0 1 0 0 1 0 0 1
person_id
person_id 1 2 3 4 5 6 7 8 9 10
1 1 1 0 1 0 0 1 0 0 1
2 1 1 0 1 0 0 1 0 0 1
3 0 0 1 0 0 0 0 1 0 0
4 1 1 0 1 0 0 1 0 0 1
5 0 0 0 0 1 1 0 0 1 0
6 0 0 0 0 1 1 0 0 1 0
7 1 1 0 1 0 0 1 0 0 1
8 0 0 1 0 0 0 0 1 0 0
9 0 0 0 0 1 1 0 0 1 0
10 1 1 0 1 0 0 1 0 0 1