给出以下以列表为值的数据框:
/**
 * Saves an image Blob to the user's machine as "myBlobFile.png".
 *
 * Uses `navigator.msSaveBlob` when it exists (IE 11). Otherwise it creates a
 * temporary hidden `<a download>` element that points at an object URL for
 * the blob and clicks it on the next animation frame.
 *
 * @param {Blob} blob - Image data to save.
 */
function myCallback(blob) {
  var filename = "myBlobFile.png";

  // IE 11: handle this path first so that no object URL or anchor element is
  // created (and then left unreleased) on a path that never uses them.
  if (window.navigator.msSaveBlob !== undefined) {
    window.navigator.msSaveBlob(blob, filename);
    return;
  }

  var url = window.URL.createObjectURL(blob);
  var a = document.createElement("a");
  a.style.display = "none";
  a.href = url;
  a.download = filename;
  document.body.appendChild(a);

  requestAnimationFrame(function() {
    a.click();
    // Release the object URL and detach the helper element once the click
    // has been dispatched.
    window.URL.revokeObjectURL(url);
    document.body.removeChild(a);
  });
}
/**
 * Asks `myDiagram` for its image data as a blob on a white background.
 * The finished blob is delivered to `myCallback`.
 */
function makeBlob() {
  // The result arrives through the callback, so the return value of
  // makeImageData is intentionally not stored.
  myDiagram.makeImageData({
    background: "white",
    returnType: "blob",
    callback: myCallback
  });
}
我要使用以下方式执行Jaccard计算:
# Example data: five users with two list columns.
# Rows 1, 3 and 5 carry the same 47 SSID strings and an empty contacts list;
# rows 2 and 4 hold NULL in both list columns.
df <- local({
  # The SSID vector shared by rows 1, 3 and 5, written out once.
  ssids <- c(
    "qjJf5iZtYboSPvqe1oa/xg==", "ul7kroLEB2cZx6AMGhjnrA==",
    "OYRT/hYu1Dl3/S5WIWyLHA==", "HFiSH/Tu0RSaQgIbDEZfeA==",
    "gUBxBfxjGdyPNzqYX7t6nA==", "m7UqzqaXUm1GkXMbxf+SJw==",
    "KjxvZwsVCNSTtXXKiidmjw==", "3UOqe+4qPVZYXvja8GBEqg==",
    "a9Ba8b19tY/bprM7WA326A==", "uStr9Fg+JlU9B+hdBCafZg==",
    "i9J11W00HFmoeCDObOfSdA==", "A9vOz8zSrwDiQcKv8hk64Q==",
    "/8QeMoqFwd/eJ+/6NKk1iQ==", "HbyJqQxUfH6oiW3skqPzGQ==",
    "51H9RcZmdRgkgg4X6U/mhQ==", "dwat86ppe1b/WXSaGi8r3w==",
    "yBCbMedxtZdiGFXmTfk2eQ==", "wKSIsw3sDPRQhLIhdQkBJw==",
    "3LkFUenHOXWL4Be5T4XmaQ==", "Krni6eGQUnZuL/jU0MzKNA==",
    "Wt9BCH4guyC4oSIHwE8XGA==", "rbgxp/3YPdHiownOdZHf+A==",
    "34rmNRgT/xFDXIDwHKIY6Q==", "pT3zFcGdlJKmR+khJLMoVw==",
    "eni3X9I2B4KRK+sho2MbjA==", "sxuba/1Brg4CrYL8AFv8ZQ==",
    "EYIxPOXPVvop99YD0vjXPA==", "JC1xqrtmQEaohwzviYDFYA==",
    "qv+cfEEqsIGrDFuEqpkQuw==", "d3xXMR1RDKZdrDwQd97kNQ==",
    "qU1JULumBTqw+m/rLr4E7A==", "teDCJvNdyjktWD6leDpCmw==",
    "ytSBHvzbEACq56aEHZlXEw==", "eV7WGimPD01weRI19ojO3g==",
    "vNkJyD9KOzOprGkYyfViMA==", "r8jjZXWyax7JPfJUPFwRTQ==",
    "rJ1N3ONwDBK+jwFf+7xeHg==", "2xPbTqIww1KI/tVL2UH1cw==",
    "1hk1AOU4DZXV52Auyr2FHA==", "aNH8uS5nrlwcHb8rLdZeXQ==",
    "5JPQs2z4N1Dru0dGI9ImBQ==", "nbQIn5G4uyl8b1+A6aVkQg==",
    "A/UcwEccakKDuiATgoP1NA==", "JC57Ib2V7fOU/CgBk2R41g==",
    "PsI8Ys++JveA+SuafbB8pg==", "eXiuBymYN+tcbjtpM9Vxmg==",
    "jEdP3Rs02d/4UE8G1GeE3A=="
  )

  structure(
    list(
      keys.userId = c("9875", "5465", "1234", "4567", "8910"),
      user_data.SSIDs = list(ssids, NULL, ssids, NULL, ssids),
      user_data.contacts = list(list(), NULL, list(), NULL, list())
    ),
    row.names = c(NA, 5L),
    class = "data.frame"
  )
})
我想稍微清理一下数据。
我想删除列表值为NULL、NA或长度为0的行。
请告知执行此操作的正确方法,我首先想到的是:
# Jaccard similarity of two vectors treated as sets:
# |intersection| / |union|. Duplicates within an input do not matter because
# intersect() and union() both return unique values.
# When both inputs are empty the result is 0 / 0, i.e. NaN.
jaccard <- function(vector1, vector2) {
  shared <- intersect(vector1, vector2)
  combined <- union(vector1, vector2)
  length(shared) / length(combined)
}

# Element-wise variant: pairs up the elements of its two arguments (for
# example two list columns) and applies jaccard() to each pair.
jaccardV <- Vectorize(jaccard, vectorize.args = c("vector1", "vector2"))
但是它对每一行都返回FALSE。
有什么方法可以正确有效地做到这一点?请告知。
最后清洗后,我将运行:
# Flag each row whose contacts entry is NULL or flattens to nothing.
# The check runs once per list element: calling is.null(unlist(...)) on the
# column itself tests the whole column at once and yields a single value that
# is recycled to every row.
df %>%
  dplyr::mutate(
    isNull = vapply(
      user_data.contacts,
      function(contacts) is.null(unlist(contacts)),
      logical(1)
    )
  )
答案 0 :(得分:1)
这是一种实现方法:
library(dplyr)
library(purrr)
# Drop rows whose contacts entry is NULL, zero-length, or NA, then score each
# remaining row against the previous row's contacts.
# The filters run one after another, so each predicate only sees the rows kept
# by the earlier ones. The NA check must yield a single value per entry, so it
# only works when every remaining entry has length 1.
df %>%
  filter(map_lgl(user_data.contacts, function(entry) !is.null(entry))) %>%
  filter(map_lgl(user_data.contacts, function(entry) length(entry) > 0)) %>%
  filter(map_lgl(user_data.contacts, function(entry) !is.na(entry))) %>%
  mutate(
    contacts_jaccard = jaccardV(
      user_data.contacts,
      dplyr::lag(user_data.contacts)
    )
  )
这不会产生任何输出,因为在您提供的模拟数据中,所有行都会被删除。如果唯一的目的是稍后删除这些行,则不必创建新列 `isNull`。我现在更喜欢用 `map` 而不是 `sapply`,因为它可以很方便地强制返回特定类型的结果。在这种情况下,`map_lgl` 只会产生TRUE / FALSE结果。
请注意,如果列表列的某个元素长度大于1,则应改用以下代码:
# Same cleaning pipeline, but safe for contacts entries longer than 1: the NA
# check looks only at the first value of each entry, so it always yields a
# single TRUE/FALSE per row.
# The filters run one after another, so each predicate only sees the rows kept
# by the earlier ones.
df %>%
  filter(map_lgl(user_data.contacts, function(entry) !is.null(entry))) %>%
  filter(map_lgl(user_data.contacts, function(entry) length(entry) > 0)) %>%
  filter(map_lgl(user_data.contacts, function(entry) !(is.na(entry)[1]))) %>%
  mutate(
    contacts_jaccard = jaccardV(
      user_data.contacts,
      dplyr::lag(user_data.contacts)
    )
  )
答案 1 :(得分:1)
我认为这应该对您有用。遍历user_data.contacts列,创建一个新列来标记每个元素是否为NULL,然后根据该新列过滤掉这些行。
library(tidyverse)
# Keep only the rows whose contacts entry is not NULL.
# vapply() always returns a logical vector (even for zero rows), whereas the
# return type of sapply() depends on its input.
new_df <- df %>%
  as_tibble() %>%
  mutate(is_Null = vapply(user_data.contacts, is.null, logical(1))) %>%
  filter(!is_Null)
new_df
答案 2 :(得分:1)
列 `user_data.contacts` 是一个列表列,当您对该列执行 `mutate` 时,它会被当作一个整体列表来处理。尝试在 `mutate` 之前使用 `rowwise` 将数据按行分组。
library(dplyr)
# Evaluate the NULL/empty check one row at a time: without rowwise(), mutate()
# sees user_data.contacts as the whole list column.
df %>%
  rowwise() %>%
  mutate(isNull = is.null(unlist(user_data.contacts))) %>%
  ungroup() # drop the row-wise grouping so later verbs act on the full table
# A tibble: 5 x 4 # added data to 1st observation for test
keys.userId user_data.SSIDs user_data.contacts isNull
<chr> <list> <list> <lgl>
1 9875 <chr [47]> <list [3]> FALSE
2 5465 <NULL> <NULL> TRUE
3 1234 <chr [47]> <list [0]> TRUE
4 4567 <NULL> <NULL> TRUE
5 8910 <chr [47]> <list [0]> TRUE