一位同事正在向我发送Elasticsearch查询结果(100000条记录,数百个属性),如下所示:
# Sample payload: a JSON array of three animals. Each animal carries an
# `attributes` object whose `noises` entry is either a single object or an
# array of objects, and every noise object has a `noise` and a `code` field.
pets_json <- paste0(
  c(
    '[{"animal":"cat","attributes":{"intelligence":"medium","noises":[{"noise":"meow","code":4},{"noise":"hiss","code":2}]}},',
    '{"animal":"dog","attributes":{"intelligence":"high","noises":{"noise":"bark","code":1}}},',
    '{"animal":"snake","attributes":{"intelligence":"low","noises":{"noise":"hiss","code":2}}}]'
  ),
  collapse = ""
)
其中有一个冗余的键 `code`,我不需要保留它。
我想生成一个类似下面这样的 data.frame:
animal intelligence noises.bark noises.hiss noises.meow
cat medium 0 1 1
dog high 1 0 0
snake low 0 1 0
我可以读入这段 JSON,但 `flatten=TRUE` 并没有把它完全展平:
library(jsonlite)
# Parse the sample with flattening enabled, keep the result in `df`, and
# print its structure.
df <- fromJSON(txt = pets_json, flatten = TRUE)
str(df)
# 'data.frame': 3 obs. of 3 variables:
# $ animal : chr "cat" "dog" "snake"
# $ attributes.intelligence: chr "medium" "high" "low"
# $ attributes.noises :List of 3
# ..$ :'data.frame': 2 obs. of 2 variables: \
# .. ..$ noise : chr "meow" "hiss" \
# .. ..$ code: int 4 2 |
# ..$ :List of 2 |
# .. ..$ noise : chr "bark" |- need to remove code and flatten
# .. ..$ code: int 1 |
# ..$ :List of 2 |
# .. ..$ noise : chr "hiss" /
# .. ..$ code: int 2 /
因为展平并不完整,我可以利用这个中间阶段,在再次调用 `flatten()` 之前去掉不需要的键 `code`,但我所知道的去掉该键的唯一方法非常慢:
# Remove the redundant `code` entry from every nested list / data.frame that
# sits inside a list column of `df`, then flatten what is left.
#
# vapply() is used instead of sapply() so the mask is always a logical
# vector, even for empty input. Each list column is pulled out once, edited
# as a plain list, and written back once, instead of going through the
# data.frame assignment method for every single cell.
list_cols <- which(vapply(df, is.list, logical(1)))
for (col in list_cols) {
  column <- df[[col]]
  nested <- which(vapply(column, is.list, logical(1)))
  for (idx in nested) {
    column[[idx]]["code"] <- NULL
  }
  df[[col]] <- column
}
(df <- data.frame(flatten(df)))
# animal attributes.intelligence attributes.noises
# 1 cat medium meow, hiss
# 2 dog high bark
# 3 snake low hiss
之后呢......?我知道借助 `tidyr::separate`,我也许能想出一种取巧的办法,把噪声值 `spread` 到各列中并设置标志。但这种做法一次只适用于一个属性,而我可能有数百个属性,并且事先并不知道所有可能的属性值。
如何有效地生成所需的data.frame?谢谢你的时间!
答案 0 :(得分:2)
我认为没有特别简单的方法能直接得到所需的格式,但下面是一种尝试:
out <- fromJSON(pets_json)

# Drop the "code" field and reduce each animal's noises to a plain character
# vector. Selecting the fields to keep by name (rather than with a negative
# match() index) also works for entries that carry no "code" field, where
# match() would return NA.
out$noises <- lapply(
  out$attributes$noises,
  function(x) unlist(x[setdiff(names(x), "code")])
)

# Lift the nested intelligence value up to a top-level column.
out$intelligence <- out$attributes$intelligence

# Every distinct noise seen in the data becomes one indicator column.
unq_noises <- unique(unlist(out$noises))

# One row per animal, one logical column per noise. Each vapply() call
# returns one flag per distinct noise, so FUN.VALUE has to be sized by
# `unq_noises`, not by the number of animals. Reshaping with
# matrix(..., byrow = TRUE) keeps the animals-by-noises layout even when
# there is only a single distinct noise and vapply() returns a plain vector.
noise_flags <- vapply(
  out$noises,
  function(x) unq_noises %in% x,
  FUN.VALUE = logical(length(unq_noises))
)
out[unq_noises] <- matrix(
  noise_flags,
  nrow = length(out$noises),
  ncol = length(unq_noises),
  byrow = TRUE
)

# Clean up the helper columns that are no longer needed.
out[c("attributes", "noises")] <- list(NULL)
out
# animal intelligence meow hiss bark
#1 cat medium TRUE TRUE FALSE
#2 dog high FALSE FALSE TRUE
#3 snake low FALSE TRUE FALSE
答案 1 :(得分:2)
这是另一个将socket.on('serverTellPlayerMove', function (count, userData, countEnergy, strEnergy) {
    // Handler for the 'serverTellPlayerMove' message.
    //   count       - number of user records packed into userData
    //   userData    - comma-separated user fields. A record whose first field
    //                 is 1 is read as 10 fields (flag, kills, x, y, angle, hue,
    //                 radius, name, dead, dying); any other record is read as
    //                 8 fields (x, y, angle, hue, radius, name, dead, dying).
    //   countEnergy - number of energy records packed into strEnergy
    //   strEnergy   - comma-separated energy fields, 7 per record
    // Writes the outer variables `users` and `energies`, and updates `player`
    // when playerType is 'player'.
    var tempUsers = userData.split(",");
    console.log(userData);
    var visible = [];
    var overall = 0;
    var i;
    for (i = 0; i < count; i++) {
        // NOTE(review): a short record whose x value is exactly 1 would be
        // read as the long form here - confirm the sender rules that out.
        if (Number(tempUsers[overall]) == 1) {
            visible.push({
                p: Number(tempUsers[0 + overall]) == 1,
                kills: Number(tempUsers[1 + overall]),
                x: Number(tempUsers[2 + overall]),
                y: Number(tempUsers[3 + overall]),
                angle: Number(tempUsers[4 + overall]),
                hue: Number(tempUsers[5 + overall]),
                radius: Number(tempUsers[6 + overall]),
                name: tempUsers[7 + overall],
                dead: parseInt(tempUsers[8 + overall]) == 1,
                dying: parseInt(tempUsers[9 + overall]) == 1
            });
            overall += 10;
        } else {
            visible.push({
                x: Number(tempUsers[0 + overall]),
                y: Number(tempUsers[1 + overall]),
                angle: Number(tempUsers[2 + overall]),
                hue: Number(tempUsers[3 + overall]),
                radius: Number(tempUsers[4 + overall]),
                name: tempUsers[5 + overall],
                dead: Number(tempUsers[6 + overall]) == 1,
                dying: Number(tempUsers[7 + overall]) == 1
            });
            overall += 8;
        }
    }
    console.log(userData);
    console.log(visible);

    // The first record carrying the flag is taken as the local player's record.
    var playerData;
    for (i = 0; i < visible.length; i++) {
        if (visible[i].p) {
            playerData = visible[i];
            break;
        }
    }

    // Only update the local player when its record was actually found.
    // Without the playerData check, a payload with no flagged record threw a
    // TypeError on playerData.x and aborted the rest of the handler, so
    // `users` and `energies` were never refreshed.
    if (userData != "" && playerType == 'player' && playerData !== undefined) {
        console.log(playerData);
        // How far the player moved since the previous update.
        var xoffset = player.x - playerData.x;
        var yoffset = player.y - playerData.y;
        player.x = playerData.x;
        player.y = playerData.y;
        player.angle = playerData.angle;
        player.hue = playerData.hue;
        player.xoffset = isNaN(xoffset) ? 0 : xoffset;
        player.yoffset = isNaN(yoffset) ? 0 : yoffset;
        player.dead = playerData.dead;
        document.getElementById("killsText").innerHTML = "Kills: " + playerData.kills;
    }

    // DEATH ANIM: one explosion per dying user. This loop used to appear
    // twice in a row, which created every explosion twice.
    for (i = 0; i < visible.length; i++) {
        if (visible[i].dying) {
            createExplosion(visible[i].x, visible[i].y);
        }
    }
    users = visible;

    var energiesList = [];
    var allEnergies = 0;
    if (strEnergy != "") {
        var tempEnergies = strEnergy.split(",");
        for (i = 0; i < countEnergy; i++) {
            energiesList.push({
                x: Number(tempEnergies[0 + allEnergies]),
                y: Number(tempEnergies[1 + allEnergies]),
                radius: Number(tempEnergies[2 + allEnergies]),
                index: Number(tempEnergies[3 + allEnergies]),
                animate: tempEnergies[4 + allEnergies] === "true",
                hue: Number(tempEnergies[5 + allEnergies]),
                room: tempEnergies[6 + allEnergies]
            });
            allEnergies += 7;
        }
    }
    console.log(energiesList);

    // An empty list simply skips this loop, so no separate emptiness check
    // (previously an array-to-"" comparison) is needed.
    for (i = 0; i < energiesList.length; ++i) {
        var energyT = energiesList[i];
        if (energyT.animate) {
            energyT.animScale = 0;
        } else if (energies[energyT.index] != undefined) {
            // Carry the running animation scale over from the stored entry.
            energyT.animScale = energies[energyT.index].animScale;
        }
        energies[energyT.index] = energyT;
    }
});
data.table 和 magrittr 结合起来的方案(为了赢得额外的“时髦加分”):
最终格式符合您的要求:
# Do not simplify to data.frame, so every pet stays a nested list
df <- fromJSON(txt = pets_json, simplifyDataFrame = FALSE)
str(df)

# The %<>% operator pipes `df` through the chain and assigns the result back
df %<>%
  lapply(function(pet) {
    # unlist() yields the noise values and their codes side by side, named
    # after the JSON keys. Dropping the entries named "code" removes every
    # code whatever its value, instead of only the single-digit ones that a
    # comparison against as.character(0:9) would catch.
    noise_fields <- unlist(pet$attributes$noises)
    data.table(
      animal = pet$animal,
      intelligence = pet$attributes$intelligence,
      noises = unname(noise_fields[names(noise_fields) != "code"])
    )
  }) %>%
  rbindlist() %>% # Combine into a single data.table
  dcast(
    animal + intelligence ~ paste0("noises.", noises), # Cast the noises variables
    value.var = "noises",
    fill = 0, # Put 0 instead of NA
    fun.aggregate = function(x) 1 # Put 1 instead of noise
  )
df
# animal intelligence noises.bark noises.hiss noises.meow
# 1: cat medium 0 1 1
# 2: dog high 1 0 0
# 3: snake low 0 1 0
现在,您似乎想要对多个属性进行泛化。假设您的数据还具有 `colors` 属性,例如:
然后你可以遵循这个通用代码,这个代码相当丑陋,但应该可以正常工作:
# Extended sample payload: the same three animals, each now also carrying a
# `colors` attribute that, like `noises`, is either a single object or an
# array of objects with a `code` field.
pets_json <- paste0(
  c(
    '[{"animal":"cat","attributes":{"intelligence":"medium","noises":[{"noise":"meow","code":4},{"noise":"hiss","code":2}],"colors":[{"color":"brown","code":4},{"color":"white","code":2}]}},',
    '{"animal":"dog","attributes":{"intelligence":"high","noises":{"noise":"bark","code":1},"colors":{"color":"brown","code":4}}},',
    '{"animal":"snake","attributes":{"intelligence":"low","noises":{"noise":"hiss","code":2},"colors":[{"color":"green","code":4},{"color":"brown","code":4}]}}]'
  ),
  collapse = ""
)
此解决方案也适用于单个属性的情况(只需在 `attr.names` 中只列出一个属性名)。
# Do not simplify to data.frame
df <- fromJSON(txt = pets_json, simplifyDataFrame = FALSE)
str(df)

# Set up the attributes names
attr.names <- c("noises", "colors")

# Build one long row per (animal, attribute, value).
# This replaces the eval(parse(text = ...)) construction: attributes are
# looked up with [[ ]], so no code is assembled from strings. Each attribute
# is handled on its own, so attributes with different numbers of values no
# longer have to be recycled against each other inside one data.table.
df <- rbindlist(lapply(df, function(pet) {
  rbindlist(lapply(attr.names, function(attr_name) {
    # unlist() names each entry after its JSON key; dropping the entries
    # named "code" removes every code regardless of its value.
    fields <- unlist(pet$attributes[[attr_name]])
    values <- unname(fields[names(fields) != "code"])
    if (length(values) == 0L) {
      return(NULL) # nothing to record for this attribute
    }
    data.table(
      animal = pet$animal,
      intelligence = pet$attributes$intelligence,
      # A factor keeps the output columns in the order given by attr.names.
      variable = factor(attr_name, levels = attr.names),
      value = as.character(values)
    )
  }))
}))

# Cast to one 0/1 indicator column per attribute value; unique() makes sure a
# repeated value is still reported as 1.
df <- dcast(
  unique(df),
  animal + intelligence ~ variable + value,
  sep = ".",
  fun.aggregate = length,
  value.var = "value"
)
# animal intelligence noises.bark noises.hiss noises.meow colors.brown
# 1: cat medium 0 1 1 1
# 2: dog high 1 0 0 1
# 3: snake low 0 1 0 1
# colors.green colors.white
# 1: 0 1
# 2: 0 0
# 3: 1 0
。