我在处理一个 XML 文件时遇到两个问题。首先是重复的属性名(“timestamp”和“id”):data.table 把它们合并到同一列中,而不是分成不同的列。其次,下面的 data.table 示例产生了许多 NA,而这些位置本应填入对应父节点的(重复)值。
<Node1 timestamp="start">
<Node2 id="1110" Value1="345">
<Node3 id="500" timestamp="1">
<Node4 id="484663" Value2="130" Value3="1,2,3" />
<Node4 id="253234" Value2="59" Value3="1,2,3" />
<Node4 id="198476" Value2="131" Value3="1,2,3" />
</Node3>
<Node3 id="501" timestamp="2">
<Node4 id="305943" Value2="444" Value3="1,2,3" />
</Node3>
<Node3 id="601" timestamp="5">
</Node3>
<Node3 id="113" timestamp="3">
<Node4 id="2009343" Value2="555" Value3="1,2,3" />
<Node4 id="2530931" Value2="333" />
<Node4 id="1984761" Value2="111" Value3="1,2,3" />
</Node3>
</Node2>
</Node1>
我使用以下代码来获取数据框。但结果中有很多 NA,并且“id”和“timestamp”的值混在同一列中。如何让 data.table 生成三个 id 列并重复父节点的值,而不是填入 NA?
library(data.table)
library(XML)

# test.xml is the XML file shown above; parse it into an internal document.
test <- xmlTreeParse("test.xml", useInternalNodes = TRUE)

# Turn the attributes of every element matched by "//*" into one row each and
# stack the rows. Columns are matched by name, and attributes an element does
# not have are padded with NA (fill = TRUE).
Node1 <- rbindlist(
  lapply(test["//*"], function(node) as.list(xmlAttrs(node))),
  use.names = TRUE,
  fill = TRUE
)
结果应如下所示..
timestamp id Value1 id timestamp id Value2 Value3
start 1110 345 500 1 484663 130 1,2,3
start 1110 345 500 1 253234 59 1,2,3
start 1110 345 500 1 198476 131 1,2,3
start 1110 345 501 2 305943 444 1,2,3
start 1110 345 601 5 NA NA NA
start 1110 345 113 3 2009343 555 1,2,3
start 1110 345 113 3 2530931 333 NA
start 1110 345 113 3 1984761 111 1,2,3
答案 0 :(得分:0)
循环遍历 Node4 的节点集,并将其传递给 xmlAncestors;它会从 Node4 一直向上遍历到 Node1,
并使用 xmlAttrs() 获取层次结构中每一级的属性值。如果某个分支下没有 Node4,
则该分支的整个遍历树都会被忽略。完成对 Node4 的处理
# Working from Node4: build one row per <Node4>, carrying the attributes of
# every ancestor (Node1 > Node2 > Node3) in front of Node4's own attributes.
# Once the Node4 pass is finished, the same steps are repeated for Node3.
library(data.table)

# xpathApply() always returns a list with one element per matched node.
# xpathSApply() would collapse the result into a matrix whenever every Node4
# happens to carry the same number of attributes, which would break the
# list-based processing below.
a1 <- xpathApply(doc, "//Node4", xmlAncestors, xmlAttrs)
if (length(a1) == 0L) {
  stop("no <Node4> elements found in 'doc'", call. = FALSE)
}

# Use the first record with the most attributes as the reference column set.
# Repeated attribute names are made unique (id, id.1, id.2, ...).
a1_len <- lengths(a1)
nm <- make.unique(names(a1[[which.max(a1_len)]]))

a1 <- lapply(a1, function(x) {
  nm_x <- make.unique(names(x))
  # Pad attributes this node lacks (e.g. a Node4 without Value3) with NA so
  # that every record ends up with the same columns.
  absent <- setdiff(nm, nm_x)
  if (length(absent) > 0L) {
    x[absent] <- NA
  }
  x <- cbind.data.frame(x, stringsAsFactors = FALSE)
  colnames(x) <- make.unique(colnames(x))
  setDT(x)
  # Put the columns in the reference order before the rows are stacked.
  setcolorder(x, nm)
  x
})
a1 <- rbindlist(a1)
a1
# timestamp id Value1 id.1 timestamp.1 id.2 Value2 Value3
# 1: start 1110 345 500 1 484663 130 1,2,3
# 2: start 1110 345 500 1 253234 59 1,2,3
# 3: start 1110 345 500 1 198476 131 1,2,3
# 4: start 1110 345 501 2 305943 444 1,2,3
# 5: start 1110 345 113 3 2009343 555 1,2,3
# 6: start 1110 345 113 3 2530931 333 NA
# 7: start 1110 345 113 3 1984761 111 1,2,3
然后对 Node3 重复同样的步骤,并将两份结果合并在一起。
从 Node3 开始处理:
# Working from Node3: one row per <Node3>, holding the attributes of its
# ancestors (Node1 > Node2) followed by its own attributes. This also covers
# Node3 elements that have no Node4 children.
#
# xpathApply() returns one list element per Node3 no matter how many
# attributes each one carries. The previous t(xpathSApply(...)) form only
# worked when the result was simplified to a matrix, i.e. when every Node3
# had exactly the same number of attributes.
b1 <- xpathApply(doc, "//Node3", xmlAncestors, xmlAttrs)
b1 <- lapply(b1, function(x) {
  # De-duplicate repeated attribute names (id -> id, id.1;
  # timestamp -> timestamp, timestamp.1) so columns line up by name.
  names(x) <- make.unique(names(x))
  x
})
# fill = TRUE pads attributes missing on some Node3 with NA; the result is
# converted back to a plain data.frame of character columns.
b1 <- as.data.frame(data.table::rbindlist(b1, fill = TRUE))
b1
# timestamp id Value1 id.1 timestamp.1
# 1 start 1110 345 500 1
# 2 start 1110 345 501 2
# 3 start 1110 345 601 5
# 4 start 1110 345 113 3
合并 a1 和 b1:
# Full outer join of the Node4-level table (a1) with the Node3-level table
# (b1): all = TRUE keeps rows from both sides, so a Node3 that has no Node4
# row in a1 still appears, with NA in the Node4-only columns.
merge(x = a1, y = b1, all = TRUE)
# timestamp id Value1 id.1 timestamp.1 id.2 Value2 Value3
# 1: start 1110 345 113 3 2009343 555 1,2,3
# 2: start 1110 345 113 3 2530931 333 NA
# 3: start 1110 345 113 3 1984761 111 1,2,3
# 4: start 1110 345 500 1 484663 130 1,2,3
# 5: start 1110 345 500 1 253234 59 1,2,3
# 6: start 1110 345 500 1 198476 131 1,2,3
# 7: start 1110 345 501 2 305943 444 1,2,3
# 8: start 1110 345 601 5 NA NA NA
数据(`doc` 的定义;需要在上面使用 `doc` 的代码之前运行):
# Load the XML package, which provides xmlParse().
library('XML')
# Parse the sample document from an inline string. `doc` is the parsed
# document that the "//Node4" and "//Node3" XPath queries in this file run
# against, so this statement has to be evaluated before them.
doc <- xmlParse(' <Node1 timestamp="start">
<Node2 id="1110" Value1="345">
<Node3 id="500" timestamp="1">
<Node4 id="484663" Value2="130" Value3="1,2,3" />
<Node4 id="253234" Value2="59" Value3="1,2,3" />
<Node4 id="198476" Value2="131" Value3="1,2,3" />
</Node3>
<Node3 id="501" timestamp="2">
<Node4 id="305943" Value2="444" Value3="1,2,3" />
</Node3>
<Node3 id="601" timestamp="5">
</Node3>
<Node3 id="113" timestamp="3">
<Node4 id="2009343" Value2="555" Value3="1,2,3" />
<Node4 id="2530931" Value2="333" />
<Node4 id="1984761" Value2="111" Value3="1,2,3" />
</Node3>
</Node2>
</Node1> ')
数据:(XML 数据见上面的 `doc <- xmlParse(...)`;下面的 jQuery 片段与本问题无关。)
// Clicking the #open control toggles the sidebar: swap the control's icon
// between its two glyphicon classes, flip the sidebar's "actives" class and
// show/hide the backdrop.
$('#open').click(function () {
    var $icon = $(this).find('i');
    $icon.toggleClass('glyphicon-align-justify');
    $icon.toggleClass('glyphicon-remove');

    $('.sidebar').toggleClass('actives');
    $(".backdrop").toggle();
});
// Close the sidebar when a click lands outside both the sidebar and #open.
$(document).click(function (event) {
    // Nothing to do unless an element matches ".sidebar.actives".
    if ($(".sidebar.actives").length === 0) {
        return;
    }

    var $panel = $(".sidebar, #open");

    // Ignore clicks on the sidebar / #open themselves or on their descendants.
    var clickedInside = $panel.is(event.target) ||
        $panel.has(event.target).length !== 0;
    if (clickedInside) {
        return;
    }

    // Swap the #open icon classes, drop "actives" and hide the backdrop.
    var $icon = $('#open').find('i');
    $icon.toggleClass('glyphicon-align-justify');
    $icon.toggleClass('glyphicon-remove');
    $panel.removeClass('actives');
    $(".backdrop").hide();
});