我正在将一个结构复杂的 XML 文件转换为数据框(data frame)。
我目前的方法存在两个问题:(1)当某些 <line> 缺少子节点(例如 <two>)时,各列提取出的向量长度不一致,取值也无法与所在的行对应;(2)无法在 data.frame() 调用中直接引用其他列来计算 sum1。
这是它的简化版本:
# Simplified reproduction of the XML-to-data-frame conversion being asked about.
library(xml2)

# Three <line> records; only the second one contains a <two> child.
xml_data <- "
<top>
<line>
<one>1</one>
</line>
<line>
<one>1</one>
<two>2</two>
</line>
<line>
<one>1</one>
</line>
</top>
"

# Parse the XML string defined above. (The snippet previously passed `file`,
# a name that is never assigned here, so the example could not be run.)
data2 <- read_xml(xml_data)

# NOTE: the data.frame() call below is deliberately kept as posted -- it is the
# approach whose problems are being asked about:
#   * ".//line/one" matches three nodes but ".//line/two" matches only one, so
#     the two columns have different lengths and the <two> value is no longer
#     tied to the <line> it came from.
#   * `sum1 = one + two` cannot work: `one` and `two` are argument names, not
#     variables visible to sibling arguments, and xml_text() returns character
#     values rather than numbers.
df <- data.frame(
  # purchase
  one = xml_text(xml_find_all(data2, ".//line/one")),
  two = xml_text(xml_find_all(data2, ".//line/two")),
  sum1 = one + two
)
答案 0 :(得分:2)
写完评论之后,我意识到提问者不太可能真的自己去搜索,所以直接给出解法:
# Convert each <line> element into one row of a tibble, filling absent child
# elements with NA and adding a per-row sum.
library(xml2)
library(purrr)
library(dplyr)

# Three <line> records; only the second one contains a <two> child.
xml_data <- "
<top>
<line>
<one>1</one>
</line>
<line>
<one>1</one>
<two>2</two>
</line>
<line>
<one>1</one>
</line>
</top>
"

data2 <- read_xml(xml_data)

# Numeric values of the descendants of `node` matching `xpath`.
# Returns NA_real_ when nothing matches, so the caller always receives at
# least one value and a <line> with a missing child still yields a row.
child_numbers <- function(node, xpath) {
  values <- as.numeric(xml_text(xml_find_all(node, xpath)))
  if (length(values) == 0) {
    return(NA_real_)
  }
  values
}

# Work one <line> at a time so every value stays attached to its own record;
# map_dfr() row-binds the per-line tibbles into a single result.
xml_find_all(data2, ".//line") %>%
  map_dfr(function(x) {
    one <- child_numbers(x, ".//one")
    two <- child_numbers(x, ".//two")
    # na.rm = TRUE lets the sum ignore whichever child is absent.
    tibble(one, two, sum = sum(one, two, na.rm = TRUE))
  })
## # A tibble: 3 × 3
## one two sum
## <dbl> <dbl> <dbl>
## 1 1 NA 1
## 2 1 2 3
## 3 1 NA 1