我正在使用XML文件,我设法从中提取了一组我感兴趣的节点:
# Attach the XML package, which provides xmlParse() and getNodeSet().
library(XML)
# Parse the XML file into an in-memory document.
XMLData <- xmlParse("MX_Test_K_Charge_2-6.xml")
# Select every <Mono> element anywhere in the document. The XPath query must
# be run against the parsed document object created on the previous line.
XMLNodes <- getNodeSet(XMLData, "//Mono")
我得到一组看起来像这样的节点:
[[1]]
<Mono MonoisoMass="3333.7954" SumIntensity="31795.6617" AveragineMass="3334.7653">
<Charged>
<Match Chg="5" MoiM="3332.8416" avgMoiM="3332.7844" MoiMz="667.5756" calcMoiMz="667.56261" MabMz="667.7632" MabInt="5995" Dev="15.65" Scale="1.087"/>
<Match Chg="6" MoiM="3332.7776" avgMoiM="3332.7768" MoiMz="556.4702" calcMoiMz="556.47006" MabMz="556.6372" MabInt="5554" Dev="2.02" Scale="0.999"/>
<Match Chg="4" MoiM="3332.7775" avgMoiM="3332.7774" MoiMz="834.2016" calcMoiMz="834.20145" MabMz="834.4524" MabInt="5613" Dev="2.00" Scale="0.999"/>
<Match Chg="3" MoiM="3332.7760" avgMoiM="3332.7765" MoiMz="1111.9326" calcMoiMz="1111.93284" MabMz="1112.2676" MabInt="5501" Dev="2.17" Scale="1.001"/>
<Match Chg="2" MoiM="3332.7786" avgMoiM="3332.7770" MoiMz="1667.3966" calcMoiMz="1667.39562" MabMz="1667.8966" MabInt="5574" Dev="1.40" Scale="0.999"/>
</Charged>
</Mono>
[[2]]
<Mono MonoisoMass="2667.3004" SumIntensity="24359.4426" AveragineMass="2667.3000">
<Charged>
<Match Chg="2" MoiM="2666.2969" avgMoiM="2666.2968" MoiMz="1334.1557" calcMoiMz="1334.15581" MabMz="1334.6573" MabInt="5596" Dev="5.32" Scale="0.971"/>
<Match Chg="3" MoiM="2666.2970" avgMoiM="2666.2969" MoiMz="889.7730" calcMoiMz="889.77296" MabMz="890.1073" MabInt="5482" Dev="7.28" Scale="0.971"/>
<Match Chg="4" MoiM="2666.2733" avgMoiM="2666.2926" MoiMz="667.5756" calcMoiMz="667.58154" MabMz="667.8324" MabInt="4790" Dev="33.98" Scale="0.854"/>
<Match Chg="5" MoiM="2666.2976" avgMoiM="2666.2976" MoiMz="534.2668" calcMoiMz="534.26669" MabMz="534.4674" MabInt="5437" Dev="7.52" Scale="0.970"/>
<Match Chg="6" MoiM="2666.2977" avgMoiM="2666.2975" MoiMz="445.3902" calcMoiMz="445.39012" MabMz="445.5574" MabInt="5442" Dev="7.62" Scale="0.970"/>
</Charged>
</Mono>
[[3]]
<Mono MonoisoMass="2000.8526" SumIntensity="20204.6338" AveragineMass="1999.8456">
<Charged>
<Match Chg="6" MoiM="1999.8456" avgMoiM="1999.8451" MoiMz="334.3149" calcMoiMz="334.31481" MabMz="334.4820" MabInt="5115" Dev="6.90" Scale="0.954"/>
<Match Chg="5" MoiM="1999.8456" avgMoiM="1999.8452" MoiMz="400.9764" calcMoiMz="400.97632" MabMz="401.1769" MabInt="5040" Dev="6.63" Scale="0.956"/>
<Match Chg="4" MoiM="1999.8457" avgMoiM="1999.8452" MoiMz="500.9687" calcMoiMz="500.96858" MabMz="501.2194" MabInt="5014" Dev="6.88" Scale="0.954"/>
<Match Chg="3" MoiM="1999.8453" avgMoiM="1999.8480" MoiMz="667.6224" calcMoiMz="667.62234" MabMz="667.9603" MabInt="4353" Dev="3.44" Scale="1.010"/>
<Match Chg="2" MoiM="1999.8450" avgMoiM="1999.8447" MoiMz="1000.9298" calcMoiMz="1000.92988" MabMz="1001.4312" MabInt="5030" Dev="4.91" Scale="0.955"/>
</Charged>
</Mono>
attr(,"class")
[1] "XMLNodeSet"
现在我想把这组节点转换为一个数据框(data frame),其中包含以下列:MonoisoMass、SumIntensity、AveragineMass(它们是父节点 Mono 的属性),以及 Chg、MoiM、avgMoiM、MoiMz、calcMoiMz、MabMz、MabInt、Dev、Scale(它们是子节点 Match 的属性)。
如果这个问题已经有人问过,我很抱歉;但我觉得 XML 包的文档非常复杂,难以理解。如果您能向我推荐一本用通俗易懂的方式讲解如何在 R 中处理 XML 文件的教程或书籍,我将不胜感激。
答案 0 :(得分:1)
您可以执行以下操作:
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with a confusing message.
library(tidyverse)
library(xml2)

# `dat` must already exist: a list of parsed xml2 documents (see the data
# setup snippet further down, which builds it with read_xml()).
# Collect the <Mono> nodes of every document: one node set per document.
mono <- dat %>% map(xml_find_all, "//Mono")

# Parent attributes: one single-row tibble per <Mono> node, in document order.
parent_dat <- mono %>%
  map(xml_attrs) %>%
  flatten() %>%
  map(~ as_tibble(t(.x)))

# Child attributes: one tibble per <Mono> node, with one row per <Match>.
# The XPath is relative (".//Match") and evaluated on each <Mono> node
# separately, so a <Mono> is only paired with its own <Match> descendants
# even when a single document contains several <Mono> elements.
child_dat <- mono %>%
  map(function(mono_set) {
    lapply(seq_along(mono_set), function(i) {
      xml_find_all(mono_set[[i]], ".//Match")
    })
  }) %>%
  flatten() %>%
  map(xml_attrs) %>%
  map(function(match_attrs) map_df(match_attrs, ~ as_tibble(t(.x))))

# Repeat each parent row alongside its child rows, stack everything into one
# data frame, then let readr guess proper column types (all attributes are
# read as character strings).
map2_df(parent_dat, child_dat, cbind) %>% type_convert()
对下面的示例数据运行后,结果如下:
MonoisoMass SumIntensity AveragineMass Chg MoiM avgMoiM MoiMz calcMoiMz MabMz MabInt Dev Scale
1 3333.795 31795.66 3334.765 5 3332.842 3332.784 667.5756 667.5626 667.7632 5995 15.65 1.087
2 3333.795 31795.66 3334.765 6 3332.778 3332.777 556.4702 556.4701 556.6372 5554 2.02 0.999
3 3333.795 31795.66 3334.765 4 3332.778 3332.777 834.2016 834.2015 834.4524 5613 2.00 0.999
4 3333.795 31795.66 3334.765 3 3332.776 3332.776 1111.9326 1111.9328 1112.2676 5501 2.17 1.001
5 3333.795 31795.66 3334.765 2 3332.779 3332.777 1667.3966 1667.3956 1667.8966 5574 1.40 0.999
6 2667.300 24359.44 2667.300 2 2666.297 2666.297 1334.1557 1334.1558 1334.6573 5596 5.32 0.971
7 2667.300 24359.44 2667.300 3 2666.297 2666.297 889.7730 889.7730 890.1073 5482 7.28 0.971
8 2667.300 24359.44 2667.300 4 2666.273 2666.293 667.5756 667.5815 667.8324 4790 33.98 0.854
9 2667.300 24359.44 2667.300 5 2666.298 2666.298 534.2668 534.2667 534.4674 5437 7.52 0.970
10 2667.300 24359.44 2667.300 6 2666.298 2666.298 445.3902 445.3901 445.5574 5442 7.62 0.970
以上代码假设您的数据如下所示(请先运行这一段以创建 `dat`,再运行上面的代码):
# Sample input: two stand-alone XML fragments, each holding a single <Mono>
# element whose <Charged> child contains five <Match> elements.
txt1 <- '<Mono MonoisoMass="3333.7954" SumIntensity="31795.6617" AveragineMass="3334.7653">
<Charged>
<Match Chg="5" MoiM="3332.8416" avgMoiM="3332.7844" MoiMz="667.5756" calcMoiMz="667.56261" MabMz="667.7632" MabInt="5995" Dev="15.65" Scale="1.087"/>
<Match Chg="6" MoiM="3332.7776" avgMoiM="3332.7768" MoiMz="556.4702" calcMoiMz="556.47006" MabMz="556.6372" MabInt="5554" Dev="2.02" Scale="0.999"/>
<Match Chg="4" MoiM="3332.7775" avgMoiM="3332.7774" MoiMz="834.2016" calcMoiMz="834.20145" MabMz="834.4524" MabInt="5613" Dev="2.00" Scale="0.999"/>
<Match Chg="3" MoiM="3332.7760" avgMoiM="3332.7765" MoiMz="1111.9326" calcMoiMz="1111.93284" MabMz="1112.2676" MabInt="5501" Dev="2.17" Scale="1.001"/>
<Match Chg="2" MoiM="3332.7786" avgMoiM="3332.7770" MoiMz="1667.3966" calcMoiMz="1667.39562" MabMz="1667.8966" MabInt="5574" Dev="1.40" Scale="0.999"/>
</Charged>
</Mono> '
txt2 <- '<Mono MonoisoMass="2667.3004" SumIntensity="24359.4426" AveragineMass="2667.3000">
<Charged>
<Match Chg="2" MoiM="2666.2969" avgMoiM="2666.2968" MoiMz="1334.1557" calcMoiMz="1334.15581" MabMz="1334.6573" MabInt="5596" Dev="5.32" Scale="0.971"/>
<Match Chg="3" MoiM="2666.2970" avgMoiM="2666.2969" MoiMz="889.7730" calcMoiMz="889.77296" MabMz="890.1073" MabInt="5482" Dev="7.28" Scale="0.971"/>
<Match Chg="4" MoiM="2666.2733" avgMoiM="2666.2926" MoiMz="667.5756" calcMoiMz="667.58154" MabMz="667.8324" MabInt="4790" Dev="33.98" Scale="0.854"/>
<Match Chg="5" MoiM="2666.2976" avgMoiM="2666.2976" MoiMz="534.2668" calcMoiMz="534.26669" MabMz="534.4674" MabInt="5437" Dev="7.52" Scale="0.970"/>
<Match Chg="6" MoiM="2666.2977" avgMoiM="2666.2975" MoiMz="445.3902" calcMoiMz="445.39012" MabMz="445.5574" MabInt="5442" Dev="7.62" Scale="0.970"/>
</Charged>
</Mono> '
# Parse each fragment into its own xml2 document; `dat` is the resulting
# list of documents, in the same order as the fragments above.
dat <- lapply(list(txt1, txt2), read_xml)