如何在 RStudio 中用从 XML 提取的数据框创建等值区域图(choropleth)

时间:2015-05-06 00:30:23

标签: xml r ggplot2 choropleth

(R 新手)我下载了一个 XML 文件,想在 R 中根据其中的数据绘制等值区域图。我使用的是美国流感数据。据我了解,需要先把这个 XML 文件读入为 R 的数据框,于是我照做了。但查看数据框时,看到的仍然全是 XML 格式的内容。我的问题是:如何提取出我需要的信息来创建地图?目前我连尝试绘制数据都会报错。我到处查找过相关资料,但一直没有找到答案。

 setwd("C:/Users/Steven/Downloads/Map_Final")
> library (XML)
> library(ggplot2)
> library(maps)
> library(plyr)
> library(mapproj)
> map('state')
> 
> xmlfile=xmlParse("flu.xml")
> 
> class(xmlfile)
[1] "XMLInternalDocument" "XMLAbstractDocument"
> ggplot(xmlfile)
Error: ggplot2 doesn't know how to deal with data of class XMLInternalDocumentXMLAbstractDocument
> xmltop = xmlRoot(xmlfile) #gives content of root
> 
> class(xmltop)#"XMLInternalElementNode" "XMLInternalNode" "XMLAbstractNode"
[1] "XMLInternalElementNode" "XMLInternalNode"        "XMLAbstractNode"       
> 
> xmlName(xmltop) # name of the root node ("timeperiod" for this file)
[1] "timeperiod"
> 
> xmlSize(xmltop) # number of child nodes under the root (54 here)
[1] 54
> 
> xmlName(xmltop[[1]]) #name of root's children
[1] "state"
> 
> xmltop[[1]]
<state>
  <abbrev>ME</abbrev>
  <color>No Activity</color>
  <label>No Activity</label>
</state> 
> 
> xmltop[[2]]
<state>
  <abbrev>NH</abbrev>
  <color>Local Activity</color>
  <label>Local Activity</label>
</state> 
> 
> ggplot(xmltop)
Error: ggplot2 doesn't know how to deal with data of class XMLInternalElementNodeXMLInternalNodeXMLAbstractNode
> xmltop[[2]]
<state>
  <abbrev>NH</abbrev>
  <color>Local Activity</color>
  <label>Local Activity</label>
</state> 
> 
> xmltop[[2]]
<state>
  <abbrev>NH</abbrev>
  <color>Local Activity</color>
  <label>Local Activity</label>
</state> 
> 
> birdflu=ldply(xmlToList("flu.xml"), data.frame)
> ggplot(birdflu)
Error: No layers in plot
> View(birdflu)

XML文件:

<timeperiod number="40" year="2014" subtitle="Week Ending October 11, 2014- Week 40">
<state>
<abbrev>ME</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NH</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
<state>
<abbrev>VT</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>MA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>RI</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>CT</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>NY</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>NJ</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>PR</abbrev>
<color>Regional</color>
<label>Regional</label>
</state>
<state>
<abbrev>VI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>PA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>DE</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>MD</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>DC</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>VA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>WV</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NC</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>SC</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>GA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>FL</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>KY</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>TN</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>AL</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
<state>
<abbrev>MS</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>OH</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>IN</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>IL</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>MI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>WI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>MN</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>AR</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>LA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>OK</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>TX</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>NM</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>IA</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>MO</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NE</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>KS</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>ND</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
<state>
<abbrev>SD</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>MT</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>WY</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>CO</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>UT</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>AZ</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NV</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>CA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>HI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>GU</abbrev>
<color>Widespread</color>
<label>Widespread</label>
</state>
<state>
<abbrev>ID</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>WA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>OR</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>AK</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
</timeperiod>

1 个答案:

答案 0 :(得分:2)

下面是一个带注释的基本示例,使用 ggplot 内置的美国地图数据。如果您还需要包含海外属地(那里的疫情似乎更严重),请参考 Stack Overflow 上其他使用不同底图的示例(此类示例很多)。

library(xml2)
library(dplyr)
library(ggplot2)

# Draw a choropleth of US flu activity by state from flu.xml.
#
# Expects flu.xml in the working directory, with one <state> element per
# state/territory, each holding an <abbrev> (postal code) and a <color>
# (activity level) child.

# read in the XML file

flu <- read_xml("flu.xml")

# get data from it into a data frame: one row per <state>, holding the postal
# abbreviation and the reported activity level

flu_dat <- data.frame(
  id = flu %>% xml_find_all("//state/abbrev") %>% xml_text(),
  value = flu %>% xml_find_all("//state/color") %>% xml_text(),
  stringsAsFactors = FALSE
)

# for built-in (ggplot) map data we need names, not abbreviations.
# The region names in map_data("state") are lowercase, so the lookup must be
# lowercase too, otherwise nothing matches. state.abb has no entry for DC,
# but the base map does contain it, so it is added by hand.

state_name <- c(
  setNames(tolower(state.name), state.abb),
  DC = "district of columbia"
)

us <- map_data("state")

# convert abbrev to name; ensure ordered factor, filter by what the
# built-in map has (Alaska, Hawaii and the territories are dropped here).
# NOTE that if you need the territories, you'll need to use another base
# map of which there are many examples on SO.
# Levels follow the CDC geographic-spread scale, least to most severe;
# "Sporadic" ranks below "Local Activity".

flu_dat <- flu_dat %>%
  mutate(
    id = unname(state_name[id]),
    Level = factor(
      value,
      levels = c(
        "No Activity", "Sporadic", "Local Activity",
        "Regional", "Widespread"
      ),
      ordered = TRUE
    )
  ) %>%
  filter(id %in% unique(us$region))

# for theme_map convenience function
# NOTE(review): this downloads and executes remote code from a gist; pin or
# vendor it if the script is run unattended.
devtools::source_gist("33baa3a79c5cfef0f6df")

gg <- ggplot()
# plot outlines; geom_map only uses map_id, the coordinates come from `map`
gg <- gg + geom_map(
  data = us, map = us,
  aes(map_id = region),
  fill = "#ffffff", color = "#7f7f7f", size = 0.25
)
# geom_map does not train the x/y scales, so set the plot extent explicitly
gg <- gg + expand_limits(x = us$long, y = us$lat)
# plot fills based on flu data
gg <- gg + geom_map(
  data = flu_dat, map = us,
  aes(fill = Level, map_id = id),
  color = "#7f7f7f", size = 0.25
)
# manual fill scale (light = least severe, dark = most severe);
# drop = FALSE keeps all possible levels on the legend even when unused
gg <- gg + scale_fill_manual(
  values = c("#f2f0f7", "#dadaeb", "#bcbddc", "#9e9ac8", "#756bb1"),
  drop = FALSE
)
# a proper US projection
gg <- gg + coord_map("albers", lat0 = 39, lat1 = 45)
gg <- gg + theme_map()
gg <- gg + theme(legend.position = "right")
gg

enter image description here