格式化时间序列数据并创建分组条形图

时间:2013-09-10 21:44:47

标签: r time matrix dataframe bar-chart

好的,所以我正在以一种新的方式解决这个问题。我想创建一个分组的条形图,显示每天三个地点的降水量。我的数据存储为.csv文件。这是:

date    strick  huetten hornberg
31.01.2013  18.15   81.25   100
01.02.2013  12.7    11.75   NA
02.02.2013  59.7    61.25   NA
03.02.2013  NA  NA  NA
04.02.2013  NA  NA  NA
05.02.2013  45.25   31.3    NA
06.02.2013  NA  NA  NA
07.02.2013  NA  NA  NA
08.02.2013  NA  NA  NA
09.02.2013  NA  NA  NA
10.02.2013  NA  NA  NA
11.02.2013  NA  NA  NA
12.02.2013  NA  NA  NA
13.02.2013  NA  NA  NA
14.02.2013  NA  NA  NA
15.02.2013  NA  NA  NA
16.02.2013  NA  NA  NA
17.02.2013  NA  NA  NA
18.02.2013  NA  NA  NA
19.02.2013  NA  NA  NA
20.02.2013  NA  NA  NA
21.02.2013  NA  NA  NA
22.02.2013  NA  NA  NA
23.02.2013  NA  NA  NA
24.02.2013  NA  NA  NA
25.02.2013  NA  NA  NA
26.02.2013  NA  NA  NA
27.02.2013  NA  NA  NA
28.02.2013  55.6    NA  NA
01.03.2013  NA  NA  NA
02.03.2013  NA  NA  NA
03.03.2013  NA  NA  NA
04.03.2013  NA  NA  NA
05.03.2013  NA  NA  NA
06.03.2013  NA  NA  NA
07.03.2013  NA  NA  NA
08.03.2013  NA  NA  NA
09.03.2013  7.95    NA  NA
10.03.2013  NA  NA  NA
11.03.2013  9.65    76.2    NA
12.03.2013  1.65    3.35    NA
13.03.2013  NA  NA  NA
14.03.2013  NA  NA  NA
15.03.2013  NA  NA  NA
16.03.2013  NA  NA  NA
17.03.2013  NA  NA  NA
18.03.2013  NA  NA  NA
19.03.2013  NA  NA  NA
20.03.2013  30.2    NA  NA
21.03.2013  12.05   NA  NA
22.03.2013  NA  NA  NA
23.03.2013  NA  NA  NA
24.03.2013  2.15    NA  NA
25.03.2013  0.25    NA  NA
26.03.2013  NA  NA  NA
27.03.2013  NA  NA  NA
28.03.2013  11.4    NA  NA
29.03.2013  NA  NA  NA
30.03.2013  12.25   NA  NA
31.03.2013  6   NA  NA
01.04.2013  5.6 NA  NA
02.04.2013  NA  NA  NA
03.04.2013  NA  NA  NA
04.04.2013  NA  NA  NA
05.04.2013  NA  NA  NA
06.04.2013  NA  NA  NA
07.04.2013  NA  NA  NA
08.04.2013  NA  78.25   NA
09.04.2013  2.9 NA  NA
10.04.2013  15.6    14.25   NA
11.04.2013  11.55   11.15   NA
12.04.2013  34.8    34.75   NA
13.04.2013  9.65    11.1    NA

我该如何创建分组条形图所需的矩阵/表格?又该如何让条形图的 x 轴与下面这张示例图完全一致(我的时间序列与图中的相同)? enter image description here

此示例图的代码是:

# Draw five daily temperature series from a semicolon-separated CSV into a
# PDF, using a hand-built time axis: labelled weekly ticks (rotated 45 degrees)
# plus short unlabelled daily ticks.

# Directory that holds the input CSV and receives the PDF. It is joined into
# the file paths below instead of calling setwd(), so the session's working
# directory is left untouched.
data_dir <- "path"

rb <- read.csv(file.path(data_dir, "mean_alllocations1.csv"),
               header = TRUE, sep = ";")
rb$DATE <- as.POSIXct(rb$DATE, format = "%d.%m.%Y")

# Tick positions are generated instead of being typed out by hand. The
# sequences are built on Date (pure calendar arithmetic) and then parsed to
# POSIXct with the same format string as rb$DATE, so every tick sits on local
# midnight exactly like the data, also across a daylight-saving change.
first_day <- as.Date("2013-01-31")
last_day <- as.Date("2013-04-13")
as_local_midnight <- function(days) {
  as.POSIXct(format(days, "%d.%m.%Y"), format = "%d.%m.%Y")
}
a <- as_local_midnight(seq(first_day, last_day, by = "week"))  # labelled ticks
b <- as_local_midnight(seq(first_day, last_day, by = "day"))   # minor ticks

# Column, colour and plotting symbol of each series, in drawing order.
# NOTE(review): the columns are named *_TEMP_MIN while the y-axis label and
# the file names say "mean" - confirm which quantity is intended.
series <- data.frame(
  column = c("RBGL830_TEMP_MIN", "SBGL836_TEMP_MIN", "SBGL989_TEMP_MIN",
             "SBBF872_TEMP_MIN", "SBF993_TEMP_MIN"),
  colour = c("olivedrab2", "limegreen", "darkgreen", "sienna3", "sienna4"),
  symbol = c(20, 20, 20, 18, 18),
  stringsAsFactors = FALSE
)

pdf(file.path(data_dir, "air_temp_mean_all_locations.pdf"),
    width = 12, height = 6)
par(mar = c(5, 4, 0.5, 0.5))

y_ticks <- seq(-10, 10, by = 5)

# First series sets up the plot region; both default axes are suppressed and
# redrawn manually below. panel.first draws the dotted grid under the data.
plot(rb$DATE, rb[[series$column[1]]],
     type = "l", col = series$colour[1], lwd = 1.0,
     ylim = c(-10, 10), xlim = range(rb$DATE), xaxs = "i",
     axes = TRUE, xaxt = "n", yaxt = "n", xlab = "", ylab = "",
     panel.first = abline(h = y_ticks, col = "grey", lty = 3))

axis(2, at = y_ticks, las = 1)
axis(1, at = a, labels = FALSE)
axis(1, at = b, labels = FALSE, tck = -0.01)

# axis() cannot rotate labels, so the weekly dates are written with text()
# just below the lower edge of the plot region (xpd = TRUE allows drawing in
# the margin).
text(a, par("usr")[3] - 0.8, labels = format(a, format = "%d.%m.%Y"),
     srt = 45, adj = 1, xpd = TRUE)
mtext("Time", side = 1, line = 4)
mtext("Mean Daily Air Temperature [°C]", side = 2, line = 2.5)

# Dashed reference lines; only those inside ylim are visible.
abline(h = seq(-20, 20, by = 5), col = "grey", lty = 2)

# Remaining series as lines, then all series as points on top.
for (i in seq_len(nrow(series))[-1]) {
  lines(rb$DATE, rb[[series$column[i]]], type = "l",
        col = series$colour[i], lwd = 1.0)
}
for (i in seq_len(nrow(series))) {
  points(rb$DATE, rb[[series$column[i]]],
         pch = series$symbol[i], col = series$colour[i])
}

legend("bottomright",
       c("[4] SB Grassland 989 m", "[5] SB Grassland 836 m",
         "[8] RB Grassland 830 m", "[6] SB Forest 993 m",
         "[7] SBB Forest 872 m"),
       bty = "n", lwd = rep(1.0, 5), pch = c(20, 20, 20, 18, 18),
       col = c("darkgreen", "limegreen", "olivedrab2", "sienna4", "sienna3"))
dev.off()

我手动创建了一个矩阵,并用它绘制了条形图,但结果有误(“hornberg”不应该有值,请对照上面的数据),而且用这种方法我不知道如何把 x 轴设置成时间序列。

# Hand-typed precipitation values, intended to hold one entry per date in the
# row names assigned further down; the three vectors become the three columns
# of `mymatrix`.
# NOTE(review): the values in `a` (18.15, 12.7, 59.7, ...) match the "strick"
# column of the data table and those in `b` (81.25, 11.75, 61.25, ...) match
# "huetten", but colnames() below labels the columns the other way round
# ("huetten", "strick") - confirm and swap.
a<-c(18.15,12.7,59.7,NA,NA,45.25,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,55.6,NA,NA,NA,NA,NA,NA,NA,NA,7.95,NA,9.65,1.65,NA,NA,NA,NA,NA,NA,NA,30.2,12.05,NA,NA,2.15,0.25,NA,NA,11.4,NA,12.25,6,5.6,NA,NA,NA,NA,NA,NA,NA,2.9,15.6,11.55,34.8,9.65)
b<-c(81.25,11.75,61.25,NA,NA,31.3,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,76.2,3.35,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,78.25,NA,14.25,11.15,34.75,11.1)
# NOTE(review): every entry here is NA, whereas the data table lists 100 for
# hornberg on 31.01.2013 - confirm. The variable shares its name with
# base::c(); calls such as c(a, b, c) still resolve to the function, but the
# name is easy to misread.
c<-c(NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA)
# matrix() fills column by column, so with 73 rows the concatenated vectors
# land in columns 1, 2 and 3 (assuming each vector has exactly 73 entries).
mymatrix<-matrix(c(a,b,c),73)
colnames(mymatrix)<-c("huetten","strick","hornberg")
# Row names are the dates as plain strings (day.month.year).
rownames(mymatrix)<-c("31.01.2013","01.02.2013","02.02.2013","03.02.2013","04.02.2013","05.02.2013","06.02.2013","07.02.2013","08.02.2013","09.02.2013","10.02.2013","11.02.2013","12.02.2013","13.02.2013","14.02.2013","15.02.2013","16.02.2013","17.02.2013","18.02.2013","19.02.2013","20.02.2013","21.02.2013","22.02.2013","23.02.2013","24.02.2013","25.02.2013","26.02.2013","27.02.2013","28.02.2013","01.03.2013","02.03.2013","03.03.2013","04.03.2013","05.03.2013","06.03.2013","07.03.2013","08.03.2013","09.03.2013","10.03.2013","11.03.2013","12.03.2013","13.03.2013","14.03.2013","15.03.2013","16.03.2013","17.03.2013","18.03.2013","19.03.2013","20.03.2013","21.03.2013","22.03.2013","23.03.2013","24.03.2013","25.03.2013","26.03.2013","27.03.2013","28.03.2013","29.03.2013","30.03.2013","31.03.2013","01.04.2013","02.04.2013","03.04.2013","04.04.2013","05.04.2013","06.04.2013","07.04.2013","08.04.2013","09.04.2013","10.04.2013","11.04.2013","12.04.2013","13.04.2013")
pdf("test.pdf",width=12,height=6)
# NOTE(review): for a matrix with beside=TRUE, barplot() draws one group of
# bars per COLUMN (here: per location) with one bar per ROW (per date), and
# the three colours are recycled over the dates instead of standing for the
# locations. That would make black bars show up although the third column is
# all NA. For one group per date with one bar per location, the matrix would
# have to be transposed, e.g. t(mymatrix) - confirm against the intended plot.
barplot(mymatrix, beside=TRUE, ylim=c(0,100), col=c("blue","red","black"),las=1)
mtext(2, text="Precipitation [mm/d]", line=2.7)
box()
dev.off()

enter image description here

2 个答案:

答案 0 :(得分:2)

当你用 read.table 把文件读入 R 后,得到的就是一个数据框,无需再把它转换为矩阵。在下面的例子中,我假设你的数据框名为 'df'。

# load necessary packages
library(reshape2)
library(ggplot2)
library(scales)

# Reshape the wide table (one column per location) into long format:
# one row per date/location pair, with the measurement in 'Precipitation'.
df2 <- melt(
  df,
  id.vars = "date",
  variable.name = "Location",
  value.name = "Precipitation"
)
str(df2)

# Parse the day.month.year values of 'date' into class Date.
df2$date <- as.Date(df2$date, format = "%d.%m.%Y")

# Fix the order of the locations and give them capitalised display labels.
location_ids <- c("hornberg", "huetten", "strick")
location_labels <- c("Hornberg", "Huetten", "Strick")
df2$Location <- factor(df2$Location,
                       levels = location_ids,
                       labels = location_labels)

# One labelled x-axis break per week across the observation period.
weekly_breaks <- seq(from = as.Date("2013-01-31"),
                     to = as.Date("2013-04-13"),
                     by = "week")

# Grouped (dodged) bars per date, coloured by location.
ggplot(data = df2, aes(x = date, y = Precipitation, fill = Location)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_x_date(breaks = weekly_breaks, labels = date_format("%d.%m.%Y")) +
  coord_cartesian(ylim = c(0, 105)) +
  labs(x = "Date", y = "Precipitation (mm/d)") +
  theme_bw() +
  theme(
    # rotated date labels, anchored at their upper-right corner
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    # no vertical grid lines; dashed horizontal major grid lines
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(linetype = "longdash")
  )

enter image description here

答案 1 :(得分:0)

使用以下代码:

# Turn data frame `x` into a matrix: every column except the first supplies
# the values, the first column (as character) supplies the row names, and the
# remaining column names are kept as column names.
local({
  values <- as.matrix(x[, -1])
  dimnames(values) <- list(as.character(x[, 1]), names(x)[-1])
  values
})

结果:

           London New_York Moscow
31.01.2013    400      750    390
01.02.2013    350      700    300
02.02.2013    330      730    250
03.02.3013    300      650    250

注:上述结果使用的输入数据如下:

# Example input: a small whitespace-separated table given inline as text;
# the first line of the text supplies the column names.
x <- read.table(
  text = "
Date         London   New_York   Moscow    
31.01.2013   400      750        390
01.02.2013   350      700        300
02.02.2013   330      730        250
03.02.3013   300      650        250
",
  header = TRUE
)