Question

我有以下任务： - 从URL Excel文件中删除2个选项卡（包含每小时数据） - 在一个标签中，我只对16:00的数据点感兴趣，因此对其进行过滤 - 在第二个标签中，我对所有数据点感兴趣，但为了对齐，我创建24小时值的每日平均值

然后，我使用某种cbind.fill函数，因为我想合并数据。这是必要的，因为我当然只在16:00得到我的16:00并且在16:00之前无法使用CBIND，因为我在聚合标签中只有一条线。

然后我做一些变量过滤和重命名。

在绘图之前，我使用融合功能来创建某种子图。这可能是问题所在，我在运行代码时也会收到警告。我不明白为什么，但我很难将等级转换为数字字符。

我仍然可以在最后绘制，但x轴看起来很可怕。这实际上并不是一项艰巨的任务，但我无处可去。如果你能给我一个提示，我会很感激。

提前致谢！

代码：

is.installed <- function(mypkg){
  is.element(mypkg, installed.packages()[,1])
} 
if (!is.installed("ggplot2")){
  install.packages("ggplot2")
}
if (!is.installed("lubridate")){
  install.packages("lubridate")
}
if (!is.installed("openxlsx")){
  install.packages("openxlsx")
}
library(ggplot2)
library(lubridate)
library(openxlsx)
library(reshape2)
library(dplyr)

Storico_G <- read.xlsx(xlsxFile = "http://www.snamretegas.it/repository/file/Info-storiche-qta-gas-trasportato/dati_operativi/2017/DatiOperativi_2017-IT.xlsx",sheet = "Storico_G", startRow = 1, colNames = TRUE)

Storico_G1 <- read.xlsx(xlsxFile = "http://www.snamretegas.it/repository/file/Info-storiche-qta-gas-trasportato/dati_operativi/2017/DatiOperativi_2017-IT.xlsx",sheet = "Storico_G+1", startRow = 1, colNames = TRUE)

# Selecting Column C,E,R from Storico_G and stored in variable Storico_G_df
# Selecting Column A,P from Storico_G+1 and stored in variable Storico_G1_df

Storico_G_df <- data.frame(Storico_G$pubblicazione,Storico_G$IMMESSO, Storico_G$`RICONSEGNATO.(1)`, Storico_G$BILANCIAMENTO.RESIDUALE )
Storico_G1_df <- data.frame(Storico_G1$pubblicazione, Storico_G1$`SBILANCIAMENTO.ATTESO.DEL.SISTEMA.(SAS)`)


# Conerting pubblicazione in date format and time
Storico_G_df$pubblicazione <- ymd_h(Storico_G_df$Storico_G.pubblicazione)
Storico_G1_df$pubblicazione   <- ymd_h(Storico_G1_df$Storico_G1.pubblicazione)


# Selecting on row which is having 4PM value in Storico_G+1 excel sheet tab
Storico_G1_df <- subset(Storico_G1_df, hour(Storico_G1_df$pubblicazione) == 16)
rownames(Storico_G1_df) <- 1:nrow(Storico_G1_df)

# Averaging hourly values to 1 daily data point in G excel sheet tab
Storico_G_df$Storico_G.pubblicazione <- strptime(Storico_G_df$Storico_G.pubblicazione, "%Y_%m_%d_%H")
storico_G_df_agg <- aggregate(Storico_G_df, by=list(day=format(Storico_G_df$Storico_G.pubblicazione, "%F")), FUN=mean, na.rm=TRUE)[,-2]

#cbind.fill function
cbind.fill <- function(...){
    nm <- list(...) 
    nm <- lapply(nm, as.matrix)
    n <- max(sapply(nm, nrow)) 
    do.call(cbind, lapply(nm, function (x) 
        rbind(x, matrix(, n-nrow(x), ncol(x))))) 
}

#cbind with both frames
G_G1_df= data.frame(cbind.fill(storico_G_df_agg,Storico_G1_df))

#keep required columns
keep=c("Storico_G.IMMESSO","Storico_G..RICONSEGNATO..1..","Storico_G1..SBILANCIAMENTO.ATTESO.DEL.SISTEMA..SAS..")

#update dataframe to kept variables
G_G1_df=G_G1_df[,keep,drop=FALSE]

#add counting variable
G_G1_df$X=seq(nrow(G_G1_df))

#Rename crazy variable names
G_G1_df <- data.frame(G_G1_df) %>% 
     select(Storico_G.IMMESSO, Storico_G..RICONSEGNATO..1.., Storico_G1..SBILANCIAMENTO.ATTESO.DEL.SISTEMA..SAS.., X)
names(G_G1_df) <- c("Immesso","Riconsegnato", "SAS","X")

#Melt time series
G_G1_df=melt(G_G1_df,id.vars = "X")

#Create group variable
G_G1_df$group<- ifelse(G_G1_df$variable == "SAS", "SAS", "Immesso/Consegnato")

#plot
ggplot(G_G1_df, aes(X,value, col=variable))+geom_point()+geom_line()+facet_wrap(~group,ncol=1,scales="free_y")+geom_abline(intercept=c(-2,0,2),slope=0,data=subset(G_G1_df,group=="SAS"),lwd=0.5,lty=2)

Answer 1

除了数据争论部分（肯定会有很大改进）之外，情节问题在于value列属于character类。（这来自数据争论部分）

作为一个简单的解决方案，您只需将其转换为ggplot调用中的数字：

library(ggplot2)

ggplot(G_G1_df, aes(X,as.numeric(value), col=variable))+
    geom_point()+
    geom_line()+
    facet_wrap(~group,ncol=1,scales="free_y")+
    geom_abline(intercept=c(-2,0,2),slope=0,data=subset(G_G1_df,group=="SAS"),lwd=0.5,lty=2)
#> Warning: Removed 1 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing missing values (geom_path).

美学不对，可能与融化功能有关

1 个答案: