我正在尝试合并具有不同行数的两个熊猫数据框,也有一些熊猫共有的索引值,而有些不是。我想创建一个组合的数据框。
我正在使用'concat'方法获得结果
library(tidyr)
library(ggplot2)
library(ggiraph)
library(ggdendro)
library(heatmaply)
# mydata <- cor(mtcars)
create_data <- function(){
df <- matrix(runif(2500, min = -2, max = 2), ncol = 50)
row.names(df) <- paste0("row_", seq_len(nrow(df)))
colnames(df) <- paste0("col_", seq_len(ncol(df)))
return(df)
}
gg2heat <- function(mydata){
# dendrogram for rows
hc <- hclust(dist(mydata), "ave")
dhr <- as.dendrogram(hc)
order_r <- rownames(mydata)[hc$order]
# dendrogram for columns
hc <- hclust(dist(t(mydata)), "ave")
dhc <- as.dendrogram(hc)
order_c <- colnames(mydata)[hc$order]
# the data
expr_set <- bind_cols(
data_frame(rowvar = rownames(mydata)),
as.data.frame(mydata)
)
expr_set <- gather(expr_set, colvar, measure, -rowvar)
expr_set$rowvar <- factor( expr_set$rowvar, levels = order_r )
expr_set$colvar <- factor( expr_set$colvar, levels = order_c )
expr_set <- arrange(expr_set, rowvar, colvar)
# get data for dendrograms - IMHO, ggdendro is the hero here...
data_c <- dendro_data(dhc, type = "rectangle")
data_c <- segment(data_c) %>% mutate(
y = y + length(order_r) + .5,
yend = yend + length(order_r) + .5
)
data_r <- dendro_data(dhr, type = "rectangle")
data_r <- segment(data_r)
data_r <- data_r %>%
mutate( x_ = y + length(order_c) + .5,
xend_ = yend + length(order_c) + .5,
y_ = x,
yend_ = xend )
expr_set <- expr_set %>%
mutate(
tooltip = sprintf("Row: %s<br/>Col: %s<br/>measure: %.02f",
rowvar, colvar, measure) ,
data_id = sprintf("%s_%s", rowvar, colvar)
)
# all data are tidy and can be now used with ggplot
p <- ggplot(data = expr_set, aes(x = colvar, y = rowvar) ) +
geom_tile_interactive(aes(fill = measure, tooltip = tooltip, data_id = data_id), colour = "white") +
scale_fill_gradient(low = "white", high = "#BC120A") +
geom_segment(
data = data_c,
mapping = aes(x = x, y = yend, xend = xend, yend = y),
colour = "gray20", size = .2) +
geom_segment(
data = data_r,
mapping = aes(x = x_, y = y_, xend = xend_, yend = yend_),
colour = "gray20", size = .2) +
coord_equal()
# cosmetics
p <- p + theme_minimal() +
theme(
legend.position = "right",
panel.grid.minor = element_line(color = "transparent"),
panel.grid.major = element_line(color = "transparent"),
axis.ticks.length = unit(2, units = "mm"),
plot.title = element_text(face = "bold", hjust = 0.5, size = 12),
axis.title = element_text(size = 9, colour = "gray30"),
axis.text.y = element_text(hjust = 1, size = 5, colour = "gray40"),
axis.text.x = element_text(angle = 90, hjust = 1, size = 5, colour = "gray40"),
legend.title=element_text(face = "bold", hjust = 0.5, size=8),
legend.text=element_text(size=6)
)
ggiraph(ggobj = p)
}
htmp_gg <- c()
htmp_maply <-c()
for (i in 1:20){
df <- create_data()
time_gg <- (system.time(gg2heat(df)))[3]
htmp_gg<- append(htmp_gg, values = time_gg)
time_heatmaply <- (system.time(heatmaply::heatmaply(df, hclust_method = 'ave')))[3]
htmp_maply<- append(htmp_maply, values = time_heatmaply)
rm(df)
}
score <- data.frame(htmp_gg, htmp_maply)%>% gather(key = 'method', value = 'time')
p <- ggplot(score, aes(x = method, y = time, fill = method))+geom_violin()+ stat_summary(fun.y=median, geom="point", size=2, color="black")
print(p)
例如我有以下两个数据帧(df1和df2)
result=pd.concat([df1,df2]).sort_index()
并希望获得此结果:
index value1
2019-01-01 1
2019-02-01 2
2019-03-01 3
2019-04-01 4
2019-05-01 5
2019-12-01 17
index value2
2019-01-01 2
2019-02-01 6
2019-08-01 9
2019-09-01 7.5
2019-10-01 11
但是当我使用上面的“ concat”时,会得到类似的结果,其中常见的索引被复制而不是合并。
index value1 value2
2019-01-01 1 2
2019-02-01 2 6
2019-03-01 3 NaN
2019-04-01 4 NaN
2019-05-01 5 NaN
2019-08-01 NaN 9
2019-09-01 NaN 7.5
2019-10-01 NaN 11
2019-12-01 17 NaN
任何人都可以指出我在这里做错了什么,或者如何获得我想要的结果?