我正在尝试在 R 中把下面这个数据框:
# Sample data in long format: six elements (A-F) spread over three groups,
# each element measured at the same six time points.
tableDataSortedCols <- data.frame(
  # Group 1 holds 2 elements, group 2 holds 3, group 3 holds 1 (6 rows each).
  groups = rep(c(1, 2, 3), times = c(12, 18, 6)),
  element = rep(c("A", "B", "C", "D", "E", "F"), each = 6),
  # The same six-point hour grid repeats for every element.
  hours = rep(c(0, 4, 8, 12, 16, 20), times = 6),
  values = c(
    123.0, 124.0, 123.5, 125.0, 123.0, 123.0, # A
    223.0, 224.0, 223.5, 225.0, 223.0, 223.0, # B
    223.1, 223.1, 223.1, 223.5, 223.1, 223.2, # C
    233.1, 234.0, 233.5, 235.0, 233.0, 233.0, # D
    323.0, 324.0, 323.5, 325.0, 323.0, 323.0, # E
    523.0, 524.0, 523.5, 525.0, 523.0, 523.0  # F
  )
)
转换成如下形式:
groups element timeseries
1 1 A 123.0 124.0 123.5 125.0 123.0 123.0
2 B 223.0 224.0 223.5 225.0 223.0 223.0
3 2 C 223.1 223.1 223.1 223.5 223.1 223.2
4 D 233.1 234.0 233.5 235.0 233.0 233.0
5 E 323.0 324.0 323.5 325.0 323.0 323.0
6 3 F 523.0 524.0 523.5 525.0 523.0 523.0
也就是把 hours 和 values 这两列折叠/汇总成一个名为 timeseries 的序列,
其中 timeseries 序列中的每个元素依次对应一个 hours 值:
0、4、8、12、16、20 小时。
这是我到目前为止所做的:
#' Blank out repeated labels so each value shows only on its first row
#'
#' The column is converted to character. For every distinct value, the first
#' row carrying it keeps the label and all later rows are set to "".
#'
#' @param df A data frame. Any existing grouping is replaced while the row
#'   positions are computed, and the result is returned ungrouped.
#' @param variable Unquoted name of the column whose repeats are blanked.
#' @return `df`, ungrouped, with `variable` replaced by a character column.
#'   A scratch column named `groupRow` is created and removed along the way,
#'   so an input column with that name is overwritten and dropped.
collapse_rows_df <- function(df, variable) {
  group_var <- enquo(variable)
  df %>%
    group_by(!!group_var) %>%
    # row_number() is safe for zero-row input, where 1:n() yields c(1, 0).
    mutate(groupRow = row_number()) %>%
    ungroup() %>%
    mutate(
      # if_else() always returns character here; ifelse() degrades to
      # logical(0) when the input has no rows.
      !!quo_name(group_var) := if_else(
        groupRow == 1L,
        as.character(!!group_var),
        ""
      )
    ) %>%
    select(-groupRow)
}
# Attempt so far: move `groups` to the front, drop duplicate rows, blank the
# repeated group labels, and render the result with formattable.
tableOut <- formattable(
  collapse_rows_df(
    distinct(
      select(group_by(tableDataSortedCols, groups), groups, everything())
    ),
    groups
  )
)
您能提出一种实现此目标的方法吗?
答案 0 :(得分:1)
library(tidyverse)
#' Collapse one element's readings into a space-separated string
#'
#' @param hours Vector of time points, one per entry of `values`.
#' @param values Vector of readings, the same length as `hours`.
#' @param nsmall Minimum number of decimals shown for numeric `values`, so
#'   that 123 is rendered as "123.0" rather than "123".
#' @return A single string holding `values` ordered by `hours`, separated by
#'   single spaces ("" for empty input).
make_timeseries <- function(hours, values, nsmall = 1L) {
  stopifnot(length(hours) == length(values))
  ordered_values <- values[order(hours)]
  if (is.numeric(ordered_values)) {
    # paste() alone drops trailing zeros; format() keeps a common number of
    # decimals, and trim = TRUE prevents padding to a common width.
    ordered_values <- format(
      ordered_values,
      digits = 15,
      nsmall = nsmall,
      trim = TRUE
    )
  }
  paste(ordered_values, collapse = " ")
}
# Collapse every (groups, element) pair into one row whose `timeseries`
# string holds that element's values ordered by hour.
tableDataSortedCols %>%
  group_by(groups, element) %>%
  summarise(timeseries = make_timeseries(hours, values)) %>%
  # summarise() only peels off the last grouping level (`element`); drop the
  # remaining `groups` grouping so later verbs are not silently per-group.
  ungroup()
答案 1 :(得分:1)
您可以使用 data.table 包中的 dcast() 函数。
> dcast.data.table(data.table(tableDataSortedCols), ... ~ hours,
+ value.var=c("values"))
groups element 0 4 8 12 16 20
1: 1 A 123.0 124.0 123.5 125.0 123.0 123.0
2: 1 B 223.0 224.0 223.5 225.0 223.0 223.0
3: 2 C 223.1 223.1 223.1 223.5 223.1 223.2
4: 2 D 233.1 234.0 233.5 235.0 233.0 233.0
5: 2 E 323.0 324.0 323.5 325.0 323.0 323.0
6: 3 F 523.0 524.0 523.5 525.0 523.0 523.0
要把这些列合并成一列,可以这样做:
library(data.table)
# Reshape to wide format: one row per (groups, element), one column per hour.
tableDataSortedCols.1 <- dcast.data.table(
  data.table(tableDataSortedCols),
  ... ~ hours,
  value.var = "values"
)
tableDataSortedCols.1 <- as.data.frame(tableDataSortedCols.1) # to get back a data frame.

# Everything after the two id columns is an hour column, however many there
# are. Format each column to one decimal, then paste the columns together
# row-wise; this is vectorised and avoids coercing the data frame via apply().
out <- data.frame(
  tableDataSortedCols.1[, 1:2],
  timeseries = do.call(
    paste,
    c(
      unname(
        lapply(
          tableDataSortedCols.1[, -(1:2), drop = FALSE],
          function(x) sprintf("%.1f", x)
        )
      ),
      sep = " "
    )
  )
)
输出结果:
> out
groups element timeseries
1 1 A 123.0 124.0 123.5 125.0 123.0 123.0
2 1 B 223.0 224.0 223.5 225.0 223.0 223.0
3 2 C 223.1 223.1 223.1 223.5 223.1 223.2
4 2 D 233.1 234.0 233.5 235.0 233.0 233.0
5 2 E 323.0 324.0 323.5 325.0 323.0 323.0
6 3 F 523.0 524.0 523.5 525.0 523.0 523.0
答案 2 :(得分:0)
library(tidyverse)
# Format the values as text, spread them into one column per hour, then glue
# the hour columns into a single space-separated `timeseries` column.
tableDataSortedCols %>%
  # trim = TRUE stops format() from padding shorter numbers with spaces;
  # nsmall = 1 keeps at least one decimal (e.g. "123.0").
  mutate(values = format(values, nsmall = 1, trim = TRUE)) %>%
  arrange(groups, element, hours) %>%
  spread(hours, values) %>%
  # Select the hour columns by exclusion rather than by position 3:8, so the
  # step still works when the number of distinct hours changes.
  unite(timeseries, -groups, -element, sep = " ")