我有以下数据
df<- structure(list(data1 = c(0.013818378, 0.014362551, 0.014647562,
0.0136627, 0.015510173, 0.006818502, 0.006683564, 0.006655434,
0.006691479, 0.00666666, 0.014507653, 0.017446481, 0.014021427,
0.013963069, 0.020706391, 0.007104358, 0.006809539, 0.006680631,
0.009059533, 0.006681197, 0.015691738, 0.016709763, 0.015761994,
0.016062111, 0.015917196, 0.006816436, 0.006809539, 0.006680631,
0.009059533, 0.006681197), data2 = c(0.045378058, 0.041371486,
0.046058451, 0.040479177, 0.051143336, 0.016131932, 0.014399847,
0.014950329, 0.016408355, 0.015886182, 0.046151342, 0.05265521,
0.046046663, 0.040515428, 0.086865434, 0.019222881, 0.016926183,
0.016703444, 0.081352865, 0.132841645, 0.051641343, 0.059851738,
0.04830957, 0.047550067, 0.049228835, 0.015154055, 0.016926183,
0.016703444, 0.081352865, 0.132841645), time = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L
), place = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L), .Label = c("B02", "B03", "B04", "B05",
"B06", "C02", "C03", "C04", "C05", "C06"), class = "factor")), .Names = c("data1",
"data2", "time", "place"), class = "data.frame", row.names = c(NA,
-30L))
它有几个数据,可以按时间区分
我试图将它们分开并在各种数据框中重新设置它们
除时间和地点之外的每一列都是需要组织的一个数据
例如,对于时间1的data1
B 0.013818378 0.014362551 0.014647562 0.0136627 0.015510173
C 0.006818502 0.006683564 0.006655434 0.006691479 0.00666666
时间10的数据1
B 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
C 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
等等
答案 0 :(得分:2)
我们通过在字母和数字之间进行拆分,将separate
“地点”列分为两列,将spread
转换为“宽”格式
library(dplyr)
library(tidyr)
df %>%
separate(place, into = c("grp", "number"), "(?<=[A-Z])(?=[0-9])") %>%
select(-data2) %>%
spread(number, data1)
# time grp 02 03 04 05 06
#1 1 B 0.013818378 0.014362551 0.014647562 0.013662700 0.015510173
#2 1 C 0.006818502 0.006683564 0.006655434 0.006691479 0.006666660
#3 10 B 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
#4 10 C 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
#5 17 B 0.015691738 0.016709763 0.015761994 0.016062111 0.015917196
#6 17 C 0.006816436 0.006809539 0.006680631 0.009059533 0.006681197
如果我们想要'{1}}数据集'data1'和'data2'
list
当我们有多个nm1 <- grep("data", names(df), value = TRUE)
nm1 %>%
purrr::map(~ df %>%
select(-one_of(nm1), .x) %>%
separate(place, into = c("grp", "number"), "(?<=[A-Z])(?=[0-9])") %>%
spread(number, .x) )
#[[1]]
# time grp 02 03 04 05 06
#1 1 B 0.013818378 0.014362551 0.014647562 0.013662700 0.015510173
#2 1 C 0.006818502 0.006683564 0.006655434 0.006691479 0.006666660
#3 10 B 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
#4 10 C 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
#5 17 B 0.015691738 0.016709763 0.015761994 0.016062111 0.015917196
#6 17 C 0.006816436 0.006809539 0.006680631 0.009059533 0.006681197
#[[2]]
# time grp 02 03 04 05 06
#1 1 B 0.04537806 0.04137149 0.04605845 0.04047918 0.05114334
#2 1 C 0.01613193 0.01439985 0.01495033 0.01640835 0.01588618
#3 10 B 0.04615134 0.05265521 0.04604666 0.04051543 0.08686543
#4 10 C 0.01922288 0.01692618 0.01670344 0.08135286 0.13284165
#5 17 B 0.05164134 0.05985174 0.04830957 0.04755007 0.04922883
#6 17 C 0.01515405 0.01692618 0.01670344 0.08135286 0.13284165
列时,不清楚输出应该如何。来自value
的{{1}}可以处理多个dcast
列
data.table
答案 1 :(得分:1)
你的问题有点不清楚,但我认为这就是你想要的:
library(tidyverse)
df %>%
mutate(
column = str_extract(place, "[0-9]+"),
place = str_extract(place, "[A-Z]")
) %>%
gather(data1, data2, key = "data", value = "val") %>%
spread(column, val) %>%
split(f = .$data)
产生以下格式:
$data1
time place data 02 03 04 05 06
1 1 B data1 0.013818378 0.014362551 0.014647562 0.013662700 0.015510173
3 1 C data1 0.006818502 0.006683564 0.006655434 0.006691479 0.006666660
5 10 B data1 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
7 10 C data1 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
9 17 B data1 0.015691738 0.016709763 0.015761994 0.016062111 0.015917196
11 17 C data1 0.006816436 0.006809539 0.006680631 0.009059533 0.006681197
$data2
time place data 02 03 04 05 06
2 1 B data2 0.04537806 0.04137149 0.04605845 0.04047918 0.05114334
4 1 C data2 0.01613193 0.01439985 0.01495033 0.01640835 0.01588618
6 10 B data2 0.04615134 0.05265521 0.04604666 0.04051543 0.08686543
8 10 C data2 0.01922288 0.01692618 0.01670344 0.08135286 0.13284165
10 17 B data2 0.05164134 0.05985174 0.04830957 0.04755007 0.04922883
12 17 C data2 0.01515405 0.01692618 0.01670344 0.08135286 0.13284165