我在使用 R 将行转换为若干列时遇到了问题。下面给出一个示例。我有这样一张表:
# Sample input: one row per date, with a comma-separated list of
# "name = value" pairs in `variables` and a numeric measurement in `values`.
datex <- c("01/01/21","02/01/21","03/01/21","04/01/21","05/01/21","06/01/21",
"07/01/21","08/01/21","09/01/21","10/01/21","11/01/21","12/01/21",
"13/01/21","14/01/21","15/01/21","16/01/21","17/01/21","18/01/21",
"19/01/21","20/01/21","21/01/21","22/01/21","23/01/21","24/01/21",
"25/01/21","26/01/21","27/01/21")
# `variables` was referenced by data.frame() below but never defined, so the
# snippet could not run. The vector is rebuilt from the printed result of the
# updated answer (27 rows, "," between pairs, " = " inside a pair).
# NOTE(review): the order of pairs inside each string is assumed — confirm
# against the real data.
variables <- c(
  "api = http://xxx/",
  "api = http://xxx/",
  "api = http://yyy/",
  "api = http://yyy/",
  "api = http://xxx/,hostname = digital1",
  "api = http://xxx/,hostname = digital1",
  "api = http://xxx/,hostname = digital2",
  "api = http://xxx/,hostname = digital2",
  "hostname = digital1",
  "hostname = digital1",
  "hostname = digital2",
  "hostname = digital2",
  "api = http://xxx/,hostname = digital1,product_id = 0",
  "api = http://xxx/,hostname = digital1,product_id = 1",
  "api = http://xxx/,hostname = digital2,product_id = 0",
  "product_id = 0",
  "product_id = 1",
  "product_id = 2",
  "hostname = digital1,product_id = 0",
  "hostname = digital1,product_id = 1",
  "hostname = digital1,product_id = 2",
  "status_code = 1",
  "status_code = 2",
  "status_code = 3",
  "api = http://xxx/,hostname = digital1,product_id = 0,status_code = 1",
  "api = http://xxx/,hostname = digital1,product_id = 1,status_code = 1",
  "api = http://xxx/,hostname = digital2,product_id = 0,status_code = 1"
)
values <- c(24,34,11,1,0,11,15,18,0,11,15,18,21,5,5,23,44,32,12,3,4,7,5,4,10,6,6)
x <- data.frame(datex, variables, values)
我想把上面的表转换成下面这张表。
# Target table: the same 27 daily dates (dd/01/21) and measurements as the
# input table.
# NOTE(review): only `datex` and `values` are built here; the per-variable
# columns of the desired wide layout are not part of this snippet — confirm.
datex <- sprintf("%02d/01/21", seq_len(27))
values <- c(
  24, 34, 11, 1, 0, 11, 15, 18, 0,
  11, 15, 18, 21, 5, 5, 23, 44, 32,
  12, 3, 4, 7, 5, 4, 10, 6, 6
)
y <- data.frame(datex = datex, values = values)
谢谢
更新: 这是真实的数据样本。
答案 0 :(得分:1)
这行得通吗:
library(dplyr)
library(tidyr)
library(stringr)  # str_extract() lives here; dplyr/tidyr do not export it

# Turn the comma-separated codes in `variables` into one column per letter.
df %>%
  # One row per individual code, e.g. "A1,B1" becomes two rows.
  separate_rows(variables) %>%
  # The run of capital letters in a code names its destination column.
  mutate(names = str_extract(variables, "[A-Z]+")) %>%
  # Spread the codes back out: one row per (datex, value) pair.
  pivot_wider(
    id_cols = c(datex, value),
    names_from = names,
    values_from = variables
  )
# A tibble: 12 x 5
datex value A B C
<date> <dbl> <chr> <chr> <chr>
1 2021-01-01 80 A1 NA NA
2 2021-01-02 38 A2 NA NA
3 2021-01-03 88 A1 B1 NA
4 2021-01-04 72 A1 B1 C1
5 2021-01-05 56 A1 B1 C2
6 2021-01-06 91 A2 NA C1
7 2021-01-07 61 NA B1 NA
8 2021-01-08 81 NA B2 NA
9 2021-01-09 44 NA B2 C1
10 2021-01-10 26 NA B2 C2
11 2021-01-11 26 NA NA C1
12 2021-01-12 71 NA NA C2
使用的数据:
# Sample data literal (looks like dput() output): a 12-row tibble with
#   datex     - Date column stored as day counts 18628..18639
#   variables - character column of comma-separated codes such as "A1,B1,C1"
#   value     - numeric column
# NOTE(review): the literal is not assigned to a name here; presumably it is
# the `df` used by the pipeline in this answer — confirm.
structure(list(datex = structure(c(18628, 18629, 18630, 18631,
18632, 18633, 18634, 18635, 18636, 18637, 18638, 18639), class = "Date"),
variables = c("A1", "A2", "A1,B1", "A1,B1,C1", "A1,B1,C2",
"A2,C1", "B1", "B2", "B2,C1", "B2,C2", "C1", "C2"), value = c(80,
38, 88, 72, 56, 91, 61, 81, 44, 26, 26, 71)), row.names = c(NA,
-12L), class = c("tbl_df", "tbl", "data.frame"))
更新答案:
# Real-data version: every entry in `variables` is a "name = value" pair.
# The leading "+" console continuation prompts were removed; pasted as-is
# they are parsed as unary plus and break the pipe.
x %>%
  # One row per "name = value" pair.
  separate_rows(variables, sep = ",") %>%
  # Everything before " = " becomes the destination column name.
  # Namespace-qualified so the call works without attaching stringr.
  mutate(name = stringr::str_extract(variables, ".*(?=\\s=\\s.*)")) %>%
  # Cells keep the full "name = value" text, one row per (datex, values).
  pivot_wider(
    id_cols = c(datex, values),
    names_from = name,
    values_from = variables
  ) %>%
  # Show all rows instead of the default truncated tibble print.
  print(n = 50)
# A tibble: 27 x 6
datex values api hostname product_id status_code
<chr> <dbl> <chr> <chr> <chr> <chr>
1 01/01/21 24 api = http://xxx/ NA NA NA
2 02/01/21 34 api = http://xxx/ NA NA NA
3 03/01/21 11 api = http://yyy/ NA NA NA
4 04/01/21 1 api = http://yyy/ NA NA NA
5 05/01/21 0 api = http://xxx/ hostname = digital1 NA NA
6 06/01/21 11 api = http://xxx/ hostname = digital1 NA NA
7 07/01/21 15 api = http://xxx/ hostname = digital2 NA NA
8 08/01/21 18 api = http://xxx/ hostname = digital2 NA NA
9 09/01/21 0 NA hostname = digital1 NA NA
10 10/01/21 11 NA hostname = digital1 NA NA
11 11/01/21 15 NA hostname = digital2 NA NA
12 12/01/21 18 NA hostname = digital2 NA NA
13 13/01/21 21 api = http://xxx/ hostname = digital1 product_id = 0 NA
14 14/01/21 5 api = http://xxx/ hostname = digital1 product_id = 1 NA
15 15/01/21 5 api = http://xxx/ hostname = digital2 product_id = 0 NA
16 16/01/21 23 NA NA product_id = 0 NA
17 17/01/21 44 NA NA product_id = 1 NA
18 18/01/21 32 NA NA product_id = 2 NA
19 19/01/21 12 NA hostname = digital1 product_id = 0 NA
20 20/01/21 3 NA hostname = digital1 product_id = 1 NA
21 21/01/21 4 NA hostname = digital1 product_id = 2 NA
22 22/01/21 7 NA NA NA status_code = 1
23 23/01/21 5 NA NA NA status_code = 2
24 24/01/21 4 NA NA NA status_code = 3
25 25/01/21 10 api = http://xxx/ hostname = digital1 product_id = 0 status_code = 1
26 26/01/21 6 api = http://xxx/ hostname = digital1 product_id = 1 status_code = 1
27 27/01/21 6 api = http://xxx/ hostname = digital2 product_id = 0 status_code = 1
答案 1 :(得分:1)
这可能是更好的解决方案
# Split the comma-separated pairs onto separate rows, break each pair at
# " = " into a name and a value, then give every name its own column.
x %>%
  separate_rows(variables, sep = ",") %>%
  separate(
    variables,
    into = c("var_name", "var"),
    sep = " = "
  ) %>%
  pivot_wider(
    names_from = var_name,
    values_from = var
  )
# A tibble: 12 x 5
datex value A B C
<chr> <dbl> <chr> <chr> <chr>
1 01/01/21 80 A1 NA NA
2 02/01/21 38 A2 NA NA
3 03/01/21 88 A1 B1 NA
4 04/01/21 72 A1 B1 C1
5 05/01/21 56 A1 B1 C2
6 06/01/21 91 A2 NA C1
7 07/01/21 61 NA B1 NA
8 08/01/21 81 NA B2 NA
9 09/01/21 44 NA B2 C1
10 10/01/21 26 NA B2 C2
11 11/01/21 26 NA NA C1
12 12/01/21 71 NA NA C2
第二个样本的输出:
# Same reshaping applied to the second sample: one row per pair, pair split
# at " = " into name and value, names widened into columns.
x %>%
  separate_rows(variables, sep = ",") %>%
  separate(
    variables,
    into = c("var_name", "var"),
    sep = " = "
  ) %>%
  pivot_wider(
    names_from = var_name,
    values_from = var
  )
# A tibble: 27 x 6
datex values api hostname product_id status_code
<chr> <dbl> <chr> <chr> <chr> <chr>
1 01/01/21 24 http://xxx/ NA NA NA
2 02/01/21 34 http://xxx/ NA NA NA
3 03/01/21 11 http://yyy/ NA NA NA
4 04/01/21 1 http://yyy/ NA NA NA
5 05/01/21 0 http://xxx/ digital1 NA NA
6 06/01/21 11 http://xxx/ digital1 NA NA
7 07/01/21 15 http://xxx/ digital2 NA NA
8 08/01/21 18 http://xxx/ digital2 NA NA
9 09/01/21 0 NA digital1 NA NA
10 10/01/21 11 NA digital1 NA NA
# ... with 17 more rows
完整输出
datex values api hostname product_id status_code
1 01/01/21 24 http://xxx/ <NA> <NA> <NA>
2 02/01/21 34 http://xxx/ <NA> <NA> <NA>
3 03/01/21 11 http://yyy/ <NA> <NA> <NA>
4 04/01/21 1 http://yyy/ <NA> <NA> <NA>
5 05/01/21 0 http://xxx/ digital1 <NA> <NA>
6 06/01/21 11 http://xxx/ digital1 <NA> <NA>
7 07/01/21 15 http://xxx/ digital2 <NA> <NA>
8 08/01/21 18 http://xxx/ digital2 <NA> <NA>
9 09/01/21 0 <NA> digital1 <NA> <NA>
10 10/01/21 11 <NA> digital1 <NA> <NA>
11 11/01/21 15 <NA> digital2 <NA> <NA>
12 12/01/21 18 <NA> digital2 <NA> <NA>
13 13/01/21 21 http://xxx/ digital1 0 <NA>
14 14/01/21 5 http://xxx/ digital1 1 <NA>
15 15/01/21 5 http://xxx/ digital2 0 <NA>
16 16/01/21 23 <NA> <NA> 0 <NA>
17 17/01/21 44 <NA> <NA> 1 <NA>
18 18/01/21 32 <NA> <NA> 2 <NA>
19 19/01/21 12 <NA> digital1 0 <NA>
20 20/01/21 3 <NA> digital1 1 <NA>
21 21/01/21 4 <NA> digital1 2 <NA>
22 22/01/21 7 <NA> <NA> <NA> 1
23 23/01/21 5 <NA> <NA> <NA> 2
24 24/01/21 4 <NA> <NA> <NA> 3
25 25/01/21 10 http://xxx/ digital1 0 1
26 26/01/21 6 http://xxx/ digital1 1 1
27 27/01/21 6 http://xxx/ digital2 0 1