我需要像这样转换数据帧:
> test
day_1 Freq_1 Percent_1 day_2 Freq_2 Percent_2 day_3 Freq_3 Percent_3
1 104 1514 1.720063622 847 23.11370681 4.368660e-02 14 0.004547728 2.068183e-05
2 37 11754 13.353783231 994 16.67422696 3.151551e-02 760 0.009095457 4.136367e-05
3 277 11689 13.279936378 <NA> NA NA 371 0.013643185 6.204550e-05
4 1622 20314 23.078845717 153 5.08996749 9.620412e-03 737 0.018190914 8.272733e-05
5 277 11689 13.279936378 994 16.67422696 3.151551e-02 92 0.022738642 1.034092e-04
6 1622 20314 23.078845717 847 23.11370681 4.368660e-02 534 0.027286370 1.240910e-04
7 1677 96 0.109066121 964 0.17766689 3.358035e-04 302 0.031834099 1.447728e-04
8 37 11754 13.353783231 994 16.67422696 3.151551e-02 185 0.036381827 1.654547e-04
9 1073 1405 1.596228130 <NA> NA NA 51 0.040929556 1.861365e-04
10 1622 20314 23.078845717 847 23.11370681 4.368660e-02 4 0.045477284 2.068183e-04
11 702 20110 22.847080209 <NA> NA NA 289 0.050025013 2.275002e-04
转换为如下形式:
| 847 | 14 | 994
104 | 23.1137| 0 | 0
847 | 0 | 0.00454 | 0
37 | 0 | 0 | 16.674
行名和列名取自各天的取值(day_1、day_2、day_3),单元格中的值是所分析的第二天对应的 Freq。行表示所分析的第一天,列表示第二天。当某一天为 NA 时,跳过它,改为与下一个非 NA 的天相连接。
答案 0 :(得分:0)
数据
# Example data: three (day, Freq, Percent) triplets per row.
# Missing days are printed as "<NA>" in the pasted table. That token is not
# in the default `na.strings`, so without the explicit setting below it
# would be read as a literal string and turn `day_2` into a character column.
df <- read.table(text="day_1 Freq_1 Percent_1 day_2 Freq_2 Percent_2 day_3 Freq_3 Percent_3
104 1514 1.720063622 847 23.11370681 4.368660e-02 14 0.004547728 2.068183e-05
37 11754 13.353783231 994 16.67422696 3.151551e-02 760 0.009095457 4.136367e-05
277 11689 13.279936378 <NA> NA NA 371 0.013643185 6.204550e-05
1622 20314 23.078845717 153 5.08996749 9.620412e-03 737 0.018190914 8.272733e-05
277 11689 13.279936378 994 16.67422696 3.151551e-02 92 0.022738642 1.034092e-04
1622 20314 23.078845717 847 23.11370681 4.368660e-02 534 0.027286370 1.240910e-04
1677 96 0.109066121 964 0.17766689 3.358035e-04 302 0.031834099 1.447728e-04
37 11754 13.353783231 994 16.67422696 3.151551e-02 185 0.036381827 1.654547e-04
1073 1405 1.596228130 <NA> NA NA 51 0.040929556 1.861365e-04
1622 20314 23.078845717 847 23.11370681 4.368660e-02 4 0.045477284 2.068183e-04
702 20110 22.847080209 <NA> NA NA 289 0.050025013 2.275002e-04",
  header = TRUE,
  na.strings = c("NA", "<NA>"),
  stringsAsFactors = FALSE
)
尝试以下代码:
library(tidyverse)

# Turn each input row into its consecutive (previous day -> day) transitions,
# skipping missing days, then spread the target days into columns that hold
# the Freq of that transition (0 where a transition does not apply).
df %>%
  # Percent_* columns are not part of the requested output.
  select(-starts_with("Percent")) %>%
  # If the data was read with the default `na.strings`, a literal "<NA>"
  # token makes its column character. Map that token to a real NA and make
  # every column double so the day_* / Freq_* columns can be stacked.
  mutate(across(everything(), function(x) {
    if (is.character(x)) {
      x <- na_if(x, "<NA>")
    }
    as.numeric(x)
  })) %>%
  # G identifies the original input row.
  mutate(G = row_number()) %>%
  # day_1/Freq_1, day_2/Freq_2, ... -> one (num, day, Freq) row per triplet.
  pivot_longer(-G, names_to = c(".value", "num"), names_sep = "_") %>%
  # Dropping missing days first makes lag() link across the gap.
  filter(!is.na(day)) %>%
  group_by(G) %>%
  mutate(prev_day = dplyr::lag(day, 1)) %>%
  ungroup() %>%
  # The first day of every input row has no predecessor.
  filter(!is.na(prev_day)) %>%
  select(-G, -num) %>%
  # One id per transition keeps repeated (prev_day, day) pairs on separate
  # rows; the column name is kept as `row_number()` to match the output below.
  mutate(`row_number()` = row_number()) %>%
  pivot_wider(
    names_from = day,
    values_from = Freq,
    names_sort = TRUE,
    values_fill = 0
  ) %>%
  # pivot_wider() keeps first-appearance row order; spread() sorted by keys.
  arrange(prev_day, `row_number()`)
# A tibble: 19 x 17
# prev_day `row_number()` `4` `14` `51` `92` `153` `185` `289` `302` `371` `534` `737` `760` `847` `964` `994`
# <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 37 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 16.7
# 2 37 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 16.7
# 3 104 1 0 0 0 0 0 0 0 0 0 0 0 0 23.1 0 0
# 4 153 7 0 0 0 0 0 0 0 0 0 0 0.0182 0 0 0 0
# 5 277 5 0 0 0 0 0 0 0 0 0.0136 0 0 0 0 0 0
# 6 277 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 16.7
# 7 702 19 0 0 0 0 0 0 0.0500 0 0 0 0 0 0 0 0
# 8 847 2 0 0.00455 0 0 0 0 0 0 0 0 0 0 0 0 0
# 9 847 11 0 0 0 0 0 0 0 0 0 0.0273 0 0 0 0 0
# 10 847 18 0.0455 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# 11 964 13 0 0 0 0 0 0 0 0.0318 0 0 0 0 0 0 0
# 12 994 4 0 0 0 0 0 0 0 0 0 0 0 0.00910 0 0 0
# 13 994 9 0 0 0 0.0227 0 0 0 0 0 0 0 0 0 0 0
# 14 994 15 0 0 0 0 0 0.0364 0 0 0 0 0 0 0 0 0
# 15 1073 16 0 0 0.0409 0 0 0 0 0 0 0 0 0 0 0 0
# 16 1622 6 0 0 0 0 5.09 0 0 0 0 0 0 0 0 0 0
# 17 1622 10 0 0 0 0 0 0 0 0 0 0 0 0 23.1 0 0
# 18 1622 17 0 0 0 0 0 0 0 0 0 0 0 0 23.1 0 0
# 19 1677 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0.178 0