如何在R中将某些行转换为列?

时间:2019-05-24 12:56:03

标签: r

我有一个看起来像这样的数据框:

   `Row Labels` Female Male 
   <chr>        <chr>  <chr>
 1 London       <NA>   <NA> 
 2 42           <NA>   1    
 3 Paris        <NA>   <NA> 
 4 36           1      <NA> 
 5 Belgium      <NA>   <NA> 
 6 18           <NA>   1    
 7 21           <NA>   1    
 8 Madrid       <NA>   <NA> 
 9 20           1      <NA> 
10 Berlin       <NA>   <NA> 
11 37           <NA>   1    
12 23           1      <NA> 
13 25           1      <NA> 
14 44           1      <NA> 

我用来生成此数据框的代码如下:

# Example data: each city row in `Row Labels` is followed by one or more
# age rows; Female/Male hold "1" (as character) or NA.
structure(
  list(
    `Row Labels` = c(
      "London", "42", "Paris", "36", "Belgium", "18", "21",
      "Madrid", "20", "Berlin", "37", "23", "25", "44"
    ),
    Female = c(
      NA, NA, NA, "1", NA, NA, NA,
      NA, "1", NA, NA, "1", "1", "1"
    ),
    Male = c(
      NA, "1", NA, NA, NA, "1", NA,
      NA, NA, "1", NA, NA, NA, NA
    )
  ),
  row.names = c(NA, -14L),
  class = c("tbl_df", "tbl", "data.frame")
)

我想知道如何在此数据框中将多行更改为列。

我的理想输出如下:

'Row Labels' Female Male 42 36 21 20 37  18  23 25 44
 London              1   1
 Paris         1             1
 Belgium       1       1         1        1
Madrid         1                   1
Berlin         3       1              1      1  1   1

2 个答案:

答案 0 :(得分:1)

这看起来是个相当机械的操作。假设你的数据名为 `d`:

# Pair each label row with the value row directly below it and add one
# indicator column per value row.
# Assumes `d` strictly alternates label row / value row (odd rows hold the
# city, even rows hold the age plus the Female/Male marks). Groups with more
# than one value row are not handled by this approach.
d1 <- d[seq(1, nrow(d), by = 2), ]
d2 <- d[seq(2, nrow(d), by = 2), ]

# Move the Female/Male marks from each value row onto its label row.
d1[, c("Male", "Female")] <- d2[, c("Male", "Female")]

# Square matrix that is NA everywhere except for 1 on the diagonal, so row i
# is flagged only in the column named after its own value.
d3 <- matrix(nrow = nrow(d2), ncol = nrow(d2))
diag(d3) <- 1
colnames(d3) <- d2$`Row Labels`

# Bind the indicators to the label rows (d1), so every result row is
# identified by its city instead of by its age.
cbind(d1, d3)
# Result for a 10-row alternating input (London/42, Paris/36, Belgium/21,
# Madrid/20, Berlin/37):
#   Row Labels Female Male 42 36 21 20 37
# 1     London   <NA>    1  1 NA NA NA NA
# 2      Paris      1 <NA> NA  1 NA NA NA
# 3    Belgium   <NA>    1 NA NA  1 NA NA
# 4     Madrid      1 <NA> NA NA NA  1 NA
# 5     Berlin   <NA>    1 NA NA NA NA  1

答案 1 :(得分:1)

使用tidyverse

library(dplyr)
library(tidyr)
# Reshape `df` so that each city becomes one row with Female/Male totals and
# one indicator column per age.
# A label containing a non-digit character is a city row and starts a new
# group; cumsum() over that flag gives every city and its age rows one id.
df %>%
  group_by(gr = cumsum(grepl('\\D+', `Row Labels`))) %>%
  # Replace Female and Male with their per-group totals (NA is ignored).
  mutate_at(
    vars('Female', 'Male'),
    list(~sum(as.numeric(.), na.rm = TRUE))
  ) %>%
  # Build "city,age" for the age rows; the first row of each group is the
  # city row itself and gets NA so it can be dropped next.
  mutate(
    RL = ifelse(
      row_number() > 1,
      paste0(first(`Row Labels`), ',', `Row Labels`),
      NA
    )
  ) %>%
  filter(!is.na(RL)) %>%
  select(RL, gr, Male, Female) %>%
  # separate() splits at the comma by default (any non-alphanumeric run).
  separate(RL, into = c('RL', 'Age')) %>%
  # Mark each (city, age) pair with 1, then spread the ages into columns.
  mutate(flag = 1) %>%
  spread(Age, flag) %>%
  ungroup() %>%
  select(-gr)


# A tibble: 5 x 12
   RL      Male   Female  `18`  `20`  `21`  `23`  `25`  `36`  `37`  `42`  `44`
  <chr>    <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Belgium      1        0     1    NA     1    NA    NA    NA    NA    NA    NA
2 Berlin       1        3    NA    NA    NA     1     1    NA     1    NA     1
3 London       1        0    NA    NA    NA    NA    NA    NA    NA     1    NA
4 Madrid       0        1    NA     1    NA    NA    NA    NA    NA    NA    NA
5 Paris        0        1    NA    NA    NA    NA    NA     1    NA    NA    NA