循环数据框

时间:2018-05-07 00:17:52

标签: r loops for-loop dataframe

我有以下信息:

M_PT

     CEDIS       |              PLAZA        
 9999999021-1    |   10MDA
 9999999021-2    |   10CAN
 9999999012-1    |   10GUD','10CLJ
 9999999012-2    |   10DZV
 9999999025-1    |   10LPB','10HHM','10OBR','10HER
 9999999025-2    |   10DCU

我想最终得到以下结果:

      CEDIS     |  PLAZA       
  9999999021-1  |   10MDA
  9999999021-2  |   10CAN
  9999999012-1  |   10GUD
  9999999012-1  |   10CLJ
  9999999012-2  |   10DZV
  9999999025-1  |   10LPB
  9999999025-1  |   10HHM
  9999999025-1  |   10OBR
  9999999025-1  |   10HER
  9999999025-2  |   10DCU

我尝试了以下做法,但没有成功:

# Attempt to build one "CEDIS-PLAZA" string per individual PLAZA code.
#  - `vec` is preallocated with NA, one slot per code found after splitting
#    every PLAZA value on the "','" separator.
#  - The initial `j <- 0` is never read; `j` is reassigned by the inner loop.
#  - A PLAZA value of exactly 5 characters is treated as a single code;
#    otherwise the number of codes is taken as (characters left after removing
#    the separators) / 5, i.e. every code is assumed to be 5 characters long.
#  - ifelse(j == 1, 0, j - 1) is simply j - 1, so writes go to vec[i + j - 1].
#    That index is derived from the input row `i`, not from a running output
#    position, so a row holding several codes writes into slots that the
#    following rows then overwrite (row 3 fills vec[3] and vec[4], and row 4
#    writes vec[4] again).
#  - The result is a character vector of pasted strings, not a two-column
#    data frame.
> vec  <- rep(NA,length(unlist(strsplit(M_PT[,"PLAZA"],split="','"))))
> j <- 0
> 
> for(i in 1:nrow(M_PT)){
+ 
+    if(nchar(M_PT[i,"PLAZA"]) == 5){
+    
+         vec[i] <- paste(M_PT[i,"CEDIS"],M_PT[i,"PLAZA"],sep="-")
+       
+          }else{
+ 
+         for(j in 1:sum(nchar(gsub("','","",M_PT[i,"PLAZA"])) / 5)){
+         
+         vec[i + ifelse(j == 1, 0, j - 1)] <- paste(M_PT[i,"CEDIS"], 
unlist(strsplit(M_PT[i,"PLAZA"],split="','"))[j],sep="-")
+ 
+     }
+   }
+ }

对解决方案有任何想法吗?

2 个答案:

答案 0 :(得分:4)

我们可以使用 tidyr 包中的 separate_rows

library(dplyr)
library(tidyr)

# Give every code in a multi-valued PLAZA entry its own row; the CEDIS value
# of the originating row is repeated on each of the new rows.
dat2 <- separate_rows(dat, "PLAZA")
dat2
#           CEDIS PLAZA
# 1  9999999021-1 10MDA
# 2  9999999021-2 10CAN
# 3  9999999012-1 10GUD
# 4  9999999012-1 10CLJ
# 5  9999999012-2 10DZV
# 6  9999999025-1 10LPB
# 7  9999999025-1 10HHM
# 8  9999999025-1 10OBR
# 9  9999999025-1 10HER
# 10 9999999025-2 10DCU

数据

# Example data. The header names two columns while each data line carries
# three fields, so the leading 1..6 field ends up as row names rather than as
# a column. The single quotes keep values that contain "-" or ", " together
# as one field.
dat <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "   CEDIS        PLAZA        
1 '9999999021-1'                             10MDA
                  2 '9999999021-2'                             10CAN
                  3 '9999999012-1'                     '10GUD, 10CLJ'
                  4 '9999999012-2'                             10DZV
                  5 '9999999025-1'       '10LPB, 10HHM, 10OBR, 10HER'
                  6 '9999999025-2'                             10DCU"
)

答案 1 :(得分:3)

使用 tidyr 包中的 unnest,数据来自 www。
library(dplyr)
library(tidyr)
# Split PLAZA into a list column (one character vector of codes per row) and
# let unnest() expand it to one row per code, repeating CEDIS.
# The column names follow the CEDIS / PLAZA columns of the `dat` built with
# read.table() earlier on this page; the printed result below still shows
# COLUMN.NAME / COLUMN.NAME.2 headers, presumably from an older version of
# that data.
# The pattern ",\\s*" also consumes the blank after each comma, so the codes
# come out without leading whitespace.
dat %>%
  mutate(PLAZA = strsplit(PLAZA, ",\\s*")) %>%
  unnest(PLAZA)

    COLUMN.NAME COLUMN.NAME.2
1  9999999021-1         10MDA
2  9999999021-2         10CAN
3  9999999012-1         10GUD
4  9999999012-1         10CLJ
5  9999999012-2         10DZV
6  9999999025-1         10LPB
7  9999999025-1         10HHM
8  9999999025-1         10OBR
9  9999999025-1         10HER
10 9999999025-2         10DCU