如何将创建列的函数应用于R中的数据框

时间:2016-01-07 11:49:54

标签: r dataframe apply

我尝试在R中对数据框进行一些数据整理,但我找不出为什么我的解决方案不起作用。

# Sample audit log, one row per logged event. What the generic fields R1..R4
# hold depends on the value of EVENT.
auditlog <- data.frame(
  X_ID = c(1, 2, 4:10),
  EVENT = c('create', 'delete', 'update', 'update', 'delete', 'delete',
            'create', 'create', 'update'),
  UNIT = rep('30xx', 9),
  CREATED = c('2015-12-01', '2015-12-01', '2015-12-02', '2015-12-04',
              '2015-12-05', '2015-12-06', rep('2015-12-10', 3)),
  R1 = c('xxxxxxxx11', 'title', 'xxxxxxxx25', 'xxxxxxxx11', 'new_title',
         '_title', 'xxxxxxxx12', 'xxxxxxxx87', 'xxxxxxxx87'),
  R2 = c('my_title', 'xxxxxxxx47', 'titleA', 'my_title', 'xxxxxxxx11',
         'xxxxxxxx64', 'my_title_24', 'my_title_2', 'my_title_2'),
  R3 = c('red', '', 'title_42', 'new_title', '', '', 'green', 'blue',
         '_title_'),
  R4 = c('note', rep('', 5), 'my_important_note', 'yet another note', '')
)

以下是我的审核日志摘录:

X_ID    EVENT   UNIT    CREATED     R1          R2          R3          R4
1       create  30xx    2015-12-01  xxxxxxxx11  my_title    red         'note'          
2       delete  30xx    2015-12-01  title       xxxxxxxx47
4       update  30xx    2015-12-02  xxxxxxxx25  titleA      title_42
5       update  30xx    2015-12-04  xxxxxxxx11  my_title    new_title           
6       delete  30xx    2015-12-05  new_title   xxxxxxxx11      
7       delete  30xx    2015-12-06  _title      xxxxxxxx64          
8       create  30xx    2015-12-10  xxxxxxxx12  my_title_24 green       'my_important_note'         
9       create  30xx    2015-12-10  xxxxxxxx87  my_title_2  blue        'yet another note'
10      update  30xx    2015-12-10  xxxxxxxx87  my_title_2  _title_         

我想通过将R1到R4列重新分配给含义更清晰的列来重构此日志:

X_ID    EVENT   UNIT    CREATED     ELEMENT_ID  TITLE       NEW_TITLE   COLOR   COMMENT
1       create  30xx    2015-12-01  xxxxxxxx11  my_title                red     'note'          
2       delete  30xx    2015-12-01  xxxxxxxx47  title
4       update  30xx    2015-12-02  xxxxxxxx25  titleA      title_42
5       update  30xx    2015-12-04  xxxxxxxx11  my_title    new_title           
6       delete  30xx    2015-12-05  xxxxxxxx11  new_title       
7       delete  30xx    2015-12-06  xxxxxxxx64  _title          
8       create  30xx    2015-12-10  xxxxxxxx12  my_title_24             green   'my_important_note'         
9       create  30xx    2015-12-10  xxxxxxxx87  my_title_2              blue    'yet another note'
10      update  30xx    2015-12-10  xxxxxxxx87  my_title_2  _title_

这是我在R中的方法,我尝试将具有条件的函数应用于每一行:

# Asker's non-working attempt, kept verbatim. Issues visible in this snippet:
#  - the anonymous function never returns `row`; its value is whatever the
#    last assignment in the taken branch evaluates to, so the extended row is
#    not what apply() collects;
#  - the 'create' and 'delete' branches write 'ELEMNT_ID' while the 'update'
#    branch writes 'ELEMENT_ID';
#  - each branch adds a different set of columns;
#  - the extra `}` at the end of the 'update' line closes function(row) early,
#    which leaves the `}` on the last line unmatched.
struct_log <- apply(auditlog, 1, function(row) {
   if (row['EVENT'] == 'create') { row['ELEMNT_ID'] <- row['R1']; row['TITLE'] <- row['R2']; row['COLOR'] <- row['R3']; row['COMMENT'] <- row['R4'] }
   else if (row['EVENT'] == 'delete') { row['TITLE'] <- row['R1']; row['ELEMNT_ID'] <- row['R2'] }
   else if (row['EVENT'] == 'update') { row['ELEMENT_ID'] <- row['R1']; row['TITLE'] <- row['R2']; row['NEW_TITLE'] <- row['R3'] } }
})

现在我不明白为什么没有得到一个数据框。

2 个答案:

答案 0 :(得分:1)

(好的,评论后的第二个答案:)

我修改了你最初的方法。首先,你需要在if语句的每个分支中设置所有列(包括空白列);其次,你需要用return返回函数结果。以下是完整代码(包括对auditlog定义的修改):

# Sample audit log, one row per logged event. What the generic fields R1..R4
# hold depends on the value of EVENT.
auditlog <- data.frame(
  X_ID = c(1, 2, 4:10),
  EVENT = c('create', 'delete', 'update', 'update', 'delete', 'delete',
            'create', 'create', 'update'),
  UNIT = rep('30xx', 9),
  CREATED = c('2015-12-01', '2015-12-01', '2015-12-02', '2015-12-04',
              '2015-12-05', '2015-12-06', rep('2015-12-10', 3)),
  R1 = c('xxxxxxxx11', 'title', 'xxxxxxxx25', 'xxxxxxxx11', 'new_title',
         '_title', 'xxxxxxxx12', 'xxxxxxxx87', 'xxxxxxxx87'),
  R2 = c('my_title', 'xxxxxxxx47', 'titleA', 'my_title', 'xxxxxxxx11',
         'xxxxxxxx64', 'my_title_24', 'my_title_2', 'my_title_2'),
  R3 = c('red', '', 'title_42', 'new_title', '', '', 'green', 'blue',
         '_title_'),
  R4 = c('note', rep('', 5), 'my_important_note', 'yet another note', ''),
  # added: keep the text columns as character vectors rather than factors
  stringsAsFactors = FALSE
)


# Map the generic R1..R4 fields of one audit-log row onto named columns.
#
# Args:
#   row: one row of the log as a named character vector (what
#        apply(auditlog, 1, ff) passes in); must contain EVENT and R1..R4.
#
# Returns:
#   `row` with ELEMENT_ID, TITLE, NEW_TITLE, COLOR and COMMENT set, always
#   appended in that order. Columns an event does not supply are ''. A missing
#   (NA) or unrecognised EVENT leaves all five columns blank, so every call
#   returns a vector of the same length and apply() can still build a matrix.
ff <- function(row) {
  event <- row[['EVENT']]
  target_cols <- c('ELEMENT_ID', 'TITLE', 'NEW_TITLE', 'COLOR', 'COMMENT')

  # Create every target column up front, blank and in one fixed order, so the
  # names line up across event types (chained `a <- b <- ''` assignments would
  # append them right-to-left instead).
  for (col in target_cols) {
    row[col] <- ''
  }

  # isTRUE() lets an NA event fall through instead of raising an error in if().
  if (isTRUE(event == 'create')) {
    row['ELEMENT_ID'] <- row['R1']
    row['TITLE'] <- row['R2']
    row['COLOR'] <- row['R3']
    row['COMMENT'] <- row['R4']
  } else if (isTRUE(event == 'delete')) {
    # delete rows carry the title in R1 and the element id in R2
    row['ELEMENT_ID'] <- row['R2']
    row['TITLE'] <- row['R1']
  } else if (isTRUE(event == 'update')) {
    row['ELEMENT_ID'] <- row['R1']
    row['TITLE'] <- row['R2']
    row['NEW_TITLE'] <- row['R3']
  }

  row
}

# Run ff() over every row. apply() coerces the data frame to a character
# matrix and returns one column per input row, so transpose to get rows back.
struct_log <- t(apply(auditlog, 1, ff))
# Keep the text columns as character rather than factors (the default for
# as.data.frame() before R 4.0).
struct_log <- as.data.frame(struct_log, stringsAsFactors = FALSE)
new_names <- c("ELEMENT_ID", "TITLE", "NEW_TITLE", "COLOR", "COMMENT")
names(struct_log) <- c(names(auditlog), new_names)
struct_log[c('R1', 'R2', 'R3', 'R4')] <- list(NULL)  # drop 'R' columns
# The round trip through a character matrix turned every value into text
# (numeric X_ID is format()-ed, i.e. space-padded). Copy the columns that were
# merely carried over back from auditlog to restore their original types.
carried_over <- setdiff(names(auditlog), c('R1', 'R2', 'R3', 'R4'))
struct_log[carried_over] <- auditlog[carried_over]

之后你会得到:

    X_ID  EVENT UNIT    CREATED ELEMENT_ID       TITLE NEW_TITLE COLOR           COMMENT
1    1 create 30xx 2015-12-01 xxxxxxxx11    my_title             red              note
2    2 delete 30xx 2015-12-01 xxxxxxxx47       title                                  
3    4 update 30xx 2015-12-02 xxxxxxxx25      titleA  title_42                        
4    5 update 30xx 2015-12-04 xxxxxxxx11    my_title new_title                        
5    6 delete 30xx 2015-12-05 xxxxxxxx11   new_title                                  
6    7 delete 30xx 2015-12-06 xxxxxxxx64      _title                                  
7    8 create 30xx 2015-12-10 xxxxxxxx12 my_title_24           green my_important_note
8    9 create 30xx 2015-12-10 xxxxxxxx87  my_title_2            blue  yet another note
9   10 update 30xx 2015-12-10 xxxxxxxx87  my_title_2   _title_             

答案 1 :(得分:0)

使用索引更容易。方法如下,但首先你还应该在auditlog定义中加入stringsAsFactors=FALSE参数:

# Rebuild the audit log with named columns using row indexing instead of a
# per-row function. Expects `auditlog` to have X_ID, EVENT, UNIT, CREATED as
# its first four columns plus the generic fields R1..R4.
k <- nrow(auditlog)

# build an empty dataframe:
struct_log <- data.frame(X_ID = numeric(k),
                         EVENT = character(k),
                         UNIT = character(k),
                         CREATED = character(k),
                         ELEMENT_ID = character(k),
                         TITLE = character(k),
                         NEW_TITLE = character(k),
                         COLOR = character(k),
                         COMMENT = character(k),
                         stringsAsFactors = FALSE)

struct_log[, 1:4] <- auditlog[, 1:4] # keep first 4 columns as-is

# For each event type: target column = generic source column that feeds it.
# Target columns not listed for an event stay '' from the initialisation above.
field_map <- list(
  create = c(ELEMENT_ID = 'R1', TITLE = 'R2', COLOR = 'R3', COMMENT = 'R4'),
  delete = c(TITLE = 'R1', ELEMENT_ID = 'R2'),
  update = c(ELEMENT_ID = 'R1', TITLE = 'R2', NEW_TITLE = 'R3')
)

for (event in names(field_map)) {
  ind <- which(auditlog[['EVENT']] == event)
  for (target in names(field_map[[event]])) {
    source_col <- field_map[[event]][[target]]
    # as.character() matters when the R columns are factors (the data.frame()
    # default before R 4.0): assigning a factor into a character column would
    # store its integer codes instead of the labels.
    struct_log[ind, target] <- as.character(auditlog[ind, source_col])
  }
}

结果符合您的要求:

  X_ID  EVENT UNIT    CREATED ELEMENT_ID       TITLE NEW_TITLE COLOR           COMMENT
1    1 create 30xx 2015-12-01 xxxxxxxx11    my_title             red              note
2    2 delete 30xx 2015-12-01 xxxxxxxx47       title                                  
3    4 update 30xx 2015-12-02 xxxxxxxx25      titleA  title_42                        
4    5 update 30xx 2015-12-04 xxxxxxxx11    my_title new_title                        
5    6 delete 30xx 2015-12-05 xxxxxxxx11   new_title                                  
6    7 delete 30xx 2015-12-06 xxxxxxxx64      _title                                  
7    8 create 30xx 2015-12-10 xxxxxxxx12 my_title_24           green my_important_note
8    9 create 30xx 2015-12-10 xxxxxxxx87  my_title_2            blue  yet another note
9   10 update 30xx 2015-12-10 xxxxxxxx87  my_title_2   _title_                        

不要忘记在auditlog定义中加入stringsAsFactors=FALSE(与我上面对struct_log所做的类似),否则将无法正常工作……