如何按比例按键合并或填充数据

时间:2018-04-05 05:39:52

标签: r merge data.table

我有两个data.table

 Table A
     ID        DATE   TYPE
 1:  36111468       0    A
 2:  93908095       0    A
 3:  68889156       0    B
 4:  94391053       0    B
 5:  66486300       0    C
 6:  48316003       0    C
 7:  30945034       0    C
 8:  93530960       0    C
 9:  50298782       0    D
10:  61124841       0    D
11:  73185741       0    D
12:  61584842       0    D  
13:  65174431       0    D
14:  69089847       0    D
15:  31180801       0    E
16:  41084849       0    E

TABLE B
    ID       DATE  TYPE 
1:  30341961 20031124 A
2:  53682621 20041023 B
3:  43370733 20040119 C
4:  61728911 20040909 C
5:  79482564 20020107 D  
6:  81995262 20040609 D 
7:  23256429 20021029 D
8:  84849044 20020401 E   

我想用表B中的DATE来填充表A中的DATE(现在为0):以TYPE为键,按1:2的比例对应,即表B中的每一行对应表A中同一TYPE的两行。

然后像下面这样用rbind把两个表合并起来:

     ID        DATE   TYPE
 1:  36111468 20031124 A
 2:  93908095 20031124 A
 3:  68889156 20041023 B
 4:  94391053 20041023 B
 5:  66486300 20040119 C
 6:  48316003 20040119 C
 7:  30945034 20040909 C
 8:  93530960 20040909 C
 9:  50298782 20020107 D
10:  61124841 20020107 D
11:  73185741 20040609 D
12:  61584842 20040609 D  
13:  65174431 20021029 D
14:  69089847 20021029 D
15:  31180801 20020401 E
16:  41084849 20020401 E
17:  30341961 20031124 A
18:  53682621 20041023 B
19:  43370733 20040119 C
20:  61728911 20040909 C
21:  79482564 20020107 D  
22:  81995262 20040609 D 
23:  23256429 20021029 D
24:  84849044 20020401 E   

我更喜欢data.table而不是dplyr。如果可能,请用data.table的写法来回答。

2 个答案:

答案 0 :(得分:0)

如果表 B 已经是 data.table,那么您可以这样做:

df <- merge(A[, -'DATE'], B[, -'ID'], by = 'TYPE', allow.cartesian = TRUE)
setcolorder(df, c('ID', 'DATE', 'TYPE'))
df

/* Capacity, in characters, of the type and description fields below;
 * the arrays are declared one larger to hold the terminating '\0'. */
#define NAME_LEN 30

/* One node of a singly linked list of equipment records. */
struct equipment{
    char type[NAME_LEN+1];         /* equipment type, NUL-terminated string */
    char description[NAME_LEN+1];  /* equipment description, NUL-terminated string */
    int quantity;                  /* quantity entered for this record */
    struct equipment *next;        /* next node in the list; NULL on the last node */
};

/*
 * Print `prompt`, then read one line from stdin into `buf` (at most
 * size - 1 characters). The trailing newline is stripped, and any part of
 * the line that did not fit is discarded so it cannot be picked up by the
 * next read.
 *
 * Returns 1 on success, 0 on end-of-file or read error.
 */
static int read_equipment_field(const char *prompt, char *buf, size_t size){
  size_t len;
  int ch;

  printf("%s", prompt);
  if ( fgets(buf, (int)size, stdin) == NULL ){
    return 0;
  }

  len = strcspn(buf, "\n");
  if ( buf[len] == '\n' ){
    buf[len] = '\0';
  }
  else{
    /* The line was longer than the buffer: skip the rest of it. */
    while ( (ch = getchar()) != '\n' && ch != EOF ){
      /* discard */
    }
  }
  return 1;
}

/*
 * Prompt for one piece of equipment (type, description, quantity), read the
 * answers from stdin and append a new node to the end of the list.
 *
 * list: head of the existing list, or NULL for an empty list.
 *
 * Returns the head of the list. When `list` is NULL and a node is added, the
 * new node is the head. The list is returned unchanged (nothing is added)
 * when input ends early, the quantity is not a number, memory cannot be
 * allocated, or an entry with the same type and description already exists.
 *
 * NOTE(review): "already in the list" is taken to mean that both type and
 * description match an existing node -- confirm that this is the intended
 * rule. The earlier pointer comparison against the freshly allocated node
 * could never be true.
 */
struct equipment *append_to_list(struct equipment *list){
  char type[NAME_LEN + 1], description[NAME_LEN + 1];
  char quantity_line[32];
  int quantity;
  struct equipment *cur, *tail = NULL, *node;

  if ( !read_equipment_field("Enter equipment type: ", type, sizeof type) ||
       !read_equipment_field("Enter description of the equipment: ",
                             description, sizeof description) ||
       !read_equipment_field("Enter quantity: ",
                             quantity_line, sizeof quantity_line) ){
    return list;
  }

  /* The quantity is parsed from a full line so that no newline is left
   * behind in stdin for the next call. */
  if ( sscanf(quantity_line, "%d", &quantity) != 1 ){
    printf("Invalid quantity\n");
    return list;
  }

  /* Walk every node (including the last one) with a separate cursor so the
   * head pointer is preserved; remember the tail for the append below. */
  for ( cur = list; cur != NULL; cur = cur->next ){
    if ( strcmp(cur->type, type) == 0 &&
         strcmp(cur->description, description) == 0 ){
      printf("This equipment is already in the list\n");
      return list;
    }
    tail = cur;
  }

  node = malloc(sizeof *node);
  if ( node == NULL ){
    printf("Out of memory; equipment not added\n");
    return list;
  }

  strcpy(node->type, type);
  strcpy(node->description, description);
  node->quantity = quantity;
  node->next = NULL;

  if ( tail == NULL ){
    /* Empty list: the new node becomes the head. */
    return node;
  }

  tail->next = node;
  return list;
}

答案 1 :(得分:0)

您可以在加入之前创建索引,如下所示:

# Number of dtA rows that share one dtB row within a TYPE (the 1:2 ratio).
rows_per_date <- 2L

# Group index within each TYPE: rows 1-2 get 1, rows 3-4 get 2, and so on.
# Integer division on the row number replaces rep(seq_len(.N / 2), each = 2),
# which fails when a TYPE has a row count that is not a multiple of the ratio.
dtA[, IDX := (seq_len(.N) - 1L) %/% rows_per_date + 1L, by = TYPE]

# Running row number within each TYPE, to be matched against dtA's IDX.
dtB[, IDX := seq_len(.N), by = TYPE]

# Update join: copy DATE from the dtB row with the same TYPE and IDX into dtA.
# dtA rows without a matching dtB row keep their current DATE.
dtA[dtB, DATE := i.DATE, on = .(TYPE, IDX)]

# Drop the helper column so the result keeps only ID, DATE and TYPE.
dtA[, IDX := NULL]
dtB[, IDX := NULL]

# Stack the filled dtA on top of dtB.
rbindlist(list(dtA, dtB), use.names = TRUE)

数据:

library(data.table)
# Table A: every DATE is the placeholder 0 that still has to be filled in.
# The inline data is handed to fread through its `text` argument.
dtA <- fread(text = "ID        DATE   TYPE
36111468       0    A
93908095       0    A
68889156       0    B
94391053       0    B
66486300       0    C
48316003       0    C
30945034       0    C
93530960       0    C
50298782       0    D
61124841       0    D
73185741       0    D
61584842       0    D  
65174431       0    D
69089847       0    D
31180801       0    E
41084849       0    E")

# Table B: the rows that supply the DATE values, grouped by TYPE.
dtB <- fread(text = "ID       DATE  TYPE 
30341961 20031124 A
53682621 20041023 B
43370733 20040119 C
61728911 20040909 C
79482564 20020107 D  
81995262 20040609 D 
23256429 20021029 D
84849044 20020401 E")