根据跨多个变量的查找表重新编码

时间:2018-10-15 21:56:33

标签: r dplyr

我正在尝试重新编码许多列,每列都有各自不同的重新编码规则。据我所知,dplyr::recode() 不接受向量形式的规则。我更希望采用 tidyverse 的解决方案,而不是一堆嵌套的循环!

这是示例数据和查找表:

# Example data: 20 rows with four integer-coded columns (MAIN, PREDDEG,
# HIGHDEG, CONTROL) and one character column, not_to_recode, that holds the
# same string in every row.
x <- structure(
  list(
    # All rows are 1 except row 14, which is 0.
    MAIN = c(rep(1L, 13L), 0L, rep(1L, 6L)),
    PREDDEG = c(
      3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L,
      3L, 2L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 2L
    ),
    HIGHDEG = c(
      4L, 4L, 4L, 4L, 4L, 4L, 2L, 3L, 4L, 4L,
      3L, 2L, 3L, 4L, 2L, 2L, 4L, 2L, 1L, 2L
    ),
    CONTROL = c(
      1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
      2L, 1L, 2L, 3L, 1L, 1L, 2L, 1L, 3L, 1L
    ),
    not_to_recode = rep("asdf", 20L)
  ),
  # c(NA, -n) is the compact encoding of automatic row names 1..n.
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)
x
#>    MAIN PREDDEG HIGHDEG CONTROL not_to_recode
#> 1     1       3       4       1          asdf
#> 2     1       3       4       1          asdf
#> 3     1       3       4       2          asdf
#> 4     1       3       4       1          asdf
#> 5     1       3       4       1          asdf
#> 6     1       3       4       1          asdf
#> 7     1       2       2       1          asdf
#> 8     1       3       3       1          asdf
#> 9     1       3       4       1          asdf
#> 10    1       3       4       1          asdf
#> 11    1       3       3       2          asdf
#> 12    1       2       2       1          asdf
#> 13    1       3       3       2          asdf
#> 14    0       3       4       3          asdf
#> 15    1       2       2       1          asdf
#> 16    1       2       2       1          asdf
#> 17    1       3       4       2          asdf
#> 18    1       1       2       1          asdf
#> 19    1       1       1       3          asdf
#> 20    1       2       2       1          asdf


# Lookup table in long format: one row per (variable_name, value) pair, with
# the text label that the coded value stands for.
lookup <- structure(
  list(
    # Each variable name is repeated once per coded value it has.
    variable_name = rep(
      c("MAIN", "PREDDEG", "HIGHDEG", "CONTROL"),
      times = c(2L, 5L, 5L, 3L)
    ),
    value = c(
      0, 1,           # MAIN
      0, 1, 2, 3, 4,  # PREDDEG
      0, 1, 2, 3, 4,  # HIGHDEG
      1, 2, 3         # CONTROL
    ),
    label = c(
      # MAIN
      "Not main campus",
      "Main campus",
      # PREDDEG
      "Not classified",
      "Predominantly certificate-degree granting",
      "Predominantly associate's-degree granting",
      "Predominantly bachelor's-degree granting",
      "Entirely graduate-degree granting",
      # HIGHDEG
      "Non-degree-granting",
      "Certificate degree",
      "Associate degree",
      "Bachelor's degree",
      "Graduate degree",
      # CONTROL
      "Public",
      "Private nonprofit",
      "Private for-profit"
    )
  ),
  # c(NA, -n) is the compact encoding of automatic row names 1..n.
  row.names = c(NA, -15L),
  class = c("tbl_df", "tbl", "data.frame")
)

lookup
#>    variable_name value                                     label
#> 1           MAIN     0                           Not main campus
#> 2           MAIN     1                               Main campus
#> 3        PREDDEG     0                            Not classified
#> 4        PREDDEG     1 Predominantly certificate-degree granting
#> 5        PREDDEG     2 Predominantly associate's-degree granting
#> 6        PREDDEG     3  Predominantly bachelor's-degree granting
#> 7        PREDDEG     4         Entirely graduate-degree granting
#> 8        HIGHDEG     0                       Non-degree-granting
#> 9        HIGHDEG     1                        Certificate degree
#> 10       HIGHDEG     2                          Associate degree
#> 11       HIGHDEG     3                         Bachelor's degree
#> 12       HIGHDEG     4                           Graduate degree
#> 13       CONTROL     1                                    Public
#> 14       CONTROL     2                         Private nonprofit
#> 15       CONTROL     3                        Private for-profit

由 reprex 包 (v0.2.1) 于 2018-10-15 创建

1 个答案:

答案 0 :(得分:2)

按 `variable_name` 拆分长格式的查找表 lookup,并按 `x` 的 `names` 对拆分结果进行排序:

slook <- split(lookup[-1], lookup$variable_name)[names(x)]

然后使用 `mapply` 进行表查找,查找范围仅限于每个变量自身的取值:

mapply(function(a,b){ b[['label']][match(a, b$value)]}, x, slook)
      MAIN PREDDEG HIGHDEG CONTROL
 [1,] "Main campus" "Predominantly bachelor's-degree granting" "Graduate degree" "Public"
 [2,] "Main campus" "Predominantly bachelor's-degree granting" "Graduate degree" "Public"
 [3,] "Main campus" "Predominantly bachelor's-degree granting" "Graduate degree" "Private nonprofit"
 [4,] "Main campus" "Predominantly bachelor's-degree granting" "Graduate degree" "Public"
 [5,] "Main campus" "Predominantly bachelor's-degree granting" "Graduate degree" "Public"
 [6,] "Main campus" "Predominantly bachelor's-degree granting" "Graduate degree" "Public"
 [7,] "Main campus" "Predominantly associate's-degree granting" "Associate degree" "Public"
 [8,] "Main campus" "Predominantly bachelor's-degree granting" "Bachelor's degree" "Public"
 [9,] "Main campus" "Predominantly bachelor's-degree granting" "Graduate degree" "Public"
[10,] "Main campus" "Predominantly bachelor's-degree granting" "Graduate degree" "Public"
[11,] "Main campus" "Predominantly bachelor's-degree granting" "Bachelor's degree" "Private nonprofit"
[12,] "Main campus" "Predominantly associate's-degree granting" "Associate degree" "Public"
[13,] "Main campus" "Predominantly bachelor's-degree granting" "Bachelor's degree" "Private nonprofit"
[14,] "Not main campus" "Predominantly bachelor's-degree granting" "Graduate degree" "Private for-profit"
[15,] "Main campus" "Predominantly associate's-degree granting" "Associate degree" "Public"
[16,] "Main campus" "Predominantly associate's-degree granting" "Associate degree" "Public"
[17,] "Main campus" "Predominantly bachelor's-degree granting" "Graduate degree" "Private nonprofit"
[18,] "Main campus" "Predominantly certificate-degree granting" "Associate degree" "Public"
[19,] "Main campus" "Predominantly certificate-degree granting" "Certificate degree" "Private for-profit"
[20,] "Main campus" "Predominantly associate's-degree granting" "Associate degree" "Public"

针对示例数据与任务描述不一致的问题:可以把赋值限制在那些名称存在于 `lookup` 对象中的列上:

x[ , names(slook)] <- mapply(
    function(a,b){ b[['label']][   # the character label col
                       match(a, b$value) ]},  # lookup x-index in slook
                           # end function call, now the arguments
    x[names(slook)],   # arg matched to `a`
    slook,             #arg gets matched to `b`
    SIMPLIFY=FALSE)    # keep it a list rather than make a matrix

> x
# A tibble: 20 x 5
   MAIN PREDDEG HIGHDEG CONTROL not_to_recode
   <chr> <chr> <chr> <chr> <chr>
 1 Main campus Predominantly bachelor's-degree granting Graduate degree Public asdf
 2 Main campus Predominantly bachelor's-degree granting Graduate degree Public asdf
 3 Main campus Predominantly bachelor's-degree granting Graduate degree Private nonprofit asdf
 4 Main campus Predominantly bachelor's-degree granting Graduate degree Public asdf
 5 Main campus Predominantly bachelor's-degree granting Graduate degree Public asdf
 6 Main campus Predominantly bachelor's-degree granting Graduate degree Public asdf
 7 Main campus Predominantly associate's-degree granting Associate degree Public asdf
 8 Main campus Predominantly bachelor's-degree granting Bachelor's degree Public asdf
 9 Main campus Predominantly bachelor's-degree granting Graduate degree Public asdf
10 Main campus Predominantly bachelor's-degree granting Graduate degree Public asdf
11 Main campus Predominantly bachelor's-degree granting Bachelor's degree Private nonprofit asdf
12 Main campus Predominantly associate's-degree granting Associate degree Public asdf
13 Main campus Predominantly bachelor's-degree granting Bachelor's degree Private nonprofit asdf
14 Not main campus Predominantly bachelor's-degree granting Graduate degree Private for-profit asdf
15 Main campus Predominantly associate's-degree granting Associate degree Public asdf
16 Main campus Predominantly associate's-degree granting Associate degree Public asdf
17 Main campus Predominantly bachelor's-degree granting Graduate degree Private nonprofit asdf
18 Main campus Predominantly certificate-degree granting Associate degree Public asdf
19 Main campus Predominantly certificate-degree granting Certificate degree Private for-profit asdf
20 Main campus Predominantly associate's-degree granting Associate degree Public asdf

如果您想在 tidyverse 中模拟 `mapply` 的行为,我相信 `tidyverse` 体系中的 `purrr` 包提供了类似的功能。具体来说,您应该查看 `map2`。