删除 R 数据框中值全为整数的列

时间:2019-10-20 20:05:24

标签: r

我有以下数据:

# A tibble: 49 x 9
   date           Y    X1    X2    X3    X4     X5       X6 ID   
   <date>     <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>    <dbl> <chr>
 1 2016-10-21     1  4.14    18     0     1 0.0770 0.000429 CAT1 
 2 2016-10-24     1  4.14    17     0     1 0.0793 0.000424 CAT1 
 3 2016-10-25     0  4.16    16     0     1 0.0804 0.000404 CAT1 
 4 2016-10-26     0  4.16    15     0     1 0.0626 0.000426 CAT1 
 5 2016-10-27     0  4.16    14     0     1 0.0587 0.000442 CAT1 
 6 2016-10-28     0  4.11    13     0     1 0.0610 0.000440 CAT1 
 7 2016-10-31     0  4.14    12     0     1 0.0704 0.000425 CAT1 
 8 2016-11-01     1  3.98    11     0     1 0.0867 0.000417 CAT1 
 9 2016-11-02     0  6.00    10     0     1 0.0673 0.000453 CAT1 
10 2016-11-03     0  8.42    10     0     1 0.0877 0.000429 CAT1 
# ... with 39 more rows

我想删除所有值全为整数的列(Y 除外),期望得到如下结果:

# A tibble: 49 x 6
   date           Y    X1     X5       X6 ID   
   <date>     <dbl> <dbl>  <dbl>    <dbl> <chr>
 1 2016-10-21     1  4.14 0.0770 0.000429 CAT1 
 2 2016-10-24     1  4.14 0.0793 0.000424 CAT1 
 3 2016-10-25     0  4.16 0.0804 0.000404 CAT1 
 4 2016-10-26     0  4.16 0.0626 0.000426 CAT1 
 5 2016-10-27     0  4.16 0.0587 0.000442 CAT1 
 6 2016-10-28     0  4.11 0.0610 0.000440 CAT1 
 7 2016-10-31     0  4.14 0.0704 0.000425 CAT1 
 8 2016-11-01     1  3.98 0.0867 0.000417 CAT1 
 9 2016-11-02     0  6.00 0.0673 0.000453 CAT1 
10 2016-11-03     0  8.42 0.0877 0.000429 CAT1 
# ... with 39 more rows

数据:

# Reproducible example data: a 49-row tibble with a Date column `date`, a 0/1
# column `Y`, numeric columns X1-X6 (X2, X3 and X4 hold only whole numbers;
# X1, X5 and X6 hold fractional values) and a character column `ID`.
df <- structure(list(date = structure(c(17095, 17098, 17099, 17100, 
17101, 17102, 17105, 17106, 17107, 17108, 17109, 17112, 17113, 
17114, 17115, 17116, 17119, 17120, 17121, 17122, 17123, 17126, 
17127, 17128, 17130, 17133, 17134, 17135, 17136, 17137, 17140, 
17141, 17142, 17143, 17144, 17147, 17148, 17149, 17150, 17151, 
17154, 17155, 17156, 17157, 17158, 17162, 17163, 17164, 17165
), class = "Date"), Y = c(1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 
1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 
1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0), X1 = c(4.13858526513854, 
4.13858526513855, 4.16341131085939, 4.16341131085937, 4.16341131085937, 
4.11423119297315, 4.13858526513857, 3.97599968560627, 5.99758130881283, 
8.41953801047614, 7.95231443679086, 7.88558780320248, 7.6408950559188, 
4.51370117323327, 4.52868963859669, 4.39998987943623, 4.18852747359839, 
4.27042958796773, 2.30720560360487, 2.3083029424251, 2.3083029424251, 
2.30720560360488, 2.30720560360486, 2.33467572807035, 2.33467572807036, 
2.33467572807036, 2.30720560360486, 2.33467572807035, 2.31545097851707, 
2.3399413414153, 2.40685890963718, 2.40309440701756, 2.33633188340289, 
2.3363318834029, 2.23996107961566, 2.23996107961567, 2.23996107961566, 
3.14644299189703, 3.1910343925295, 3.4393907031427, 3.30413087760388, 
3.33080017630688, 2.63827508869038, 2.99443088216722, 2.99443088216723, 
2.99443088216722, 2.99443088216722, 3.14542139469794, 3.14542139469794
), X2 = c(18, 17, 16, 15, 14, 13, 12, 11, 10, 10, 10, 10, 10, 
10, 10, 10, 10, 10, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 
18, 17, 16, 15, 14, 13, 12, 11, 10, 10, 10, 10, 10, 87, 86, 85, 
84, 83, 82, 81), X3 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), X4 = c(1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1), X5 = c(0.0769944316144198, 0.0793311568823971, 0.0803743155230278, 
0.0625789438826206, 0.0586506192715035, 0.0610303101083243, 0.0703939970975855, 
0.0867098273608016, 0.0673120522212106, 0.0877296725069155, 0.0923742675361241, 
0.0821050363880187, 0.0360531976099817, 0.063410467337928, 0.0289807505667197, 
0.0403890038946993, 0.0587200889534704, 0.124855015077667, 0.0636602113103218, 
0.0748772236055617, 0.0828248414842617, 0.0588561607897347, 0.0437146614571738, 
0.0399432627968126, 0.0535895503558405, 0.0538598239712004, 0.0600971764378981, 
0.0543532803438423, 0.0412119504402689, 0.0445593481900316, 0.0471324573693227, 
0.0366910541674913, 0.0412784111781792, 0.0338162772274317, 0.0391189676384125, 
0.051151724195942, 0.0442197922283997, 0.0458769828703159, 0.0392536462039503, 
0.0397989336000519, 0.0293505218180493, 0.0229058449521028, 0.0154019371887762, 
0.0102366640366435, 0.00783792657548366, 0.014037818210456, 0.00900392496961011, 
0.0148108452415051, 0.0159871581537364), X6 = c(0.000428660536007568, 
0.000424348382531349, 0.000403672086504106, 0.000425772306880377, 
0.000441567036819891, 0.000440420473928468, 0.000424616565866307, 
0.000417156794102717, 0.000453102696396517, 0.000429420158272163, 
0.000426339236438714, 0.000424204011080916, 0.000450812884669126, 
0.00048728803860348, 0.000526461561504051, 0.000494106517096305, 
0.000493488610269819, 0.00041740609044358, 0.000329604373072286, 
0.000321981688032803, 0.000313820182149535, 0.000324018084037671, 
0.000321928021838835, 0.000325886279909115, 0.000324905583026473, 
0.000323263064904554, 0.000315582726878559, 0.00033115144688, 
0.000326176783596685, 0.000320421043513733, 0.000317459171547033, 
0.000306378296724892, 0.000304230982248009, 0.00031578316067723, 
0.000315783239223671, 0.000308443856342272, 0.000302387474982801, 
0.000302702247056619, 0.000300580888361005, 0.000298662388842681, 
0.000295031270763261, 0.000300955138678924, 0.000308009865186193, 
0.00031004060452567, 0.000308580481883199, 0.000309081734643359, 
0.000309347430761987, 0.00031610525741575, 0.000316030523318374
), ID = c("CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1")), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-49L))

2 个答案:

答案 0 :(得分:2)

使用 dplyr 的一种做法是:

# Split off the candidate columns (names starting with "X"), keep only those
# that contain at least one non-whole value, then re-attach the columns that
# were never candidates. The kept X columns therefore come first in the result.
df %>%
  select(starts_with("X")) %>%
  # `. %% 1` is the fractional part of each value. Non-numeric columns are kept
  # untouched, and na.rm = TRUE stops a missing value from turning the
  # predicate into NA instead of TRUE/FALSE.
  select_if(~ !is.numeric(.) || any(. %% 1 != 0, na.rm = TRUE)) %>%
  bind_cols(df %>%
              select(-starts_with("X")))

      X1     X5       X6 date           Y ID   
   <dbl>  <dbl>    <dbl> <date>     <dbl> <chr>
 1  4.14 0.0770 0.000429 2016-10-21     1 CAT1 
 2  4.14 0.0793 0.000424 2016-10-24     1 CAT1 
 3  4.16 0.0804 0.000404 2016-10-25     0 CAT1 
 4  4.16 0.0626 0.000426 2016-10-26     0 CAT1 
 5  4.16 0.0587 0.000442 2016-10-27     0 CAT1 
 6  4.11 0.0610 0.000440 2016-10-28     0 CAT1 
 7  4.14 0.0704 0.000425 2016-10-31     0 CAT1 
 8  3.98 0.0867 0.000417 2016-11-01     1 CAT1 
 9  6.00 0.0673 0.000453 2016-11-02     0 CAT1 
10  8.42 0.0877 0.000429 2016-11-03     0 CAT1 

答案 1 :(得分:1)

下面是另一种做法:用 purrr 的 imap_lgl 生成一个逻辑向量,再用它对列取子集(可保留列的原始顺序)。

library(purrr)

# Predicate for column selection: should column `x` (named `y`) be kept?
#
# x: a column vector.
# y: the name of that column (a single string).
#
# Returns a single TRUE/FALSE. Non-numeric columns and the column named "Y"
# are always kept; any other numeric column is kept only if it contains at
# least one non-whole value.
not_whole <- function(x, y) {
  # `&&` is the scalar, short-circuiting operator intended for `if` conditions.
  if (is.numeric(x) && y != "Y") {
    # `x %% 1` is the fractional part, which avoids as.integer(): that returns
    # NA (with a warning) for values beyond the integer range. na.rm = TRUE
    # keeps missing values from turning the result into NA, which could not be
    # used to subset columns.
    any(x %% 1 != 0, na.rm = TRUE)
  } else {
    TRUE
  }
}

# imap_lgl() calls not_whole(column, column_name) for every column of df and
# returns one logical per column; that vector then selects the columns to keep.
keep_col <- imap_lgl(df, not_whole)
df[keep_col]