使用dplyr在数据框中创建按特定条件填充的列

时间:2018-09-19 18:35:24

标签: r dplyr

我的数据框有两列:

    df<-structure(c(3.39731077987836, 3.35113905626126, 3.54151558185337, 
3.32908416124226, 3.11708556895431, 2.63033674090622, 3.26358791285813, 
3.285473899684, 2.11527332084524, 1.25521701811421, 0.558756742281551, 
-0.478780166461706, -0.667471284066706, -0.814286812271371, -1.4892267510757, 
-1.95734076424983, -2.39782989530402, -2.10637673680131, -1.79945196199986, 
-0.699075436407749, -0.686257092778993, -0.397822856743792, -0.0798214490317454, 
-0.532685697966011, -0.502453448481614, 0.124038497996675, -0.183209858531146, 
-0.207727844806884, -0.42181300275947, -1.55064839647219, -0.940915675467525, 
-0.282337163324608, -0.282104913840212, 0.0517233625425734, -0.0959578223084026, 
-0.356446953362597, -0.586070427770486, -0.00389588872917584, 
-0.706760137663439, -0.499624386597704, -0.265363281891766, 0.335157519808067, 
-0.0564082441567325, -1.04811828422925, -1.25133778553553, -1.19362493003892, 
-1.51921954922526, -1.66301615496241, -1.03652420848412, -1.00636953495087, 
-1.06613728546647, -0.991731904652815, -1.46358622083305, -1.41903656391168, 
-1.1216122606305, -1.57166856336155, -1.49467708840353, -1.21322392990081, 
-0.75881854908715, -0.418730575710473, -0.0800077874598371, 0.240368738132275, 
0.0506676877732472, -0.157657301665174, -0.327569328288499, -0.305106237031753, 
-0.0708451323258146, 0.339675669374017, 0.435446235313711, 0.323159090810323, 
0.305477905959347, 0.500815643575891, 0.697451866161896, 0.504088088747904, 
0.185252695035182, 0.149946628064277, 0.098092311884041, -0.11973314907465, 
-0.0321645696857616, -0.430422858967607, 0.0560690875106806, 
0.079253559306016, 0.651283822239604, 0.302448428526886, 0.282680678011282, 
0.137663163255812, 0.0376068605247707, 0.175752544344534, 0.619292268511883, 
0.252188188091779, 0.629112962893222, 0.504017872186609, 0.236836215815368, 
-0.235883757011178, -0.341334100089804, -0.00419834902406702, 
0.0184090183403975, 0.710295005166268, 0.514988938195361, 0.470115699547611, 
0.528261383367372, 0.569070737282641, -0.0622064744667214, -0.597656817545347, 
-0.783107160623972, -0.670433093102934, -0.638114277953906, -0.705795462804879, 
-0.793188095440416, -0.502023489153139, -0.202656896315051, -0.335732121513609, 
-0.624201387059748, -0.282459676341596, -0.598342847743377, -0.705957332437773, 
-0.547667372510291, -0.703550543912073, -0.958645634618686, -0.748990489565163, 
-0.367393054954727, 0.188666063200407, 0.852927167906345, 1.13423605030134, 
1.40015089234876, 1.02844085227624, 0.964499970431373, 1.73250137814342, 
2.08424308886157, 1.74360968169966, 1.58729368197473, 1.30911296864402, 
1.04855713743325, 1.19375059894207, 1.33729032310942, 0.911695703923079, 
0.79343741464123, 0.976832862700858, 0.682248176268544, 0.479172951069985, 
0.593356479824784, 0.825741995130385, 0.201656837177799, 0.0877159553329318, 
-0.200253781733476, -0.281464293326265, -0.423751437829655, -0.849923161446871, 
-0.548758555159591, -0.775574083364255, -0.812678163784358, -0.918994163509289, 
-0.71214696465899, -0.987741583845334, -0.999307347810134, -0.264546714624464, 
-0.110574162133963, 0.445984484500902, 0.453985892212947, 0.342420128248148, 
0.344738943397175, 0.105759273576736, 0.00051990676240643, -0.440113500399508, 
-0.669592698699677, -0.851879843203069, -0.589128199730886, -0.328107869551325, 
-0.253702488737667, -0.260806569157768, -0.0286320301164601, 
0.183398232817129, 0.0670822330921953, 0.157891587007462, -0.261587611292705, 
-0.320423005005425, -0.704508162958012, -0.894420189581332, -0.835053596743246, 
-0.499715859126705, -0.482868660276407, -0.675799609367244, -0.894779279187683, 
-1.28857588492483, -1.75445905632661, -1.27811138595604, -1.26262937223178, 
-1.16361803176571, -0.725183795730507, -0.505528650676985, -0.866084481887756, 
-1.14240048161269, -0.924687626116082, -1.22309019150565, -0.949406191230579, 
-0.314712258201487, -0.191172534034138, 0.259636819881127, 0.0582153320240489, 
-0.397812115485449, -0.393406734671792, -0.201598323797064, -0.0677700474142788, 
0.584115939411592, 0.347944215794377, 0.773070977146622, 0.653014674415583, 
1.01173746266622, 0.958085133036788, 0.961624857204136, 1.12883818422102, 
1.2553301306993, 1.11045689205155, 1.1811219698591, 1.67588260304477, 
2.11136461676903, 1.56742373492416, 1.14873261731916, 1.60076288025275, 
1.30085085362943, 1.64215973602443, 0.943546194370568, 0.645076928824428, 
0.905164902201106, 1.04165684867939, 0.744552768259294, 1.02046761030671, 
0.953718782258619, 0.627979886964559, 0.420442978221301, 0.160963779921135, 
-0.263409930246887, -0.277350812091753, -0.239205128271991, -0.364511195242898, 
-0.0344232218662192, -0.376277538046457, -0.197843302011256, 
0.0770616072821318, -0.304504156682666, -0.229954499761293, -1.07821278904314, 
-0.954884041140085, -0.682276673775623, -0.435351898974179, -0.449226080662472, 
-0.17438787152566, -0.156530739921331, -1.56910643664016, -1.55823038256832, 
3.39731077987836, 3.35113905626126, 3.54151558185337, 3.32908416124226, 
3.11708556895431, 2.63033674090622, 3.26358791285813, 3.285473899684, 
2.11527332084524, 1.25521701811421, 0.558756742281551, -0.478780166461706, 
-0.667471284066706, -0.814286812271371, -1.4892267510757, -1.95734076424983, 
-2.39782989530402, -2.10637673680131, -1.79945196199986, -0.699075436407749, 
-0.686257092778993, -0.397822856743792, -0.0798214490317454, 
-0.532685697966011, -0.502453448481614, 0.124038497996675, -0.183209858531146, 
-0.207727844806884, -0.42181300275947, -1.55064839647219, -0.940915675467525, 
-0.282337163324608, -0.282104913840212, 0.0517233625425734, -0.0959578223084026, 
-0.356446953362597, -0.586070427770486, -0.00389588872917584, 
-0.706760137663439, -0.499624386597704, -0.265363281891766, 0.335157519808067, 
-0.0564082441567325, -1.04811828422925, -1.25133778553553, -1.19362493003892, 
-1.51921954922526, -1.66301615496241, -1.03652420848412, -1.00636953495087, 
-1.06613728546647, -0.991731904652815, -1.46358622083305, -1.41903656391168, 
-1.1216122606305, -1.57166856336155, -1.49467708840353, -1.21322392990081, 
-0.75881854908715, -0.418730575710473, -0.0800077874598371, 0.240368738132275, 
0.0506676877732472, -0.157657301665174, -0.327569328288499, -0.305106237031753, 
-0.0708451323258146, 0.339675669374017, 0.435446235313711, 0.323159090810323, 
0.305477905959347, 0.500815643575891, 0.697451866161896, 0.504088088747904, 
0.185252695035182, 0.149946628064277, 0.098092311884041, -0.11973314907465, 
-0.0321645696857616, -0.430422858967607, 0.0560690875106806, 
0.079253559306016, 0.651283822239604, 0.302448428526886, 0.282680678011282, 
0.137663163255812, 0.0376068605247707, 0.175752544344534, 0.619292268511883, 
0.252188188091779, 0.629112962893222, 0.504017872186609, 0.236836215815368, 
-0.235883757011178, -0.341334100089804, -0.00419834902406702, 
0.0184090183403975, 0.710295005166268, 0.514988938195361, 0.470115699547611, 
0.528261383367372, 0.569070737282641, -0.0622064744667214, -0.597656817545347, 
-0.783107160623972, -0.670433093102934, -0.638114277953906, -0.705795462804879, 
-0.793188095440416, -0.502023489153139, -0.202656896315051, -0.335732121513609, 
-0.624201387059748, -0.282459676341596, -0.598342847743377, -0.705957332437773, 
-0.547667372510291, -0.703550543912073, -0.958645634618686, -0.748990489565163, 
-0.367393054954727, 0.188666063200407, 0.852927167906345, 1.13423605030134, 
1.40015089234876, 1.02844085227624, 0.964499970431373, 1.73250137814342, 
2.08424308886157, 1.74360968169966, 1.58729368197473, 1.30911296864402, 
1.04855713743325, 1.19375059894207, 1.33729032310942, 0.911695703923079, 
0.79343741464123, 0.976832862700858, 0.682248176268544, 0.479172951069985, 
0.593356479824784, 0.825741995130385, 0.201656837177799, 0.0877159553329318, 
-0.200253781733476, -0.281464293326265, -0.423751437829655, -0.849923161446871, 
-0.548758555159591, -0.775574083364255, -0.812678163784358, -0.918994163509289, 
-0.71214696465899, -0.987741583845334, -0.999307347810134, -0.264546714624464, 
-0.110574162133963, 0.445984484500902, 0.453985892212947, 0.342420128248148, 
0.344738943397175, 0.105759273576736, 0.00051990676240643, -0.440113500399508, 
-0.669592698699677, -0.851879843203069, -0.589128199730886, -0.328107869551325, 
-0.253702488737667, -0.260806569157768, -0.0286320301164601, 
0.183398232817129, 0.0670822330921953, 0.157891587007462, -0.261587611292705, 
-0.320423005005425, -0.704508162958012, -0.894420189581332, -0.835053596743246, 
-0.499715859126705, -0.482868660276407, -0.675799609367244, -0.894779279187683, 
-1.28857588492483, -1.75445905632661, -1.27811138595604, -1.26262937223178, 
-1.16361803176571, -0.725183795730507, -0.505528650676985, -0.866084481887756, 
-1.14240048161269, -0.924687626116082, -1.22309019150565, -0.949406191230579, 
-0.314712258201487, -0.191172534034138, 0.259636819881127, 0.0582153320240489, 
-0.397812115485449, -0.393406734671792, -0.201598323797064, -0.0677700474142788, 
0.584115939411592, 0.347944215794377, 0.773070977146622, 0.653014674415583, 
1.01173746266622, 0.958085133036788, 0.961624857204136, 1.12883818422102, 
1.2553301306993, 1.11045689205155, 1.1811219698591, 1.67588260304477, 
2.11136461676903, 1.56742373492416, 1.14873261731916, 1.60076288025275, 
1.30085085362943, 1.64215973602443, 0.943546194370568, 0.645076928824428, 
0.905164902201106, 1.04165684867939, 0.744552768259294, 1.02046761030671, 
0.953718782258619, 0.627979886964559, 0.420442978221301, 0.160963779921135, 
-0.263409930246887, -0.277350812091753, -0.239205128271991, -0.364511195242898, 
-0.0344232218662192, -0.376277538046457, -0.197843302011256, 
0.0770616072821318, -0.304504156682666, -0.229954499761293, -1.07821278904314, 
-0.954884041140085, -0.682276673775623, -0.435351898974179, -0.449226080662472, 
-0.17438787152566, -0.156530739921331, -1.56910643664016, -1.55823038256832
), .Dim = c(250L, 2L), .Dimnames = list(NULL, c("columnA", "columnB"
)))

我需要在columnA和columnB旁边各创建一列,并用0、1、2、-1或-2填充。

columnA旁边的列在其值介于1.016414和-1.016414之间时将接收值0,而在其值在1.016414和2 * 1.016414之间时将接收到值1。当它大于2 * 1.016414时,它将收到值2。如果它小于-2 * 1.016414,它将接收-2值。如果其值在-1 * 1.016414和2 *-1.016414之间,则它将接收-1值。

那么,这将是列columnA旁边的列。

要在columnB旁边创建列,我将使用相同的条件。

最后,我将获得一个包含4列的数据框。

是否可以使用dplyr软件包执行此操作?

3 个答案:

答案 0 :(得分:2)

使用case_when的替代选项:

library(dplyr)

data.frame(df) %>%
  mutate_all(funs(new = case_when(between(., -1.016414, 1.016414) ~ 0,
                                  between(., 1.016414, 2*1.016414) ~ 1,
                                  . > 2*1.016414 ~ 2,
                                  . < -2*1.016414 ~ -2,
                                  TRUE ~ -1))) %>%
  tbl_df()   # only for visualisation purposes

# # A tibble: 250 x 4
#    columnA columnB columnA_new columnB_new
#      <dbl>   <dbl>       <dbl>       <dbl>
#  1    3.40    3.40           2           2
#  2    3.35    3.35           2           2
#  3    3.54    3.54           2           2
#  4    3.33    3.33           2           2
#  5    3.12    3.12           2           2
#  6    2.63    2.63           2           2
#  7    3.26    3.26           2           2
#  8    3.29    3.29           2           2
#  9    2.12    2.12           2           2
# 10    1.26    1.26           1           1
# # ... with 240 more rows

请注意,case_when是按顺序工作的,而between的工作方式相当于x >= left & x <= right(两端都包含)。因此,如果您有一个恰好等于1.016414的值,它会先满足第一个between并得到0;由于case_when只采用第一个匹配的条件,第二个between不会再把它更新为1。因此,该过程将为该值返回0。

答案 1 :(得分:1)

您可以通过按预先设定的截止值剪切向量来实现。由于您希望重复执行此操作,因此可以将其包装在一个函数中。

# Classify each element of a numeric vector into one of five bands around
# zero, using 1.016414 and 2 * 1.016414 as cut-offs.
#
# Bands (right-closed, as with cut()'s default right = TRUE):
#   (-Inf, -2c] -> -2,  (-2c, -c] -> -1,  (-c, c] -> 0,
#   (c, 2c]     ->  1,  (2c, Inf) ->  2          where c = 1.016414
#
# x: numeric vector.
# Returns a numeric vector of the same length as x; NA inputs give NA.
customClassification <- function(x) {
  cutoff <- 1.016414
  # One code per interval, in ascending order. The second entry must be -1:
  # labelling it 1 would give moderately negative values the same class as
  # moderately positive ones.
  codes <- c(-2, -1, 0, 1, 2)
  # labels = FALSE makes cut() return the interval index (1..5) directly, so
  # no factor -> character -> numeric round trip is needed.
  bin <- cut(x,
             breaks = c(-Inf, -2 * cutoff, -cutoff, cutoff, 2 * cutoff, Inf),
             labels = FALSE)
  codes[bin]
}


# Append the classification of columnA as a new column next to the data.
# The data object is named `df`, so it is used here instead of `test`.
xy <- cbind(df, classA = customClassification(df[, "columnA"]))
head(xy, 20)

         columnA    columnB classA
 [1,]  3.3973108  3.3973108      2
 [2,]  3.3511391  3.3511391      2
 [3,]  3.5415156  3.5415156      2
 [4,]  3.3290842  3.3290842      2
 [5,]  3.1170856  3.1170856      2
 [6,]  2.6303367  2.6303367      2
 [7,]  3.2635879  3.2635879      2
 [8,]  3.2854739  3.2854739      2
 [9,]  2.1152733  2.1152733      2
[10,]  1.2552170  1.2552170      1
[11,]  0.5587567  0.5587567      0
[12,] -0.4787802 -0.4787802      0
[13,] -0.6674713 -0.6674713      0
[14,] -0.8142868 -0.8142868      0
[15,] -1.4892268 -1.4892268      1
[16,] -1.9573408 -1.9573408      1
[17,] -2.3978299 -2.3978299     -2
[18,] -2.1063767 -2.1063767     -2
[19,] -1.7994520 -1.7994520      1
[20,] -0.6990754 -0.6990754      0

答案 2 :(得分:0)

这是您需要的:

  # Classify values into -2, -1, 0, 1 or 2 by how far they lie from zero,
  # using 1.016414 and 2 * 1.016414 as cut-offs.
  #
  #   |x| <= c        ->  0
  #   c < |x| <= 2c   ->  1 or -1 (sign of x)
  #   |x| > 2c        ->  2 or -2 (sign of x)      where c = 1.016414
  #
  # Values lying exactly on a cut-off are assigned to the band closer to
  # zero, so every input gets a class. The function is vectorised and
  # returns NA for NA input.
  #
  # x: numeric vector (a single value works too).
  # Returns a numeric vector of the same length as x.
  CompFun <- function(x) {
    cutoff <- 1.016414
    # left.open = TRUE makes the bands right-closed:
    # (-Inf, c] -> 0, (c, 2c] -> 1, (2c, Inf) -> 2.
    band <- findInterval(abs(x), c(cutoff, 2 * cutoff), left.open = TRUE)
    # Restore the sign of the input; as.numeric() keeps the result a double.
    as.numeric(sign(x) * band)
  }
library("dplyr")
# Turn df into a data frame so its columns can be referenced by name.
df <- as.data.frame(df)
# rowwise() makes mutate() call CompFun() once per row.
res <- mutate(
  rowwise(df),
  ColAA = CompFun(columnA),
  colBB = CompFun(columnB)
)
head(res)
   #  columnA columnB ColAA colBB
   #    <dbl>   <dbl> <dbl> <dbl>
   #1    3.40    3.40    2.    2.
   #2    3.35    3.35    2.    2.
   #3    3.54    3.54    2.    2.
   #4    3.33    3.33    2.    2.
   #5    3.12    3.12    2.    2.
   #6    2.63    2.63    2.    2.