我正在查看螺栓的一些数据。如果我有例如
diameter thread
1 4
1 6
1 4
2 5
2 7
3 9
我想要一种方法来新建一列,标明每一行的螺纹尺寸在其所属直径中是最大的还是最小的。每个直径最多只有 2 种螺纹尺寸,但偶尔只有 1 种;在这种情况下,我希望把它标记为 large。例如:
diameter thread size
1 4 small
1 6 large
1 4 small
2 5 small
2 7 large
3 9 large
答案 0 :(得分:5)
使用dplyr
library(dplyr)
# Sample bolts: one row per (diameter, thread) observation.
data <- data.frame(
  diameter = c(1, 1, 2, 2, 3),
  thread = c(4, 6, 5, 7, 9)
)
# Within each diameter, label the row(s) holding the largest thread "large"
# and every other row "small". A diameter with a single thread is its own
# maximum, so it is labelled "large".
mutate(
  group_by(data, diameter),
  size = ifelse(thread == max(thread), "large", "small")
)
diameter thread size
(dbl) (dbl) (chr)
1 1 4 small
2 1 6 large
3 2 5 small
4 2 7 large
5 3 9 large
答案 1 :(得分:3)
这个怎么样(使用 base R):
# Label each row "large" when its thread equals the largest thread *within its
# own diameter*, otherwise "small".
# ave() returns the per-diameter maximum aligned row-by-row with dt, so the
# comparison never leaks across groups. (Testing `thread %in% <all group
# maxima>` would wrongly mark a thread as "large" whenever it happened to be
# the maximum of some other diameter.)
dt$size <- ifelse(
  dt$thread == ave(dt$thread, dt$diameter, FUN = max),
  "large",
  "small"
)
**输出**
diameter thread size
1 1 4 small
2 1 6 large
3 1 4 small
4 2 5 small
5 2 7 large
6 3 9 large
**数据**
# Sample data from the question: six bolts with integer diameter and thread.
dt <- data.frame(
  diameter = c(1L, 1L, 1L, 2L, 2L, 3L),
  thread = c(4L, 6L, 4L, 5L, 7L, 9L)
)
**基准测试(BENCHMARK)**
library(dplyr)
library(microbenchmark)
# Benchmark input: the six-row sample (integer diameter and thread columns).
dt <- data.frame(
  diameter = c(1L, 1L, 1L, 2L, 2L, 3L),
  thread = c(4L, 6L, 4L, 5L, 7L, 9L)
)
# dplyr variant: group the rows by diameter and add a `size` column that is
# "large" where thread equals the group maximum and "small" elsewhere.
# Returns the grouped result of mutate() unchanged (no ungroup()).
func_ZachTurn <- function(data) {
  grouped <- group_by(data, diameter)
  mutate(grouped, size = ifelse(thread == max(thread), "large", "small"))
}
# Base-R variant: add a `size` column to `dt` that is "large" for the row(s)
# holding the largest thread of their diameter and "small" otherwise.
#
# dt: data frame with `diameter` and `thread` columns.
# Returns `dt` with the extra character column `size`.
#
# The per-diameter maximum is computed with ave(), which yields one value per
# row aligned with `dt`. Comparing row-wise keeps the test inside each group;
# checking `thread %in% <all group maxima>` would mislabel a thread that is
# the maximum of a *different* diameter.
func_m0h3n <- function(dt) {
  group_max <- ave(dt$thread, dt$diameter, FUN = max)
  dt$size <- ifelse(dt$thread == group_max, "large", "small")
  dt
}
# data.table variant: add a `size` column that is "large" for the row(s) with
# the largest thread of their diameter and "small" otherwise.
#
# df: data frame (or data.table) with `diameter` and `thread` columns.
# Returns a data.table containing the original columns plus `size`.
#
# The input is converted with as.data.table() so the work happens on a copy.
# setDT() converts by reference, which would silently turn the caller's data
# frame into a data.table as a side effect of calling this function.
func_Psidom <- function(df) {
  out <- data.table::as.data.table(df)
  # (thread == max(thread)) + 1L is 1 for non-maximal rows and 2 for maximal
  # rows, used as an index into c("small", "large").
  out[
    , size := c("small", "large")[(thread == max(thread)) + 1L],
    by = .(diameter)
  ]
  # Trailing [] makes the result print normally after a `:=` update.
  out[]
}
# Helper for ave(): map the values of one group to label positions,
# 1L ("small") or 2L ("large"). A group with a single value is always 2L;
# otherwise the value(s) equal to the group maximum get 2L and the rest 1L.
f <- function(x) {
  if (length(x) == 1) {
    return(2L)
  }
  (x == max(x)) + 1L
}
# ave() variant: apply the helper `f` (defined alongside this function) to
# the thread values of each diameter group, and use the resulting positions
# to pick "small" / "large" for a new `size` column. Returns `dat` with the
# added column.
func_docendo.discimus <- function(dat) {
  labels <- c("small", "large")
  position <- ave(dat$thread, dat$diameter, FUN = f)
  dat$size <- labels[position]
  dat
}
# split/unsplit variant: split the thread values by diameter, label each
# group's maximum "large" and the rest "small", then put the labels back in
# the original row order. The new `size` column is stored as a factor.
# Returns `df` with the added column.
func_Ernest.A <- function(df) {
  threads_by_diameter <- split(df$thread, df$diameter)
  labels_by_diameter <- lapply(threads_by_diameter, function(x) {
    ifelse(x == max(x), "large", "small")
  })
  df$size <- factor(unsplit(labels_by_diameter, df$diameter))
  df
}
# Reference result: the dplyr implementation applied to the sample data.
r <- func_ZachTurn(dt)
# Compare every other implementation with the reference, element by element.
# The comment after each call records the console output that was observed.
all(r == func_m0h3n(dt))
# [1] TRUE
all(r == func_docendo.discimus(dt))
# [1] TRUE
all(r == func_Ernest.A(dt))
# [1] TRUE
# func_Psidom's result is converted with as.data.frame() before comparing.
all(r == as.data.frame(func_Psidom(dt)))
# [1] TRUE
# Time all five implementations on the same input; the recorded timings
# follow as comments.
microbenchmark(func_ZachTurn(dt), func_m0h3n(dt), func_docendo.discimus(dt), func_Ernest.A(dt), func_Psidom(dt))
# Unit: microseconds
# expr min lq mean median uq max neval
# func_ZachTurn(dt) 3477.835 3609.147 3833.5482 3679.079 3860.6490 7136.169 100
# func_m0h3n(dt) 4436.367 4601.042 4879.2726 4743.474 4859.8150 8578.031 100
# func_docendo.discimus(dt) 854.168 923.673 999.2991 956.180 992.9645 4422.252 100
# func_Ernest.A(dt) 1032.101 1086.636 1165.4361 1129.195 1167.9040 4882.057 100
# func_Psidom(dt) 1537.245 1622.577 1731.0602 1678.822 1742.3395 5424.840 100
答案 2 :(得分:2)
这是一个使用 ave 按 diameter 分组的 base R 方案。我还创建了一个小辅助函数 f 来提高可读性。
# Helper f: turn the values of one group into label positions. A group with
# a single value yields 2L; otherwise values equal to the group maximum
# yield 2L and all others 1L.
f <- function(x) {
  is_largest <- if (length(x) == 1) 1L else x == max(x)
  is_largest + 1L
}
# Run f over the thread values of each diameter group, then translate the
# resulting positions (1 or 2) into the labels "small" / "large".
dat[["size"]] <- c("small", "large")[
  ave(dat[["thread"]], dat[["diameter"]], FUN = f)
]
# diameter thread size
#1 1 4 small
#2 1 6 large
#3 2 5 small
#4 2 7 large
#5 3 9 large
答案 3 :(得分:1)
以下是 dplyr 解决方案:
library(dplyr);
# For every diameter, mark the row(s) with the largest thread as "large" and
# all others as "small". Comparing against max() (rather than labelling the
# minimum "small") makes a diameter that has only one thread come out as
# "large", which is what the question asks for and what the output shows.
df %>%
  group_by(diameter) %>%
  mutate(size = ifelse(thread == max(thread), "large", "small"))
Source: local data frame [5 x 3]
Groups: diameter [3]
diameter thread size
(int) (int) (chr)
1 1 4 small
2 1 6 large
3 2 5 small
4 2 7 large
5 3 9 large
或使用 data.table:
# Convert df to a data.table (setDT() does this by reference), then add the
# `size` column per diameter: index 2 ("large") for rows whose thread equals
# the group maximum, index 1 ("small") for the rest.
data.table::setDT(df)
df[, size := c("small", "large")[1L + (thread == max(thread))],
   by = .(diameter)]
# Print the updated table.
df
diameter thread size
1: 1 4 small
2: 1 6 large
3: 2 5 small
4: 2 7 large
5: 3 9 large
答案 4 :(得分:1)
使用 lapply
+ split
+ ifelse
unsplit