我有两个数据框。第一个数据框(df1)只有 1 行 3 列,第二个数据框(df2)有 6 行 3 列。现在,我想用 df2 每一行的值减去 df1 中对应列的值。
样本数据:
# Single-row data frame holding the value to subtract from each column.
df1 <- data.frame(col1 = 2L, col2 = 3L, col3 = 4L)

# Six-row data frame whose values are reduced by the matching df1 column.
df2 <- data.frame(
  col1 = c(1L, 2L, 4L, 5L, 6L, 3L),
  col2 = c(1L, 2L, 4L, 3L, 5L, 7L),
  col3 = c(6L, 4L, 3L, 6L, 4L, 6L)
)
最终输出应为
# Expected result of subtracting df1's values from every row of df2.
output <- data.frame(
  col1 = c(-1L, 0L, 2L, 3L, 4L, 1L),
  col2 = c(-2L, -1L, 1L, 0L, 2L, 4L),
  col3 = c(2L, 0L, -1L, 2L, 0L, 2L)
)
答案 0 :(得分:4)
尝试一下。
# Creating datasets
df1 <- data.frame(col1 = 2L, col2 = 3L, col3 = 4L)
df2 <- data.frame(
  col1 = c(1L, 2L, 4L, 5L, 6L, 3L),
  col2 = c(1L, 2L, 4L, 3L, 5L, 7L),
  col3 = c(6L, 4L, 3L, 6L, 4L, 6L)
)

# Output
# Subtract df1's value from the df2 column of the same name. Map() always
# returns a list with one element per column, so the result keeps its
# rows x columns shape even when df2 has a single row. sapply() would collapse
# that case to a plain vector, which data.frame() turns into a 3 x 1 frame,
# and it would also coerce all columns to one common type via a matrix.
diff_df <- as.data.frame(Map(function(x, y) x - y, df2[names(df1)], df1))
diff_df
#   col1 col2 col3
# 1   -1   -2    2
# 2    0   -1    0
# 3    2    1   -1
# 4    3    0    2
# 5    4    2    0
# 6    1    4    2
答案 1 :(得分:3)
如果您直接计算 df2 - df1,会得到如下错误:
# Direct subtraction fails here: df1 has 1 row while df2 has 6, and `-` on two
# data frames requires both operands to be the same size.
df2 - df1
Ops.data.frame(df2,df1)中的错误: “-”仅适用于大小相等的数据帧
因此,我们先重复 df1 的行,使其大小与 df2 相同,然后再相减:
# Repeat df1's row indices nrow(df2) times so that both operands have the
# same number of rows, then subtract element-wise.
row_idx <- rep(seq_len(nrow(df1)), times = nrow(df2))
df2 - df1[row_idx, ]
#   col1 col2 col3
# 1   -1   -2    2
# 2    0   -1    0
# 3    2    1   -1
# 4    3    0    2
# 5    4    2    0
# 6    1    4    2
另一个选择是使用 mapply,无需复制行:
# Walk over the columns of df2 and df1 in parallel and subtract each pair.
mapply(function(lhs, rhs) lhs - rhs, df2, df1)
这会返回一个矩阵;如果您想得到数据框,可以再转换一下:
# SIMPLIFY = FALSE keeps the result as a list with one element per column, so
# the data frame has one column per input column even when df2 has only one
# row. With the default simplification that case collapses to a plain vector,
# which data.frame() would turn into a 3 x 1 frame.
as.data.frame(mapply("-", df2, df1, SIMPLIFY = FALSE))
#   col1 col2 col3
# 1   -1   -2    2
# 2    0   -1    0
# 3    2    1   -1
# 4    3    0    2
# 5    4    2    0
# 6    1    4    2
答案 2 :(得分:2)
我们可以使用 sweep :
# Sweep df1's values out of df2 along its columns (MARGIN = 2) by subtraction.
x <- sweep(df2, MARGIN = 2, STATS = unlist(df1), FUN = "-")

# Check that the result matches the expected output
identical(output, x)
# [1] TRUE
请注意,它的耗时大约是 mapply 的两倍:
# Larger input for timing: three columns of uniform random draws.
n_rows <- 100000
df2big <- data.frame(
  col1 = runif(n_rows),
  col2 = runif(n_rows),
  col3 = runif(n_rows)
)

# Time the three approaches on the same data.
microbenchmark::microbenchmark(
  mapply = data.frame(mapply("-", df2big, df1)),
  sapply = data.frame(
    sapply(names(df1), function(i) df2big[[i]] - df1[[i]])
  ),
  sweep = sweep(df2big, 2, unlist(df1), "-")
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# mapply 5.239638 7.645213 11.49182 8.514876 9.345765 60.60949 100
# sapply 5.250756 5.518455 10.94827 8.706027 10.091841 59.09909 100
# sweep 10.572785 13.912167 21.18537 14.985525 16.737820 64.90064 100