我有一些类似于以下内容的数据:
# A tibble: 100 x 3
# Groups: Section, Quintiles [20]
S Q R
<chr> <chr> <dbl>
1 business quintile_1 -0.0167
2 business quintile_1 0.0202
3 business quintile_1 0.00797
4 business quintile_1 0.0492
5 business quintile_1 0.0193
6 business quintile_2 -0.0393
7 business quintile_2 0.0159
8 business quintile_2 0.0163
9 business quintile_2 0.0740
10 business quintile_2 0.0385
如何计算“business”-“quintile_1”与“business”-“quintile_5”之间的差值?
因此,我想创建一个新的“quintile_6”,它就是第一个和最后一个五分位数之间的差值。我曾尝试先用 spread 把数据展开成宽表,让每个五分位数各占一列,但我还有其他变量,结果很快就变成了数千列宽,所以我想知道是否有更“简单”的方法。
数据(注意:下面 dput 输出中的 groups 属性仍使用列名 Section/Quintiles,而数据列名为 S/Q/R;使用前可能需要先将分组列名改为 S/Q,或重新执行 group_by):
structure(list(S = c("business", "business", "business", "business",
"business", "business", "business", "business", "business", "business",
"business", "business", "business", "business", "business", "business",
"business", "business", "business", "business", "business", "business",
"business", "business", "business", "mgnt", "mgnt", "mgnt", "mgnt",
"mgnt", "mgnt", "mgnt", "mgnt", "mgnt", "mgnt", "mgnt", "mgnt",
"mgnt", "mgnt", "mgnt", "mgnt", "mgnt", "mgnt", "mgnt", "mgnt",
"mgnt", "mgnt", "mgnt", "mgnt", "mgnt", "qualitative", "qualitative",
"qualitative", "qualitative", "qualitative", "qualitative", "qualitative",
"qualitative", "qualitative", "qualitative", "qualitative", "qualitative",
"qualitative", "qualitative", "qualitative", "qualitative", "qualitative",
"qualitative", "qualitative", "qualitative", "qualitative", "qualitative",
"qualitative", "qualitative", "qualitative", "risk", "risk",
"risk", "risk", "risk", "risk", "risk", "risk", "risk", "risk",
"risk", "risk", "risk", "risk", "risk", "risk", "risk", "risk",
"risk", "risk", "risk", "risk", "risk", "risk", "risk"), Q = c("quintile_1",
"quintile_1", "quintile_1", "quintile_1", "quintile_1", "quintile_2",
"quintile_2", "quintile_2", "quintile_2", "quintile_2", "quintile_3",
"quintile_3", "quintile_3", "quintile_3", "quintile_3", "quintile_4",
"quintile_4", "quintile_4", "quintile_4", "quintile_4", "quintile_5",
"quintile_5", "quintile_5", "quintile_5", "quintile_5", "quintile_1",
"quintile_1", "quintile_1", "quintile_1", "quintile_1", "quintile_2",
"quintile_2", "quintile_2", "quintile_2", "quintile_2", "quintile_3",
"quintile_3", "quintile_3", "quintile_3", "quintile_3", "quintile_4",
"quintile_4", "quintile_4", "quintile_4", "quintile_4", "quintile_5",
"quintile_5", "quintile_5", "quintile_5", "quintile_5", "quintile_1",
"quintile_1", "quintile_1", "quintile_1", "quintile_1", "quintile_2",
"quintile_2", "quintile_2", "quintile_2", "quintile_2", "quintile_3",
"quintile_3", "quintile_3", "quintile_3", "quintile_3", "quintile_4",
"quintile_4", "quintile_4", "quintile_4", "quintile_4", "quintile_5",
"quintile_5", "quintile_5", "quintile_5", "quintile_5", "quintile_1",
"quintile_1", "quintile_1", "quintile_1", "quintile_1", "quintile_2",
"quintile_2", "quintile_2", "quintile_2", "quintile_2", "quintile_3",
"quintile_3", "quintile_3", "quintile_3", "quintile_3", "quintile_4",
"quintile_4", "quintile_4", "quintile_4", "quintile_4", "quintile_5",
"quintile_5", "quintile_5", "quintile_5", "quintile_5"), R = c(-0.0166774158167082,
0.0201596769556875, 0.00796992085297743, 0.0492147329548896,
0.019344865533839, -0.0393260514127627, 0.0159402689787551, 0.0162507344633192,
0.0740337591014227, 0.0384769820770539, -0.0302717090017819,
0.0254691625247841, 0.0122239330016886, 0.0446599436180717, 0.0289436320423226,
-0.0308599365345965, 0.0219191738217161, 0.0176853257846887,
0.0456353457446462, 0.0341367113786865, -0.0143715467524916,
0.0393541248460465, 0.00956169994553254, 0.045506941231113, 0.022679161458704,
-0.0105046406388283, 0.0113396747037768, -0.036155735142529,
0.0550687853246927, 0.0269238017499447, 0.0414200760424137, 0,
0.0369318500161171, 0.05479446798563, 0.0441558659076691, -0.032821835950017,
0.0359311569482088, -0.139999955892563, 0.0209301561117172, 0.104783609509468,
-0.155844137072563, -0.0610789265483618, 0.0988630047067999,
-0.0086556291207671, 0.0815064907073975, -0.0115016167983413,
-0.12346476316452, -0.0105703119188547, 0.092919297516346, 0.136621922254562,
-0.0196536407222738, 0.0265621797051281, 0.0121992440563654,
0.0340947461024625, 0.0114726169959482, -0.0152790856625264,
0.0265492763632932, 0.0222894305734672, 0.0476030515586719, 0.0257378459646134,
-0.0400270565959709, 0.0249571957657415, 0.0296065641893266,
0.0516467535933711, 0.0204032773069533, -0.0385776743255634,
0.0327908558900147, -0.00581402105550901, 0.0471795087541677,
0.0316287353740667, -0.0299233697188024, 0.0277489582011476,
0.0194428538125939, 0.0464771821653864, 0.0257204433555745, -0.0248971471088522,
0.0182182283776942, 0.0133596019044421, 0.0432659004262889, 0.0221001061009796,
-0.031518697262207, 0.0191115892525381, 0.0213770552106516, 0.0495591080020083,
0.0217964101541805, -0.0139142393788832, 0.0127029458612358,
0.0018533759915124, 0.0760172229180238, 0.0367249979117247, -0.0347955894141082,
0.0380760367129327, 0.0177375553170367, 0.0434032638099822, 0.0288078728030292,
-0.026367978979474, 0.0392411376774867, 0.0154176355104196, 0.040917916701852,
0.0218273582628919)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -100L), groups = structure(list(
Section = c("business", "business", "business", "business",
"business", "mgnt", "mgnt", "mgnt", "mgnt", "mgnt", "qualitative",
"qualitative", "qualitative", "qualitative", "qualitative",
"risk", "risk", "risk", "risk", "risk"), Quintiles = c("quintile_1",
"quintile_2", "quintile_3", "quintile_4", "quintile_5", "quintile_1",
"quintile_2", "quintile_3", "quintile_4", "quintile_5", "quintile_1",
"quintile_2", "quintile_3", "quintile_4", "quintile_5", "quintile_1",
"quintile_2", "quintile_3", "quintile_4", "quintile_5"),
.rows = list(1:5, 6:10, 11:15, 16:20, 21:25, 26:30, 31:35,
36:40, 41:45, 46:50, 51:55, 56:60, 61:65, 66:70, 71:75,
76:80, 81:85, 86:90, 91:95, 96:100)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
答案 0 :(得分:1)
可以尝试:
library(dplyr)

# Append a synthetic "quintile_6" level to `df`: within every S, the i-th
# quintile_5 value of R minus the i-th quintile_1 value of R.
bind_rows(
  df,
  df %>%
    filter(Q %in% c("quintile_1", "quintile_5")) %>%
    # Number the rows inside each S/Q cell so that the i-th quintile_1 row
    # can be paired with the i-th quintile_5 row.
    group_by(S, Q) %>%
    mutate(idx = row_number()) %>%
    # Collapse each (S, idx) pair to a single row. R is computed before Q is
    # overwritten because the R expression needs the original Q labels.
    group_by(S, idx) %>%
    summarise(
      R = R[Q == "quintile_5"] - R[Q == "quintile_1"],
      Q = "quintile_6"
    ) %>%
    ungroup() %>%
    # Drop the helper index and restore the S, Q, R column order. No
    # de-duplication is applied, so pairs that happen to produce the same
    # difference are all kept.
    select(S, Q, R)
)
或者使用 data.table 的方式:
library(data.table)

# Work on a data.table copy so that the caller's `df` is not converted in
# place (setDT() modifies its argument by reference).
dt <- as.data.table(df)

# Append a synthetic "quintile_6" level: within every S, the i-th quintile_5
# value of R minus the i-th quintile_1 value of R.
rbindlist(
  list(
    dt,
    dt[
      Q %in% c("quintile_1", "quintile_5"),
    ][
      # `idx` pairs the i-th quintile_1 row with the i-th quintile_5 row of
      # the same S. Inside .(), `Q` in the R expression still refers to the
      # original column, not to the new "quintile_6" constant.
      , .(Q = "quintile_6",
          R = R[Q == "quintile_5"] - R[Q == "quintile_1"]),
      by = .(S, idx = rowid(S, Q))
    ][, idx := NULL]  # helper index is not part of the result
  ),
  # Match columns by name rather than by position.
  use.names = TRUE
)