我正在尝试借助 grep 的模式匹配和 dplyr 包的 rename_at 来重命名数据框(df)中的若干列,但代码产生了一个错误。下面是我的示例数据:
# Reproducible example data (dput() output): a 2-row data.frame.
# Columns: PolygonId, phi, Perimeter, then count/sum/mean triplets named
# <m|p><digits>{count,sum,mean}, followed by the same triplets with a "t_" prefix.
# Note that two of the prefixed names end in "coun" rather than "count"
# (t_p125coun, t_p175coun); the pipeline's pattern "co.*" matches both spellings.
# The object also carries a custom `data_types` attribute ("N" for every column).
sensdata_th <- structure(list(PolygonId = 0:1, phi = c(0.3, 0.3), Perimeter = c(10641.486,
8631.428), m25count = c(3455, 2284), m25sum = c(-695.151973724365,
365.065137863159), m25mean = c(-0.201201729008499, 0.159835874721173
), m5count = c(3455, 2284), m5sum = c(-2692.13891601562, -955.084840774536
), m5mean = c(-0.779200843998734, -0.418163240269061), m75count = c(3455,
2284), m75sum = c(-4685.67934608459, -2272.95643043518), m75mean = c(-1.35620241565401,
-0.995164811924335), p10count = c(3455, 2284), p10sum = c(9279.43661117554,
6958.97550773621), p10mean = c(2.68579930858916, 3.04683691231883
), p15count = c(3455, 2284), p15sum = c(13269.9573936462, 9596.99271965027
), p15mean = c(3.84079808788603, 4.2018356916157), p5count = c(3455,
2284), p5sum = c(5288.90923881531, 4320.95393943787), p5mean = c(1.53079862194365,
1.89183622567332), p125count = c(3455, 2284), p125sum = c(11276.4235534668,
8279.1254863739), p125mean = c(3.26379842357939, 3.62483602730906
), p175count = c(3455, 2284), p175sum = c(15266.950925827, 10917.1470546722
), p175mean = c(4.4187991102249, 4.77983671395457), p20count = c(3455,
2284), p20sum = c(17260.4847660065, 12235.0142879486), p20mean = c(4.99579877453154,
5.35683637826121), p75count = c(3455, 2284), p75sum = c(7285.89618110657,
5641.10391807556), p75mean = c(2.10879773693388, 2.46983534066356
), p25count = c(3455, 2284), p25sum = c(3295.37539863586, 3003.0867061615
), p25mean = c(0.953798957637009, 1.31483656136668), t_m25count = c(3455,
2284), t_m25sum = c(974.081996917725, 656.901090621948), t_m25mean = c(0.281934007790948,
0.287609934598051), t_m5count = c(3455, 2284), t_m5sum = c(346.412973403931,
146.117877960205), t_m5mean = c(0.100264247005479, 0.063974552521981
), t_m75count = c(3455, 2284), t_m75sum = c(73.7441272735596,
10.1595973968506), t_m75mean = c(0.021344175766587, 0.004448159981108
), t_p5count = c(3455, 2284), t_p5sum = c(5323.15208816528, 4320.95393943787
), t_p5mean = c(1.54070972161079, 1.89183622567332), t_p10count = c(3455,
2284), t_p10sum = c(9279.43661117554, 6958.97550773621), t_p10mean = c(2.68579930858916,
3.04683691231883), t_p125coun = c(3455, 2284), t_p125sum = c(11276.4235534668,
8279.1254863739), t_p125mean = c(3.26379842357939, 3.62483602730906
), t_p15count = c(3455, 2284), t_p15sum = c(13269.9573936462,
9596.99271965027), t_p15mean = c(3.84079808788603, 4.2018356916157
), t_p175coun = c(3455, 2284), t_p175sum = c(15266.950925827,
10917.1470546722), t_p175mean = c(4.4187991102249, 4.77983671395457
), t_p20count = c(3455, 2284), t_p20sum = c(17260.4847660065,
12235.0142879486), t_p20mean = c(4.99579877453154, 5.35683637826121
), t_p25count = c(3455, 2284), t_p25sum = c(3516.20801353455,
3003.0867061615), t_p25mean = c(1.01771577815761, 1.31483656136668
), t_p75count = c(3455, 2284), t_p75sum = c(7285.89618110657,
5641.10391807556), t_p75mean = c(2.10879773693388, 2.46983534066356
)), .Names = c("PolygonId", "phi", "Perimeter", "m25count", "m25sum",
"m25mean", "m5count", "m5sum", "m5mean", "m75count", "m75sum",
"m75mean", "p10count", "p10sum", "p10mean", "p15count", "p15sum",
"p15mean", "p5count", "p5sum", "p5mean", "p125count", "p125sum",
"p125mean", "p175count", "p175sum", "p175mean", "p20count", "p20sum",
"p20mean", "p75count", "p75sum", "p75mean", "p25count", "p25sum",
"p25mean", "t_m25count", "t_m25sum", "t_m25mean", "t_m5count",
"t_m5sum", "t_m5mean", "t_m75count", "t_m75sum", "t_m75mean",
"t_p5count", "t_p5sum", "t_p5mean", "t_p10count", "t_p10sum",
"t_p10mean", "t_p125coun", "t_p125sum", "t_p125mean", "t_p15count",
"t_p15sum", "t_p15mean", "t_p175coun", "t_p175sum", "t_p175mean",
"t_p20count", "t_p20sum", "t_p20mean", "t_p25count", "t_p25sum",
"t_p25mean", "t_p75count", "t_p75sum", "t_p75mean"), data_types = c("N",
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",
"N", "N", "N"), row.names = 1:2, class = "data.frame")
这是我正在处理的代码:
# Question's pipeline (kept as posted; the rename_at() step is the one that errors).
# Step 1: keep only the columns whose names do NOT match the pattern, i.e. drop
# the un-prefixed count/sum/mean columns and the "t_"-prefixed sum/count columns.
# grep(..., invert = TRUE) returns the indices of the non-matching names.
sensdata_th <- sensdata_th %>%
select(
colnames(.)[grep(
"^[mp]\\d{1,3}count$|^[mp]\\d{1,3}sum$|^[mp]\\d{1,3}mean$|^t_[mp]\\d{1,}sum$|^t_[mp]\\d{1,}co.*$",
colnames(.), invert = TRUE)]
) %>%
# Step 2: intended to strip "mean" from the names of the remaining t_* columns.
rename_at(
.vars = colnames(.)[grep("t_[mp]\\d{1,}", colnames(.))],
# NOTE(review): judging by the fix given in the answer, `.` inside funs() stands
# for the column names being renamed, not the piped data frame, so colnames(.)
# presumably does not return names here -- the likely cause of the `nm` error.
.funs = funs(gsub("mean","",colnames(.)[grep("t_[mp]\\d{1,}", colnames(.))]))
)
如果把最后的 rename_at 注释掉,一切都能正常运行;但带着 rename_at 运行代码时,会产生如下错误:
错误:`nm` 必须为 `NULL`,或是与 `x` 长度相同的字符向量
(英文原文:Error: `nm` must be `NULL` or a character vector the same length as `x`)
有人可以指出问题出在哪里吗?
答案 0 :(得分:2)
与其创建两次索引,不如提前创建一次:
# Collect, once, the names of every column that matches the t_<m|p><digits> pattern.
nm1 <- colnames(out)[grepl("t_[mp]\\d{1,}", colnames(out))]
# Rename exactly those columns; the new names are the old ones with "mean" removed.
rename_at(out, vars(nm1), funs(gsub("mean", "", nm1)))
# PolygonId phi Perimeter t_m25 t_m5 t_m75 t_p5 t_p10
#1 0 0.3 10641.486 0.2819340 0.10026425 0.02134418 1.540710 2.685799
#2 1 0.3 8631.428 0.2876099 0.06397455 0.00444816 1.891836 3.046837
# t_p125 t_p15 t_p175 t_p20 t_p25 t_p75
#1 3.263798 3.840798 4.418799 4.995799 1.017716 2.108798
#2 3.624836 4.201836 4.779837 5.356836 1.314837 2.469835
其中 out 的定义如下:
# Build `out` by dropping every column whose name matches the pattern (the
# un-prefixed count/sum/mean columns and the "t_"-prefixed sum/count columns).
# grep(..., invert = TRUE, value = TRUE) yields the non-matching names directly.
out <- sensdata_th %>%
  select(
    grep(
      "^[mp]\\d{1,3}count$|^[mp]\\d{1,3}sum$|^[mp]\\d{1,3}mean$|^t_[mp]\\d{1,}sum$|^t_[mp]\\d{1,}co.*$",
      colnames(.),
      invert = TRUE,
      value = TRUE
    )
  )
问题出在 rename_at 里的 funs 中:在 funs 内应使用 . 来代替 colnames(.)
# Corrected version of the question's pipeline.
sensdata_th %>%
  # Drop the un-prefixed count/sum/mean columns and the "t_"-prefixed sum/count
  # columns; value = TRUE returns the surviving names directly.
  select(
    grep(
      "^[mp]\\d{1,3}count$|^[mp]\\d{1,3}sum$|^[mp]\\d{1,3}mean$|^t_[mp]\\d{1,}sum$|^t_[mp]\\d{1,}co.*$",
      colnames(.),
      invert = TRUE,
      value = TRUE
    )
  ) %>%
  # Strip "mean" from the names of the remaining t_* columns.
  # Inside funs(), `.` is the character vector of names selected by `.vars`
  # (not the data frame), so it is passed to gsub() as-is: filtering it again
  # with the same regex would be a no-op, and colnames(.) would be wrong.
  # funs() is deprecated since dplyr 0.8.0; there `~ gsub("mean", "", .)` is
  # the equivalent lambda form.
  rename_at(
    .vars = grep("t_[mp]\\d{1,}", colnames(.), value = TRUE),
    .funs = funs(gsub("mean", "", .))
  )
# PolygonId phi Perimeter t_m25 t_m5 t_m75 t_p5 t_p10
#1 0 0.3 10641.486 0.2819340 0.10026425 0.02134418 1.540710 2.685799
#2 1 0.3 8631.428 0.2876099 0.06397455 0.00444816 1.891836 3.046837
# t_p125 t_p15 t_p175 t_p20 t_p25 t_p75
#1 3.263798 3.840798 4.418799 4.995799 1.017716 2.108798
#2 3.624836 4.201836 4.779837 5.356836 1.314837 2.469835