这是我的data.frame:
df.index= dput(df.index)
structure(list(Var1 = structure(c(43L, 42L, 46L, 33L, 29L), .Label = c("ABEV3",
"AEDU3", "ALLL3", "BBAS3", "BBDC3", "BBDC4", "BISA3", "BRAP4",
"BRFS3", "BRKM5", "BRML3", "BRPR3", "BVMF3", "CCRO3", "CESP6",
"CIEL3", "CMIG4", "CPFE3", "CPLE6", "CRUZ3", "CSAN3", "CSNA3",
"CTIP3", "CYRE3", "DASA3", "DTEX3", "ECOR3", "ELET3", "ELET6",
"ELPL4", "EMBR3", "ENBR3", "ESTC3", "EVEN3", "FIBR3", "GFSA3",
"GGBR4", "GOAU4", "GOLL4", "HGTX3", "HYPE3", "ITSA4", "ITUB4",
"JBSS3", "KLBN4", "KROT3", "LAME4", "LIGT3", "LREN3", "MRFG3",
"MRVE3", "NATU3", "OIBR4", "PCAR4", "PDGR3", "PETR3", "PETR4",
"QUAL3", "RENT3", "RSID3", "SANB11", "SBSP3", "SUZB5", "TBLE3",
"TIMP3", "UGPA3", "USIM5", "VALE3", "VALE5", "VIVT4"), class = "factor"),
Var2 = structure(c(42L, 43L, 33L, 46L, 28L), .Label = c("ABEV3",
"AEDU3", "ALLL3", "BBAS3", "BBDC3", "BBDC4", "BISA3", "BRAP4",
"BRFS3", "BRKM5", "BRML3", "BRPR3", "BVMF3", "CCRO3", "CESP6",
"CIEL3", "CMIG4", "CPFE3", "CPLE6", "CRUZ3", "CSAN3", "CSNA3",
"CTIP3", "CYRE3", "DASA3", "DTEX3", "ECOR3", "ELET3", "ELET6",
"ELPL4", "EMBR3", "ENBR3", "ESTC3", "EVEN3", "FIBR3", "GFSA3",
"GGBR4", "GOAU4", "GOLL4", "HGTX3", "HYPE3", "ITSA4", "ITUB4",
"JBSS3", "KLBN4", "KROT3", "LAME4", "LIGT3", "LREN3", "MRFG3",
"MRVE3", "NATU3", "OIBR4", "PCAR4", "PDGR3", "PETR3", "PETR4",
"QUAL3", "RENT3", "RSID3", "SANB11", "SBSP3", "SUZB5", "TBLE3",
"TIMP3", "UGPA3", "USIM5", "VALE3", "VALE5", "VIVT4"), class = "factor"),
time = structure(c(1L, 1L, 1L, 1L, 1L), class = "factor", .Label = "t")), class = "data.frame", row.names = c(NA,
-5L))
它是这样的:
Var1 Var2 time
1 ITUB4 ITSA4 t
2 ITSA4 ITUB4 t
3 KROT3 ESTC3 t
4 ESTC3 KROT3 t
5 ELET6 ELET3 t
我想将这3列连接成这样的文本:
"ITUB4~ITSA4+t" "ITSA4~ITUB4+t" "KROT3~ESTC3+t" "ESTC3~KROT3+t" "ELET6~ELET3+t"
我正在使用apply函数:
df.index=apply(df.index,1,paste,collapse="~+")
但是结果不对。问题在于我无法用“+”符号把第二列和第三列分隔开。怎样才能让第一列与第二列之间用“~”连接,而第二列与“t”变量之间用“+”连接?
我想要的结果是:
"ITUB4~ITSA4+t" "ITSA4~ITUB4+t" "KROT3~ESTC3+t" "ESTC3~KROT3+t" "ELET6~ELET3+t"
如上所述。
答案 0 :(得分:3)
我们可以直接使用向量化的paste0:
with(df.index, paste0(Var1, "~", Var2, "+", time))
#[1] "ITUB4~ITSA4+t" "ITSA4~ITUB4+t" "KROT3~ESTC3+t" "ESTC3~KROT3+t" "ELET6~ELET3+t"
由于OP提到想用apply得到结果,也可以将MARGIN指定为1(按行),对数据集的每一行应用paste0。不过paste本身已经是向量化的,因此这种逐行调用的做法效率会更低:
apply(df.index, 1, FUN = function(x) paste0(x[1], "~", x[2], "+", x[3]))
答案 1 :(得分:2)
如果您希望每一项都是公式(formula类的对象),可以执行以下操作。请注意,我首先用mutate_if将所有因子(factor)列转换为字符型(下面代码中的df即问题中的数据框df.index)。
library(tidyverse)
df <- df %>% mutate_if(is.factor, as.character) %>%
mutate(forms = map2(Var1, Var2, ~reformulate(c(.y, "t"), .x, TRUE)))
df
#> Var1 Var2 time forms
#> 1 ITUB4 ITSA4 t ITUB4 ~ ITSA4 + t
#> 2 ITSA4 ITUB4 t ITSA4 ~ ITUB4 + t
#> 3 KROT3 ESTC3 t KROT3 ~ ESTC3 + t
#> 4 ESTC3 KROT3 t ESTC3 ~ KROT3 + t
#> 5 ELET6 ELET3 t ELET6 ~ ELET3 + t
df$forms
#> [[1]]
#> ITUB4 ~ ITSA4 + t
#> <environment: 0x7fe3b5854c88>
#>
#> [[2]]
#> ITSA4 ~ ITUB4 + t
#> <environment: 0x7fe3b583d1a8>
#>
#> [[3]]
#> KROT3 ~ ESTC3 + t
#> <environment: 0x7fe3b58352f8>
#>
#> [[4]]
#> ESTC3 ~ KROT3 + t
#> <environment: 0x7fe3b58333a8>
#>
#> [[5]]
#> ELET6 ~ ELET3 + t
#> <environment: 0x7fe3b581c8a8>
由reprex package(v0.2.0)于2018-08-20创建。