我有下面这个数据集,想把它转换成类似于 R 中 mtcars 数据集的形式。
所谓“类似”,是指 mtcars 的第一列是没有列名的汽车名称,也就是说它是用行名(rownames)来标识每一行的。
我的数据集目前的行名是数字,我想把它们换成作者的名字。在 R 中有办法做到这一点吗?
在John的评论后编辑
# Sample data in dput() form: a data.frame with 6 rows and 12 columns.
#   author   - factor; all six rows use level 1 ("dispt") out of 5 levels.
#   filename - factor with codes 1:6; the level vector still lists many
#              file names that these six rows do not use.
#   a ... be - numeric columns, one per word (presumably relative word
#              frequencies per file -- TODO confirm).
# row.names = c(NA, 6L) is the compact encoding of the integer row names 1..6.
# NOTE(review): the expression is not assigned to a name here; the answer
# further down appears to refer to this data as `df`.
structure(list(author = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("dispt",
"Hamilton", "HM", "Jay", "Madison"), class = "factor"), filename = structure(1:6, .Label = c("dispt_fed_49.txt",
"dispt_fed_50.txt", "dispt_fed_51.txt", "dispt_fed_52.txt", "dispt_fed_53.txt",
"dispt_fed_54.txt", "dispt_fed_55.txt", "dispt_fed_56.txt", "dispt_fed_57.txt",
"dispt_fed_62.txt", "dispt_fed_63.txt", "Hamilton_fed_1.txt",
"Hamilton_fed_11.txt", "Hamilton_fed_12.txt", "Hamilton_fed_13.txt",
"Hamilton_fed_15.txt", "Hamilton_fed_16.txt", "Hamilton_fed_17.txt",
"Hamilton_fed_21.txt", "Hamilton_fed_22.txt", "Hamilton_fed_23.txt",
"Hamilton_fed_24.txt", "Hamilton_fed_25.txt", "Hamilton_fed_26.txt",
"Hamilton_fed_27.txt", "Hamilton_fed_28.txt", "Hamilton_fed_29.txt",
"Hamilton_fed_30.txt", "Hamilton_fed_31.txt", "Hamilton_fed_32.txt",
"Hamilton_fed_33.txt", "Hamilton_fed_34.txt", "Hamilton_fed_35.txt",
"Hamilton_fed_36.txt", "Hamilton_fed_59.txt", "Hamilton_fed_6.txt",
"Hamilton_fed_60.txt", "Hamilton_fed_61.txt", "Hamilton_fed_65.txt",
"Hamilton_fed_66.txt", "Hamilton_fed_67.txt", "Hamilton_fed_68.txt",
"Hamilton_fed_69.txt", "Hamilton_fed_7.txt", "Hamilton_fed_70.txt",
"Hamilton_fed_71.txt", "Hamilton_fed_72.txt", "Hamilton_fed_73.txt",
"Hamilton_fed_74.txt", "Hamilton_fed_75.txt", "Hamilton_fed_76.txt",
"Hamilton_fed_77.txt", "Hamilton_fed_78.txt", "Hamilton_fed_79.txt",
"Hamilton_fed_8.txt", "Hamilton_fed_80.txt", "Hamilton_fed_81.txt",
"Hamilton_fed_82.txt", "Hamilton_fed_83.txt", "Hamilton_fed_84.txt",
"Hamilton_fed_85.txt", "Hamilton_fed_9.txt", "HM_fed_18.txt",
"HM_fed_19.txt", "HM_fed_20.txt", "Jay_fed_2.txt", "Jay_fed_3.txt",
"Jay_fed_4.txt", "Jay_fed_5.txt", "Jay_fed_64.txt", "Madison_fed_10.txt",
"Madison_fed_14.txt", "Madison_fed_37.txt", "Madison_fed_38.txt",
"Madison_fed_39.txt", "Madison_fed_40.txt", "Madison_fed_41.txt",
"Madison_fed_42.txt", "Madison_fed_43.txt", "Madison_fed_44.txt",
"Madison_fed_45.txt", "Madison_fed_46.txt", "Madison_fed_47.txt",
"Madison_fed_48.txt", "Madison_fed_58.txt"), class = "factor"),
a = c(0.28, 0.177, 0.339, 0.27, 0.303, 0.245), all = c(0.052,
0.063, 0.09, 0.024, 0.054, 0.059), also = c(0.009, 0.013,
0.008, 0.016, 0.027, 0.007), an = c(0.096, 0.038, 0.03, 0.024,
0.034, 0.067), and = c(0.358, 0.393, 0.301, 0.262, 0.404,
0.282), any = c(0.026, 0.063, 0.008, 0.056, 0.04, 0.052),
are = c(0.131, 0.051, 0.068, 0.064, 0.128, 0.111), as = c(0.122,
0.139, 0.203, 0.111, 0.148, 0.252), at = c(0.017, 0.114,
0.023, 0.056, 0.013, 0.015), be = c(0.411, 0.393, 0.474,
0.365, 0.344, 0.297)), .Names = c("author", "filename", "a",
"all", "also", "an", "and", "any", "are", "as", "at", "be"), row.names = c(NA,
6L), class = "data.frame")
提前致谢!
答案 0 :(得分:1)
每个 author 名称都对应多行数据,而 data.frame 的行名(rownames)必须唯一,
因此不能直接把 author 名称用作行名。
需要在名称后面加上序号(1、2、3……)使其唯一。
做法是:先按 author 分组,再把组内行号拼接到作者姓名之后,得到唯一的 newAuthor 列;
最后用 column_to_rownames 把 newAuthor 列设为行名(rownames)。
library(tidyverse)

# Build unique row names of the form <author><n>, where n is the position of
# the row within its author group. Row names of a data.frame must be unique,
# so the bare author name cannot be used when an author has several rows.
df <- df %>%
  # Start from automatic row names so column_to_rownames() does not refuse
  # the input for already having row names.
  remove_rownames() %>%
  group_by(author) %>%
  mutate(newAuthor = paste0(author, row_number())) %>%
  # Drop the grouping while the data is still a grouped tibble; once
  # as.data.frame() has run there is no grouping left for ungroup() to remove.
  ungroup() %>%
  select(-author) %>%
  # Convert to a plain data.frame before attaching row names, because tibbles
  # are not meant to carry row names.
  as.data.frame() %>%
  column_to_rownames("newAuthor")

# Display of the result. Only the first 3 columns are shown because the
# data.frame is very big.
head(df[, 1:3])
#                filename     a   all
# dispt1 dispt_fed_49.txt 0.280 0.052
# dispt2 dispt_fed_50.txt 0.177 0.063
# dispt3 dispt_fed_51.txt 0.339 0.090
# dispt4 dispt_fed_52.txt 0.270 0.024
# dispt5 dispt_fed_53.txt 0.303 0.054
# dispt6 dispt_fed_54.txt 0.245 0.059