我有一个表格,格式如下。
Category Value
Name_01 10
Name_01 12
Name_01 11
Name_02 12
Name_02 1
Name_03 13
Name_04 11
Name_05 12
Name_06 21
Name_07 3
Name_08 1
Name_09 23
Name_10 1
Name_11 123
Name_12 12
Name_13 1
Name_14 1
Name_15 12
Name_16 1
Name_17 2
Name_18 33
Name_19 21
Name_20 123
Name_21 32
Name_22 23
Name_23 21
我想将表格写入.txt文件,但每个.txt文件最多只能处理20个唯一类别。因此,需要根据唯一类别的总数将表拆分为多个.txt文件。有人知道如何用基础R做到这一点吗?另一个复杂因素是每个.txt文件中的类别都必须从1开始重新编号(1-20)。在上面的示例中,我将得到一个包含Name_01至Name_20的.txt文件,以及第二个.txt文件,其中包含Name_21至Name_23,但它们被重命名为Name_01至Name_03。
答案 0 :(得分:1)
试试这个:
# Split df1 into chunks of at most 20 unique categories and renumber the
# categories inside every chunk so that they always start at 1.
#
# factor() is applied first so the code works whether Category is stored as
# character (the data.frame default since R 4.0) or already as a factor;
# as.numeric() on a character column would only produce NA.
categoryIndex <- as.integer(factor(df1$Category))
# (index - 1) %/% 20 + 1 puts categories 1-20 in chunk 1, 21-40 in chunk 2,
# and so on. Dividing the raw index by 21 would let every chunk after the
# first hold 21 categories.
chunkId <- (categoryIndex - 1L) %/% 20L + 1L
myList <-
  lapply(split(df1, chunkId),
         function(chunk) {
           # Re-factor inside the chunk: unused levels are dropped, so the
           # integer codes restart at 1 for every chunk.
           chunk$Category <- as.factor(as.integer(factor(chunk$Category)))
           chunk
         })
# write to csv: one file per chunk, named after the chunk number
invisible(lapply(names(myList), function(i) {
  write.csv(myList[[i]],
            file = paste0(i, ".csv"),
            row.names = FALSE)
}))
这将输出2个文件: 1.csv , 2.csv
答案 1 :(得分:1)
另一种解决方案:
# Write `data` to split_<i>.txt files holding at most `maxCategory` unique
# categories each, renaming the categories in every file to Name_01, Name_02, ...
maxCategory <- 20
# Position of each row's category among the unique categories, in order of
# first appearance. Rows that share a category share the same index, so
# repeated categories are counted once and always land in the same file.
categoryIndex <- match(data$Category, unique(data$Category))
# File number for each row: categories 1-20 -> 1, 21-40 -> 2, ...
N <- (categoryIndex - 1L) %/% maxCategory + 1L
for (i in unique(N)) {
  inFile <- N == i
  d <- data[inFile, , drop = FALSE]
  # Renumber within the file (1..maxCategory) and zero-pad to two digits
  localIndex <- (categoryIndex[inFile] - 1L) %% maxCategory + 1L
  d$Category <- sprintf("Name_%02d", as.integer(localIndex))
  # Write text file
  write.table(d, paste0("split_", i, ".txt"),
              row.names = FALSE, quote = FALSE)
}
输出文件为 split_1.txt 和 split_2.txt。split_2.txt 看起来像这样:
Category Value
Name_01 32
Name_02 23
Name_03 21
答案 2 :(得分:0)
这是一个使用tidyverse的解决方案。
library('magrittr')
library('tidyverse')
# Example data: 23 distinct categories with one value each, built
# column-wise.
df <- tibble(
  Category = c(
    'Name_01', 'Name_02', 'Name_03', 'Name_04', 'Name_05',
    'Name_06', 'Name_07', 'Name_08', 'Name_09', 'Name_10',
    'Name_11', 'Name_12', 'Name_13', 'Name_14', 'Name_15',
    'Name_16', 'Name_17', 'Name_18', 'Name_19', 'Name_20',
    'Name_21', 'Name_22', 'Name_23'
  ),
  Value = c(
    10, 12, 13, 11, 12,
    21, 3, 1, 23, 1,
    123, 12, 1, 1, 12,
    1, 2, 33, 21, 123,
    32, 23, 21
  )
)
首先,我们使用parse_number提取类别ID。我们使用dense_rank(Category)为每个不同的类别得到一个连续的序号,并利用它让group_id每经过20个不同的类别就增加1。我们还会根据每组category_id的最小值/最大值创建file_name列。
# Tag every row with the numeric id of its category, the id of the
# 20-category group it belongs to, and the name of the file that group is
# written to.
df2 <- df %>%
  mutate(
    # Numeric part of the category name, e.g. 'Name_07' -> 7
    category_id = parse_number(Category),
    # dense_rank() numbers the distinct categories 1, 2, 3, ... and rows that
    # share a category share the number, so integer division puts categories
    # 1-20 in group 1, 21-40 in group 2, and so on. A cumsum() over
    # `rank %% 20 == 1` would instead start a new group on every repeated
    # row of category 1, 21, ...
    group_id = (dense_rank(Category) - 1L) %/% 20L + 1L) %>%
  group_by(group_id) %>%
  mutate(
    file_name = stringr::str_c(
      'names_', min(category_id), '-', max(category_id), '.txt'))
print(df2, n=100)
# # A tibble: 23 x 5
# # Groups:   group_id [2]
#    Category Value category_id group_id file_name
#    <chr>    <dbl>       <dbl>    <int> <chr>
#  1 Name_01     10           1        1 names_1-20.txt
#  2 Name_02     12           2        1 names_1-20.txt
#  3 Name_03     13           3        1 names_1-20.txt
#  4 Name_04     11           4        1 names_1-20.txt
#  5 Name_05     12           5        1 names_1-20.txt
#  6 Name_06     21           6        1 names_1-20.txt
#  7 Name_07      3           7        1 names_1-20.txt
#  8 Name_08      1           8        1 names_1-20.txt
#  9 Name_09     23           9        1 names_1-20.txt
# 10 Name_10      1          10        1 names_1-20.txt
# 11 Name_11    123          11        1 names_1-20.txt
# 12 Name_12     12          12        1 names_1-20.txt
# 13 Name_13      1          13        1 names_1-20.txt
# 14 Name_14      1          14        1 names_1-20.txt
# 15 Name_15     12          15        1 names_1-20.txt
# 16 Name_16      1          16        1 names_1-20.txt
# 17 Name_17      2          17        1 names_1-20.txt
# 18 Name_18     33          18        1 names_1-20.txt
# 19 Name_19     21          19        1 names_1-20.txt
# 20 Name_20    123          20        1 names_1-20.txt
# 21 Name_21     32          21        2 names_21-23.txt
# 22 Name_22     23          22        2 names_21-23.txt
# 23 Name_23     21          23        2 names_21-23.txt
现在,我们可以使用nest将原始列(Category和Value)放入一个列表列。
# Collapse each group into a single row whose `data` list-column holds that
# group's Category/Value rows.
# category_id is dropped first: it differs on every row, so if it were left
# outside the nested columns it would act as an extra key and yield one row
# per category instead of one row per file. With only Category and Value left
# besides the grouping columns, nest() without column arguments packs exactly
# those two, avoiding the deprecated `nest(Category, Value)` form.
df2 <- df2 %>%
  select(-category_id) %>%
  group_by(group_id, file_name) %>%
  nest() %>%
  ungroup()
print(df2, n=100)
# # A tibble: 2 x 3
#   group_id file_name       data
#      <int> <chr>           <list>
# 1        1 names_1-20.txt  <tibble [20 x 2]>
# 2        2 names_21-23.txt <tibble [3 x 2]>
然后我们用walk2遍历每一对data + file_name,并使用write_delim输出每个文件。
# Write every nested tibble to the file named in the same row of df2.
walk2(df2$data, df2$file_name, write_delim)
我们可以将上述所有步骤合并到一个管道中。
# Full pipeline: assign each row to a group of at most 20 distinct
# categories, derive the output file name per group, nest the original
# columns per file, and write one file per group.
df %>%
  mutate(
    # Numeric part of the category name, e.g. 'Name_07' -> 7
    category_id = parse_number(Category),
    # Rows that share a category share a dense rank, so integer division
    # keeps repeated categories together (a cumsum() over `rank %% 20 == 1`
    # would start a new group on every repeated row).
    group_id = (dense_rank(Category) - 1L) %/% 20L + 1L) %>%
  group_by(group_id) %>%
  mutate(
    file_name = stringr::str_c(
      'names_', min(category_id), '-', max(category_id), '.txt')) %>%
  # Drop the per-row helper so it cannot act as an extra nesting key; then
  # nest() packs the remaining non-grouping columns (Category, Value).
  select(-category_id) %>%
  group_by(group_id, file_name) %>%
  nest() %>%
  ungroup() %$%
  walk2(
    .$data,
    .$file_name,
    write_delim)