我拿到了一些使用情况统计数据。这几个月的数据被拆分成多个没有标题行的TSV文件:
07-01-2017_01.tsv
07-01-2017_02.tsv
07-02-2017_01.tsv
07-02-2017_02.tsv
07-03-2017_01.tsv
07-03-2017_02.tsv
07-04-2017_01.tsv
07-04-2017_02.tsv
07-04-2017_03.tsv
我想按天合并数据,添加标题行,并将其导出为CSV格式。我用下面的代码处理好了其中一天的数据,但我想知道有没有办法把它自动化,这样就不需要为每个月的每一天手动运行一遍代码。
# Merge the two header-less TSV parts of one day (07-01-2017), attach column
# names, and export the combined table as CSV.

# The files carry no header row, so header = FALSE; quote = "" keeps any
# quote characters inside fields from being interpreted as delimiters.
data_part1 <- read.delim(
  "~/07-01-2017_01.tsv",
  header = FALSE, sep = "\t", quote = "", stringsAsFactors = FALSE
)
data_part2 <- read.delim(
  "~/07-01-2017_02.tsv",
  header = FALSE, sep = "\t", quote = "", stringsAsFactors = FALSE
)

# Stack the second part underneath the first.
data_merged <- rbind(data_part1, data_part2)

# Column names, in file order. There must be no trailing comma after the last
# entry: c() treats it as an empty argument and raises an error.
names(data_merged) <- c(
  "post_visid_high",
  "post_visid_low",
  "quarterly_visitor",
  "visid_timestamp",
  "visid_type",
  "visit_keywords",
  "visit_num",
  "visit_page_num",
  "visit_ref_domain",
  "visit_ref_type",
  "visit_referrer",
  "visit_search_engine",
  "visit_start_page_url",
  "visit_start_pagename",
  "visit_start_time_gmt"
)

write.csv(data_merged, "~/07-01-2017_02.csv")
预期产出
07-01-2017_merged.csv
07-02-2017_merged.csv
07-03-2017_merged.csv
07-04-2017_merged.csv
答案 0 :(得分:0)
您可以这样做:
# Read every TSV file in the working directory, stack them into one data
# frame, attach column names, and write the result as a single CSV.
setwd("path")

# list.files() takes a regular expression, not a shell glob: match names that
# end in ".tsv".
temp <- list.files(pattern = "\\.tsv$")
if (length(temp) == 0) {
  stop("No .tsv files found in ", getwd(), call. = FALSE)
}

# Read all files first and bind once, instead of growing a data frame with
# rbind() inside a loop. The files have no header row.
parts <- lapply(
  temp,
  read.delim,
  header = FALSE, sep = "\t", quote = "", stringsAsFactors = FALSE
)
allCsv <- do.call(rbind, parts)

# Column names, in file order (no trailing comma after the last entry).
colnames(allCsv) <- c(
  "post_visid_high",
  "post_visid_low",
  "quarterly_visitor",
  "visid_timestamp",
  "visid_type",
  "visit_keywords",
  "visit_num",
  "visit_page_num",
  "visit_ref_domain",
  "visit_ref_type",
  "visit_referrer",
  "visit_search_engine",
  "visit_start_page_url",
  "visit_start_pagename",
  "visit_start_time_gmt"
)

write.csv(allCsv, "outputPath")
答案 1 :(得分:0)
您只需根据自己的具体情况调整基于文件列表的方案即可。用 for 循环就能正常工作:
# Merge header-less TSV files per day: files named <date>_<part>.tsv in the
# working directory are grouped by <date>, stacked, given column names, and
# written to <output_path><date>_merged.csv.
all_files <- list.files(pattern = "\\.tsv$")
output_path <- "/path/to/output_dir/"
cnames <- c(
  "post_visid_high",
  "post_visid_low",
  "quarterly_visitor",
  "visid_timestamp",
  "visid_type",
  "visit_keywords",
  "visit_num",
  "visit_page_num",
  "visit_ref_domain",
  "visit_ref_type",
  "visit_referrer",
  "visit_search_engine",
  "visit_start_page_url",
  "visit_start_pagename",
  "visit_start_time_gmt"
)

# Derive the date key by stripping the trailing "_<part>.tsv" suffix.
# stringsAsFactors = FALSE keeps file names as character on R < 4.0, where
# factor file names would be rejected by read.delim().
all_files <- data.frame(
  fn = all_files,
  date = sub(pattern = "_[0-9]+\\.tsv$", replacement = "", x = all_files),
  stringsAsFactors = FALSE
)

for (d in unique(all_files$date)) {
  # Read every part belonging to this date.
  data_list <- lapply(
    all_files$fn[all_files$date == d],
    read.delim,
    header = FALSE, sep = "\t", quote = "", stringsAsFactors = FALSE
  )
  # Bind all parts in one call rather than one rbind() per file.
  merged_data <- do.call(rbind, data_list)
  names(merged_data) <- cnames
  write.csv(merged_data, paste0(output_path, d, "_merged.csv"))
}
如果您想提高速度,请使用 library(data.table),并将 read.delim 替换为 fread,将 do.call(rbind...) 替换为 rbindlist(data_list),将 write.csv 替换为 fwrite。
// Click handler for the submit button: parses the number currently shown in
// lblStart into i, increments it, and writes the new value back to the label.
protected void btnsubmit_Click(object sender, EventArgs e)
{
    i = int.Parse(lblStart.Text);
    // Pre-increment so the label receives the already-updated counter.
    lblStart.Text = (++i).ToString();
}
。