我有一些以年份命名的文件。我列出了所有文件,并从文件名中提取年份值保存到变量中。我想在输出文件中添加一列,包含该年每一天(365 天)的日期。但是,在生成日期序列时,应该如何使用保存年份值的变量呢?
文件名的格式为“E1901.txt”、“E1902.txt”……以下是我的脚本:
# Combine the yearly files ("E1901.txt", "E1902.txt", ...) into one table that
# starts with a daily `date` column, then write everything to a single CSV.
setwd("location")
input_files <- list.files(pattern = "[.]txt$")
if (length(input_files) == 0L) {
  stop("No .txt input files found in ", getwd(), call. = FALSE)
}

# Read one yearly file and return its data with a leading `date` column.
#
# file_name: name of a yearly file; must contain the four-digit year.
# Returns a data.frame: `date` first, followed by the columns read from
# the file.
read_year_file <- function(file_name) {
  rf <- read.csv(file_name)

  # The year has to be parsed from the file NAME, not from the data read
  # from it.
  year <- regmatches(file_name, regexpr("[0-9]{4}", file_name))
  if (length(year) != 1L) {
    stop("Cannot find a four-digit year in ", file_name, call. = FALSE)
  }

  # paste0() is required here: paste() would insert a blank ("1901 /1/1"),
  # which as.Date() cannot parse.
  date <- seq(
    as.Date(paste0(year, "-01-01")),
    as.Date(paste0(year, "-12-31")),
    by = "day"
  )

  # The sequence has 366 entries in leap years, so the file must supply
  # exactly one row per calendar day of its year.
  if (nrow(rf) != length(date)) {
    stop(
      file_name, " has ", nrow(rf), " rows but year ", year, " has ",
      length(date), " days",
      call. = FALSE
    )
  }

  cat("\n Finished processing data of ", file_name)
  # Prepending with cbind() puts `date` first without hard-coding the
  # number of data columns.
  cbind(date = date, rf)
}

# Collect all yearly tables in a list first and bind them once at the end.
total <- do.call("rbind", lapply(input_files, read_year_file))
cat("\n")

# write.csv() always writes the header and uses "," as separator; it ignores
# `col.names` and `sep` (with a warning), so they are not passed.
write.csv(total, file = "1901-2016.csv", row.names = FALSE)
此致
答案 0 :(得分:0)
下面是用我最喜欢的工具(data.table)实现的做法:
library(data.table)

# Find the yearly text files and take the four-digit year from each file name.
input_files <- list.files(pattern = "[.]txt$")
years <- stringr::str_extract(input_files, "\\d{4}")

# Read every file, then stack the tables; `file_id` holds the position of the
# source file in `input_files` (and therefore in `years`).
yearly_tables <- lapply(input_files, fread)
total <- rbindlist(yearly_tables, idcol = "file_id")

# Within each file's rows, add the daily sequence for that file's year.
total[
  ,
  date := {
    first_day <- as.Date(paste0(years[file_id], "-01-01"))
    last_day <- as.Date(paste0(years[file_id], "-12-31"))
    seq(first_day, last_day, "day")
  },
  by = file_id
]

# The helper id is no longer needed; move `date` to the front and save.
total[, file_id := NULL]
setcolorder(total, "date")
fwrite(total, "1901-2016.csv")
使用我的伪数据,`total` 的内容如下所示:
             date         V1  V2 V3
    1: 1901-01-01 1901-01-01   1  G
    2: 1901-01-02 1901-01-02   2  J
    3: 1901-01-03 1901-01-03   3  O
    4: 1901-01-04 1901-01-04   4  X
    5: 1901-01-05 1901-01-05   5  F
   ---
42365: 2016-12-27 2016-12-27 362  F
42366: 2016-12-28 2016-12-28 363  P
42367: 2016-12-29 2016-12-29 364  P
42368: 2016-12-30 2016-12-30 365  X
42369: 2016-12-31 2016-12-31 366  N
# Create dummy data files (in base R): one file per year from 1901 to 2016,
# named "E<year>.txt", each holding one row per calendar day of that year.
if (basename(getwd()) != "location") {
  # The directory may already exist from an earlier run; do not warn then.
  dir.create("location", showWarnings = FALSE)
  setwd("location")
}

# Fixed seed so the random letters in V3 are reproducible.
set.seed(1L)

# A plain loop is used because the files are written purely for their side
# effect; lapply() would auto-print a list of 116 NULLs at top level.
for (year in 1901:2016) {
  # V1: every day of the year, V2: running day number, V3: random letter.
  V1 <- seq(
    as.Date(paste0(year, "-01-01")),
    as.Date(paste0(year, "-12-31")),
    by = "day"
  )
  V2 <- seq_along(V1)
  V3 <- sample(LETTERS, length(V1), TRUE)
  write.csv(
    data.frame(V1, V2, V3, stringsAsFactors = FALSE),
    sprintf("E%4i.txt", year),
    row.names = FALSE
  )
}