任何帮助将不胜感激。
我使用以下代码来分解我的大型csv文件(4gb),现在我试图将第2,第3 ......部分保存到csv中。但是,我只能访问我的第一个数据块。
我的代码有什么问题吗? 如何将我的第二块数据保存到csv?
rgfile <- 'filename.csv'
index <- 0
chunkSize <- 100000
con <- file(description = rgfile, open="r")
dataChunk <- read.table(con, nrows= chunkSize, header=T, fill= TRUE, sep= ",")
actualColumnNames <- names(dataChunk)
repeat {
index <- index + 1
print(paste('Processing rows:', index * chunkSize))
if (nrow(dataChunk) != chunkSize){
print('Processed all files!')
break
}
dataChunk <- read.table(
con, nrows = chunkSize, skip=0, header = FALSE,
fill=TRUE, sep = ",", col.names=actualColumnNames
)
break
}
答案 0 :(得分:1)
library(tidyverse)
library(nycflights13)
# make the problelm reproducible
rgfile <- 'flights.csv'
write_csv(flights, rgfile)
# now, get to work
lines <- as.numeric(R.utils::countLines(rgfile))
chunk_size <- 100000
hdr <- read_csv(rgfile, n_max=2)
fnum <- 1
for (i in seq(1, lines, chunk_size)) {
suppressMessages(
read_csv(
rgfile, col_names=colnames(hdr), skip=(i-1), n_max=chunk_size
)
) -> x
if (i>1) colnames(x) <- colnames(hdr)
write_csv(x, sprintf("file%03d.csv", fnum))
fnum <- fnum + 1
}