我想统计所有文件的记录总数，但不知为何，代码只统计了最后一个文件，并在后续分析中使用了这个数值。应如何修改这段代码，才能得到全部文件（共 51 个）合并后的记录总数和唯一值计数？
# Collect the names of the input files. list.files() is called without a path,
# so it searches the current working directory and keeps the names that match
# the regular expression "Dataset 2.*txt". Only names are returned here; the
# files themselves are read later.
file_list <- list.files(pattern="Dataset 2.*txt")
# Read files
# Read every file named in file_list as a comma-separated table with a header.
# NOTE(review): rbind(file) receives a single argument, so it returns just the
# rows of `file`. out.file is therefore overwritten on each pass and, once the
# loop ends, holds only the rows of the last file -- this is the behaviour the
# question is asking about.
for (i in 1:length(file_list)){
file <- read.table(file_list[i], header=TRUE, sep=",")
out.file <- rbind(file)
}
# Count total number of phone call records
# Number of records: the length of the CALLER_ID column of out.file, i.e. one
# per row of whatever out.file currently holds.
count_PHONECALLRECORDS <- length(out.file$CALLER_ID)
# Number of distinct caller IDs among those same rows.
count_CALLERID <- length(unique(out.file$CALLER_ID))
答案 0 :(得分:2)
以下是您需要的修正：
# Read files
# Start from NULL so that the first rbind() simply yields the first file's
# rows; every later iteration appends the new file to the rows gathered so far.
out.file <- NULL
# seq_along() gives zero iterations when file_list is empty, whereas
# 1:length(file_list) would iterate over c(1, 0) and fail inside read.table().
for (i in seq_along(file_list)) {
  file <- read.table(file_list[i], header = TRUE, sep = ",")
  out.file <- rbind(out.file, file)
}
请注意，这种逐步增长数据的方式（即在循环中反复把结果与自身进行 rbind）效率不高；但如果您是初学者，在确实遇到性能问题之前不必为此担心。
答案 1 :(得分:0)
您应该将计数代码移到循环中,并在循环之前初始化计数变量:
setwd("~/Desktop/GEOG Research/Jordan/compression")
library(plyr)
library(rlang)
library(dplyr)
# Collect the names of the input files: list.files() without a path searches
# the current working directory for names matching the regular expression.
file_list <- list.files(pattern = "Dataset 2.*txt")

# Fail early with a clear message instead of an obscure read.table() error.
if (length(file_list) == 0) {
  stop("No files matching 'Dataset 2.*txt' in ", getwd(), call. = FALSE)
}

# Read files
# The record total is initialised before the loop and updated inside it.
count_PHONECALLRECORDS <- 0
# One slot per file; filling a pre-sized list avoids re-copying the data on
# every iteration the way repeated rbind() would.
tables <- vector("list", length(file_list))
for (i in seq_along(file_list)) {
  file <- read.table(file_list[i], header = TRUE, sep = ",")
  tables[[i]] <- file
  # Count total number of phone call records: row counts add up across files.
  count_PHONECALLRECORDS <- count_PHONECALLRECORDS + length(file$CALLER_ID)
}

# Combine every file into one data frame so the rest of the analysis sees all
# records, not only those of the last file read.
out.file <- do.call(rbind, tables)

# Count number of unique caller IDs. This has to be computed on the combined
# data: adding up per-file unique counts would count a caller once for every
# file in which that caller appears.
count_CALLERID <- length(unique(out.file$CALLER_ID))

# Construct contingency matrix: CALLEE_PREFIX against the first character of
# CALLER_ID (substr() treats a start of 0 as 1, so 1 is written explicitly).
tb_1 <- with(out.file, table(CALLEE_PREFIX, CALLER = substr(CALLER_ID, 1, 1)))
# NOTE(review): the labels below assume exactly two distinct first characters
# and three CALLEE_PREFIX values, in table()'s sort order -- confirm against
# the data. A different number of levels makes these assignments fail.
colnames(tb_1) <- c("Refugee Caller", "Non-Refugee Caller")
rownames(tb_1) <- c("Refugee Callee", "Non-Refugee Callee", "Unknown Callee")
tb_1