Question

我想累计所有文件的记录数，但不知为何，代码只统计了最后一个文件，并在后续分析中使用了这个数字。如何修改这段代码，使其统计全部文件（共 51 个文件）的记录总数和唯一值数量？

# Collect the names of all files whose name matches the pattern into one vector
file_list <- list.files(pattern="Dataset 2.*txt")

# Read files
# Read every file in file_list in turn.
# NOTE(review): rbind(file) is called with a single argument, so it just
# returns that file's data; out.file is overwritten on each iteration and
# holds only the most recently read file once the loop finishes.
for (i in 1:length(file_list)){
file <- read.table(file_list[i], header=TRUE, sep=",")
out.file <- rbind(file)
}

# Count total number of phone call records
count_PHONECALLRECORDS <- length(out.file$CALLER_ID)

# Count the number of unique caller IDs found in out.file
count_CALLERID <- length(unique(out.file$CALLER_ID))

Answer 1

这是您需要的更正：

# Read files
# Start from NULL so that the first rbind() simply returns the first file's
# rows; every later iteration appends to what has been read so far.
out.file <- NULL
# seq_along() gives an empty sequence when no files matched the pattern,
# whereas 1:length(file_list) would iterate over c(1, 0) and make
# read.table() fail on a missing file name.
for (i in seq_along(file_list)){
  file <- read.table(file_list[i], header=TRUE, sep=",")
  # Append this file's rows to the accumulated data
  out.file <- rbind(out.file, file)
}

请注意，这种通过反复与自身进行 rbind 来增长数据的方法效率不高；不过如果您是初学者，除非确实遇到了性能问题，否则不必为此担心。

Answer 2

您应该将计数代码移到循环中，并在循环之前初始化计数变量：

setwd("~/Desktop/GEOG Research/Jordan/compression")
library(plyr)
library(rlang)
library(dplyr)

# Move all files to one list
file_list <- list.files(pattern="Dataset 2.*txt")

# Read files
# The running totals are initialised before the loop so that every file
# contributes to them, not just the last one.
count_PHONECALLRECORDS <- 0
caller_ids <- NULL                              # distinct caller ids seen so far
file_data <- vector("list", length(file_list))  # one data frame per file
# seq_along() is safe when no file matches (1:length(x) would yield c(1, 0)).
for (i in seq_along(file_list)){
  file <- read.table(file_list[i], header=TRUE, sep=",")
  file_data[[i]] <- file

  # Count total number phone call records
  count_PHONECALLRECORDS <- count_PHONECALLRECORDS + length(file$CALLER_ID)

  # Remember every distinct caller id. Summing the per-file unique counts
  # would count a caller once for each file it appears in, so the ids are
  # merged across files and counted once after the loop.
  caller_ids <- union(caller_ids, file$CALLER_ID)
}

# Count number unique caller id's (across all files)
count_CALLERID <- length(caller_ids)

# Combine the rows of all files in a single step so that the analysis below
# uses the complete data set rather than only the last file read.
out.file <- do.call(rbind, file_data)

# Construct contingency matrix
# The caller category is the first character of CALLER_ID (R strings are
# 1-indexed).
tb_1 <- with(out.file, table(CALLEE_PREFIX, CALLER = substr(CALLER_ID, 1, 1)))
# NOTE(review): the labels below assume exactly two caller categories and
# three callee prefixes, in this order - confirm against the data.
colnames(tb_1) <- c("Refugee Caller", "Non-Refugee Caller")
rownames(tb_1) <- c("Refugee Callee", "Non-Refugee Callee", "Unknown Callee")
tb_1

累积计数而不是最后一个文件

2 个答案: