我是R的新手。我正在尝试计算每个文本文件中的单词频率,并将每个文件的词频结果保存到与其同名的csv文件中。
Ahja_Shaank_OP.txt to Ahja_Shaank_OP.csv
Bhjg_Dhaenk_OP.txt to Bhjg_Dhaenk_OP.csv
.
.
500 files
pacman::p_load(pacman, tm, SnowballC, dplyr)
# Collect every file in the working directory whose name ends in ".txt".
infiles <- list.files(pattern = "\\.txt$")
# Alternative kept for reference: build one corpus from a whole directory.
#docs <- Corpus(DirSource("D:\\PavanSOP\\txt"))
# Compute word frequencies for one text file and save them to a CSV file
# that shares the input's base name (e.g. "Ahja_Shaank_OP.txt" is written
# to "Ahja_Shaank_OP.csv").
#
# Args:
#   file: Path to a text file whose name ends in ".txt".
#
# Returns:
#   The path of the CSV file that was written, invisibly.
change.files <- function(file) {
  bookJE <- readLines(file)

  # Preliminary corpus: each line read from the file becomes one document
  corpusJE <- Corpus(VectorSource(bookJE)) %>%
    tm_map(removePunctuation) %>%
    # tm_map(removeNumbers) %>%
    tm_map(content_transformer(tolower)) %>%
    tm_map(removeWords, stopwords("english")) %>%
    tm_map(stripWhitespace)
  # tm_map(stemDocument)

  # Create the document-term matrix & remove sparse terms.
  # removeSparseTerms() needs 0 < sparse < 1; a one-line file would give
  # sparse = 0, so the filter is skipped in that case.
  dtmJE <- DocumentTermMatrix(corpusJE)
  if (length(corpusJE) > 1) {
    dtmJE <- removeSparseTerms(dtmJE, 1 - (1 / length(corpusJE)))
  }

  # Calculate and sort by word frequencies
  word.freqJE <- sort(colSums(as.matrix(dtmJE)), decreasing = TRUE)

  # Create frequency table
  # NOTE(review): relative.frequency divides by the number of distinct
  # terms, not by the total word count; kept as in the original -- confirm
  # this is the intended definition.
  tableJE <- data.frame(
    word = names(word.freqJE),
    absolute.frequency = word.freqJE,
    relative.frequency = word.freqJE / length(word.freqJE)
  )
  # Remove the words from the row names
  rownames(tableJE) <- NULL

  # Write the frequency table itself (the original passed `data`, which is
  # a base R function, not the table). write.csv() emits a well-formed CSV;
  # row names are dropped because they are only running indices.
  out_file <- sub("\\.txt$", ".csv", file)
  write.csv(tableJE, file = out_file, row.names = FALSE)

  invisible(out_file)
}
# Run the per-file conversion over every discovered text file.
lapply(X = infiles, FUN = change.files)
如何提取每个文件的文件名,并用相同的名称将结果保存为csv文件?
有人知道下面这行代码为什么不起作用吗?
write.table(data, quote=FALSE, sep=", ", sub("\\.txt$",".csv", file))
请帮助。感谢。