我有一个程序,用来统计文本中每个字母(字符)出现的次数及其位置,但它不能正常工作。从下面的示例输出可以看出,程序没有按行计算列索引:行号始终为 1,列号在换行之后也不会重新从 1 开始。我不知道该如何让程序在一行的最后一个字符之后转到下一行,并让列号重新从 1 开始计数。你能帮帮我吗?我有两个不同的程序,其中一个完全不统计行号。
library(tidyverse)
library(stringr)
# Tabulate every distinct character of `text`: how often it occurs and where.
# NOTE: `col` lists offsets into the whole string (newlines included) and
# `row` is the constant 1, so positions are not reported per line.
tibble(char = str_split(text, "")[[1]]) %>%
  rownames_to_column(var = "rn") %>%
  group_by(char) %>%
  summarise(
    Count = n(),
    row = 1,
    col = toString(rn)
  )
# Alternative approach: for every distinct character of `text`, look up all of
# its match positions with gregexpr() and collapse them into one row per
# character. Positions are offsets into the whole string; no line number is
# produced.
text %>%
  strsplit("") %>%
  unlist() %>%
  unique() %>%
  sapply(function(ch) gregexpr(ch, text, fixed = TRUE)) %>%
  unlist() %>%
  data.frame(letter = names(.), col = .) %>%
  # unlist() appends a counter to repeated names ("a1", "a2", ...), so only the
  # first character of each name is kept to recover the matched character.
  group_by(letter = gsub("^(.).*", "\\1", letter)) %>%
  summarise(count = n(), col = paste(col, collapse = ","))
# Sample input: a quotation spread over three lines. The line breaks inside the
# literal are part of the string, so `text` contains two "\n" characters.
text <- "Only two things are infinite,
the universe and human stupidity,
and I'm not sure about the former. (A. Einstein)"
# A tibble: 31 x 4
char Count row col
<chr> <int> <dbl> <chr>
1 ' 1 1 72
2 18 1 5, 9, 16, 20, 30, 35, 44, 48, 54, 65, 70, 74, 78, 83, 89, 93, 101, 105
3 "\n" 2 1 31, 66
4 ( 1 1 102
5 ) 1 1 114
6 , 2 1 29, 64
7 . 2 1 100, 104
8 a 5 1 17, 45, 52, 67, 84
9 A 1 1 103
10 b 1 1 85
# ... with 21 more rows
答案 0 :(得分:1)
你可以尝试
library(tidyverse)
library(stringr)
library(stringi)
# Per-line statistics: split `text` at newlines, then for each line count the
# letters a-z (case-insensitively), the punctuation marks and the blanks.
# The result stays grouped by the line number `n`.
str_split(text, pattern = "\n")[[1]] %>%
  data.frame(Text = .) %>%
  group_by(n = row_number()) %>%
  mutate(
    # `letters` on the right-hand side is still base R's a-z constant here;
    # the column of the same name only exists after this expression.
    letters = sum(stri_count_fixed(tolower(Text), letters)),
    marks = sum(stri_count_regex(tolower(Text), "[[:punct:]]")),
    spaces = sum(stri_count_regex(tolower(Text), "[[:blank:]]"))
  )
# A tibble: 3 x 5
# Groups: n [3]
Text n letters marks spaces
<fctr> <int> <int> <int> <int>
1 Only two things are infinite, 1 24 1 5
2 the universe and human stupidity, 2 28 1 5
3 and I'm not sure about the former. (A. Einstein) 3 35 5 8