我正在尝试统计一段文本中三字母组(连续三个字母的序列)出现的次数。我已经有一段代码,可以用二维数组成功统计双字母组(连续两个字母的序列)的数量,但在把它改成支持三字母组时遇到了一些麻烦。
#include <stdio.h>
/*
 * Counts every pair of adjacent lowercase letters in "filename.txt" and
 * prints each pair that occurs, in alphabetical order, as "xy: N".
 *
 * Always returns 0; if the file cannot be opened nothing is printed.
 */
int main(void) {
    enum { LETTERS = 'z' - 'a' + 1 };
    int count[LETTERS][LETTERS] = {{ 0 }};
    int prev = EOF; /* previously read character; EOF means "none yet" */
    int cur;
    int row, col;
    FILE *plain = fopen("filename.txt", "r");

    if (plain == NULL) {
        return 0;
    }

    while ((cur = getc(plain)) != EOF) {
        const int both_lower = prev >= 'a' && prev <= 'z'
                            && cur >= 'a' && cur <= 'z';

        if (both_lower) {
            count[prev - 'a'][cur - 'a']++;
        }
        prev = cur;
    }
    fclose(plain);

    /* Report only the pairs that were actually seen. */
    for (row = 0; row < LETTERS; row++) {
        for (col = 0; col < LETTERS; col++) {
            const int n = count[row][col];

            if (n != 0) {
                printf("%c%c: %d\n", 'a' + row, 'a' + col, n);
            }
        }
    }
    return 0;
}
编辑:这是我已经尝试过的代码。我希望把二维数组扩展为三维数组,但运行后没有任何输出。
#include <stdio.h>
/*
 * Counts every run of three adjacent lowercase letters in "filename.txt"
 * and prints each trigram that occurs, in alphabetical order, as "xyz: N".
 *
 * Always returns 0; if the file cannot be opened nothing is printed.
 */
int main(void) {
    int count['z' - 'a' + 1]['z' - 'a' + 1]['z' - 'a' + 1] = {{{ 0 }}};
    /*
     * Sliding window over the input: c0 and c1 are the two characters read
     * before c2. Both start as EOF ("nothing read yet") so the first two
     * characters of the file can never complete a trigram on their own.
     */
    int c0 = EOF, c1 = EOF, c2;
    FILE *plain = fopen("filename.txt", "r");
    if (plain != NULL) {
        /* The newest character must land in c2, the last window slot. */
        while ((c2 = getc(plain)) != EOF) {
            if (c0 >= 'a' && c0 <= 'z'
                && c1 >= 'a' && c1 <= 'z'
                && c2 >= 'a' && c2 <= 'z') {
                count[c0 - 'a'][c1 - 'a'][c2 - 'a']++;
            }
            /* Slide the window forward by one character. */
            c0 = c1;
            c1 = c2;
        }
        fclose(plain);
        for (c0 = 'a'; c0 <= 'z'; c0++) {
            for (c1 = 'a'; c1 <= 'z'; c1++) {
                for (c2 = 'a'; c2 <= 'z'; c2++) {
                    int n = count[c0 - 'a'][c1 - 'a'][c2 - 'a'];
                    if (n) {
                        printf("%c%c%c: %d\n", c0, c1, c2, n);
                    }
                }
            }
        }
    }
    return 0;
}
例如,这段代码会打印所有出现过的双字母组,如 aa、ab、ac 等。但是我需要它统计 aaa、aab、……、zzz 这些三字母组的出现次数。任何帮助将不胜感激!
编辑2:现在,它可以成功打印正确的输出,但是需要以降序排列(最常用的三字母组合在顶部)
答案 0 :(得分:0)
如果要对各个计数桶(bin)进行排序,就需要更多信息;在这段代码中,`order` 指针数组与所有三字母组一一对应。但是,使用原始代码,很难在 O(1) 时间内确定某个索引对应的是哪个三字母组。因此,我把数组打包并展平成一维,从而保证数组索引与全部三字母组(共 17576 个)之间存在双射。
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
/* One trigram: three characters kept in reading order. */
struct Tri {
    const char c0; /* first character */
    const char c1; /* second character */
    const char c2; /* third character */
};
/*
 * Maps a trigram to its slot in a flat table, treating the three characters
 * as digits in radix ('z' - 'a' + 1) with c0 the most significant.
 *
 * No range checking is done here; each character is used as its offset
 * from 'a'.
 */
static size_t tri_to_idx(const struct Tri tri) {
    const int radix = 'z' - 'a' + 1;
    const int hi = tri.c0 - 'a';
    const int mid = tri.c1 - 'a';
    const int lo = tri.c2 - 'a';

    return (hi * radix + mid) * radix + lo;
}
/*
 * Inverse of the flat-index mapping: splits idx into three digits in radix
 * ('z' - 'a' + 1) and turns each digit back into a character by adding 'a'.
 *
 * Asserts that all three resulting characters lie in 'a'..'z'.
 */
static struct Tri idx_to_tri(const size_t idx) {
    const size_t radix = 'z' - 'a' + 1;
    const size_t first = idx / (radix * radix);
    const size_t second = idx / radix % radix;
    const size_t third = idx % radix;
    struct Tri tri = { first + 'a', second + 'a', third + 'a' };

    assert(tri.c0 >= 'a' && tri.c0 <= 'z'
        && tri.c1 >= 'a' && tri.c1 <= 'z'
        && tri.c2 >= 'a' && tri.c2 <= 'z');
    return tri;
}
/*
 * Formats a trigram as a NUL-terminated three-character string.
 *
 * The result lives in a single static buffer, so each call overwrites the
 * string returned by the previous one.
 */
static const char *tri_to_str(const struct Tri tri) {
    static char str[4];

    str[0] = tri.c0;
    str[1] = tri.c1;
    str[2] = tri.c2;
    str[3] = '\0';
    return str;
}
/*
 * Descending-order comparison of two ints.
 *
 * Returns 1 when *a is smaller than *b, -1 when it is larger, and 0 when
 * the two are equal, so larger values sort first.
 */
static int int_reverse_cmp(const int *const a, const int *const b) {
    const int lhs = *a;
    const int rhs = *b;

    if (lhs < rhs) {
        return 1;
    }
    if (lhs > rhs) {
        return -1;
    }
    return 0;
}
/*
 * qsort() callback for an array of `int *`: orders the elements by the
 * ints they point to, largest first.
 *
 * a and b each point at one array element (an `int *`). They are converted
 * without a cast so the const qualifier on the arguments is kept.
 */
static int compar(const void *a, const void *b) {
    int *const *const lhs = a;
    int *const *const rhs = b;

    return int_reverse_cmp(*lhs, *rhs);
}
/*
 * Counts every run of three adjacent lowercase letters in "filename.txt"
 * and prints the trigrams that occur, most frequent first, as "xyz: N".
 *
 * Returns EXIT_FAILURE (after perror) if the file cannot be opened and
 * EXIT_SUCCESS otherwise.
 */
int main(void) {
    enum {
        LETTERS = 'z' - 'a' + 1,
        TRIGRAMS = LETTERS * LETTERS * LETTERS
    };
    const char *const fn = "filename.txt";
    int count[TRIGRAMS] = { 0 }; /* occurrences, indexed by tri_to_idx() */
    int *order[TRIGRAMS];        /* pointers into count[], sorted below */
    int prev2 = EOF, prev1 = EOF; /* two preceding characters; EOF = none */
    int cur;
    size_t i;
    FILE *plain = fopen(fn, "r");

    if (plain == NULL) {
        perror(fn);
        return EXIT_FAILURE;
    }

    while ((cur = getc(plain)) != EOF) {
        const int all_lower = prev2 >= 'a' && prev2 <= 'z'
                           && prev1 >= 'a' && prev1 <= 'z'
                           && cur >= 'a' && cur <= 'z';

        if (all_lower) {
            struct Tri tri = { prev2, prev1, cur };
            count[tri_to_idx(tri)]++;
        }
        prev2 = prev1;
        prev1 = cur;
    }
    fclose(plain);

    /* Slot i of order starts out pointing at slot i of count. */
    for (i = 0; i < TRIGRAMS; i++) {
        order[i] = &count[i];
    }
    qsort(order, TRIGRAMS, sizeof order[0], compar);

    /* Counts are now in descending order, so the first zero ends the list. */
    for (i = 0; i < TRIGRAMS; i++) {
        if (*order[i] == 0) {
            break;
        }
        /* The pointer's offset into count[] is the trigram's flat index. */
        printf("%s: %d\n", tri_to_str(idx_to_tri(order[i] - count)), *order[i]);
    }
    return EXIT_SUCCESS;
}
现在,可以对指向数组元素的指针进行排序,并且在 `printf` 中仍然可以通过索引 `order[i] - count` 还原出对应的三字母组。