如何计算仅以大写字母开头的单词?
请参阅本演示中的示例代码:
https://codeforwin.org/2018/02/c-program-count-occurrences-of-all-words-a-file.html
代码示例:
https://onlinegdb.com/HJgWn-K2E
/**
* C program to count occurrences of all words in a file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAX_WORDS 50
/*
 * Lowercase a NUL-terminated string in place.
 *
 * Each byte is handed to tolower() as an unsigned char value, which is what
 * the <ctype.h> functions require. Returns the same pointer it was given so
 * the call can be nested inside another expression.
 */
char *strlwr(char *str)
{
    unsigned char *cursor;

    for (cursor = (unsigned char *)str; *cursor != '\0'; ++cursor) {
        *cursor = tolower(*cursor);
    }
    return str;
}
/*
 * Prompt for a file path, then print how many times each distinct word
 * occurs in that file.
 *
 * Every whitespace-delimited token is lowercased and has at most one trailing
 * punctuation character removed before it is compared, so "Town", "town" and
 * "town." are all counted as the same word.
 *
 * Returns 0 on success. Exits with EXIT_FAILURE when no path can be read or
 * the file cannot be opened.
 */
int main()
{
    FILE *fptr;
    char path[100];
    int i, index, isUnique;
    int found;          /* position of an already-known word, or -1 */
    int tableFull = 0;  /* set once a new word had to be dropped */
    size_t len;

    // List of distinct words
    char words[MAX_WORDS][50];
    char word[50];

    // Count of distinct words
    int count[MAX_WORDS];

    /* Input file path; the field width keeps scanf inside path[100] */
    printf("Enter file path: ");
    if (scanf("%99s", path) != 1)
    {
        printf("Unable to read file path.\n");
        exit(EXIT_FAILURE);
    }

    /* Try to open file */
    fptr = fopen(path, "r");

    /* Exit if file not opened successfully */
    if (fptr == NULL)
    {
        printf("Unable to open file.\n");
        printf("Please check you have read privileges.\n");
        exit(EXIT_FAILURE);
    }

    // Initialize words count to 0
    for (i = 0; i < MAX_WORDS; i++)
        count[i] = 0;

    index = 0;

    /* The field width keeps fscanf inside word[50]; a longer token is
     * delivered in several pieces instead of overflowing the buffer. */
    while (fscanf(fptr, "%49s", word) == 1)
    {
        // Convert word to lowercase
        strlwr(word);

        // Remove last punctuation character. The cast is required because
        // <ctype.h> functions are undefined for negative char values.
        len = strlen(word);
        if (ispunct((unsigned char)word[len - 1]))
            word[--len] = '\0';

        // A token that was a single punctuation mark is not a word
        if (len == 0)
            continue;

        // Check if word exists in list of all distinct words
        isUnique = 1;
        found = -1;
        for (i = 0; i < index && isUnique; i++)
        {
            if (strcmp(words[i], word) == 0)
            {
                isUnique = 0;
                found = i;
            }
        }

        if (!isUnique)
        {
            // Known word: increment its occurrence count
            count[found]++;
        }
        else if (index < MAX_WORDS)
        {
            // New word: add it to the distinct words list
            strcpy(words[index], word);
            count[index]++;
            index++;
        }
        else
        {
            // No room left: drop the word rather than write past the arrays
            tableFull = 1;
        }
    }

    // Close file
    fclose(fptr);

    if (tableFull)
        fprintf(stderr, "Warning: more than %d distinct words; extra words ignored.\n", MAX_WORDS);

    /*
     * Print occurrences of all words in file.
     */
    printf("\nOccurrences of all distinct words in file: \n");
    for (i = 0; i < index; i++)
    {
        /*
         * %-15s prints string in 15 character width.
         * - is used to print string left align inside
         * 15 character width space.
         */
        printf("%-15s %d\n", words[i], count[i]);
    }

    return 0;
}
在此代码示例中,程序先把所有单词都转换为小写,然后统计所有单词的出现次数。
相反:如何只把以大写字母开头的单词加入唯一列表,然后统计该单词的所有出现次数?
是否应该在 fscanf 之后配合使用 if (isupper(word[0])) 来判断?
Test.txt文件
Any girl jumped over one boy.
Some car skipped to some boy.
One town drove over the town.
Any town ran under some dog.
Some girl drove to a town.
The boy walked under any town.
A town jumped over any car.
Any boy jumped from a car.
A dog ran over a boy.
A girl ran to some car.
A car ran under the girl.
The car ran on any town.
One dog walked under any dog.
A car jumped on some town.
A boy ran to a boy.
The dog drove over a boy.
A boy jumped over the car.
Some car drove on some girl.
One boy drove under some girl.
A girl walked over some dog.
预期输出:
Any 7
Some 3
One 4
The 6
A 8
当前输出:
any 7
girl 7
jumped 5
over 7
one 4
boy 10
some 10
car 9
skipped 1
to 4
town 8
drove 5
the 6
ran 6
under 5
dog 6
a 13
walked 3
from 1
on 3
可能的解决方案:
// skip the word if it does not contain a capital letter at start
if (islower(word[0])) {
continue;
}
然后创建另一个 FOR 循环,该循环检查这些单词以小写或大写
答案 0 :(得分:1)
您的方向是正确的,只需稍微重新组织一下代码。虽然可以用几个独立的数组来跟踪每个唯一的单词——一个数组保存单词,另一个数组在相同索引处保存该单词的出现次数,再用一个数组在相同索引处记录该单词在文件中是否以大写开头出现——但有更好的方法。
每当需要把不同类型的数据作为一个整体来协调时,就应该考虑使用 `struct`。借助 `struct`,您可以把每个唯一的单词、该单词在文件中是否以大写形式出现、以及该单词(不区分大小写)出现的次数放在一起管理,例如:
/* One record per distinct word found in the file. */
typedef struct {
    char word[MAX_WORD];    /* the word, stored in lowercase */
    int cap;                /* nonzero if the word appears capitalized */
    int count;              /* number of times the word occurs */
} words_t;
现在,您只需创建一个 `words_t` 数组,把每个单词(小写形式)存入结构的 `word` 成员,用 `cap` 记录它是否曾以首字母大写的形式出现,并用 `count` 记录它出现的总次数。
这简化了代码中的逻辑。现在,您只需声明一个words_t
的数组,例如
#define MAX_WORD 50 /* max word size */
#define MAX_WORDS 512 /* max number of words */
...
/* Array of struct of distinct words, initialized all zero */
words_t words[MAX_WORDS] = {{ .word = "" }};
您要求输入文件名- 验证每个用户输入 ,例如
/* Input file path */
printf ("Enter file path: ");
if (scanf ("%s", path) != 1) { /* validate every input */
fputs ("error: invalid file path or cancellation.\n", stderr);
return 1;
}
现在遍历您的单词-在保护数组边界以及读取每个单词的同时:
while (index < MAX_WORDS && /* protect array bounds */
fscanf (fptr, "%s", word) == 1) { /* while valid word read */
现在到了存储并跟踪哪些单词曾以大写开头的关键逻辑。首先,除了 `isunique` 标志之外,还需要一个标志,在把单词转换为小写之前先记录它是否以大写字母开头。只需用 `isupper()` 测试第一个字符即可,例如:
int iscap = 0, isunique = 1; /* is captial, is unique flags */
if (isupper (*word)) /* is the word uppercase */
iscap = 1;
不仅可以修剪单个标点符号,还可以在将单词转换为小写字母之前轻松地修剪 all 标点符号
/* remove all trailing punctuation characters */
len = strlen (word); /* get length */
while (len && ispunct(word[len - 1])) /* only if len > 0 */
word[--len] = 0;
strlwr (word); /* convert word to lowercase */
现在,从数据存储的角度来看,剩下的就是循环判断该单词是否唯一(`isunique`):如果不是唯一的,那么当 `iscap` 为真时设置该元素的 `cap` 标志,并增加其计数;如果是唯一的,则在退出循环后把单词复制到数组的新元素中(已经有了 `len`,用 `memcpy` 即可,无需重新扫描),同样设置 `cap` 和 `count`,完成后递增 `index`,例如:
遍历数组中已存储的元素,检查该单词是否已经存在:
/* check if word exits in list of all distinct words */
for (i = 0; i < index; i++) {
if (strcmp(words[i].word, word) == 0) {
isunique = 0; /* set unique flag zero */
if (iscap) /* if capital flag set */
words[i].cap = iscap; /* set capital flag in struct */
words[i].count++; /* increment word count */
break; /* bail - done */
}
}
if (isunique) { /* if unique, add to array, increment index */
memcpy (words[index].word, word, len + 1); /* have len */
if (iscap) /* if cap flag set */
words[index].cap = iscap; /* set capital flag in struct */
words[index++].count++; /* increment count & index */
}
}
fclose (fptr); /* close file */
剩下的就是遍历数组中已存储的元素,在打印之前检查每个元素的 `.cap` 成员,以确定该单词是否曾以大写形式出现,并打印其出现次数(注意:根据您的示例文本,您的"预期输出"中的计数有误):
/*
* Print occurrences of all words in file.
*/
puts ("\nOccurrences of all distinct words with Cap in file:");
for (i = 0; i < index; i++) {
if (words[i].cap) {
strcpy (word, words[i].word);
*word = toupper (*word);
/*
* %-15s prints string in 15 character width.
* - is used to print string left align inside
* 15 character width space.
*/
printf("%-15s %d\n", word, words[i].count);
}
}
(注意:输出 "Occurrences of..." 这一行时使用 `puts` 代替 `printf`,因为不需要任何格式转换……好的编译器会自动为您做这种优化)
把以上内容整合在一起,可以这样写:
使用/输出示例
使用您发布的输入内容
/**
* C program to count occurrences of all words in a file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#define MAX_WORD 50 /* max word size */
#define MAX_WORDS 512 /* max number of words */
#ifndef PATH_MAX
#define PATH_MAX 2048 /* max path (defined for Linux in limits.h) */
#endif
/* One record per distinct word found in the file. */
typedef struct {
    char word[MAX_WORD];    /* the word, stored in lowercase */
    int cap;                /* nonzero if the word appears capitalized */
    int count;              /* number of times the word occurs */
} words_t;
/*
 * Convert a NUL-terminated string to lowercase in place.
 *
 * Returns str so the call can be used inside a larger expression.
 */
char *strlwr (char *str)
{
    for (char *p = str; *p; p++) {
        /* The <ctype.h> functions take an int that must be representable as
         * unsigned char (or be EOF). Where plain char is signed, a byte above
         * 0x7F is negative, and passing it straight to tolower() is undefined
         * behavior - so the cast to unsigned char IS needed. */
        *p = (char) tolower ((unsigned char) *p);
    }
    return str;
}
/*
 * Prompt for a file path, count every whitespace-delimited word in that file
 * case-insensitively, and remember for each distinct word whether it was ever
 * seen with a leading uppercase letter. Then print only the words that
 * appeared capitalized at least once, with their total count.
 *
 * Trailing punctuation is removed from each token before it is compared.
 *
 * Returns 0 on success and 1 if no path could be read; exits with
 * EXIT_FAILURE if the file cannot be opened.
 */
int main (void) {
    FILE *fptr;
    char path[PATH_MAX], word[MAX_WORD];
    char pathfmt[32], wordfmt[32];  /* width-limited "%<n>s" scan formats */
    size_t i, len, index = 0;
    int tablefull = 0;              /* set if a new word had to be dropped */
    /* Array of struct of distinct words, initialized all zero */
    words_t words[MAX_WORDS] = {{ .word = "" }};

    /* A bare "%s" writes without limit. Build the formats from the real
     * buffer sizes so they stay correct if PATH_MAX or MAX_WORD changes. */
    snprintf (pathfmt, sizeof pathfmt, "%%%zus", sizeof path - 1);
    snprintf (wordfmt, sizeof wordfmt, "%%%zus", sizeof word - 1);

    /* Input file path */
    printf ("Enter file path: ");
    if (scanf (pathfmt, path) != 1) {   /* validate every input */
        fputs ("error: invalid file path or cancellation.\n", stderr);
        return 1;
    }

    fptr = fopen (path, "r");           /* open file */
    if (fptr == NULL) {                 /* validate file open */
        fputs ( "Unable to open file.\n"
                "Please check you have read privileges.\n", stderr);
        exit (EXIT_FAILURE);
    }

    /* Read to end of file even when the table is full, so that words already
     * stored keep receiving correct counts. A token longer than the buffer
     * is delivered in several pieces rather than overflowing it. */
    while (fscanf (fptr, wordfmt, word) == 1) {
        /* capture capitalization before lowercasing; the cast is required
         * because <ctype.h> functions are undefined for negative values */
        int iscap = isupper ((unsigned char) *word) != 0;
        int isunique = 1;

        /* remove all trailing punctuation characters */
        len = strlen (word);
        while (len && ispunct ((unsigned char) word[len - 1]))
            word[--len] = 0;

        if (len == 0)       /* token was punctuation only - not a word */
            continue;

        strlwr (word);      /* convert word to lowercase */

        /* check if word exists in list of all distinct words */
        for (i = 0; i < index; i++) {
            if (strcmp (words[i].word, word) == 0) {
                isunique = 0;               /* set unique flag zero */
                if (iscap)                  /* if capital flag set */
                    words[i].cap = iscap;   /* set capital flag in struct */
                words[i].count++;           /* increment word count */
                break;                      /* bail - done */
            }
        }

        if (!isunique)
            continue;

        if (index == MAX_WORDS) {   /* protect array bounds */
            tablefull = 1;
            continue;
        }

        /* unique: add to array, increment index */
        memcpy (words[index].word, word, len + 1);  /* have len */
        words[index].cap = iscap;
        words[index].count = 1;
        index++;
    }
    fclose (fptr);  /* close file */

    if (tablefull)
        fprintf (stderr, "warning: more than %d distinct words, "
                 "additional words ignored.\n", MAX_WORDS);

    /*
     * Print occurrences of all words in file.
     */
    puts ("\nOccurrences of all distinct words with Cap in file:");
    for (i = 0; i < index; i++) {
        if (words[i].cap) {
            /* print a capitalized copy; the stored word stays lowercase */
            strcpy (word, words[i].word);
            *word = (char) toupper ((unsigned char) *word);
            /*
             * %-15s prints string in 15 character width.
             * - is used to print string left align inside
             * 15 character width space.
             */
            printf("%-15s %d\n", word, words[i].count);
        }
    }

    return 0;
}
$ ./bin/unique_words_with_cap
Enter file path: dat/girljumped.txt
Occurrences of all distinct words with Cap in file:
Any 7
One 4
Some 10
The 6
A 13
(注意:"Some/some" 出现了 10 次,"A/a" 出现了 13 次,而不是您的**预期输出**中所写的 3 次和 8 次,您可以通过简单的计数来确认)
仔细检查一下,如果还有其他问题,请告诉我。