我想找出文本中出现次数最多的单词。在我的程序中,评测系统(PC^2)会依次输入一些单词、一个分隔符和一段文本。单词和文本之间由 "-----" 分隔。(我需要在文本中统计这些单词,并找出其中出现次数最多的单词。)示例输入和期望输出如下:
apple
an
banana
-----
i like an apple.
i also like apple-pie.
i like banana
i like banana+ice.
apple 2
banana 2
(出现次数相同的单词按 strcmp 顺序输出。)
然而,虽然该程序在我自己测试时运行良好,但提交到 PC^2 后却得到了超时(TLE)。因此,我想问一下如何才能降低程序的时间复杂度?甚至在还没有输出最频繁的单词及其频率之前,我就已经得到了 TLE。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Scratch buffer that receives one token of the text at a time. */
char tmp[2048];

/* Dictionary: one heap-allocated string per word read before the divider. */
char **dicWord;

/* Occurrence counters; dicWcount[i] belongs to dicWord[i]. */
int *dicWcount;

/* Number of slots currently allocated for dicWord. */
int dic_assume_num = 1000;

/* Number of dicWord slots that actually hold a word. */
int dic_actual_num = 0;
/* qsort/bsearch comparator: orders two `char *` dictionary slots by strcmp. */
static int compare_word_slots(const void *lhs, const void *rhs)
{
    const char *const *a = lhs;
    const char *const *b = rhs;

    return strcmp(*a, *b);
}

/*
 * Reads dictionary words (one per line) from stdin until a line that starts
 * with "-----", then counts how often each dictionary word occurs in the
 * remaining text.  Text tokens are maximal runs of [a-zA-Z0-9_].
 *
 * The dictionary is sorted once so that every token is looked up with a
 * binary search (O(log W) per token) instead of a linear scan over all words
 * (O(W) per token), which is what made the original exceed the time limit.
 * As a consequence dicWord[] ends up in strcmp order.
 *
 * The counts are left in dicWcount[]; this function does not print them.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if memory could not be allocated.
 */
int main()
{
    const char divider[] = "-----";
    char *line = NULL;          /* getline() allocates and grows this buffer */
    size_t line_cap = 0;
    int saw_divider = 0;
    int status = EXIT_FAILURE;

    /* first, get the input of words */
    dicWord = malloc((size_t)dic_assume_num * sizeof(*dicWord));
    if (dicWord == NULL) {
        goto cleanup;
    }

    while (getline(&line, &line_cap, stdin) != -1) {
        line[strcspn(line, "\n")] = '\0';

        if (strncmp(line, divider, sizeof(divider) - 1) == 0) {
            saw_divider = 1;
            break;
        }

        /* Grow through a temporary so the old array survives a failed realloc. */
        if (dic_actual_num >= dic_assume_num) {
            char **grown = realloc(dicWord,
                                   2 * (size_t)dic_assume_num * sizeof(*dicWord));
            if (grown == NULL) {
                goto cleanup;
            }
            dicWord = grown;
            dic_assume_num *= 2;
        }

        char *copy = strdup(line);
        if (copy == NULL) {
            goto cleanup;
        }
        dicWord[dic_actual_num++] = copy;
    }

    if (!saw_divider) {
        /* Input ended before the divider: there is no text to scan. */
        status = EXIT_SUCCESS;
        goto cleanup;
    }

    dicWcount = calloc((size_t)dic_actual_num + 1, sizeof(*dicWcount));
    if (dicWcount == NULL) {
        goto cleanup;
    }

    /* Sort before counting so dicWcount[i] always refers to the final dicWord[i]. */
    qsort(dicWord, (size_t)dic_actual_num, sizeof(*dicWord), compare_word_slots);

    /* second, get the input of text */
    for (;;) {
        /* Skip separators; this directive may legitimately match nothing. */
        if (scanf("%*[^a-zA-Z0-9_]") == EOF) {
            break;
        }
        if (scanf("%2047[a-zA-Z0-9_]", tmp) != 1) {
            break;
        }

        const char *key = tmp;
        char **hit = bsearch(&key, dicWord, (size_t)dic_actual_num,
                             sizeof(*dicWord), compare_word_slots);
        if (hit != NULL) {
            dicWcount[hit - dicWord]++;
        }
    }

    status = EXIT_SUCCESS;

cleanup:
    for (int i = 0; i < dic_actual_num; ++i) {
        free(dicWord[i]);
    }
    free(dicWord);
    free(dicWcount);
    free(line);
    return status;
}
答案 0 :(得分:0)
这个想法很简单:
耗时最多的部分在于存储单词所用的数据结构。使用平衡树(把计数最高的单词作为根)应该就可以了。
我认为不可能减少所需的时间。
答案 1 :(得分:0)
以下代码:
现在建议的代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum
{
    WORD_BUF_SIZE = 1026,   /* 1024 word characters + newline + NUL byte */
    MAX_DIC_WORDS = 1000    /* dictionary capacity; raise if more words are expected */
};

/* One dictionary word together with the number of times it occurred in the text. */
struct myData
{
    char word[WORD_BUF_SIZE];
    size_t count;
};

/* The dictionary; file-scope storage, so every count starts at zero. */
struct myData dicWord[ MAX_DIC_WORDS ];

/* qsort comparator: orders two dictionary entries by strcmp on their words. */
static int compareEntries( const void *lhs, const void *rhs )
{
    const struct myData *a = lhs;
    const struct myData *b = rhs;

    return strcmp( a->word, b->word );
}

/* bsearch comparator: compares a plain C-string key with a dictionary entry. */
static int compareKeyToEntry( const void *key, const void *entry )
{
    const struct myData *e = entry;

    return strcmp( key, e->word );
}

/*
 * Reads dictionary words (one per line) until a line starting with "-----",
 * counts how often each of them occurs in the remaining text, and prints
 * every word whose count equals the highest count as "<word> <count>",
 * in strcmp order.
 *
 * Text tokens are maximal runs of [a-zA-Z0-9_] (at most 1024 characters).
 * If no dictionary word occurs at all, the highest count is 0 and every
 * dictionary word is printed with 0.
 *
 * Returns 0 on success, EXIT_FAILURE if the dictionary exceeds MAX_DIC_WORDS.
 */
int main( void )
{
    size_t dic_actual_num = 0;
    char line[WORD_BUF_SIZE];

    // first, get the input of words
    while ( fgets( line, sizeof line, stdin ) )
    {
        line[ strcspn( line, "\n" ) ] = '\0';
        if ( strncmp( line, "-----", 5 ) == 0 )
        {
            break;
        }

        // refuse to write past the end of the fixed-size dictionary
        if ( dic_actual_num >= MAX_DIC_WORDS )
        {
            fprintf( stderr, "too many dictionary words (limit %d)\n", MAX_DIC_WORDS );
            return EXIT_FAILURE;
        }

        snprintf( dicWord[ dic_actual_num ].word,
                  sizeof dicWord[ dic_actual_num ].word, "%s", line );
        dic_actual_num++;
    }

    // sort once: gives the required strcmp output order and enables bsearch
    qsort( dicWord, dic_actual_num, sizeof dicWord[0], compareEntries );

    // second, get the input of text and check for highest count
    char tmp[WORD_BUF_SIZE];
    size_t maxCount = 0;
    for ( ;; )
    {
        // Skipping separators is a separate call: "%*[" fails when the next
        // character is already a word character, and a failed directive in a
        // combined format would end the scan before any word is read.
        if ( scanf( "%*[^a-zA-Z0-9_]" ) == EOF )
        {
            break;
        }
        if ( scanf( "%1024[a-zA-Z0-9_]", tmp ) != 1 )
        {
            break;
        }

        struct myData *hit = bsearch( tmp, dicWord, dic_actual_num,
                                      sizeof dicWord[0], compareKeyToEntry );
        if ( hit != NULL )
        {
            hit->count++;
            if ( hit->count > maxCount )
            {
                maxCount = hit->count;
            }
        }
    }

    // display answer (dicWord is already in strcmp order)
    for ( size_t i = 0; i < dic_actual_num; i++ )
    {
        if ( maxCount == dicWord[i].count )
        {
            printf( "%s %zu\n", dicWord[i].word, maxCount );
        }
    }
    return 0;
}
编辑:添加了一个用于替换 `scanf()` 调用的函数。
警告:此提议的函数尚未经过测试。
#include <ctype.h>
#include <stdio.h>
// prototype:
char *getWord( void );

/*
 * Reads one character from stdin.  Uses the faster getchar_unlocked() when
 * the POSIX declarations are visible and falls back to standard getchar()
 * otherwise.
 */
static int nextChar( void )
{
#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199506L
    return getchar_unlocked();
#else
    return getchar();
#endif
}

/* True for the characters that make up a word: letters, digits and '_'. */
static int isWordChar( int ch )
{
    return '_' == ch || isalnum( ch );
}

/*
 * Returns the next word from stdin: a maximal run of letters, digits and
 * '_' of at most 1024 characters (a longer run is delivered in pieces by
 * successive calls).  Everything before the word is skipped.
 *
 * The result points to a static buffer that is overwritten by the next
 * call.  An empty string means end of input was reached before any word
 * character was found.
 */
char *getWord( void )
{
    static char newWord[1026];
    size_t charCount = 0;
    int ch;

    // skip leading junk; on exit ch is EOF or the first character of the word
    do
    {
        ch = nextChar();
    } while( ch != EOF && !isWordChar( ch ) );

    // append desired characters, starting with the one the skip loop found
    while( ch != EOF && isWordChar( ch ) )
    {
        newWord[ charCount ] = (char)ch;
        charCount++;
        if( charCount == 1024 )
        {
            // buffer full: leave the rest of the run unread for the next call
            break;
        }
        ch = nextChar();
    }

    // terminate the string
    newWord[ charCount ] = '\0';
    return newWord;
}