C编程;通过有限状态机程序识别字符模式

时间:2018-03-06 20:31:08

标签: c pointers state-machine

我在C中构建一个模式识别器程序,它读入用户定义的字符串和4个字符的用户定义模式。然后程序有一个函数来确定是否找到了一个模式以及最初找到模式(根据输入文本索引)。

我知道这对大多数人来说都是基本的,我只希望能够迅速成为一名更熟练的程序员。当我执行我的程序时,它会以我不理解的方式陷入无限循环。

我知道问题出在我的FindMatch函数上,而不是读入输入文本和模式。我的FindMatch函数有什么问题?!请帮忙。

#include <stdio.h>
#include <stdlib.h>

char *getCharBlock(int *size);
int findmatchA(char *text, char *pattern, int tsize, int psize);
void printIt(char *ptr, int index, int size);

int main(){
   char *text, *pattern; //pointers for the characters you will read
   char *p,*q,*r; //some pointer variables
   int tsize,psize,x,y,z; //some integers

   printf("Please input a sequence of character text (characters only will be stored):");
   text = getCharBlock(&tsize);

   printf(" Now input the pattern you seek to search for: ");
   pattern = getCharBlock(&psize);

   x = findmatch(text,pattern,tsize, psize);
   if(x== -1){
      printf("No Match Found \n");
      printf("No starting position for match exists \n");
   }
   else{
      printf("Match Has Been Found! \n");
      printf("Match starting position at index %d \n", x);
      printf("Remaining text after Match: \n");
      printIt(text, x+psize, tsize);
   }


   free(text);
   free(pattern);
}

char *getCharBlock(int *size){
   char *input = (char*) malloc (80*sizeof(char));
   char a;
   int i = 0;
   a = getchar();
   while(i<80 && a!= '\n'){

      if( (a>= 'a' && a <= 'z') || (a>= 'A' && a <= 'Z') ){
         *(input + i) = a;
         i++;
      }
      a = getchar();
   }
   *size = i;

   return input;
 }

 int findmatch(char *text, char *pattern, int tsize, int psize) {
   int index = 0;
   int state = 0;
   while (psize <= tsize) {

     if ((*(text + index) == *pattern) && state == 0){
         state = 1;
         index++;
          printf( "test 1 \n");
     }
     else if ((*(text + index) != *pattern) && state == 0){
          state = 0;
          index++;
          printf( "test1.1 \n");
     }
     else if (*(text + index) == *(pattern + 1) && state ==1) {
       state = 2;
       index++;
       printf( "test 2 \n");
     }
     else if (*(text + index) != *(pattern + 1) && state ==1) {
        state = 0;
        printf("test 2.2 \n");
     }
     else if (*(text + index) == *(pattern + 2) && state ==2) {
        state  = 3;
        printf("test 3 \n");
     }
     else if (*(text + index) != *(pattern + 2) && state ==2) {
        state  = 0;
        printf("test 3.3 \n");
     }
     else if (*(text + index) == *(pattern + 3) && state ==3) {
        state = 4;
        printf("test 4 \n");
     }
     else if (*(text + index) != *(pattern + 3) && state ==3) {
        state = 0;
        printf("test 4.4 \n");
     }
     else {
         return -1;
     }
     index++;
   }
   return index;
   }

4 个答案:

答案 0 :(得分:1)

  

我知道这对你们大多数人来说都是基本的,我希望能够很快   成为一个更熟练的程序员

祝你好运,我给你我的建议。

一些问题阻止程序正常运行。

  

它以我不喜欢的方式陷入无限循环   理解。

1)你永远在这里循环:

`while (psize <= tsize) {`

psize永远不会改变,永远不会达到tsize,循环永远不会结束。

但这不是唯一的问题。

2)textpattern的输入字符串未被'\0'.终止注意: malloc不是calloc。分配的内存可以包含任何内容! 要谨慎,你应该检查,如果记忆分配得当。

3)并非所有州都正确推进index变量:

 else if (*(text + index) == *(pattern + 2) && state ==2) {
    state  = 3;             // sg! index++; is missing!
    printf("test 3 \n");
 }
 else if (*(text + index) != *(pattern + 2) && state ==2) {
    state  = 0;
    printf("test 3.3 \n");
 }

这可能会妨碍正确的模式匹配。

4)未完成输入验证,例如:您应确保模式的长度恰好为4个字符。

如果我被允许只给你一个建议,那就是:"Never, ever use if-else chain!"。将其替换为switch-case-break构造。

您的int findmatch就是一个很好的例子。 if-else链创建丛林难以调试。你的州非常相似,应该创造和谐。 他们不是。您的功能可以用更简单的功能替换:

int findmatch(char *text, char *pattern, int tsize, int psize) { 
   int index = 0;
   int state = 0;       
   printf("Text=<%s> pattern=<%s> tsize=%d psize=%d \n",text, pattern, tsize, psize);
   while (index <= tsize) {

       switch (state)
       {
           case 0:
                state = next_state(text,pattern, "test 1", "test1.1", &index, 0, 1, 1);
            break;                
            case 1:  // pattern[0]          matched
                state = next_state(text, pattern, "test 2", "test2.2", &index, 1, 2, 0);
            break;
            case 2:  // pattern [0] [1]     matched
                state = next_state(text, pattern, "test 3", "test3.3", &index, 2, 3, 0);
            break;                
            case 3:  // pattern [0] [1] [2] matched
                state = next_state(text, pattern, "test 4", "test4.4", &index, 3, 4, 0);
            break;                
            case 4:
                printf("DONE, index = %d \n",index);
                return index;
            break;                
            default:
                printf("We should not be here! \n");
            break;
       } // case
   } // while       
   return -1;
}

编程就像创作音乐或绘画一样。你的创作应该很美。有和谐,平衡。

这是您学习的工作计划。

#include <stdio.h>
#include <stdlib.h>

char *getCharBlock(int *size);
int findmatch(char *text, char *pattern, int tsize, int psize);
void printIt(char *ptr, int index, int size);

void printIt(char *ptr, int index, int size)
{
}

int main(void){
   char *text, *pattern;    // pointers for the characters you will read
   int tsize,psize,x;       // some integers

   printf("Please input a sequence of character text (characters only will be stored):\n");

   text = getCharBlock(&tsize);

   printf("Now input the pattern you seek to search for: \n");
   pattern = getCharBlock(&psize);

   x = findmatch(text, pattern, tsize, psize);

   if(x == -1){
      printf("No Match Found \n");
      printf("No starting position for match exists \n");
   }
   else{
      printf("Match Has Been Found! \n");
      printf("Match starting position at index %d \n", x - 4);
      printf("Remaining text after Match: <%s> \n", text + x );

      printIt(text, x+psize, tsize);
   }

   free(text);
   free(pattern);
}

char *getCharBlock(int *size){

   char *input = (char*) malloc (80*sizeof(char) +1 );
   if (input == NULL)
   {
       printf("No memory!\n");
       exit(-1);
   }

   char a;
   int i = 0;

   a = getchar();

   while( i<80 &&  a != '\n'){

      if( ((a>= 'a') && (a <= 'z'))  || ( (a>= 'A') &&  (a <= 'Z') ) ){

         * (input + i) = a;

         i++;
      }

      a = getchar();
   }

    * (input + i)  = 0; // sg7! terminate the string 

   *size = i;

   return input;
 }

int next_state(char *text, char *pattern, char *m1, char *m2, int *index, int patternInd, int next_state, int advInd )
{
    int state = 0;

    if (text[*index] == pattern[patternInd]){
        state = next_state;
        printf( "%s\n", m1);
        (*index)++;
    }
    else{
        printf( "%s\n", m2);
        if(advInd)
            (*index)++; 
    }
    return state;       
}

int findmatch(char *text, char *pattern, int tsize, int psize) {

   int index = 0;
   int state = 0;

   printf("Text=<%s> pattern=<%s> tsize=%d psize=%d \n",text, pattern, tsize, psize);
   while (index <= tsize) {

       switch (state)
       {
           case 0:
                state = next_state(text,pattern, "test 1", "test1.1", &index, 0, 1, 1);
            break;

            case 1:  // pattern[0]          matched
                state = next_state(text, pattern, "test 2", "test2.2", &index, 1, 2, 0);
            break;

            case 2:  // pattern [0] [1]     matched
                state = next_state(text, pattern, "test 3", "test3.3", &index, 2, 3, 0);
            break;

            case 3:  // pattern [0] [1] [2] matched
                state = next_state(text, pattern, "test 4", "test4.4", &index, 3, 4, 0);
            break;

            case 4:
                printf("DONE, index = %d \n",index);
                return index;
            break;

            default:
                printf("We should not be here! \n");
            break;
       } // case
   } // while

   return -1;
}

输出:

Please input a sequence of character text (characters only will be stored):                                                                   
aaabcdef                                                                                                                                      
Now input the pattern you seek to search for:                                                                                                
abcd                                                                                                                                          
Text=<aaabcdef> pattern=<abcd> tsize=8 psize=4                                                                                                
test 1                                                                                                                                        
test2.2                                                                                                                                       
test 1                                                                                                                                        
test2.2                                                                                                                                       
test 1                                                                                                                                        
test 2                                                                                                                                        
test 3                                                                                                                                        
test 4                                                                                                                                        
DONE, index = 6                                                                                                                               
Match Has Been Found!                                                                                                                         
Match starting position at index 2                                                                                                            
Remaining text after Match: <ef>                                                                                                              


Please input a sequence of character text (characters only will be stored):                                                                   
abcdefgh                                                                                                                                      
Now input the pattern you seek to search for:                                                                                                 
efgh                                                                                                                                          
Text=<abcdefgh> pattern=<efgh> tsize=8 psize=4                                                                                                
test1.1                                                                                                                                       
test1.1                                                                                                                                       
test1.1                                                                                                                                       
test1.1                                                                                                                                       
test 1                                                                                                                                        
test 2                                                                                                                                        
test 3                                                                                                                                        
test 4                                                                                                                                        
DONE, index = 8                                                                                                                               
Match Has Been Found!                                                                                                                         
Match starting position at index 4                                                                                                            
Remaining text after Match: <>  

我希望它有所帮助。如果您有更多问题,请随时提出。

答案 1 :(得分:0)

我还没有阅读你的整个代码,我只关注你的无限循环问题。

findmatch()函数中while循环的控制条件有问题。您正在检查psize&lt; = tsize,但您永远不会更新任何这些变量。退出此循环的唯一其他情况是在最后一个返回-1的情况下,但是使用此代码,您永远不会到达那里(对于每种情况都有if / else情况,并且在情况下继续返回state = 0不匹配)。

你应该检查你的等待条件:

while (state != 4 && index < tsize)

此外,当您从findmatch()获得返回代码时,应检查返回值为4以确保匹配。

如果您使用状态变量作为模式字符串的索引器,您还可以减少此函数中的大量if:

int findmatch(char *text, char *pattern, int tsize, int psize)
{
    int i, state = 0;

    for (i = 0; state < psize && i < tsize; i++)
    {
        if (text[i] != pattern[state])
        {
            // no match, reset state
            i -= state;
            state = 0;
        }
        else
            // found a match, next state
            state++;
    }

    if (state == psize)
        return i - psize;
    else
        return -1;
}

希望它有所帮助!

答案 2 :(得分:0)

我在这看不到[x]。 可以通过数组指针访问数组。所以 state [0]是数组中的第一个char。你有状态== 0等等,状态== 3,这很好,但你必须在数组上工作。 我是菜鸟,但有类似的东西。 希望这会有所帮助。

答案 3 :(得分:0)

#include <stdio.h>
#include <stdlib.h>
int Myindex = 0;
char *getCharBlock(int *size);
int findmatchA(char *text, char *pattern, int tsize, int psize);
int findmatchB(char *text, char *pattern, int tsize, int psize);
int findmatchC(char *text, char *pattern, int tsize, int psize);
void printIt(char *ptr, int index, int size);

int main() {
  char *text, *pattern, *text2, *pattern2, *text3,
  *pattern3; // pointers for the characters you will read
  char *p, *r;   // some pointer variables
 int tsize, psize, tsize2, psize2, tsize3, psize3, x, y, z; // some integers

  printf(" \n");
  printf("Please input a sequence of characters (only characters will be "
     "stored ): \n");
 text = getCharBlock(&tsize);
printf("\n");
// printf( "tsize is %d \n", tsize);
 printf(" Now input the pattern you seek to search for: \n");
 pattern = getCharBlock(&psize);
 // printf( "tsize is %d \n", psize);

 x = findmatchA(text, pattern, tsize, psize);
 if (x == -1) {
 printf("\n");
 printf("No Match Found \n");
 printf("\n");
 printf("No starting position for match exists \n");
} else {
printf("\n");
printf("Match Has Been Found!!! \n");
// printf( "%d\n", x);
printf("\n");
printf("Match starting position at index %d \n", x - 3);
printf("\n");
printf("Remaining text after Match: \n");
printIt(text, x, tsize);
}

 // //what should be passed?);
 // looks for a match, returns -1 for failure or an int which is the index to
// the location where the match starts.  the return values can be used to
// determine IF a match was found, and where.
printf("\n");
printf("Now input a sequence of character text for non-contiguous "
     "matching(characters only will be stored): \n");
text2 = getCharBlock(&tsize2);
printf("\n");
printf(" Now input the pattern you seek to search for in a non-contiguous "
     "manner: \n");
 pattern2 = getCharBlock(&psize2);

y = findmatchB(text2, pattern2, tsize2, psize2);
if (y == -1) {
printf("\n");
printf("No match found \n");
printf("\n");
printf("No starting position for match exists \n");
} else {
printf("\n");
printf("Match has been found!!! \n");
printf("\n");
printf("Match starting position at index %d \n", y - 4);
printf("\n");
printf("Remaining text after match: \n");
printIt(text, y, tsize);
}
printf("\n");
printf("Now input a sequence of character text for multiple "
     "matchings(characters only will be stored): \n");
text3 = getCharBlock(&tsize3);
printf("\n");
printf(" Now input the pattern you seek to search for: \n");
pattern3 = getCharBlock(&psize3);

z = findmatchC(text3, pattern3, tsize3, psize3);
printf("\n");
printf("The Number of Matches Is %d \n", z);

free(text);
free(pattern);
free(text2);
free(pattern2);
free(text3);
free(pattern3);
}

char *getCharBlock(int *size) {
// this would fill in the "string" of chars for the passed in char pointer.

char *input = (char *)malloc(80 * sizeof(char));
char a;
int i = 0;
a = getchar();
while (i < 80 && a != '\n') {

if ((a >= 'a' && a <= 'z') || (a >= 'A' && a <= 'Z')) {
  *(input + i) = a;
  i++;
 }
  a = getchar();
}
*(input + i) = '\0';
*size = i;

return input;
}

int findmatchA(char *text, char *pattern, int tsize, int psize) {
 int index = 0;
 int state = 0;
  while (psize <= tsize) {

 if ((*(text + index) == *pattern) && state == 0) {
   state = 1;
   index++;
   //  printf( "test 1 \n");
 }

 else if ((*(text + index) != *pattern) && state == 0) {
   state = 0;
   index++;

   //  printf( "test1.1 \n");
  }

  else if (*(text + index) == *(pattern + 1) && state == 1) {
    state = 2;
    index++;
    //  printf( "test 2 \n");
   }

   else if (*(text + index) != *(pattern + 1) && state == 1) {
    state = 0;
    index++;

    //  printf("test 2.2 \n");
   }

   else if (*(text + index) == *(pattern + 2) && state == 2) {
    state = 3;
    index++;
    //  printf("test 3 \n");
    }

   else if (*(text + index) != *(pattern + 2) && state == 2) {
     state = 0;
     index++;

      //  printf("test 3.3 \n");
   }

   else if (*(text + index) == *(pattern + 3) && state == 3) {
    state = 4;
    index++;
    return index;
    //  printf("test 4 \n");
    break;
   }

  else if (*(text + index) != *(pattern + 3) && state == 3) {
    state = 0;
    index++;

    //  printf("test 4.4 \n");
    }

   else {
    return -1;
   }
  }
   return index;
 }

 int findmatchB(char *text2, char *pattern2, int tsize2, int psize2) { // non-
      //contiguous case
    int index = 0;
   int state = 0;

  while (psize2 <= tsize2) {

 if ((*(text2 + index) == *pattern2) && state == 0) {
   state = 1;
  index++;
  //  printf("test1\n");
  }

else if ((*(text2 + index) != *pattern2) && state == 0) {
  state = 0;
  index++;
  //  printf("test1.1\n");
}

else if (*(text2 + index) == *(pattern2 + 1) && state == 1) {
  state = 2;
  index++;
  //    printf("test2\n");
}

else if (*(text2 + index) != *(pattern2 + 1) && state == 1) {
  state = 1;
  index++;
  //    printf("test2.2\n");
}

else if (*(text2 + index) == *(pattern2 + 2) && state == 2) {
  state = 3;
  index++;
  //    printf("test3\n");
}

else if (*(text2 + index) != *(pattern2 + 2) && state == 2) {
  state = 2;
  index++;
  //  printf("test3.3\n");
}

else if (*(text2 + index) == *(pattern2 + 3) && state == 3) {
  state = 4;
  index++;
  //  printf("test4\n");
  break;
  return index;
}

else if (*(text2 + index) != *(pattern2 + 3) && state == 3) {
  state = 3;
  index++;
  //  printf("test4.4\n");
}

else {
  return -1;
 }
}
return index;
}

int findmatchC(char *text3, char *pattern3, int tsize3, int psize3) {
int index = 0;
int pindex = 0;
int state = 0;
int matchcount = 0;
// printf( "tsize is %d \n", tsize3);

while (index <= tsize3) {
//  printf( "test of inside loop \n");
// printf( "pindex is %d \n", pindex);
// printf( "tindex is %d \n", index);
// printf( " text value is %c \n", *(text3 + index));
// printf( " pattern value is %c \n", *(pattern3 + pindex));

 if (*(text3 + index) == *(pattern3 + pindex) && state == 0) {
  state = 1;
  index++;
  pindex++;
  //   printf( "test 1 \n");
  }

  else if (*(text3 + index) != *(pattern3 + pindex) && state == 0) {
  state = 0;
  index++;
  // pindex++;
  //   printf( "test1.1 \n");
  }

  else if (*(text3 + index) == *(pattern3 + pindex) && state == 1) {
  state = 2;
  index++;
  pindex++;
  // printf( "test 2 \n");
  }

  else if (*(text3 + index) != *(pattern3 + pindex) && state == 1) {
  state = 0;
  index++;
  //  pindex++;
  //   printf("test 2.2 \n");
  }

  else if (*(text3 + index) == *(pattern3 + pindex) && state == 2) {
  state = 3;
  index++;
  pindex++;
  //   printf("test 3 \n");
  }

  else if (*(text3 + index) != *(pattern3 + pindex) && state == 2) {
  state = 0;
  index++;
  //  pindex++;
  //   printf("test 3.3 \n");
  }

  else if (*(text3 + index) == *(pattern3 + pindex) && state == 3) {
  state = 0;
  index++;
  pindex = 0;
  matchcount++;
  //   printf("test 4 \n");

 }

  else if (*(text3 + index) != *(pattern3 + pindex) && state == 3) {
  state = 0;
  index++;
  pindex = 0;
  //   printf("test 4.4 \n");
  }

  else {
  return -1;
  }
 }

return matchcount;
}

 void printIt(char *ptr, int index, int size) {

  while (index < size) {
  printf("%c", *(ptr + index));
  index++;

   }
   printf("\n");
 }