在C语言中找到两个字符串中最长的常用单词?

时间:2016-05-15 06:50:58

标签: c string comparison

我有一个问题,就是在一段时间内找到两个字符串中最长的常用字。首先,我有一个想法,用#34; isspace"功能,但无法弄清楚如何找到一个常用词。然后" strcmp"我想到了,但到目前为止我只能比较两个字符串。我想到了一些方法来合并strcmp和isspace以便找到不同的单词,然后使用temp值来找到最长的单词,但我无法想出正确的代码。

   #include <stdio.h>

   int strcmp(char s[],char t[]);


 void main()
 {

    char s[20],t[20];
   printf("Type in a string s.\n");
   gets(s);
    printf("Type in a string t.\n");
    gets( t );
   printf("The result of comparison=%d\n",strcmp(s,t));

    return 0;
      }


   int strcmp(char s[],char t[])
  {
   int i;
   for(i=0;s[i]==t[i];i++)
   if(s[i]=='\0')
   return( 0 );
   return(s[i]-t[i]);
   }

请帮我这个。所有的想法(和代码)都受到欢迎和赞赏。提前谢谢!

编辑::

我一直在和这个人争斗一段时间,我认为我有解决方案,但这是一种非常严格的方法。该程序有一个bug,可能是数组&#34; ptrArray1&#34;但我无法修复它。

     #include <stdio.h>
     #include <string.h>
     #include <stdlib.h>


     int returnArrayOfWords (char* str4Parsing, char* arrayParsed[])
       {
// returns the length of array
char seps[]   = " \t\n"; // separators
char *token = NULL;
char *next_token = NULL;
int i = 0;

// Establish string and get the first token:
token = strtok( str4Parsing, seps);

// While there are tokens in "str4Parsing"
while ((token != NULL))
{
    // Get next token:
    arrayParsed[i] = token;
    //printf( " %s\n", arrayParsed[i] );//to be commented
    token = strtok( NULL, seps);
    i++;
}
return i;
      }

      void printArr(char *arr[], int n)
     {
int i;
for ( i = 0; i < n; i++)
{
    printf("Element %d is %s \n", i, arr[i]);
}
   }


     void findLargestWord(char *ptrArray1[], int sizeArr1, char              *ptrArray2[], int sizeArr2)
{
int maxLength = 0;
char *wordMaxLength = NULL ;
int i = 0, j = 0;
char *w1 = NULL, *w2 = NULL; /*pointers*/
int currLength1 = 0, currLength2 = 0 ;

//printArr(&ptrArray1[0], sizeArr1);
//printArr(&ptrArray2[0], sizeArr2);

for (i = 0; i < sizeArr1; i++)
    {
    // to find the largest word in the array
    w1 = (ptrArray1[i]); // value of address (ptrArray1 + i)
    currLength1 = strlen(w1);
    //printf("The word from the first string is: %s and its length is : %d \n", w1, currLength1); // check point

    for (j = 0; j < sizeArr2; j++)
        {
        w2 = (ptrArray2[j]); // value of address (ptrArray2 + j)
        currLength2 = strlen(w2);
        //printf("The word from the second string is : %s and its length is : %d \n", w2, currLength2); // check point

        if (strcoll(w1, w2) == 0 && currLength1 == currLength2)
            // compares the strings
            {
            if (currLength2 >= maxLength)
                // in the variable maxLength -> the length of the longest word
                {
                    maxLength = currLength2;
                    wordMaxLength = w2;
                    printf("The largest word for now is : %s and its length is : %d \n", wordMaxLength, maxLength); // check point
                }
            }
        }
    }
printf("The largest word is: %s \n", wordMaxLength);
printf("Its length is: %d \n", maxLength);
    }



     int main ()
      {
int n = 80; /*max number of words in string*/
char arrS1[80], arrS2[80];
char *ptrArray1 = NULL, *ptrArray2 = NULL;
int sizeArr1 = 0, sizeArr2 = 0;

// to allocate memory:
ptrArray1 = (char*)calloc(80, sizeof(char));
if(ptrArray1 == NULL)
{
    printf("Error! Memory for Pointer 1 is not allocated.");
    exit(0);
}

ptrArray2 = (char*)calloc(80, sizeof(char));
if(ptrArray2 == NULL)
{
    printf("Error! Memory for Pointer 2 is not allocated.");
    exit(0);
}

printf("Type your first string: ");
fgets(arrS1, 80, stdin);
sizeArr1 = returnArrayOfWords (arrS1, &ptrArray1); // sizeArr1 = number of elements in array 1

printf("Type your second string: ");
fgets(arrS2, 80, stdin);
sizeArr2 = returnArrayOfWords (arrS2, &ptrArray2); // sizeArr2 = number of elements in array 2

findLargestWord(&ptrArray1, sizeArr1, &ptrArray2, sizeArr2);

free(ptrArray1);
free(ptrArray2);
return 0;
          }

我也尝试使用后两个发布的解决方案,但是如下所述,我有使用它们的问题。

对我的代码提供任何帮助,解决后一种解决方案的问题或提出新的解决方案都是值得欢迎的。提前谢谢大家!

PS。如果我的代码位置不好,我很抱歉。我使用这个展示位置仍然不太好。

2 个答案:

答案 0 :(得分:1)

有很多方法可以解决这个问题。下面,指向其中一个字符串中每个字符的指针用于使用strchr搜索另一个字符以匹配字符。找到匹配字符后,比较循环会运行推进每个指针,以设计存在的公共子字符串的长度(如果有)。

例程,匹配字符,检查子字符串长度,重复,继续如此长,因为strchr返回一个有效指针。每次找到一个更长的子字符串时,max长度会更新以返回并且子字符串存在被复制到r strncpy nul-terminated ,以便最长公共字符串的文本可供调用函数main使用。

这是一种相当强力的方法,可能会有一些额外的调整来提高效率。功能本身是:

/** return length of longest common substring in 'a' and 'b'.
 *  by searching through each character in 'a' for each match
 *  in 'b' and comparing substrings present at each match. the
 *  size of the longest substring is returned, the test of the
 *  longest common substring is copied to 'r' and made available
 *  in the calling function. (the lengths should also be passed
 *  for validation, but that is left as an exercise)
 */
size_t maxspn (const char *a, const char *b, char *r)
{
    if (!a||!b||!*a||!*b) return 0; /* valdate parameters */

    char *ap = (char *)a;           /* pointer to a       */
    size_t max = 0;                 /* max substring char */

    for (; *ap; ap++) {             /* for each char in a */
        char *bp = (char *)b;       /* find match in b with strchr */
        for (; *bp && (bp = strchr (bp, *ap)); bp++) {
            char *spa = ap, *spb = bp; /* search ptr initialization */
            size_t len = 0;         /* find substring len */
            for (; *spa && *spb && *spa == *spb; spa++, spb++) len++;
            if (len > max) {        /* if max, copy to r  */
                strncpy (r, ap, (max = len));
                r[max] = 0;         /* nul-terminate r    */
            }
        }
    }

    return max;
}

返回长度max,然后在函数执行期间更新r会导致r保存与最长子字符串匹配关联的字符串。

由于其安全风险,其他改进措施是删除在C11中删除的gets而不弃用。任何理智的编码器都不应该使用它(应该覆盖我们大约40%的人)。将剩余的位放在一起,可以使用一小段测试代码:

#include <stdio.h>
#include <string.h>

#define MAXC 128

size_t maxspn (const char *a, const char *b, char *r);
void rmlf (char *s);

int main (void) {

    char res[MAXC] = "", s[MAXC] = "", t[MAXC] = "";

    printf ("Type in a string 's': ");
    if (!fgets (s, MAXC, stdin)) {  /* validate 's' */
        fprintf (stderr, "error: invalid input for 's'.\n");
        return 1;
    }
    rmlf (s);   /* remove trailing newline */
    printf ("Type in a string 't': ");
    if (!fgets (t, MAXC, stdin)) {  /* validate 't' */
        fprintf (stderr, "error: invalid input for 's'.\n");
        return 1;
    }
    rmlf (t);   /* remove trailing newline */

    /* obtain longest commons substring between 's' and 't' */
    printf ("\nThe longest common string is : %zu ('%s')\n",
            maxspn (s, t, res), res);

    return 0;
}

/** return length of longest common substring in 'a' and 'b'.
 *  by searching through each character in 'a' for each match
 *  in 'b' and comparing substrings present at each match. the
 *  size of the longest substring is returned, the test of the
 *  longest common substring is copied to 'r' and made available
 *  in the calling function. (the lengths should also be passed
 *  for validation, but that is left as an exercise)
 */
size_t maxspn (const char *a, const char *b, char *r)
{
    if (!a||!b||!*a||!*b) return 0; /* valdate parameters */

    char *ap = (char *)a;           /* pointer to a       */
    size_t max = 0;                 /* max substring char */

    for (; *ap; ap++) {             /* for each char in a */
        char *bp = (char *)b;       /* find match in b with strchr */
        for (; *bp && (bp = strchr (bp, *ap)); bp++) {
            char *spa = ap, *spb = bp;
            size_t len = 0;         /* find substring len */
            for (; *spa && *spb && *spa == *spb; spa++, spb++) len++;
            if (len > max) {        /* if max, copy to r  */
                strncpy (r, ap, (max = len));
                r[max] = 0;         /* nul-terminate r    */
            }
        }
    }

    return max;
}

/** remove trailing newline from 's'. */
void rmlf (char *s)
{
    if (!s || !*s) return;
    for (; *s && *s != '\n'; s++) {}
    *s = 0;
}

示例使用/输出

$ ./bin/strspn
Type in a string 's': a string with colors123456789 all blue
Type in a string 't': a string without colors1234567890 all red

The longest common string is : 16 (' colors123456789')

或另一个可能更容易想象的内容:

$ ./bin/strspn
Type in a string 's': green eel
Type in a string 't': cold blue steel

The longest common string is : 3 ('eel')

查看代码并与其他答案进行比较。如果您有任何其他问题,请与我们联系。还有一些其他验证应该添加到确保文本不会写入缓冲区的末尾等等。希望这将提供一些帮助或替代方法。

其他子字符串

为了确保你和我看到同样的事情,我在下面列出了其他使用示例。没有错误,代码按预期执行。如果您在修改代码时遇到问题,请告诉我您尝试做什么,我可以提供帮助。我的代码中的每个指针增量都经过验证。如果您更改了有关指针增量或 nul-termination 的任何内容,除非您考虑验证中的更改,否则代码将无效。

$ ./bin/strspn
Type in a string 's': 1
Type in a string 't':

The longest common string is : 0 ('')

$ ./bin/strspn
Type in a string 's': A man a plan a canal panama
Type in a string 't': a man a plan a river panama

The longest common string is : 14 (' man a plan a ')

$ ./bin/strspn
Type in a string 's': this is my favorite string
Type in a string 't': this is my favoritist string

The longest common string is : 18 ('this is my favorit')

$ ./bin/strspn
Type in a string 's': not the same until here
Type in a string 't': cant be equal till here

The longest common string is : 6 ('l here')

$ ./bin/strspn
Type in a string 's': some str with ten in the middle
Type in a string 't': a string often ignorded

The longest common string is : 5 ('ten i')

最长的常用词

好的,在我最终了解您要完成的工作后,您可以通过标记化选择两个字符串's''t'之间的最长公用字 每个字符串都带有strtok,在单独的指针数组中保存指向每个字符串中每个字的指针,然后简单地遍历指针数组以选择最长的常用字(如果是同一个多个常用字,则为第1个字)长度)。您需要的只是以下简单的东西。

注意 strtok修改字符串's''t',因此如果您需要保留原件,请复制一份。

/** return length of longest common word in 'a' and 'b'.
 *  by tokenizing each word in 'a' & 'b' and iterating over
 *  each, returning the length of the logest match, and updating
 *  'r' to contain the longest common word.
 */
size_t maxspnwhole (char *a, char *b, char *r)
{
    if (!a||!b||!*a||!*b) return 0; /* valdate parameters */

    char *arra[MAXC] = {NULL}, *arrb[MAXC] = {NULL};
    char *ap = a, *bp = b;          /* pointers to a & b */
    char *delim = " .,-;\t\n";      /* word delimiters   */
    size_t i, j, len, max, na, nb;  /* len, max, n-words */
    len = max = na = nb = 0;

    /* tokenize both strings into pointer arrays */
    for (ap = strtok (a, delim); ap; ap = strtok (NULL, delim))
        arra[na++] = ap;

    for (bp = strtok (b, delim); bp; bp = strtok (NULL, delim))
        arrb[nb++] = bp;

    for (i = 0; i < na; i++)           /* select longest common word */
        for (j = 0; j < nb; j++)
            if (*arra[i] == *arrb[j])             /* 1st chars match */
                if (!strcmp (arra[i], arrb[j])) { /* check word      */
                    len = strlen (arra[i]);
                    if (len > max) {              /* if longest */
                        max = len;                /* update max */
                        strcpy (r, arra[i]);      /* copy to r  */
                    }
                }

    return max;
}

将其与其他代码集成,您可以比较结果:

#include <stdio.h>
#include <string.h>

#define MAXC 128

size_t maxspn (const char *a, const char *b, char *r);
size_t maxspnwhole (char *a, char *b, char *r);
void rmlf (char *s);

int main (void) {

    char res[MAXC] = "", s[MAXC] = "", t[MAXC] = "";

    printf ("Type in a string 's': ");
    if (!fgets (s, MAXC, stdin)) {  /* validate 's' */
        fprintf (stderr, "error: invalid input for 's'.\n");
        return 1;
    }
    rmlf (s);   /* remove trailing newline */
    printf ("Type in a string 't': ");
    if (!fgets (t, MAXC, stdin)) {  /* validate 't' */
        fprintf (stderr, "error: invalid input for 's'.\n");
        return 1;
    }
    rmlf (t);   /* remove trailing newline */

    /* obtain longest commons substring between 's' and 't' */
    printf ("\nThe longest common string is : %zu ('%s')\n",
            maxspn (s, t, res), res);

    /* obtain longest commons word between 's' and 't' */
    printf ("\nThe longest common word is : %zu ('%s')\n",
            maxspnwhole (s, t, res), res);

    return 0;
}

/** return length of longest common word in 'a' and 'b'.
 *  by tokenizing each word in 'a' & 'b' and iterating over
 *  each, returning the length of the logest match, and updating
 *  'r' to contain the longest common word.
 */
size_t maxspnwhole (char *a, char *b, char *r)
{
    if (!a||!b||!*a||!*b) return 0; /* valdate parameters */

    char *arra[MAXC] = {NULL}, *arrb[MAXC] = {NULL};
    char *ap = a, *bp = b;          /* pointers to a & b */
    char *delim = " .,-;\t\n";      /* word delimiters   */
    size_t i, j, len, max, na, nb;  /* len, max, n-words */
    len = max = na = nb = 0;

    /* tokenize both strings into pointer arrays */
    for (ap = strtok (a, delim); ap; ap = strtok (NULL, delim))
        arra[na++] = ap;

    for (bp = strtok (b, delim); bp; bp = strtok (NULL, delim))
        arrb[nb++] = bp;

    for (i = 0; i < na; i++)
        for (j = 0; j < nb; j++)
            if (*arra[i] == *arrb[j])
                if (!strcmp (arra[i], arrb[j])) {
                    len = strlen (arra[i]);
                    if (len > max) {
                        max = len;
                        strcpy (r, arra[i]);
                    }
                }

    return max;
}

/** return length of longest common substring in 'a' and 'b'.
 *  by searching through each character in 'a' for each match
 *  in 'b' and comparing substrings present at each match. the
 *  size of the longest substring is returned, the test of the
 *  longest common substring is copied to 'r' and made available
 *  in the calling function. (the lengths should also be passed
 *  for validation, but that is left as an exercise)
 */
size_t maxspn (const char *a, const char *b, char *r)
{
    if (!a||!b||!*a||!*b) return 0; /* valdate parameters */

    char *ap = (char *)a;           /* pointer to a       */
    size_t max = 0;                 /* max substring char */

    for (; *ap; ap++) {             /* for each char in a */
        char *bp = (char *)b;       /* find match in b with strchr */
        for (; *bp && (bp = strchr (bp, *ap)); bp++) {
            char *spa = ap, *spb = bp;
            size_t len = 0;         /* find substring len */
            for (; *spa && *spb && *spa == *spb; spa++, spb++) len++;
            if (len > max) {        /* if max, copy to r  */
                strncpy (r, ap, (max = len));
                r[max] = 0;         /* nul-terminate r    */
            }
        }
    }

    return max;
}

/** remove trailing newline from 's'. */
void rmlf (char *s)
{
    if (!s || !*s) return;
    for (; *s && *s != '\n'; s++) {}
    *s = 0;
}

示例使用/输出

$ ./bin/strlongestcmn
Type in a string 's': I have a huge boat.
Type in a string 't': I have a small boat.

The longest common string is : 9 ('I have a ')

The longest common word is : 4 ('have')

仔细看看,如果您有任何其他问题,请告诉我。

答案 1 :(得分:-1)

此示例打印给定两个输入字符串的最长子字符串。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int *lcommon(char *str1, char *str2) {
    int strlen1 = (unsigned) strlen(str1);
    int strlen2 = (unsigned) strlen(str2);
    int i, j, k;
    int longest = 0;
    int **ptr = malloc(2 * sizeof(int *));
    static int *ret;
    ret = calloc((unsigned) strlen1 + 1, sizeof(int));
    for (i = 0; i < 2; i++)
        ptr[i] = calloc((unsigned) strlen2, sizeof(int));

    k = 0;
    for (i = 0; i < strlen1; i++) {
        memcpy(ptr[0], ptr[1], strlen2 * sizeof(int));
        for (j = 0; j < strlen2; j++) {
            if (str1[i] == str2[j]) {
                if (i == 0 || j == 0) {
                    ptr[1][j] = 1;
                } else {
                    ptr[1][j] = ptr[0][j-1] + 1;
                }
                if (ptr[1][j] > longest) {
                    longest = ptr[1][j];
                    k = 0;
                    ret[k++] = longest;
                }
                if (ptr[1][j] == longest) {
                    ret[k++] = i;
                    ret[k] = -1;
                }
            } else {
                ptr[1][j] = 0;
            }
        }
    }
    for (i = 0; i < 2; i++)
        free(ptr[i]);
    free(ptr);
    ret[0] = longest;
    return ret;
}

int main(int argc, char *argv[]) {
    int i, longest, *ret;

    if (argc != 3) {
        printf("usage: longest-common-substring string1 string2\n");
        exit(1);
    }

    ret = lcommon(argv[1], argv[2]);
    if ((longest = ret[0]) == 0) {
        printf("There is no common substring\n");
        exit(2);
    }

    i = 0;
    while (ret[++i] != -1) {
        printf("%.*s\n", longest, &argv[1][ret[i]-longest+1]);
    }

    exit(0);
}

<强>测试

$ ./a.out computerprogramming javaprogrammer
programm

您可以阅读有关问题here的更多信息。

您还可以使用交互式程序在控制台中编写字符串:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int *lcommon(char *str1, char *str2) {
    int strlen1 = (unsigned) strlen(str1);
    int strlen2 = (unsigned) strlen(str2);
    int i, j, k;
    int longest = 0;
    int **ptr = malloc(2 * sizeof(int *));
    static int *ret;
    ret = calloc((unsigned) strlen1 + 1, sizeof(int));
    for (i = 0; i < 2; i++)
        ptr[i] = calloc((unsigned) strlen2, sizeof(int));

    k = 0;
    for (i = 0; i < strlen1; i++) {
        memcpy(ptr[0], ptr[1], strlen2 * sizeof(int));
        for (j = 0; j < strlen2; j++) {
            if (str1[i] == str2[j]) {
                if (i == 0 || j == 0) {
                    ptr[1][j] = 1;
                } else {
                    ptr[1][j] = ptr[0][j - 1] + 1;
                }
                if (ptr[1][j] > longest) {
                    longest = ptr[1][j];
                    k = 0;
                    ret[k++] = longest;
                }
                if (ptr[1][j] == longest) {
                    ret[k++] = i;
                    ret[k] = -1;
                }
            } else {
                ptr[1][j] = 0;
            }
        }
    }
    for (i = 0; i < 2; i++)
        free(ptr[i]);
    free(ptr);
    ret[0] = longest;
    return ret;
}

int main(int argc, char *argv[]) {
    int i, longest, *ret;

    if (argc != 3) {
        //printf("usage: longest-common-substring string1 string2\n");

        char s[20], t[20];
        printf("Type in a string s.\n");
        fgets(s, 20, stdin);
        printf("Type in a string t.\n");
        fgets(t, 20, stdin);

        ret = lcommon(s, t);
        if ((longest = ret[0]) == 0) {
            printf("There is no common substring\n");
            exit(2);
        }

        i = 0;
        while (ret[++i] != -1) {
            printf("%.*s\n", longest, &s[ret[i] - longest + 1]);
        }

        //printf("The result of comparison=%d\n", strcmp(s, t));


        exit(0);
    } else { }
    ret = lcommon(argv[1], argv[2]);
    if ((longest = ret[0]) == 0) {
        printf("There is no common substring\n");
        exit(2);
    }

    i = 0;
    while (ret[++i] != -1) {
        printf("%.*s\n", longest, &argv[1][ret[i] - longest + 1]);
    }
    exit(0);
}

<强>测试

Type in a string s.
string1
Type in a string t.
string2
string