C - 从一个大字符串中获取最长的字母串

时间:2009-10-03 05:29:52

标签: c string pointers

所以祈祷告诉我,如何从C中的一串垃圾中获取最大的连续字母串?这是一个例子:

char *s = "(2034HEY!!11   th[]thisiswhatwewant44";

将返回......

thisiswhatwewant

我前几天在测验中得到了这个......它让我疯狂(仍然是)试图弄明白!

更新:

我的错,伙计,我忘了包含这样一个事实:你可以使用的唯一功能是strlen功能。从而使它变得更难......

7 个答案:

答案 0 :(得分:3)

使用 strtok() 将您的字符串拆分为若干标记(token),把所有非字母字符都作为分隔符,然后找出最长的标记。

要查找最长的令牌,您需要为令牌组织一些存储空间 - 我会使用链接列表。

就这么简单。

修改

好的,如果strlen()是唯一允许的函数,你可以先求出源字符串的长度,然后遍历它,并用NUL字符('\0')替换所有非字母字符 - 这基本上就是strtok()所做的事情。

然后,您需要第二次浏览已修改的源字符串,一次推进一个令牌,并使用strlen()找到最长的一个。

答案 1 :(得分:1)

这听起来类似于标准的UNIX'字符串'实用程序。

记录以NUL结尾的最长可打印字符序列。遍历各个字节,直到遇到一个可打印字符,然后开始计数。如果遇到不可打印字符,就停止计数并丢弃起点。如果遇到NUL,则检查当前序列的长度是否大于之前的最长记录;如果是,就把它记录下来,然后开始寻找下一个字符串。

答案 2 :(得分:1)

与其他许多子串相比,究竟什么才算“好”子串 - 只包含小写字母吗?(即不含空格、数字、标点符号、大写字母等等)?

无论用来判断字符是否为“好”的谓词P是什么,只需对s做一次遍历,对每个字符应用P,就可以轻松识别每一段“好字符”序列的开始和结束位置,记住并挑选出最长的一段。伪代码如下:

# Single pass over s that tracks the current run of "good" characters
# (those for which the predicate P is true) and remembers the longest
# run seen so far as a start/end index pair plus its length.
longest_run_length = 0
longest_run_start = longest_run_end = null
status = bad  # whether the previously examined character was good or bad
for i in (all indices over s):
  if P(s[i]):  # current char is good
    if status == bad:  # previous one was bad
      # a new run begins at i
      current_run_start = current_run_end = i
      status = good
    else: # previous one was also good
      current_run_end = i
  else:  # current char is bad
    if status == good:  # previous one was good -> end of run
      current_run_length = current_run_end - current_run_start + 1
      # strict '>' keeps the earliest run when lengths tie
      if current_run_length > longest_run_length:
        longest_run_start = current_run_start
        longest_run_end = current_run_end
        longest_run_length = current_run_length
      status = bad

# if a good run ends with end-of-string:
# (the loop only closes a run when it sees a bad char, so a run that
# reaches the end of s has to be compared here)
if status == good:  # previous one was good -> end of run
  current_run_length = current_run_end - current_run_start + 1
  if current_run_length > longest_run_length:
    longest_run_start = current_run_start
    longest_run_end = current_run_end
    longest_run_length = current_run_length

答案 3 :(得分:1)

为什么要使用strlen()? 这是我的版本,它不使用任何库函数。

#ifdef UNIT_TEST
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif

/*
 * largest_letter_sequence()
 *
 * Scans the NUL-terminated string `s` and locates its longest run of
 * consecutive letters (a-z, A-Z).
 *
 * Returns a pointer into `s` at the first letter of that run. The run is
 * NOT NUL-terminated at its end: whatever follows it in `s` is still
 * reachable through the returned pointer. Returns NULL when `s` contains
 * no letters at all.
 *
 * If `len` is not NULL, the length of the run is stored through it
 * (0 when no letters were found).
 *
 * When several runs share the maximum length, the first one wins.
 * Passing NULL as `s` is undefined behaviour.
 *
 * The letter test relies on an ASCII-like character set in which 'a'..'z'
 * and 'A'..'Z' are each contiguous ranges.
 */
const char *largest_letter_sequence(const char *s, size_t *len) {
  const char *best_start = NULL;
  const char *run_start = NULL;
  const char *cursor;
  size_t best_len = 0;

  for (cursor = s; *cursor != '\0'; cursor++) {
    const int is_lower = ('a' <= *cursor) && (*cursor <= 'z');
    const int is_upper = ('A' <= *cursor) && (*cursor <= 'Z');
    size_t run_len;

    if (!is_lower && !is_upper) {
      /* A non-letter terminates the current run. */
      run_start = NULL;
      continue;
    }

    if (run_start == NULL) {
      run_start = cursor;
    }

    /* Length of the run so far, current letter included. */
    run_len = (size_t)(cursor - run_start) + 1;

    /* Strictly greater, so the earliest of equal-length runs is kept. */
    if (run_len > best_len) {
      best_len = run_len;
      best_start = run_start;
    }
  }

  if (len != NULL) {
    *len = best_len;
  }
  return best_start;
}

#ifdef UNIT_TEST
/*
 * Test helper: runs largest_letter_sequence() on `s` and prints the result.
 *
 * - no letters found      -> message on stdout
 * - run of 999+ letters   -> error on stderr
 * - otherwise             -> "<input> ==> <longest run>" on stdout
 *
 * The run is copied into a temporary heap buffer so it can be
 * NUL-terminated for printing; the buffer is released before returning.
 */
void fxtest(const char *s) {
  size_t run_len;
  const char *run = largest_letter_sequence(s, &run_len);
  char *copy;

  if (run_len == 0) {
    printf("no letters found in \"%s\"\n", s);
    return;
  }

  if (run_len >= 999) {
    fprintf(stderr, "ERROR: string too large\n");
    return;
  }

  copy = malloc(run_len + 1);
  if (copy == NULL) {
    fprintf(stderr, "No memory.\n");
    return;
  }

  /* The run holds exactly run_len letters, none of them NUL. */
  memcpy(copy, run, run_len);
  copy[run_len] = '\0';

  printf("%s ==> %s\n", s, copy);
  free(copy);
}

/* Unit-test driver: feeds a fixed set of sample strings to fxtest(). */
int main(void) {
  static const char *const samples[] = {
    "(2034HEY!!11   th[]thisiswhatwewant44",
    "123456789",
    "",
    "aaa%ggg",
  };
  size_t idx;

  for (idx = 0; idx < sizeof samples / sizeof samples[0]; idx++) {
    fxtest(samples[idx]);
  }
  return 0;
}
#endif

答案 4 :(得分:0)

首先,定义“string”并定义“garbage”。你认为什么是有效的非垃圾字符串?写下你可以编程的具体定义 - 这就是编写规范的方法。它是一系列字母数字字符吗?它应该以字母而不是数字开头吗?

一旦弄明白,编程就会非常简单。从一个简单的循环方式开始循环“垃圾”,寻找你需要的东西。完成后,查找有用的C库函数(如strtok)以使代码更精简。

答案 5 :(得分:0)

在等你把这道题作为问题发布出来的时候,我已经动手写了一份代码。

此代码遍历传递给“longest”函数的字符串,当它遇到一个字母序列的第一个字母时,会设置一个指向它的指针并开始计算该序列的长度。如果它是目前为止最长的字母序列,就把另一个指针(“maxStringStart”指针)指向该序列的开头,直到找到更长的序列为止。

最后,它为新字符串分配足够的空间并返回指向它的指针。

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

/*
 * Returns non-zero when `c` is an ASCII letter (a-z or A-Z), zero
 * otherwise. Edit this predicate to change which characters count as
 * part of a run in longest().
 */
int isLetter(char c){

    return ( (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') );

}

/*
 * Finds the longest run of consecutive characters accepted by isLetter()
 * in the NUL-terminated string `s` and returns a freshly allocated,
 * NUL-terminated copy of it.
 *
 * When several runs share the maximum length, the first one is returned.
 *
 * Returns NULL if `s` is NULL, if `s` contains no letters, or if the
 * allocation fails. The caller owns the returned buffer and must free() it.
 * `s` itself is never modified.
 */
char *longest(char *s) {

    char *newString = NULL;
    char *maxStringStart = NULL;
    char *curStringStart = NULL;
    size_t maxLength = 0;
    size_t curLength = 0;

    if(s == NULL) return NULL;

    for( ; *s != '\0'; s++) {

        //a non-letter ends the current run
        if( ! isLetter(*s)) {
            curLength = 0;
            continue;
        }

        //a length of zero before incrementing means this is the first
        //letter of a new run: remember where it starts
        if(curLength++ == 0) curStringStart = s;

        //strictly greater, so the earliest of equal-length runs is kept
        if(curLength > maxLength) {
            maxStringStart = curStringStart;
            maxLength = curLength;
        }

    }

    //return null pointer if there were no letters in the string,
    //or if we can't allocate any memory.
    if(maxLength == 0) return NULL;
    if( ! (newString = malloc(maxLength + 1)) ) return NULL;

    //copy the run and terminate it. The buffer holds maxLength + 1 bytes,
    //so the terminator belongs at index maxLength (index maxLength + 1
    //would be one past the end of the allocation).
    memcpy(newString, maxStringStart, maxLength);
    newString[maxLength] = '\0';

    return newString;

}

/*
 * Command-line driver: for every argument, prints the argument and the
 * longest run found in it by longest().
 *
 * longest() returns NULL when it has nothing to return, so the result is
 * checked before being handed to printf("%s"), and the returned buffer is
 * freed after use.
 */
int main(int argc, char *argv[]) {

    int i;

    for(i = 1; i < argc; i++) {
        char *result;

        printf("longest all-letter string in argument %d:\n", i);
        printf("   argument: \"%s\"\n", argv[i]);

        //call longest() only after the argument itself has been printed
        result = longest(argv[i]);

        if(result != NULL) {
            printf("    longest: \"%s\"\n\n", result);
        } else {
            printf("    longest: (none)\n\n");
        }

        free(result);
    }

    return 0;

}

这是我在简单C中的解决方案,没有任何数据结构。

我可以在我的终端中运行它:

~/c/t $ ./longest "hello there, My name is Carson Myers." "abc123defg4567hijklmnop890"
longest all-letter string in argument 1:
   argument: "hello there, My name is Carson Myers."
    longest: "Carson"

longest all-letter string in argument 2:
   argument: "abc123defg4567hijklmnop890"
    longest: "hijklmnop"

~/c/t $

可以轻松地在isLetter()函数中更改构成字母的标准。例如:

/* Alternative predicate body for isLetter(): in addition to a-z and A-Z
   it also accepts '.', ' ' and ','. */
return ( 
    (c >= 'a' && c <= 'z') ||
    (c >= 'A' && c <= 'Z') ||
    (c == '.') || 
    (c == ' ') || 
    (c == ',') );

将句号,逗号和空格计为“字母”。


根据您的更新:

memcpy(newString, maxStringStart, maxLength);替换为:

/* Hand-written replacement for memcpy(): copies maxLength characters,
   one at a time, from maxStringStart into newString. */
int i;
for(i = 0; i < maxLength; i++)
    newString[i] = maxStringStart[i];

然而,使用C标准库可以更容易地解决这个问题:

/*
 * Splits `s` on the delimiter set below and returns a freshly allocated,
 * NUL-terminated copy of the longest piece between delimiters.
 *
 * When several pieces share the maximum length, the first one is returned.
 *
 * Returns NULL if `s` is NULL, if `s` is empty or consists only of
 * delimiters, or if the allocation fails. The caller owns the returned
 * buffer and must free() it.
 *
 * Unlike a strtok()-based scan, this does not write to `s` and keeps no
 * hidden state between calls.
 *
 * Note: digits and brackets are not in the delimiter set, so they are
 * treated as part of a piece.
 */
char *longest(char *s) {

    static const char tokens[] = " ,.!?'\"()@$%\r\n;:+-*/\\";

    const char *cursor = s;
    const char *longestString = NULL;
    size_t longestLength = 0;
    char *newString = NULL;

    if( s == NULL ) return NULL;

    while( *cursor != '\0' ) {

        size_t curLength;

        //skip any run of delimiters, then measure the piece that follows
        cursor += strspn(cursor, tokens);
        curLength = strcspn(cursor, tokens);

        //strictly greater, so the earliest of equal-length pieces is kept
        if( curLength > longestLength ) {
            longestLength = curLength;
            longestString = cursor;
        }

        cursor += curLength;

    }

    //no piece found: empty input or nothing but delimiters
    if( longestLength == 0 ) return NULL;
    if( ! (newString = malloc(longestLength + 1)) ) return NULL;

    //the piece is not NUL-terminated inside s, so terminate the copy here
    memcpy(newString, longestString, longestLength);
    newString[longestLength] = '\0';

    return newString;

}

答案 6 :(得分:0)

另一种变体。

#include <stdio.h>
#include <string.h>

/*
 * Demo for a fixed sample string: overwrites every non-letter with NUL so
 * that each run of letters becomes its own C string inside the buffer,
 * then measures the string starting at every non-NUL position with
 * strlen() and prints the longest one found.
 */
int main(void)
{
        char s[] = "(2034HEY!!11   th[]thisiswhatwewant44";
        int len = strlen(s);
        int biggest = 0;
        int i;
        char* p;

        /* Pass 1: turn every non-letter into a terminator. */
        for (p = s; *p != '\0'; p++)
        {
                int is_upper = (*p >= 'A' && *p <= 'Z');
                int is_lower = (*p >= 'a' && *p <= 'z');

                if (!is_upper && !is_lower)
                {
                        *p = '\0';
                }
        }

        /* p now rests on the original terminator, so an input with no
           letters prints as an empty line. */

        /* Pass 2: the start of the longest run yields the largest strlen. */
        for (i = 0; i < len; i++)
        {
                size_t here;

                if (s[i] == '\0')
                {
                        continue;
                }

                here = strlen(&s[i]);
                if (here > (size_t)biggest)
                {
                        biggest = here;
                        p = &s[i];
                }
        }

        printf("%s\n", p);
        return 0;
}