Question

我的程序的目的是找出字符串中最长的单词，并检查这个最长的单词是否与预期结果相符。

我已经写好了代码，但卡在了这一步：如何把函数 longestWord 的结果存入 char 数组 result 中。在 C 中应该怎么做？

我的努力：

#include <stdio.h>
#include <ctype.h>
#include <string.h>

static int testsExecuted = 0;
static int testsFailed = 0;

char testLongestWord(char line[], char expected[]);
void longestWord(char line[]);

// Test driver: feeds fixed (input line, expected longest word) pairs to
// testLongestWord and then prints the totals kept in the file-scope counters
// testsExecuted and testsFailed. The args/argv parameters are not used.
int main(int args, char *argv[]){
    printf("%s\n", "Testing typical cases, including punctuation\n");
    testLongestWord("the quick brown foxes jumped over the lazy dogs", "jumped");
    testLongestWord("hello world she said", "hello");
    testLongestWord("Hello\tworld\tshe\tsaid", "Hello");
    testLongestWord("HELLO, world she said", "HELLO");
    testLongestWord("hello world! she said???", "hello");
    testLongestWord("\"hello world!\", she said.", "hello");
    testLongestWord("easy as abc123", "abc123");
    testLongestWord("easy as abc,123", "easy");


    // Inputs that contain no letters or digits: the expected result is "".
    printf("\n%s\n", "Testing empty cases\n" );
    testLongestWord("", "");
    testLongestWord("!", "");
    testLongestWord(" ", "");
    testLongestWord("\t", "");
    testLongestWord("      ", "");
    testLongestWord("# $ ? % !", "");

    printf("\n%s\n", "Testing edge cases\n" );
    testLongestWord("a", "a");
    testLongestWord("abc", "abc");
    testLongestWord("abc d e f ghi", "abc");
    testLongestWord("a a b cc dd abc", "abc");
    testLongestWord("\"a a b cc dd abc.\"", "abc");


    printf("\n%s\n", "Testing apostrophes and dashes\n" );
    testLongestWord("this isn't five chars", "chars");
    testLongestWord("this should've been eight chars said the computer", "should've");
    testLongestWord("'this should've been eight chars', said the computer", "should've");
    testLongestWord("'hello world!', she said softly.", "softly");
    testLongestWord("topsy-turvy is a tenletter word", "topsy-turvy");
    testLongestWord("topsy-turvy should not be incorrectly eleven characters", "incorrectly");
    testLongestWord("---in-between-these---", "in-between-these");
    testLongestWord("---in---between---these---", "between");
    testLongestWord("here-is-an-edge-case but a muchmuchlongerword", "muchmuchlongerword");
    testLongestWord("d-o-n't-g-o-o-v-e-r-t-h-e-e-d-g-e with muchmuchlongerwords", "muchmuchlongerwords");
    testLongestWord("two=five-3 isn't three", "three");

    // Each pair below uses one input (or near-identical inputs) with two
    // different expected values, so at most one test of each pair can pass.
    // NOTE: "\0" embeds a NUL byte in the literal; C string functions such as
    // strlen stop at that byte, so the text after it is not seen by them.
    printf("\n%s\n", "These tests will be opposite in the C version\n");
    testLongestWord("the word antidisestablishmentarianism is very long but not as long as 'Llanfairpwllgwyngyllgogerychwyrndrobwyll-llantysiliogogogoch'.", "Llanfairpwllgwyngyllgogerychwyrndrobwyll-llantysiliogogogoch");
    testLongestWord("the word antidisestablishmentarianism is very long but not as long as 'Llanfairpwllgwyngyllgogerychwyrndrobwyll-llantysiliogogogoch'.", "antidisestablishment");
    testLongestWord("Java strings may contain \0 in the interior", "interior");
    testLongestWord("C strings cannot contain \0 in the interior", "strings");

    // Summary: passed = executed - failed.
    printf("Total number of test executed:  %d\n", testsExecuted );
    printf("number of test passed:  %d\n", (testsExecuted - testsFailed));
    printf("Number of test failed: %d\n", testsFailed );

    //longestWord("Java strings may contain \0 in the interior");

}

// Runs one test case: obtains the longest word of `line`, compares it with
// `expected`, prints a pass/fail line and updates the file-scope counters
// testsExecuted / testsFailed. Always returns 0.
//
// NOTE(review): this is the part the question is about and it does not
// compile as C: `String` is not a C type and `longestWords` is not declared
// in this file (the declared function is `void longestWord(char line[])`,
// which returns nothing).
char testLongestWord(char line[], char expected[]){
    //char result[200];
      String result = longestWords(line); //This is how it'd have been in Java
    /*longestWord(line);*/
    //strcpy(result, line);
    //char *result = longestWord(line);
    //printf("%s\n", line );
    //longestWord(&line)

    // NOTE(review): strcmp returns 0 when the strings are equal, so this
    // condition is true when they DIFFER; the "passed" branch therefore runs
    // on a mismatch and the "FAILED" branch on a match.
    if(strcmp(result,expected)){ // function returns 0 if they are equal
        printf("passed: '%s' from '%s'\n", result, line);
    }else{
        // NOTE(review): prints `expected` and `result`, not `line`, although
        // the format text says "from".
        printf("FAILED: '%s' from '%s'\n", expected, result);
        testsFailed++;
    }
    testsExecuted++;
    return 0;

}


// Attempt at finding the longest word in `line`.
//
// NOTE(review): the function is void and `longest` is a local array, so the
// result computed here never reaches the caller.
void longestWord(char line[]){
    char longest[200];      // longest word seen so far (local only)

    int pos = 0;            // index into line
    int longestLength = 0;  // length recorded for `longest`
    char current[300];      // word currently being assembled
    int currentLength = 0;  // count of letters/digits in the current word
    char ch;                // character being examined
    size_t maxPos = strlen(line);

    while(pos < maxPos){
        ch = line[pos++];
        // NOTE(review): this inner loop resets pos to 0 and then advances it
        // twice per iteration (pos++ below and pos++ in the for header), so
        // every other character is skipped.
        for(pos = 0; pos < maxPos;pos++){
            ch = line[pos++];
            // NOTE(review): pos was already incremented above, so line[pos-1]
            // is `ch` itself and line[pos+1] is two characters past `ch`.
            if((ch == '\'' || ch == '-') && (pos > 0) && isalpha(line[pos-1]) && isalpha(line[pos+1])){
                // NOTE(review): &ch points at a single char, not at a
                // NUL-terminated string, so strcpy reads past it; it also
                // overwrites `current` from the start instead of appending.
                strcpy(current, &ch);
            }else if(isalpha(ch) || isdigit(ch)){
                strcpy(current, &ch);
                currentLength++;
                //printf("%s\n", longest );

            }else{
                // A non-word character ends the current word: keep it if it
                // is longer than the best so far.
                if(currentLength > longestLength){
                    strcpy(longest,current);
                    longestLength = currentLength;
                }
                //strcpy(current, "");
                currentLength =0;
            }
        }

    }
    // NOTE(review): there is no comparison after the loops, so a word that
    // runs up to the end of `line` is never considered.

}

输出：（此输出由功能类似的 Java 代码生成）

Testing typical cases, including punctuation

Passed: 'jumped' from 'the quick brown foxes jumped over the lazy dogs'
Passed: 'hello' from 'hello world she said'
Passed: 'Hello' from 'Hello world   she said'
Passed: 'HELLO' from 'HELLO, world she said'
Passed: 'hello' from 'hello world! she said???'
Passed: 'hello' from '"hello world!", she said.'
Passed: 'abc123' from 'easy as abc123'
Passed: 'easy' from 'easy as abc,123'

Testing empty cases

Passed: '' from ''
Passed: '' from '!'
Passed: '' from ' '
Passed: '' from '   '
Passed: '' from '      '
Passed: '' from '# $ ? % !'

Testing edge cases

Passed: 'a' from 'a'
Passed: 'abc' from 'abc'
Passed: 'abc' from 'abc d e f ghi'
Passed: 'abc' from 'a a b cc dd abc'
Passed: 'abc' from '"a a b cc dd abc."'

Testing apostrophes and dashes

Passed: 'chars' from 'this isn't five chars'
Passed: 'should've' from 'this should've been eight chars said the computer'
Passed: 'should've' from ''this should've been eight chars', said the computer'
Passed: 'softly' from ''hello world!', she said softly.'
Passed: 'topsy-turvy' from 'topsy-turvy is a tenletter word'
Passed: 'incorrectly' from 'topsy-turvy should not be incorrectly eleven characters'
Passed: 'in-between-these' from '---in-between-these---'
Passed: 'between' from '---in---between---these---'
Passed: 'muchmuchlongerword' from 'here-is-an-edge-case but a muchmuchlongerword'
Passed: 'muchmuchlongerwords' from 'd-o-n't-g-o-o-v-e-r-t-h-e-e-d-g-e with muchmuchlongerwords'
Passed: 'three' from 'two=five-3 isn't three'

These tests will be opposite in the C version

Passed: 'Llanfairpwllgwyngyllgogerychwyrndrobwyll-llantysiliogogogoch' from 'the word antidisestablishmentarianism is very long but not as long as 'Llanfairpwllgwyngyllgogerychwyrndrobwyll-llantysiliogogogoch'.'
FAILED: 'Llanfairpwllgwyngyllgogerychwyrndrobwyll-llantysiliogogogoch' instead of 'antidisestablishment' from 'the word antidisestablishmentarianism is very long but not as long as 'Llanfairpwllgwyngyllgogerychwyrndrobwyll-llantysiliogogogoch'.'
Passed: 'interior' from 'Java strings may contain  in the interior'
FAILED: 'interior' instead of 'strings' from 'C strings cannot contain  in the interior'

Total number of tests executed: 34
Number of tests passed:         32
Number of tests failed:         2

Answer 1

把 char[] line 复制到动态分配的缓冲区：

char * result = (char*)malloc(sizeof(char)*sizeof(line));
strcpy(result, line);

在动态分配的情况下，您需要保持数组的大小，因为sizeof不适用于动态分配的数组。一般来说，你可以做这样的事情

/* A heap-allocated byte buffer that remembers its own capacity.
 * The typedef lets the type be written as plain `String` in C. */
typedef struct String
{
    char * c;      /* buffer of `size` bytes, or NULL when size == 0 */
    size_t size;   /* capacity of `c` in bytes */
} String;

/*
 * str -- create a String with a buffer of `size` bytes.
 *
 * in   -- bytes to copy into the new buffer (exactly `size` bytes are read),
 *         or NULL to get a zero-filled buffer (an empty C string).
 * size -- capacity of the buffer; 0 yields a String whose `c` is NULL.
 *
 * Returns the new String, or NULL if an allocation fails. The caller owns the
 * result and releases it with free(s->c) followed by free(s).
 */
String* str(const char* in, size_t size)
{
    String *out = malloc(sizeof *out);
    if (out == NULL)
        return NULL;

    out->c = NULL;
    out->size = size;

    /* Nothing to allocate or copy; also avoids touching a NULL buffer. */
    if (size == 0)
        return out;

    /* calloc zero-fills, so the in == NULL case is already an empty string. */
    out->c = calloc(size, 1);
    if (out->c == NULL) {
        free(out);
        return NULL;
    }

    if (in != NULL)
        memcpy(out->c, in, size);

    return out;
}

int main (void)
{
    // t is static string
    char t[255] = "Test";

    // a is dynamic string; copy of t
    String* a = str(t, sizeof(t));

    // b is dynamic string; empty string with size of a
    String* b = str(0, a->size);

    // copy b into a
    memcpy(b->c, a->c);

    return 0;
}

Answer 2

您的代码存在一些问题。最后那个 if 中 strcmp 的判断含义被颠倒了。单词查找逻辑有点复杂，多次[错误地]推进 pos，并且返回字符串的机制（类似 Java 的写法）在 C 中行不通。一种简化方法是把 longestWord 的部分代码拆分到 nextWord 中。

我已经修复了代码并加了一些注释[请原谅我顺手做的代码风格整理]：

#include <stdio.h>
#include <ctype.h>
#include <string.h>

static int testsExecuted = 0;
static int testsFailed = 0;

char testLongestWord(char line[], char expected[]);
//void longestWord(char line[]);

// Test driver: feeds fixed (input line, expected longest word) pairs to
// testLongestWord and then prints the totals kept in the file-scope counters
// testsExecuted and testsFailed. The args/argv parameters are not used.
int
main(int args, char *argv[])
{
    printf("%s\n", "Testing typical cases, including punctuation\n");
    testLongestWord("the quick brown foxes jumped over the lazy dogs", "jumped");
    testLongestWord("hello world she said", "hello");
    testLongestWord("Hello\tworld\tshe\tsaid", "Hello");
    testLongestWord("HELLO, world she said", "HELLO");
    testLongestWord("hello world! she said???", "hello");
    testLongestWord("\"hello world!\", she said.", "hello");
    testLongestWord("easy as abc123", "abc123");
    testLongestWord("easy as abc,123", "easy");

    // Inputs that contain no letters or digits: the expected result is "".
    printf("\n%s\n", "Testing empty cases\n");
    testLongestWord("", "");
    testLongestWord("!", "");
    testLongestWord(" ", "");
    testLongestWord("\t", "");
    testLongestWord("      ", "");
    testLongestWord("# $ ? % !", "");

    printf("\n%s\n", "Testing edge cases\n");
    testLongestWord("a", "a");
    testLongestWord("abc", "abc");
    testLongestWord("abc d e f ghi", "abc");
    testLongestWord("a a b cc dd abc", "abc");
    testLongestWord("\"a a b cc dd abc.\"", "abc");

    printf("\n%s\n", "Testing apostrophes and dashes\n");
    testLongestWord("this isn't five chars", "chars");
    testLongestWord("this should've been eight chars said the computer", "should've");
    testLongestWord("'this should've been eight chars', said the computer", "should've");
    testLongestWord("'hello world!', she said softly.", "softly");
    testLongestWord("topsy-turvy is a tenletter word", "topsy-turvy");
    testLongestWord("topsy-turvy should not be incorrectly eleven characters", "incorrectly");
    testLongestWord("---in-between-these---", "in-between-these");
    testLongestWord("---in---between---these---", "between");
    testLongestWord("here-is-an-edge-case but a muchmuchlongerword", "muchmuchlongerword");
    testLongestWord("d-o-n't-g-o-o-v-e-r-t-h-e-e-d-g-e with muchmuchlongerwords", "muchmuchlongerwords");
    testLongestWord("two=five-3 isn't three", "three");

    // Each pair below uses one input (or near-identical inputs) with two
    // different expected values, so at most one test of each pair can pass.
    // NOTE: "\0" embeds a NUL byte in the literal; code that scans a C string
    // up to its terminator does not see the text after that byte.
    printf("\n%s\n", "These tests will be opposite in the C version\n");
    testLongestWord("the word antidisestablishmentarianism is very long but not as long as 'Llanfairpwllgwyngyllgogerychwyrndrobwyll-llantysiliogogogoch'.", "Llanfairpwllgwyngyllgogerychwyrndrobwyll-llantysiliogogogoch");
    testLongestWord("the word antidisestablishmentarianism is very long but not as long as 'Llanfairpwllgwyngyllgogerychwyrndrobwyll-llantysiliogogogoch'.", "antidisestablishment");
    testLongestWord("Java strings may contain \0 in the interior", "interior");
    testLongestWord("C strings cannot contain \0 in the interior", "strings");

    // Summary: passed = executed - failed.
    printf("Total number of test executed:  %d\n", testsExecuted);
    printf("number of test passed:  %d\n", (testsExecuted - testsFailed));
    printf("Number of test failed: %d\n", testsFailed);

    // longestWord("Java strings may contain \0 in the interior");

}

// nextWord -- extract the next word from a line
//
// word -- destination buffer for the extracted, NUL-terminated word. No bound
//         is checked: at most one byte is stored per input byte, so a buffer
//         of strlen(line) + 1 bytes is always sufficient.
// line -- position in the text at which to start scanning.
//
// A word is a run of letters/digits. An apostrophe or hyphen is kept as part
// of the word only when a word has already started and the character right
// after it is a letter/digit; leading separators are skipped.
//
// Returns the position where scanning stopped (pass it back in to get the
// following word), or NULL when no word was found; `word` is then "".
char *
nextWord(char *word,char *line)
{
    char *dst = word;   // next free position in word
    int started = 0;    // 1 once at least one letter/digit has been stored

    for (;  *line != '\0';  ++line) {
        // <ctype.h> functions require a value representable as unsigned char;
        // a plain (possibly negative) char would be undefined behaviour
        unsigned char ch = (unsigned char) *line;

        // store word chars
        if (isalnum(ch)) {
            *dst++ = (char) ch;
            started = 1;
            continue;
        }

        // still waiting for the start of a word -- skip separators
        if (! started)
            continue;

        // internal apostrophe/hyphen: keep it only if the word continues
        if ((ch == '\'' || ch == '-') && isalnum((unsigned char) line[1])) {
            *dst++ = (char) ch;
            continue;
        }

        // any other character ends the word
        break;
    }

    *dst = '\0';

    return started ? line : NULL;
}

// longestWord -- copy the longest word of `line` into `longest`
//
// longest -- caller-supplied buffer that receives the result ("" if the line
//            contains no word)
// line    -- text to scan
//
// Length is counted without hyphens and apostrophes. On a tie the earlier
// word is kept, except that a word without a hyphen replaces a hyphenated one.
// Each word is staged in a 300-byte local buffer.
void
longestWord(char *longest,char *line)
{
    char candidate[300];
    int bestLen = 0;

    *longest = '\0';

    // nextWord returns the resume position, or NULL once no word is left
    for (line = nextWord(candidate,line);  line != NULL;
         line = nextWord(candidate,line)) {
        const char *p;
        int len = 0;

        // count only the characters that are not punctuation
        for (p = candidate;  *p != '\0';  p++) {
            if (*p != '-' && *p != '\'')
                len++;
        }

        if (len > bestLen) {
            // strictly longer word wins
            strcpy(longest,candidate);
            bestLen = len;
        }
        else if (len == bestLen &&
                 strchr(longest,'-') != NULL &&
                 strchr(candidate,'-') == NULL) {
            // same length: prefer non-dash over dash
            strcpy(longest,candidate);
        }
    }
}

// testLongestWord -- run one test case and record the outcome
//
// line     -- input text
// expected -- word that longestWord should produce for `line`
//
// Prints a pass/fail line, increments testsExecuted, and increments
// testsFailed on a mismatch. Always returns 0.
char
testLongestWord(char *line,char *expected)
{
    char result[2000];
    int mismatch;

    longestWord(result,line);

    // strcmp yields 0 for equal strings, so non-zero means the test failed
    mismatch = strcmp(result, expected);

    if (mismatch != 0) {
        printf("FAILED: got '%s' from '%s' -- expected '%s'\n",
            result, line, expected);
        testsFailed++;
    }
    else {
        printf("passed: '%s' from '%s'\n", result, line);
    }

    testsExecuted++;

    return 0;
}

更新：

根据您的请求，下面是完全注释版本，解释变量定义并注释控制流。

在补充注释的过程中，我意识到 nextWord 已经计算出了一些 longestWord 不得不重新计算的信息，而这仅仅是因为 nextWord 无法把这些信息传回给调用者。

因此，我添加了nextword结构，允许nextWord传回多个值。我不知道这是否在java中完成，但它在C中相当常见。

返回多个值的替代方法是执行以下操作：

nextWord(blah,&var1,&var2,&var3,...);

这很快变得笨拙，所以我决定选择更干净的方法，即使它起初稍微难以理解。

实际上，另一种思考方式是nextword就像一个[java]类，只有一个方法nextWord。在这种情况下，我可能会颠倒nextWord参数的顺序（例如nextWord(&rtn,current)），因为我在C中的约定是对象实例指针是第一个参数。

#include <stdio.h>
#include <ctype.h>
#include <string.h>

static int testsExecuted = 0;
static int testsFailed = 0;

char testLongestWord(char line[],char expected[]);

//void longestWord(char line[]);

// Test driver: feeds fixed (input line, expected longest word) pairs to
// testLongestWord and then prints the totals kept in the file-scope counters
// testsExecuted and testsFailed. The args/argv parameters are not used.
int
main(int args,char *argv[])
{
    printf("%s\n", "Testing typical cases, including punctuation\n");
    testLongestWord("the quick brown foxes jumped over the lazy dogs", "jumped");
    testLongestWord("hello world she said", "hello");
    testLongestWord("Hello\tworld\tshe\tsaid", "Hello");
    testLongestWord("HELLO, world she said", "HELLO");
    testLongestWord("hello world! she said???", "hello");
    testLongestWord("\"hello world!\", she said.", "hello");
    testLongestWord("easy as abc123", "abc123");
    testLongestWord("easy as abc,123", "easy");

    // Inputs that contain no letters or digits: the expected result is "".
    printf("\n%s\n", "Testing empty cases\n");
    testLongestWord("", "");
    testLongestWord("!", "");
    testLongestWord(" ", "");
    testLongestWord("\t", "");
    testLongestWord("      ", "");
    testLongestWord("# $ ? % !", "");

    printf("\n%s\n", "Testing edge cases\n");
    testLongestWord("a", "a");
    testLongestWord("abc", "abc");
    testLongestWord("abc d e f ghi", "abc");
    testLongestWord("a a b cc dd abc", "abc");
    testLongestWord("\"a a b cc dd abc.\"", "abc");

    printf("\n%s\n", "Testing apostrophes and dashes\n");
    testLongestWord("this isn't five chars", "chars");
    testLongestWord("this should've been eight chars said the computer", "should've");
    testLongestWord("'this should've been eight chars', said the computer", "should've");
    testLongestWord("'hello world!', she said softly.", "softly");
    testLongestWord("topsy-turvy is a tenletter word", "topsy-turvy");
    testLongestWord("topsy-turvy should not be incorrectly eleven characters", "incorrectly");
    testLongestWord("---in-between-these---", "in-between-these");
    testLongestWord("---in---between---these---", "between");
    testLongestWord("here-is-an-edge-case but a muchmuchlongerword", "muchmuchlongerword");
    testLongestWord("d-o-n't-g-o-o-v-e-r-t-h-e-e-d-g-e with muchmuchlongerwords", "muchmuchlongerwords");
    testLongestWord("two=five-3 isn't three", "three");

    // Each pair below uses one input (or near-identical inputs) with two
    // different expected values, so at most one test of each pair can pass.
    // NOTE: "\0" embeds a NUL byte in the literal; code that scans a C string
    // up to its terminator does not see the text after that byte.
    printf("\n%s\n", "These tests will be opposite in the C version\n");
    testLongestWord("the word antidisestablishmentarianism is very long but not as long as 'Llanfairpwllgwyngyllgogerychwyrndrobwyll-llantysiliogogogoch'.", "Llanfairpwllgwyngyllgogerychwyrndrobwyll-llantysiliogogogoch");
    testLongestWord("the word antidisestablishmentarianism is very long but not as long as 'Llanfairpwllgwyngyllgogerychwyrndrobwyll-llantysiliogogogoch'.", "antidisestablishment");
    testLongestWord("Java strings may contain \0 in the interior", "interior");
    testLongestWord("C strings cannot contain \0 in the interior", "strings");

    // Summary: passed = executed - failed.
    printf("Total number of test executed:  %d\n", testsExecuted);
    printf("number of test passed:  %d\n", (testsExecuted - testsFailed));
    printf("Number of test failed: %d\n", testsFailed);

    // longestWord("Java strings may contain \0 in the interior");

}

// nextWord state control ("helper")
// NOTE: we use this sort of struct when we must maintain/update _multiple_
// variables across a function call [or calls]; one pointer to it replaces
// several separate out-parameters
struct nextword {
    char *line;                         // in: where to resume scanning
                                        // out: where scanning stopped, or
                                        //      NULL if no word was found
    int alfcnt;                         // out: alphanumeric length of word
    int hypflg;                         // out: 1=word is hyphenated
};

// nextWord -- get next word
//
// word -- pointer to place to store the extracted, NUL-terminated word. No
//         bound is checked: at most one byte is stored per input byte, so a
//         buffer as large as the remaining line plus one is always enough.
// rtn  -- pointer to our state control and the values we return. The caller
//         sets rtn->line once before the first call; every call then updates
//         line, alfcnt and hypflg.
//
// A word is a run of letters/digits. An apostrophe or hyphen is kept only
// when a word has already started and the very next character is a
// letter/digit. Only letters/digits count toward alfcnt.
//
// Calling again after rtn->line has become NULL is harmless: it reports
// "no word" again instead of dereferencing the NULL pointer.
void
nextWord(char *word,struct nextword *rtn)
{
    char *line = rtn->line;  // current position in the source text
    char *wp = word;  // current position in word
    int alfcnt = 0;  // number of alphanumeric characters stored so far

    // say "not hyphenated" until we store a hyphen
    rtn->hypflg = 0;

    if (line != NULL) {
        for (;  *line != '\0';  ++line) {
            // <ctype.h> functions require a value representable as unsigned
            // char; a plain (possibly negative) char is undefined behaviour
            unsigned char ch = (unsigned char) *line;

            // store word chars
            if (isalnum(ch)) {
                *wp++ = (char) ch;
                alfcnt += 1;
                continue;
            }

            // wait for start of a word (i.e. alphanumeric)
            if (! alfcnt)
                continue;

            // internal punctuation: keep it only if the word continues
            if ((ch == '\'' || ch == '-') &&
                isalnum((unsigned char) line[1])) {
                *wp++ = (char) ch;
                if (ch == '-')
                    rtn->hypflg = 1;  // remember that word is hyphenated
                continue;
            }

            // didn't get a word char -- we're at the end of a word
            break;
        }
    }

    // finish off the extracted word
    *wp = '\0';

    // return multiple values to caller; a NULL line tells the caller to stop
    rtn->alfcnt = alfcnt;
    rtn->line = alfcnt ? line : NULL;
}

// longestWord -- find longest word in a line
void
longestWord(char *longest,char *line)
// longest -- pointer to buffer where we return the longest extracted word
// line -- pointer to string that has the phrase to extract words from
{
    int longlen = 0;  // length of longest string so far
    int longhyp = 0;  // 1=longest word is hyphenated
    char current[300];  // current word being considered
    int curlen;
    struct nextword rtn;  // control struct to allow nextWord to update state

    // handle empty strings
    longest[0] = 0;

    // initialize this once -- nextWord will update it
    rtn.line = line;

    while (1) {
        // get next word in line [we advance the line pointer for next round]
        nextWord(current,&rtn);

        // no more words found
        if (rtn.line == NULL)
            break;

        // nextWord has already computed the length for us
        curlen = rtn.alfcnt;

        // store longer word [remembering its length and whether it's
        // hyphenated or not]
        if (curlen > longlen) {
            strcpy(longest,current);
            longlen = curlen;
            longhyp = rtn.hypflg;
            continue;
        }

        // skip shorter words than what we already have
        if (curlen < longlen)
            continue;

        // decide if longest is hyphenated -- ignore if not
        if (! longhyp)
            continue;

        // decide if current is hyphenated -- ignore if so (i.e. it's no better)
        if (rtn.hypflg)
            continue;

        // prefer non-dash over dash
        strcpy(longest,current);
        longhyp = rtn.hypflg;
    }
}

// testLongestWord -- run one test case and record the outcome
//
// line     -- input text
// expected -- word that longestWord should produce for `line`
//
// Prints a pass/fail line, increments testsExecuted, and increments
// testsFailed on a mismatch. Always returns 0.
char
testLongestWord(char *line,char *expected)
{
    char result[2000];
    int passed;

    longestWord(result,line);

    // strcmp returns 0 if the strings are equal
    passed = ! strcmp(result,expected);

    if (passed)
        printf("passed: '%s' from '%s'\n",result,line);
    else
        printf("FAILED: got '%s' from '%s' -- expected '%s'\n",
            result,line,expected);

    // bookkeeping: every call counts as executed, mismatches also as failed
    testsFailed += passed ? 0 : 1;
    testsExecuted += 1;

    return 0;
}

如何将从函数中获得的结果传递给C中的char数组？

2 个答案: