C - 从字符串中提取单词

时间:2012-03-09 05:41:49

标签: c string pointers extract words


我正在尝试从一串ASCII字符中提取单词。例如,如果我有字符串 @@ Hello..world>>,我想从该字符串中取出单词“Hello”和“world”,并将它们添加到我的链表中。


基本上,我使用fscanf从文件中逐个读取字符串,然后对每个字符串调用我的函数read_words(char *s),从中提取出正确的单词并将它们添加到我的链表中以供后续使用。


/* One entry of the singly linked word list. */
struct node {
    char *val;          /* text of the word held by this entry */
    struct node *next;  /* following entry; NULL marks the end of the list */
    int count;          /* initialised to 1 by read_words() when the entry is
                           created (presumably an occurrence counter) */
};

/* Head of the word list; NULL while no word has been stored. */
struct node *words = NULL;

void read_words(char *s)
    struct node *tmp;
    char word[64+1];
    int i, check, wordStarted = 0, count = 0;

    for (i = 0; s[i] != '\0'; i++)
            if ((isspace(s[i]) != 0) || !isalpha(s[i]))
                    if (wordStarted == 1)
                            check = check_list(word);
                            if (check != 1) {
                                    word[count] = '\0';
                                    tmp = malloc(sizeof(struct node));
                                    tmp->val = word;
                                    tmp->count = 1;
                                    tmp->next = words;
                                    words = tmp;
                            count = 0;
                            wordStarted = 0;
                    word[count++] = s[i];
                    wordStarted = 1;




2 个答案:

答案 0 :(得分:6)



#include <stdio.h>
#include <string.h>

/*
 * Demonstrates strtok(): prints the sample string, then every token found
 * when splitting it on spaces, commas, periods and hyphens, one per line.
 *
 * strtok() writes NUL bytes into the buffer it scans, which is why the
 * sample text lives in a modifiable array rather than a string literal.
 */
int main (void)
{
  static const char delimiters[] = " ,.-";
  char str[] ="- This, a sample string.";
  char * pch;

  printf ("Splitting string \"%s\" into tokens:\n",str);

  /* The first call receives the buffer; later calls pass NULL to continue
     scanning where the previous call stopped. */
  pch = strtok (str, delimiters);
  while (pch != NULL)
  {
    printf ("%s\n",pch);
    pch = strtok (NULL, delimiters);
  }
  return 0;
}


Splitting string "- This, a sample string." into tokens:

答案 1 :(得分:1)

ANSI C的更好解决方案。

使用 strtok() 并不总是好的选择:

  1. 它将更改原始数组。
  2. 只用空格 " " 作为分隔符时,不会涵盖其他类似的空白字符,例如 "\n"、"\t" 等。
  3. 请尝试下面的代码,并阅读其中的注释以了解详细信息:

    #include <stdio.h>      // printf
    #include <string.h>     // strlen, strncpy
    #include <ctype.h>      // isalnum
    #include <stdlib.h>     // malloc, calloc
    /** A logical type. */
    // Minimal boolean type for code that does not include <stdbool.h>.
    // The enumerators are given explicit values so that `false` is 0 and
    // `true` is 1, matching the result of C's comparison operators.
    typedef enum {
        false = 0,
        true = 1
    } bool;
    /** A struct holding a 2D array together with its item count. */
    // An array of heap-allocated strings plus the number of entries in it.
    typedef struct _ListWithLength {
        char **list;    // `length` pointers, each to a NUL-terminated word
        size_t length;  // number of valid entries in `list`
    } ListWithLength;

    /**
     * Parse a text and return a pointer to a ListWithLength holding its words.
     *
     * A word is a maximal run of ASCII letters or digits (isalnum); every
     * other character separates words. The input text is not modified.
     *
     * @param text  NUL-terminated string to scan; NULL is allowed.
     * @return      A newly allocated ListWithLength, or NULL if text is NULL
     *              or an allocation fails. The caller owns the result and
     *              must free every list[i], then list, then the struct.
     */
    ListWithLength* getWords(char *text) {
        if (text == NULL) {
            return NULL;
        }

        // keep length of the text
        size_t text_len = strlen(text);

        // Words are separated by at least one character, so a text of n
        // characters holds at most (n + 1) / 2 words. The "+ 1" also keeps
        // the request non-zero for an empty text.
        size_t max_words = text_len / 2 + 1;

        // array of pointers to the found words (calloc checks the
        // multiplication for overflow)
        char **words = calloc(max_words, sizeof *words);
        ListWithLength *list_with_length = malloc(sizeof *list_with_length);
        if (words == NULL || list_with_length == NULL) {
            free(words);
            free(list_with_length);
            return NULL;
        }

        // number of words stored so far
        size_t count = 0;
        size_t i = 0;

        while (i < text_len) {
            // skip everything that cannot be part of a word; the cast is
            // needed because <ctype.h> functions require unsigned char values
            while (i < text_len && !isalnum((unsigned char)text[i])) {
                ++i;
            }

            // consume one word
            size_t index_start_word = i;
            while (i < text_len && isalnum((unsigned char)text[i])) {
                ++i;
            }
            if (i == index_start_word) {
                break;  // only separators were left
            }

            // allocate memory for the word plus its terminator
            size_t word_len = i - index_start_word;
            char *word = malloc(word_len + 1);
            if (word == NULL) {
                // release everything built so far before reporting failure
                while (count > 0) {
                    free(words[--count]);
                }
                free(words);
                free(list_with_length);
                return NULL;
            }

            // copy the found word from the text and terminate it explicitly
            memcpy(word, text + index_start_word, word_len);
            word[word_len] = '\0';

            words[count++] = word;
        }

        // bind the found words and their count to a structure and return it
        list_with_length->length = count;
        list_with_length->list = words;
        return list_with_length;
    }
    /** Print the contents of a ListWithLength. */
    /**
     * Print the item count of a ListWithLength followed by its entries,
     * one per line, numbered from 1.
     *
     * @param list_with_length  list to print; NULL prints nothing.
     */
    void printListWithLength(ListWithLength *list_with_length) {
        if (list_with_length == NULL) {
            return;
        }

        // length is a size_t, so it must be printed with %zu
        printf("Total items: %zu\n", list_with_length->length);

        // a size_t index avoids a signed/unsigned comparison with length
        for (size_t i = 0; i < list_with_length->length; ++i) {
            printf("%zu. %s\n", i + 1, list_with_length->list[i]);
        }
    }
    int main(int argc, char const *argv[])
        char c_keywords[300] = "auto else    long    switch\
        break\t   enum \t register    typedef\
        \ncase    extern,  return  union\
        ?char    float.   short   unsigned\
        const   !for signed  void\
        continue    goto    sizeof  volatile\
        .default???? if  static  while\
        do  int struct,,,,  _Packed\
        ListWithLength *list_with_length = getWords(c_keywords);
        return 0;


    $ gcc -Wall -ansi -std=c11 -o main main.c
    $ ./main 
    Total items: 33
    1. auto
    2. else
    3. long
    4. switch
    5. break
    6. enum
    7. register
    8. typedef
    9. case
    10. extern
    11. return
    12. union
    13. char
    14. float
    15. short
    16. unsigned
    17. const
    18. for
    19. signed
    20. void
    21. continue
    22. goto
    23. sizeof
    24. volatile
    25. default
    26. if
    27. static
    28. while
    29. do
    30. int
    31. struct
    32. Packed
    33. double