Question

我有一个存放在字符串里的句子，想把它逐个拆分成单词，每个单词都应该放入一个字符串数组中。我不被允许使用strtok。我写了下面这段代码，但它不起作用。有人可以帮忙吗？

在互联网上肯定有类似的东西，但我找不到任何东西......

/*
 * Asker's original attempt: read a sentence into `s` and copy its words
 * into the rows of `array`. Kept exactly as posted, since it is the code
 * the question is about.
 *
 * NOTE(review): as written, the inner loop restarts at s[0] on every outer
 * iteration and tests only for ' ' (never for '\0'); the outer index `j`
 * walks characters of `s` but is used as the row index of `array`; no row
 * is ever NUL-terminated; and gets() performs no bounds checking on `s`.
 */
int main(){

    char s[10000];                        // sentence
    char array[100][100];                 // array where I put every word

    printf("Insert sentence: ");          // receive the sentence
    gets(s);

    int i = 0;
    int j = 0;

    for(j = 0; s[j] != '\0'; j++){        // loop until I reach the end
        for(i = 0; s[i] != ' '; i++){     // loop until the word is over
            array[j][i] = s[i];           // put every char in the array
        }
    }

    return 0;
}

Answer 1

每个单词都应该进入一个字符串数组。我不被允许使用 strtok。

有趣的问题，可以用一个紧凑的算法解决。它能处理连续的多个空格，以及delimiters字符串中指定的标点符号（由is_delimiter判断）。

问题中最困难的部分是妥善处理边界情况。当单词长度超过WORD_LEN或单词数量超过array的容量时，就会遇到这种情况。

两种情况都得到了妥善处理。该算法会截断过长的单词，并且只解析数组容量所能容纳的单词数。

（顺便说一句。请勿使用gets：Why is the gets function so dangerous that it should not be used?）

编辑：已经提供了经过全面测试的find_tokens函数。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define WORD_LEN            3 // 100 // MAX WORD LEN
#define NR_OF_WORDS         3 // 100 // MAX NUMBER OF WORDS
#define INPUT_SIZE 10000

/*
 * Report whether `c` occurs in the NUL-terminated set `delimiters`.
 *
 * Returns 1 when strchr() finds `c` in `delimiters`, 0 otherwise.
 * Because strchr() considers the terminating NUL part of the string,
 * passing c == '\0' also yields 1.
 */
int is_delimiter(const char * delimiters, char c) // check for a delimiter
{
    return strchr(delimiters, c) != NULL;
}

/*
 * Advance *i over characters of `str` that must not be stored in a token.
 *
 * skip_delimiters != 0: str[*i] is expected to be a delimiter; *i is moved
 *   forward over the run of delimiters that follows it.
 * skip_delimiters == 0: str[*i] is an excess (truncated) character of a
 *   token; *i is moved forward over the rest of that token and over the
 *   run of delimiters after it.
 *
 * In both modes *i stops on the last character before either the next
 * token or the terminating '\0', so a caller that increments *i afterwards
 * lands exactly on one of those.  *i is never moved onto the terminator
 * and no byte after the terminator is read.
 *
 * Returns 1 if another token follows (str[*i + 1] != '\0'), 0 otherwise.
 */
int skip(int *i, char *str, int skip_delimiters, const char *delimiters)
{
    int pos = *i;

    if (str[pos] == '\0')                 // nothing to skip; do not look past the terminator
        return 0;

    while (str[pos + 1] != '\0') {
        int next_is_delimiter = is_delimiter(delimiters, str[pos + 1]);

        if (skip_delimiters) {
            if (!next_is_delimiter)
                break;                    // next character starts a token
        } else {
            // Only stop once we are inside the delimiter run that follows
            // the over-long token and the next character starts a new token.
            if (is_delimiter(delimiters, str[pos]) && !next_is_delimiter)
                break;
        }
        pos++;
    }

    *i = pos;
    return str[pos + 1] != '\0';
}

/*
 * Split `str` into tokens separated by any character of `delimiters` and
 * copy them, NUL-terminated, into successive rows of `array`.
 *
 * max_tokens    number of rows available in `array`
 * token_len     maximum characters stored per token (each row holds
 *               token_len + 1 bytes); assumed to be >= 1
 * str           NUL-terminated input; it is only read
 * array         destination rows
 * delimiters    NUL-terminated set of separator characters
 * nr_of_tokens  out: number of rows of `array` that were filled
 *
 * Leading delimiters and runs of delimiters are skipped.  A token longer
 * than token_len is cut to token_len characters.  Parsing stops once
 * max_tokens rows are filled.
 *
 * Returns a bit mask: 0x01 if at least one stored token was truncated,
 * 0x02 if the input holds more tokens than max_tokens, 0 if neither.
 */
int find_tokens(int max_tokens, int token_len, char *str, char array[][token_len+1], const char *delimiters, int *nr_of_tokens)
{
    int i = 0;
    int j = 0;                                // write position inside the current token
    int l = 0;                                // row of array being filled
    int status = 0;                           // all OK!
    int skip_leading_delimiters = 1;
    int token = 0;                            // set once any token character was stored
    int more;

    *nr_of_tokens = 0;

    if (max_tokens <= 0) {
        // No row may be written; only report whether any token exists.
        for (i = 0; str[i] != '\0'; i++) {
            if (!is_delimiter(delimiters, str[i]))
                return 0x02;
        }
        return 0;
    }

    for (i = 0; str[i] != '\0'; i++) {        // loop until I reach the end

        // skip leading delimiters
        if (skip_leading_delimiters) {
            if (is_delimiter(delimiters, str[i]))
                continue;
            skip_leading_delimiters = 0;
        }

        if (!is_delimiter(delimiters, str[i]) && (j < token_len)) {
            array[l][j] = str[i];             // put char in the array
            j++;
            array[l][j] = '\0';               // keep the row terminated at all times
            token = 1;
        } else {
            array[l][j] = '\0';               // token termination

            if (is_delimiter(delimiters, str[i])) {
                // The token ended normally, even if it is exactly
                // token_len characters long: this is not a truncation.
                more = skip(&i, str, 1, delimiters);
            } else {
                // Row is full but the token continues: drop the excess.
                more = skip(&i, str, 0, delimiters);
                status |= 0x01;               // token has been truncated
            }

            j = 0;
            if (token && more)
                l++;                          // another token follows, move to next row

            if (l >= max_tokens) {
                status |= 0x02;               // more tokens than expected
                break;
            }
        }
    }

    if (l >= max_tokens)
        *nr_of_tokens = max_tokens;
    else
        *nr_of_tokens = token ? l + 1 : l;    // row l is in use once any token was seen

    return status;
}

/*
 * Demo driver: read one line from stdin, split it with find_tokens() and
 * print the token count, the status mask and every stored token.
 *
 * Returns EXIT_FAILURE if no line could be read, 0 otherwise.
 */
int main(void){
    char input[INPUT_SIZE+1];                // sentence
    char array[NR_OF_WORDS][WORD_LEN+1];     // array where I put every word, remember to include null terminator!!!

    int number_of_words;
    const char * delimiters =  " .,;:\t";    // word delimiters

    printf("Insert sentence: ");             // receive the sentence
    if (fgets(input, sizeof input, stdin) == NULL) {
        // On EOF or read error the buffer contents are unspecified; do not parse them.
        fprintf(stderr, "Failed to read input\n");
        return EXIT_FAILURE;
    }
    input[strcspn(input, "\n")] = '\0';      // remove '\n' (no-op when there is none)

    int ret = find_tokens(NR_OF_WORDS, WORD_LEN, input, array, delimiters, &number_of_words);

    printf("tokens= %d ret= %d\n", number_of_words, ret);

    for (int i=0; i < number_of_words; i++)
        printf("%d: %s\n", i, array[i]);

    printf("End\n");
    return 0;
}

测试：

Insert sentence: ..........1234567,,,,,,abcdefgh....123::::::::::::                                                                          
tokens= 3 ret= 1                                                                                                                             
0: 123                                                                                                                                       
1: abc                                                                                                                                       
2: 123                                                                                                                                       
End

Answer 2

您没有用'\0'终止各个字符串，并且每次遇到空格字符后，都会从源字符串的开头重新开始扫描。

你只需要一个循环，不需要内层循环，并且循环条件必须是s[i] != 0：

int j = 0; // index for array
int k = 0; // index for array[j]
for(i = 0; s[i] != '\0'; ++i)
{
    if(k == 99)
    {
        // word longer than array[j] can hold, aborting
        array[j][99] = 0; // 0-terminating string
        break;
    }

    if(j == 99)
    {
        // more words than array can hold, aborting
        break;
    }

    if(s[i] == ' ')
    {
        array[j][k] = 0; // 0-terminating string
        j++; // for the next entry in array
        k = 0;
    } else
        array[j][k++] = s[i]; 
}

请注意，此算法不处理多个空格和标点符号。这可以通过使用存储最后状态的变量来解决。

int j = 0; // index for array
int k = 0; // index for array[j]
int sep_state = 0; // 0 normal mode, 1 separation mode
for(i = 0; s[i] != '\0'; ++i)
{
    if(k == 99)
    {
        // word longer than array[j] can hold, aborting
        array[j][99] = 0; // 0-terminating string
        break;
    }

    if(j == 99)
    {
        // more words than array can hold, aborting
        break;
    }

    // check for usual word separators
    if(s[i] == ' ' || s[i] == '.' || s[i] == ',' || s[i] == ';' || s[i] == ':')
    {
        if(sep_state == 1)
            continue; // skip multiple separators
        array[j][k] = 0; // 0-terminating string
        j++; // for the next entry in array
        k = 0;
        sep_state = 1; // enter separation mode
    } else {
        array[j][k++] = s[i];
        sep_state = 0; // leave separation mode
    }
}

如您所见，借助sep_state变量可以判断是否有多个分隔符连续出现，并跳过后续的分隔符。这里还检查了常见的标点符号。

Answer 3

var coreDataStack = CoreDataStack(modelName: "MyAppName2")

请注意，gets函数非常不安全，在任何情况下都不应使用，请改用fgets（或带宽度限制的scanf）。

如何从字符串中获取单词并将它们放在字符串数组中？在C

3 个答案: