在C中将标点符号单独分离出来

时间:2017-12-28 19:58:28

标签: c arrays string

我正试图将所有单词与文本分开,我也需要将标点符号分开。

将它们分开并将其保存在字符串数组中的最佳方法是什么?

这是一个例子:

  1. 输入

    • “嗨,我叫萨拉!”
  2. 预期输出

    • “嗨”
    • “,”
    • “我的”
    • “名称”
    • “是”
    • “萨拉”
    • “!”
  3. 实际输出

    • “嗨,”
    • “我的”
    • “名称”
    • “是”
    • “萨拉!”
  4. 我的代码:

    /* Take the first token of line i; tokens are delimited by space, LF or CR. */
    palavra_linha[i] = strtok (linhas[i], " \n\r");
    
    /* Store every token pointer in palavras_finais until strtok returns NULL. */
    while (palavra_linha[i] != NULL) {
        palavras_finais[j] = palavra_linha[i];
        j++;
        /* A NULL first argument continues with the same string. Punctuation is
           not in the delimiter set, so it stays attached to the word. */
        palavra_linha[i] = strtok (NULL, " \n\r");
    }
    

    我知道必须使用类似下面这样的判断,但它不起作用,因为这个条件始终为假:

    /* NOTE(review): as posted, the parentheses on the next line are unbalanced,
       and the expression compares the number strlen(...)-1 with '.', not the
       last character of the token. */
    if (strlen(palavra_linha[i])-1) == '.') {
        palavras_finais[j] = palavra_linha[i];
    }
    

2 个答案:

答案 0 :(得分:1)

现在它正常工作并给我输出:

[Hello] 
[,] 
[Sara] 
[!] 
[How] 
[are] 
[You] 
[?] 

使用完毕后不要忘记释放数组。另外,也可以在程序开始时把原始字符串的地址保存到一个临时指针(例如 tmp)中。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>

/*
 * Advance *i past the token that starts at str, including the blanks
 * around it.
 *
 * The token boundaries are the same as in count_char(): leading blanks are
 * skipped, the first character is always consumed, and scanning stops at a
 * blank, at the end of the string, or just before a punctuation character.
 * Note that a punctuation character followed directly by text (",b") is
 * therefore consumed together with that text.
 *
 * str  start of the unread part of the string (must not be NULL)
 * i    index into the full string; incremented by the number of bytes skipped
 */
static void skip_copied_bytes(const char *str, int *i)
{
    const char *p = str;

    /* Blanks before the token. */
    while (*p == ' ')
        p++;

    /* The token itself: stop as soon as the next character is punctuation. */
    while (*p != ' ' && *p != '\0') {
        p++;
        /* Cast: <ctype.h> functions are undefined for negative char values. */
        if (ispunct((unsigned char)*p))
            break;
    }

    /* Blanks after the token, so the next token starts at a non-blank. */
    while (*p == ' ')
        p++;

    *i += (int)(p - str);
}

/*
 * Return the length in bytes of the token that starts at str.
 *
 * Leading blanks are skipped and not counted. The first character of the
 * token is always counted; counting then continues until a blank, the end
 * of the string, or a punctuation character is reached. A punctuation
 * character followed directly by text (",b") is counted together with that
 * text.
 *
 * str  start of the unread part of the string (must not be NULL)
 */
static int count_char(const char *str)
{
    int count = 0;

    while (*str == ' ')
        str++;

    while (*str != ' ' && *str != '\0') {
        count++;
        str++;
        /* Cast: <ctype.h> functions are undefined for negative char values. */
        if (ispunct((unsigned char)*str))
            break;
    }

    return count;
}

/*
 * Count the words in s plus the punctuation characters they contain.
 *
 * A word is a maximal run of characters other than the delimiter c. Each
 * word adds one to the result and each punctuation character inside a word
 * adds one more. main() uses the result to size its array of tokens.
 *
 * s  NUL-terminated string to scan (must not be NULL)
 * c  delimiter character separating words
 */
static int count_word(char const *s, char c)
{
    int count = 0;
    int i = 0;

    while (s[i] != '\0') {
        /* Skip the delimiters in front of the next word. */
        while (s[i] == c)
            i++;

        if (s[i] != '\0')
            count++;

        while (s[i] != c && s[i] != '\0') {
            /* Cast: <ctype.h> functions are undefined for negative char values. */
            if (ispunct((unsigned char)s[i]))
                count++;
            i++;
        }
    }

    return count;
}
/*
 * Split a fixed sentence into words and punctuation marks, store each token
 * in its own heap-allocated string, print the tokens as "[token] " lines and
 * release the memory again.
 *
 * Returns 0 on success and -1 when an allocation fails.
 */
int main(void)
{
    char *str = "Hello, Sara! How are You?";
    char **array;
    int i = 0;
    int j = 0;
    int size;
    int rc = 0;

    /* count_word() never returns fewer than the number of tokens produced
       below; one extra slot holds the terminating NULL. */
    size = count_word(str, ' ');
    array = malloc(sizeof *array * ((size_t)size + 1));
    if (array == NULL)
        return -1;

    while (str[i]) {
        /* count_char() ignores leading blanks, so the copy must start at
           the first non-blank character as well. */
        while (str[i] == ' ')
            i++;
        if (str[i] == '\0')
            break;

        size = count_char(&str[i]);

        /* One extra byte for the terminator written below. */
        array[j] = malloc((size_t)size + 1);
        if (array[j] == NULL) {
            rc = -1;
            break;
        }

        memcpy(array[j], &str[i], (size_t)size);
        array[j][size] = '\0';

        skip_copied_bytes(&str[i], &i);
        j++;
    }

    array[j] = NULL;

    if (rc == 0) {
        for (i = 0; array[i]; i++) {
            printf("[%s] \n", array[i]);
        }
    }

    /* Release every token and then the pointer array itself. */
    for (i = 0; i < j; i++)
        free(array[i]);
    free(array);

    return rc;
}

答案 1 :(得分:1)

一种思路是:保留原始文本,并另外创建一份副本。创建副本时,逐个检查文本中的每个字符是否为标点符号;如果是,就在副本中该标点符号的前面(必要时也在后面)插入空格,然后从标点符号的下一个字符继续,直到遇到文本末尾的空字符。最后,像你原来的做法一样,用 strtok() 对这份副本进行分词即可。下面是实现上述思路的示例。

#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>

/*
 * Copy src to dst and surround every punctuation character with one blank
 * on each side, so that a later split on blanks yields the punctuation as
 * separate tokens.
 *
 * dst must have room for 3 * strlen(src) + 1 bytes.
 */
static void pad_punctuation(char *dst, const char *src)
{
    for (; *src != '\0'; src++) {
        /* Cast: <ctype.h> functions are undefined for negative char values. */
        if (ispunct((unsigned char)*src)) {
            *dst++ = ' ';
            *dst++ = *src;
            *dst++ = ' ';
        } else {
            *dst++ = *src;
        }
    }
    *dst = '\0';
}

/*
 * Ask for a buffer size and a sentence, then print every word and every
 * punctuation mark of the sentence on its own line.
 *
 * The size counts the terminating NUL, so at most size - 1 characters of
 * the sentence are read; anything beyond that is ignored.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on invalid input or when memory
 * cannot be allocated.
 */
int main(void) {
    char *stnc_org = NULL;
    char *stnc_new = NULL;
    char *str;
    size_t len;
    int size;
    int ch;
    int rc = EXIT_FAILURE;

    printf("What is the expected size of the sentence: ");
    if (scanf("%d", &size) != 1 || size < 2) {
        fprintf(stderr, "Invalid size\n");
        return EXIT_FAILURE;
    }

    stnc_org = malloc((size_t)size);
    if (stnc_org == NULL) {
        fprintf(stderr, "Out of memory\n");
        return EXIT_FAILURE;
    }

    printf("Input: \n");

    /* Skip the rest of the size line and any leading white space, then
       read one line without ever writing past the buffer. */
    do {
        ch = getchar();
    } while (ch != EOF && isspace(ch));
    if (ch == EOF || ungetc(ch, stdin) == EOF ||
        fgets(stnc_org, size, stdin) == NULL) {
        fprintf(stderr, "No input\n");
        goto out;
    }
    stnc_org[strcspn(stnc_org, "\n")] = '\0';

    /* Worst case: every character is punctuation and grows to 3 bytes. */
    len = strlen(stnc_org);
    if (len > ((size_t)-1 - 1) / 3) {
        fprintf(stderr, "Input too long\n");
        goto out;
    }
    stnc_new = malloc(3 * len + 1);
    if (stnc_new == NULL) {
        fprintf(stderr, "Out of memory\n");
        goto out;
    }

    pad_punctuation(stnc_new, stnc_org);

    printf("\nOutput:\n");
    str = strtok(stnc_new, " \n\r");

    while (str != NULL) {
        printf("%s\n", str);
        str = strtok(NULL, " \n\r");
    }
    rc = EXIT_SUCCESS;

out:
    free(stnc_new);
    free(stnc_org);
    return rc;
}

示例输出如下:

Running "/home/ubuntu/workspace/replace.c"
What is the expected size of the sentence: 300
Input: 
"Isn't it true that Bill O'Reilly didn't win (he came in 3rd!)? 'Tain't necessarily so!"

Output:
"
Isn
'
t
it
true
that
Bill
O
'
Reilly
didn
'
t
win
(
he
came
in
3rd
!
)
?
'
Tain
'
t
necessarily
so
!
"


Process exited with code: 0