将不同类型字符的字符串拆分为单独的字符串

时间:2015-09-06 22:07:38

标签: c

有没有办法将不同类型字符的字符串拆分成单独的字符串?

/* A literal backslash inside a C string must be written as "\\". */
char str[] = "(I) 44like22 .cookies. ,This, /is\\ ?tricky?";

这样的东西?

( I ) 44 like 22 . cookies . , This , / is \ ? tricky ?

回应 Olaf:这是我目前尝试的代码。

/*
 * Split line_storage on single spaces with strtok() and store a heap copy
 * of every token in strings[].  strtok() modifies line_storage in place.
 * Each non-NULL entry of strings[] is allocated here with malloc().
 * After the loop, k holds the number of tokens stored.
 */
char *a;
char *strings[100] = {0};
int k = 0;
a = strtok (line_storage," ");
/* Stop at the end of the input or when strings[] is full. */
while (a != NULL && k < (int)(sizeof strings / sizeof strings[0]))
{
    strings[k] = malloc(strlen(a)+1);
    if (strings[k] == NULL)
        break;                  /* out of memory: keep what we have so far */
    strcpy(strings[k], a);      /* index with k (the original used an undeclared 'l') */
    a = strtok (NULL," ");
    k++;
}

我修改了 Jonathan Leffler 的代码,但效果不太好。我正试图找出一种方法,使我在对文件的每一行调用 fgets 之后,都可以重复使用这段代码。

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Split text into runs of digits, letters or punctuation.
 *
 * Each run is copied into out as its own NUL-terminated string and a pointer
 * to it is stored in ptr[].  Whitespace separates runs and is dropped; any
 * other unclassifiable character is reported on stdout and skipped.
 *
 * text  NUL-terminated input string (not modified).
 * out   scratch storage; needs at least 2 * (strlen(text) + 1) bytes.
 * ptr   receives the fragment pointers; needs at least strlen(text) slots.
 *
 * Returns the number of fragments stored in ptr[].
 */
static int split_fragments(const char *text, char *out, char **ptr)
{
    const char *s = text;
    char       *o = out;
    int         n = 0;
    int         c;

    while ((c = (unsigned char)*s++) != '\0')
    {
        int (*same_class)(int);

        if (isdigit(c))
            same_class = isdigit;
        else if (isalpha(c))
            same_class = isalpha;
        else if (ispunct(c))
        {
            /* Debug trace kept from the original code. */
            printf("c is %d \n", c);
            same_class = ispunct;
        }
        else
        {
            if (!isspace(c))
                printf("Hmmm: don't know how to classify %d (%c)\n", c, c);
            continue;
        }

        /* Start a new fragment and extend it while the class stays the same. */
        ptr[n++] = o;
        *o++ = (char)c;
        while (same_class((unsigned char)*s))
            *o++ = *s++;
        *o++ = '\0';
    }

    return n;
}

/*
 * Split two sample strings and print their fragments, reusing the same
 * scratch buffers for both.
 *
 * The buffers are plain arrays, so nothing is passed to free(); they are
 * "reset" simply by starting again from their first element.
 */
int main(void)
{
    const char str[]  = "(I) 44like22 .cookies. ,This, /is\\ ?tricky?";
    const char str1[] = "(I) 44like22 .cookies. ,This, /is\\ ?tricky? 1122Keep.. (giving) 13me09 %more% 1magic1";

    /* Sized for the longer input (str1) so that both strings fit. */
    char  out[2 * sizeof(str1)];
    char *ptr[sizeof(str1)];

    const char *inputs[] = { str, str1 };

    for (size_t k = 0; k < sizeof inputs / sizeof inputs[0]; k++)
    {
        printf("Whole string: <<%s>>\n", inputs[k]);

        int n = split_fragments(inputs[k], out, ptr);

        for (int i = 0; i < n; i++)
        {
            printf("Fragment[%2d] = %s\n", i, ptr[i]);
        }
    }

    return 0;
}

新的调整。我想再做一个小改动:把 "]"、")" 与 ";" 分开。也就是说,把 "];" 拆分为 "]" 和 ";",把 ");" 拆分为 ")" 和 ";"。

/*
 * Post-process the fragment list: every fragment that is exactly "];" or
 * ");" is split into two one-character fragments ("]" / ")" and ";").
 *
 * The new ";" fragment is INSERTED after ptr[x]; the fragments that follow
 * are shifted up by one slot so none of them is overwritten.
 * NOTE(review): assumes ptr[] has room for at least n + 1 entries (true when
 * it has one slot per input character) -- confirm against the declaration.
 * The inserted fragment is allocated with malloc(), unlike ptr[x] itself.
 */
for(x = 0; x < n; x++)
{
    printf("ptr[x] is %s \n", ptr[x]);
    cmp_str3 = strcmp(ptr[x], "];");
    cmp_str4 = strcmp(ptr[x], ");");
    if(cmp_str3 == 0 || cmp_str4 == 0)
    {
        /* One character plus the terminating NUL. */
        char *tail = malloc(2);
        if(tail == NULL)
            break;
        tail[0] = ptr[x][1];
        tail[1] = '\0';

        /* Truncate the original fragment to its first character. */
        ptr[x][1] = '\0';

        /* Open a gap at x + 1 by moving the remaining pointers up one slot. */
        if(n - x - 1 > 0)
            memmove(&ptr[x + 2], &ptr[x + 1], (size_t)(n - x - 1) * sizeof ptr[0]);
        ptr[x + 1] = tail;
        n++;
    }
}
/* Reset the scratch state, as the original snippet did. */
cmp_str3 = 0;
cmp_str4 = 0;
memset(str1, 0, 15);
memset(str2, 0, 15);

3 个答案:

答案 0 :(得分:0)

我对这段代码并不完全满意,但它相对干净,似乎可以胜任。

splitit.c

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/*
 * Break a sample string into runs of digits, letters or punctuation and
 * print each run on its own line.  Whitespace only separates runs.
 */
int main(void)
{
    const char text[] = "(I) 44like22 .cookies. ,This, /is\\ ?tricky?";

    /* Expected split: ( I ) 44 like 22 . cookies . , This , / is \ ? tricky ? */

    char    store[2 * sizeof(text)];   /* fragments, each NUL-terminated */
    char   *fragment[sizeof(text)];    /* start of every fragment in store */
    size_t  used = 0;                  /* bytes of store consumed so far */
    size_t  pos = 0;                   /* current index into text */
    int     count = 0;

    printf("Whole string: <<%s>>\n", text);

    while (text[pos] != '\0')
    {
        const int ch = (unsigned char)text[pos];
        size_t    end = pos + 1;       /* one past the last char of the run */

        if (isdigit(ch))
        {
            while (isdigit((unsigned char)text[end]))
                end++;
        }
        else if (isalpha(ch))
        {
            while (isalpha((unsigned char)text[end]))
                end++;
        }
        else if (ispunct(ch))
        {
            while (ispunct((unsigned char)text[end]))
                end++;
        }
        else
        {
            /* Whitespace is skipped silently; anything else is reported. */
            if (!isspace(ch))
                printf("Hmmm: don't know how to classify %d (%c)\n", ch, ch);
            pos = end;
            continue;
        }

        /* Copy text[pos..end) into store as a separate string. */
        fragment[count++] = &store[used];
        memcpy(&store[used], &text[pos], end - pos);
        used += end - pos;
        store[used++] = '\0';
        pos = end;
    }

    for (int k = 0; k < count; k++)
        printf("Fragment[%2d] = %s\n", k, fragment[k]);

    return 0;
}

编译(Mac OS X 10.10.5上的GCC 5.1.0):

$ gcc -O3 -g -std=c11 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes \
      -Wold-style-definition -Werror splitit.c -o splitit

示例运行:

Whole string: <<(I) 44like22 .cookies. ,This, /is\ ?tricky?>>
Fragment[ 0] = (
Fragment[ 1] = I
Fragment[ 2] = )
Fragment[ 3] = 44
Fragment[ 4] = like
Fragment[ 5] = 22
Fragment[ 6] = .
Fragment[ 7] = cookies
Fragment[ 8] = .
Fragment[ 9] = ,
Fragment[10] = This
Fragment[11] = ,
Fragment[12] = /
Fragment[13] = is
Fragment[14] = \
Fragment[15] = ?
Fragment[16] = tricky
Fragment[17] = ?

我观察到样本数据中没有多标点序列;但是,代码是对称的,因此它可以处理它们。

由于if语句中的重复,我对代码不满意,但是要操作的控制数据量使编写函数变得混乱。

你也可以争论变量名称;我会接受他们处于“简短但有意义”的过分简洁的一面。

避免重复

此代码使用函数指针来避免重复。使用指向函数的指针是强大的,但不一定是你准备好处理的东西。它允许我避免使用具有六个左右参数的单独函数。

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/*
 * Same splitter as before, but the character class of a run is chosen from
 * a table of <ctype.h> predicates instead of a repeated if/else body.
 */
int main(void)
{
    /* Tried in this order; the first predicate that accepts a char wins. */
    static int (*const classifier[])(int) = { isdigit, isalpha, ispunct };
    enum { NUM_CLASSES = sizeof(classifier) / sizeof(classifier[0]) };

    const char text[] = "(I) 44like22 .cookies. ,This, /is\\ ?tricky?"
        "\nBut (as he said, \"Isn't it delicous fun!\"), he vanished.";

    char   store[2 * sizeof(text)];    /* fragments, each NUL-terminated */
    char  *fragment[sizeof(text)];     /* start of every fragment in store */
    char  *fill = store;
    int    count = 0;

    printf("Whole string: <<%s>>\n", text);

    for (const char *cursor = text; *cursor != '\0'; )
    {
        const int ch = (unsigned char)*cursor++;
        int (*same_kind)(int) = NULL;

        for (int k = 0; k < NUM_CLASSES && same_kind == NULL; k++)
        {
            if (classifier[k](ch))
                same_kind = classifier[k];
        }

        if (same_kind == NULL)
        {
            /* Whitespace is dropped quietly; anything else is reported. */
            if (!isspace(ch))
                printf("Hmmm: don't know how to classify %d (%c)\n", ch, ch);
            continue;
        }

        /* Begin a fragment and extend it while the class is unchanged. */
        fragment[count++] = fill;
        *fill++ = ch;
        while (same_kind((unsigned char)*cursor))
            *fill++ = *cursor++;
        *fill++ = '\0';
    }

    for (int k = 0; k < count; k++)
        printf("Fragment[%2d] = %s\n", k, fragment[k]);

    return 0;
}

示例输出:

Whole string: <<(I) 44like22 .cookies. ,This, /is\ ?tricky?
But (as he said, "Isn't it delicous fun!"), he vanished.>>
Fragment[ 0] = (
Fragment[ 1] = I
Fragment[ 2] = )
Fragment[ 3] = 44
Fragment[ 4] = like
Fragment[ 5] = 22
Fragment[ 6] = .
Fragment[ 7] = cookies
Fragment[ 8] = .
Fragment[ 9] = ,
Fragment[10] = This
Fragment[11] = ,
Fragment[12] = /
Fragment[13] = is
Fragment[14] = \
Fragment[15] = ?
Fragment[16] = tricky
Fragment[17] = ?
Fragment[18] = But
Fragment[19] = (
Fragment[20] = as
Fragment[21] = he
Fragment[22] = said
Fragment[23] = ,
Fragment[24] = "
Fragment[25] = Isn
Fragment[26] = '
Fragment[27] = t
Fragment[28] = it
Fragment[29] = delicous
Fragment[30] = fun
Fragment[31] = !"),
Fragment[32] = he
Fragment[33] = vanished
Fragment[34] = .

使用fgets()

关键是要弄清楚“恰好把字符串设置为某个值的代码”与“能够比较一致地工作的通用代码”之间的边界,并把更通用的那部分代码分离成一个从 main() 函数调用的函数。我为此目的调整了前面那个重复较多的版本:

#include <ctype.h>
#include <stdio.h>
#include <string.h>

static void split_string(const char *buffer, size_t buflen)
{
    const char *s = buffer;
    char out[2 * buflen];
    char *ptr[buflen];
    char **p = ptr;
    char *o = out;
    int n = 0;
    int c;

    printf("Whole string: <<%s>>\n", buffer);

    while ((c = (unsigned char)*s++) != '\0')
    {
        if (isdigit(c))
        {
            *p++ = o;
            *o++ = c;
            while (isdigit((unsigned char)*s))
                *o++ = *s++;
            *o++ = '\0';
            n++;
        }
        else if (isalpha(c))
        {
            *p++ = o;
            *o++ = c;
            while (isalpha((unsigned char)*s))
                *o++ = *s++;
            *o++ = '\0';
            n++;
        }
        else if (ispunct(c))
        {
            *p++ = o;
            *o++ = c;
            while (ispunct((unsigned char)*s))
                *o++ = *s++;
            *o++ = '\0';
            n++;
        }
        else if (!isspace(c))
        {
            printf("Hmmm: don't know how to classify %d (%c)\n", c, c);
        }
    }

    for (int i = 0; i < n; i++)
        printf("Fragment[%2d] = %s\n", i, ptr[i]);
}

int main(void)
{
    const char str[] = "(I) 44like22 .cookies. ,This, /is\\ ?tricky?";
    split_string(str, sizeof(str));

    char buffer[4096];
    while (fgets(buffer, sizeof(buffer), stdin) != 0)
        split_string(buffer, strlen(buffer)+1);
    return 0;
}

到函数中的第一个 printf() 为止,共有四处变化(除了定义函数本身);函数的其余部分与前面的代码相同。这四处更改使修订后的代码引用函数的参数(buffer 和 buflen),而不是变量 str 及其在 main() 中的大小。

请注意,在此函数中使用重复性较低的代码会很简单。您将以相同的方式更改函数顶部的相同行,并保持其余部分不变 - 并且修订后的main()对于机制的更改不会更明智。

示例运行:

main()

答案 1 :(得分:0)

要收集由“属于某个字符集合”或“不属于某个字符集合”的字符组成的连续范围,请使用 strspn(包含)或 strcspn(排除;两者的文档在同一个链接中)。它们都返回满足条件的字符数,你可以打印这个范围,或者以你喜欢的任何其他方式处理它,然后跳过该部分继续处理其余部分。

#include <stdio.h>
#include <string.h>

void print_range (char *input, int start, int length)
{
    putchar ('"');
    while (length--)
        putchar (input[start++]);
    printf ("\"\n");
}

void print_continuous (char *input, int start, int length)
{
    while (length--)
        putchar (input[start++]);
    printf (" ");
}

void split_string (char *input, void(*process)(char *,int,int) )
{
    int current_pos = 0, next_length;

    while (input[current_pos])
    {
        /* 1. letters? */
        next_length = strspn (input+current_pos, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
        if (next_length)
        {
            process (input, current_pos, next_length);
            current_pos += next_length;
            continue;
        }

        /* 2. digits? */
        next_length = strspn (input+current_pos, "0123456789");
        if (next_length)
        {
            process (input, current_pos, next_length);
            current_pos += next_length;
            continue;
        }

        /* 3. neither letters nor digits */

        /* first, skip spaces. These don't need processing (apparently?) */
        next_length = strspn (input+current_pos, " ");
        current_pos += next_length;

        /* then skip all others as well */
        next_length = strcspn (input+current_pos, " 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
        if (next_length)
        {
            process (input, current_pos, next_length);
            current_pos += next_length;
        }
    }
}

int main(void)
{
    char str[] ="(I) 44like22 .cookies. ,This, /is\x5C ?tricky?";

    printf ("input: [%s]\n", str);

    split_string (str, print_range);
    split_string (str, print_continuous);

    return 0;
}

为方便起见,我将 process 函数作为函数指针参数传入。这样,你可以轻松调整该函数,以便对每个片段执行任何你想要的操作。

输出:完整字符串,每行一个片段,并用单个空格分隔:

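input: [(I) 44like22 .cookies. ,This, /is\ ?tricky?]
"("
"I"
")"
"44"
"like"
"22"
"."
"cookies"
"."
","
"This"
","
"/"
"is"
"\"
"?"
"tricky"
"?"
( I ) 44 like 22 . cookies . , This , / is \ ? tricky ?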

答案 2 :(得分:0)

#include <stdio.h> 
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define BUFFER_SIZE 100

/*
 * Extract the next fragment from *p into out.
 *
 * Leading whitespace is skipped.  The fragment is then the longest run of
 * letters, or of digits, or of characters that are none of letter, digit
 * or whitespace, starting at the current position.
 *
 * p    in/out: points at the read position; advanced past the fragment.
 * out  receives the NUL-terminated fragment.  It is set to "" when only
 *      whitespace (or nothing) was left.  The caller must provide room for
 *      the longest possible fragment plus the terminator.
 *
 * Every character is converted to unsigned char before it is handed to the
 * <ctype.h> classifiers: passing a negative char value is undefined.
 */
void split_one(const char **p, char *out){
    const char *s = *p;

    while(isspace((unsigned char)*s))
        ++s;

    if(isalpha((unsigned char)*s)){
        while(isalpha((unsigned char)*s))
            *out++ = *s++;
    } else if(isdigit((unsigned char)*s)){
        while(isdigit((unsigned char)*s))
            *out++ = *s++;
    } else {
        /* "Other" run: stops at NUL, a letter, a digit or whitespace. */
        while(*s && !isalpha((unsigned char)*s) && !isdigit((unsigned char)*s)
                 && !isspace((unsigned char)*s))
            *out++ = *s++;
    }

    *out = '\0';
    *p = s;
}

/*
 * Split a sample string with split_one(), keep a heap copy of every
 * fragment, then print and release them.
 *
 * The copies are made with malloc() + memcpy() rather than strdup():
 * strdup() is not declared by <string.h> in strict C11 mode.
 * Returns EXIT_FAILURE if a copy cannot be allocated.
 */
int main (void){
    char str[BUFFER_SIZE] = "(I) 44like22 .cookies. ,This, /is\\ ?tricky?";
    char *strings[BUFFER_SIZE] = {0};
    char piece[BUFFER_SIZE];
    const char *p = &str[0];

    int k = 0;
    int status = EXIT_SUCCESS;

    while(*p && k < BUFFER_SIZE){
        split_one(&p, piece);
        if(!*piece)
            continue;   /* only trailing whitespace was left */

        size_t size = strlen(piece) + 1;
        char *copy = malloc(size);
        if(copy == NULL){
            fprintf(stderr, "out of memory\n");
            status = EXIT_FAILURE;
            break;
        }
        memcpy(copy, piece, size);
        strings[k++] = copy;
    }
    /* Print whatever was collected and release it, even after a failure. */
    for(int i = 0; i < k; ++i){
        printf("'%s'\n", strings[i]);
        free(strings[i]);
    }
    return status;
}