在C中解析一个字符串并将其保存到一个结构数组中

时间:2013-05-16 16:20:52

标签: python c string parsing

我对Python编码非常熟悉,但现在我必须在C语言中进行字符串分析。

我的输入:

input =“command1 args1 args2 arg3; command2 args1 args2 args3; cmd3 arg1 arg2 arg3”

我的Python解决方案:     

# Split a ';'-separated command string into a dict that maps each command
# name to its arguments joined by single spaces.
input = "command1 args1 args2 arg3;command2 args1 args2 args3;command3 arg1 arg2 arg3"
tmpdict = {}

for line in input.split(";"):
    # split() without arguments also collapses repeated or leading whitespace.
    spl = line.split()
    if not spl:
        # An empty segment (e.g. from a trailing ';') has no command word.
        continue
    first, rest = spl[0], spl[1:]
    tmpdict[first] = ' '.join(rest)

# The parenthesised form works on both Python 2 and Python 3.
print(tmpdict)

#The Output:
#{'command1': 'args1 args2 arg3', 'command2': 'args1 args2 args3', 'command3': 'arg1 arg2 arg3'}

预期输出:一个字典(dict),以命令作为键,以各参数连接成的字符串作为值。

到目前为止我的C解决方案:

我想将命令和args保存在这样的结构中:

/* One parsed command: the command word plus pointers to its argument words. */
struct cmdr{
    char* command;   /* first word of a command segment */
    char* args[19];  /* argument words; room for 19 pointers */
};
  1. 我创建了一个包含 char * 数组的结构体,用来保存以“;”分隔的各段“命令 + 参数”:

    struct ari {char * value [200];};

  2. 功能:

    /*
     * Splits `string` on ";" and records a pointer to each piece in the
     * returned struct ari.
     * strtok() overwrites each delimiter in `string` with '\0', so the stored
     * pointers refer into the caller's buffer rather than to copies.
     * NOTE(review): `sepcmds` is never initialized and no NULL is stored after
     * the last piece, so value[i..199] are indeterminate on return.
     * NOTE(review): `i` is not checked against the 200-slot capacity.
     * NOTE(review): the function's closing brace is missing from this listing.
     */
    struct ari inputParser(char* string){
        char delimiter[] = ";";
        char *ptrsemi;  
        int i = 0;
        struct ari sepcmds;
        ptrsemi = strtok(string, delimiter);
    
        /* One slot per ";"-separated piece, in order of appearance. */
        while(ptrsemi != NULL) {
            sepcmds.value[i] = ptrsemi;
            ptrsemi = strtok(NULL, delimiter);
            i++;
    
        }
    return sepcmds;     
    
    1. 按空格分隔命令和参数,并将它们保存到我的结构中:
    2. 首先我添加了一个辅助结构:

      /* Wraps a fixed array of 10 cmdr entries; parseargs() returns it by value. */
      struct arraycmd {
      struct cmdr lol[10];
      };
      
      
      
      /*
       * Splits every piece in `z` on spaces: the first word becomes the
       * command, the following words are stored as arguments.
       * NOTE(review): `result` is never initialized, so every command/args
       * slot that is not written below holds an indeterminate pointer.
       * NOTE(review): the loop runs up to 200 pieces but result.lol has only
       * 10 entries, so more than 10 pieces would write past the array.
       * NOTE(review): the function's closing brace is missing from this listing.
       */
      struct arraycmd parseargs (struct ari z){
          struct arraycmd result;
          char * pch;
          int i;
          int j = 0;
      
          for (i=0; i < 200;i++){
               j = 0;
               /* Stops at the first NULL slot; inputParser() as shown never
                  stores such a terminator, so this depends on uninitialized data. */
               if (z.value[i] == NULL){
                     break;
                   }
                  pch = strtok(z.value[i]," ");
          while(pch != NULL) {
              if (j == 0){
                  result.lol[i].command = pch;    
                  pch = strtok(NULL, " ");
                  j++;
              } else {
              /* j starts at 1 here, so args[0] is never written and j is
                 not bounded by the 19-slot args array. */
              result.lol[i].args[j]= pch;
              pch = strtok(NULL, " ");
              j++;
              }
          }
          /* pch is already NULL at this point; the value returned here is
             overwritten at the top of the next iteration. */
          pch = strtok(NULL, " ");
            }
               return result; 
      

      我的输出函数如下:

      /*
       * Prints each command in `b` followed by its non-NULL arguments,
       * one line per command slot.
       * NOTE(review): both loops run to 200, but b.lol has 10 entries and each
       * args array has 19, so most iterations read outside the arrays.
       * NOTE(review): the NULL tests assume unused slots were set to NULL;
       * parseargs() as shown never initializes them.
       */
      void output(struct arraycmd b){ 
      int i;
      int j;
      
      for(i=0; i<200;i++){
           if (b.lol[i].command != NULL){
              printf("Command %d: %s",i,b.lol[i].command);
          }
          /* args[19] is one past the end of the array; presumably it aliases
             the next entry's command pointer, which would match the
             "Arg 19 = command2" seen in the posted output. */
          for (j = 0; j < 200;j++){
              if  (b.lol[i].args[j] != NULL){
                  printf(" Arg %d = %s",j,b.lol[i].args[j]);
              }
          }   
          printf(" \n");  
      }    
      }
      

      但它只生成垃圾(与我的python解决方案中相同的输入):
      (command1 args1 args2 arg3; command2 args1 args2 args3; command3 arg1 arg2 arg3)

      Command 0: command1 Arg 0 = command2 Arg 1 = args1 Arg 2 = args2 Arg 3 = arg3 Arg 19 = command2 Arg 21 = args1 Arg 22 = args2 Arg 23 = args3 Arg 39 = command3 Arg 41 = arg1 Arg 42 = arg2 Arg 43 = arg3 分段错误

      所以我希望有人可以帮我解决这个问题。

3 个答案:

答案 0 :(得分:1)

先用Python把C风格的逻辑写出来可能更容易。下面的写法更接近C,您可以尝试把它逐句改写成C;在C中可以使用strncpy提取子串并将其复制到您的结构中。

def parse_commands(text):
    """Tokenize ``text`` one character at a time, the way a C loop would.

    Commands are separated by ';' and words by ' '.  The text must end with
    a NUL character ('\\0') or a ';' so that the last command is flushed,
    mirroring a C string terminator.  Empty tokens (a space after ';',
    repeated spaces) and empty commands are skipped.

    Returns a list of ``(command, [arg, ...])`` tuples in input order.
    """
    structs = []
    start = 0               # index where the current token begins
    state = 'in_command'    # switches to 'in_args' once the command word is read
    command = ''
    args = []
    for i in range(len(text)):
        ch = text[i]
        if ch == ' ' or ch == ';' or ch == '\0':
            token = text[start:i]
            if token:
                if state == 'in_command':
                    command = token
                    state = 'in_args'
                else:
                    args.append(token)
            start = i + 1
        if ch == ';' or ch == '\0':
            # End of one command: emit it and reset for the next one.
            if command:
                structs.append((command, args))
            state = 'in_command'
            command = ''
            args = []
    return structs


# "\000" is an explicit NUL that stands in for C's string terminator.
text = "command1 args1 args2 arg3;command2 args1 args2 args3;command3 arg1 arg2 arg3\000"

structs = parse_commands(text)

for s in structs:
    print(s)  # the parenthesised form works on both Python 2 and Python 3

答案 1 :(得分:1)

您的问题在于:您的代码假定结构中的指针已被初始化为NULL,而实际上它们从未被初始化。

它们只是随机值,因此会触发段错误(SEGV)。

当结构只有10个命令和19个参数时,您还打印200个命令和200个参数。

答案 2 :(得分:1)

请看这个解决方案。已用valgrind测试,没有内存泄漏。不过我把打印逻辑写在了释放函数里,您可以自行把打印单独实现(参见freecmdlist函数)。另外,您还可以改进分割函数,以实现更好的解析。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Result of splitter(): an array of `count` heap-allocated strings.
 * An entry is null when the corresponding field was empty.
 */
typedef struct arr {
    char** words;   /* array of `count` string pointers */
    int count;      /* number of fields found */
} uarr;
/* Lowercase alias used throughout this listing as the null pointer constant. */
#define null 0

/* One parsed command as filled in by getcommandstruct(). */
typedef struct cmdr {
    char* command;  /* first space-separated word of the segment */
    char** argv;    /* remaining words; allocated only when there is at least one */
    int argc;       /* number of entries in argv */
} cmd;

/* All commands parsed from one input string. */
typedef struct list {
    cmd* listcmd;   /* array of `count` commands */
    int count;      /* number of ';'-separated segments */

} cmdlist;

/* Split str at every occurrence of delim into newly allocated strings. */
uarr splitter(char* str, char delim);
/* Parse "cmd arg ...;cmd arg ..." into a heap-allocated cmdlist. */
cmdlist* getcommandstruct(char* string);
/* Print and release the contents of cmdl (the cmdlist struct itself is not freed). */
void freecmdlist(cmdlist* cmdl);

/* Demo driver: parse a fixed command line, then print and release the result. */
int main(int argc, char** argv) {
    char input[] = "command1 arg1 arg2 arg3 arg4;command2 arg1 arg2 ;command3 arg1 arg2  arg3;command4 arg1 arg2  arg3";
    cmdlist* parsed = getcommandstruct(input);

    /* freecmdlist() also prints each command and argument while releasing it;
       the list struct itself still has to be freed afterwards. */
    freecmdlist(parsed);
    free(parsed);

    return EXIT_SUCCESS;
}

/* Free every string in a splitter() result, then the pointer array itself. */
static void discard_words(uarr list) {
    if (list.words == NULL) {
        return;
    }
    for (int i = 0; i < list.count; i++) {
        free(list.words[i]);
    }
    free(list.words);
}

/* Free a (possibly partially filled) command array without printing anything. */
static void discard_commands(cmd* list, int count) {
    if (list == NULL) {
        return;
    }
    for (int i = 0; i < count; i++) {
        free(list[i].command);
        for (int j = 0; j < list[i].argc; j++) {
            free(list[i].argv[j]);
        }
        free(list[i].argv);
    }
    free(list);
}

/**
 * Parse a string of the form "cmd arg arg;cmd arg ..." into a command list.
 *
 * Commands are separated by ';' and words by ' '.  Empty words caused by
 * leading, trailing or repeated spaces are skipped, so a segment such as
 * " command2 a  b " yields command "command2" with arguments "a" and "b".
 * An empty segment leaves a zeroed entry (command NULL, argc 0).
 *
 * Each non-empty segment is echoed to stdout before it is split, as before.
 *
 * @param string NUL-terminated input; it is not modified.
 * @return a heap-allocated cmdlist, or NULL if string is NULL or memory
 *         could not be allocated.  Release the contents with freecmdlist()
 *         and then free() the returned pointer.
 */
cmdlist* getcommandstruct(char* string) {
    if (string == NULL) {
        return NULL;
    }

    uarr segments = splitter(string, ';');
    if (segments.words == NULL || segments.count <= 0) {
        free(segments.words);
        return NULL;
    }

    uarr tokens = {NULL, 0};
    cmdlist* cmds = malloc(sizeof *cmds);
    /* calloc zero-fills, so untouched entries have command/argv NULL and argc 0. */
    cmd* listcmd = calloc((size_t) segments.count, sizeof *listcmd);
    if (cmds == NULL || listcmd == NULL) {
        goto fail;
    }

    for (int i = 0; i < segments.count; i++) {
        char* segment = segments.words[i];
        if (segment == NULL) {
            continue;
        }
        printf("%s\n", segment);

        tokens = splitter(segment, ' ');
        /* splitter() copies its input, so the segment is no longer needed. */
        free(segment);
        segments.words[i] = NULL;
        if (tokens.words == NULL) {
            tokens.count = 0;
            goto fail;
        }

        /* Compact the non-empty tokens to the front of the array. */
        int kept = 0;
        for (int j = 0; j < tokens.count; j++) {
            if (tokens.words[j] != NULL) {
                tokens.words[kept++] = tokens.words[j];
            }
        }
        tokens.count = kept;

        if (kept > 1) {
            char** args = malloc(sizeof *args * (size_t) (kept - 1));
            if (args == NULL) {
                goto fail;
            }
            for (int j = 1; j < kept; j++) {
                args[j - 1] = tokens.words[j];
            }
            listcmd[i].argv = args;
            listcmd[i].argc = kept - 1;
        }
        if (kept > 0) {
            listcmd[i].command = tokens.words[0];
        }

        /* Ownership of the strings moved into listcmd[i]; drop only the array. */
        free(tokens.words);
        tokens.words = NULL;
        tokens.count = 0;
    }

    free(segments.words);
    cmds->count = segments.count;
    cmds->listcmd = listcmd;
    return cmds;

fail:
    discard_words(tokens);
    discard_words(segments);
    discard_commands(listcmd, segments.count);
    free(cmds);
    return NULL;
}

/**
 * Split str at every occurrence of delim.
 *
 * The result always has (number of delimiters + 1) entries.  Each non-empty
 * field is copied into its own heap-allocated, NUL-terminated string; an
 * empty field is represented by a NULL entry.  str itself is not modified.
 *
 * @param str   NUL-terminated input string (NULL yields an empty result).
 * @param delim separator character.
 * @return the fields; {NULL, 0} if str is NULL or an allocation fails.
 *         The caller owns every non-NULL word and the words array.
 */
uarr splitter(char* str, char delim) {
    uarr result = {NULL, 0};
    if (str == NULL) {
        return result;
    }

    /* n delimiters separate n + 1 fields; the terminator closes the last one. */
    int count = 1;
    for (const char* p = str; *p != '\0'; p++) {
        if (*p == delim) {
            count++;
        }
    }

    /* calloc leaves every slot NULL, which is also the "empty field" marker. */
    char** words = calloc((size_t) count, sizeof *words);
    if (words == NULL) {
        return result;
    }

    const char* begin = str;
    int index = 0;
    for (const char* p = str; index < count; p++) {
        if (*p != delim && *p != '\0') {
            continue;
        }
        size_t len = (size_t) (p - begin);
        if (len > 0) {
            char* copy = malloc(len + 1);
            if (copy == NULL) {
                /* Undo the partial result instead of handing back garbage. */
                for (int k = 0; k < index; k++) {
                    free(words[k]);
                }
                free(words);
                return result;
            }
            memcpy(copy, begin, len);
            copy[len] = '\0';
            words[index] = copy;
        }
        index++;
        begin = p + 1;
    }

    result.words = words;
    result.count = count;
    return result;
}

/**
 * Print and release every command stored in cmdl.
 *
 * Each command and argument is written to stdout just before it is freed.
 * The cmdlist struct itself is NOT freed; the caller still owns it.  After
 * the call cmdl->listcmd is NULL and cmdl->count is 0, so calling the
 * function again on the same list is harmless.
 *
 * @param cmdl list to release; NULL is accepted and ignored.
 */
void freecmdlist(cmdlist* cmdl) {
    if (cmdl == NULL) {
        return;
    }

    if (cmdl->listcmd != NULL) {
        for (int i = 0; i < cmdl->count; i++) {
            cmd* entry = &cmdl->listcmd[i];

            if (entry->command != NULL) {
                printf("command=%s\n", entry->command);
            }
            free(entry->command);

            if (entry->argv != NULL) {
                for (int j = 0; j < entry->argc; j++) {
                    char* arg = entry->argv[j];
                    if (arg != NULL) {
                        printf("arg[%i] = %s\n", j, arg);
                    }
                    free(arg);
                }
            }
            free(entry->argv);
        }
        free(cmdl->listcmd);
    }

    /* Leave no dangling pointer behind so a second call cannot double-free. */
    cmdl->listcmd = NULL;
    cmdl->count = 0;
}