在C中将XML解析为树

时间:2016-12-15 18:59:13

标签: c xml

在不使用任何库的情况下,将嵌套的XML文件解析为C中的(n叉)树的最佳方法是什么?目前我尝试用fgets逐行读取文件,但行不通,因为这样无法顾及嵌套元素的上下文。你会怎么做呢?

<!-- Root node -->
<root>
  <node_id>0</node_id>
  <n>2</n>
  <library_choice>1</library_choice>

  <!-- Child node -->
  <child level="1">
    <node_player>1</node_player>
    <node_id>1</node_id>
    <n>2</n>
    <library_choice>1</library_choice>
    <kr>11000</kr>

      <!-- Childs child node -->
      <child level="2">
          <node_player>2</node_player>
          <node_id>1</node_id>
          <library_choice>5</library_choice>
          <kr>11000</kr>
          <utility>100</utility>
      </child>

      <!-- Childs child node -->
      <child level="2">
          <node_player>2</node_player>
          <node_id>1</node_id>
          <library_choice>8</library_choice>
          <kr>11000</kr>
          <utility>100</utility>
      </child>
  </child>

  <!-- Child node -->
  <child level="1">
    <node_player>1</node_player>
    <node_id>2</node_id>
    <n>2</n>
    <library_choice>2</library_choice>
    <kr>10000</kr>

      <!-- Childs child node -->
      <child>
          <node_id>3</node_id>
          <node_player>2</node_player>
          <probability>0.5</probability>
          <utility>100</utility>
      </child>

      <!-- Childs child node -->
      <child>
          <node_id>6</node_id>
          <node_player>2</node_player>
          <probability>0.5</probability>
          <utility>100</utility>
      </child>
  </child>
</root>

1 个答案:

答案 0 :(得分:0)

  

我只需要解析与上面这个文件类似的文件——可能会有更多的子节点。

由于您的文件结构非常简单,只是碰巧采用了XML格式,因此确实没有必要使用完整的XML解析器。您用fgets逐行读取文件的方法并没有问题,因为嵌套元素的上下文完全可以被考虑进去,例如:

/* Element names the parser recognises.  Each name sits at the index given
   by the constant of the same name in `enum tags`, so the two lists must be
   kept in the same order. */
char *tags[] =
{
    "node_player",
    "node_id",
    "n",
    "library_choice",
    "kr",
    "probability",
    "utility",
    "child"
};
/* Index of every known element name.  `child` is deliberately last: all
   constants before it name leaf elements, so its value is also the number
   of leaf slots a node needs. */
enum tags
{
    node_player,
    node_id,
    n,
    library_choice,
    kr,
    probability,
    utility,
    child
};
/* One element of the parsed tree.  Children of a node form a singly linked
   list: `child` is the head, and each entry links to the next via `sibling`. */
typedef struct node
{
    struct node *sibling;   /* next node on the same level, or NULL */
    struct node *child;     /* first node on the next level, or NULL */
    float content[child];   /* one value per leaf tag; the array size is the
                               enum constant `child`, not the member above */
} node_t;

#include <math.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read the next significant line from stdin and return a pointer to its
 * first non-blank character, which is guaranteed to be '<'.
 *
 * Blank lines are skipped, and so is any line whose second non-blank
 * character is '!' (this is how single-line <!-- ... --> comments are
 * dropped; comments spanning several lines are not recognised).
 *
 * The result points into a static buffer that the next call overwrites.
 * On end of input, on an over-long line, or on a line that does not start
 * with a tag, a message is printed and the program exits with status 1.
 */
char *getln(void)
{
    static char ln[99]; // max. line length without '\n' is 97; adjust if needed
    char *s;
    do
    {
        if (!fgets(ln, sizeof ln, stdin)) puts("EOF"), exit(1);
        // A filled buffer without a newline (and with input still pending)
        // means the line was cut; treating the tail as a separate line
        // would silently corrupt the parse.
        if (!strchr(ln, '\n') && !feof(stdin))
            printf("line too long: %s\n", ln), exit(1);
        // <ctype.h> functions require an unsigned char value; a negative
        // plain char (e.g. a UTF-8 byte) would be undefined behaviour.
        for (s = ln; isspace((unsigned char)*s); ++s)
            ;                           // skip leading space
    }
    while (!s[0] || s[1] == '!');       // skip empty lines and comments
    if (s[0] != '<') printf("expected a tag, but got: %s\n", s), exit(1);
    return s;
}

/* Return non-zero when `s` starts with exactly the tag name `name`, i.e. the
   name is followed by '>', '/' or white space rather than more name
   characters.  A plain prefix test would accept "<name>" as the tag "n". */
static int tag_matches(const char *s, const char *name)
{
    size_t len = strlen(name);
    if (strncmp(s, name, len) != 0) return 0;
    unsigned char next = (unsigned char)s[len];
    return next == '>' || next == '/' || isspace(next);
}

/*
 * Parse one element and everything nested inside it into a tree node.
 *
 * cp       line holding the element's opening tag, as returned by getln()
 *          (it must start with '<').
 * tagname  name the opening tag is required to have; the element ends at
 *          the line starting with the matching "</tagname".
 *
 * Every following line up to that closing tag must be either a leaf element
 * of the form <name>number</name> on a single line, whose value is stored in
 * content[name], or the opening tag of a nested "child" element, which is
 * parsed recursively and appended to this node's child list in document
 * order.  Leaf slots that never appear keep the marker value HUGE_VALF.
 *
 * Returns a node allocated with malloc().  Any error (unexpected or unknown
 * tag, malformed leaf, out of memory) prints a message and exits with
 * status 1.
 */
node_t *parse(char *cp, char *tagname)
{
    if (!tag_matches(cp+1, tagname))
        printf("expected %s tag, but got: %s\n", tagname, cp), exit(1);
    node_t *np = malloc(sizeof *np);
    if (!np) puts("OOM"), exit(1);  // check before the first access through np
    np->sibling = np->child = NULL;
    node_t **childp = &np->child;   // where the next child's address is stored
    int i;
    for (i = 0; i < child; ) np->content[i++] = HUGE_VALF;  // no value
    while (cp = getln(), cp[1] != '/' || !tag_matches(cp+2, tagname))
    {
        // Look the tag up in the name table; `child` is its last index.
        for (i = 0; i <= child && !tag_matches(cp+1, tags[i]); ) ++i;
        if (i > child) printf("unknown tag: %s\n", cp), exit(1);
        if (i == child) // a child element
        {
            node_t *kid = parse(cp, "child");
            *childp = kid;
            childp = &kid->sibling; // next child is linked after this one
        }
        else            // a leaf element
        {
            char *value = strchr(cp, '>');
            if (!value) printf("malformed element: %s\n", cp), exit(1);
            ++value;    // the text starts right after the opening tag
            char *end;
            double number = strtod(value, &end);
            if (end == value)   // nothing that looks like a number
                printf("expected a number in: %s\n", cp), exit(1);
            np->content[i] = (float)number;
        }
    }
    return np;
}

int level;  /* nesting depth of the node print() is currently showing; 0 = root */

/*
 * Print the tree rooted at `np`, one node per line, and serve as an example
 * of how to traverse it.  Each line carries a heading ("root:" at depth 0,
 * otherwise an indented "child level N:") followed by name=value for every
 * leaf slot that holds a value.  Children are printed recursively, one level
 * deeper, in list order.
 */
void print(node_t *np)
{
    if (level)
        printf("%*cchild level %d:", level, ' ', level);
    else
        printf("root:");

    /* Leaf values first; HUGE_VALF marks a slot that was never filled. */
    for (int slot = 0; slot != child; ++slot)
    {
        float value = np->content[slot];
        if (value != HUGE_VALF)
            printf(" %s=%g", tags[slot], value);
    }
    puts("");

    /* Then walk the child list one level further down. */
    ++level;
    node_t *kid = np->child;
    while (kid)
    {
        print(kid);
        kid = kid->sibling;
    }
    --level;
}

/* Entry point: parse the <root> element read from stdin and print the
   resulting tree.  The return type is spelled out because implicit int is
   not valid since C99. */
int main(void)
{
    node_t *root = parse(getln(), "root");
    level = 0;
    print(root);
    return 0;
}