使用libxml2解析多文档RELAX-NG模式

时间:2016-04-25 20:12:30

标签: c xml reference libxml2 relaxng

我想将 RELAX-NG 架构转换为 schemaInfo 对象,以便可以在codemirror中用于xml-completion。

https://codemirror.net/demo/xmlcomplete.html

xmllint用法

当用于验证这样的文档时,libxml2已经支持多文档relax-NG模式:

xmllint --relaxng myschema.rng mydoc.xml

问题

libxml2 是否也可用于解析多文档模式文件?

以下是多文档架构的示例:

这里有一些libxml2功能,我不明白,但可能会有所帮助:

假设

我认为我必须使用以下工具将多文档架构转换为单个文档架构:https://github.com/h4l/rnginline/tree/master/rnginline

直接使用 libxml2 会很棒,因为我可以在没有预处理的情况下支持模式。

更新3.5.2016

如您所见,用这种方式解析 RELAX-NG 模式只会显示顶层文件,而不会包含主文件中通过 include 指令引入的任何文件(注意:RELAX-NG 模式可以拆分为多个文件)。

<!-- XHTML Basic -->

<grammar ns="http://www.w3.org/1999/xhtml"
         xmlns="http://relaxng.org/ns/structure/1.0">

<include href="modules/datatypes.rng"/>
<include href="modules/attribs.rng"/>
<include href="modules/struct.rng"/>
<include href="modules/text.rng"/>
<include href="modules/hypertext.rng"/>
<include href="modules/list.rng"/>
<include href="modules/basic-form.rng"/>
<include href="modules/basic-table.rng"/>
<include href="modules/image.rng"/>
<include href="modules/param.rng"/>
<include href="modules/object.rng"/>
<include href="modules/meta.rng"/>
<include href="modules/link.rng"/>
<include href="modules/base.rng"/>

</grammar>

源代码

/**
 * section: Tree
 * synopsis: Navigates a tree to print element names
 * purpose: Parse a file to a tree, use xmlDocGetRootElement() to
 *          get the root element, then walk the document and print
 *          all the element name in document order.
 * usage: tree1 filename_or_URL
 * test: tree1 test2.xml > tree1.tmp && diff tree1.tmp $(srcdir)/tree1.res
 * author: Dodji Seketeli
 * copy: see Copyright for the status of this software.
 */
#include <stdio.h>
#include <libxml/parser.h>
#include <libxml/tree.h>

#ifdef LIBXML_TREE_ENABLED


#define ANSI_COLOR_RED     "\x1b[31m"
#define ANSI_COLOR_GREEN   "\x1b[32m"
#define ANSI_COLOR_YELLOW  "\x1b[33m"
#define ANSI_COLOR_BLUE    "\x1b[34m"
#define ANSI_COLOR_MAGENTA "\x1b[35m"
#define ANSI_COLOR_CYAN    "\x1b[36m"
#define ANSI_COLOR_RESET   "\x1b[0m"


/*
 *To compile this file using gcc you can type
 *gcc `xml2-config --cflags --libs` -o xmlexample libxml2-example.c
 */

/**
 * print_element_names:
 * @a_node: the initial xml node to consider.
 *
 * Prints the names of the all the xml elements
 * that are siblings or children of a given xml node.
 */

/**
 * pad:
 * @depth: nesting depth of the node being printed.
 *
 * Builds an indentation string made of depth + 1 spaces. A negative
 * depth yields an empty string, and the length is capped so that it
 * always fits in the internal buffer.
 *
 * Returns a pointer to a static, NUL-terminated buffer. The buffer is
 * overwritten by the next call, so the result must be consumed (or
 * copied) before calling pad() again. Not reentrant.
 */
char* pad(int depth) {
  enum { PAD_CAPACITY = 2000 };
  /* static storage: the pointer stays valid after the function returns */
  static char str[PAD_CAPACITY];
  int count = 0;

  if (depth >= PAD_CAPACITY - 1) {
    /* compare before adding 1 so that a huge depth cannot overflow */
    count = PAD_CAPACITY - 1;
  } else if (depth >= 0) {
    count = depth + 1;
  }

  for (int i = 0; i < count; i++) {
    str[i] = ' ';
  }
  str[count] = '\0';
  return str;
}

/**
 * print_element_names:
 * @a_node: the first xml node to consider (may be NULL).
 * @depth: nesting depth of @a_node, used to indent the output.
 *
 * Walks @a_node, its following siblings and all of their descendants
 * in document order. For every element node it prints the indented
 * element name, followed by one magenta-colored "name: value" line
 * per attribute.
 */
static void
print_element_names(xmlNode * a_node, int depth)
{
    for (xmlNode *cur_node = a_node; cur_node; cur_node = cur_node->next) {
        if (cur_node->type == XML_ELEMENT_NODE) {
            printf("%s %s\n", pad(depth), (const char *) cur_node->name);

            for (xmlAttrPtr attr = cur_node->properties; attr != NULL; attr = attr->next) {
                /* xmlNodeListGetString returns a newly allocated copy (or NULL) */
                xmlChar *value = xmlNodeListGetString(cur_node->doc, attr->children, 1);

                printf("%s", ANSI_COLOR_MAGENTA);
                printf("%s %s: ", pad(depth), (const char *) attr->name);
                /* an attribute without content yields NULL; print it as empty */
                printf("%s \n", value != NULL ? (const char *) value : "");
                printf("%s", ANSI_COLOR_RESET);

                if (value != NULL) {
                    xmlFree(value);
                }
            }
        }

        /* descend into the children of every node, not only elements */
        print_element_names(cur_node->children, depth+1);
    }
}


/**
 * Simple example to parse a file called "file.xml",
 * walk down the DOM, and print the name of the
 * xml elements nodes.
 */
/**
 * Parses the file (or URL) given as the single command-line argument,
 * walks the resulting DOM and prints the names of the element nodes
 * together with their attributes.
 *
 * Returns 0 on success, 1 on a usage error or when the file cannot be
 * parsed.
 */
int
main(int argc, char **argv)
{
    xmlDoc *doc = NULL;
    xmlNode *root_element = NULL;

    if (argc != 2) {
        fprintf(stderr, "usage: %s filename_or_URL\n",
                argc > 0 ? argv[0] : "tree1");
        return(1);
    }

    /*
     * this initialize the library and check potential ABI mismatches
     * between the version it was compiled for and the actual shared
     * library used.
     */
    LIBXML_TEST_VERSION

    /*parse the file and get the DOM */
    doc = xmlReadFile(argv[1], NULL, 0);

    if (doc == NULL) {
        /* nothing to walk: report the failure through the exit status */
        fprintf(stderr, "error: could not parse file %s\n", argv[1]);
        xmlCleanupParser();
        return(1);
    }

    /*Get the root element node */
    root_element = xmlDocGetRootElement(doc);

    print_element_names(root_element, 0);

    /*free the document */
    xmlFreeDoc(doc);

    /*
     *Free the global variables that may
     *have been allocated by the parser.
     */
    xmlCleanupParser();

    return 0;
}
#else
/*
 * Fallback entry point used when libxml2 was built without tree
 * support: reports the problem on stderr and returns a failure status.
 */
int main(void) {
    fprintf(stderr, "Tree support not compiled in\n");
    /* return instead of exit(): <stdlib.h> is not included by this file */
    return 1;
}
#endif

示例用法

[nix-shell:~/Desktop/projects/nlnet/nlnet]$ ./tree1 html5-rng/xhtml-basic.rng
 grammar
  ns: http://www.w3.org/1999/xhtml 
   include
   href: modules/datatypes.rng 
   include
   href: modules/attribs.rng 
   include
   href: modules/struct.rng 
   include
   href: modules/text.rng 
   include
   href: modules/hypertext.rng 
   include
   href: modules/list.rng 
   include
   href: modules/basic-form.rng 
   include
   href: modules/basic-table.rng 
   include
   href: modules/image.rng 
   include
   href: modules/param.rng 
   include
   href: modules/object.rng 
   include
   href: modules/meta.rng 
   include
   href: modules/link.rng 
   include
   href: modules/base.rng 

1 个答案:

答案 0 :(得分:0)

  

libxml2也可用于解析多文档模式文件吗?

xmllint调用libxml2的xmlRelaxNGValidateDoc方法:

xmlRelaxNGValidateDoc(xmlRelaxNGValidCtxtPtr ctxt,xmlDocPtr doc)

例如:

 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/types.h>

 #include <libxml/xmlmemory.h>
 #include <libxml/parser.h>
 #include <libxml/relaxng.h>

 /*
  * Validates an XML document against a RELAX-NG schema.
  *
  * usage: program document.xml schema.rng
  *
  * Prints "status == N", where N is the value returned by
  * xmlRelaxNGValidateDoc(). Exits with EXIT_FAILURE when the arguments
  * are wrong or when the document, the schema or one of the contexts
  * cannot be created; exits with EXIT_SUCCESS once validation has run.
  */
 int main(int argc, char *argv[])
 {
    int status;
    int rc = EXIT_FAILURE;
    xmlDoc *doc = NULL;
    xmlRelaxNGPtr schema = NULL;
    xmlRelaxNGValidCtxtPtr validctxt = NULL;
    xmlRelaxNGParserCtxtPtr rngparser = NULL;

    if (argc != 3) {
        fprintf(stderr, "usage: %s document.xml schema.rng\n",
                argc > 0 ? argv[0] : "validate");
        exit(EXIT_FAILURE);
    }

    doc = xmlParseFile(argv[1]);
    if (doc == NULL) {
        fprintf(stderr, "error: could not parse document %s\n", argv[1]);
        goto cleanup;
    }

    rngparser = xmlRelaxNGNewParserCtxt(argv[2]);
    if (rngparser == NULL) {
        fprintf(stderr, "error: could not create a RELAX-NG parser for %s\n", argv[2]);
        goto cleanup;
    }

    schema = xmlRelaxNGParse(rngparser);
    if (schema == NULL) {
        fprintf(stderr, "error: could not parse schema %s\n", argv[2]);
        goto cleanup;
    }

    validctxt = xmlRelaxNGNewValidCtxt(schema);
    if (validctxt == NULL) {
        fprintf(stderr, "error: could not create a validation context\n");
        goto cleanup;
    }

    status = xmlRelaxNGValidateDoc(validctxt, doc);
    printf("status == %d\n", status);
    rc = EXIT_SUCCESS;

 cleanup:
    /* release the validation context before the schema it refers to */
    if (validctxt != NULL)
        xmlRelaxNGFreeValidCtxt(validctxt);
    if (schema != NULL)
        xmlRelaxNGFree(schema);
    if (rngparser != NULL)
        xmlRelaxNGFreeParserCtxt(rngparser);
    if (doc != NULL)
        xmlFreeDoc(doc);
    exit(rc);
 }

验证以下来源:

<?xml version="1.0"?>
<root>
  <t>foo</t>
</root>

使用以下架构:

<?xml version="1.0" encoding="UTF-8"?>
<grammar ns="" xmlns="http://relaxng.org/ns/structure/1.0">
  <start>
    <element name="t">
      <ref name="tcont"/>
    </element>
  </start>
  <define name="tcont">
    <text/>
  </define>
</grammar>

区别在于对externalRef元素的支持:

  

externalRef模式可用于引用单独文件中定义的模式。 externalRef元素具有必需的href属性,该属性指定包含模式的文件的URL。如果指定网址中包含的模式匹配,则externalRef会匹配。

例如:

<?xml version="1.0" encoding="UTF-8"?>
<grammar ns="" xmlns="http://relaxng.org/ns/structure/1.0">
  <start>
    <element name="root">
      <externalRef href="595792-ext.rng"/>
    </element>
  </start>
</grammar>

include元素:

  

include元素允许将语法合并在一起。grammar 模式可以包含 include 元素作为子元素。 include元素具有必需的href属性,该属性指定包含语法模式的文件的URL。引用的语法模式中的定义将包含在包含include元素的语法模式中。

     

combine属性与include结合使用时特别有用。   如果语法包含多个具有相同名称的定义,则定义必须指定如何使用combine属性将它们组合到单个定义中。

例如:

  

demo.rng

<?xml version="1.0" encoding="iso-8859-1"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0"
 datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">

<include href="demo2.rng">
<define name="TEI.prose"><ref name="INCLUDE"/></define>
</include>
</grammar>
  

demo2.rng

<?xml version="1.0" encoding="utf-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0" xmlns:t="http://www.thaiopensource.com/ns/annotations" xmlns:a="http://relaxng.org/ns/compatibility/annotations/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">

   <start>
         <ref name="TEI.2"/>
   </start>
   <define name="IGNORE">
      <notAllowed/>
   </define>
   <define name="INCLUDE">
      <empty/>
   </define>


  <include href="demo3.rng"/>

   <define name="TEI.2">
      <element name="TEI.2">
         <text/>
      </element>
   </define>

</grammar>
  

demo3.rng

<?xml version="1.0" encoding="utf-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0" xmlns:t="http://www.thaiopensource.com/ns/annotations" xmlns:a="http://relaxng.org/ns/compatibility/annotations/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">

   <define name="TEI.prose" combine="interleave">
      <ref name="IGNORE"/>
   </define>

</grammar>

参考