libxml2 + pblSet(或任何其他集合) - 无法比较结果

时间:2015-04-22 02:37:25

标签: c xpath set libxml2

我正在使用 C 语言和 libxml2。我的目标是从不同文件的标签中收集字符串,并对它们进行比较。

我使用 XPath 获取所有节点,并且已经能成功地从节点中提取文本。问题出现在我试图比较这些文本的时候。

我尝试过的方法:先后自己实现了 3 种不同的集合 -> 后来怀疑问题出在这些实现本身,所以改用 PblSet,因为它是我找到的唯一能够计算两个集合之间差集的库。

有趣的部分从这里开始:我能够取出各个条目,能够把它们存入 PblSet,也能再从中取回;但是当我比较两个集合时,返回的结果却好像两个集合完全不同。下面是解析器的精简测试用例:

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>

#include "pbl_1_04_04/src/pbl.h"

/*
 * Report whether `ptr` is NULL.
 *
 * Returns 0 for a non-NULL pointer. For NULL, prints a diagnostic to stdout
 * (without a trailing newline) and returns 1.
 */
int checkPtr(void *ptr) {
    if (ptr != NULL) {
        return 0;
    }

    printf("Cannot allocate memory");
    return 1;
}

/*
 * Strip leading and trailing whitespace from `s` in place.
 *
 * Trailing whitespace is removed by writing a terminator into `s`; leading
 * whitespace is skipped by returning a pointer into `s` past it. The result
 * therefore aliases `s` and must not be freed separately.
 *
 * An empty string is returned untouched (nothing is written to it).
 */
const char *strstrip(char *s) {
    size_t len = strlen(s);

    if (len == 0)
        return s;

    /*
     * Shrink the length instead of walking a pointer backwards, so an
     * all-whitespace string never produces a pointer before the buffer.
     * The unsigned char cast keeps isspace() defined for bytes >= 0x80.
     */
    while (len > 0 && isspace((unsigned char)s[len - 1]))
        len--;
    s[len] = '\0';

    while (*s != '\0' && isspace((unsigned char)*s))
        s++;

    return s;
}

/*
 * Parse the XML file `filename`.
 *
 * Returns the parsed document, which the caller releases with xmlFreeDoc(),
 * or NULL after printing a message when the file cannot be parsed.
 */
xmlDocPtr loadFile(const char *filename) {
    xmlDocPtr doc = xmlParseFile(filename);

    if (doc == NULL) {
        /* No document was created on failure, so there is nothing to free. */
        printf("%s cannot be loaded\n", filename);
    }
    return doc;
}

/*
 * Collect the text content of every node in `doc` matched by `xpath`.
 *
 * Returns a new tree set whose elements are strings obtained from
 * xmlNodeGetContent(); the caller owns both the set and those strings.
 * Returns NULL when the XPath context or evaluation fails, when the
 * expression matches nothing, or when the set cannot be created.
 *
 * The matched nodes stay owned by `doc` and are never freed here; they are
 * released by the caller's xmlFreeDoc().
 */
PblSet* getDocIDs(xmlDocPtr doc, const char *xpath) {
    xmlXPathContextPtr context = NULL;
    xmlXPathObjectPtr result = NULL;
    xmlNodeSetPtr nodeset;
    PblSet *set = NULL;
    int i;

    context = xmlXPathNewContext(doc);
    if (checkPtr(context) > 0)
        goto out;

    result = xmlXPathEvalExpression((const xmlChar *)xpath, context);
    if (checkPtr(result) > 0)
        goto out;

    /* The node set belongs to `result`; it is released with it below. */
    nodeset = result->nodesetval;
    if (xmlXPathNodeSetIsEmpty(nodeset)) {
        printf("No results for %s\n", xpath);
        goto out;
    }

    printf("%d objects found with path \"%s\"\n", nodeset->nodeNr, xpath);

    /* Create the set only once there is something to store, so the early
     * exits above have nothing to release. */
    set = pblSetNewTreeSet();
    if (checkPtr(set) > 0)
        goto out;

    /*
     * Compare elements by string content. Every id is a separate heap
     * allocation, and with the set's default comparison equal ids coming
     * from different documents were observed not to match.
     */
    pblSetSetCompareFunction(set, pblCollectionStringCompareFunction);

    for (i = 0; i < nodeset->nodeNr; ++i) {
        xmlChar *key = xmlNodeGetContent(nodeset->nodeTab[i]);

        if (key == NULL)
            continue;

        /* The set takes the pointer only when it actually changed; a
         * rejected duplicate (or a failed add) is still ours to free. */
        if (pblSetAdd(set, key) <= 0)
            xmlFree(key);
    }

out:
    /* Both calls were already issued with NULL by the original error paths. */
    xmlXPathFreeObject(result);
    xmlXPathFreeContext(context);
    return set;
}


/*
 * Load two XML files, collect the text of every <id> element of each into a
 * set, and print the sizes of the sets and of their two differences.
 *
 * Returns EXIT_SUCCESS when everything was loaded and compared, and
 * EXIT_FAILURE otherwise. Documents and sets acquired so far are released
 * on every path.
 */
int main(void) {
    const char *filename1 = "js_new.xml";
    const char *filename2 = "js_old.xml";
    const char *xpath = "//id";

    xmlDocPtr doc1 = NULL, doc2 = NULL;
    PblSet *set1 = NULL, *set2 = NULL;
    PblSet *new = NULL, *old = NULL;
    const char *first;
    int rc = EXIT_FAILURE;

    doc1 = loadFile(filename1);
    if (checkPtr(doc1) > 0) goto out;
    doc2 = loadFile(filename2);
    if (checkPtr(doc2) > 0) goto out;

    set1 = getDocIDs(doc1, xpath);
    if (checkPtr(set1) > 0) goto out;
    set2 = getDocIDs(doc2, xpath);
    if (checkPtr(set2) > 0) goto out;

    printf("arr1 size: %d\n", pblSetSize(set1));
    printf("arr2 size: %d\n", pblSetSize(set2));

    /* Ids present only in the first file, then only in the second. */
    new = pblSetDifference(set1, set2);
    if (checkPtr(new) > 0) goto out;
    old = pblSetDifference(set2, set1);
    if (checkPtr(old) > 0) goto out;

    printf("new size: %d\n", pblSetSize(new));
    printf("old size: %d\n", pblSetSize(old));

    /* A difference may be empty; never hand a NULL pointer to %s. */
    first = pblSetGetFirst(new);
    printf("First element of new: %s\n", first != NULL ? first : "(none)");
    first = pblSetGetFirst(old);
    printf("First element of old: %s\n", first != NULL ? first : "(none)");

    rc = EXIT_SUCCESS;

out:
    /*
     * Guarded because this code does not assume pblSetFree() accepts NULL.
     * Only the set containers are released here, as before; the id strings
     * they reference are not freed before the process exits.
     */
    if (old != NULL) pblSetFree(old);
    if (new != NULL) pblSetFree(new);
    if (set2 != NULL) pblSetFree(set2);
    if (set1 != NULL) pblSetFree(set1);

    if (doc2 != NULL) xmlFreeDoc(doc2);
    if (doc1 != NULL) xmlFreeDoc(doc1);

    return rc;
}

测试xml文件在这里:

https://gist.github.com/osleg/176f3a921ed91b3c9e6f

https://gist.github.com/osleg/dbc85f8642bfa255f9ba

1 个答案:

答案 0 :(得分:0)

找到答案了。问题出在 pblSet 的比较函数上:必须把集合的比较函数设置为 pblCollectionStringCompareFunction,如下所示:

/* Create both sets, then make each compare its elements as C strings. */
PblSet *set1 = pblSetNewHashSet();
PblSet *set2 = pblSetNewHashSet();
pblSetSetCompareFunction(set1, pblCollectionStringCompareFunction);
pblSetSetCompareFunction(set2, pblCollectionStringCompareFunction);

这样它就能按预期工作了。

不过我仍然想知道,为什么只有当我自己设置字符串、而不是使用 xmlParser 解析出来的字符串时,它才能正常工作。