实现内存中的B树插入

时间:2012-11-16 22:35:36

标签: c algorithm tree associative-array b-tree

我一直在创建一个B树的内存中C实现,因为我发现没有可读的在线(即。这个可怕的代码:http://www.freewebs.com/attractivechaos/kbtree.h.html)。

它不太有效,因为偶尔插入元素时,它将无法找到先前插入的元素。此外,我不确定我的一般实现是否非常好,并且我正在以明智的方式进行插入。任何人都可以批评我所做的事情,并找出为什么它不能一直无效?

显然,B-Trees比Red-black或AVL树更有效,因为元素在每个节点的内存中一起存储。这让我感兴趣。

请注意,“顺序”是元素的数量,而不是子指针的数量。原因很简单,因为它对我来说更有意义。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define CB_BTREE_ORDER 8
#define CB_BTREE_HALF_ORDER CB_BTREE_ORDER/2

typedef struct{
    void * parent;
    void * children[CB_BTREE_ORDER + 1];
    unsigned char numElements;
} CBBTreeNode;

typedef struct{
    unsigned char found;
    CBBTreeNode * node;
    unsigned char pos;
} CBFindResult;

typedef struct{
    unsigned char keySize;
    unsigned char dataSize;
    int nodeSize;
    CBBTreeNode * root;
} CBAssociativeArray;

CBFindResult CBAssociativeArrayFind(CBAssociativeArray * self, unsigned char * key);
void CBAssociativeArrayInsert(CBAssociativeArray * self, unsigned char * key, void * data, CBFindResult pos, CBBTreeNode * right);
CBFindResult CBBTreeNodeBinarySearch(CBBTreeNode * self, unsigned char * key, unsigned char keySize);
void CBInitAssociativeArray(CBAssociativeArray * self, unsigned char keySize, unsigned char dataSize);

CBFindResult CBAssociativeArrayFind(CBAssociativeArray * self, unsigned char * key){
    CBFindResult result;
    CBBTreeNode * node = self->root;
    for (;;) {
        result = CBBTreeNodeBinarySearch(node, key, self->keySize);
        if (result.found){
            result.node = node;
            return result;
        }else{
            if (node->children[result.pos])
                node = node->children[result.pos];
            else{
                result.node = node;
                return result;
            }
        }
    }
}
void CBAssociativeArrayInsert(CBAssociativeArray * self, unsigned char * key, void * data, CBFindResult pos, CBBTreeNode * right){
    // See if we can insert data in this node
    unsigned char * keys = (unsigned char *)(pos.node + 1);
    unsigned char * dataElements = keys + self->keySize * CB_BTREE_ORDER;
    if (pos.node->numElements < CB_BTREE_ORDER) {
        if (pos.node->numElements > pos.pos){
            memmove(keys + (pos.pos + 1) * self->keySize, keys + pos.pos * self->keySize, (pos.node->numElements - pos.pos) * self->keySize);
            memmove(dataElements + (pos.pos + 1) * self->dataSize, dataElements + pos.pos * self->dataSize, (pos.node->numElements - pos.pos) * self->dataSize);
            memmove(pos.node->children + pos.pos + 2, pos.node->children + pos.pos + 1, (pos.node->numElements - pos.pos) *  sizeof(*pos.node->children));
        }
        memcpy(keys + pos.pos * self->keySize, key, self->keySize);
        memcpy(dataElements + pos.pos * self->dataSize, data, self->dataSize);
        pos.node->children[pos.pos + 1] = right;
        pos.node->numElements++;
    }else{
        CBBTreeNode * new = malloc(self->nodeSize);
        unsigned char * newKeys = (unsigned char *)(new + 1);
        unsigned char * newData = newKeys + self->keySize * CB_BTREE_ORDER;
        new->numElements = CB_BTREE_HALF_ORDER;
        pos.node->numElements = CB_BTREE_HALF_ORDER;
        unsigned char * midKey;
        unsigned char * midVal;
        if (pos.pos >= CB_BTREE_HALF_ORDER) {
            if (pos.pos == CB_BTREE_HALF_ORDER) {
                memcpy(newKeys, keys + CB_BTREE_HALF_ORDER * self->keySize, CB_BTREE_HALF_ORDER * self->keySize);
                memcpy(newData, dataElements + CB_BTREE_HALF_ORDER * self->dataSize, CB_BTREE_HALF_ORDER * self->dataSize);
                memcpy(new->children + 1, pos.node->children + CB_BTREE_HALF_ORDER + 1, CB_BTREE_HALF_ORDER * sizeof(*new->children));
                new->children[0] = right;
                midKey = key;
                midVal = data;
            }else{
                if (pos.pos > CB_BTREE_HALF_ORDER + 1){
                    memcpy(newKeys, keys + (CB_BTREE_HALF_ORDER + 1) * self->keySize, (pos.pos - CB_BTREE_HALF_ORDER - 1) * self->keySize);
                    memcpy(newData, dataElements + (CB_BTREE_HALF_ORDER + 1) * self->dataSize, (pos.pos - CB_BTREE_HALF_ORDER - 1)  * self->dataSize);
                }
                memcpy(newKeys + (pos.pos - CB_BTREE_HALF_ORDER - 1) * self->keySize, key, self->keySize);
                memcpy(newData + (pos.pos - CB_BTREE_HALF_ORDER - 1) * self->dataSize, data, self->dataSize);
                memcpy(newKeys + (pos.pos - CB_BTREE_HALF_ORDER) * self->keySize, keys + pos.pos * self->keySize, (CB_BTREE_ORDER - pos.pos) * self->keySize);
                memcpy(newData + (pos.pos - CB_BTREE_HALF_ORDER) * self->dataSize, dataElements + pos.pos * self->dataSize, (CB_BTREE_ORDER - pos.pos) * self->dataSize); // o 0 i 1 ii 2 iii 3 iv
                memcpy(new->children, pos.node->children + CB_BTREE_HALF_ORDER + 1, (pos.pos - CB_BTREE_HALF_ORDER) * sizeof(*new->children));
                new->children[pos.pos - CB_BTREE_HALF_ORDER] = right;
                if (CB_BTREE_ORDER > pos.pos)
                    memcpy(new->children + pos.pos - CB_BTREE_HALF_ORDER + 1, pos.node->children + pos.pos + 1, (CB_BTREE_ORDER - pos.pos) * sizeof(*new->children));
                midKey = keys + CB_BTREE_HALF_ORDER * self->keySize;
                midVal = dataElements + CB_BTREE_HALF_ORDER * self->dataSize;
            }
        }else{
            memcpy(newKeys, keys + CB_BTREE_HALF_ORDER * self->keySize, CB_BTREE_HALF_ORDER * self->keySize);
            memcpy(newData, dataElements + CB_BTREE_HALF_ORDER * self->dataSize, CB_BTREE_HALF_ORDER * self->dataSize);
            memcpy(new->children, pos.node->children + CB_BTREE_HALF_ORDER, (CB_BTREE_HALF_ORDER + 1) * sizeof(*new->children));
            memmove(keys + (pos.pos + 1) * self->keySize, keys + pos.pos * self->keySize, (CB_BTREE_HALF_ORDER - pos.pos) * self->keySize);
            memmove(dataElements + (pos.pos + 1) * self->dataSize, dataElements + pos.pos * self->dataSize, (CB_BTREE_HALF_ORDER - pos.pos) * self->dataSize);
            if (CB_BTREE_HALF_ORDER > 1 + pos.pos)
                memmove(pos.node->children + pos.pos + 2, pos.node->children + pos.pos + 1, (CB_BTREE_HALF_ORDER - pos.pos - 1) * self->dataSize);
            memcpy(keys + pos.pos * self->keySize, key, self->keySize);
            memcpy(dataElements + pos.pos * self->dataSize, data, self->dataSize);
            pos.node->children[pos.pos + 1] = right;
            midKey = keys + CB_BTREE_HALF_ORDER * self->keySize;
            midVal = dataElements + CB_BTREE_HALF_ORDER * self->dataSize;
        }
        if ( ! pos.node->parent) {
            self->root = malloc(self->nodeSize);
            self->root->numElements = 0;
            self->root->parent = NULL;
            pos.node->parent = self->root;
            self->root->children[0] = pos.node;
        }
        new->parent = pos.node->parent;
        CBFindResult res = CBBTreeNodeBinarySearch(pos.node->parent, midKey, self->keySize);
        res.node = pos.node->parent;
        return CBAssociativeArrayInsert(self, midKey, midVal, res, new);
    }
}
CBFindResult CBBTreeNodeBinarySearch(CBBTreeNode * self, unsigned char * key, unsigned char keySize){
    CBFindResult res;
    res.found = 0;
    if ( ! self->numElements) {
        res.pos = 0;
        return res;
    }
    unsigned char left = 0;
    unsigned char right = self->numElements - 1;
    unsigned char * keys = (unsigned char *)(self + 1);
    int cmp;
    while (left <= right) {
        res.pos = (right+left)/2;
        cmp = memcmp(key, keys + res.pos * keySize, keySize);
        if (cmp == 0) {
            res.found = 1;
            break;
        }else if (cmp < 0){
            if ( ! res.pos)
                break;
            right = res.pos - 1;
        }else
            left = res.pos + 1;
    }
    if (cmp > 0)
        res.pos++;
    return res;
}
void CBInitAssociativeArray(CBAssociativeArray * self, unsigned char keySize, unsigned char dataSize){
    self->keySize = keySize;
    self->dataSize = dataSize;
    self->nodeSize = sizeof(*self->root) + (keySize + dataSize) * CB_BTREE_ORDER;
    self->root = malloc(self->nodeSize);
    self->root->parent = NULL;
    self->root->numElements = 0;
    for (unsigned char x = 0; x < CB_BTREE_ORDER + 1; x++)
        self->root->children[x] = NULL;
}

int main(){
    srand(1);
    CBAssociativeArray array;
    CBInitAssociativeArray(&array, 10, 10);
    int size = CB_BTREE_ORDER * (CB_BTREE_ORDER + 2) * 10;;
    unsigned char * keys = malloc(size);
    for (int x = 0; x < size; x++) {
        keys[x] = rand();
    }
    for (int x = 0; x < size; x += 10) {
        CBAssociativeArrayInsert(&array, keys + x, keys + x, CBAssociativeArrayFind(&array, keys + x), NULL);
        for (int y = 0; y <= x; y += 10) {
            if ( ! CBAssociativeArrayFind(&array, keys + y).found) {
                printf("RANDOM FIND FAIL %u - %u\n", y, x);
                return 1;
            }
        }
    }
    return 0;
}

感谢。

1 个答案:

答案 0 :(得分:1)

问题与memcpy或memmove操作无关。我忘了在拆分节点时更改父指针。

在拆分节点后添加此项将使其正常工作:

if (new->children[0])
    for (unsigned char x = 0; x < CB_BTREE_HALF_ORDER + 1; x++) {
        CBBTreeNode * child = new->children[x];
        child->parent = new;
    }

忘记添加这个是非常愚蠢的。该算法现在适用于许多随机插入。