在 B 树中插入

时间:2021-07-07 14:52:32

标签: c algorithm data-structures b-tree b-tree-index

我需要这方面的帮助。

我正在尝试实现 B 树插入,但我在某些部分卡住了,因为我对这个算法感到困惑。根据作者 Cormen 的算法介绍一书中的 B 树伪代码,在我看来,我正在编写 B 树插入正确的代码,但是当我在运行算法后检查磁盘中的 B 树时,数据是错误的。我不知道为什么我在这段代码中做错了。

算法简介页面:http://staff.ustc.edu.cn/~csli/graduate/algorithms/book6/chap19.htm

我的 btree.c 文件:

#include "../include/btree.h"
#include <stdio.h>

struct BTreeNode
{
    // leaf
    char leaf;
    // numberOfIndexedKeys
    int numberOfIndexedKeys;
    // RRNnode
    int RRNnode;
    // Set of keys
    int key[4];
    // Set of references to other file
    ll dataRegReference[4];
    // Subtree pointers
    int pointers[5];
};

void BTreeWriteHeader(BTreeHeader_t *header, s_file_t *indexFile)
{
    if (indexFile->fp == NULL)
    {
        printf("Processing file failed.");
        return;
    }
    fseek(indexFile->fp, 0, SEEK_SET);
    fwrite(&header->status, sizeof(char), 1, indexFile->fp);
    fwrite(&header->noderoot, sizeof(int), 1, indexFile->fp);
    fwrite(&header->RRNnextNode, sizeof(int), 1, indexFile->fp);
    fwrite(header->trash, sizeof(char), 68, indexFile->fp);
}

BTreeHeader_t *BTreeReadHeader(s_file_t *indexFile)
{
    if (indexFile->fp == NULL || indexFile->fileConsistency == '0')
    {
        printf("Processing file failed.");
        return NULL;
    }
    BTreeHeader_t *header = (BTreeHeader_t *)calloc(1, sizeof(BTreeHeader_t));
    fseek(indexFile->fp, 0, SEEK_SET);
    fread(&header->status, sizeof(char), 1, indexFile->fp);
    fread(&header->noderoot, sizeof(int), 1, indexFile->fp);
    fread(&header->RRNnextNode, sizeof(int), 1, indexFile->fp);
    fread(header->trash, sizeof(char), 68, indexFile->fp);
    return header;
}

void BTreeWriteNode(BTreeNode_t *WriteNode, s_file_t *indexFile)
{
    if (indexFile->fp == NULL)
    {
        printf("Processing file failed.");
        return;
    }

    fseek(indexFile->fp, 77 * (WriteNode->RRNnode + 1), SEEK_SET);
    fwrite(&WriteNode->leaf, sizeof(char), 1, indexFile->fp);
    fwrite(&WriteNode->numberOfIndexedKeys, sizeof(int), 1, indexFile->fp);
    fwrite(&WriteNode->RRNnode, sizeof(int), 1, indexFile->fp);
    for (int i = 0; i < MAX_INDEXED_KEYS; i++)
    {
        fwrite(&WriteNode->pointers[i], sizeof(int), 1, indexFile->fp);
        fwrite(&WriteNode->key[i], sizeof(int), 1, indexFile->fp);
        fwrite(&WriteNode->dataRegReference[i], sizeof(ll), 1,
               indexFile->fp);
    }
    fwrite(&WriteNode->pointers[MAX_INDEXED_KEYS], sizeof(int), 1, indexFile->fp);
}

BTreeNode_t *BTreeReadNode(s_file_t *indexFile, int RRNnode)
{
    if (indexFile->fp == NULL || indexFile->fileConsistency == '0')
    {
        printf("Processing file failed.");
        return NULL;
    }
    BTreeNode_t *ReadNode = (BTreeNode_t *)calloc(1, sizeof(BTreeNode_t));
    fseek(indexFile->fp, 77 * (RRNnode + 1), SEEK_SET);
    fread(&ReadNode->leaf, sizeof(char), 1, indexFile->fp);
    fread(&ReadNode->numberOfIndexedKeys, sizeof(int), 1, indexFile->fp);
    fread(&ReadNode->RRNnode, sizeof(int), 1, indexFile->fp);
    for (int i = 0; i < MAX_INDEXED_KEYS; i++)
    {
        fread(&ReadNode->pointers[i], sizeof(int), 1, indexFile->fp);
        fread(&ReadNode->key[i], sizeof(int), 1, indexFile->fp);
        fread(&ReadNode->dataRegReference[i], sizeof(ll), 1, indexFile->fp);
    }
    fread(&ReadNode->pointers[MAX_INDEXED_KEYS], sizeof(int), 1, indexFile->fp);
    return ReadNode;
}

// Creating B-Tree Header
void BTreeHeaderCreate(s_file_t *indexFile)
{
    if (indexFile->fp == NULL)
    {
        printf("Processing file failed.");
        return;
    }
    BTreeHeader_t *header = (BTreeHeader_t *)calloc(1, sizeof(BTreeHeader_t));
    header->RRNnextNode = 0;
    header->noderoot = -1;
    header->status = '0';
    strcpy(
        header->trash,
        "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@");
    BTreeWriteHeader(header, indexFile);
}

//                    INSERTION
// =========================================================================

void cleanNode(BTreeNode_t *node)
{
    for (int i = 0; i < MAX_INDEXED_KEYS; i++)
    {
        node->key[i] = node->dataRegReference[i] = node->pointers[i] = -1;
    }
    node->pointers[MAX_INDEXED_KEYS] = -1;
    node->numberOfIndexedKeys = 0;
}

BTreeNode_t *createNode(int RRNnode, char leaf)
{
    BTreeNode_t *newNode = (BTreeNode_t *)calloc(1, sizeof(BTreeNode_t));
    newNode->RRNnode = RRNnode;
    newNode->leaf = leaf;
    newNode->numberOfIndexedKeys = 0;
    cleanNode(newNode);

    return newNode;
}

void InsertInNode(BTreeNode_t *root, s_file_t *indexFile, int key, ll dataRegReference, int pointerLeft, int pointerRight)
{
    if (indexFile->fp == NULL)
    {
        printf("Processing file failed.");
        return;
    }

    if (root->numberOfIndexedKeys > MAX_INDEXED_KEYS)
    {
        printf("Aqui!");
    }

    /*
  for(int i = 0; i < root->numberOfIndexedKeys; i++){
    if(key.key == root->chaves[i].key) return;
  }
  */

    int numberOfIndexedKeysOfNode = root->numberOfIndexedKeys - 1;
    while (numberOfIndexedKeysOfNode >= 0 && key < root->key[numberOfIndexedKeysOfNode])
    {
        // Shifting keys and dataRegReference
        root->key[numberOfIndexedKeysOfNode + 1] = root->key[numberOfIndexedKeysOfNode];
        root->dataRegReference[numberOfIndexedKeysOfNode + 1] = root->dataRegReference[numberOfIndexedKeysOfNode];
        // Shifting pointers
        root->pointers[numberOfIndexedKeysOfNode + 2] = root->pointers[numberOfIndexedKeysOfNode + 1];
        numberOfIndexedKeysOfNode--;
    }
    numberOfIndexedKeysOfNode++;
    root->pointers[numberOfIndexedKeysOfNode + 1] = root->pointers[numberOfIndexedKeysOfNode];
    root->key[numberOfIndexedKeysOfNode] = key;
    root->dataRegReference[numberOfIndexedKeysOfNode] = dataRegReference;
    root->pointers[numberOfIndexedKeysOfNode] = pointerLeft;
    root->pointers[numberOfIndexedKeysOfNode + 1] = pointerRight;
    root->numberOfIndexedKeys++;
    BTreeWriteNode(root, indexFile);
}

BTreeNode_t *SearchNode(s_file_t *indexFile, int RRNnode, int key)
{
    if (indexFile->fp == NULL || indexFile->fileConsistency == '0')
    {
        printf("Processing file failed.");
        return NULL;
    }

    BTreeNode_t *searchNode = BTreeReadNode(indexFile, RRNnode);

    int pos = 0;
    while (key > searchNode->key[pos] && pos < searchNode->numberOfIndexedKeys)
        pos++;

    if (key == searchNode->key[pos])
        return searchNode;
    if (searchNode->leaf == '1')
        return NULL;

    return SearchNode(indexFile, searchNode->pointers[pos], key);
}

void SplitChild(BTreeHeader_t *header, s_file_t *indexFile, BTreeNode_t *root, BTreeNode_t *child)
{
    int keys[MAX_INDEXED_KEYS];
    ll dataRegReference[MAX_INDEXED_KEYS];
    int pointers[MAX_INDEXED_KEYS + 1];

    for (int i = 0; i < MAX_INDEXED_KEYS; i++)
    {
        keys[i] = child->key[i];
        dataRegReference[i] = child->dataRegReference[i];
        pointers[i] = child->pointers[i];
    }
    pointers[MAX_INDEXED_KEYS] = child->pointers[MAX_INDEXED_KEYS];

    cleanNode(child);
    BTreeNode_t *newNode = createNode(header->RRNnextNode, child->leaf);
    header->RRNnextNode++;

    InsertInNode(child, indexFile, keys[0], dataRegReference[0], pointers[0], pointers[1]);
    InsertInNode(child, indexFile, keys[1], dataRegReference[1], pointers[1], pointers[2]);
    pointers[2] = child->RRNnode;
    InsertInNode(root, indexFile, keys[2], dataRegReference[2], pointers[2], newNode->RRNnode);
    InsertInNode(newNode, indexFile, keys[3], dataRegReference[3], pointers[3], pointers[4]);
}

void InsertNonFull(BTreeHeader_t *header, BTreeNode_t *root, s_file_t *indexFile, int key, ll dataRegReference, int pointerLeft, int pointerRight)
{
    if (indexFile->fp == NULL)
    {
        printf("Processing file failed.");
        return;
    }
    int numberOfIndexedKeysOfNode = root->numberOfIndexedKeys - 1;
    if (root->leaf == '1')
    {
        InsertInNode(root, indexFile, key, dataRegReference, pointerLeft, pointerRight);
    }
    else
    {
        while (numberOfIndexedKeysOfNode >= 0 && key < root->key[numberOfIndexedKeysOfNode])
            numberOfIndexedKeysOfNode--;
        numberOfIndexedKeysOfNode++;
        BTreeNode_t *childNode = NULL;
        childNode = BTreeReadNode(indexFile, root->pointers[numberOfIndexedKeysOfNode]);
        if (childNode->numberOfIndexedKeys == MAX_INDEXED_KEYS)
        {
            SplitChild(header, indexFile, root, childNode);
        }
        if (key > root->key[numberOfIndexedKeysOfNode])
            numberOfIndexedKeysOfNode++;
        InsertNonFull(header, childNode, indexFile, key, dataRegReference, pointerLeft, pointerRight);
    }
}

void BtreeInsertion(s_file_t *indexFile, int codLinha, ll offsetRegistro,
                    char removido)
{

    if (indexFile->fp == NULL)
    {
        printf("Processing file failed.");
        return;
    }
    if (removido == '0')
        return;
    BTreeHeader_t *header = BTreeReadHeader(indexFile);
    BTreeNode_t *root = BTreeReadNode(indexFile, header->noderoot);

    if (root->numberOfIndexedKeys == MAX_INDEXED_KEYS)
    {
        BTreeNode_t *newRoot = createNode(header->RRNnextNode + 1, '0');
        SplitChild(header, indexFile, newRoot, root);
        header->RRNnextNode++;
        header->noderoot = newRoot->RRNnode;
        InsertNonFull(header, newRoot, indexFile, codLinha, offsetRegistro, -1, -1);
    }
    else
        InsertNonFull(header, root, indexFile, codLinha, offsetRegistro, -1, -1);
    BTreeWriteHeader(header, indexFile);
}

void BtreeCreate(s_file_t *indexFile)
{
    BTreeHeaderCreate(indexFile);
    BTreeHeader_t *header = BTreeReadHeader(indexFile);
    BTreeNode_t *newRoot = createNode(header->RRNnextNode, '1');
    header->RRNnextNode++;
    header->noderoot = newRoot->RRNnode;
    BTreeWriteHeader(header, indexFile);
    BTreeWriteNode(newRoot, indexFile);
    free(header);
    free(newRoot);
}

// ==========================================================================

//                            B-TREE TRAVERSAL
// ===========================================================================

void InOrderTraversal(int RRNroot, s_file_t *indexFile)
{
    if (RRNroot == -1)
        return;
    BTreeNode_t *root = BTreeReadNode(indexFile, RRNroot);
    //printf("node: %d ", root->RRNnode);

    for (int i = 0; i < root->numberOfIndexedKeys; i++)
    {
        InOrderTraversal(root->pointers[i], indexFile);
        //printf("(%d, %lld)", root->chaves[i].key, root->chaves[i].dataRegReference);
    }
    InOrderTraversal(root->pointers[root->numberOfIndexedKeys], indexFile);
}

我的 btree.h 文件:

#ifndef BTREE_H_
#define BTREE_H_

#include "funcoes-gerais.h"
#include "linha.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_INDEXED_KEYS 4

typedef struct BTreeNode BTreeNode_t;

typedef struct BTreeHeader BTreeHeader_t;

typedef long long int ll;

struct BTreeHeader {
  char status;
  int noderoot;
  int RRNnextNode;
  // This guy is not important, ignore this
  char trash[69];
};

void BTreeHeaderCreate(s_file_t *indexFile);
void BtreeInsertion(s_file_t *indexFile, int codLinha, ll offsetRegistro,
                    char removido);
void BtreeCreate(s_file_t *indexFile);
void InOrderTraversal(int RRNroot, s_file_t *indexFile);
BTreeHeader_t *BTreeReadHeader(s_file_t *indexFile);
void BTreeWriteHeader(BTreeHeader_t *header, s_file_t *indexFile);
#endif

在这个 B 树中,顺序是 5。那么,我最多可以在同一个节点中得到 4 个键和 5 个指针。我的疑问之一是 splitChild 算法。如果我根据本书的算法准确地获得了节点中的最大密钥,我该如何拆分节点并提升密钥?如果我为每个节点的插入键提供更多空间,并且仅在节点中具有最大 + 1 个键时才拆分,则算法将仅在下一次插入时拆分。这让我变得一团糟。

0 个答案:

没有答案
相关问题