缩短线程安全(非平衡)二叉树中的临界区

时间:2017-04-18 21:13:25

标签: c multithreading concurrency pthreads mutex

我正在实现一组线程安全函数,用Pthreads更新和搜索不平衡二叉树(参见The Linux Programming Interface:练习30.2)。

实现一棵能工作的树很容易:只需在 add 或 delete 等函数中锁住整个函数体即可;但我相信可以为不平衡二叉(搜索)树实现一种细粒度的锁定策略。

我尝试过从《The Art of Multiprocessor Programming》中描述的其他数据结构“移植”各种交替(hand-over-hand)锁定方案,但每一种都要求我把树转换成另一种底层数据结构(而我希望使用真正的不平衡二叉树作为底层数据类型,而不只是提供相同的接口)。

为清楚起见,这里是用于表示此树节点的数据结构。

/* One node of the unbalanced binary search tree. */
typedef struct bt_node {
    Key key;                 /* search key; the tree routines order nodes by comparing it with < and > */
    int value;               /* payload stored alongside the key */
    struct bt_node *left;    /* child holding smaller keys, or NULL */
    struct bt_node *right;   /* child holding larger keys, or NULL */
    pthread_mutex_t mutex;   /* per-node mutex; presumably what lock()/unlock() operate on -- confirm */
} bt_node;

(我还保留bt_root结构用于簿记目的,您可以在下面的某些功能中看到这些结构)

add 函数基本上是单线程的,因为我在 add 的开头就锁住了根节点,直到工作完成后才将其解锁。这里可以做得更好吗?(lock 和 unlock 是对上面结构中的互斥量调用 pthread_mutex_lock 和 pthread_mutex_unlock 的包装。)

/*
 * Walk down from `root` looking for `key`.
 *
 * Returns the node that holds `key` when one exists; otherwise the last node
 * reached before the walk ran off the tree, i.e. the node that would become
 * the parent of a newly inserted `key`. Returns NULL only when `root` is NULL.
 * No locking is performed here.
 */
bt_node *closest_leaf(bt_node *root, Key key) {
    bt_node *last = NULL;

    for (bt_node *cursor = root; cursor != NULL; ) {
        last = cursor;
        if (key < cursor->key) {
            cursor = cursor->left;
        } else if (key > cursor->key) {
            cursor = cursor->right;
        } else {
            /* exact match: this node is the answer */
            return last;
        }
    }

    return last;
}

/*
 * Insert (key, value) into the tree. A key that is already present is left
 * untouched and the freshly created node is discarded.
 *
 * The root node's lock is held for the whole search-and-link step, so
 * insertions into a non-empty tree are fully serialized.
 * Always returns 1.
 */
int add(bt_root *root_tree, Key key, int value) {
    bt_node *fresh = create_node(key, value);
    bt_node *top = root_tree->root;

    /* Empty tree: the new node becomes the root. This path takes no lock. */
    if (top == NULL) {
        root_tree->root = fresh;
        root_tree->node_count = 1;
        return 1;
    }

    lock(top);

    /* Locking only inside closest_leaf() would not be sufficient here. */
    bt_node *parent = closest_leaf(top, key);

    /* Pick the child link the new node belongs in; NULL means duplicate key. */
    bt_node **slot = NULL;
    if (key < parent->key) {
        slot = &parent->left;
    } else if (key > parent->key) {
        slot = &parent->right;
    }

    if (slot != NULL) {
        *slot = fresh;
        ++root_tree->node_count;
    } else {
        free(fresh);
    }

    unlock(top);
    return 1;
}

删除操作也有同样的问题:能够正确工作的删除方案实际上锁住了整棵树。下面显示的代码省略了互斥量的获取和释放代码(但思路与上面相同)。

/*
 * Remove `node` from the tree. `parent` is the node whose child link
 * currently points at `node`.
 *
 * - Zero or one child: the parent's link is redirected via replace_parent().
 *   NOTE(review): whether replace_parent() also frees `node` is not visible
 *   here -- confirm, otherwise the removed node leaks.
 * - Two children: the in-order successor (the leftmost node of the right
 *   subtree) has its key/value copied into `node`, and the successor is then
 *   unlinked and freed instead of `node`.
 *
 * No locking is performed here.
 */
void splice(bt_node *node, bt_node *parent) {
    // if it's a leaf, just detach it from its parent
    if (!node->left && !node->right) {
        replace_parent(parent, node, NULL);

    // if it has one child, have it be adopted by its grandparent
    } else if (node->left && !node->right) {
        replace_parent(parent, node, node->left);
    } else if (node->right && !node->left) {
        replace_parent(parent, node, node->right);

    /* if it has two children, take the smallest value of the right tree and
       replace the node with that one. */
    } else {
        // find the smallest element of the right tree
        bt_node *smallest_parent = node;
        bt_node *smallest = node->right;
        while (smallest->left) {
            smallest_parent = smallest;
            // left is the smaller side
            smallest = smallest->left;
        }

        // "swap" the smallest.
        node->key = smallest->key;
        node->value = smallest->value;

        /* Unlink the successor. If the loop above never ran, the successor is
           node's own right child, so the link to rewrite is node->right;
           rewriting node->left would drop the whole left subtree and leave
           node->right dangling at freed memory. Otherwise the successor is a
           left child of smallest_parent. */
        if (smallest_parent == node) {
            node->right = smallest->right;
        } else {
            smallest_parent->left = smallest->right;
        }
        free(smallest);
    }
}

/*
 * Descend from `node` looking for `key`, remembering the parent of the node
 * currently being examined. When the key is found the node is handed to
 * splice() together with that parent.
 *
 * Returns 1 if a node was spliced out, 0 if the key is not in the subtree.
 */
int delete_dfs(bt_node *node, bt_node *parent, Key key) {
    while (node != NULL) {
        const Key here = node->key;

        if (here == key) {
            splice(node, parent);
            return 1;
        }

        bt_node *child;
        if (key < here) {
            child = node->left;
        } else if (key > here) {
            child = node->right;
        } else {
            /* neither equal, smaller nor larger: treat as not found */
            return 0;
        }

        parent = node;
        node = child;
    }

    return 0;
}

/*
 * Remove `key` from the tree, starting the search at the root with no
 * parent, and decrement the node count when a node was actually removed.
 */
void delete(bt_root *root_tree, Key key) {
    int removed = delete_dfs(root_tree->root, NULL, key);

    if (removed) {
        root_tree->node_count -= 1;
    }
}

有更好的方法吗?

2 个答案:

答案 0 :(得分:2)

要为插入函数实现交替(hand-over-hand)锁定方案,您需要在 closest_leaf() 函数中执行此操作:

// Finds the node that will become the parent of the node being inserted
// (or the node that already holds `key`), locking hand-over-hand on the way down.
//
// post-condition: the returned node is locked; NULL is returned (and nothing
// is locked) only when `root` is NULL.
bt_node *closest_leaf(bt_node *root, Key key) {
    bt_node *held = NULL;   // node whose lock we currently own
    bt_node *next = root;   // node we are about to step onto

    while (next != NULL) {
        // Take the next lock before giving up the previous one, so there is
        // never a moment where we hold no lock on the path.
        lock(next);
        if (held != NULL) {
            unlock(held);
        }
        held = next;

        if (key < held->key) {
            next = held->left;
        } else if (key > held->key) {
            next = held->right;
        } else {
            return held;    // exact match, still locked
        }
    }

    return held;
}

(当我们沿着树向下走时,会在检查每个节点之前先锁定它,并且只有在锁定了接下来要检查的节点之后才解锁前一个节点。)该函数返回时,所返回的节点仍处于锁定状态,因此我们可以在 add() 函数中修改它:

/*
 * Insert (key, value) into the tree. closest_leaf() returns the would-be
 * parent (or the node already holding `key`) with its lock held; the new
 * node is linked under that lock, or discarded if the key already exists.
 * Always returns 1.
 *
 * NOTE(review): node_count is incremented while only the parent node's lock
 * is held, so two threads inserting under different parents appear able to
 * update it concurrently -- confirm whether that matters to callers.
 */
int add(bt_root *root_tree, Key key, int value)
{
    bt_node *fresh = create_node(key, value);
    bt_node *top = root_tree->root;

    /* Empty tree: the new node becomes the root. This path takes no lock. */
    if (top == NULL) {
        root_tree->root = fresh;
        root_tree->node_count = 1;
        return 1;
    }

    bt_node *parent = closest_leaf(top, key);

    if (key < parent->key || key > parent->key) {
        /* Not a duplicate: hang the new node on the matching side. */
        if (key < parent->key) {
            parent->left = fresh;
        } else {
            parent->right = fresh;
        }
        ++root_tree->node_count;
    } else {
        /* Key already present: the tree is unchanged. */
        free(fresh);
    }

    unlock(parent);
    return 1;
}

删除要复杂一些。首先,我们需要修复 splice() 函数中的一个错误——考虑在 while() 循环之后 smallest_parent == node 时会发生什么(即循环执行了零次):它会覆盖 node->left,而本应覆盖的是 node->right。修复此问题最简单的方法是使用一个指向待更新指针字段的二级指针,而不是指向父节点本身的指针。

同时,可以这样简化 splice():注意到 splice() 对 parent 所做的唯一修改,就是改变 parent 中指向 node 的那个指针字段——所以我们可以只接收一个指向该指针字段的指针。这意味着我们不再需要 replace_parent() 函数。不带锁定的简化版 splice() 如下所示:

// Removes the node that *node_ptr points at. `node_ptr` is the address of the
// link (a parent's left/right field, or the tree's root pointer) that refers
// to the node, so the link can be rewritten in place. No locking is done here.
void splice(bt_node **node_ptr) {
    bt_node *victim = *node_ptr;

    if (victim->left != NULL && victim->right != NULL) {
        // Two children: locate the link to the leftmost node of the right
        // subtree, which holds the smallest key larger than the victim's.
        bt_node **link = &victim->right;
        while ((*link)->left != NULL) {
            link = &(*link)->left;
        }
        bt_node *successor = *link;

        // Move the successor's contents into the victim, then unlink and
        // free the successor instead of the victim.
        victim->key = successor->key;
        victim->value = successor->value;
        *link = successor->right;
        free(successor);
        return;
    }

    // Zero or one child: whichever child exists (possibly none) takes the
    // victim's place in the parent's link.
    *node_ptr = (victim->left == NULL) ? victim->right : victim->left;
    free(victim);
}

要添加锁定,我们要求调用 splice() 时,待删除的节点及其父节点都已被锁定。在简单的拼接情形中,我们只需在释放该节点之前将其解锁。在复杂情形中,当我们寻找要换入的次小节点时,需要再次进行交替(hand-over-hand)锁定:

// splice is the function responsible for actually removing a node from the tree
// pre-condition: node **node_ptr and its parent (containing the pointer *node_ptr) are both locked
// post-condition: the lock on the node that was at *node_ptr has been released
// in every branch; whatever lock protects the link node_ptr lives in is not
// touched here and stays with the caller.
void splice(bt_node **node_ptr) {
    bt_node *node = *node_ptr;
    // if it has one or zero child nodes, have them be adopted by their grandparent
    if (!node->left) {
        *node_ptr = node->right;
        // release the node's lock before its memory goes away
        unlock(node);
        free(node);
    }
    else if (!node->right) {
        *node_ptr = node->left;
        // release the node's lock before its memory goes away
        unlock(node);
        free(node);
    }
    /* if it has two children, take the smallest value of the right tree and
       replace the node with that one. */
    else {
        // find the smallest element of the right tree
        bt_node **smallest_ptr = &node->right;
        bt_node *smallest = *smallest_ptr;

        // walk down the left spine hand-over-hand: lock the next node first,
        // then release the one we are standing on; `node` stays locked throughout
        lock(smallest);
        while (smallest->left) {
            smallest_ptr = &smallest->left;
            lock(*smallest_ptr);
            unlock(smallest);
            smallest = *smallest_ptr;
        }

        // "swap" the smallest.
        node->key = smallest->key;
        node->value = smallest->value;
        // unlink the successor: the link that pointed at it now points at its right child
        *smallest_ptr = smallest->right;
        // `node` itself survives (only its contents changed); `smallest` is the one freed
        unlock(node);
        unlock(smallest);
        free(smallest);
    }
}

请注意,在删除的最后一部分、即寻找次小节点时,我们不需要让 smallest 的父节点保持锁定,即使我们会修改它。原因是我们在整个过程中一直锁定着 node,所以当我们通过交替锁定一路下行到 smallest 之后,就独占了这条链上的所有节点。

要让这种交替锁定的删除正常工作,最简单的方法是用迭代版本替换递归的删除。(您还需要提供一个用于锁定树根的函数,因为如果根节点的子节点少于两个并且被删除,bt_root 中的根指针将被更新,因此它需要互斥量保护):

/*
 * Remove `key` from the tree, if present, locking hand-over-hand on the way
 * down. While a node is being examined, the owner of the link that points at
 * it (the previous node, or the tree root for the first node) is still
 * locked, so splice() may rewrite that link.
 */
void delete(bt_root *root_tree, Key key)
{
    bt_node *node;
    bt_node *prev = NULL;                   /* locked node owning *node_ptr; NULL while the root lock is held instead */
    bt_node **node_ptr = &root_tree->root;  /* address of the link that points at the current node */
    int deleted = 0;

    lock_root(root_tree);
    while ((node = *node_ptr) != NULL)
    {
        lock(node);
        if (key == node->key)
        {
            /* both `node` and the owner of *node_ptr are locked here;
               splice() releases the lock on `node` */
            splice(node_ptr);
            deleted = 1;
            break;
        }
        else
        {
            /* step down: `node` is now locked, so the lock one level up can go */
            if (prev)
                unlock(prev);
            else
                unlock_root(root_tree);
            prev = node;
            if (key < node->key)
                node_ptr = &node->left;
            else
                node_ptr = &node->right;
        }
    }

    if (prev) {
        /* the root lock was dropped during the descent: release the last node
           lock first, then briefly retake the root lock to update the count */
        unlock(prev);
        if (deleted) {
            lock_root(root_tree);
            root_tree->node_count--;
            unlock_root(root_tree);
        }           
    } else {
        /* never moved past the root link, so the root lock is still held */
        if (deleted) {
            root_tree->node_count--;
        }
        unlock_root(root_tree);
    }
}

在 add 函数遍历树时,您同样需要使用这对 lock_root() / unlock_root(),以确保不会遍历到即将被并发删除操作释放的根节点。这需要修改 closest_leaf(),使其接受 bt_root * 而不是 bt_node * 参数,但这同时也让向树中的第一次插入变得线程安全:

// Finds the node that will become the parent of the node being inserted
// (or the node that already holds `key`), locking hand-over-hand starting
// from the tree's root lock.
//
// post-condition: the returned node is locked; if NULL is returned the tree
// is empty and the tree's root lock is still held instead.
bt_node *closest_leaf(bt_root *tree, Key key)
{
    bt_node *held = NULL;   // node whose lock we own; NULL while we own the root lock
    bt_node *next;

    lock_root(tree);
    next = tree->root;

    while (next != NULL) {
        // Acquire the next lock before releasing the one above it.
        lock(next);
        if (held == NULL) {
            unlock_root(tree);
        } else {
            unlock(held);
        }
        held = next;

        if (key < held->key) {
            next = held->left;
        } else if (key > held->key) {
            next = held->right;
        } else {
            return held;    // exact match, still locked
        }
    }

    return held;
}

/*
 * Insert (key, value) into the tree. A key that is already present is left
 * untouched and the freshly created node is discarded. Always returns 1.
 *
 * closest_leaf() hands back either the locked would-be parent (or locked
 * duplicate), or NULL with the tree's root lock still held when the tree is
 * empty. node_count is only ever modified under the root lock.
 */
int add(bt_root *root_tree, Key key, int value)
{
    bt_node *new_node = create_node(key, value);
    bt_node *leaf = closest_leaf(root_tree, key);

    /* NULL returned by closest_leaf() means new node is the root */
    if (leaf == NULL) {
        root_tree->root = new_node;
        /* Increment instead of assigning 1. delete() applies its decrement
           only after releasing its node locks, so the tree can become empty
           and be re-populated here while such a decrement is still pending;
           assigning 1 would then leave the count one too low.
           NOTE(review): assumes node_count starts at 0 for an empty tree --
           confirm against the bt_root initialisation code. */
        ++root_tree->node_count;
        unlock_root(root_tree);
        return 1;
    }

    if (key == leaf->key) {
        /* duplicate key: nothing to link */
        free(new_node);
        unlock(leaf);
    } else {
        if (key < leaf->key) {
            leaf->left = new_node;
        }
        else {
            leaf->right = new_node;
        }
        /* release the node lock before taking the root lock for the counter */
        unlock(leaf);

        lock_root(root_tree);
        ++root_tree->node_count;
        unlock_root(root_tree);
    }

    return 1;
}

请注意为了使node_count保持最新所需的额外复杂性和锁定 - 您应该重新考虑它是否值得拥有。

答案 1 :(得分:1)

  

我实现了一组更新和搜索的线程安全函数   带有Pthreads的不平衡二叉树(参见Linux Programming   界面:练习30.2)。

     

实现一棵“能工作”的树很容易:只需在 add 或 delete 等函数中锁住整个函数体;但我相信可以为不平衡二叉(搜索)树实现一种锁定策略,让几乎所有工作并行进行,从而最大限度地减少在临界区中花费的时间。

是,也不是。如果两个或多个线程要访问同一份共享数据,并且其中至少有一个线程会修改它,那么所有这些线程的访问都必须受到某种形式的同步保护。由于通常无法预先知道哪些节点可能被修改,因此所有访问都需要同步。另一方面,受保护区域的范围可以动态变化。不平衡树的搜索、添加和删除算法可以缩小所需同步的范围,因为原则上它们只需要保护一棵子树;另一个线程完全可以同时在另一棵独立的子树上操作。

不过,在您为此感到兴奋之前,请注意:改变同步范围需要先锁定一个互斥量再解锁另一个(或者反过来,取决于您的方法),而这些操作并不廉价。实际上,您可能会发现这些互斥量的锁定和解锁开销会抵消并行操作带来的大部分甚至全部收益。

但是,如果您预计搜索将成为树上的主要操作,添加和删除相对较少,那么您可以考虑实现读/写锁定。这个想法是你允许任意数量的线程同时搜索树,但是一个想要修改它的线程必须被授予对整个树的独占访问权限。