Question

作为学习Rust的练习，我决定实现一个位向量库，并参考std::vec::Vec来决定应该提供哪些方法。

我有以下代码：

extern crate num;

use std::cmp::Eq;
use std::ops::{BitAnd,BitOrAssign,Index,Shl};
use num::{One,Zero,Unsigned,NumCast};

/// Marker trait for the word type `S` that a `BitVector<S>` stores its bits in.
///
/// It declares no methods of its own; it only bundles the operator bounds
/// (`&`, `|=`, `<<`, `==`) and the `num` bounds listed below under one name.
pub trait BitStorage: Sized + 
    BitAnd<Self, Output = Self> + 
    BitOrAssign<Self> + 
    Shl<Self, Output = Self> + 
    Eq + Zero + One + Unsigned + NumCast + Copy {}

/// Blanket implementation: any type that already satisfies every bound
/// required by `BitStorage` is automatically a `BitStorage`.
impl<T> BitStorage for T
where
    T: Sized
        + BitAnd<T, Output = T>
        + BitOrAssign<T>
        + Shl<T, Output = T>
        + Eq
        + Zero
        + One
        + Unsigned
        + NumCast
        + Copy,
{
}

/// A sequence of bits packed into words of type `S`.
pub struct BitVector<S: BitStorage> {
    // Backing words that hold the bits.
    data: Vec<S>,
    // Number of addressable bits; index bounds checks compare against this.
    capacity: usize,
    // Number of bits per word (`with_capacity` sets it to `size_of::<S>() * 8`).
    storage_size: usize
}

impl<S: BitStorage> BitVector<S> {
    /// Creates a bit vector able to address `capacity` bits, all initially zero.
    ///
    /// The backing storage holds `capacity / bits_per_word + 1` words, where
    /// `bits_per_word` is eight times the byte size of `S`.
    pub fn with_capacity(capacity: usize) -> BitVector<S> {
        let bits_per_word = 8 * std::mem::size_of::<S>();
        let word_count = capacity / bits_per_word + 1;
        let words = vec![S::zero(); word_count];
        BitVector {
            data: words,
            capacity: capacity,
            storage_size: bits_per_word,
        }
    }

    /// Returns the bit at `index`, or `None` when `index` is not below the
    /// vector's capacity.
    pub fn get(&self, index: usize) -> Option<bool> {
        if !self.index_in_bounds(index) {
            return None;
        }
        Some(self.get_unchecked(index))
    }

    /// Sets the bit at `index` to `value`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not below the vector's capacity.
    pub fn set(&mut self, index: usize, value: bool) {
        self.panic_index_bounds(index);
        let (data_index, remainder) = self.compute_data_index_and_remainder(index);
        let mask = S::one() << remainder;
        if value {
            self.data[data_index] |= mask;
        } else if (self.data[data_index] & mask) != S::zero() {
            // OR-ing with zero can never clear a bit, so `false` needs its own
            // path. `BitStorage` has no `Not` bound, but `Unsigned` brings in
            // the arithmetic operators of `num::Num`; subtracting the mask is
            // exact here because the bit is known to be set.
            self.data[data_index] = self.data[data_index] - mask;
        }
    }

    /// Returns the number of bits this vector can address.
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// Attempts to split the vector at bit `index` into a left and a right part.
    ///
    /// `index` must be a multiple of the storage size, otherwise the call panics.
    ///
    /// NOTE: this method does not compile as written. `left` and `right` are
    /// local values that are dropped when the function returns, so the
    /// references in the returned tuple would dangle and the borrow checker
    /// rejects them.
    pub fn split_at(&self, index: usize) -> (&BitVector<S>, &BitVector<S>) {
        self.panic_index_not_on_storage_bound(index);
        let data_index = self.compute_data_index(index);
        let (capacity_left, capacity_right) = self.compute_capacities(index);
        let (data_left, data_right) = self.data.split_at(data_index);

        // `to_vec` copies the words, so each half owns fresh storage.
        let left = BitVector {
            data: data_left.to_vec(),
            capacity: capacity_left,
            storage_size: self.storage_size
        };
        let right = BitVector {
            data: data_right.to_vec(),
            capacity: capacity_right,
            storage_size: self.storage_size
        };
        // Rejected by the compiler: references to locals cannot outlive this function.
        (&left, &right)
    }

    /// Mutable counterpart of `split_at`: attempts to split the vector at bit
    /// `index` into two mutable parts.
    ///
    /// `index` must be a multiple of the storage size, otherwise the call panics.
    ///
    /// NOTE: this method does not compile as written, for the same reason as
    /// `split_at`: it returns references to the locals `left` and `right`.
    /// Also, because the halves are built with `to_vec`, they are copies;
    /// writes through them would not reach `self.data`.
    pub fn split_at_mut(&mut self, index: usize) -> (&mut BitVector<S>, &mut BitVector<S>) {
        self.panic_index_not_on_storage_bound(index);
        let data_index = self.compute_data_index(index);
        let (capacity_left, capacity_right) = self.compute_capacities(index);
        let (data_left, data_right) = self.data.split_at_mut(data_index);

        let mut left = BitVector {
            data: data_left.to_vec(),
            capacity: capacity_left,
            storage_size: self.storage_size
        };
        let mut right = BitVector {
            data: data_right.to_vec(),
            capacity: capacity_right,
            storage_size: self.storage_size
        };
        // Rejected by the compiler: references to locals cannot outlive this function.
        (&mut left, &mut right)
    }

    /// Reads the bit at `index` without comparing `index` against the capacity.
    ///
    /// The access into the backing `Vec` still goes through the indexing
    /// operator, so an out-of-range word index panics there.
    #[inline]
    fn get_unchecked(&self, index: usize) -> bool {
        let (word_index, shift) = self.compute_data_index_and_remainder(index);
        let mask = S::one() << shift;
        let masked = self.data[word_index] & mask;
        masked != S::zero()
    }

    /// Splits a bit `index` into the position of its word in `data` and the
    /// bit offset inside that word (already converted to `S`).
    #[inline]
    fn compute_data_index_and_remainder(&self, index: usize) -> (usize, S) {
        (
            self.compute_data_index(index),
            self.compute_data_remainder(index),
        )
    }

    /// Returns the position in `data` of the word that contains bit `index`.
    #[inline]
    fn compute_data_index(&self, index: usize) -> usize {
        index / self.storage_size
    }

    /// Returns the offset of bit `index` inside its word, converted to `S` so
    /// it can be used as a shift amount.
    #[inline]
    fn compute_data_remainder(&self, index: usize) -> S {
        // The offset is always smaller than `storage_size`. When that field is
        // the bit width of `S` (as `with_capacity` sets it), the value fits in
        // `S` and the conversion is expected to succeed.
        let bit_offset = index % self.storage_size;
        num::cast::<usize, S>(bit_offset).unwrap()
    }

    /// Returns the bit capacities `(left, right)` of the two halves produced
    /// by splitting at `index_to_split`.
    #[inline]
    fn compute_capacities(&self, index_to_split: usize) -> (usize, usize) {
        let left_bits = index_to_split;
        let right_bits = self.capacity - index_to_split;
        (left_bits, right_bits)
    }

    /// Tells whether `index` addresses a bit inside this vector, i.e. whether
    /// it is strictly below the capacity.
    #[inline]
    fn index_in_bounds(&self, index: usize) -> bool {
        self.capacity > index
    }

    /// Panics with an "Index out of bounds" message unless `index` is below
    /// the capacity.
    #[inline]
    fn panic_index_bounds(&self, index: usize) {
        if self.index_in_bounds(index) {
            return;
        }
        panic!("Index out of bounds. Length = {}, Index = {}", self.capacity, index);
    }

    /// Panics unless `index` is an exact multiple of the storage size, i.e.
    /// unless it falls on a word boundary.
    #[inline]
    fn panic_index_not_on_storage_bound(&self, index: usize) {
        let offset_in_word = index % self.storage_size;
        if offset_in_word == 0 {
            return;
        }
        panic!("Index not on storage bound. Storage size = {}, Index = {}", self.storage_size, index);
    }
}

/// Shared `true` value that `bool_ref!` hands out references to.
static TRUE: bool = true;
/// Shared `false` value that `bool_ref!` hands out references to.
static FALSE: bool = false;

/// Evaluates the boolean expression once and yields a reference to the
/// matching static (`TRUE` or `FALSE`), so a `&bool` can be returned without
/// borrowing from a temporary.
macro_rules! bool_ref {
    ($cond:expr) => {
        match $cond {
            true => &TRUE,
            false => &FALSE,
        }
    };
}

/// Enables `vector[i]` syntax for reading a single bit.
impl<S: BitStorage> Index<usize> for BitVector<S> {
    type Output = bool;

    /// Returns a reference to a static `true`/`false` matching the bit at
    /// `index`; panics when `index` is not below the capacity.
    fn index(&self, index: usize) -> &Self::Output {
        self.panic_index_bounds(index);
        let bit = self.get_unchecked(index);
        bool_ref!(bit)
    }
}

编译器错误发生在split_at和split_at_mut方法上：它们基本上是在告诉我，在这两种情况下left和right的存活时间都不够长，无法作为引用返回。我理解这一点，因为它们是在栈上创建的，而我却想把它们作为引用返回。

然而，我的设计受到std::vec::Vec的启发，在SliceExt trait中可以看到它们的定义如下：

// Quoted declaration: borrows `self` immutably, takes an index `mid`, and
// returns a pair of shared slices (references, not newly owned values).
#[stable(feature = "core", since = "1.6.0")]
fn split_at(&self, mid: usize) -> (&[Self::Item], &[Self::Item]);

// Quoted declaration: borrows `self` mutably, takes an index `mid`, and
// returns a pair of mutable slices.
#[stable(feature = "core", since = "1.6.0")]
fn split_at_mut(&mut self, mid: usize) -> (&mut [Self::Item], &mut [Self::Item]);

我认为这样做是为了方便最终用户，因为他们宁愿处理引用而不是Box。

我想我可以通过将返回的位向量放入Box<_>来修复我的错误，但有没有办法将新创建的结构体作为引用返回？

作为一个额外的问题：如果我返回(BitVector<S>, BitVector<S>)它会起作用，这样做的缺点是什么？为什么SliceExt特征不能做到这一点？

Answer 1

如何将新创建的结构作为参考返回？

你不能。没有办法绕过这个问题；这根本不可能。如你所说，如果它是在栈上声明的，那么函数返回时该值会被销毁（drop），任何指向它的引用都将失效。

那么是什么让Vec与众不同？

Vec<T>是切片（&[T]）的拥有所有权的对应类型。Vec包含指向数据起始位置的指针、元素计数和容量，而切片只有指针和计数。两者都保证所有数据在内存中是连续的。在伪Rust中，它们看起来像这样：

// Pseudo-Rust sketch of the owning vector: a pointer to the start of the
// data, an element count, and a capacity.
struct Vec<T> {
    data: *mut T,
    size: usize,
    capacity: usize,
}

// Pseudo-Rust sketch of a borrowed slice: only a pointer and an element count.
// NOTE(review): illustrative only; real Rust would reject this definition
// because the lifetime parameter `'a` is not used by any field.
struct Slice<'a, T> {
    data: *mut T,
    size: usize,
}

Vec::split_at可以返回切片，因为Vec本质上已经包含了一个切片。它并没有新建一个值再返回对它的引用，返回的只是指针和计数的副本。

如果您为自己那个拥有所有权的数据类型创建一个借用版本的对应类型，那么您就可以返回该类型。例如：

// Owning bit vector, simplified to `u8` words for the example.
struct BitVector {
    // Backing bytes that hold the bits.
    data: Vec<u8>,
    // Number of addressable bits.
    capacity: usize,
    // Number of bits per word (`with_capacity` sets it to 8).
    storage_size: usize
}

// Borrowed counterpart of `BitVector`: it holds a slice borrowed for `'a`
// instead of an owned `Vec`, so it can be returned by value without copying
// the data.
struct BitSlice<'a> {
    // Borrowed view of the words.
    data: &'a [u8],
    // Number of bits per word, copied from the owning vector.
    storage_size: usize,
}

impl BitVector {
    /// Creates a bit vector able to address `capacity` bits, all initially
    /// zero, backed by `capacity / 8 + 1` bytes.
    fn with_capacity(capacity: usize) -> BitVector {
        let bits_per_word = 8 * std::mem::size_of::<u8>();
        let word_count = 1 + capacity / bits_per_word;
        BitVector {
            data: vec![0; word_count],
            capacity: capacity,
            storage_size: bits_per_word,
        }
    }

    /// Splits the vector into two borrowed views that share `self`'s lifetime.
    ///
    /// The split point is hard-coded to word 0 in this sketch, so the left
    /// view is empty and the right view covers all of the data.
    fn split_at<'a>(&'a self) -> (BitSlice<'a>, BitSlice<'a>) {
        let (head, tail) = self.data.split_at(0);
        let bits_per_word = self.storage_size;
        (
            BitSlice { data: head, storage_size: bits_per_word },
            BitSlice { data: tail, storage_size: bits_per_word },
        )
    }
}

// Empty entry point; presumably present only so the snippet builds as a standalone program.
fn main() {}

要沿用Vec的思路，您可能需要为BitVector实现到BitSlice的Deref和DerefMut，然后把所有不改变容量的方法都实现在BitSlice上。

我认为这样做是为了方便最终用户，因为他们宁愿处理引用而不是Box。

引用和Box在使用处应该基本上是透明的。主要原因是性能：Box是堆分配的。

我想我可以通过将返回的位向量放入Box<_>来修复错误

这不是一个好主意。您已经通过Vec进行了一次堆分配，而装箱会再引入一层间接访问和额外的堆使用。

如果我返回(BitVector<S>, BitVector<S>)它会起作用，这样做的缺点是什么？为什么SliceExt特征不能做到这一点？

是的，在这里您返回的是堆分配的结构。返回它们本身没有任何缺点，缺点只在于需要执行分配。这就是SliceExt没有这样做的原因。

这是否也直接转换为split_at_mut变体？

是

struct BitSliceMut<'a> {
    data: &'a mut [u8],
    storage_size: usize,
}

fn split_at_mut<'a>(&'a mut self) -> (BitSliceMut<'a>, BitSliceMut<'a>) {
    let (data_left, data_right) = self.data.split_at_mut(0);
    let left = BitSliceMut {
        data: data_left,
        storage_size: self.storage_size
    };
    let right = BitSliceMut {
        data: data_right,
        storage_size: self.storage_size
    };
    (left, right)
}

这有助于指出&T和&mut T是不同的类型，并且行为方式不同。

不允许将(mut BitSlice<'a>, mut BitSlice<'a>)作为返回类型。

返回mut T是没有意义的（参见：What's the difference in `mut` before a variable name and after the `:`?）。对于BitSliceMut，可变性是其所包含类型（&mut [u8]）的一个方面。

Answer 2

标准库之所以“被允许”通过引用返回，是因为它没有在栈上分配任何东西。它返回的是对已经分配好的内存的引用，而这块内存的存活时间足够长。

所以你基本上有两个选择：

如果您在栈上分配内存，则必须将其按值返回。这也包括Box<_>的情形：您按值返回Box，而Box内部持有指向堆分配内存的指针。
如果您没有在栈上分配内存，则可以返回对已经存在于内存中的结果的引用。

在Rust中，按值返回是有效的，因为值被移动，而不是被复制。

如何将新创建的结构作为参考返回？

2 个答案: