如何为 Iterator<Item> 实现 std::io::Read?

时间:2017-12-02 15:38:07

标签: io iterator rust

更具体地说,我正在从标准输入中读取数据并过滤掉某些行,希望将过滤后的结果交给 csv::Reader 使用。

3 个答案:

答案 0 :(得分:2)

如果允许返回部分读取(即一次调用读取的字节数可以少于缓冲区大小),Read::read 就有一种很简单的实现方式。先从与 Shepmaster 的答案类似的结构体开始:

use std::io::{self, Read};

/// Adapter that lets an iterator of byte-like items be consumed through
/// `std::io::Read`.
struct IteratorAsRead<I: Iterator> {
    /// Source of the items that have not been touched yet.
    iter: I,
    /// A partially consumed item together with the number of its bytes that
    /// were already handed out, if the previous read could not finish it.
    leftover: Option<(I::Item, usize)>,
}

impl<I: Iterator> IteratorAsRead<I> {
    /// Creates a reader over anything that converts into the iterator `I`.
    ///
    /// Nothing is pulled from the iterator here; the reader starts without
    /// any pending leftover data.
    pub fn new<T>(iter: T) -> Self
    where
        T: IntoIterator<IntoIter = I, Item = I::Item>,
    {
        let iter = iter.into_iter();
        Self {
            iter,
            leftover: None,
        }
    }
}

接着这样实现 read 函数:先找到下一个待读取的非空字符串,尝试将其写入缓冲区;如果没能全部写入,就把剩余数据保存下来,留给下一次调用继续读取。

impl<I> Read for IteratorAsRead<I>
where
    I: Iterator,
    I::Item: AsRef<[u8]>,
{
    /// Copies bytes from at most one item into `buf`.
    ///
    /// A partially consumed item from a previous call is resumed first;
    /// otherwise the iterator is advanced to its next non-empty item.
    /// Returns `Ok(0)` once no non-empty item is left.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // `take()` leaves `None` behind, so the leftover only needs to be
        // written back when the item is still unfinished.
        let (item, start) = if let Some(pending) = self.leftover.take() {
            pending
        } else if let Some(fresh) = self.iter.find(|next| !next.as_ref().is_empty()) {
            (fresh, 0)
        } else {
            return Ok(0);
        };

        // Delegate the actual copy to the `Read` impl of byte slices.
        let mut remaining = &item.as_ref()[start..];
        let count = remaining.read(buf)?;

        let consumed = start + count;
        if consumed < item.as_ref().len() {
            self.leftover = Some((item, consumed));
        }

        Ok(count)
    }
}

答案 1 :(得分:2)

标准库提供了 std::io::Cursor 类型,它把一个缓冲区和该缓冲区中的当前位置包装在一起。借助它可以进一步简化 Veedrac 的答案中的代码:

use std::io::{self, Cursor, Read};

/// Adapts an iterator of byte-like items into a `std::io::Read` source.
///
/// Each item is wrapped in a `Cursor`, which keeps track of how much of that
/// item has already been handed out to the caller.
struct IteratorAsRead<I>
where
    I: Iterator,
{
    /// Items that have not been started yet.
    iter: I,
    /// The item currently being read, or `None` when the iterator had
    /// nothing (more) to offer.
    cursor: Option<Cursor<I::Item>>,
}

impl<I> IteratorAsRead<I>
where
    I: Iterator,
{
    /// Creates a reader over anything that converts into the iterator `I`.
    ///
    /// The first item is pulled from the iterator immediately so that a
    /// cursor is ready for the first `read` call.
    pub fn new<T>(iter: T) -> Self
    where
        T: IntoIterator<IntoIter = I, Item = I::Item>,
    {
        let mut iter = iter.into_iter();
        let cursor = iter.next().map(Cursor::new);
        IteratorAsRead { iter, cursor }
    }
}

impl<I> Read for IteratorAsRead<I>
where
    I: Iterator,
    Cursor<I::Item>: Read,
{
    /// Copies bytes from the current item into `buf`, moving on to the next
    /// item whenever the current one is exhausted.
    ///
    /// Returns the number of bytes copied from a single item (a partial
    /// read), or `Ok(0)` when every item has been consumed or `buf` is empty.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // With a zero-length buffer every cursor reports 0 bytes read, which
        // is indistinguishable from "item exhausted". Without this guard the
        // loop below would advance through the whole iterator and silently
        // drop all remaining data.
        if buf.is_empty() {
            return Ok(0);
        }

        while let Some(cursor) = self.cursor.as_mut() {
            let read = cursor.read(buf)?;
            if read > 0 {
                return Ok(read);
            }
            // Current item is finished (or was empty): advance to the next.
            self.cursor = self.iter.next().map(Cursor::new);
        }
        Ok(0)
    }
}

// Reading to the end must yield every single-byte piece, in order.
#[test]
fn small_pieces_are_combined() {
    let pieces = ["h", "e", "l", "l", "o"];
    let mut reader = IteratorAsRead::new(&pieces);

    let mut collected = Vec::new();
    let total = reader.read_to_end(&mut collected).unwrap();
    assert_eq!(&collected[..total], b"hello");
}

// A two-byte buffer must receive one five-byte item in three successive
// reads: "he", "ll" and finally "o".
#[test]
fn partial_reads() {
    let source = ["hello"];
    let mut reader = IteratorAsRead::new(&source);

    let mut window = [0; 2];

    for expected in [&b"he"[..], &b"ll"[..], &b"o"[..]].iter() {
        let count = reader.read(&mut window).unwrap();
        assert_eq!(&window[..count], *expected);
    }
}

Playground

答案 2 :(得分:1)

最简单的解决方案是将所有输入一次读入一个巨大的缓冲区,然后从中读取:

// Join every piece into one owned String, then take ownership of its bytes.
let pieces = ["h", "e", "l", "l", "o"];
let joined: String = pieces.concat();
let source = joined.into_bytes();

// Read the whole byte slice into `sink` with a single `read_to_end` call.
let mut sink = Vec::new();
let total = source.as_slice().read_to_end(&mut sink).unwrap();
assert_eq!(&sink[..total], b"hello");

如果你确实需要避免把全部内容加载到内存中,可以实现一个包装器,但其中有一些繁琐的细节,因为可用的字节数和要读取的字节数并不总是一致。你必须保留一些临时状态来记录当前读到的位置,有时还需要再获取更多数据才能继续读取:

use std::io::{self, Read};
use std::cmp;

/// Adapter exposing an iterator of byte-like items through `std::io::Read`.
///
/// Rather than returning a partial read after each item, a single `read`
/// call keeps pulling items from the underlying iterator until the buffer
/// is full or the iterator runs dry.
struct IteratorAsRead<I>
where
    I: Iterator,
{
    /// Items that have not been started yet.
    iter: I,
    /// The item currently being copied out, if any.
    value: Option<I::Item>,
    /// How many bytes of `value` have already been copied.
    offset: usize,
}

impl<I> IteratorAsRead<I>
where
    I: Iterator,
{
    /// Creates a reader over anything that converts into the iterator `I`.
    ///
    /// No item is pulled from the iterator until the first `read` call.
    pub fn new<T>(iter: T) -> Self
    where
        T: IntoIterator<IntoIter = I, Item = I::Item>,
    {
        let iter = iter.into_iter();
        Self {
            iter,
            value: None,
            offset: 0,
        }
    }
}

impl<I> Read for IteratorAsRead<I>
where
    I: Iterator,
    I::Item: AsRef<[u8]>,
{
    /// Fills `buf` with as many bytes as the remaining items can provide.
    ///
    /// Returns the number of bytes written, which is smaller than
    /// `buf.len()` only when the iterator has been exhausted.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let mut written = 0;
        loop {
            if let Some(chunk) = self.value.take() {
                let exhausted = {
                    let bytes = chunk.as_ref();

                    // Unread tail of the chunk and unused tail of the buffer.
                    let pending = &bytes[self.offset..];
                    let room = buf.len() - written;

                    // Copy as much as fits.
                    let count = cmp::min(pending.len(), room);
                    buf[written..written + count].copy_from_slice(&pending[..count]);

                    written += count;
                    self.offset += count;

                    self.offset == bytes.len()
                };

                if exhausted {
                    // Chunk fully consumed: drop it and start the next one
                    // from its beginning.
                    self.offset = 0;
                } else {
                    // Keep the unfinished chunk around for the next call.
                    self.value = Some(chunk);
                }
            } else {
                // No chunk in flight: fetch one, or finish if none is left.
                match self.iter.next() {
                    Some(next) => self.value = Some(next),
                    None => return Ok(written),
                }
            }

            // Stop as soon as the caller's buffer is full.
            if written == buf.len() {
                return Ok(written);
            }
        }
    }
}

// Reading to the end must yield every single-byte piece, in order.
#[test]
fn small_pieces_are_combined() {
    let pieces = ["h", "e", "l", "l", "o"];
    let mut reader = IteratorAsRead::new(&pieces);

    let mut collected = Vec::new();
    let total = reader.read_to_end(&mut collected).unwrap();
    assert_eq!(&collected[..total], b"hello");
}

// A two-byte buffer must receive one five-byte item in three successive
// reads: "he", "ll" and finally "o".
#[test]
fn partial_reads() {
    let source = ["hello"];
    let mut reader = IteratorAsRead::new(&source);

    let mut window = [0; 2];

    for expected in [&b"he"[..], &b"ll"[..], &b"o"[..]].iter() {
        let count = reader.read(&mut window).unwrap();
        assert_eq!(&window[..count], *expected);
    }
}