Question

我在字符串中有一些数据。我有一个函数，它将流作为输入。我想将我的数据提供给我的函数，而不必将完整的字符串复制到流中。基本上我正在寻找一个可以包装字符串并从中读取的流类。

我在网上看到的唯一建议是建议不是流的StringReader，或创建内存流并写入内存流，这意味着复制数据。我可以编写自己的流对象，但棘手的部分是处理编码，因为流以字节为单位进行处理。有没有办法在不编写新流类的情况下执行此操作？

我在BizTalk中实现管道组件。BizTalk完全使用流来处理所有内容，因此您总是以流的形式将内容传递给BizTalk。BizTalk总是以小块的形式从该流中读取，因此将整个字符串复制到流中是没有意义的（特别是当字符串很大时）；如果能让BizTalk按它需要的方式直接从字符串中读取就好了。

Answer 1

您可以防止必须维护整个事物的副本，但是您将被迫使用对每个字符产生相同字节数的编码。这样，您可以通过Encoding.GetBytes(str, strIndex, byteCount, byte[], byteIndex)提供数据块，因为它们被直接请求到读缓冲区。

每个Stream.Read()操作总会有一个复制操作，因为它允许调用者提供目标缓冲区。

Answer 2

这是一个适当的StringReaderStream，具有以下缺点：

Read 的缓冲区长度必须至少为 maxBytesPerChar。通过在内部保留一个单字符缓冲区 buff = new byte[maxBytesPerChar]，也可以让 Read 支持更小的缓冲区，但对于大多数用法而言这并不是必须的。
没有实现 Seek。定位（seek）是可以实现的，但一般来说会非常棘手。（某些定位情况，例如定位到开头或定位到结尾，很容易实现。）

/// <summary>
/// Read-only, forward-only <see cref="Stream"/> that encodes a string to bytes on demand.
/// <para>
/// Slower than <see cref="Encoding.GetBytes(string)"/>, but saves memory for a large string
/// because only the chunk requested by each read is encoded.
/// </para>
/// <para>
/// Limitations: every read must supply room for at least one encoded character
/// (see <see cref="Read(byte[], int, int)"/>), and seeking is not supported.
/// </para>
/// </summary>
public class StringReaderStream : Stream
{
    private readonly string input;
    private readonly Encoding encoding;
    private readonly int maxBytesPerChar;
    private readonly int inputLength;
    private readonly long length;
    private int inputPosition;
    private long position;

    /// <summary>Creates a stream that exposes <paramref name="input"/> encoded as UTF-8.</summary>
    /// <param name="input">The text to expose; <c>null</c> is treated as an empty string.</param>
    public StringReaderStream(string input)
        : this(input, Encoding.UTF8)
    { }

    /// <summary>Creates a stream that exposes <paramref name="input"/> in the given encoding.</summary>
    /// <param name="input">The text to expose; <c>null</c> is treated as an empty string.</param>
    /// <param name="encoding">The encoding used to turn characters into bytes.</param>
    /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is <c>null</c>.</exception>
    public StringReaderStream(string input, Encoding encoding)
    {
        this.encoding = encoding ?? throw new ArgumentNullException(nameof(encoding));
        this.input = input;
        inputLength = input == null ? 0 : input.Length;
        if (inputLength > 0)
        {
            length = encoding.GetByteCount(input);
        }

        // Always initialised, independent of whether the input is empty.
        // GetMaxByteCount(1) is a worst-case bound; ASCII is special-cased to exactly one byte per char.
        maxBytesPerChar = encoding == Encoding.ASCII ? 1 : encoding.GetMaxByteCount(1);
    }

    public override bool CanRead => true;

    public override bool CanSeek => false;

    public override bool CanWrite => false;

    /// <summary>Total number of bytes the whole string encodes to.</summary>
    public override long Length => length;

    /// <summary>Number of bytes returned so far. Setting the position is not supported.</summary>
    /// <exception cref="NotSupportedException">The setter is called.</exception>
    public override long Position
    {
        get => position;
        set => throw new NotSupportedException();
    }

    /// <summary>No-op: a read-only stream has nothing to flush.</summary>
    public override void Flush()
    {
    }

    /// <summary>
    /// Encodes the next chunk of the string directly into <paramref name="buffer"/>.
    /// </summary>
    /// <param name="buffer">Destination buffer.</param>
    /// <param name="offset">Index in <paramref name="buffer"/> at which to start writing.</param>
    /// <param name="count">Maximum number of bytes to write.</param>
    /// <returns>The number of bytes written, or 0 once the whole string has been read.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or <paramref name="count"/> is negative.</exception>
    /// <exception cref="ArgumentException">
    /// The range exceeds the buffer, or <paramref name="count"/> is smaller than the
    /// encoding's maximum byte count per character while data remains.
    /// </exception>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (buffer == null)
        {
            throw new ArgumentNullException(nameof(buffer));
        }
        if (offset < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(offset));
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(count));
        }
        if (count > buffer.Length - offset)
        {
            throw new ArgumentException("offset and count exceed the buffer length");
        }

        if (inputPosition >= inputLength)
        {
            return 0;
        }
        if (count < maxBytesPerChar)
        {
            throw new ArgumentException("count has to be greater or equal to max encoding byte count per char");
        }

        int charCount = Math.Min(inputLength - inputPosition, count / maxBytesPerChar);
        charCount = AlignToSurrogatePair(charCount, count);

        int byteCount = encoding.GetBytes(input, inputPosition, charCount, buffer, offset);
        inputPosition += charCount;
        position += byteCount;
        return byteCount;
    }

    /// <summary>
    /// Adjusts a chunk size so the chunk does not end between the two halves of a surrogate pair.
    /// Encoding the halves separately would emit two replacement sequences instead of one code point.
    /// </summary>
    private int AlignToSurrogatePair(int charCount, int count)
    {
        int lastIndex = inputPosition + charCount - 1;
        bool splitsPair = lastIndex + 1 < inputLength
            && char.IsHighSurrogate(input[lastIndex])
            && char.IsLowSurrogate(input[lastIndex + 1]);
        if (!splitsPair)
        {
            return charCount;
        }
        if (charCount > 1)
        {
            // Leave the high surrogate for the next read, where it starts the chunk.
            return charCount - 1;
        }

        // The chunk is a lone high surrogate: take the whole pair when it fits in the caller's buffer.
        // If it does not fit, fall back to the single char so the read still makes progress.
        int pairByteCount = encoding.GetByteCount(new[] { input[lastIndex], input[lastIndex + 1] });
        return pairByteCount <= count ? 2 : charCount;
    }

    /// <summary>Not supported: the stream is forward-only.</summary>
    /// <exception cref="NotSupportedException">Always.</exception>
    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotSupportedException();
    }

    /// <summary>Not supported: the stream is read-only.</summary>
    /// <exception cref="NotSupportedException">Always.</exception>
    public override void SetLength(long value)
    {
        throw new NotSupportedException();
    }

    /// <summary>Not supported: the stream is read-only.</summary>
    /// <exception cref="NotSupportedException">Always.</exception>
    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotSupportedException();
    }
}

Answer 3

虽然这个问题最初被标记为 c#-4.0，但在 .NET 5 中通过引入 Encoding.CreateTranscodingStream 可以很容易地做到这一点：

> 创建一个流，用于在内部编码和外部编码之间对数据进行转码，类似于 Convert(Encoding, Encoding, Byte[])。

诀窍是定义一个直接访问 string 底层字节的 UnicodeStream，然后将其包装在转码流中，以所需的编码呈现流内容。

以下类和扩展方法可以完成这项工作：

/// <summary>
/// Extension methods that expose in-memory text as a read-only <see cref="Stream"/>
/// without first copying the text into a byte buffer.
/// </summary>
public static partial class TextExtensions
{
    /// <summary>
    /// The UTF-16 encoding whose byte order matches the current platform
    /// (little-endian or big-endian, as reported by <see cref="BitConverter.IsLittleEndian"/>).
    /// </summary>
    public static Encoding PlatformCompatibleUnicode
    {
        get
        {
            if (BitConverter.IsLittleEndian)
            {
                return Encoding.Unicode;
            }
            return Encoding.BigEndianUnicode;
        }
    }

    // True when the encoding's code page is the UTF-16 code page for this platform's byte order
    // (1200 on little-endian platforms, 1201 on big-endian ones).
    static bool IsPlatformCompatibleUnicode(this Encoding encoding)
    {
        var nativeUtf16CodePage = BitConverter.IsLittleEndian ? 1200 : 1201;
        return encoding.CodePage == nativeUtf16CodePage;
    }

    /// <summary>Exposes a string as a stream of bytes in the requested encoding.</summary>
    /// <param name="string">The text to stream.</param>
    /// <param name="encoding">Target encoding; UTF-8 when omitted or <c>null</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="string"/> is <c>null</c>.</exception>
    public static Stream AsStream(this string @string, Encoding encoding = default)
    {
        if (@string == null)
        {
            throw new ArgumentNullException(nameof(@string));
        }
        return @string.AsMemory().AsStream(encoding);
    }

    /// <summary>Exposes a character buffer as a stream of bytes in the requested encoding.</summary>
    /// <param name="charBuffer">The characters to stream.</param>
    /// <param name="encoding">Target encoding; UTF-8 when omitted or <c>null</c>.</param>
    /// <returns>
    /// The raw UTF-16 stream when the target encoding is the platform's own UTF-16;
    /// otherwise a transcoding stream wrapped around it.
    /// </returns>
    public static Stream AsStream(this ReadOnlyMemory<char> charBuffer, Encoding encoding = default)
    {
        var targetEncoding = encoding ?? Encoding.UTF8;
        var rawUtf16 = new UnicodeStream(charBuffer);
        if (targetEncoding.IsPlatformCompatibleUnicode())
        {
            return rawUtf16;
        }
        return Encoding.CreateTranscodingStream(rawUtf16, PlatformCompatibleUnicode, targetEncoding, leaveOpen: false);
    }
}

/// <summary>
/// Read-only, forward-only stream over the raw UTF-16 bytes of a character buffer,
/// in the platform's native byte order. No bytes are copied until a read is requested.
/// </summary>
sealed class UnicodeStream : Stream
{
    const int BytesPerChar = 2;

    // By sealing UnicodeStream we avoid a lot of the complexity of MemoryStream.
    ReadOnlyMemory<char> charMemory;
    // Byte position. Kept as a long because a buffer longer than int.MaxValue / 2 chars
    // has more bytes than an int can count.
    long position;
    bool disposed;
    Task<int> _cachedResultTask; // For async reads, avoid allocating a Task.FromResult<int>(nRead) every time we read.

    /// <summary>Creates a stream over the characters of a string.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="string"/> is <c>null</c>.</exception>
    public UnicodeStream(string @string) : this((@string ?? throw new ArgumentNullException(nameof(@string))).AsMemory()) { }

    /// <summary>Creates a stream over an arbitrary character buffer.</summary>
    public UnicodeStream(ReadOnlyMemory<char> charMemory) => this.charMemory = charMemory;

    /// <summary>Copies the next bytes of the character data into <paramref name="buffer"/>.</summary>
    /// <returns>The number of bytes copied; 0 at the end of the data or for an empty buffer.</returns>
    /// <exception cref="ObjectDisposedException">The stream has been disposed.</exception>
    public override int Read(Span<byte> buffer)
    {
        EnsureOpen();
        var charPosition = (int)(position / BytesPerChar);
        // MemoryMarshal.AsBytes fails when the byte length exceeds int.MaxValue, so view only the
        // chars needed for this read (one extra because the read may start mid-char), capped so
        // that the byte length of the view always fits in an int.
        var charsToView = Math.Min(
            Math.Min(charMemory.Length - charPosition, 1 + buffer.Length / BytesPerChar),
            int.MaxValue / BytesPerChar);
        var byteSlice = MemoryMarshal.AsBytes(charMemory.Slice(charPosition, charsToView).Span);
        var slicePosition = (int)(position % BytesPerChar);
        var nRead = Math.Min(buffer.Length, byteSlice.Length - slicePosition);
        byteSlice.Slice(slicePosition, nRead).CopyTo(buffer);
        position += nRead;
        return nRead;
    }

    /// <inheritdoc/>
    public override int Read(byte[] buffer, int offset, int count)
    {
        ValidateBufferArgs(buffer, offset, count);
        return Read(buffer.AsSpan(offset, count));
    }

    /// <summary>Reads a single byte, or returns -1 at the end of the data.</summary>
    public override int ReadByte()
    {
        // Could be optimized.
        Span<byte> span = stackalloc byte[1];
        return Read(span) == 0 ? -1 : span[0];
    }

    /// <summary>
    /// Completes synchronously: the data is already in memory, so the result is wrapped
    /// in a completed <see cref="ValueTask{TResult}"/>.
    /// </summary>
    public override ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken = default)
    {
        EnsureOpen();
        if (cancellationToken.IsCancellationRequested)
        {
            return ValueTask.FromCanceled<int>(cancellationToken);
        }
        try
        {
            return new ValueTask<int>(Read(buffer.Span));
        }
        catch (Exception exception)
        {
            return ValueTask.FromException<int>(exception);
        }
    }

    /// <summary>
    /// Array-based async read. Honors <paramref name="cancellationToken"/> and reuses the
    /// previously returned task when the byte count is unchanged.
    /// </summary>
    public override Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
    {
        ValidateBufferArgs(buffer, offset, count);
        // Flow the token so that a cancelled read is reported as cancelled instead of performed.
        var valueTask = ReadAsync(buffer.AsMemory(offset, count), cancellationToken);
        if (!valueTask.IsCompletedSuccessfully)
        {
            return valueTask.AsTask();
        }

        var nRead = valueTask.Result;
        var resultTask = _cachedResultTask;
        if (resultTask == null || resultTask.Result != nRead)
        {
            _cachedResultTask = resultTask = Task.FromResult(nRead);
        }
        return resultTask;
    }

    void EnsureOpen()
    {
        if (disposed)
        {
            throw new ObjectDisposedException(GetType().Name);
        }
    }

    // https://docs.microsoft.com/en-us/dotnet/api/system.io.stream.flush?view=net-5.0
    // In a class derived from Stream that doesn't support writing, Flush is typically implemented as an empty method to ensure full compatibility with other Stream types since it's valid to flush a read-only stream.
    public override void Flush() { }
    public override Task FlushAsync(CancellationToken cancellationToken) => cancellationToken.IsCancellationRequested ? Task.FromCanceled(cancellationToken) : Task.CompletedTask;
    public override bool CanRead => true;
    public override bool CanSeek => false;
    public override bool CanWrite => false;
    public override long Length => throw new NotSupportedException();
    public override long Position { get => throw new NotSupportedException(); set => throw new NotSupportedException(); }
    public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();
    public override void SetLength(long value) => throw new NotSupportedException();
    public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();

    /// <summary>Releases the reference to the character buffer and marks the stream as closed.</summary>
    protected override void Dispose(bool disposing)
    {
        try
        {
            if (disposing)
            {
                _cachedResultTask = null;
                charMemory = default;
                disposed = true;
            }
        }
        finally
        {
            base.Dispose(disposing);
        }
    }

    // Standard (buffer, offset, count) validation shared by the array-based read overloads.
    static void ValidateBufferArgs(byte[] buffer, int offset, int count)
    {
        if (buffer == null)
        {
            throw new ArgumentNullException(nameof(buffer));
        }
        if (offset < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(offset));
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(count));
        }
        if (count > buffer.Length - offset)
        {
            throw new ArgumentException("offset and count exceed the buffer length");
        }
    }
}

注意事项：

您可以通过将 string、char[] 数组或其切片转换为 ReadOnlyMemory<char> 缓冲区来流式传输它们。这种转换只是包装了底层的字符串或数组内存，不会分配任何新内存。
使用 Encoding.GetBytes() 对字符串分块编码的解决方案是有缺陷的，因为它们无法处理被拆分到两个块之间的代理对（surrogate pairs）。要正确处理代理对，必须先调用 Encoding.GetEncoder() 获取并保存一个 Encoder。之后可以使用 Encoder.GetBytes(ReadOnlySpan<Char>, Span<Byte>, flush: false) 进行分块编码，它会在多次调用之间记住状态。

（微软的 TranscodingStream 正确地做到了这一点。）
使用 Encoding.Unicode 可以获得最佳性能，因为（在几乎所有 .Net 平台上）此编码与 String 类型本身的编码相同。

当提供与平台兼容的 Unicode 编码时，不使用 TranscodingStream，返回的 Stream 直接从字符数据缓冲区读取。
要做的事情：
- 在大端平台（rare）上进行测试。
- 测试长度超过 int.MaxValue / 2 的字符串。

演示 fiddle（包含一些基本测试）见此处。

Answer 4

Stream只能复制数据。此外，它处理的是 byte，而不是 char，因此您必须通过解码过程复制数据。但是，如果要将字符串视为ASCII字节流，则可以创建一个实现Stream的类来执行此操作。例如：

/// <summary>
/// Read-only, seekable stream that exposes a string as ASCII bytes, one byte per character.
/// Characters outside the ASCII range are encoded by <see cref="Encoding.ASCII"/>.
/// </summary>
public class ReadOnlyStreamStringWrapper : Stream
{
    private readonly string theString;
    private long position;

    /// <summary>Wraps <paramref name="theString"/> without copying it.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="theString"/> is <c>null</c>.</exception>
    public ReadOnlyStreamStringWrapper(string theString)
    {
        this.theString = theString ?? throw new ArgumentNullException(nameof(theString));
    }

    /// <summary>No-op: flushing a read-only stream is valid and has nothing to do.</summary>
    public override void Flush()
    {
    }

    /// <summary>Moves the read position.</summary>
    /// <param name="offset">Offset relative to <paramref name="origin"/>.</param>
    /// <param name="origin">Reference point for <paramref name="offset"/>.</param>
    /// <returns>The new position.</returns>
    /// <exception cref="ArgumentException"><paramref name="origin"/> is not a valid value.</exception>
    /// <exception cref="InvalidOperationException">
    /// The target lies before the start or past the end of the string.
    /// Seeking to exactly the end is allowed; a read from there returns 0.
    /// </exception>
    public override long Seek(long offset, SeekOrigin origin)
    {
        long target;
        switch (origin)
        {
            case SeekOrigin.Begin:
                target = offset;
                break;
            case SeekOrigin.Current:
                target = Position + offset;
                break;
            case SeekOrigin.End:
                target = theString.Length + offset;
                break;
            default:
                throw new ArgumentException("Invalid seek origin.", nameof(origin));
        }

        if (target < 0 || target > theString.Length)
        {
            throw new InvalidOperationException();
        }

        Position = target;
        return Position;
    }

    /// <summary>Not supported: the stream is read-only.</summary>
    /// <exception cref="NotSupportedException">Always.</exception>
    public override void SetLength(long value)
    {
        throw new NotSupportedException();
    }

    /// <summary>
    /// Encodes up to <paramref name="count"/> characters from the current position as ASCII
    /// into <paramref name="buffer"/> and advances the position past them.
    /// </summary>
    /// <returns>The number of bytes written; 0 at the end of the string or when <paramref name="count"/> is 0.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or <paramref name="count"/> is negative.</exception>
    /// <exception cref="ArgumentException">The range exceeds the buffer.</exception>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (buffer == null)
        {
            throw new ArgumentNullException(nameof(buffer));
        }
        if (offset < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(offset));
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(count));
        }
        if (count > buffer.Length - offset)
        {
            throw new ArgumentException("offset and count exceed the buffer length");
        }

        long remaining = theString.Length - Position;
        if (remaining <= 0 || count == 0)
        {
            return 0;
        }

        // Clamp to what is left so the final read is a short read rather than an exception.
        int charsToRead = (int)Math.Min(remaining, count);
        int bytesWritten = Encoding.ASCII.GetBytes(theString, (int)Position, charsToRead, buffer, offset);
        Position += charsToRead;
        return bytesWritten;
    }

    /// <summary>Not supported: the stream is read-only.</summary>
    /// <exception cref="NotSupportedException">Always.</exception>
    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotSupportedException();
    }

    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return true; }
    }

    public override bool CanWrite
    {
        get { return false; }
    }

    /// <summary>Length in bytes, equal to the number of characters in the string.</summary>
    public override long Length
    {
        get { return theString.Length; }
    }

    /// <summary>Current read position in bytes (equivalently, characters).</summary>
    /// <exception cref="ArgumentOutOfRangeException">The value being set is negative.</exception>
    public override long Position
    {
        get { return position; }
        set
        {
            if (value < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(value));
            }
            position = value;
        }
    }
}

但是，这需要做很多工作来避免“复制”数据……

将字符串作为流读取而不复制

4 个答案: