在C#中读取UTF-8文件

时间:2013-11-15 20:06:04

标签: c# file-io utf-8 binaryreader

我有一个使用 UTF-8 编码的文件,正在尝试读取它。到目前为止,我已经试过 BinaryReader、FileStream 和 File.ReadAllLines,但每次都只能得到文件的第一行。

以下是我迄今为止尝试过的一些示例:

/// <summary>
/// Experiment form: each button tries a different API for reading the file at
/// <c>filename</c> and shows the outcome in <c>richTextBox1</c>.
/// </summary>
public partial class Form1 : Form
    {
        private string filename = @"C:\UNICORN\Server\Fil\Users30.mpm";

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Empties the given rich text box.</summary>
        private static void clearText(RichTextBox rtb)
        {
            rtb.Text = "";
        }

        /// <summary>
        /// Reads the whole file through a BinaryReader and shows its size
        /// followed by its content decoded as UTF-8.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();

            // File.OpenRead asks for read access only; File.Open(path, FileMode.Open)
            // would request read/write access, which is not needed here.
            using (BinaryReader br = new BinaryReader(File.OpenRead(filename)))
            {
                int length = (int)br.BaseStream.Length;    // length of the file
                byte[] bytes = br.ReadBytes(length);

                sb.Append("File Size: " + bytes.Length + "\n");
                // The file is UTF-8; decoding it as ASCII would turn every
                // non-ASCII byte into '?'.
                sb.AppendLine(Encoding.UTF8.GetString(bytes));
            }

            richTextBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Reads the file as consecutive 32-bit integers through a BinaryReader
        /// and shows each value cast to a char on its own line.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();

            using (BinaryReader br = new BinaryReader(File.OpenRead(filename)))
            {
                int length = (int)br.BaseStream.Length;

                sb.Append("File Size: " + length + "\n");

                // Stop before a trailing partial group: ReadInt32 throws
                // EndOfStreamException when fewer than four bytes remain.
                for (int pos = 0; pos + sizeof(int) <= length; pos += sizeof(int))
                {
                    int v = br.ReadInt32();
                    sb.Append((char)v + "\n");
                }
            }

            richTextBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Reads the file byte by byte through a BinaryReader and lists every
        /// byte value next to the same value cast to a char.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();

            using (BinaryReader br = new BinaryReader(File.OpenRead(filename)))
            {
                int length = (int)br.BaseStream.Length;    // length of the file

                sb.Append("File Size: " + length + "\n");
                for (int i = 0; i < length; i++)
                {
                    byte b = br.ReadByte();
                    sb.Append("Byte: " + b + " - " + (char)b + "\n");
                }
            }

            richTextBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Reads the entire file with a UTF-8 StreamReader and shows it.
        /// </summary>
        private void button4_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);

            using (StreamReader sr = new StreamReader(filename, Encoding.UTF8))
            {
                // A single ReadLine() call returns only the first line;
                // ReadToEnd() returns everything up to the end of the stream.
                richTextBox1.Text = sr.ReadToEnd();
            }
        }

        /// <summary>
        /// Reads the file line by line with a StreamReader over a read-only
        /// stream and shows all lines.
        /// </summary>
        private void button5_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();

            using (StreamReader reader = new StreamReader(File.OpenRead(filename)))
            {
                string line;
                // ReadLine returns null once the end of the stream is reached.
                while ((line = reader.ReadLine()) != null)
                {
                    sb.AppendLine(line);
                }
            }

            richTextBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Reads a fixed slice of the file with a FileStream, decodes it with a
        /// UTF-8 Decoder and shows the decoded characters separated by spaces.
        /// </summary>
        private void button6_Click(object sender, EventArgs e)
        {
            const int sampleOffset = 55;     // byte offset at which the slice starts
            const int sampleLength = 100;    // maximum number of bytes in the slice

            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();
            byte[] byData = new byte[sampleLength];
            int bytesRead = 0;

            try
            {
                // The using block releases the file handle even if Seek or Read throws.
                using (FileStream aFile = File.OpenRead(filename))
                {
                    aFile.Seek(sampleOffset, SeekOrigin.Begin);
                    // Read may return fewer bytes than requested (for example
                    // near the end of the file), so keep the actual count.
                    bytesRead = aFile.Read(byData, 0, sampleLength);
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the failure in the text box instead of crashing.
                sb.Append("ERROR: " + ex.ToString());
            }

            // Decode only the bytes that were really read, so no padding
            // '\0' characters from the unused part of the buffer are shown.
            Decoder d = Encoding.UTF8.GetDecoder();
            char[] charData = new char[Encoding.UTF8.GetMaxCharCount(bytesRead)];
            int charCount = d.GetChars(byData, 0, bytesRead, charData, 0);

            for (int i = 0; i < charCount; i++)
            {
                sb.Append(charData[i] + " ");
            }

            richTextBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Shows the encoding a StreamReader reports for the file when
        /// byte-order-mark detection is enabled.
        /// </summary>
        private void button7_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();

            using (var r = new StreamReader(filename, detectEncodingFromByteOrderMarks: true))
            {
                // Detection only runs on the first read; before that
                // CurrentEncoding is just the reader's default encoding.
                r.Peek();

                var es = r.CurrentEncoding;
                sb.Append("Encoding: " + es);
            }

            richTextBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Reads all lines with File.ReadAllLines as UTF-8 and shows them, each
        /// followed by a line feed.
        /// </summary>
        private void button8_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();

            foreach (string line in File.ReadAllLines(filename, Encoding.UTF8))
            {
                sb.Append(line + "\n");
            }

            richTextBox1.Text = sb.ToString();
        }
    }

所有这些示例都只显示该文件的第一行。应该如何读取/解析整个文件?

该文件包含一个用户列表,我的最终目的是读取该文件并查找其中的用户名。目前,它只显示文件的第一行,即 "UNICORN 3.06"。

2 个答案:

答案 0 :(得分:2)

如果您只想要文本,ReadAllText方法将整个文件读取为utf-8:

    // Loads the complete file as text and shows it in the rich text box.
    private void button1_Click(object sender, EventArgs e)
    {
        // No encoding argument is passed to ReadAllText here.
        string contents = File.ReadAllText(filename);
        richTextBox1.Text = contents;
    }

如果您还需要有关文件长度的信息,请将文件作为字节读取并将其解码为utf-8:

    // Shows the file size in bytes on the first line, followed by the file
    // content decoded as UTF-8.
    private void button1_Click(object sender, EventArgs e)
    {
        byte[] rawBytes = File.ReadAllBytes(filename);

        string sizeHeader = "File size: " + rawBytes.Length + "\r\n";
        string decodedText = Encoding.UTF8.GetString(rawBytes);

        richTextBox1.Text = sizeHeader + decodedText;
    }

答案 1 :(得分:1)

我相信 ReadByte 应该在其构造函数中带有 len 位置参数,不过也可以这样做:

// Render every byte of the input file as an 8-character binary string and
// write the concatenation of those strings to the output file.
byte[] rawBytes = File.ReadAllBytes(inputFilename);
StringBuilder bitText = new StringBuilder();

for (int index = 0; index < rawBytes.Length; index++)
{
    // Base-2 text of the byte, left-padded with '0' to a total width of 8.
    string bits = Convert.ToString(rawBytes[index], 2);
    bitText.Append(bits.PadLeft(8, '0'));
}

File.WriteAllText(outputFilename, bitText.ToString());