Unicode 转换为 Mazovia 编码时出现冗余字符

时间:2012-12-01 01:54:57

标签: c# unicode encoding character-encoding char

这个问题我已经折腾了好几个小时。我需要把包含波兰语变音字母(ąśółńźć 等)的字符串保存到文件中,但我必须使用的软件只能读取 Mazovia 编码——这是一种非常古老的编码,Microsoft 的 Encoding 类并不支持它。

.NET 字符串由 UTF-16 字符组成,因此我一直用下面的代码把 Unicode 转换为 Mazovia。

// Drop the last character of the input line and terminate the record with the platform newline.
string rekord = linia.Remove(linia.Length - 1) + Environment.NewLine;
// Swap the Polish letters for chars whose numeric values are the Mazovia codes.
string rekordMazovia = Kodowanie.UnicodeNaMazovia(rekord);
// NOTE: this overload takes no Encoding argument, so the text is written as UTF-8;
// every char in the 0x80-0xA7 range produced above is therefore stored as two bytes.
File.AppendAllText(sciezka, rekordMazovia);
/// <summary>
/// String-level helper that re-maps Polish diacritics to the chars whose
/// numeric values equal their Mazovia codes.
/// </summary>
public static class Kodowanie {
  /// <summary>
  /// Returns a copy of <paramref name="tekst"/> in which each of the 18 Polish
  /// diacritic letters is replaced by the char carrying its Mazovia code.
  /// All other characters are copied unchanged. The result is still a .NET
  /// string (UTF-16 chars), not a byte sequence.
  /// </summary>
  /// <param name="tekst">Text to convert; must not be null.</param>
  /// <returns>The converted text, same length as the input.</returns>
  public static string UnicodeNaMazovia(string tekst) {
    char[] znaki = tekst.ToCharArray();
    for (int i = 0; i < znaki.Length; i++) {
      znaki[i] = NaMazovia(znaki[i]);
    }
    return new string(znaki);
  }

  // Maps one char; anything that is not a Polish diacritic is returned as-is.
  private static char NaMazovia(char znak) {
    switch (znak) {
      case '\u0104': return '\u008F'; // Ą
      case '\u0106': return '\u0095'; // Ć
      case '\u0118': return '\u0090'; // Ę
      case '\u0141': return '\u009C'; // Ł
      case '\u0143': return '\u00A5'; // Ń
      case '\u00D3': return '\u00A3'; // Ó
      case '\u015A': return '\u0098'; // Ś
      case '\u0179': return '\u00A0'; // Ź
      case '\u017B': return '\u00A1'; // Ż
      case '\u0105': return '\u0086'; // ą
      case '\u0107': return '\u008D'; // ć
      case '\u0119': return '\u0091'; // ę
      case '\u0142': return '\u0092'; // ł
      case '\u0144': return '\u00A4'; // ń
      case '\u00F3': return '\u00A2'; // ó
      case '\u015B': return '\u009E'; // ś
      case '\u017A': return '\u00A6'; // ź
      case '\u017C': return '\u00A7'; // ż
      default: return znak;
    }
  }
}

一切本来都很正常,只是在应用程序中读取生成的文件后,每个变音符号前面都多出了一个冗余字符。效果如图:http://imgur.com/q7DZo

如何摆脱它?怎么做得更好?

1 个答案:

答案 0 :(得分:9)

Mazovia编码与代码页437类似,但在某些位置有不同的字母,所以你不能使用437。

如果您自己实现一个 MazoviaEncoding 类,就可以像下面这样轻松使用:

// Sample text made only of Polish diacritics.
const string output = "ąśółńźć";
// The custom encoding is handed to File as the third argument.
Encoding encoding = new MazoviaEncoding();
File.WriteAllText(@"test.txt", output, encoding);
// To append instead of overwrite, File.AppendAllText(@"test.txt", output, encoding)
// takes the encoding as its third parameter in the same way.

该文件将包含:

0x86 0x9E 0xA2 0x92 0xA4 0xA6 0x8D

根据 http://en.wikipedia.org/wiki/Mazovia_encoding ,这是正确的。

然后可以像在C#中的其他Encoding一样使用该实现。例如,回读文件也是有效的:

// Decode the file with the same custom encoding type that was used to write it.
var encoding = new MazoviaEncoding();
string result = File.ReadAllText(@"test.txt", encoding);

这是我的实现:

using System.Collections.Generic;
using System.Text;

namespace System.Text
{
    /// <summary>
    /// Single-byte <see cref="Encoding"/> driven by a 256-entry table: every byte value
    /// decodes to exactly one UTF-16 char and every char encodes to exactly one byte.
    /// Bytes 0x00-0x7F decode to the identical code points; bytes 0x80-0xFF decode to
    /// the Mazovia code points listed in the table. Characters that have no table entry
    /// are encoded as '?' (0x3F).
    /// </summary>
    class MazoviaEncoding : Encoding
    {
        /// <summary>Byte emitted for characters that are absent from the table ('?').</summary>
        private const byte ReplacementByte = 0x3F;

        /// <summary>Decode table: the array index is the byte value, the element is the code point.</summary>
        private static readonly int[] codePoints =  {
            0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F
            ,0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F
            ,0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F
            ,0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F
            ,0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F
            ,0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F
            ,0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F
            ,0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F
            ,0x00C7,0x00FC,0x00E9,0x00E2,0x00E4,0x00E0,0x0105,0x00E7,0x00EA,0x00EB,0x00E8,0x00EF,0x00EE,0x0107,0x00C4,0x0104
            ,0x0118,0x0119,0x0142,0x00F4,0x00F6,0x0106,0x00FB,0x00F9,0x015A,0x00D6,0x00DC,0x00A2,0x0141,0x00A5,0x015B,0x0192
            ,0x0179,0x017B,0x00F3,0x00D3,0x0144,0x0143,0x017A,0x017C,0x00BF,0x2310,0x00AC,0x00BD,0x00BC,0x00A1,0x00AB,0x00BB
            ,0x2591,0x2592,0x2593,0x2502,0x2524,0x2561,0x2562,0x2556,0x2555,0x2563,0x2551,0x2557,0x255D,0x255C,0x255B,0x2510
            ,0x2514,0x2534,0x252C,0x251C,0x2500,0x253C,0x255E,0x255F,0x255A,0x2554,0x2569,0x2566,0x2560,0x2550,0x256C,0x2567
            ,0x2568,0x2564,0x2565,0x2559,0x2558,0x2552,0x2553,0x256B,0x256A,0x2518,0x250C,0x2588,0x2584,0x258C,0x2590,0x2580
            ,0x03B1,0x00DF,0x0393,0x03C0,0x03A3,0x03C3,0x00B5,0x03C4,0x03A6,0x0398,0x03A9,0x03B4,0x221E,0x03C6,0x03B5,0x2229
            ,0x2261,0x00B1,0x2265,0x2264,0x2320,0x2321,0x00F7,0x2248,0x00B0,0x2219,0x00B7,0x221A,0x207F,0x00B2,0x25A0,0x00A0
        };

        /// <summary>Encode table: the inverse of <see cref="codePoints"/>.</summary>
        private static readonly Dictionary<char, byte> unicodeToByte;

        /// <summary>Builds the char-to-byte lookup by inverting the decode table once per process.</summary>
        static MazoviaEncoding()
        {
            unicodeToByte = new Dictionary<char, byte>(codePoints.Length);

            for (int i = 0; i < codePoints.Length; ++i)
            {
                unicodeToByte.Add((char)codePoints[i], (byte)i);
            }
        }

        /// <summary>Returns the worst-case number of bytes for <paramref name="charCount"/> chars (one byte per char).</summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="charCount"/> is negative.</exception>
        public override int GetMaxByteCount(int charCount)
        {
            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException("charCount");
            }
            return charCount;
        }

        /// <summary>Returns the worst-case number of chars for <paramref name="byteCount"/> bytes (one char per byte).</summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="byteCount"/> is negative.</exception>
        public override int GetMaxCharCount(int byteCount)
        {
            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException("byteCount");
            }
            return byteCount;
        }

        /// <summary>
        /// Encodes <paramref name="charCount"/> chars starting at <paramref name="charIndex"/> into
        /// <paramref name="bytes"/> starting at <paramref name="byteIndex"/>, one byte per char.
        /// Chars without a table entry are written as '?'.
        /// </summary>
        /// <returns>The number of bytes written, which equals <paramref name="charCount"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="chars"/> or <paramref name="bytes"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// An index or the count is negative, the source range lies outside <paramref name="chars"/>,
        /// or <paramref name="bytes"/> has too little room after <paramref name="byteIndex"/>.
        /// </exception>
        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            CheckRange(charIndex, charCount, chars.Length, "charIndex", "charCount");
            if (byteIndex < 0)
            {
                throw new ArgumentOutOfRangeException("byteIndex");
            }
            // Subtraction instead of byteIndex + charCount so the check cannot overflow.
            if (charCount > bytes.Length - byteIndex)
            {
                throw new ArgumentOutOfRangeException("byteIndex");
            }

            for (int offset = 0; offset < charCount; ++offset)
            {
                byte asMazovia;
                if (!unicodeToByte.TryGetValue(chars[charIndex + offset], out asMazovia))
                {
                    asMazovia = ReplacementByte;
                }
                bytes[byteIndex + offset] = asMazovia;
            }
            return charCount;
        }

        /// <summary>
        /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="byteIndex"/> into
        /// <paramref name="chars"/> starting at <paramref name="charIndex"/>, one char per byte.
        /// </summary>
        /// <returns>The number of chars written, which equals <paramref name="byteCount"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// An index or the count is negative, the source range lies outside <paramref name="bytes"/>,
        /// or <paramref name="chars"/> has too little room after <paramref name="charIndex"/>.
        /// </exception>
        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex )
        {
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }
            CheckRange(byteIndex, byteCount, bytes.Length, "byteIndex", "byteCount");
            if (charIndex < 0)
            {
                throw new ArgumentOutOfRangeException("charIndex");
            }
            // Subtraction instead of charIndex + byteCount so the check cannot overflow.
            if (byteCount > chars.Length - charIndex)
            {
                throw new ArgumentOutOfRangeException("charIndex");
            }

            for (int offset = 0; offset < byteCount; ++offset)
            {
                // A byte is always in 0..255, so it is always a valid index into the 256-entry table.
                chars[charIndex + offset] = (char)codePoints[bytes[byteIndex + offset]];
            }
            return byteCount;
        }

        /// <summary>Returns the number of bytes needed to encode the given char range (equal to <paramref name="count"/>).</summary>
        /// <exception cref="ArgumentNullException"><paramref name="charArray"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The range is negative or lies outside <paramref name="charArray"/>.</exception>
        public override int GetByteCount(char[] charArray, int index, int count)
        {
            if (charArray == null)
            {
                throw new ArgumentNullException("charArray");
            }
            CheckRange(index, count, charArray.Length, "index", "count");
            return count;
        }

        /// <summary>Returns the number of chars produced by decoding the given byte range (equal to <paramref name="count"/>).</summary>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The range is negative or lies outside <paramref name="bytes"/>.</exception>
        public override int GetCharCount( byte[] bytes, int index, int count )
        {
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            CheckRange(index, count, bytes.Length, "index", "count");
            return count;
        }

        // Throws unless [index, index + count) is a non-negative range inside an array of the given length.
        private static void CheckRange(int index, int count, int length, string indexName, string countName)
        {
            if (index < 0)
            {
                throw new ArgumentOutOfRangeException(indexName);
            }
            if (count < 0)
            {
                throw new ArgumentOutOfRangeException(countName);
            }
            // index is known to be >= 0 here, so length - index cannot overflow.
            if (count > length - index)
            {
                throw new ArgumentOutOfRangeException(countName);
            }
        }
    }
}