通过byte[]进行Windows-1252编码转换

时间:2014-05-01 13:47:53

标签: c# string winapi encoding utf-8

我正在使用Setup...系列方法(例如SetupGetLineText)来读取inf文件中的一些内容(我确实需要这些方法,对通用的ini解析器不感兴趣)。这些方法使用Windows-1252编码,我需要将其转换为Unicode。我通过字符串实现了这一点,如下所示(input是string类型):

Encoding.UTF8.GetString(Encoding.GetEncoding(1252).GetBytes(input));

虽然这样可以正常工作,但也可以直接从SetupGetLineText方法(及其他方法)中获取字节。不过,我现在还不确定如何转换这些字节,因为它们与通过Encoding.GetEncoding(1252)得到的字节不同。为了说明这一点,我上传了当前情况的截图(screenshot)。如您所见,大多数字节是一致的(忽略0),但有几处存在差异。例如,[4]和[5]是26和32,而字符串版本中对应的位置只有一个130。我如何从26和32得到130?或者更好的是,如何直接从字节数组得到UTF-8字符串?

一些代码:

/// <summary>
/// Sentinel handle value (-1); Open() compares the handle returned by
/// SetupOpenInfFile against it to decide whether opening succeeded.
/// </summary>
public static readonly IntPtr INVALID_HANDLE = (IntPtr)(-1);

/// <summary>INF style flag (bit 0, value 1); OR-ed into the style argument of SetupOpenInfFile.</summary>
public const int INF_STYLE_OLDNT = 1 << 0;

/// <summary>INF style flag (bit 1, value 2); OR-ed into the style argument of SetupOpenInfFile.</summary>
public const int INF_STYLE_WIN4 = 1 << 1;

/// <summary>
/// Sequentially laid-out context structure that is passed by reference to the
/// setupapi.dll line functions (SetupFindFirstLine, SetupFindNextLine, ...).
/// The fields are private and are not accessed by the managed code shown here;
/// they only define the layout of the structure.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct InfContext
{
    private IntPtr Inf;
    private IntPtr CurrentInf;
    private uint Section;
    private uint Line;
}

// P/Invoke declarations for setupapi.dll. All of them are declared with
// CharSet.Unicode and SetLastError = true.
//
// NOTE(review): the 'returnBuffer' parameters below are declared as 'string',
// yet the callers pre-size them and read text back from them after the call,
// so the native side presumably writes into the buffer. A .NET string is
// immutable; StringBuilder or char[] is the conventional type for an output
// buffer - confirm and change together with the callers.

// Retrieves the text of a line, identified either by 'context' or by
// infHandle/section/key, into 'returnBuffer'; 'requiredSize' receives the needed size.
[DllImport("setupapi.dll", CharSet = CharSet.Unicode, SetLastError = true)]
public static extern bool SetupGetLineText([MarshalAs(UnmanagedType.Struct)] ref InfContext context, IntPtr infHandle, string section, string key, string returnBuffer, int returnBufferSize, out int requiredSize);

// Opens the INF file 'fileName' and returns its handle; the result is compared
// against INVALID_HANDLE by the caller. 'errorLine' is an output value.
[DllImport("setupapi.dll", CharSet = CharSet.Unicode, SetLastError = true)]
public static extern IntPtr SetupOpenInfFile([MarshalAs(UnmanagedType.LPWStr)] string fileName, [MarshalAs(UnmanagedType.LPWStr)] string infClass, Int32 infStyle, out uint errorLine);

// Retrieves the name of the section at position 'index' into 'returnBuffer';
// 'requiredSize' receives the needed size.
[DllImport("setupapi.dll", CharSet = CharSet.Unicode, SetLastError = true)]
public static extern bool SetupEnumInfSections(IntPtr infHandle, uint index, string returnBuffer, int returnBufferSize, out int requiredSize);

 // Locates the first line of 'section' (optionally matching 'key') and fills 'context'.
 [DllImport("setupapi.dll", CharSet = CharSet.Unicode, SetLastError = true)]
 public static extern bool SetupFindFirstLine(IntPtr infHandle, string section, string key, [MarshalAs(UnmanagedType.Struct)]ref InfContext context);

    // Advances from 'contextIn' to the next line and stores the result in 'contextOut'.
    [DllImport("setupapi.dll", CharSet = CharSet.Unicode, SetLastError = true)]
    public static extern bool SetupFindNextLine([MarshalAs(UnmanagedType.Struct)] ref InfContext contextIn, [MarshalAs(UnmanagedType.Struct)] ref InfContext contextOut);

    // Advances from 'contextIn' to the next line matching 'key' and stores the result in 'contextOut'.
    [DllImport("setupapi.dll", CharSet = CharSet.Unicode, SetLastError = true)]
    public static extern bool SetupFindNextMatchLine([MarshalAs(UnmanagedType.Struct)] ref InfContext contextIn, string key, [MarshalAs(UnmanagedType.Struct)] ref InfContext contextOut);


// InfFile class

/// <summary>
/// Creates an InfFile for the given path. Only the path is stored here;
/// the file itself is opened by <c>Open()</c>.
/// </summary>
/// <param name="path">Path of the INF file.</param>
public InfFile(string path)
{
    this._file = path;
}

/// <summary>
/// Opens the INF file whose path was given to the constructor, accepting both
/// the INF_STYLE_OLDNT and INF_STYLE_WIN4 styles, and stores the resulting handle.
/// </summary>
/// <returns><c>true</c> when the returned handle differs from INVALID_HANDLE.</returns>
public bool Open()
{
    int acceptedStyles = INF_STYLE_OLDNT | INF_STYLE_WIN4;

    // The error line reported by the native call is received but not used.
    uint firstErrorLine;
    _handle = NativeMethodsInf.SetupOpenInfFile(_file, null, acceptedStyles, out firstErrorLine);

    bool opened = _handle != INVALID_HANDLE;
    return opened;
}

    /// <summary>
    /// Returns the name of the section at position <paramref name="index"/>.
    /// A first attempt is made with a 75-character buffer; when the reported
    /// required size is larger, the call is repeated with a buffer of that size.
    /// </summary>
    /// <param name="index">Zero-based position of the section to retrieve.</param>
    /// <returns>The section name, or <c>null</c> when the native call reports failure.</returns>
    public string EnumSection(uint index)
    {
        const int InitialSize = 75;

        // The buffer is a space-padded string one character shorter than the size passed to the native call.
        string buffer = String.Empty.PadLeft(InitialSize - 1);
        int needed;
        bool ok = SetupEnumInfSections(_handle, index, buffer, InitialSize, out needed);

        if (needed > InitialSize)
        {
            // Retry with a buffer sized from the value reported by the first call.
            buffer = buffer.PadLeft(needed - 1);
            ok = SetupEnumInfSections(_handle, index, buffer, needed, out needed);
        }

        if (!ok)
        {
            return null;
        }

        // Still needs to be converted to proper encoding.
        return buffer.Substring(0, needed - 1);
    }

    /// <summary>
    /// Finds the first line of <paramref name="section"/> without filtering on a key
    /// (delegates to <c>FindFirstKey</c> with a null key).
    /// </summary>
    /// <param name="section">Name of the section to search.</param>
    /// <returns>Whatever <c>FindFirstKey</c> returns for a null key.</returns>
    public InfLine FindFirstLine(string section)
    {
        string noKeyFilter = null;
        return FindFirstKey(section, noKeyFilter);
    }

    /// <summary>
    /// Looks up the first line in <paramref name="section"/> for <paramref name="key"/>
    /// via SetupFindFirstLine and wraps the resulting context in an <c>InfLine</c>.
    /// </summary>
    /// <param name="section">Name of the section to search.</param>
    /// <param name="key">Key to look for; passed to the native call unchanged (may be null).</param>
    /// <returns>An <c>InfLine</c> for the located line, or <c>null</c> when the native call returns false.</returns>
    public InfLine FindFirstKey(string section, string key)
    {
        InfContext context = new InfContext();
        bool found = SetupFindFirstLine(_handle, section, key, ref context);

        if (!found)
        {
            return null;
        }

        return new InfLine(context);
    }

// InfLine class

    /// <summary>
    /// Advances this line's context to the next line. The same context field is
    /// passed as both the input and the output of SetupFindNextLine.
    /// </summary>
    /// <returns>The result of the native call.</returns>
    public bool FindNextLine()
    {
        bool advanced = SetupFindNextLine(ref _context, ref _context);
        return advanced;
    }

    /// <summary>
    /// Advances this line's context to the next line matching <paramref name="key"/>.
    /// The same context field is passed as both the input and the output of
    /// SetupFindNextMatchLine.
    /// </summary>
    /// <param name="key">Key to match; passed to the native call unchanged.</param>
    /// <returns>The result of the native call.</returns>
    public bool FindNextMatchLine(string key)
    {
        bool advanced = SetupFindNextMatchLine(ref _context, key, ref _context);
        return advanced;
    }

    /// <summary>
    /// Returns the text of the line this object's context refers to.
    /// A first attempt is made with a 250-character buffer; when the reported
    /// required size is larger, the call is repeated with a buffer of that size.
    /// </summary>
    /// <returns>The line text, or <c>null</c> when the native call reports failure.</returns>
    public string GetCompleteValue()
    {
        const int InitialSize = 250;

        // The buffer is a space-padded string one character shorter than the size passed to the native call.
        string buffer = String.Empty.PadLeft(InitialSize - 1);
        int needed;
        bool ok = SetupGetLineText(ref _context, IntPtr.Zero, null, null, buffer, InitialSize, out needed);

        if (needed > InitialSize)
        {
            // Retry with a buffer sized from the value reported by the first call.
            buffer = buffer.PadLeft(needed - 1);
            ok = SetupGetLineText(ref _context, IntPtr.Zero, null, null, buffer, needed, out needed);
        }

        if (!ok)
        {
            return null;
        }

        return buffer.Substring(0, needed - 1);
    }

// And then use with something like:
using (InfFile file = new InfFile(@"..\..\..\test.inf"))
{
    if (file.Open())
    {
        // Walk the sections by index until EnumSection returns null.
        string section;
        for (uint sectionIndex = 0; (section = file.EnumSection(sectionIndex)) != null; sectionIndex++)
        {
            Console.WriteLine("Section: " + section);

            InfLine line = file.FindFirstKey(section, null);
            if (line != null)
            {
                // Print every line of the section; the loop ends when FindNextLine returns false.
                do
                {
                    string key = line.GetFieldValue(0);
                    string value = line.GetCompleteValue();
                    Console.WriteLine("Key: " + key + " || Value: " + value);
                }
                while (line.FindNextLine());
            }
        }
    }
}

示例inf:

; German Specific 
[Strings.0007] ; German
Provider="Hewlett-Packard"
Mfg="Hewlett-Packard"
CD="hp cd"

BUTTON_SCAN="Taste "Scannen" gedrückt"
LAUNCH_APPLICATION_SCAN="HP Scansoftware"

; Japanese Specific 
[Strings.0411] ; Japanese
Provider="Hewlett-Packard"
Mfg="Hewlett-Packard"
CD="hp cd"

BUTTON_SCAN="[スキャン] ボタンを押す"
LAUNCH_APPLICATION_SCAN="hp スキャニング ソフトウェア"

我需要使用以下方法转换节(section)名、键和值:

/// <summary>
/// Encodes <paramref name="input"/> to bytes with code page 1252, decodes those
/// bytes as UTF-8, and trims surrounding whitespace and then NUL characters.
/// </summary>
/// <remarks>
/// This is a re-interpretation of the text, not a general conversion: it only
/// yields meaningful output when <paramref name="input"/> holds UTF-8 bytes
/// that were previously decoded as Windows-1252.
/// NOTE(review): characters that code page 1252 cannot represent are presumably
/// replaced by the encoder's fallback character - confirm before relying on it.
/// </remarks>
/// <param name="input">Text to re-interpret; may be null.</param>
/// <returns>
/// The re-interpreted, trimmed text. <paramref name="input"/> is returned
/// unchanged when it is null or when the encoding step fails (for example when
/// code page 1252 is not available on the current runtime).
/// </returns>
public static string ConvertToUTF8(string input)
{
    // Nothing to convert; handle null explicitly instead of via a caught exception.
    if (input == null)
    {
        return input;
    }

    try
    {
        byte[] raw = Encoding.GetEncoding(1252).GetBytes(input);
        return Encoding.UTF8.GetString(raw).Trim().Trim('\0');
    }
    catch (ArgumentException)
    {
        // Invalid code page or encoder/decoder fallback failure: keep the best-effort fallback.
        return input;
    }
    catch (NotSupportedException)
    {
        // Code page 1252 is not supported by the current platform/runtime.
        return input;
    }
}

这样才能获得正确的值,否则您会看到它们不是原始字符。

例如:如果不先调用ConvertToUTF8,Taste "Scannen" gedrückt 中的非ASCII字符(引号和ü)就无法正确显示。

1 个答案:

答案 0 :(得分:1)

您目前是先将字符串编码为Windows-1252字节,然后把这些字节当作UTF-8来解释,再转换回字符串。

这并不是“工作正常”——它基本上是错误的。

如果您已经有了一个字符串,它就不是Windows-1252编码的……它在内部以UTF-16表示,但您可以把它看作一个字符序列。如果您一开始拿到的确实是字节数组,那么应该使用Encoding.GetEncoding(1252).GetString(bytes)将该字节数组转换为字符串。

(如果您可以使用SetupGetLineTextW,就可以完全避开这些与ANSI相关的麻烦。)