我在MSSQL数据库中有一堆本应是西里尔文、却显示为乱码的文本,需要在C#中将其转换为正确的西里尔文(Cyrillic)。
所以...Ðàáîòàâãåðìàíèè
应该成为
Работавгермании
有什么建议吗?
我应该补充说,我目前得到的最接近的结果是:?aaioa a aa?iaiee
这是我正在使用的代码:
// Attempt 1: encode the string read from the "myfield" column as Windows-1251 bytes,
// then decode those bytes as UTF-8.
str = Encoding.UTF8.GetString(Encoding.GetEncoding("Windows-1251").GetBytes(drCurrent["myfield"].ToString()));
// Attempt 2: take the result of attempt 1, encode it as UTF-8 bytes, convert those bytes
// from UTF-8 to code page 1251, then decode the converted bytes as code page 1251.
str = Encoding.GetEncoding(1251).GetString(Encoding.Convert(Encoding.UTF8, Encoding.GetEncoding(1251), Encoding.UTF8.GetBytes(str)));
答案 0 :(得分:16)
// Brute-force search: try every (source, target) encoding pair to discover which pair
// turns the garbled text back into the expected Cyrillic text.
const string source = "Ðàáîòà â ãåðìàíèè";
const string destination = "Работа в германии";
foreach (var encodeInfo in Encoding.GetEncodings())
{
    // Turn the garbled text into raw bytes using the candidate source encoding.
    var encoder = encodeInfo.GetEncoding();
    var rawBytes = encoder.GetBytes(source);
    foreach (var decodeInfo in Encoding.GetEncodings())
    {
        // Reinterpret those bytes with the candidate target encoding.
        var decoder = decodeInfo.GetEncoding();
        var candidate = decoder.GetString(rawBytes);
        if (candidate != destination)
        {
            continue;
        }

        Console.WriteLine("Source Encoding: {0} TargetEncoding: {1}", encodeInfo.CodePage, decodeInfo.CodePage);
    }
}
// Result1: Source Encoding: 1252 TargetEncoding: 1251
// Result2: Source Encoding: 28591 TargetEncoding: 1251
// Result3: Source Encoding: 28605 TargetEncoding: 1251
// The code for you to use: get the bytes back via code page 1252, then read them as code page 1251.
var cyrillicEncoding = Encoding.GetEncoding(1251);
var latinBytes = Encoding.GetEncoding(1252).GetBytes(source);
var decodedCyrillic = cyrillicEncoding.GetString(latinBytes);
// Result: Работа в германии
答案 1 :(得分:4)
ADO.Net 的 SQL Server 提供程序会将所有字符串类型公开为 C# 字符串,这意味着它们已经被转换为 Unicode。对于非 Unicode 的源列(您的情况显然如此),如 char(n) 或 varchar(n),ADO.Net SQL Server 提供程序使用源列的排序规则(collation)信息来确定编码。因此,如果您的非 Unicode SQL Server 数据在 .Net 中以错误的编码呈现,那一定是因为这些数据是以错误的排序规则提供给提供程序的。请为您的数据选择合适的排序规则,SQL Server 的 ADO.Net 提供程序就会使用正确的编码对其进行转换。例如,如 Collation and Code Page Architecture 中所述,西里尔文排序规则对应代码页(code page)1251,这很可能正是您想要的。链接的文章包含解决问题所需的所有信息。
using System;
using System.Text;
using System.Data.SqlClient;
using System.Windows.Forms;
/// <summary>
/// Demo program: stores the same CP1251-encoded bytes in two varchar columns that differ
/// only in collation (Latin1 vs. Cyrillic), reads both back as strings, and shows the
/// original text next to the two values that were read.
/// </summary>
public class Hello1
{
    /// <summary>
    /// Runs the demo against the local SQL Server instance using integrated security.
    /// Any exception is written to the console.
    /// </summary>
    public static void Main()
    {
        try
        {
            using (SqlConnection conn = new SqlConnection("server=.;integrated security=true"))
            {
                conn.Open();

                // The .cs file must be saved as Unicode, obviously...
                string s = "Работа в германии";
                byte[] b = Encoding.GetEncoding(1251).GetBytes(s);

                // Create a test table with one Latin1-collated and one Cyrillic-collated column.
                // Each command is disposed as soon as it has run; the continuation lines of the
                // SQL literal are kept flush-left so the text sent to the server is unchanged.
                using (SqlCommand cmd = new SqlCommand(
                    @"create table #t (
c1 varchar(100) collate Latin1_General_CI_AS,
c2 varchar(100) collate Cyrillic_General_CI_AS)",
                    conn))
                {
                    cmd.ExecuteNonQuery();
                }

                // Insert the same value twice: the original Unicode string encoded as CP1251,
                // passed as raw bytes.
                using (SqlCommand cmd = new SqlCommand(
                    @"insert into #t (c1, c2) values (@b, @b)", conn))
                {
                    cmd.Parameters.AddWithValue("@b", b);
                    cmd.ExecuteNonQuery();
                }

                // Read the value back from the Latin-collated column.
                string def;
                using (SqlCommand cmd = new SqlCommand(
                    @"select c1 from #t", conn))
                {
                    def = (string) cmd.ExecuteScalar();
                }

                // Read the same value back from the Cyrillic-collated column.
                string cyr;
                using (SqlCommand cmd = new SqlCommand(
                    @"select c2 from #t", conn))
                {
                    cyr = (string) cmd.ExecuteScalar();
                }

                // Cannot use Console.Write since the console is not Unicode
                MessageBox.Show(String.Format(
                    @"Original: {0} Default collation: {1} Cyrillic collation: {2}",
                    s, def, cyr));
            }
        }
        catch (Exception e)
        {
            Console.WriteLine(e);
        }
    }
}
结果是:
---------------------------
---------------------------
Original: Работа в германии Default collation: Ðàáîòà â ãåðìàíèè Cyrillic collation: Работа в германии
---------------------------
OK
---------------------------