// Sample Turkish input; the goal is to turn its accented letter into plain ASCII ("UST").
string strTurkish = "ÜST";
如何将 strTurkish 的值转换为“UST”?
答案 0 :(得分:15)
// Canonical decomposition (FormD) splits each accented letter into a base
// letter followed by its combining marks.
var text = "ÜST";
var decomposed = text.Normalize(NormalizationForm.FormD);

// Keep every character except the non-spacing (combining) marks, then glue
// the survivors back together.
var baseCharacters =
    from ch in decomposed
    where char.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark
    select ch;
var unaccentedText = String.Join("", baseCharacters);
答案 1 :(得分:12)
您可以使用以下方法解决该问题。其他方法无法正确转换“土耳其语无点小写字母 ı(\u0131)”。
/// <summary>
/// Removes diacritical marks from <paramref name="text"/>, e.g. "ÜST" becomes "UST".
/// </summary>
/// <remarks>
/// The text is decomposed (FormD) so that each accent becomes a separate
/// non-spacing mark, those marks are dropped, and the remainder is recomposed
/// (FormC). Turkish dotless i (U+0131) has no Unicode decomposition, so it is
/// mapped to 'i' explicitly. No code-page round trip is involved, so the method
/// does not require code page 1252 to be available and does not turn characters
/// outside that code page into '?'. Other letters that have no decomposition
/// are returned unchanged.
/// </remarks>
/// <param name="text">The text to strip; must not be null.</param>
/// <returns>The text with diacritical marks removed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="text"/> contains invalid Unicode (raised by <c>String.Normalize</c>).
/// </exception>
public static string RemoveDiacritics(string text)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    string normalizedString = text.Normalize(NormalizationForm.FormD);
    StringBuilder result = new StringBuilder(normalizedString.Length);
    foreach (char c in normalizedString)
    {
        // Combining accents are exactly what we want to discard.
        if (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.NonSpacingMark)
        {
            continue;
        }

        // Dotless i is a standalone letter, not "i + mark", so FormD leaves it alone.
        result.Append(c == '\u0131' ? 'i' : c);
    }

    // Recompose so text that FormD split for other reasons (e.g. Hangul) is not left decomposed.
    return result.ToString().Normalize(NormalizationForm.FormC);
}
答案 2 :(得分:7)
我不是这方面的专家,但我认为可以使用 string.Normalize
来实现:先对字符串进行分解,然后删除所有非 ASCII 字符:
using System;
using System.Linq;
using System.Text;
/// <summary>
/// Demo program: strips the accent from a sample word by decomposing it and
/// keeping only the ASCII characters.
/// </summary>
class Test
{
    /// <summary>
    /// Decomposes the sample text (FormD), drops every character whose code
    /// is 128 or above, and writes the remaining characters to the console.
    /// </summary>
    static void Main()
    {
        string decomposed = "\u00DCST".Normalize(NormalizationForm.FormD);

        // After decomposition the accent is a separate non-ASCII character,
        // so filtering on the code point removes it and keeps the base letter.
        StringBuilder asciiChars = new StringBuilder(decomposed.Length);
        foreach (char ch in decomposed)
        {
            if (ch < 128)
            {
                asciiChars.Append(ch);
            }
        }

        Console.WriteLine(asciiChars.ToString());
    }
}
但在某些情况下,这种做法完全有可能产生糟糕的结果(例如,无法分解的非 ASCII 字符会被直接丢弃)。
答案 3 :(得分:2)
这不是一个需要通用解决方案的问题。众所周知,土耳其语字母中只有12个特殊字符必须标准化。这些是ı,İ,ö,Ö,ç,Ç,ü,Ü,ğ,Ğ,ş,Ş。您可以编写12条规则,将它们替换为对应的英文字母:i,I,o,O,c,C,u,U,g,G,s,S。
答案 4 :(得分:1)
''' <summary>
''' Replaces the Turkish-specific letters in the given text with their plain
''' Latin counterparts and returns the converted text.
''' </summary>
''' <param name="_String">The text to convert.</param>
''' <returns>The text with each Turkish letter replaced.</returns>
Public Function Ceng(ByVal _String As String) As String
    ' The two strings are parallel: the letter at position n of the first
    ' is replaced by the letter at position n of the second.
    Dim turkishLetters As String = "ığüşöçĞÜŞİÖÇ"
    Dim latinLetters As String = "igusocGUSIOC"

    Dim converted As String = _String
    Dim position As Integer = 0
    While position < turkishLetters.Length
        converted = converted.Replace(turkishLetters(position), latinLetters(position))
        position += 1
    End While

    Return converted
End Function
答案 5 :(得分:0)
/// <summary>
/// Replaces the twelve Turkish-specific letters in <paramref name="text"/>
/// with their plain English counterparts.
/// </summary>
/// <param name="text">The text to convert.</param>
/// <returns>The text with each Turkish letter replaced.</returns>
public string TurkishCharacterToEnglish(string text)
{
    // Parallel tables: the letter at index n of the first array is replaced
    // by the letter at index n of the second.
    char[] turkishLetters = {'ı', 'ğ', 'İ', 'Ğ', 'ç', 'Ç', 'ş', 'Ş', 'ö', 'Ö', 'ü', 'Ü'};
    char[] englishLetters = {'i', 'g', 'I', 'G', 'c', 'C', 's', 'S', 'o', 'O', 'u', 'U'};

    string converted = text;
    int position = 0;
    foreach (char turkishLetter in turkishLetters)
    {
        converted = converted.Replace(turkishLetter, englishLetters[position]);
        position++;
    }

    return converted;
}