我使用 abcpdf.net,通过 read(pdfpath) 和 gettext() 函数从阿拉伯语 PDF 中提取文本,但得到的文本(字符串)无法阅读,因为阿拉伯语是从右到左(RTL)书写的语言。现在我必须把字符串中的阿拉伯语部分反转过来才能使其可读,但我不知道该怎么做。如何只提取阿拉伯语部分,然后将其反转?
我使用的是 C#,下面是用 abcpdf.net 库从我的 PDF 中提取出的字符串示例:
0.00
KCCUSER1
6:17:19PM28/10/2010ةعابطلا خيرات
(200) لوادتملا زكارمو تاكرح
ةصاقملل ةيتيوكلا ةكرشلا
28/10/2010
RBKPI012
لمعلا خيرات
عمجم/ ح - 88لجلا عيبلل افيا ةيلودلا ةيلاملا تاراشتسلا ةكرش - 65646
C023
يحاتتفلا ديصرلا
答案 0 :(得分:1)
/// <summary>
/// Returns a copy of <paramref name="source"/> in which every maximal run of
/// consecutive Arabic characters (as classified by <c>IsArabic</c>) has its
/// characters reversed. All other characters are copied through unchanged and
/// stay at their original positions relative to the runs.
/// </summary>
/// <remarks>
/// A space is not classified as Arabic by <c>IsArabic</c>, so it ends a run:
/// each word is reversed on its own and the order of the words themselves is
/// left exactly as it appears in <paramref name="source"/>.
/// </remarks>
/// <param name="source">The text to convert, e.g. text extracted from a PDF.</param>
/// <returns>The converted text.</returns>
private string Convert(string source)
{
    StringBuilder sbDestination = new StringBuilder(source.Length);

    // Accumulate the current Arabic run in a builder instead of using string +=,
    // which re-copies the whole run for every character appended to it.
    StringBuilder arabicRun = new StringBuilder();

    foreach (char ch in source)
    {
        if (IsArabic(ch))
        {
            arabicRun.Append(ch);
            continue;
        }

        // A non-Arabic character terminates the pending run: emit it reversed first.
        if (arabicRun.Length > 0)
        {
            sbDestination.Append(Reverse(arabicRun.ToString()));
            arabicRun.Length = 0;
        }

        sbDestination.Append(ch);
    }

    // The input may end in the middle of an Arabic run; flush what is left.
    if (arabicRun.Length > 0)
    {
        sbDestination.Append(Reverse(arabicRun.ToString()));
    }

    return sbDestination.ToString();
}
来自here
// The IsArabic method, taken from the source linked above.
/// <summary>
/// Determines whether <paramref name="character"/> lies in one of the Unicode
/// ranges this code treats as Arabic.
/// </summary>
/// <param name="character">The UTF-16 code unit to classify.</param>
/// <returns>
/// <c>true</c> when the character falls in one of the ranges listed below;
/// otherwise <c>false</c>.
/// </returns>
private bool IsArabic(char character)
{
    // Presentation Forms-A is checked up to the end of its block (U+FDFF) rather
    // than stopping at U+FC3F, so ligatures in U+FC40..U+FDFF are not treated as
    // non-Arabic separators.
    return (character >= 0x0600 && character <= 0x06FF)   // Arabic
        || (character >= 0x0750 && character <= 0x077F)   // Arabic Supplement
        || (character >= 0xFB50 && character <= 0xFDFF)   // Arabic Presentation Forms-A
        || (character >= 0xFE70 && character <= 0xFEFC);  // Arabic Presentation Forms-B (up to U+FEFC)
}
/// <summary>
/// Returns <paramref name="source"/> with its characters (UTF-16 code units)
/// in reverse order.
/// </summary>
/// <param name="source">The string to reverse.</param>
/// <returns>A new string containing the characters of <paramref name="source"/> back to front.</returns>
private string Reverse(string source)
{
    // Reverse the copied buffer in place with Array.Reverse. Calling .Reverse()
    // directly on the array relies on overload resolution picking the LINQ
    // extension, and needs a second array for the result.
    char[] chars = source.ToCharArray();
    System.Array.Reverse(chars);
    return new string(chars);
}
祝你好运!