给出像 nin2 hao3 ma 这样的源文本
(这是用ASCII书写拼音的典型方式,不使用带声调符号的字符),并给出(UTF-8)转换表,如
a1;ā
e1;ē
i1;ī
o1;ō
u1;ū
ü1;ǖ
A1;Ā
E1;Ē
...
如何将源文本转换为
nín hǎo ma
顺便说一下,我使用的是PHP,这个问题或许可以用正则表达式来解决?
答案 0 :(得分:10)
Ollie的算法是一个不错的开始,但它没有正确应用标记。例如,qiao1变成qīāō。这个是正确和完整的。您可以轻松查看如何定义替换规则。
对于第5声(轻声),它同样会执行完整的处理流程,不过除了删除数字之外,这不会影响输出。我保留了这一点,以防你想对第5声做些特殊处理。
该算法的工作原理如下:
1. 查找后面紧跟一个数字的单词(字母串);
2. 在该单词的元音组合中,用星号(*)标记应当加声调符号的元音;
3. 将带星号的字母替换为对应声调的带符号字母,并去掉数字。
示例:
qiao => (iao becomes ia*o) => qia*o => qiǎo
这一策略,再加上使用strtr
(它会优先进行较长的替换),可以确保不会出现下面这种情况:
qiao1 => qīāō
/**
 * Convert numbered pinyin (e.g. "nin2 hao3") into accented pinyin.
 *
 * Every run of letters (a-z, A-Z, ü, Ü) that is directly followed by a
 * digit is handed to pinyin_addaccents_cb(), whose return value replaces
 * the letters and the digit.
 *
 * @param string $string Text containing numbered pinyin syllables.
 * @return string|null   Result of preg_replace_callback().
 */
function pinyin_addaccents($string) {
    # Group 1: the syllable letters, group 2: the single tone digit.
    $syllablePattern = '~([a-zA-ZüÜ]+)(\d)~';
    $callback = 'pinyin_addaccents_cb';

    return preg_replace_callback($syllablePattern, $callback, $string);
}
/**
 * Helper callback for preg_replace_callback(): accent one numbered syllable.
 *
 * The vowel that carries the tone mark is first tagged with a trailing "*"
 * (strtr() prefers the longest key, so a cluster such as "iao" wins over the
 * single vowels it contains), then the tagged vowel is swapped for the
 * accented letter of the requested tone. Tone 5 maps each vowel to itself,
 * so it only removes the digit.
 *
 * @param array $match [0] full match, [1] syllable letters, [2] tone digit.
 * @return string      The accented syllable, or the full match unchanged
 *                     when the digit is not a tone from 1 to 5.
 */
function pinyin_addaccents_cb($match) {
    static $accentmap = null;

    if( $accentmap === null ) {
        # Where to place the accent marks. Three casings are listed for each
        # cluster: lower case, upper case, and "first letter capitalised"
        # (e.g. "Ai", "Ou"); without the last group a capitalised syllable
        # would fall back to the single-vowel rules and get two accents.
        $stars =
            'a* e* i* o* u* ü* '.
            'A* E* I* O* U* Ü* '.
            'a*i a*o e*i ia* ia*o ie* io* iu* '.
            'A*I A*O E*I IA* IA*O IE* IO* IU* '.
            'A*i A*o E*i Ia* Ia*o Ie* Io* Iu* '.
            'o*u ua* ua*i ue* ui* uo* üe* '.
            'O*U UA* UA*I UE* UI* UO* ÜE* '.
            'O*u Ua* Ua*i Ue* Ui* Uo* Üe*';
        $nostars = str_replace('*', '', $stars);

        # Build an array like Array('a' => 'a*') and store statically
        $accentmap = array_combine(explode(' ', $nostars), explode(' ', $stars));
        unset($stars, $nostars);
    }

    # Starred placeholders, in the same order as every row of $pinyin
    static $vowels =
        Array('a*','e*','i*','o*','u*','ü*','A*','E*','I*','O*','U*','Ü*');

    static $pinyin = Array(
        1 => Array('ā','ē','ī','ō','ū','ǖ','Ā','Ē','Ī','Ō','Ū','Ǖ'),
        2 => Array('á','é','í','ó','ú','ǘ','Á','É','Í','Ó','Ú','Ǘ'),
        3 => Array('ǎ','ě','ǐ','ǒ','ǔ','ǚ','Ǎ','Ě','Ǐ','Ǒ','Ǔ','Ǚ'),
        4 => Array('à','è','ì','ò','ù','ǜ','À','È','Ì','Ò','Ù','Ǜ'),
        5 => Array('a','e','i','o','u','ü','A','E','I','O','U','Ü')
    );

    list(,$word,$tone) = $match;

    # Digits 0 and 6-9 have no row in $pinyin; replacing with a missing row
    # would delete the starred vowel, so leave such input untouched.
    if( !isset($pinyin[$tone]) ) {
        return $match[0];
    }

    # Add star to vowelcluster
    $word = strtr($word, $accentmap);

    # Replace starred letter with accented
    $word = str_replace($vowels, $pinyin[$tone], $word);

    return $word;
}
答案 1 :(得分:1)
<?php
// Sample input (numbered pinyin) and the accented text it should turn into.
list($in, $out) = array('nin2 hao3 ma', 'nín hǎo ma');
/**
 * preg_replace_callback() callback: accent the vowels of one syllable.
 *
 * Looks up the replacement table for the captured tone and applies it to
 * the captured word with str_replace(). The table below is only a partial
 * sample (tone 1 for several vowels, tone 2 for "i", tone 3 for "a").
 *
 * @param array $match [0] full match, [1] word, [2] tone digits.
 * @return string      The word with its vowels replaced, or the full match
 *                     unchanged when the tone has no table entry.
 */
function replacer($match) {
    static $trTable = array(
        1 => array(
            'a' => 'ā',
            'e' => 'ē',
            'i' => 'ī',
            'o' => 'ō',
            'u' => 'ū',
            'ü' => 'ǖ',
            'A' => 'Ā',
            'E' => 'Ē'),
        2 => array('i' => 'í'),
        3 => array('a' => 'ǎ')
    );

    list(, $word, $i) = $match;

    // A tone without an entry would pass null to array_keys(); keep the
    // original text instead of failing.
    if (!isset($trTable[$i])) {
        return $match[0];
    }

    return str_replace(
        array_keys($trTable[$i]),
        array_values($trTable[$i]),
        $word);
}
// Run every "word + digits" token of $in through replacer() and compare the
// converted text with the expected $out.
// Outputs: bool(true)
var_dump($out === preg_replace_callback('~(\w+)(\d+)~', 'replacer', $in));
答案 2 :(得分:1)
对于.NET解决方案,请尝试Pinyin4j.NET
特性 将中文(简体和繁体)转换为最流行的拼音系统。下面列出了支持拼音系统。
答案 3 :(得分:1)
添加javascript解决方案:
此代码根据官方算法放置Tonemarks, 见wikipedia。
希望能对你们中的一些人有所帮助,欢迎提出建议和改进!
// Lookup table: ACCENTED[tone][vowel] is the accented form of a lower-case
// vowel for tone '1'..'4'; tone '5' maps every vowel to itself.
var ACCENTED = (function () {
    var VOWELS = ['a', 'e', 'i', 'o', 'u', 'ü'];
    // One row per tone, in the same order as VOWELS.
    var MARKED = {
        '1': ['\u0101', '\u0113', '\u012B', '\u014D', '\u016B', '\u01D6'],
        '2': ['\u00E1', '\u00E9', '\u00ED', '\u00F3', '\u00FA', '\u01D8'],
        '3': ['\u01CE', '\u011B', '\u01D0', '\u01D2', '\u01D4', '\u01DA'],
        '4': ['\u00E0', '\u00E8', '\u00EC', '\u00F2', '\u00F9', '\u01DC'],
        '5': VOWELS
    };
    var table = {};
    Object.keys(MARKED).forEach(function (tone) {
        var row = {};
        VOWELS.forEach(function (vowel, idx) {
            row[vowel] = MARKED[tone][idx];
        });
        table[tone] = row;
    });
    return table;
}());
/**
 * Find the index of the letter that carries the tone mark in one syllable.
 *
 * Order of precedence: 'a', then 'e', then 'o'; otherwise whichever of
 * 'i' / 'u' comes last (so "iu" marks the u and "ui" marks the i);
 * otherwise 'ü'.
 *
 * @param {string} token A single lower-case syllable (a trailing tone digit
 *                       is harmless, it is never a vowel).
 * @returns {number} Index of the vowel to accent, 0 for a one-character
 *                   token, or -1 when the token contains no vowel.
 */
function getPos (token) {
    if (token.length === 1){
        // only one letter, nothing to differentiate
        return 0;
    }
    var precedence = ['a', 'e', 'o'];
    for (var k = 0; k < precedence.length; k += 1){
        var pos = token.indexOf(precedence[k]);
        // checking a before o, will take care of ao automatically
        if (pos >= 0){
            return pos;
        }
    }
    var u = token.indexOf('u');
    var i = token.indexOf('i');
    if (u >= 0 || i >= 0){
        // -iu OR u-only case: accent goes to u
        // -ui OR i-only case: accent goes to i
        return (i < u) ? u : i;
    }
    // the only vowel left is ü (indexOf yields -1 when it is absent too)
    return token.indexOf('ü');
}
/**
 * Turn one numbered syllable (e.g. "han4") into its accented form ("hàn").
 *
 * The last character selects the tone row of ACCENTED, getPos() selects the
 * letter to accent, and the tone digit is dropped from the result. Only the
 * lower-case vowels present in ACCENTED can be accented.
 *
 * @param {string} numbered_PinYin A single syllable ending in a tone digit.
 * @returns {string} The accented syllable. The input is returned unchanged
 *                   when it does not end in a known tone digit; when no
 *                   accentable vowel is found only the digit is removed.
 */
function placeTone(numbered_PinYin){
    var ToneIndex = numbered_PinYin.charAt(numbered_PinYin.length - 1);
    var toneRow = ACCENTED[ToneIndex];
    if (!toneRow){
        // no trailing tone digit: nothing to convert
        return numbered_PinYin;
    }
    // the syllable without its tone digit
    var bare = numbered_PinYin.slice(0, -1);
    var accentpos = getPos(numbered_PinYin);
    var accented_Char = toneRow[numbered_PinYin.charAt(accentpos)];
    if (accentpos < 0 || accented_Char === undefined){
        // no vowel we know how to accent
        return bare;
    }
    // slice() takes start/end indexes, so this also works for accentpos 0
    // (substr(1, length - 1) took a *length* and kept the tone digit)
    return bare.slice(0, accentpos) + accented_Char + bare.slice(accentpos + 1);
}
// placeTone() handles exactly one syllable, so convert the words one by one.
console.log('han4 zi4'.split(' ').map(function (syllable) {
    return placeTone(syllable);
}).join(' '));
答案 4 :(得分:0)
VB Macro(Libre)Office:Convert pinyin tone numbers to accents
希望这个算法符合拼音的标调规则,对你我都适用。
rem ----------------------------------------------------------------------
rem Replaces numbered pinyin syllables in the current document with
rem accented ones by running one search/replace per vowel and tone.
rem Vowels are processed in the order a, o, e, u, i, ü so that the first
rem rule that still finds a tone digit decides which vowel gets the mark;
rem the "u" patterns do not allow an "i" after the u, which leaves "ui"
rem to the "i" rules. Tone 0 only removes the digit.
rem Tones 1 and 3 previously wrote the plain vowel (the macron and caron
rem letters had been lost), so those syllables came out without a mark.
rem ----------------------------------------------------------------------
sub replaceNumberByTones
    call PinyinTonesNumber("a([a-z]*[a-z]*)0", "a$1")
    call PinyinTonesNumber("a([a-z]*[a-z]*)1", "ā$1")
    call PinyinTonesNumber("a([a-z]*[a-z]*)2", "á$1")
    call PinyinTonesNumber("a([a-z]*[a-z]*)3", "ǎ$1")
    call PinyinTonesNumber("a([a-z]*[a-z]*)4", "à$1")
    call PinyinTonesNumber("o([a-z]*[a-z]*)0", "o$1")
    call PinyinTonesNumber("o([a-z]*[a-z]*)1", "ō$1")
    call PinyinTonesNumber("o([a-z]*[a-z]*)2", "ó$1")
    call PinyinTonesNumber("o([a-z]*[a-z]*)3", "ǒ$1")
    call PinyinTonesNumber("o([a-z]*[a-z]*)4", "ò$1")
    call PinyinTonesNumber("e([a-z]*[a-z]*)0", "e$1")
    call PinyinTonesNumber("e([a-z]*[a-z]*)1", "ē$1")
    call PinyinTonesNumber("e([a-z]*[a-z]*)2", "é$1")
    call PinyinTonesNumber("e([a-z]*[a-z]*)3", "ě$1")
    call PinyinTonesNumber("e([a-z]*[a-z]*)4", "è$1")
    call PinyinTonesNumber("u([a-hj-z]*[a-hj-z]*)0", "u$1")
    call PinyinTonesNumber("u([a-hj-z]*[a-hj-z]*)1", "ū$1")
    call PinyinTonesNumber("u([a-hj-z]*[a-hj-z]*)2", "ú$1")
    call PinyinTonesNumber("u([a-hj-z]*[a-hj-z]*)3", "ǔ$1")
    call PinyinTonesNumber("u([a-hj-z]*[a-hj-z]*)4", "ù$1")
    call PinyinTonesNumber("i([a-z]*[a-z]*)0", "i$1")
    call PinyinTonesNumber("i([a-z]*[a-z]*)1", "ī$1")
    call PinyinTonesNumber("i([a-z]*[a-z]*)2", "í$1")
    call PinyinTonesNumber("i([a-z]*[a-z]*)3", "ǐ$1")
    call PinyinTonesNumber("i([a-z]*[a-z]*)4", "ì$1")
    rem ü only carries the mark when no other vowel did (e.g. "lü4");
    rem "üe" syllables were already handled by the e rules above
    call PinyinTonesNumber("ü([a-z]*[a-z]*)0", "ü$1")
    call PinyinTonesNumber("ü([a-z]*[a-z]*)1", "ǖ$1")
    call PinyinTonesNumber("ü([a-z]*[a-z]*)2", "ǘ$1")
    call PinyinTonesNumber("ü([a-z]*[a-z]*)3", "ǚ$1")
    call PinyinTonesNumber("ü([a-z]*[a-z]*)4", "ǜ$1")
End sub
rem ----------------------------------------------------------------------
rem Runs one search-and-replace on the current document through the
rem ".uno:ExecuteSearch" dispatch command.
rem   expression  - text stored in SearchItem.SearchString
rem   replacement - text stored in SearchItem.ReplaceString
rem NOTE(review): the callers pass regular expressions and "$1"
rem back-references, so AlgorithmType = 1 presumably selects regex search
rem and Command = 3 presumably means "replace all" - confirm against the
rem LibreOffice SearchItem documentation.
rem ----------------------------------------------------------------------
sub PinyinTonesNumber(expression, replacement)
rem ----------------------------------------------------------------------
rem define variables
dim document as object
dim dispatcher as object
rem ----------------------------------------------------------------------
rem get access to the document
rem (the frame of the current controller is the dispatch target)
document = ThisComponent.CurrentController.Frame
dispatcher = createUnoService("com.sun.star.frame.DispatchHelper")
rem ----------------------------------------------------------------------
rem 19 search properties (indexes 0 to 18) handed to the dispatcher
dim args1(18) as new com.sun.star.beans.PropertyValue
args1(0).Name = "SearchItem.StyleFamily"
args1(0).Value = 2
args1(1).Name = "SearchItem.CellType"
args1(1).Value = 0
args1(2).Name = "SearchItem.RowDirection"
args1(2).Value = true
args1(3).Name = "SearchItem.AllTables"
args1(3).Value = false
args1(4).Name = "SearchItem.Backward"
args1(4).Value = false
args1(5).Name = "SearchItem.Pattern"
args1(5).Value = false
args1(6).Name = "SearchItem.Content"
args1(6).Value = false
args1(7).Name = "SearchItem.AsianOptions"
args1(7).Value = false
rem presumably 1 = regular-expression search - TODO confirm
args1(8).Name = "SearchItem.AlgorithmType"
args1(8).Value = 1
args1(9).Name = "SearchItem.SearchFlags"
args1(9).Value = 65536
rem the pattern to look for, as supplied by the caller
args1(10).Name = "SearchItem.SearchString"
args1(10).Value = expression
rem the text to put in its place, as supplied by the caller
args1(11).Name = "SearchItem.ReplaceString"
args1(11).Value = replacement
args1(12).Name = "SearchItem.Locale"
args1(12).Value = 255
args1(13).Name = "SearchItem.ChangedChars"
args1(13).Value = 2
args1(14).Name = "SearchItem.DeletedChars"
args1(14).Value = 2
args1(15).Name = "SearchItem.InsertedChars"
args1(15).Value = 2
rem NOTE(review): these flags may make the search ignore case - verify
args1(16).Name = "SearchItem.TransliterateFlags"
args1(16).Value = 1280
rem presumably 3 = replace all occurrences - TODO confirm
args1(17).Name = "SearchItem.Command"
args1(17).Value = 3
args1(18).Name = "Quiet"
args1(18).Value = true
rem execute the search/replace on the document frame
dispatcher.executeDispatch(document, ".uno:ExecuteSearch", "", 0, args1())
end sub
希望这有助于某人
弗朗索瓦