如何使用这段 JavaScript 代码将 ASCII 字体文本转换为 Unicode

时间:2010-10-18 04:09:57

标签: javascript unicode ascii

我正在尝试将使用本地(非英语)ASCII 字体编写的文本转换为标准 Unicode。问题在于,必须借助映射表把该字体中的每个字符逐一映射到对应的 Unicode 字符。幸运的是,我在一个 Firefox 插件中找到了现成的开源代码片段。

这是更大的应用程序的一部分,我不明白我如何独立使用它。此代码执行的功能实际上是将ascii字体中的文本转换为标准unicode。

这是代码:

/* ***** BEGIN LICENSE BLOCK ***** 
 *  This file is part of Padma.
 *
 *  Padma is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.

 *  Padma is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with Padma; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * ***** END LICENSE BLOCK ***** */

//Panchami Malayalam

// Namespace holder for the Panchami font encoding. It takes no arguments and
// does nothing when called; the encoding is described by static properties
// attached to this function object.
function Panchami() {}

//The interface every dynamic font encoding should implement
//maxLookupLen equals the length of the longest glyph sequences defined in
//this file, which are two characters long (e.g. vowel_II, conj_L_L).
Panchami.maxLookupLen = 2;
//NOTE(review): fontFace/displayName are presumably read by the Padma host to
//recognise and label this font -- confirm against the caller.
Panchami.fontFace     = "Panchami";
Panchami.displayName  = "Panchami";
//Padma is not defined in the visible part of this file; it must already be
//loaded when this line runs, otherwise a ReferenceError is thrown here.
Panchami.script       = Padma.script_MALAYALAM;

// Returns the Panchami.toPadma entry stored under the glyph sequence `str`,
// or undefined when the table has no entry for it.
Panchami.lookup = function (str)
{
    var table = Panchami.toPadma;
    var mapped = table[str];
    return mapped;
};

// Reports whether `str` has an entry in Panchami.prefixList.
// Returns true for any stored value other than null/undefined, false otherwise.
Panchami.isPrefixSymbol = function (str)
{
    var entry = Panchami.prefixList[str];
    return entry !== undefined && entry !== null;
};

// Reports whether `str` has an entry in Panchami.overloadList.
// Returns true for any stored value other than null/undefined, false otherwise.
Panchami.isOverloaded = function (str)
{
    var entry = Panchami.overloadList[str];
    return entry !== undefined && entry !== null;
};

// Merges two vowel signs, given as Padma constants, into one where possible.
//   E  together with AA (either order) -> Padma.vowelsn_O
//   EE together with AA (either order) -> Padma.vowelsn_OO
//   anything else                      -> sign1 + sign2 (plain concatenation)
Panchami.handleTwoPartVowelSigns = function (sign1, sign2)
{
    // True when the two arguments are exactly `part` and `otherPart`, in either order.
    function isPair(part, otherPart)
    {
        return (sign1 == part && sign2 == otherPart) ||
               (sign1 == otherPart && sign2 == part);
    }

    if (isPair(Padma.vowelsn_E, Padma.vowelsn_AA))
    {
        return Padma.vowelsn_O;
    }
    if (isPair(Padma.vowelsn_EE, Padma.vowelsn_AA))
    {
        return Padma.vowelsn_OO;
    }
    return sign1 + sign2;
};

// Reports whether `str` has an entry in Panchami.redundantList.
// Returns true for any stored value other than null/undefined, false otherwise.
Panchami.isRedundant = function (str)
{
    var entry = Panchami.redundantList[str];
    return entry !== undefined && entry !== null;
};

//Implementation details start here

//Specials
//Every constant from here down to misc_UNKNOWN_1 is a string holding the
//one- or two-character sequence that stands for the named sign. Most of them
//are used further down as keys of Panchami.toPadma.
//NOTE(review): presumably these are the code points the Panchami font assigns
//to its glyphs -- confirm against the font itself.
Panchami.visarga        = "\u00AF";
Panchami.anusvara       = "\u00AC";
Panchami.virama         = "\u00AE"; //Chandrakkala

//Vowels
//The two-character vowels are built from other glyphs in this table:
//II/UU/RR/AU append \u00AA (vowelsn_AU) to a base vowel, OO appends \u00A1
//(vowelsn_AA) to vowel_O, and AI is \u00A5 (vowelsn_E) followed by vowel_E.
Panchami.vowel_A        = "\u00C5"; 
Panchami.vowel_AA       = "\u00BF"; 
Panchami.vowel_I        = "\u201A"; 
Panchami.vowel_II       = "\u201A\u00AA";
Panchami.vowel_U        = "\u0192";
Panchami.vowel_UU       = "\u0192\u00AA";
Panchami.vowel_R        = "\u201E";
Panchami.vowel_RR       = "\u201E\u00AA";
Panchami.vowel_E        = "\u2026";
Panchami.vowel_EE       = "\u2020";               
Panchami.vowel_AI       = "\u00A5\u2026";
Panchami.vowel_O        = "\u2021";
Panchami.vowel_OO       = "\u2021\u00A1";
Panchami.vowel_AU       = "\u2021\u00AA";

//Consonants
Panchami.consnt_KA      = "\u02C6";
Panchami.consnt_KHA     = "\u2030";
Panchami.consnt_GA      = "\u0160";
Panchami.consnt_GHA     = "\u2039";
Panchami.consnt_NGA     = "\u0152";

Panchami.consnt_CA      = "\u00DE";
Panchami.consnt_CHA     = "\u00D3"; 
Panchami.consnt_JA      = "\u00D1"; 
Panchami.consnt_JHA     = "\u00CB"; 
Panchami.consnt_NYA     = "\u00C7";

Panchami.consnt_TTA     = "\u00D0";
Panchami.consnt_TTHA    = "\u007E";
Panchami.consnt_DDA     = "\u00B0";
Panchami.consnt_DDHA    = "\u0153";
Panchami.consnt_NNA     = "\u00C3";

Panchami.consnt_TA      = "\u00BB";
Panchami.consnt_THA     = "\u02DC";
Panchami.consnt_DA      = "\u2122";
Panchami.consnt_DHA     = "\u0161";
Panchami.consnt_NA      = "\u203A";

Panchami.consnt_PA      = "\u00B2";
Panchami.consnt_PHA     = "\u00BA";
Panchami.consnt_BA      = "\u00DD";
Panchami.consnt_BHA     = "\u0178";
Panchami.consnt_MA      = "\u00C4";

Panchami.consnt_YA      = "\u00F0";
Panchami.consnt_RA      = "\u00F1";
Panchami.consnt_LA      = "\u00F2";
Panchami.consnt_VA      = "\u00F3";
Panchami.consnt_SHA     = "\u00F4";
Panchami.consnt_SSA     = "\u00F5";
Panchami.consnt_SA      = "\u00F6";

Panchami.consnt_HA      = "\u00F7";
Panchami.consnt_LLA     = "\u00F8";
Panchami.consnt_ZHA     = "\u00F9";
Panchami.consnt_RRA     = "\u00FA";

//Dependent vowel signs (called "Gunintamulu" in the original source)
Panchami.vowelsn_AA     = "\u00A1";
Panchami.vowelsn_I      = "\u00A2";
Panchami.vowelsn_II     = "\u00A3";
Panchami.vowelsn_U      = "\u00A9";
Panchami.vowelsn_UU     = "\u00A8";
Panchami.vowelsn_R      = "\u00A6";
Panchami.vowelsn_RR     = "\u00A6\u00AA";
Panchami.vowelsn_E      = "\u00A5";
Panchami.vowelsn_EE     = "\u00A4";
Panchami.vowelsn_AI     = "\u00A5\u00A5";
//Vowel signs o and O have two separate glyphs, one on the left and one on the
//right, so no vowelsn_O / vowelsn_OO constant is defined here; the pair is
//merged in Panchami.handleTwoPartVowelSigns instead.
Panchami.vowelsn_AU     = "\u00AA";

//Chillu (5)
Panchami.chillu_ENN     = "\u00FF";
Panchami.chillu_IN      = "\u00FC";
Panchami.chillu_IR      = "\u00FB";
Panchami.chillu_IL      = "\u00FD";
Panchami.chillu_ILL     = "\u00FE";

//vattulu (consonant signs)
Panchami.vattu_GA       = "\u00EA";
Panchami.vattu_TTA      = "\u00E9";
Panchami.vattu_DDA      = "\u00EC";
Panchami.vattu_NNA      = "\u00E5";
Panchami.vattu_TA       = "\u00ED";
Panchami.vattu_DA       = "\u00EF";
Panchami.vattu_DHA      = "\u00E8";
Panchami.vattu_NA       = "\u00EE";
Panchami.vattu_PA       = "\u00E7";
Panchami.vattu_MA       = "\u00E4";
Panchami.vattu_YA       = "\u00AB";
Panchami.vattu_RA       = "\u00B1";
Panchami.vattu_LA       = "\u00EB";
Panchami.vattu_VA       = "\u00A7";
Panchami.vattu_SA       = "\u00E6";

//Conjuncts (called "kooTTaksharangngaL" in the original source)
Panchami.conj_KK        = "\u00B4";
Panchami.conj_KSH       = "\u00BC";
Panchami.conj_NGK       = "\u00C6";  
Panchami.conj_NGNG      = "\u00B9";

Panchami.conj_CC        = "\u00C1";
Panchami.conj_CCH       = "\u00D4";
Panchami.conj_JJ        = "\u00D2";
Panchami.conj_NYC       = "\u00D5";
Panchami.conj_NYNY      = "\u00C8";

Panchami.conj_TTTT      = "\u00B8";
Panchami.conj_NNTT      = "\u00D9";
Panchami.conj_NNDD      = "\u00DF";

Panchami.conj_T_T       = "\u00B7";
Panchami.conj_T_TH      = "\u00CE";
Panchami.conj_TBH       = "\u00CF";
Panchami.conj_TS        = "\u00CC";
Panchami.conj_DD        = "\u00C0";
Panchami.conj_D_DH      = "\u00DA";
Panchami.conj_NT        = "\u00C2";
Panchami.conj_ND        = "\u00B3";
Panchami.conj_NDH       = "\u00DC";
Panchami.conj_N_N       = "\u00C9";
Panchami.conj_NM        = "\u00B5";
//NRR has two spellings: a single glyph, and chillu_IN followed by consnt_RRA.
Panchami.conj_NRR_1     = "\u00CA"; 
Panchami.conj_NRR_2     = "\u00FC\u00FA"; 

Panchami.conj_PP        = "\u00B6";
Panchami.conj_BB        = "\u00E2";
Panchami.conj_MP        = "\u00D8";
Panchami.conj_MM        = "\u00BD";

Panchami.conj_YY        = "\u00E0";
//L_L is spelled as consnt_LA followed by vattu_LA.
Panchami.conj_L_L       = "\u00F2\u00EB";
Panchami.conj_VV        = "\u00E1";

Panchami.conj_SHC       = "\u00D6";
Panchami.conj_SHSH      = "\u00DB";
Panchami.conj_SRR       = "\u00CD";

//NOTE(review): conj_HN has the same value as conj_SHSH ("\u00DB"). Both are
//used as keys of Panchami.toPadma, so the later conj_HN assignment overwrites
//the conj_SHSH entry. One of the two code points is presumably wrong --
//verify against the font before changing either.
Panchami.conj_HN        = "\u00DB";
Panchami.conj_LLLL      = "\u00BE";

Panchami.conj_RRRR      = "\u00D7"; //ta as in steel

//Digits
//Same code points as the ASCII digits; none of them is added to toPadma.
Panchami.digit_ZERO     = "\u0030";
Panchami.digit_ONE      = "\u0031";
Panchami.digit_TWO      = "\u0032";
Panchami.digit_THREE    = "\u0033";
Panchami.digit_FOUR     = "\u0034";
Panchami.digit_FIVE     = "\u0035";
Panchami.digit_SIX      = "\u0036";
Panchami.digit_SEVEN    = "\u0037";
Panchami.digit_EIGHT    = "\u0038";
Panchami.digit_NINE     = "\u0039";

//Per the original note, the font matches ASCII from 0x00 to 0x7D
//(0x7E is taken by consnt_TTHA above).
//The punctuation below does not match ASCII and is mapped explicitly in toPadma.
Panchami.extra_QTSINGLE_1 = "\u2018";
Panchami.extra_QTSINGLE_2 = "\u2019";
Panchami.extra_QTDOUBLE_1 = "\u201C";
Panchami.extra_QTDOUBLE_2 = "\u201D";
Panchami.extra_HYPHEN     = "\u00AD";

//Not needed in the output: this is the ASCII hyphen-minus, and it is
//registered in Panchami.redundantList rather than in toPadma.
Panchami.misc_UNKNOWN_1  = "\u002D";

//Lookup table read by Panchami.lookup: each key is a Panchami glyph sequence
//(one of the constants defined above) and each value is built from constants
//of the Padma object, which is defined outside the visible part of this file.
//Assignment order matters: if two constants hold the same string, the later
//assignment replaces the earlier one.
Panchami.toPadma = new Object();

Panchami.toPadma[Panchami.anusvara] = Padma.anusvara;
Panchami.toPadma[Panchami.visarga]  = Padma.visarga;
Panchami.toPadma[Panchami.virama]   = Padma.chandrakkala;

Panchami.toPadma[Panchami.vowel_A]  = Padma.vowel_A;
Panchami.toPadma[Panchami.vowel_AA] = Padma.vowel_AA;
Panchami.toPadma[Panchami.vowel_I]  = Padma.vowel_I;
Panchami.toPadma[Panchami.vowel_II] = Padma.vowel_II;
Panchami.toPadma[Panchami.vowel_U]  = Padma.vowel_U;
Panchami.toPadma[Panchami.vowel_UU] = Padma.vowel_UU;
Panchami.toPadma[Panchami.vowel_R]  = Padma.vowel_R;
Panchami.toPadma[Panchami.vowel_RR] = Padma.vowel_RR;
Panchami.toPadma[Panchami.vowel_E]  = Padma.vowel_E;
Panchami.toPadma[Panchami.vowel_EE] = Padma.vowel_EE;
Panchami.toPadma[Panchami.vowel_AI] = Padma.vowel_AI;
Panchami.toPadma[Panchami.vowel_O]  = Padma.vowel_O;
Panchami.toPadma[Panchami.vowel_OO] = Padma.vowel_OO;
Panchami.toPadma[Panchami.vowel_AU] = Padma.vowel_AU;

Panchami.toPadma[Panchami.consnt_KA]  = Padma.consnt_KA;
Panchami.toPadma[Panchami.consnt_KHA] = Padma.consnt_KHA;
Panchami.toPadma[Panchami.consnt_GA]  = Padma.consnt_GA;
Panchami.toPadma[Panchami.consnt_GHA] = Padma.consnt_GHA;
Panchami.toPadma[Panchami.consnt_NGA] = Padma.consnt_NGA;

Panchami.toPadma[Panchami.consnt_CA]  = Padma.consnt_CA;
Panchami.toPadma[Panchami.consnt_CHA] = Padma.consnt_CHA;
Panchami.toPadma[Panchami.consnt_JA]  = Padma.consnt_JA;
Panchami.toPadma[Panchami.consnt_JHA] = Padma.consnt_JHA;
Panchami.toPadma[Panchami.consnt_NYA] = Padma.consnt_NYA;

Panchami.toPadma[Panchami.consnt_TTA]  = Padma.consnt_TTA;
Panchami.toPadma[Panchami.consnt_TTHA] = Padma.consnt_TTHA;
Panchami.toPadma[Panchami.consnt_DDA]  = Padma.consnt_DDA;
Panchami.toPadma[Panchami.consnt_DDHA] = Padma.consnt_DDHA;
Panchami.toPadma[Panchami.consnt_NNA]  = Padma.consnt_NNA;

Panchami.toPadma[Panchami.consnt_TA]  = Padma.consnt_TA;
Panchami.toPadma[Panchami.consnt_THA] = Padma.consnt_THA;
Panchami.toPadma[Panchami.consnt_DA]  = Padma.consnt_DA;
Panchami.toPadma[Panchami.consnt_DHA] = Padma.consnt_DHA;
Panchami.toPadma[Panchami.consnt_NA]  = Padma.consnt_NA;

Panchami.toPadma[Panchami.consnt_PA]  = Padma.consnt_PA;
Panchami.toPadma[Panchami.consnt_PHA] = Padma.consnt_PHA;
Panchami.toPadma[Panchami.consnt_BA]  = Padma.consnt_BA;
Panchami.toPadma[Panchami.consnt_BHA] = Padma.consnt_BHA;
Panchami.toPadma[Panchami.consnt_MA]  = Padma.consnt_MA;

Panchami.toPadma[Panchami.consnt_YA]  = Padma.consnt_YA;
Panchami.toPadma[Panchami.consnt_RA]  = Padma.consnt_RA;
Panchami.toPadma[Panchami.consnt_LA]  = Padma.consnt_LA;
Panchami.toPadma[Panchami.consnt_VA]  = Padma.consnt_VA;
Panchami.toPadma[Panchami.consnt_SHA] = Padma.consnt_SHA;
Panchami.toPadma[Panchami.consnt_SSA] = Padma.consnt_SSA;
Panchami.toPadma[Panchami.consnt_SA]  = Padma.consnt_SA;

Panchami.toPadma[Panchami.consnt_HA] = Padma.consnt_HA;
Panchami.toPadma[Panchami.consnt_LLA] = Padma.consnt_LLA;
Panchami.toPadma[Panchami.consnt_ZHA] = Padma.consnt_ZHA;
Panchami.toPadma[Panchami.consnt_RRA] = Padma.consnt_RRA;

//Dependent vowel signs (called "Gunintamulu" in the original source)
//NOTE(review): Panchami.vowelsn_RR is defined but has no entry here, so
//lookup() returns undefined for it. Possibly an omission -- confirm that the
//Padma object offers a matching constant before adding one.
Panchami.toPadma[Panchami.vowelsn_AA] = Padma.vowelsn_AA;
Panchami.toPadma[Panchami.vowelsn_I]  = Padma.vowelsn_I;
Panchami.toPadma[Panchami.vowelsn_II] = Padma.vowelsn_II;
Panchami.toPadma[Panchami.vowelsn_U]  = Padma.vowelsn_U;
Panchami.toPadma[Panchami.vowelsn_UU] = Padma.vowelsn_UU;
Panchami.toPadma[Panchami.vowelsn_R]  = Padma.vowelsn_R;
Panchami.toPadma[Panchami.vowelsn_E]  = Padma.vowelsn_E;
Panchami.toPadma[Panchami.vowelsn_EE] = Padma.vowelsn_EE;
Panchami.toPadma[Panchami.vowelsn_AI] = Padma.vowelsn_AI;
Panchami.toPadma[Panchami.vowelsn_AU] = Padma.vowelsn_AU;

//Chillu
//Each chillu glyph maps to a base consonant followed by Padma.chillu.
Panchami.toPadma[Panchami.chillu_ENN] = Padma.consnt_NNA + Padma.chillu;
Panchami.toPadma[Panchami.chillu_IN]  = Padma.consnt_NA + Padma.chillu;
Panchami.toPadma[Panchami.chillu_IR]  = Padma.consnt_RA + Padma.chillu;
Panchami.toPadma[Panchami.chillu_IL]  = Padma.consnt_LA + Padma.chillu;
Panchami.toPadma[Panchami.chillu_ILL] = Padma.consnt_LLA + Padma.chillu;

//vattulu (consonant signs)
Panchami.toPadma[Panchami.vattu_GA]  = Padma.vattu_GA;
Panchami.toPadma[Panchami.vattu_TTA] = Padma.vattu_TTA;
Panchami.toPadma[Panchami.vattu_DDA] = Padma.vattu_DDA;
Panchami.toPadma[Panchami.vattu_NNA] = Padma.vattu_NNA;
Panchami.toPadma[Panchami.vattu_TA]  = Padma.vattu_TA;
Panchami.toPadma[Panchami.vattu_DA]  = Padma.vattu_DA;
Panchami.toPadma[Panchami.vattu_DHA] = Padma.vattu_DHA;
Panchami.toPadma[Panchami.vattu_NA]  = Padma.vattu_NA;
Panchami.toPadma[Panchami.vattu_PA]  = Padma.vattu_PA;
Panchami.toPadma[Panchami.vattu_MA]  = Padma.vattu_MA;
Panchami.toPadma[Panchami.vattu_YA]  = Padma.vattu_YA;
Panchami.toPadma[Panchami.vattu_RA]  = Padma.vattu_RA;
Panchami.toPadma[Panchami.vattu_LA]  = Padma.vattu_LA;
Panchami.toPadma[Panchami.vattu_VA]  = Padma.vattu_VA;
Panchami.toPadma[Panchami.vattu_SA]  = Padma.vattu_SA;

//Conjuncts (called "kooTTaksharangngaL" in the original source)
//Each conjunct glyph maps to a Padma consonant followed by a Padma vattu.
Panchami.toPadma[Panchami.conj_KK]   = Padma.consnt_KA +  Padma.vattu_KA;
Panchami.toPadma[Panchami.conj_KSH]  = Padma.consnt_KA +  Padma.vattu_SSA;
Panchami.toPadma[Panchami.conj_NGK]  = Padma.consnt_NGA +  Padma.vattu_KA;
Panchami.toPadma[Panchami.conj_NGNG] = Padma.consnt_NGA +  Padma.vattu_NGA;

Panchami.toPadma[Panchami.conj_CC]   = Padma.consnt_CA +  Padma.vattu_CA;
Panchami.toPadma[Panchami.conj_CCH]  = Padma.consnt_CA +  Padma.vattu_CHA;
Panchami.toPadma[Panchami.conj_JJ]   = Padma.consnt_JA +  Padma.vattu_JA;
Panchami.toPadma[Panchami.conj_NYC]  = Padma.consnt_NYA +  Padma.vattu_CA;
Panchami.toPadma[Panchami.conj_NYNY] = Padma.consnt_NYA +  Padma.vattu_NYA;

Panchami.toPadma[Panchami.conj_TTTT] = Padma.consnt_TTA +  Padma.vattu_TTA;
Panchami.toPadma[Panchami.conj_NNTT] = Padma.consnt_NNA +  Padma.vattu_TTA;
Panchami.toPadma[Panchami.conj_NNDD] = Padma.consnt_NNA +  Padma.vattu_DDA;

Panchami.toPadma[Panchami.conj_T_T]  = Padma.consnt_TA +  Padma.vattu_TA;
Panchami.toPadma[Panchami.conj_T_TH] = Padma.consnt_TA +  Padma.vattu_THA;
Panchami.toPadma[Panchami.conj_TBH]  = Padma.consnt_TA +  Padma.vattu_BHA;
Panchami.toPadma[Panchami.conj_TS]   = Padma.consnt_TA +  Padma.vattu_SA;
Panchami.toPadma[Panchami.conj_DD]   = Padma.consnt_DA +  Padma.vattu_DA;
Panchami.toPadma[Panchami.conj_D_DH] = Padma.consnt_DA +  Padma.vattu_DHA;
Panchami.toPadma[Panchami.conj_NT]   = Padma.consnt_NA +  Padma.vattu_TA;
Panchami.toPadma[Panchami.conj_ND]   = Padma.consnt_NA +  Padma.vattu_DA;
Panchami.toPadma[Panchami.conj_NDH]  = Padma.consnt_NA +  Padma.vattu_DHA;
Panchami.toPadma[Panchami.conj_N_N]  = Padma.consnt_NA +  Padma.vattu_NA;
Panchami.toPadma[Panchami.conj_NM]   = Padma.consnt_NA +  Padma.vattu_MA;
//Both spellings of NRR map to the same value.
Panchami.toPadma[Panchami.conj_NRR_1] = Padma.consnt_NA +  Padma.vattu_RRA;
Panchami.toPadma[Panchami.conj_NRR_2] = Padma.consnt_NA +  Padma.vattu_RRA;

Panchami.toPadma[Panchami.conj_PP]  = Padma.consnt_PA +  Padma.vattu_PA;
Panchami.toPadma[Panchami.conj_BB]  = Padma.consnt_BA +  Padma.vattu_BA;
Panchami.toPadma[Panchami.conj_MP]  = Padma.consnt_MA +  Padma.vattu_PA;
Panchami.toPadma[Panchami.conj_MM]  = Padma.consnt_MA +  Padma.vattu_MA;

Panchami.toPadma[Panchami.conj_YY]  = Padma.consnt_YA +  Padma.vattu_YA;
Panchami.toPadma[Panchami.conj_L_L] = Padma.consnt_LA +  Padma.vattu_LA;
Panchami.toPadma[Panchami.conj_VV]  = Padma.consnt_VA +  Padma.vattu_VA;

Panchami.toPadma[Panchami.conj_SHC]  = Padma.consnt_SHA +  Padma.vattu_CA;
//NOTE(review): conj_SHSH and conj_HN are both "\u00DB", so this entry is
//replaced by the conj_HN assignment below and SHSH is never produced.
Panchami.toPadma[Panchami.conj_SHSH] = Padma.consnt_SHA +  Padma.vattu_SHA;
Panchami.toPadma[Panchami.conj_SRR]  = Padma.consnt_SA +  Padma.vattu_RRA;

Panchami.toPadma[Panchami.conj_HN]   = Padma.consnt_HA +  Padma.vattu_NA;
Panchami.toPadma[Panchami.conj_LLLL] = Padma.consnt_LLA +  Padma.vattu_LLA;

Panchami.toPadma[Panchami.conj_RRRR] = Padma.consnt_RRA +  Padma.vattu_RRA;

//Miscellaneous (where it doesn't match the ASCII representation)
//Curly quotes are mapped to plain ASCII quotes and \u00AD to '-'.
Panchami.toPadma[Panchami.extra_QTSINGLE_1] = "'";
Panchami.toPadma[Panchami.extra_QTSINGLE_2] = "'";
Panchami.toPadma[Panchami.extra_QTDOUBLE_1] = '"';
Panchami.toPadma[Panchami.extra_QTDOUBLE_2] = '"';
Panchami.toPadma[Panchami.extra_HYPHEN]   = '-';

//Membership tables read by Panchami.isRedundant, Panchami.isPrefixSymbol and
//Panchami.isOverloaded. Each table stores true under every glyph sequence
//that belongs to it; a missing key means "not a member".
(function ()
{
    //Builds a new object holding one true-valued entry per glyph, in array order.
    function flagEach(glyphs)
    {
        var flags = {};
        for (var k = 0; k < glyphs.length; k++)
        {
            flags[glyphs[k]] = true;
        }
        return flags;
    }

    Panchami.redundantList = flagEach([
        Panchami.misc_UNKNOWN_1
    ]);

    //NOTE(review): presumably glyphs that appear before the consonant they
    //belong to -- confirm against the Padma host.
    Panchami.prefixList = flagEach([
        Panchami.vattu_RA,
        Panchami.vowelsn_E,
        Panchami.vowelsn_EE,
        Panchami.vowelsn_AI
    ]);

    //Every glyph listed here is also the first character of a two-character
    //sequence defined in this file (e.g. vowel_I begins vowel_II).
    Panchami.overloadList = flagEach([
        Panchami.vowel_I,
        Panchami.vowel_U,
        Panchami.vowel_R,
        Panchami.vowel_O,
        Panchami.consnt_LA,
        Panchami.chillu_IN,
        Panchami.vowelsn_R,
        Panchami.vowelsn_E
    ]);
})();

我想让这段代码运行起来:转换一个文本框中的文本,并将结果放入另一个输入框中。

更新:很抱歉之前没有解释清楚。这里涉及的是一种印度语言。在 Unicode(UTF-8)出现之前,这种语言没有标准化的字符编码(我想我没说错),因此人们会根据各自的需要制作字体。该文本所用的字体见链接:This is the font。Unicode 出现之后,已经有许多标准字体可以在网页上正确显示,并被新网站乃至各类网站采用,见链接:Here is the list of some unicode fonts。已经有人用 Python 写过一些转换脚本,见链接:python code。我粘贴的 JavaScript 代码来自 Firefox 的一个插件,它能把使用这种字体的网页转换为标准 Unicode。这个插件很大,支持 30 多种语言;我只需要其中这一种,所以只取了这个文件。但我弄不清楚该如何使用它。希望这个解释足够清楚。

2 个答案:

答案 0 :(得分:0)

这会有用吗?

// Table-lookup pass over the input: replaces every glyph sequence known to
// Panchami.toPadma with its mapped value and copies everything else through.
// NOTE(review): this is only the lookup step. It does not reorder prefix
// symbols (Panchami.isPrefixSymbol) or merge two-part vowel signs
// (Panchami.handleTwoPartVowelSigns), and it assumes the Padma constants
// returned by lookup() are the desired output characters -- confirm both
// against the Padma host code.
var s = "..."; // your input string
var r = ""; // result
var pos = 0;
while (pos < s.length) {
    // Try the longest sequence first: some signs are two characters long
    // (Panchami.maxLookupLen) and their first character is a key of its own.
    var taken = Math.max(1, Math.min(Panchami.maxLookupLen, s.length - pos));
    var mapped = Panchami.lookup(s.substring(pos, pos + taken));
    while (mapped == null && taken > 1) {
        taken--;
        mapped = Panchami.lookup(s.substring(pos, pos + taken));
    }
    // Characters with no mapping (spaces, digits, plain ASCII) are kept as-is
    // instead of being appended as the text "undefined".
    r += (mapped == null) ? s.substring(pos, pos + taken) : mapped;
    pos += taken;
}
alert(r);

答案 1 :(得分:0)

关闭问题,因为问题是关于社区不知道的当地问题。