utf8 mysql字符串音译

时间:2012-07-28 09:25:30

标签: mysql utf-8

mysql中是否有类似于php的iconv的函数将重音的utf8字符转换为ascii字符串?

我想要SQL查询,如下所示:

SELECT some_func("LĄBĄS VĄKĄRŪŠ")

并返回labas vakarus

3 个答案:

答案 0 :(得分:2)

我找到了这个 MySQL 音译(transliteration)函数,它可能就是你要找的东西。

您可以在作者的相关博客文章中阅读更多信息。

建议:

我的建议是不要相信MySQL和PHP iconv()音译产生相同的结果,因为它们都是猜测。如果你只是坚持使用MySQL或PHP方面,那将是最好的。

答案 1 :(得分:1)

一个在MySQL中生成搜索引擎友好的URL地址(slug)的函数。

DELIMITER ;;

-- cool_url: build a search-engine-friendly URL slug from a UTF-8 string.
--
-- The input is trimmed and lower-cased, then handled one character at a time:
--   * ASCII letters and digits are kept as they are;
--   * the listed Latin letters with diacritics and Cyrillic letters are
--     replaced by an ASCII equivalent (possibly several characters, or none);
--   * every other character becomes '-'.
-- Runs of dashes are then collapsed to a single dash and leading/trailing
-- dashes are removed.
--
-- Parameter: original - text to convert (utf8mb4, compared with utf8mb4_bin).
-- Returns:   the slug, at most 512 characters. An input with no mappable
--            character yields ''; so does NULL, because the loop never runs.
--
-- DETERMINISTIC / NO SQL: the result depends only on the argument and the
-- body touches no tables. Without a characteristic like this, CREATE FUNCTION
-- is rejected when binary logging is enabled (error 1418).
CREATE FUNCTION `cool_url` (`original` varchar(512) CHARSET utf8mb4 COLLATE utf8mb4_bin) RETURNS varchar(512) CHARSET utf8mb4
    DETERMINISTIC
    NO SQL
BEGIN

    -- One input character can expand to four ('щ' -> 'shch'), so the work
    -- buffer is sized for 4 * 512 characters.
    DECLARE translit VARCHAR(2048) CHARSET utf8mb4 DEFAULT '';
    DECLARE len INT DEFAULT 0;
    DECLARE pos INT DEFAULT 1;
    -- Must hold the longest replacement, 'shch'.
    DECLARE letter VARCHAR(4) CHARSET utf8mb4 COLLATE utf8mb4_bin;

    SET original = TRIM(LOWER(original));
    SET len = CHAR_LENGTH(original);

    WHILE (pos <= len) DO

        -- Take one character from the input.
        SET letter = SUBSTRING(original, pos, 1) COLLATE utf8mb4_bin;

        -- The explicit binary collation makes every comparison exact, so an
        -- accented letter never matches its base letter by accident.
        CASE
          -- Basic characters are kept unchanged (a CASE branch needs a statement).
          WHEN letter IN ('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','0','1','2','3','4','5','6','7','8','9' COLLATE utf8mb4_bin) THEN SET letter = letter;
          -- Latin letters with diacritics and Cyrillic letters, one-to-one.
          -- The last item of the 'a' list and the 'е' before 'э' in the 'e'
          -- list are the Cyrillic letters, not their Latin look-alikes.
          WHEN letter IN('á','à','â','ä','å','ā','ą','ă','а' COLLATE utf8mb4_bin) THEN SET letter = 'a';
          WHEN letter IN('б' COLLATE utf8mb4_bin) THEN SET letter = 'b';
          WHEN letter IN('č','ć','ç' COLLATE utf8mb4_bin) THEN SET letter = 'c';
          WHEN letter IN('ď','đ','д' COLLATE utf8mb4_bin) THEN SET letter = 'd';
          WHEN letter IN('é','ě','ë','ё','è','ê','ē','ę','е','э','œ' COLLATE utf8mb4_bin) THEN SET letter = 'e';
          WHEN letter IN('ф','ѳ' COLLATE utf8mb4_bin) THEN SET letter = 'f';
          WHEN letter IN('ģ','ğ','г' COLLATE utf8mb4_bin) THEN SET letter = 'g';
          WHEN letter IN('í','î','ï','ī','і','и','й' COLLATE utf8mb4_bin) THEN SET letter = 'i';
          WHEN letter IN('ķ','к' COLLATE utf8mb4_bin) THEN SET letter = 'k';
          WHEN letter IN('ľ','ĺ','ļ','ł','л' COLLATE utf8mb4_bin) THEN SET letter = 'l';
          WHEN letter IN('м' COLLATE utf8mb4_bin) THEN SET letter = 'm';
          WHEN letter IN('ň','ņ','ń','ñ','н' COLLATE utf8mb4_bin) THEN SET letter = 'n';
          WHEN letter IN('ó','ö','ø','õ','ô','ő','ơ','о' COLLATE utf8mb4_bin) THEN SET letter = 'o';
          WHEN letter IN('п' COLLATE utf8mb4_bin) THEN SET letter = 'p';
          WHEN letter IN('ŕ','ř','р' COLLATE utf8mb4_bin) THEN SET letter = 'r';
          WHEN letter IN('š','ś','ș','ş','с','ß' COLLATE utf8mb4_bin) THEN SET letter = 's';
          WHEN letter IN('ť','ț','т' COLLATE utf8mb4_bin) THEN SET letter = 't';
          WHEN letter IN('ú','ů','ü','ù','û','ū','ű','ư','у' COLLATE utf8mb4_bin) THEN SET letter = 'u';
          WHEN letter IN('в' COLLATE utf8mb4_bin) THEN SET letter = 'v';
          WHEN letter IN('ý','ы','ѵ' COLLATE utf8mb4_bin) THEN SET letter = 'y';
          WHEN letter IN('ž','ź','ż','з' COLLATE utf8mb4_bin) THEN SET letter = 'z';
          -- Cyrillic letters that expand to several characters or to nothing.
          WHEN letter = 'х' COLLATE utf8mb4_bin THEN SET letter = 'kh'; -- is not X
          WHEN letter = 'ю' COLLATE utf8mb4_bin THEN SET letter = 'ju';
          WHEN letter = 'я' COLLATE utf8mb4_bin THEN SET letter = 'ja';
          WHEN letter = 'ж' COLLATE utf8mb4_bin THEN SET letter = 'zh';
          WHEN letter = 'ч' COLLATE utf8mb4_bin THEN SET letter = 'ch';
          WHEN letter = 'ш' COLLATE utf8mb4_bin THEN SET letter = 'sh';
          WHEN letter = 'щ' COLLATE utf8mb4_bin THEN SET letter = 'shch';
          WHEN letter = 'ѣ' COLLATE utf8mb4_bin THEN SET letter = 'ie';
          WHEN letter = 'ц' COLLATE utf8mb4_bin THEN SET letter = 'ts';
          WHEN letter = 'ъ' COLLATE utf8mb4_bin THEN SET letter = ''; -- hard sign
          WHEN letter = 'ь' COLLATE utf8mb4_bin THEN SET letter = ''; -- soft sign

          -- Anything else (spaces, punctuation, unknown letters) is a separator.
          ELSE SET letter = '-';
        END CASE;

        -- Append the replacement to the result.
        SET translit = CONCAT(translit, letter);

        SET pos = pos + 1;
    END WHILE;

    -- Collapse runs of dashes; each pass halves a run until no '--' is left.
    WHILE LOCATE('--', translit) > 0 DO
        SET translit = REPLACE(translit, '--', '-');
    END WHILE;

    -- Cut to the declared return length first, so a dash exposed by the cut
    -- is trimmed as well.
    RETURN TRIM(BOTH '-' FROM LEFT(translit, 512));

END;;
DELIMITER ;

示例(测试):

mysql> SELECT cool_url('BŒautiful day áàâäåāąă б čćçć ďđд éěëёèêēęьэœ фѳ ģğг íîïīîіий ķк ľĺļłл м ňņńñн óöøõôőơо п ŕřр šśșşсß ťțт úůüùûūűưу в ýыѵ žźżз х ю я ж ч ш щ ѣ ц ъ ь');
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| beautiful-day-aaaaaaaa-b-cccc-ddd-eeeeeeeeeee-ff-ggg-iiiiiiii-kk-lllll-m-nnnnn-oooooooo-p-rrr-ssssss-ttt-uuuuuuuuu-v-yyy-zzzz-kh-ju-ja-zh-ch-sh-sh-ie-ts-e                  |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.01 sec)

答案 2 :(得分:1)

我需要一个简单但快速的解决方案来为wp中的post标签创建数千个slug。所以这是我从上面的答案中取得的代码,我修改了元音和数字。希望它能在某一天帮助某人,因为它今天对我有所帮助。 :)

DELIMITER $$

DROP FUNCTION IF EXISTS `transliterate` $$

-- transliterate: turn a string into an ASCII slug.
--
-- The input is trimmed and lower-cased, then handled one character at a time:
-- listed Latin letters with diacritics and Cyrillic letters are replaced by
-- ASCII text, ASCII letters and digits are kept, and everything else becomes
-- '-'. Runs of dashes are collapsed to one and leading/trailing dashes are
-- removed.
--
-- Parameter: original - text to convert.
-- Returns:   the slug, at most 512 characters. A NULL argument yields '',
--            because the loop never runs.
--
-- No character set or collation is declared on the parameter or the locals,
-- so the defaults in effect when the function is created decide how the
-- comparisons below match.
-- NOTE(review): confirm the default is a Unicode character set.
--
-- DETERMINISTIC / NO SQL: the result depends only on the argument and the
-- body touches no tables. Without a characteristic like this, CREATE FUNCTION
-- is rejected when binary logging is enabled (error 1418).
CREATE FUNCTION `transliterate` (original VARCHAR(512)) RETURNS VARCHAR(512)
    DETERMINISTIC
    NO SQL
BEGIN

  -- One input character can expand to two ('ю' -> 'ju'), so the work buffer
  -- is sized for 2 * 512 characters.
  DECLARE translit VARCHAR(1024) DEFAULT '';
  DECLARE len INT DEFAULT 0;
  DECLARE pos INT DEFAULT 1;
  DECLARE letter CHAR(2);

  SET original = TRIM(LOWER(original));
  SET len = CHAR_LENGTH(original);

  WHILE (pos <= len) DO
    SET letter = SUBSTRING(original, pos, 1);

    -- Branches are tried top to bottom; the first match wins.
    CASE

      -- Latin letters with diacritics.
      WHEN letter IN ('á','à','â','ä','å','ā','ą','ă') THEN SET letter = 'a';
      WHEN letter IN ('č','ć','ç') THEN SET letter = 'c';
      WHEN letter IN ('ď','đ') THEN SET letter = 'd';
      WHEN letter IN ('é','ě','ë','è','ê','ē','ę') THEN SET letter = 'e';
      WHEN letter IN ('ģ','ğ') THEN SET letter = 'g';
      WHEN letter IN ('í','î','ï','ī') THEN SET letter = 'i';
      WHEN letter IN ('ķ') THEN SET letter = 'k';
      WHEN letter IN ('ľ','ĺ','ļ','ł') THEN SET letter = 'l';
      WHEN letter IN ('ň','ņ','ń','ñ') THEN SET letter = 'n';
      WHEN letter IN ('ó','ö','ø','õ','ô','ő','ơ') THEN SET letter = 'o';
      WHEN letter IN ('ŕ','ř') THEN SET letter = 'r';
      WHEN letter IN ('š','ś','ș','ş') THEN SET letter = 's';
      WHEN letter IN ('ť','ț') THEN SET letter = 't';
      WHEN letter IN ('ú','ů','ü','ù','û','ū','ű','ư') THEN SET letter = 'u';
      WHEN letter IN ('ý') THEN SET letter = 'y';
      WHEN letter IN ('ž','ź','ż') THEN SET letter = 'z';

      -- Cyrillic letters, in alphabet order; the literals are Cyrillic even
      -- where they look like Latin letters.
      WHEN letter = 'а' THEN SET letter = 'a';
      WHEN letter = 'б' THEN SET letter = 'b';
      WHEN letter = 'в' THEN SET letter = 'v';
      WHEN letter = 'г' THEN SET letter = 'g';
      WHEN letter = 'д' THEN SET letter = 'd';
      WHEN letter IN ('е','ё') THEN SET letter = 'e';
      WHEN letter IN ('ж','з') THEN SET letter = 'z';
      WHEN letter IN ('и','й') THEN SET letter = 'i';
      WHEN letter = 'к' THEN SET letter = 'k';
      WHEN letter = 'л' THEN SET letter = 'l';
      WHEN letter = 'м' THEN SET letter = 'm';
      WHEN letter = 'н' THEN SET letter = 'n';
      WHEN letter = 'о' THEN SET letter = 'o';
      WHEN letter = 'п' THEN SET letter = 'p';
      WHEN letter = 'р' THEN SET letter = 'r';
      WHEN letter = 'с' THEN SET letter = 's';
      WHEN letter = 'т' THEN SET letter = 't';
      WHEN letter = 'у' THEN SET letter = 'u';
      WHEN letter = 'ф' THEN SET letter = 'f';
      WHEN letter = 'х' THEN SET letter = 'ch';
      WHEN letter IN ('ц','ч') THEN SET letter = 'c';
      WHEN letter IN ('ш','щ') THEN SET letter = 's';
      WHEN letter = 'ъ' THEN SET letter = '';   -- hard sign is dropped
      WHEN letter = 'ы' THEN SET letter = 'y';
      WHEN letter IN ('ь','э') THEN SET letter = 'e';  -- soft sign -> 'e' kept from the original mapping
      WHEN letter = 'ю' THEN SET letter = 'ju';
      WHEN letter = 'я' THEN SET letter = 'ja';

      -- ASCII letters and digits are kept unchanged (a CASE branch needs a
      -- statement). This branch stays after the accented ones.
      -- NOTE(review): under an accent-insensitive collation an accented
      -- letter could otherwise match its base letter here and pass through
      -- untransliterated - confirm before reordering.
      WHEN letter IN ('a','b','c','d','e','f','g','h','i','j','k','l','m',
                      'n','o','p','q','r','s','t','u','v','w','x','y','z',
                      '0','1','2','3','4','5','6','7','8','9')
        THEN SET letter = letter;

      -- Anything else (spaces, punctuation, unknown letters) is a separator.
      ELSE
        SET letter = '-';

    END CASE;

    SET translit = CONCAT(translit, letter);
    SET pos = pos + 1;
  END WHILE;

  -- Collapse runs of dashes; each pass halves a run until no '--' is left.
  WHILE LOCATE('--', translit) > 0 DO
    SET translit = REPLACE(translit, '--', '-');
  END WHILE;

  -- Cut to the declared return length first, so a dash exposed by the cut
  -- is trimmed as well.
  RETURN TRIM(BOTH '-' FROM LEFT(translit, 512));

END $$

DELIMITER ;