From 879205e1ab4bfb4518e96376849495cb62329f7c Mon Sep 17 00:00:00 2001 From: Andreas Gohr <andi@splitbrain.org> Date: Thu, 8 May 2008 23:24:44 +0200 Subject: [PATCH] Japanese romanization update Down to 57 fails darcs-hash:20080508212444-7ad00-16286e9f5be2bbbd3069d5c22ab8c270b2e1b23e.gz --- _test/cases/inc/utf8_romanize.test.php | 4 +- inc/utf8.php | 54 +++++++++++++++----------- 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/_test/cases/inc/utf8_romanize.test.php b/_test/cases/inc/utf8_romanize.test.php index e1c1be7c2..08f561f5c 100644 --- a/_test/cases/inc/utf8_romanize.test.php +++ b/_test/cases/inc/utf8_romanize.test.php @@ -12,12 +12,14 @@ class utf8_substr_test extends UnitTestCase { */ function test_japanese(){ $tests = file(dirname(__FILE__).'/utf8_kanaromaji.txt'); + $line = 1; foreach($tests as $test){ list($jap,$rom) = explode(';',trim($test)); $chk = utf8_romanize($jap); - #if($chk != $rom) echo "$jap\t->\t$chk\t!=\t$rom\n"; + #if($chk != $rom) echo "$jap\t->\t$chk\t!=\t$rom\t($line)\n"; $this->assertEqual($chk,$rom); + $line++; } } } diff --git a/inc/utf8.php b/inc/utf8.php index d79526179..7291987b0 100644 --- a/inc/utf8.php +++ b/inc/utf8.php @@ -1221,25 +1221,28 @@ $UTF8_ROMANIZATION = array( 'Ø·'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','Ù'=>'f','Ù‚'=>'q','Ùƒ'=>'k', 'Ù„'=>'l','Ù…'=>'m','Ù†'=>'n','Ù‡'=>'x\'','Ùˆ'=>'u','ÙŠ'=>'i', - // Japanese characters (last update: 2008-05-05) - + // Japanese characters (last update: 2008-05-08) + // Japanese hiragana // 3 character syllables, 㣠doubles the consonant after 'ã£ã¡ã‚ƒ'=>'ccha','ã£ã¡ã‡'=>'cche','ã£ã¡ã‚‡'=>'ccho','ã£ã¡ã‚…'=>'cchu', - 'ã£ã³ã‚ƒ'=>'bya','ã£ã³ã‡'=>'bye','ã£ã³ãƒ'=>'byi','ã£ã³ã‚‡'=>'byo','ã£ã³ã‚…'=>'byu', - 'ã£ã¡ã‚ƒ'=>'cha','ã£ã¡ã‡'=>'che','ã£ã¡'=>'chi','ã£ã¡ã‚‡'=>'cho','ã£ã¡ã‚…'=>'chu', - 'ã£ã²ã‚ƒ'=>'hya','ã£ã²ã‡'=>'hye','ã£ã²ãƒ'=>'hyi','ã£ã²ã‚‡'=>'hyo','ã£ã²ã‚…'=>'hyu', - 'ã£ãゃ'=>'kya','ã£ãã‡'=>'kye','ã£ããƒ'=>'kyi','ã£ãょ'=>'kyo','ã£ãã‚…'=>'kyu', - 'ã£ãŽã‚ƒ'=>'gya','ã£ãŽã‡'=>'gye','ã£ãŽãƒ'=>'gyi','ã£ãŽã‚‡'=>'gyo','ã£ãŽã‚…'=>'gyu', - 'ã£ã¿ã‚ƒ'=>'mya','ã£ã¿ã‡'=>'mye','ã£ã¿ãƒ'=>'myi','ã£ã¿ã‚‡'=>'myo','ã£ã¿ã‚…'=>'myu', - 'ã£ã«ã‚ƒ'=>'nya','ã£ã«ã‡'=>'nye','ã£ã«ãƒ'=>'nyi','ã£ã«ã‚‡'=>'nyo','ã£ã«ã‚…'=>'nyu', - 'ã£ã‚Šã‚ƒ'=>'rya','ã£ã‚Šã‡'=>'rye','ã£ã‚Šãƒ'=>'ryi','ã£ã‚Šã‚‡'=>'ryo','ã£ã‚Šã‚…'=>'ryu', - 'ã£ã—ゃ'=>'sha','ã£ã—ã‡'=>'she','ã£ã—'=>'shi','ã£ã—ょ'=>'sho','ã£ã—ã‚…'=>'shu', - + 'ã£ã³ã‚ƒ'=>'bbya','ã£ã³ã‡'=>'bbye','ã£ã³ãƒ'=>'bbyi','ã£ã³ã‚‡'=>'bbyo','ã£ã³ã‚…'=>'bbyu', + 'ã£ã¡ã‚ƒ'=>'ccha','ã£ã¡ã‡'=>'cche','ã£ã¡'=>'cchi','ã£ã¡ã‚‡'=>'ccho','ã£ã¡ã‚…'=>'cchu', + // 'ã£ã²ã‚ƒ'=>'hya','ã£ã²ã‡'=>'hye','ã£ã²ãƒ'=>'hyi','ã£ã²ã‚‡'=>'hyo','ã£ã²ã‚…'=>'hyu', + 'ã£ãゃ'=>'kkya','ã£ãã‡'=>'kkye','ã£ããƒ'=>'kkyi','ã£ãょ'=>'kkyo','ã£ãã‚…'=>'kkyu', + 'ã£ãŽã‚ƒ'=>'ggya','ã£ãŽã‡'=>'ggye','ã£ãŽãƒ'=>'ggyi','ã£ãŽã‚‡'=>'ggyo','ã£ãŽã‚…'=>'ggyu', + 'ã£ã¿ã‚ƒ'=>'mmya','ã£ã¿ã‡'=>'mmye','ã£ã¿ãƒ'=>'mmyi','ã£ã¿ã‚‡'=>'mmyo','ã£ã¿ã‚…'=>'mmyu', + 'ã£ã«ã‚ƒ'=>'nnya','ã£ã«ã‡'=>'nnye','ã£ã«ãƒ'=>'nnyi','ã£ã«ã‚‡'=>'nnyo','ã£ã«ã‚…'=>'nnyu', + 'ã£ã‚Šã‚ƒ'=>'rrya','ã£ã‚Šã‡'=>'rrye','ã£ã‚Šãƒ'=>'rryi','ã£ã‚Šã‚‡'=>'rryo','ã£ã‚Šã‚…'=>'rryu', + 'ã£ã—ゃ'=>'ssha','ã£ã—ã‡'=>'sshe','ã£ã—'=>'sshi','ã£ã—ょ'=>'ssho','ã£ã—ã‚…'=>'sshu', + + // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway) + 'ã‚“ã‚'=>'n_a','ã‚“ãˆ'=>'n_e','ã‚“ã„'=>'n_i','ã‚“ãŠ'=>'n_o','ã‚“ã†'=>'n_u', + 'ã‚“ã‚„'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu', + // 2 character syllables - normal - 'ãµã'=>'fa','ãµã‡'=>'fe','ãµãƒ'=>'fi','ãµã‰'=>'fo','ãµ'=>'fu', - 'ヴã'=>'va','ヴã‡'=>'ve','ヴãƒ'=>'vi','ヴã‰'=>'vo','ヴ'=>'vu', + 'ãµã'=>'fa','ãµã‡'=>'fe','ãµãƒ'=>'fi','ãµã‰'=>'fo', 'ã³ã‚ƒ'=>'bya','ã³ã‡'=>'bye','ã³ãƒ'=>'byi','ã³ã‚‡'=>'byo','ã³ã‚…'=>'byu', 'ã¡ã‚ƒ'=>'cha','ã¡ã‡'=>'che','ã¡'=>'chi','ã¡ã‚‡'=>'cho','ã¡ã‚…'=>'chu', 'ã²ã‚ƒ'=>'hya','ã²ã‡'=>'hye','ã²ãƒ'=>'hyi','ã²ã‚‡'=>'hyo','ã²ã‚…'=>'hyu', @@ -1249,7 +1252,9 @@ $UTF8_ROMANIZATION = array( 'ã«ã‚ƒ'=>'nya','ã«ã‡'=>'nye','ã«ãƒ'=>'nyi','ã«ã‚‡'=>'nyo','ã«ã‚…'=>'nyu', 'りゃ'=>'rya','ã‚Šã‡'=>'rye','ã‚Šãƒ'=>'ryi','りょ'=>'ryo','ã‚Šã‚…'=>'ryu', 'ã—ゃ'=>'sha','ã—ã‡'=>'she','ã—'=>'shi','ã—ょ'=>'sho','ã—ã‚…'=>'shu', - 'ã˜ã‚ƒ'=>'ja','ã˜ã‡'=>'je','ã˜'=>'ji','ã˜ã‚‡'=>'jo','ã˜ã‚…'=>'ju', + 'ã˜ã‚ƒ'=>'ja','ã˜ã‡'=>'je','ã˜ã‚‡'=>'jo','ã˜ã‚…'=>'ju', + 'ã†ã‡'=>'we','ã†ãƒ'=>'wi', + 'ã„ã‡'=>'ye', // 2 character syllables, 㣠doubles the consonant after 'ã£ã°'=>'bba','ã£ã¹'=>'bbe','ã£ã³'=>'bbi','ã£ã¼'=>'bbo','ã£ã¶'=>'bbu', @@ -1266,7 +1271,7 @@ $UTF8_ROMANIZATION = array( // 1 character syllabels 'ã‚'=>'a','ãˆ'=>'e','ã„'=>'i','ãŠ'=>'o','ã†'=>'u','ã‚“'=>'n', - 'ã¯'=>'ha','ã¸'=>'he','ã²'=>'hi','ã»'=>'ho','ãµ'=>'hu', + 'ã¯'=>'ha','ã¸'=>'he','ã²'=>'hi','ã»'=>'ho','ãµ'=>'fu', 'ã°'=>'ba','ã¹'=>'be','ã³'=>'bi','ã¼'=>'bo','ã¶'=>'bu', 'ã±'=>'pa','ãº'=>'pe','ã´'=>'pi','ã½'=>'po','ã·'=>'pu', 'ãŸ'=>'ta','ã¦'=>'te','ã¡'=>'chi','ã¨'=>'to','ã¤'=>'tsu', @@ -1277,9 +1282,9 @@ $UTF8_ROMANIZATION = array( 'ãª'=>'na','ã'=>'ne','ã«'=>'ni','ã®'=>'no','ã¬'=>'nu', 'ら'=>'ra','ã‚Œ'=>'re','ã‚Š'=>'ri','ã‚'=>'ro','ã‚‹'=>'ru', 'ã•'=>'sa','ã›'=>'se','ã—'=>'shi','ã'=>'so','ã™'=>'su', - 'ã‚'=>'wa','ã†ã‡'=>'we','ã†ãƒ'=>'wi','ã‚’'=>'wo', - 'ã–'=>'za','ãœ'=>'ze','ã˜'=>'zi','ãž'=>'zo','ãš'=>'zu', - 'ã‚„'=>'ya','ã„ã‡'=>'ye','よ'=>'yo','ゆ'=>'yu', + 'ã‚'=>'wa','ã‚’'=>'wo', + 'ã–'=>'za','ãœ'=>'ze','ã˜'=>'ji','ãž'=>'zo','ãš'=>'zu', + 'ã‚„'=>'ya','よ'=>'yo','ゆ'=>'yu', // old characters 'ã‚‘'=>'we','ã‚'=>'wi', @@ -1288,6 +1293,7 @@ $UTF8_ROMANIZATION = array( // 'ゃ'=>'ya','ょ'=>'yo','ã‚…'=>'yu', // never seen one of those (disabled for the moment) + // 'ヴã'=>'va','ヴã‡'=>'ve','ヴãƒ'=>'vi','ヴã‰'=>'vo','ヴ'=>'vu', // 'ã§ã‚ƒ'=>'dha','ã§ã‡'=>'dhe','ã§ãƒ'=>'dhi','ã§ã‚‡'=>'dho','ã§ã‚…'=>'dhu', // 'ã©ã'=>'dwa','ã©ã‡'=>'dwe','ã©ãƒ'=>'dwi','ã©ã‰'=>'dwo','ã©ã…'=>'dwu', // 'ã¢ã‚ƒ'=>'dya','ã¢ã‡'=>'dye','ã¢ãƒ'=>'dyi','ã¢ã‚‡'=>'dyo','ã¢ã‚…'=>'dyu', @@ -1390,8 +1396,8 @@ $UTF8_ROMANIZATION = array( 'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu', 'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu', 'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu', - 'シャ'=>'sha','シェ'=>'she','ã‚·'=>'shi','ショ'=>'sho','シュ'=>'shu', - 'ジャ'=>'ja','ジェ'=>'je','ジ'=>'ji','ジョ'=>'jo','ジュ'=>'ju', + 'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu', + 'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju', 'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu', 'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du', 'ãƒãƒ£'=>'cha','ãƒã‚§'=>'che','ãƒ'=>'chi','ãƒãƒ§'=>'cho','ãƒãƒ¥'=>'chu', @@ -1425,6 +1431,10 @@ $UTF8_ROMANIZATION = array( // old characters 'ヱー'=>'wee','ヰー'=>'wii', + // seperate katakana 'n' + 'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u', + 'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンユ'=>'n_yu', + // 2 character syllables - doubled consonants 'ッãƒ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu', 'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッãƒ'=>'ppo','ップ'=>'ppu', @@ -1449,7 +1459,7 @@ $UTF8_ROMANIZATION = array( 'ナ'=>'na','ãƒ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu', 'ラ'=>'ra','レ'=>'re','リ'=>'ri','ãƒ'=>'ro','ル'=>'ru', 'サ'=>'sa','ã‚»'=>'se','ã‚·'=>'shi','ソ'=>'so','ス'=>'su', - 'ザ'=>'za','ゼ'=>'ze','ジ'=>'zi','ゾ'=>'zo','ズ'=>'zu', + 'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu', 'ã‚¿'=>'ta','テ'=>'te','ãƒ'=>'chi','ト'=>'to','ツ'=>'tsu', 'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du', 'ワ'=>'wa','ヲ'=>'wo', -- GitLab