From 879205e1ab4bfb4518e96376849495cb62329f7c Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Thu, 8 May 2008 23:24:44 +0200
Subject: [PATCH] Japanese romanization update

Down to 57 fails

darcs-hash:20080508212444-7ad00-16286e9f5be2bbbd3069d5c22ab8c270b2e1b23e.gz
---
 _test/cases/inc/utf8_romanize.test.php |  4 +-
 inc/utf8.php                           | 54 +++++++++++++++-----------
 2 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/_test/cases/inc/utf8_romanize.test.php b/_test/cases/inc/utf8_romanize.test.php
index e1c1be7c2..08f561f5c 100644
--- a/_test/cases/inc/utf8_romanize.test.php
+++ b/_test/cases/inc/utf8_romanize.test.php
@@ -12,12 +12,14 @@ class utf8_substr_test extends UnitTestCase {
      */
     function test_japanese(){
         $tests = file(dirname(__FILE__).'/utf8_kanaromaji.txt');
+        $line = 1;
         foreach($tests as $test){
             list($jap,$rom) = explode(';',trim($test));
 
             $chk = utf8_romanize($jap);
-            #if($chk != $rom) echo "$jap\t->\t$chk\t!=\t$rom\n";
+            #if($chk != $rom) echo "$jap\t->\t$chk\t!=\t$rom\t($line)\n";
             $this->assertEqual($chk,$rom);
+            $line++;
         }
     }
 }
diff --git a/inc/utf8.php b/inc/utf8.php
index d79526179..7291987b0 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -1221,25 +1221,28 @@ $UTF8_ROMANIZATION = array(
   'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k',
   'Ù„'=>'l','Ù…'=>'m','Ù†'=>'n','Ù‡'=>'x\'','Ùˆ'=>'u','ÙŠ'=>'i',
 
-  // Japanese characters  (last update: 2008-05-05)
-  
+  // Japanese characters  (last update: 2008-05-08)
+
   // Japanese hiragana
 
   // 3 character syllables, っ doubles the consonant after
   'っちゃ'=>'ccha','っちぇ'=>'cche','っちょ'=>'ccho','っちゅ'=>'cchu',
-  'っびゃ'=>'bya','っびぇ'=>'bye','っびぃ'=>'byi','っびょ'=>'byo','っびゅ'=>'byu',
-  'っちゃ'=>'cha','っちぇ'=>'che','っち'=>'chi','っちょ'=>'cho','っちゅ'=>'chu',
-  'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu',
-  'っきゃ'=>'kya','っきぇ'=>'kye','っきぃ'=>'kyi','っきょ'=>'kyo','っきゅ'=>'kyu',
-  'っぎゃ'=>'gya','っぎぇ'=>'gye','っぎぃ'=>'gyi','っぎょ'=>'gyo','っぎゅ'=>'gyu',
-  'っみゃ'=>'mya','っみぇ'=>'mye','っみぃ'=>'myi','っみょ'=>'myo','っみゅ'=>'myu',
-  'っにゃ'=>'nya','っにぇ'=>'nye','っにぃ'=>'nyi','っにょ'=>'nyo','っにゅ'=>'nyu',
-  'っりゃ'=>'rya','っりぇ'=>'rye','っりぃ'=>'ryi','っりょ'=>'ryo','っりゅ'=>'ryu',
-  'っしゃ'=>'sha','っしぇ'=>'she','っし'=>'shi','っしょ'=>'sho','っしゅ'=>'shu',
-  
+  'っびゃ'=>'bbya','っびぇ'=>'bbye','っびぃ'=>'bbyi','っびょ'=>'bbyo','っびゅ'=>'bbyu',
+  'っちゃ'=>'ccha','っちぇ'=>'cche','っち'=>'cchi','っちょ'=>'ccho','っちゅ'=>'cchu',
+  // 'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu',
+  'っきゃ'=>'kkya','っきぇ'=>'kkye','っきぃ'=>'kkyi','っきょ'=>'kkyo','っきゅ'=>'kkyu',
+  'っぎゃ'=>'ggya','っぎぇ'=>'ggye','っぎぃ'=>'ggyi','っぎょ'=>'ggyo','っぎゅ'=>'ggyu',
+  'っみゃ'=>'mmya','っみぇ'=>'mmye','っみぃ'=>'mmyi','っみょ'=>'mmyo','っみゅ'=>'mmyu',
+  'っにゃ'=>'nnya','っにぇ'=>'nnye','っにぃ'=>'nnyi','っにょ'=>'nnyo','っにゅ'=>'nnyu',
+  'っりゃ'=>'rrya','っりぇ'=>'rrye','っりぃ'=>'rryi','っりょ'=>'rryo','っりゅ'=>'rryu',
+  'っしゃ'=>'ssha','っしぇ'=>'sshe','っし'=>'sshi','っしょ'=>'ssho','っしゅ'=>'sshu',
+
+  // separate hiragana 'n' ('n' + 'i' != 'ni'; normally we would write "kon'nichi wa", but the apostrophe would be converted to _ anyway)
+  'んあ'=>'n_a','んえ'=>'n_e','んい'=>'n_i','んお'=>'n_o','んう'=>'n_u',
+  'んや'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu',
+
    // 2 character syllables - normal
-  'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo','ふ'=>'fu',
-  'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu',
+  'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo',
   'びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu',
   'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu',
   'ひゃ'=>'hya','ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu',
@@ -1249,7 +1252,9 @@ $UTF8_ROMANIZATION = array(
   'にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu',
   'りゃ'=>'rya','りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu',
   'しゃ'=>'sha','しぇ'=>'she','し'=>'shi','しょ'=>'sho','しゅ'=>'shu',
-  'じゃ'=>'ja','じぇ'=>'je','じ'=>'ji','じょ'=>'jo','じゅ'=>'ju',
+  'じゃ'=>'ja','じぇ'=>'je','じょ'=>'jo','じゅ'=>'ju',
+  'うぇ'=>'we','うぃ'=>'wi',
+  'いぇ'=>'ye',
 
   // 2 character syllables, っ doubles the consonant after
   'っば'=>'bba','っべ'=>'bbe','っび'=>'bbi','っぼ'=>'bbo','っぶ'=>'bbu',
@@ -1266,7 +1271,7 @@ $UTF8_ROMANIZATION = array(
   
   // 1 character syllabels
   'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ん'=>'n',
-  'は'=>'ha','へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'hu',
+  'は'=>'ha','へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'fu',
   'ば'=>'ba','べ'=>'be','び'=>'bi','ぼ'=>'bo','ぶ'=>'bu',
   'ぱ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po','ぷ'=>'pu',
   'た'=>'ta','て'=>'te','ち'=>'chi','と'=>'to','つ'=>'tsu',
@@ -1277,9 +1282,9 @@ $UTF8_ROMANIZATION = array(
   'な'=>'na','ね'=>'ne','に'=>'ni','の'=>'no','ぬ'=>'nu',
   'ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru',
   'さ'=>'sa','せ'=>'se','し'=>'shi','そ'=>'so','す'=>'su',
-  'わ'=>'wa','うぇ'=>'we','うぃ'=>'wi','を'=>'wo',
-  'ざ'=>'za','ぜ'=>'ze','じ'=>'zi','ぞ'=>'zo','ず'=>'zu',
-  'や'=>'ya','いぇ'=>'ye','よ'=>'yo','ゆ'=>'yu',
+  'わ'=>'wa','を'=>'wo',
+  'ざ'=>'za','ぜ'=>'ze','じ'=>'ji','ぞ'=>'zo','ず'=>'zu',
+  'や'=>'ya','よ'=>'yo','ゆ'=>'yu',
   // old characters
   'ゑ'=>'we','ゐ'=>'wi',
 
@@ -1288,6 +1293,7 @@ $UTF8_ROMANIZATION = array(
   // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu',
 
   // never seen one of those (disabled for the moment)
+  // 'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu',
   // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu',
   // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu',
   // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu',
@@ -1390,8 +1396,8 @@ $UTF8_ROMANIZATION = array(
   'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu',
   'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu',
   'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu',
-  'シャ'=>'sha','シェ'=>'she','シ'=>'shi','ショ'=>'sho','シュ'=>'shu',
-  'ジャ'=>'ja','ジェ'=>'je','ジ'=>'ji','ジョ'=>'jo','ジュ'=>'ju',
+  'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu',
+  'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju',
   'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu',
   'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du',
   'チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu',
@@ -1425,6 +1431,10 @@ $UTF8_ROMANIZATION = array(
   // old characters
   'ヱー'=>'wee','ヰー'=>'wii',
 
+  // separate katakana 'n'
+  'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u',
+  'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンユ'=>'n_yu',
+
   // 2 character syllables - doubled consonants
   'ッバ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu',
   'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッポ'=>'ppo','ップ'=>'ppu',
@@ -1449,7 +1459,7 @@ $UTF8_ROMANIZATION = array(
   'ナ'=>'na','ネ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu',
   'ラ'=>'ra','レ'=>'re','リ'=>'ri','ロ'=>'ro','ル'=>'ru',
   'サ'=>'sa','セ'=>'se','シ'=>'shi','ソ'=>'so','ス'=>'su',
-  'ザ'=>'za','ゼ'=>'ze','ジ'=>'zi','ゾ'=>'zo','ズ'=>'zu',
+  'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu',
   'タ'=>'ta','テ'=>'te','チ'=>'chi','ト'=>'to','ツ'=>'tsu',
   'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du',
   'ワ'=>'wa','ヲ'=>'wo',
-- 
GitLab