diff --git a/_test/cases/inc/utf8_correctidx.test.php b/_test/cases/inc/utf8_correctidx.test.php index 1e7abf04a7cff87dbab4304fb30f5a91abe750eb..d95ce9ae08c324925d3bf22b74bfbb8daa1640e1 100644 --- a/_test/cases/inc/utf8_correctidx.test.php +++ b/_test/cases/inc/utf8_correctidx.test.php @@ -6,15 +6,68 @@ require_once DOKU_INC.'inc/utf8.php'; class utf8_correctidx_test extends UnitTestCase { - function test1(){ + function test_singlebyte(){ // we test multiple cases here - format: in, offset, length, out $tests = array(); - $tests[] = array('живπά우리をã‚öä',1,false,0); - $tests[] = array('живπά우리をã‚öä',2,false,2); - $tests[] = array('живπά우리をã‚öä',1,true,2); - $tests[] = array('живπά우리をã‚öä',0,false,0); - $tests[] = array('живπά우리をã‚öä',2,true,2); + // single byte, should return current index + $tests[] = array('aaживπά우리をã‚öä',0,false,0); + $tests[] = array('aaживπά우리をã‚öä',1,false,1); + $tests[] = array('aaживπά우리をã‚öä',1,true,1); + + foreach($tests as $test){ + $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]); + } + } + + function test_twobyte(){ + // we test multiple cases here - format: in, offset, length, out + $tests = array(); + + // two byte, should move to boundary, expect even number + $tests[] = array('aaживπά우리をã‚öä',2,false,2); + $tests[] = array('aaживπά우리をã‚öä',3,false,2); + $tests[] = array('aaживπά우리をã‚öä',4,false,4); + + $tests[] = array('aaживπά우리をã‚öä',2,true,2); + $tests[] = array('aaживπά우리をã‚öä',3,true,4); + $tests[] = array('aaживπά우리をã‚öä',4,true,4); + + foreach($tests as $test){ + $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]); + } + } + + function test_threebyte(){ + // we test multiple cases here - format: in, offset, length, out + $tests = array(); + + // three byte, should move to boundary 10 or 13 + $tests[] = array('aaживπά우리をã‚öä',10,false,10); + $tests[] = array('aaживπά우리をã‚öä',11,false,10); + $tests[] = array('aaживπά우리をã‚öä',12,false,10); + $tests[] = array('aaживπά우리をã‚öä',13,false,13); + + $tests[] = array('aaживπά우리をã‚öä',10,true,10); + $tests[] = array('aaживπά우리をã‚öä',11,true,13); + $tests[] = array('aaживπά우리をã‚öä',12,true,13); + $tests[] = array('aaживπά우리をã‚öä',13,true,13); + + foreach($tests as $test){ + $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]); + } + } + + function test_bounds(){ + // we test multiple cases here - format: in, offset, length, out + $tests = array(); + + // bounds checking + $tests[] = array('aaживπά우리をã‚öä',-2,false,0); + $tests[] = array('aaживπά우리をã‚öä',128,false,29); + + $tests[] = array('aaживπά우리をã‚öä',-2,true,0); + $tests[] = array('aaживπά우리をã‚öä',128,true,29); foreach($tests as $test){ $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]); diff --git a/inc/utf8.php b/inc/utf8.php index 0323bed4b47522a839c7b58fe7015cad3c9552c4..ef056bfa4f9872d3b9d622f4fac30e9f473db2a9 100644 --- a/inc/utf8.php +++ b/inc/utf8.php @@ -776,15 +776,19 @@ function utf8_bad_replace($str, $replace = '') { * @author chris smith <chris@jalakai.co.uk> */ function utf8_correctIdx(&$str,$i,$next=false) { - + + if ($i <= 0) return 0; + + $limit = strlen($str); + if ($i>=$limit) return $limit; + if ($next) { - $limit = strlen($str); - while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++; - } else { - while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--; - } - - return $i; + while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++; + } else { + while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--; + } + + return $i; } // only needed if no mb_string available