From f50163d1d772ac773f6d03ed049e4a0329674db9 Mon Sep 17 00:00:00 2001
From: chris <chris@jalakai.co.uk>
Date: Sun, 27 Aug 2006 17:32:54 +0200
Subject: [PATCH] utf8_correctIdx bounds checking and more unittests

darcs-hash:20060827153254-9b6ab-3c76fde7cb5534ca12628e9aa6e6d59d9bb02f45.gz
---
 _test/cases/inc/utf8_correctidx.test.php | 65 +++++++++++++++++++++---
 inc/utf8.php                             | 20 +++++---
 2 files changed, 71 insertions(+), 14 deletions(-)

diff --git a/_test/cases/inc/utf8_correctidx.test.php b/_test/cases/inc/utf8_correctidx.test.php
index 1e7abf04a..d95ce9ae0 100644
--- a/_test/cases/inc/utf8_correctidx.test.php
+++ b/_test/cases/inc/utf8_correctidx.test.php
@@ -6,15 +6,68 @@ require_once DOKU_INC.'inc/utf8.php';
 class utf8_correctidx_test extends UnitTestCase {
 
 
-    function test1(){
+    function test_singlebyte(){
         // we test multiple cases here - format: in, offset, length, out
         $tests   = array();
 
-        $tests[] = array('живπά우리をあöä',1,false,0);
-        $tests[] = array('живπά우리をあöä',2,false,2);
-        $tests[] = array('живπά우리をあöä',1,true,2);
-        $tests[] = array('живπά우리をあöä',0,false,0);
-        $tests[] = array('живπά우리をあöä',2,true,2);
+        // single byte, should return current index
+        $tests[] = array('aaживπά우리をあöä',0,false,0);
+        $tests[] = array('aaживπά우리をあöä',1,false,1);
+        $tests[] = array('aaживπά우리をあöä',1,true,1);
+
+        foreach($tests as $test){
+            $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
+        }
+    }
+
+    function test_twobyte(){
+        // we test multiple cases here - format: in, offset, next, out
+        $tests   = array();
+
+        // two byte, should move to boundary, expect even number
+        $tests[] = array('aaживπά우리をあöä',2,false,2);
+        $tests[] = array('aaживπά우리をあöä',3,false,2);
+        $tests[] = array('aaживπά우리をあöä',4,false,4);
+
+        $tests[] = array('aaживπά우리をあöä',2,true,2);
+        $tests[] = array('aaживπά우리をあöä',3,true,4);
+        $tests[] = array('aaживπά우리をあöä',4,true,4);
+
+        foreach($tests as $test){
+            $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
+        }
+    }
+
+    function test_threebyte(){
+        // we test multiple cases here - format: in, offset, next, out
+        $tests   = array();
+
+        // three byte, should move to boundary 10 or 13
+        $tests[] = array('aaживπά우리をあöä',10,false,10);
+        $tests[] = array('aaживπά우리をあöä',11,false,10);
+        $tests[] = array('aaживπά우리をあöä',12,false,10);
+        $tests[] = array('aaживπά우리をあöä',13,false,13);
+
+        $tests[] = array('aaживπά우리をあöä',10,true,10);
+        $tests[] = array('aaживπά우리をあöä',11,true,13);
+        $tests[] = array('aaживπά우리をあöä',12,true,13);
+        $tests[] = array('aaживπά우리をあöä',13,true,13);
+
+        foreach($tests as $test){
+            $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
+        }
+    }
+
+    function test_bounds(){
+        // we test multiple cases here - format: in, offset, next, out
+        $tests   = array();
+
+        // bounds checking
+        $tests[] = array('aaживπά우리をあöä',-2,false,0);
+        $tests[] = array('aaживπά우리をあöä',128,false,29);
+
+        $tests[] = array('aaживπά우리をあöä',-2,true,0);
+        $tests[] = array('aaживπά우리をあöä',128,true,29);
 
         foreach($tests as $test){
             $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
diff --git a/inc/utf8.php b/inc/utf8.php
index 0323bed4b..ef056bfa4 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -776,15 +776,19 @@ function utf8_bad_replace($str, $replace = '') {
  * @author       chris smith <chris@jalakai.co.uk>
  */
 function utf8_correctIdx(&$str,$i,$next=false) {
-	
+
+  if ($i <= 0) return 0;
+
+  $limit = strlen($str);
+  if ($i>=$limit) return $limit;
+
   if ($next) {
-	  $limit = strlen($str);
-	  while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
-	} else {
-	  while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
-	}
-	
-	return $i;
+    while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
+  } else {
+    while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
+  }
+
+  return $i;
 }
 
 // only needed if no mb_string available
-- 
GitLab