From 0e70946d23660c4814db46cd3f89cb8340e51336 Mon Sep 17 00:00:00 2001
From: chris <chris@jalakai.co.uk>
Date: Thu, 31 Aug 2006 11:21:46 +0200
Subject: [PATCH] add unittests for bug#891

darcs-hash:20060831092146-9b6ab-b00aa29c982ab18117f476b3d01d5111915c9d4b.gz
---
 _test/cases/inc/utf8_substr.test.php | 13 +++++++++++++
 inc/fulltext.php                     |  5 ++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/_test/cases/inc/utf8_substr.test.php b/_test/cases/inc/utf8_substr.test.php
index 92bc65fe3..5cb93034a 100644
--- a/_test/cases/inc/utf8_substr.test.php
+++ b/_test/cases/inc/utf8_substr.test.php
@@ -26,5 +26,18 @@ class utf8_substr_test extends UnitTestCase {
         }
     }
 
+    function test2_bug891() {
+        // regression cases for bug#891 - format of each case: in, offset, length, out
+        $tests   = array();
+        // 66000 two-byte characters plus an ASCII tail '@@': 66002 characters in total
+        $str = str_pad('',strlen('в')*66000,'в').'@@';
+        $tests[] = array($str, 65600, 1, 'в');
+        $tests[] = array($str,0,66002,$str);
+
+        foreach($tests as $test){
+            $this->assertEqual(utf8_substr($test[0],$test[1],$test[2]),$test[3]);
+        }
+    }
+
 }
 //Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/inc/fulltext.php b/inc/fulltext.php
index fa3ec05d2..280ba0c89 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -333,7 +333,6 @@ switch ($algorithm) {
       // establish context, 100 bytes surrounding the match string
       // first look to see if we can go 100 either side,
       // then drop to 50 adding any excess if the other side can't go to 50,
-      // NOTE: these are byte adjustments and will have to be corrected for utf-8
         $pre = min($utf8_idx-$utf8_offset,100);
         $post = min($len-$utf8_idx-$utf8_len,100);
 
@@ -371,9 +370,9 @@ switch ($algorithm) {
       } else {
         // code for strings too large for utf8_substr
         // use a larger context number as its bytes not characters
-        $pre = 70;
+        // no need to check for short pre, $idx is nearly 64k
         $post = min(strlen($text)-$idx-strlen($str), 70);
-        if ($post < 70) { $pre = 70 - $post; }
+        $pre = ($post < 70) ?  140 - $post : 70;
 
         $start = utf8_correctIdx($text,$idx - $pre);
         $end = utf8_correctIdx($text, $idx + strlen($str) + $post);
-- 
GitLab