From 10f09f2a66400f77b4696f973c4c526424e44bc1 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Mon, 3 Apr 2006 21:25:37 +0200
Subject: [PATCH] better utf8_substr function

darcs-hash:20060403192537-7ad00-72b129ce494066bce491821a0396db7576873ec2.gz
---
 inc/utf8.php | 66 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 54 insertions(+), 12 deletions(-)

diff --git a/inc/utf8.php b/inc/utf8.php
index 9ef17936c..18b09c29f 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -109,22 +109,64 @@ function utf8_strlen($string){
 }
 
 /**
- * Unicode aware replacement for substr()
+ * UTF-8 aware alternative to substr
  *
- * @author lmak at NOSPAM dot iti dot gr
- * @link   http://www.php.net/manual/en/function.substr.php
- * @see    substr()
+ * Return part of a string given character offset (and optionally length)
+ * Note: supports use of negative offsets and lengths but will be slower
+ * when doing so
+ *
+ * @author Harry Fuecks <hfuecks@gmail.com>
+ * @param string
+ * @param integer number of UTF-8 characters offset (from left)
+ * @param integer (optional) length in UTF-8 characters from offset
+ * @return mixed string or FALSE if failure
  */
-function utf8_substr($str,$start,$length=null){
-   preg_match_all("/./u", $str, $ar);
-
-   if($length != null) {
-       return join("",array_slice($ar[0],$start,$length));
-   } else {
-       return join("",array_slice($ar[0],$start));
-   }
+function utf8_substr($str, $offset, $length = null) {
+    if(!defined('UTF8_NOMBSTRING') && function_exists('mb_substr')){ // prefer native mbstring unless UTF8_NOMBSTRING is defined
+        if( $length === null ){
+            return mb_substr($str, $offset); // must return, otherwise the result is discarded and we fall through
+        }else{
+            return mb_substr($str, $offset, $length);
+        }
+    }
+
+    if ( $offset >= 0 && $length >= 0 ) { // a null $length compares as >= 0, so it takes this branch too
+        if ( $length === null ) {
+            $length = '*'; // no length given: capture everything after the offset
+        } else {
+            $strlen = strlen(utf8_decode($str)); // utf8_decode yields one byte per character, so this is the character count
+            if ( $offset > $strlen ) {
+                return '';
+            }
+
+            if ( ( $offset + $length ) > $strlen ) {
+               $length = '*'; // requested span runs past the end: capture the remainder
+            } else {
+                $length = '{'.$length.'}';
+            }
+        }
+
+        $pattern = '/^.{'.$offset.'}(.'.$length.')/us'; // skip $offset characters, capture the rest; 's' lets '.' match newlines
+        preg_match($pattern, $str, $matches);
+
+        if ( isset($matches[1]) ) {
+            return $matches[1];
+        }
+        return false;
+
+    } else {
+        // Handle negatives using different, slower technique
+        // From: http://www.php.net/manual/en/function.substr.php#44838
+        preg_match_all('/./u', $str, $ar); // split into an array of single UTF-8 characters
+        if( $length !== null ) {
+            return join('',array_slice($ar[0],$offset,$length));
+        } else {
+            return join('',array_slice($ar[0],$offset));
+        }
+    }
 }
 
+
 /**
  * Unicode aware replacement for substr_replace()
  *
-- 
GitLab