From 84e581a6fc319435b4b46835b63cb4dccf05195b Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Sat, 28 Jul 2012 16:57:15 +0200
Subject: [PATCH] fix word boundary matching on broken platforms FS#2440

Seems like matching \b on unicode strings is unreliable across different
platforms (Debian). Using Unicode class lookahead/lookbehind assertions
seems to work though.
---
 inc/fulltext.php | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/inc/fulltext.php b/inc/fulltext.php
index 8f4db111d..eab8850dc 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -394,19 +394,24 @@ function ft_snippet_re_preprocess($term) {
         return $term;
     }
 
+    // unicode word boundaries
+    // see http://stackoverflow.com/a/2449017/172068
+    $BL = '(?<!\pL)';
+    $BR = '(?!\pL)';
+
     if(substr($term,0,2) == '\\*'){
         $term = substr($term,2);
     }else{
-        $term = '\b'.$term;
+        $term = $BL.$term;
     }
 
     if(substr($term,-2,2) == '\\*'){
         $term = substr($term,0,-2);
     }else{
-        $term = $term.'\b';
+        $term = $term.$BR;
     }
 
-    if($term == '\b' || $term == '\b\b') $term = '';
+    if($term == $BL || $term == $BR || $term == $BL.$BR) $term = '';
     return $term;
 }
 
-- 
GitLab