From 84e581a6fc319435b4b46835b63cb4dccf05195b Mon Sep 17 00:00:00 2001 From: Andreas Gohr <andi@splitbrain.org> Date: Sat, 28 Jul 2012 16:57:15 +0200 Subject: [PATCH] fix word boundary matching on broken platforms FS#2440 Seems like matching \b on unicode strings is unreliable across different platforms (Debian). Using Unicode class lookahead/lookbehinds seems to work though. --- inc/fulltext.php | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/inc/fulltext.php b/inc/fulltext.php index 8f4db111d..eab8850dc 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -394,19 +394,24 @@ function ft_snippet_re_preprocess($term) { return $term; } + // unicode word boundaries + // see http://stackoverflow.com/a/2449017/172068 + $BL = '(?<!\pL)'; + $BR = '(?!\pL)'; + if(substr($term,0,2) == '\\*'){ $term = substr($term,2); }else{ - $term = '\b'.$term; + $term = $BL.$term; } if(substr($term,-2,2) == '\\*'){ $term = substr($term,0,-2); }else{ - $term = $term.'\b'; + $term = $term.$BR; } - if($term == '\b' || $term == '\b\b') $term = ''; + if($term == $BL || $term == $BR || $term == $BL.$BR) $term = ''; return $term; } -- GitLab