From 3161005d07beb46bb8a866ec56a768938571ec9d Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Mon, 12 Nov 2012 21:22:26 +0100
Subject: [PATCH] check for unicode preg capabilities in UTF-8 lib FS#2636

We now have two defines for checking for UTF-8 and Unicode property
support in PREG and use them to work around FS#2636 on older systems.
---
 inc/fulltext.php  | 15 +++++++++++----
 inc/infoutils.php |  4 ++--
 inc/utf8.php      | 19 +++++++++++++++++++
 3 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/inc/fulltext.php b/inc/fulltext.php
index eab8850dc..7ee386063 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -394,10 +394,17 @@ function ft_snippet_re_preprocess($term) {
         return $term;
     }
 
-    // unicode word boundaries
-    // see http://stackoverflow.com/a/2449017/172068
-    $BL = '(?<!\pL)';
-    $BR = '(?!\pL)';
+    if (UTF8_PROPERTYSUPPORT) {
+        // unicode word boundaries
+        // see http://stackoverflow.com/a/2449017/172068
+        $BL = '(?<!\pL)';
+        $BR = '(?!\pL)';
+    } else {
+        // plain \b word boundaries: not as correct for non-ASCII letters as above, but at least won't break
+        $BL = '\b';
+        $BR = '\b';
+    }
+
 
     if(substr($term,0,2) == '\\*'){
         $term = substr($term,2);
diff --git a/inc/infoutils.php b/inc/infoutils.php
index 0dc7092ad..a9c33acfd 100644
--- a/inc/infoutils.php
+++ b/inc/infoutils.php
@@ -176,10 +176,10 @@ function check(){
         msg('mb_string extension not available - PHP only replacements will be used',0);
     }
 
-    if (!preg_match("/^.$/u", "ñ")) {
+    if (!UTF8_PREGSUPPORT) {
         msg('PHP is missing UTF-8 support in Perl-Compatible Regular Expressions (PCRE)', -1);
     }
-    if (!preg_match("/^\pL$/u", "ñ")) {
+    if (!UTF8_PROPERTYSUPPORT) {
         msg('PHP is missing Unicode properties support in Perl-Compatible Regular Expressions (PCRE)', -1);
     }
 
diff --git a/inc/utf8.php b/inc/utf8.php
index 6fab8502c..c944667f7 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -17,6 +17,25 @@ if(!defined('UTF8_MBSTRING')){
     }
 }
 
+/**
+ * Check if PCRE (the library behind the preg_* functions) was compiled with UTF-8 support
+ *
+ * Without this, many of the functions below will not work, so this is a minimum requirement
+ */
+if(!defined('UTF8_PREGSUPPORT')){
+    define('UTF8_PREGSUPPORT', (bool) @preg_match('/^.$/u', 'ñ'));
+}
+
+/**
+ * Check if PCRE (the library behind the preg_* functions) was compiled with Unicode property support
+ *
+ * This is not required for the functions below, but might be needed in a UTF-8 aware application
+ */
+if(!defined('UTF8_PROPERTYSUPPORT')){
+    define('UTF8_PROPERTYSUPPORT', (bool) @preg_match('/^\pL$/u', 'ñ'));
+}
+
+
 if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }
 
 if(!function_exists('utf8_isASCII')){
-- 
GitLab