From 3161005d07beb46bb8a866ec56a768938571ec9d Mon Sep 17 00:00:00 2001 From: Andreas Gohr <andi@splitbrain.org> Date: Mon, 12 Nov 2012 21:22:26 +0100 Subject: [PATCH] check for unicode preg capabilities in UTF-8 lib FS#2636 We now have two defines for checking for UTF-8 and Unicode property support in PREG and use them to work around FS#2636 on older systems. --- inc/fulltext.php | 15 +++++++++++---- inc/infoutils.php | 4 ++-- inc/utf8.php | 19 +++++++++++++++++++ 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/inc/fulltext.php b/inc/fulltext.php index eab8850dc..7ee386063 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -394,10 +394,17 @@ function ft_snippet_re_preprocess($term) { return $term; } - // unicode word boundaries - // see http://stackoverflow.com/a/2449017/172068 - $BL = '(?<!\pL)'; - $BR = '(?!\pL)'; + if (UTF8_PROPERTYSUPPORT) { + // unicode word boundaries + // see http://stackoverflow.com/a/2449017/172068 + $BL = '(?<!\pL)'; + $BR = '(?!\pL)'; + } else { + // not as correct as above, but at least won't break + $BL = '\b'; + $BR = '\b'; + } + + if(substr($term,0,2) == '\\*'){ $term = substr($term,2); diff --git a/inc/infoutils.php b/inc/infoutils.php index 0dc7092ad..a9c33acfd 100644 --- a/inc/infoutils.php +++ b/inc/infoutils.php @@ -176,10 +176,10 @@ function check(){ msg('mb_string extension not available - PHP only replacements will be used',0); } - if (!preg_match("/^.$/u", "ñ")) { + if (!UTF8_PREGSUPPORT) { msg('PHP is missing UTF-8 support in Perl-Compatible Regular Expressions (PCRE)', -1); } - if (!preg_match("/^\pL$/u", "ñ")) { + if (!UTF8_PROPERTYSUPPORT) { msg('PHP is missing Unicode properties support in Perl-Compatible Regular Expressions (PCRE)', -1); } diff --git a/inc/utf8.php b/inc/utf8.php index 6fab8502c..c944667f7 100644 --- a/inc/utf8.php +++ b/inc/utf8.php @@ -17,6 +17,25 @@ if(!defined('UTF8_MBSTRING')){ } } +/** + * Check if PREG was compiled with UTF-8 support + * + * Without this, many of the functions 
below will not work, so this is a minimal requirement + */ +if(!defined('UTF8_PREGSUPPORT')){ + define('UTF8_PREGSUPPORT', (bool) @preg_match('/^.$/u', 'ñ')); +} + +/** + * Check if PREG was compiled with Unicode Property support + * + * This is not required for the functions below, but might be needed in a UTF-8 aware application + */ +if(!defined('UTF8_PROPERTYSUPPORT')){ + define('UTF8_PROPERTYSUPPORT', (bool) @preg_match('/^\pL$/u', 'ñ')); +} + + if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); } if(!function_exists('utf8_isASCII')){ -- GitLab