From d5a2a500a561c1264c42fd7113cbd20426ed8dad Mon Sep 17 00:00:00 2001 From: andi <andi@splitbrain.org> Date: Sun, 23 Jan 2005 17:51:00 +0100 Subject: [PATCH] utf8 compatible fulltext search darcs-hash:20050123165100-9977f-697d62ef2daa33eaf167e59754bb6f9444a06135.gz --- inc/html.php | 4 ++-- inc/search.php | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/inc/html.php b/inc/html.php index 16269d0a8..cd8651dde 100644 --- a/inc/html.php +++ b/inc/html.php @@ -536,7 +536,7 @@ function html_search(){ //do quick pagesearch $data = array(); - search($data,$conf['datadir'],'search_pagename',array(query => $QUERY)); + search($data,$conf['datadir'],'search_pagename',array(query => cleanID($QUERY))); if(count($data)){ sort($data); print '<div class="search_quickresult">'; @@ -554,7 +554,7 @@ function html_search(){ //do fulltext search $data = array(); - search($data,$conf['datadir'],'search_fulltext',array(query => $QUERY)); + search($data,$conf['datadir'],'search_fulltext',array(query => utf8_strtolower($QUERY))); if(count($data)){ usort($data,'sort_search_fulltext'); foreach($data as $row){ diff --git a/inc/search.php b/inc/search.php index ebc7eda9d..725cd22c9 100644 --- a/inc/search.php +++ b/inc/search.php @@ -269,6 +269,8 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){ //get text $text = io_readfile($base.'/'.$file); + //lowercase text (u modifier does not help with case) + $lctext = utf8_strtolower($text); //create regexp from queries $qpreg = preg_split('/\s+/',preg_quote($opts['query'],'#')); @@ -276,17 +278,17 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){ //do the fulltext search $matches = array(); - if($cnt = preg_match_all('#'.$qpreg.'#si',$text,$matches)){ + if($cnt = preg_match_all('#'.$qpreg.'#usi',$lctext,$matches)){ //this is not the best way for snippet generation but the fastest I could find //split query and only use the first token $q = preg_split('/\s+/',$opts['query'],2); $q = $q[0]; - $p = strpos(strtolower($text),$q); + $p = utf8_strpos($lctext,$q); $f = $p - 100; - $l = strlen($q) + 200; + $l = utf8_strlen($q) + 200; if($f < 0) $f = 0; $snippet = '<span class="search_sep"> ... </span>'. - htmlspecialchars(substr($text,$f,$l)). + htmlspecialchars(utf8_substr($text,$f,$l)). '<span class="search_sep"> ... </span>'; $snippet = preg_replace('#'.$qpreg.'#si','<span class="search_hit">\\1</span>',$snippet); -- GitLab