From d5a2a500a561c1264c42fd7113cbd20426ed8dad Mon Sep 17 00:00:00 2001
From: andi <andi@splitbrain.org>
Date: Sun, 23 Jan 2005 17:51:00 +0100
Subject: [PATCH] utf8 compatible fulltext search

darcs-hash:20050123165100-9977f-697d62ef2daa33eaf167e59754bb6f9444a06135.gz
---
 inc/html.php   |  4 ++--
 inc/search.php | 10 ++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/inc/html.php b/inc/html.php
index 16269d0a8..cd8651dde 100644
--- a/inc/html.php
+++ b/inc/html.php
@@ -536,7 +536,7 @@ function html_search(){
 
   //do quick pagesearch
   $data = array();
-  search($data,$conf['datadir'],'search_pagename',array(query => $QUERY));
+  search($data,$conf['datadir'],'search_pagename',array(query => cleanID($QUERY)));
   if(count($data)){
     sort($data);
     print '<div class="search_quickresult">';
@@ -554,7 +554,7 @@ function html_search(){
 
   //do fulltext search
   $data = array();
-  search($data,$conf['datadir'],'search_fulltext',array(query => $QUERY));
+  search($data,$conf['datadir'],'search_fulltext',array(query => utf8_strtolower($QUERY)));
   if(count($data)){
     usort($data,'sort_search_fulltext');
     foreach($data as $row){
diff --git a/inc/search.php b/inc/search.php
index ebc7eda9d..725cd22c9 100644
--- a/inc/search.php
+++ b/inc/search.php
@@ -269,6 +269,8 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
 
   //get text
   $text = io_readfile($base.'/'.$file);
+  //lowercase the text first (the u modifier does not help with case-insensitive matching)
+  $lctext = utf8_strtolower($text);
 
   //create regexp from queries  
   $qpreg = preg_split('/\s+/',preg_quote($opts['query'],'#'));
@@ -276,17 +278,17 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
 
   //do the fulltext search
   $matches = array();
-  if($cnt = preg_match_all('#'.$qpreg.'#si',$text,$matches)){
+  if($cnt = preg_match_all('#'.$qpreg.'#usi',$lctext,$matches)){
     //this is not the best way for snippet generation but the fastest I could find
     //split query and only use the first token
     $q = preg_split('/\s+/',$opts['query'],2);
     $q = $q[0];
-    $p = strpos(strtolower($text),$q);
+    $p = utf8_strpos($lctext,$q);
     $f = $p - 100;
-    $l = strlen($q) + 200;
+    $l = utf8_strlen($q) + 200;
     if($f < 0) $f = 0;
     $snippet = '<span class="search_sep"> ... </span>'.
-               htmlspecialchars(substr($text,$f,$l)).
+               htmlspecialchars(utf8_substr($text,$f,$l)).
                '<span class="search_sep"> ... </span>';
     $snippet = preg_replace('#'.$qpreg.'#si','<span class="search_hit">\\1</span>',$snippet);
 
-- 
GitLab