From 4efb9a42baed214da00a12a58c72586499dafc95 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Sun, 18 Jun 2006 15:45:15 +0200
Subject: [PATCH] fixed stupid bug in search query parser

darcs-hash:20060618134515-7ad00-3097e310ccdaf793b5da3bd49a54723fea7ec260.gz
---
 _test/cases/inc/utf8_stripspecials.test.php | 27 +++++++++++++++++++++
 inc/indexer.php                             |  4 +--
 2 files changed, 29 insertions(+), 2 deletions(-)
 create mode 100644 _test/cases/inc/utf8_stripspecials.test.php

diff --git a/_test/cases/inc/utf8_stripspecials.test.php b/_test/cases/inc/utf8_stripspecials.test.php
new file mode 100644
index 000000000..a42507a3c
--- /dev/null
+++ b/_test/cases/inc/utf8_stripspecials.test.php
@@ -0,0 +1,27 @@
+<?php
+// UTF8_NOMBSTRING is defined before inc/utf8.php is loaded: use no mbstring help here
+if(!defined('UTF8_NOMBSTRING')) define('UTF8_NOMBSTRING',1);
+require_once DOKU_INC.'inc/utf8.php';
+
+class utf8_stripspecials extends UnitTestCase {
+    // Table-driven test of utf8_stripspecials(): every row of $tests below
+    // holds the three call arguments followed by the expected return value.
+    function test1(){
+        // row format: string, repl, additional, expected result of the call
+        $tests   = array();
+        $tests[] = array('asciistring','','','asciistring');
+        $tests[] = array('asciistring','','\._\-:','asciistring');
+        $tests[] = array('ascii.string','','\._\-:','asciistring');
+        $tests[] = array('ascii.string',' ','\._\-:','ascii string');
+        $tests[] = array('2.1.14',' ','\._\-:','2 1 14');
+        $tests[] = array('ascii.string','','\._\-:\*','asciistring');
+        $tests[] = array('ascii.string',' ','\._\-:\*','ascii string');
+        $tests[] = array('2.1.14',' ','\._\-:\*','2 1 14');
+        // run each row through utf8_stripspecials() and compare with its 4th element
+        foreach($tests as $test){
+            $this->assertEqual(utf8_stripspecials($test[0],$test[1],$test[2]),$test[3]);
+        }
+    }
+
+}
+//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/inc/indexer.php b/inc/indexer.php
index 789a98d91..4a5d0b4e2 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -357,14 +357,14 @@ function idx_parseIndexLine(&$page_idx,$line){
  */
 function idx_tokenizer($string,&$stopwords,$wc=false){
     $words = array();
-    if(!$wc) $wc = '\*';
+    $wc = ($wc) ? '' : $wc = '\*';
 
     if(preg_match('/[^0-9A-Za-z]/u', $string)){
         // handle asian chars as single words (may fail on older PHP version)
         $asia = @preg_replace('/('.IDX_ASIAN.')/u','\1 ',$string);
         if(!is_null($asia)) $string = $asia; //recover from regexp failure
 
-        $arr = explode(' ', utf8_stripspecials($string,' ','._\-:'.$wc));
+        $arr = explode(' ', utf8_stripspecials($string,' ','\._\-:'.$wc));
         foreach ($arr as $w) {
             if (!is_numeric($w) && strlen($w) < 3) continue;
             $w = utf8_strtolower($w);
-- 
GitLab