From 33815ce27ad57e922146632ece1f6d9464db0225 Mon Sep 17 00:00:00 2001
From: Chris Smith <chris.eureka@jalakai.co.uk>
Date: Sun, 7 Dec 2008 17:11:29 +0100
Subject: [PATCH] Change search index min. token length to a define
 (IDX_MINWORDLENGTH)

The default minimum token length stays at 3 (note, this limit doesn't apply to
numeric tokens). The default set in inc/indexer.php can be overridden by defining
IDX_MINWORDLENGTH before inc/indexer.php is loaded (e.g. in conf/local.protected.php).

darcs-hash:20081207161129-f07c6-6432947fe5d74666409d1e00222eaa489374c32f.gz
---
 inc/indexer.php | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/inc/indexer.php b/inc/indexer.php
index ff2d332dc..490ba1393 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -12,6 +12,9 @@
   require_once(DOKU_INC.'inc/utf8.php');
   require_once(DOKU_INC.'inc/parserutils.php');
 
+// set the minimum token length to use in the index (note, this doesn't apply to numeric tokens)
+if (!defined('IDX_MINWORDLENGTH')) define('IDX_MINWORDLENGTH',3);
+
 // Asian characters are handled as words. The following regexp defines the
 // Unicode-Ranges for Asian characters
 // Ranges taken from http://en.wikipedia.org/wiki/Unicode_block
@@ -472,7 +475,7 @@ function idx_getIndexWordsSorted($words,&$result){
             $wild |= 2;
             $wlen -= 1;
         }
-        if ($wlen < 3 && $wild == 0 && !is_numeric($xword)) continue;
+        if ($wlen < IDX_MINWORDLENGTH && $wild == 0 && !is_numeric($xword)) continue;
         if(!isset($tokens[$xword])){
             $tokenlength[$wlen][] = $xword;
         }
@@ -620,14 +623,14 @@ function idx_tokenizer($string,&$stopwords,$wc=false){
 
         $arr = explode(' ', utf8_stripspecials($string,' ','\._\-:'.$wc));
         foreach ($arr as $w) {
-            if (!is_numeric($w) && strlen($w) < 3) continue;
+            if (!is_numeric($w) && strlen($w) < IDX_MINWORDLENGTH) continue;
             $w = utf8_strtolower($w);
             if($stopwords && is_int(array_search("$w\n",$stopwords))) continue;
             $words[] = $w;
         }
     }else{
         $w = $string;
-        if (!is_numeric($w) && strlen($w) < 3) return $words;
+        if (!is_numeric($w) && strlen($w) < IDX_MINWORDLENGTH) return $words;
         $w = strtolower($w);
         if(is_int(array_search("$w\n",$stopwords))) return $words;
         $words[] = $w;
-- 
GitLab