From 33815ce27ad57e922146632ece1f6d9464db0225 Mon Sep 17 00:00:00 2001 From: Chris Smith <chris.eureka@jalakai.co.uk> Date: Sun, 7 Dec 2008 17:11:29 +0100 Subject: [PATCH] Change search index min. token length to a define (IDX_MINWORDLENGTH) Currently the min. token length is 3 (note, this doesn't apply to numeric tokens). The value set in inc/indexer.php can be overridden by defining IDX_MINWORDLENGTH elsewhere (e.g. conf/local.protected.php). darcs-hash:20081207161129-f07c6-6432947fe5d74666409d1e00222eaa489374c32f.gz --- inc/indexer.php | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/inc/indexer.php b/inc/indexer.php index ff2d332dc..490ba1393 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -12,6 +12,9 @@ require_once(DOKU_INC.'inc/utf8.php'); require_once(DOKU_INC.'inc/parserutils.php'); +// set the minimum token length to use in the index (note, this doesn't apply to numeric tokens) +if (!defined('IDX_MINWORDLENGTH')) define('IDX_MINWORDLENGTH',3); + // Asian characters are handled as words. The following regexp defines the // Unicode-Ranges for Asian characters // Ranges taken from http://en.wikipedia.org/wiki/Unicode_block @@ -472,7 +475,7 @@ function idx_getIndexWordsSorted($words,&$result){ $wild |= 2; $wlen -= 1; } - if ($wlen < 3 && $wild == 0 && !is_numeric($xword)) continue; + if ($wlen < IDX_MINWORDLENGTH && $wild == 0 && !is_numeric($xword)) continue; if(!isset($tokens[$xword])){ $tokenlength[$wlen][] = $xword; } @@ -620,14 +623,14 @@ function idx_tokenizer($string,&$stopwords,$wc=false){ $arr = explode(' ', utf8_stripspecials($string,' ','\._\-:'.$wc)); foreach ($arr as $w) { - if (!is_numeric($w) && strlen($w) < 3) continue; + if (!is_numeric($w) && strlen($w) < IDX_MINWORDLENGTH) continue; $w = utf8_strtolower($w); if($stopwords && is_int(array_search("$w\n",$stopwords))) continue; $words[] = $w; } }else{ $w = $string; - if (!is_numeric($w) && strlen($w) < 3) return $words; + if (!is_numeric($w) && strlen($w) < IDX_MINWORDLENGTH) return $words; $w = strtolower($w); if(is_int(array_search("$w\n",$stopwords))) return $words; $words[] = $w; -- GitLab