From b8c040db1fdc0eee80963e57d95a15fd3813912d Mon Sep 17 00:00:00 2001
From: Tom N Harris <tnharris@whoopdedo.org>
Date: Wed, 23 Feb 2011 15:01:10 -0500
Subject: [PATCH] Add minimum length option to index histogram

---
 inc/indexer.php | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/inc/indexer.php b/inc/indexer.php
index 2e36b6ed7..6b21797af 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -707,11 +707,12 @@ class Doku_Indexer {
      *
      * @param int       $min    bottom frequency threshold
      * @param int       $max    upper frequency limit. No limit if $max<$min
+     * @param int       $minlen minimum length of words to count
      * @param string    $key    metadata key to list. Uses the fulltext index if not given
      * @return array            list of words as the keys and frequency as values
      * @author Tom N Harris <tnharris@whoopdedo.org>
      */
-    public function histogram($min=1, $max=0, $key=null) {
+    public function histogram($min=1, $max=0, $minlen=3, $key=null) {
         if ($min < 1)
             $min = 1;
         if ($max < $min)
@@ -723,7 +724,7 @@ class Doku_Indexer {
             $index = $this->_getIndex('title', '');
             $index = array_count_values($index);
             foreach ($index as $val => $cnt) {
-                if ($cnt >= $min && (!$max || $cnt <= $max))
+                if ($cnt >= $min && (!$max || $cnt <= $max) && strlen($val) >= $minlen)
                     $result[$val] = $cnt;
             }
         }
@@ -733,7 +734,7 @@ class Doku_Indexer {
             $val_idx = array();
             foreach ($index as $wid => $line) {
                 $freq = $this->_countTuples($line);
-                if ($freq >= $min && (!$max || $freq <= $max))
+                if ($freq >= $min && (!$max || $freq <= $max) && strlen($val) >= $minlen)
                     $val_idx[$wid] = $freq;
             }
             if (!empty($val_idx)) {
@@ -745,6 +746,7 @@ class Doku_Indexer {
         else {
             $lengths = idx_listIndexLengths();
             foreach ($lengths as $length) {
+                if ($length < $minlen) continue;
                 $index = $this->_getIndex('i', $length);
                 $words = null;
                 foreach ($index as $wid => $line) {
-- 
GitLab