From 229529655f061863ec76db9ea557fef8b1a5161b Mon Sep 17 00:00:00 2001 From: YoBoY <yoboy.leguesh@gmail.com> Date: Tue, 23 Mar 2010 22:50:41 +0100 Subject: [PATCH] Limiting use of readdir in the idx_indexLengths function (v2). Each searches on the wiki use this function. Scanning the index directory eachtime is time consuming with a constant series of disk access. Switching a normal search to use file_exists 1 or more times, and not readdir all the directory. Switching a wildcard search to use a lengths.idx file containing all the word lengths used in the wiki, file generated if a new configuration parameter $conf[readdircache] is not 0 and fixed to a time in second. Creation of a new function idx_listIndexLengths to do this part. --- .../inc/indexer_idx_indexlengths.test.php | 121 ++++++++++++++++++ conf/dokuwiki.php | 1 + inc/indexer.php | 85 +++++++++--- lib/plugins/config/lang/en/lang.php | 1 + .../config/settings/config.metadata.php | 1 + 5 files changed, 189 insertions(+), 20 deletions(-) create mode 100644 _test/cases/inc/indexer_idx_indexlengths.test.php diff --git a/_test/cases/inc/indexer_idx_indexlengths.test.php b/_test/cases/inc/indexer_idx_indexlengths.test.php new file mode 100644 index 000000000..d1339a111 --- /dev/null +++ b/_test/cases/inc/indexer_idx_indexlengths.test.php @@ -0,0 +1,121 @@ +<?php + +require_once DOKU_INC.'inc/indexer.php'; + +class indexer_idx_indexlengths_test extends UnitTestCase { + + /** + * Test the function with an array of one value + */ + function test_oneWord(){ + global $conf; + $filter[8] = array('dokuwiki'); + // one word should return the index + $ref[] = 8; + sort($ref); + $result = idx_indexLengths(&$filter); + sort($result); + $this->assertIdentical($result, $ref); + } + + /** + * Test the function with an array of values + */ + function test_moreWords() { + global $conf; + $filter = array( 4 => array('test'), 8 => array('dokuwiki'), 7 => array('powered')); + // more words should return the indexes + $ref = array(4, 7, 8); + sort($ref); + $result = idx_indexLengths(&$filter); + sort($result); + $this->assertIdentical($result, $ref); + } + + /** + * Test a minimal value in case of wildcard search + */ + function test_minValue() { + global $conf; + $filter = 5; + // construction of the list of the index to compare + $dir = @opendir($conf['indexdir']); + $ref = array(); + while (($f = readdir($dir)) !== false) { + if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){ + $i = substr($f,1,-4); + if (is_numeric($i) && $i >= $filter) + $ref[] = (int)$i; + } + } + closedir($dir); + sort($ref); + $result = idx_indexLengths(&$filter); + sort($result); + $this->assertIdentical($result, $ref); + } +} + +class indexer_idx_indexlengths_time extends UnitTestCase { + + /** + * Test the time improvments of the new function + * Time reference for 10000 call oneWords: 4,6s + * It's 90% faster + */ + function test_oneWord(){ + global $conf; + $filter[8] = array('dokuwiki'); + $start = microtime(true); + for ($i = 0; $i < 10000; $i++) { + $result = idx_indexLengths(&$filter); + } + $end = microtime(true); + $time = $end - $start; + $timeref = 4.6*0.10; // actual execution time of 4,6s for 10000 calls + echo "1) 10% ref : $timeref -> $time \n"; + $this->assertTrue($time < $timeref); + } + + /** + * Test the time improvments of the new function + * Time reference for 10000 call moreWords: 4,6s + * It's 90% faster + */ + function test_moreWords() { + global $conf; + $filter = array( 4 => array('test'), 8 => array('dokuwiki'), 7 => array('powered')); + // more words should return the indexes + $start = microtime(true); + for ($i = 0; $i < 10000; $i++) { + $result = idx_indexLengths(&$filter); + } + $end = microtime(true); + $time = $end - $start; + $timeref = 4.6*0.10; // actual execution time of 4,6s for 10000 calls + echo "2) 10% ref : $timeref -> $time \n"; + $this->assertTrue($time < $timeref); + } + + /** + * Test the time improvments of the new function + * Time reference for 10000 call on minValue: 4,9s + * Sould be at least 65% faster + * Test fail with no cache + */ + function test_minValue() { + global $conf; + $filter = 5; + $start = microtime(true); + for ($i = 0; $i < 10000; $i++) { + $result = idx_indexLengths(&$filter); + } + $end = microtime(true); + $time = $end - $start; + $timeref = 4.9 * 0.35; // actual execution time of 4,9s for 10000 calls + echo "3) 35% ref : $timeref -> $time \n"; + $this->assertTrue($time < $timeref); + } +} + +//Setup VIM: ex: et ts=4 enc=utf-8 : diff --git a/conf/dokuwiki.php b/conf/dokuwiki.php index 19397861c..d3823eb94 100644 --- a/conf/dokuwiki.php +++ b/conf/dokuwiki.php @@ -156,3 +156,4 @@ $conf['ftp']['user'] = 'user'; $conf['ftp']['pass'] = 'password'; $conf['ftp']['root'] = '/home/user/htdocs'; +$conf['readdircache'] = 0; //time cache in second for the readdir opération, 0 to deactivate. diff --git a/inc/indexer.php b/inc/indexer.php index 07b67c014..54277a88c 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -410,41 +410,86 @@ function idx_updateIndexLine($line,$pid,$count){ return join(':',$updated)."\n"; } +/** + * Get the list of lenghts indexed in the wiki + * + * Read the index directory or a cache file and returns + * a sorted array of lengths of the words used in the wiki. + * + * @author YoBoY <yoboy.leguesh@gmail.com> + */ +function idx_listIndexLengths() { + global $conf; + // testing what we have to do, create a cache file or not. + if ($conf['readdircache'] == 0) { + $docache = false; + } else { + clearstatcache(); + if (@file_exists($conf['indexdir'].'/lengths.idx') and (time() < @filemtime($conf['indexdir'].'/lengths.idx') + $conf['readdircache'])) { + if (($lengths = @file($conf['indexdir'].'/lengths.idx', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) ) !== false) { + $idx = array(); + foreach ( $lengths as $length) { + $idx[] = (int)$length; + } + return $idx; + } + } + $docache = true; + } + + if ($conf['readdircache'] == 0 or $docache ) { + $dir = @opendir($conf['indexdir']); + if($dir===false) + return array(); + $idx[] = array(); + while (($f = readdir($dir)) !== false) { + if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){ + $i = substr($f,1,-4); + if (is_numeric($i)) + $idx[] = (int)$i; + } + } + closedir($dir); + sort($idx); + // we save this in a file. + if ($docache === true) { + $handle = @fopen($conf['indexdir'].'/lengths.idx','w'); + @fwrite($handle, implode("\n",$idx)); + @fclose($handle); + } + return $idx; + } + + return array(); +} + /** * Get the word lengths that have been indexed. * * Reads the index directory and returns an array of lengths * that there are indices for. * - * @author Tom N Harris <tnharris@whoopdedo.org> + * @author YoBoY <yoboy.leguesh@gmail.com> */ function idx_indexLengths(&$filter){ global $conf; - $dir = @opendir($conf['indexdir']); - if($dir===false) - return array(); $idx = array(); - if(is_array($filter)){ - while (($f = readdir($dir)) !== false) { - if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){ - $i = substr($f,1,-4); - if (is_numeric($i) && isset($filter[(int)$i])) - $idx[] = (int)$i; + if (is_array($filter)){ + // testing if index files exists only + foreach ($filter as $key => $value) { + if (@file_exists($conf['indexdir']."/i$key.idx")) { + $idx[] = $key; } } - }else{ - // Exact match first. - if(@file_exists($conf['indexdir']."/i$filter.idx")) - $idx[] = $filter; - while (($f = readdir($dir)) !== false) { - if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){ - $i = substr($f,1,-4); - if (is_numeric($i) && $i > $filter) - $idx[] = (int)$i; + } else { + $lengths = idx_listIndexLengths(); + foreach ( $lengths as $key => $length) { + // we keep all the values equal or superior + if ((int)$length >= (int)$filter) { + $idx[] = $length; } } } - closedir($dir); return $idx; } diff --git a/lib/plugins/config/lang/en/lang.php b/lib/plugins/config/lang/en/lang.php index 7198ca36d..2bcd17c12 100644 --- a/lib/plugins/config/lang/en/lang.php +++ b/lib/plugins/config/lang/en/lang.php @@ -234,3 +234,4 @@ $lang['useheading_o_navigation'] = 'Navigation Only'; $lang['useheading_o_content'] = 'Wiki Content Only'; $lang['useheading_o_1'] = 'Always'; +$lang['readdircache'] = 'Maximum age for readdir cache (sec)'; diff --git a/lib/plugins/config/settings/config.metadata.php b/lib/plugins/config/settings/config.metadata.php index 2f91b0ad8..cea191f56 100644 --- a/lib/plugins/config/settings/config.metadata.php +++ b/lib/plugins/config/settings/config.metadata.php @@ -188,6 +188,7 @@ $meta['rss_show_summary'] = array('onoff'); $meta['broken_iua'] = array('onoff'); $meta['xsendfile'] = array('multichoice','_choices' => array(0,1,2,3)); $meta['renderer_xhtml'] = array('renderer','_format' => 'xhtml','_choices' => array('xhtml')); +$meta['readdircache'] = array('numeric'); $meta['_network'] = array('fieldset'); $meta['proxy____host'] = array('string','_pattern' => '#^(|[a-z0-9\-\.+]+)$#i'); -- GitLab