diff --git a/_test/core/DokuWikiTest.php b/_test/core/DokuWikiTest.php
index b9e151456961d75b40956e82cc5195dfa04e02da..91eb5293b8e94103b21bad5bd4c86429f9843ff7 100644
--- a/_test/core/DokuWikiTest.php
+++ b/_test/core/DokuWikiTest.php
@@ -30,6 +30,8 @@ abstract class DokuWikiTest extends PHPUnit_Framework_TestCase {
 
         // remove any leftovers from the last run
         if(is_dir(DOKU_TMP_DATA)){
+            // clear indexer data and cache
+            idx_get_indexer()->clear();
             TestUtils::rdelete(DOKU_TMP_DATA);
         }
 
diff --git a/_test/tests/inc/indexer_pid.test.php b/_test/tests/inc/indexer_pid.test.php
new file mode 100644
index 0000000000000000000000000000000000000000..8c58b1abd6b9e3a26d7b1ce4f018d02e90d97969
--- /dev/null
+++ b/_test/tests/inc/indexer_pid.test.php
@@ -0,0 +1,30 @@
+<?php
+/**
+ * Tests the pid functions of the indexer.
+ *
+ * @author Michael Hamann <michael@content-space.de>
+ */
+class indexer_pid_test extends DokuWikiTest {
+    function test_pid() {
+        $indexer = idx_get_indexer();
+        $syntaxPID = $indexer->getPID('wiki:syntax');
+        $this->assertEquals('wiki:syntax', $indexer->getPageFromPID($syntaxPID), 'getPageFromPID(getPID(\'wiki:syntax\')) != \'wiki:syntax\'');
+        $dokuwikiPID = $indexer->getPID('wiki:dokuwiki');
+        $this->assertEquals('wiki:syntax', $indexer->getPageFromPID($syntaxPID), 'getPageFromPID(getPID(\'wiki:syntax\')) != \'wiki:syntax\' after getting the PID for wiki:dokuwiki');
+        $this->assertEquals($syntaxPID, $indexer->getPID('wiki:syntax'), 'getPID(\'wiki:syntax\') returned different PIDs when called twice');
+        $this->assertNotEquals($syntaxPID, $dokuwikiPID, 'Same PID returned for different pages');
+        $this->assertTrue(is_numeric($syntaxPID) && is_numeric($dokuwikiPID), 'PIDs are not numeric');
+    }
+
+    /**
+     * Evicting entries from the PID cache must not mix up numeric page names.
+     */
+    function test_pid_numeric_pages() {
+        $indexer = idx_get_indexer();
+        // request more numeric page names than the PID cache holds so that the oldest entries are evicted
+        for ($page = 2000; $page < 2012; $page++) {
+            $indexer->getPID((string) $page);
+        }
+        $this->assertEquals('5', $indexer->getPageFromPID($indexer->getPID('5')), 'getPageFromPID(getPID(\'5\')) != \'5\' after the PID cache evicted numeric page names');
+    }
+}
diff --git a/bin/indexer.php b/bin/indexer.php
index f6aeb4f0e3293eee3409d55a93fbd815ca365c3d..6f6b5d9fa53997d3d90656bbbc4dc5ef9de9b369 100755
--- a/bin/indexer.php
+++ b/bin/indexer.php
@@ -5,11 +5,6 @@ if ('cli' != php_sapi_name()) die();
 ini_set('memory_limit','128M');
 if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
 require_once(DOKU_INC.'inc/init.php');
-require_once(DOKU_INC.'inc/common.php');
-require_once(DOKU_INC.'inc/pageutils.php');
-require_once(DOKU_INC.'inc/search.php');
-require_once(DOKU_INC.'inc/indexer.php');
-require_once(DOKU_INC.'inc/auth.php');
 require_once(DOKU_INC.'inc/cliopts.php');
 session_write_close();
 
@@ -67,10 +62,6 @@ function _usage() {
 
 function _update(){
     global $conf;
-    global $INDEXER;
-
-    $INDEXER = idx_get_indexer();
-
     $data = array();
     _quietecho("Searching pages... ");
     search($data,$conf['datadir'],'search_allpages',array('skipacl' => true));
@@ -82,7 +73,6 @@ function _update(){
 }
 
 function _index($id){
-    global $INDEXER;
     global $CLEAR;
     global $QUIET;
 
@@ -91,63 +81,13 @@ function _index($id){
     _quietecho("done.\n");
 }
 
-/**
- * lock the indexer system
- */
-function _lock(){
-    global $conf;
-    $lock = $conf['lockdir'].'/_indexer.lock';
-    $said = false;
-    while(!@mkdir($lock, $conf['dmode'])){
-        if(time()-@filemtime($lock) > 60*5){
-            // looks like a stale lock - remove it
-            @rmdir($lock);
-        }else{
-            if($said){
-                _quietecho(".");
-            }else{
-                _quietecho("Waiting for lockfile (max. 5 min)");
-                $said = true;
-            }
-            sleep(15);
-        }
-    }
-    if($conf['dperm']) chmod($lock, $conf['dperm']);
-    if($said) _quietecho("\n");
-}
-
-/**
- * unlock the indexer sytem
- */
-function _unlock(){
-    global $conf;
-    $lock = $conf['lockdir'].'/_indexer.lock';
-    @rmdir($lock);
-}
-
 /**
  * Clear all index files
  */
 function _clearindex(){
-    global $conf;
-    _lock();
     _quietecho("Clearing index... ");
-    io_saveFile($conf['indexdir'].'/page.idx','');
-    io_saveFile($conf['indexdir'].'/title.idx','');
-    io_saveFile($conf['indexdir'].'/pageword.idx','');
-    io_saveFile($conf['indexdir'].'/metadata.idx','');
-    $dir = @opendir($conf['indexdir']);
-    if($dir!==false){
-        while(($f = readdir($dir)) !== false){
-            if(substr($f,-4)=='.idx' &&
-               (substr($f,0,1)=='i' || substr($f,0,1)=='w'
-               || substr($f,-6)=='_w.idx' || substr($f,-6)=='_i.idx' || substr($f,-6)=='_p.idx'))
-                @unlink($conf['indexdir']."/$f");
-        }
-    }
-    @unlink($conf['indexdir'].'/lengths.idx');
+    idx_get_indexer()->clear();
     _quietecho("done.\n");
-    _unlock();
 }
 
 function _quietecho($msg) {
diff --git a/inc/indexer.php b/inc/indexer.php
index f22aee3a010046e5462253439f420fa1d59464ec..7a62345bf35ea7994a2f5350067c4cb37e3e30f5 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -102,6 +102,10 @@ function wordlen($w){
  * @author Tom N Harris <tnharris@whoopdedo.org>
  */
 class Doku_Indexer {
+    /**
+     * @var array $pidCache Cache for getPID()
+     */
+    protected $pidCache = array();
 
     /**
      * Adds the contents of a page to the fulltext index
@@ -120,7 +124,7 @@ class Doku_Indexer {
             return "locked";
 
         // load known documents
-        $pid = $this->addIndexKey('page', '', $page);
+        $pid = $this->getPIDNoLock($page);
         if ($pid === false) {
             $this->unlock();
             return false;
@@ -256,7 +260,7 @@ class Doku_Indexer {
             return "locked";
 
         // load known documents
-        $pid = $this->addIndexKey('page', '', $page);
+        $pid = $this->getPIDNoLock($page);
         if ($pid === false) {
             $this->unlock();
             return false;
@@ -348,7 +352,7 @@ class Doku_Indexer {
             return "locked";
 
         // load known documents
-        $pid = $this->addIndexKey('page', '', $page);
+        $pid = $this->getPIDNoLock($page);
         if ($pid === false) {
             $this->unlock();
             return false;
@@ -397,6 +401,38 @@ class Doku_Indexer {
         return true;
     }
 
+    /**
+     * Clear the whole index
+     *
+     * @return bool If the index has been cleared successfully
+     */
+    public function clear() {
+        global $conf;
+
+        if (!$this->lock()) return false;
+
+        @unlink($conf['indexdir'].'/page.idx');
+        @unlink($conf['indexdir'].'/title.idx');
+        @unlink($conf['indexdir'].'/pageword.idx');
+        @unlink($conf['indexdir'].'/metadata.idx');
+        $dir = @opendir($conf['indexdir']);
+        if($dir!==false){
+            while(($f = readdir($dir)) !== false){
+                if(substr($f,-4)=='.idx' &&
+                    (substr($f,0,1)=='i' || substr($f,0,1)=='w'
+                    || substr($f,-6)=='_w.idx' || substr($f,-6)=='_i.idx' || substr($f,-6)=='_p.idx'))
+                    @unlink($conf['indexdir']."/$f");
+            }
+        }
+        @unlink($conf['indexdir'].'/lengths.idx');
+
+        // clear the pid cache
+        $this->pidCache = array();
+
+        $this->unlock();
+        return true;
+    }
+
     /**
      * Split the text into words for fulltext search
      *
@@ -453,6 +489,65 @@ class Doku_Indexer {
         return array_values($wordlist);
     }
 
+    /**
+     * Get the numeric PID of a page
+     *
+     * @param string $page The page to get the PID for
+     * @return bool|int The page id on success, false on error
+     */
+    public function getPID($page) {
+        // return PID without locking when it is in the cache
+        if (isset($this->pidCache[$page])) return $this->pidCache[$page];
+
+        if (!$this->lock())
+            return false;
+
+        // load known documents
+        $pid = $this->getPIDNoLock($page);
+        if ($pid === false) {
+            $this->unlock();
+            return false;
+        }
+
+        $this->unlock();
+        return $pid;
+    }
+
+    /**
+     * Get the numeric PID of a page without locking the index.
+     * Only use this function when the index is already locked.
+     *
+     * @param string $page The page to get the PID for
+     * @return bool|int The page id on success, false on error
+     */
+    protected function getPIDNoLock($page) {
+        // avoid expensive addIndexKey operation for the most recently requested pages by using a cache
+        if (isset($this->pidCache[$page])) return $this->pidCache[$page];
+        $pid = $this->addIndexKey('page', '', $page);
+        // do not cache failures: isset() is true for a cached false, so the failure would be returned
+        // for every later request instead of retrying to write the index
+        if ($pid === false) return false;
+        // limit cache to 10 entries by discarding the oldest element as in DokuWiki usually only the most recently
+        // added item will be requested again. The oldest key is removed with unset() because array_shift()
+        // renumbers integer keys, and numeric page names like "2012" are stored as integer keys.
+        if (count($this->pidCache) >= 10) {
+            reset($this->pidCache);
+            unset($this->pidCache[key($this->pidCache)]);
+        }
+        $this->pidCache[$page] = $pid;
+        return $pid;
+    }
+
+    /**
+     * Get the page id of a numeric PID
+     *
+     * @param int $pid The PID to get the page id for
+     * @return string The page id
+     */
+    public function getPageFromPID($pid) {
+        return $this->getIndexKey('page', '', $pid);
+    }
+
     /**
      * Find pages in the fulltext index containing the words,
      *
@@ -946,7 +1041,7 @@ class Doku_Indexer {
      * @param string $idx name of the index
      * @param string $suffix subpart identifier
      * @param string $value line to find in the index
-     * @return int line number of the value in the index
+     * @return int|bool line number of the value in the index or false if writing the index failed
      * @author Tom N Harris <tnharris@whoopdedo.org>
      */
     protected function addIndexKey($idx, $suffix, $value) {
@@ -1140,8 +1235,8 @@ class Doku_Indexer {
  * @author Tom N Harris <tnharris@whoopdedo.org>
  */
 function idx_get_indexer() {
-    static $Indexer = null;
-    if (is_null($Indexer)) {
+    static $Indexer;
+    if (!isset($Indexer)) {
         $Indexer = new Doku_Indexer();
     }
     return $Indexer;
@@ -1223,6 +1318,12 @@ function idx_addPage($page, $verbose=false, $force=false) {
         return $result;
     }
 
+    $Indexer = idx_get_indexer();
+    $pid = $Indexer->getPID($page);
+    if ($pid === false) {
+        if ($verbose) print("Indexer: getting the PID failed for $page".DOKU_LF);
+        return false;
+    }
     $body = '';
     $metadata = array();
     $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);
@@ -1230,14 +1331,13 @@ function idx_addPage($page, $verbose=false, $force=false) {
         $metadata['relation_references'] = array_keys($references);
     else
         $metadata['relation_references'] = array();
-    $data = compact('page', 'body', 'metadata');
+    $data = compact('page', 'body', 'metadata', 'pid');
     $evt = new Doku_Event('INDEXER_PAGE_ADD', $data);
     if ($evt->advise_before()) $data['body'] = $data['body'] . " " . rawWiki($page);
     $evt->advise_after();
     unset($evt);
     extract($data);
 
-    $Indexer = idx_get_indexer();
     $result = $Indexer->addPageWords($page, $body);
     if ($result === "locked") {
         if ($verbose) print("Indexer: locked".DOKU_LF);