From 65aa8490874917620bd9a9a773cd0fb7621afb41 Mon Sep 17 00:00:00 2001 From: Michael Hamann <michael@content-space.de> Date: Sun, 8 May 2011 11:44:12 +0200 Subject: [PATCH] Add more render/cache logic to the metadata code This adds a new rendering limit of currently 5 pages to the p_get_metadata function. This means that in one request not more than 5 pages will be parsed/rendered. Pages for which the cache can be used aren't counted. This should make the new cache modes safe to use and should provide backwards compatibility while keeping the advantage of rendering metadata on demand (e.g. imagine one included page out of 10 is updated, then the metadata for that page can be rendered, but when you request a purge of the cache not 10 pages are rendered). In this commit most of the changes to the p_get_first_heading function are reverted and the title index is no longer used. This makes the first heading functionality no longer depend on the search index of DokuWiki. Maybe it can be added again later when the indexer provides a proper API for getting metadata values for all or selected pages. The performance of the p_get_first_heading function should be almost back to the performance in Anteater as the simple cache of p_get_metadata is used and also the limit of p_get_metadata is of course applied. 
--- inc/indexer.php | 6 +-- inc/parserutils.php | 115 ++++++++++++++++++++------------------------ inc/template.php | 2 +- 3 files changed, 56 insertions(+), 67 deletions(-) diff --git a/inc/indexer.php b/inc/indexer.php index 64e172b5b..3b4796676 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -1197,7 +1197,7 @@ function idx_addPage($page, $verbose=false, $force=false) { @unlink($idxtag); return $result; } - $indexenabled = p_get_metadata($page, 'internal index', true); + $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED); if ($indexenabled === false) { $result = false; if (@file_exists($idxtag)) { @@ -1215,8 +1215,8 @@ function idx_addPage($page, $verbose=false, $force=false) { $body = ''; $metadata = array(); - $metadata['title'] = p_get_metadata($page, 'title', true); - if (($references = p_get_metadata($page, 'relation references', true)) !== null) + $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED); + if (($references = p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED)) !== null) $metadata['relation_references'] = array_keys($references); else $metadata['relation_references'] = array(); diff --git a/inc/parserutils.php b/inc/parserutils.php index d7451cee4..abba89b5a 100644 --- a/inc/parserutils.php +++ b/inc/parserutils.php @@ -10,20 +10,43 @@ if(!defined('DOKU_INC')) die('meh.'); /** - * For how many different pages shall the first heading be loaded from the - * metadata? When this limit is reached the title index is loaded and used for - * all following requests. + * How many pages shall be rendered for getting metadata during one request + * at maximum? Note that this limit isn't respected when METADATA_RENDER_UNLIMITED + * is passed as render parameter to p_get_metadata. 
*/ -if (!defined('P_GET_FIRST_HEADING_METADATA_LIMIT')) define('P_GET_FIRST_HEADING_METADATA_LIMIT', 20); +if (!defined('P_GET_METADATA_RENDER_LIMIT')) define('P_GET_METADATA_RENDER_LIMIT', 5); /** Don't render metadata even if it is outdated or doesn't exist */ define('METADATA_DONT_RENDER', 0); -/** Render metadata when the page is really newer or the metadata doesn't exist. Uses just a simple check, - but should work pretty well for loading simple metadata values like the page title and avoids - rendering a lot of pages in one request. */ +/** + * Render metadata when the page is really newer or the metadata doesn't exist. + * Uses just a simple check, but should work pretty well for loading simple + * metadata values like the page title and avoids rendering a lot of pages in + * one request. The P_GET_METADATA_RENDER_LIMIT is used in this mode. + * Use this if it is unlikely that the metadata value you are requesting + * does depend e.g. on pages that are included in the current page using + * the include plugin (this is very likely the case for the page title, but + * not for relation references). + */ define('METADATA_RENDER_USING_SIMPLE_CACHE', 1); -/** Render metadata using the metadata cache logic. */ +/** + * Render metadata using the metadata cache logic. The P_GET_METADATA_RENDER_LIMIT + * is used in this mode. Use this mode when you are requesting more complex + * metadata. Although this will cause rendering more often it might actually have + * the effect that less current metadata is returned as it is more likely than in + * the simple cache mode that metadata needs to be rendered for all pages at once + * which means that when the metadata for the page is requested that actually needs + * to be updated the limit might have been reached already. + */ define('METADATA_RENDER_USING_CACHE', 2); +/** + * Render metadata without limiting the number of pages for which metadata is + * rendered. 
Use this mode with care, normally it should only be used in places + * like the indexer or in cli scripts where the execution time normally isn't + * limited. This can be combined with the simple cache using + * METADATA_RENDER_USING_SIMPLE_CACHE | METADATA_RENDER_UNLIMITED. + */ +define('METADATA_RENDER_UNLIMITED', 4); /** * Returns the parsed Wikitext in XHTML for the given id and revision. @@ -239,8 +262,9 @@ function p_get_instructions($text){ * @param string $id The id of the page the metadata should be returned from * @param string $key The key of the metdata value that shall be read (by default everything) - separate hierarchies by " " like "date created" * @param int $render If the page should be rendererd - possible values: - * METADATA_DONT_RENDER, METADATA_RENDER_USING_SIMPLE_CACHE, METADATA_RENDER_USING_CACHE, default: - * METADATA_RENDER_USING_CACHE + * METADATA_DONT_RENDER, METADATA_RENDER_USING_SIMPLE_CACHE, METADATA_RENDER_USING_CACHE, + * METADATA_RENDER_UNLIMITED (also combined with the previous two options), + * default: METADATA_RENDER_USING_CACHE * @return mixed The requested metadata fields * * @author Esther Brunner <esther@kaffeehaus.ch> @@ -248,6 +272,10 @@ function p_get_instructions($text){ */ function p_get_metadata($id, $key='', $render=METADATA_RENDER_USING_CACHE){ global $ID; + static $render_count = 0; + // track pages that have already been rendered in order to avoid rendering the same page + // again + static $rendered_pages = array(); // cache the current page // Benchmarking shows the current page's metadata is generally the only page metadata @@ -265,22 +293,26 @@ function p_get_metadata($id, $key='', $render=METADATA_RENDER_USING_CACHE){ // prevent recursive calls in the cache static $recursion = false; - if (!$recursion && $render != METADATA_DONT_RENDER && page_exists($id)){ + if (!$recursion && $render != METADATA_DONT_RENDER && !isset($rendered_pages[$id])&& page_exists($id)){ $recursion = true; $cachefile = new cache_renderer($id, 
wikiFN($id), 'metadata'); $do_render = false; - if ($render == METADATA_RENDER_USING_SIMPLE_CACHE) { - $pagefn = wikiFN($id); - $metafn = metaFN($id, '.meta'); - if (!@file_exists($metafn) || @filemtime($pagefn) > @filemtime($cachefile->cache)) { + if ($render & METADATA_RENDER_UNLIMITED || $render_count < P_GET_METADATA_RENDER_LIMIT) { + if ($render & METADATA_RENDER_USING_SIMPLE_CACHE) { + $pagefn = wikiFN($id); + $metafn = metaFN($id, '.meta'); + if (!@file_exists($metafn) || @filemtime($pagefn) > @filemtime($cachefile->cache)) { + $do_render = true; + } + } elseif (!$cachefile->useCache()){ $do_render = true; } - } elseif (!$cachefile->useCache()){ - $do_render = true; } if ($do_render) { + ++$render_count; + $rendered_pages[$id] = true; $old_meta = $meta; $meta = p_render_metadata($id, $meta); // only update the file when the metadata has been changed @@ -676,62 +708,19 @@ function & p_get_renderer($mode) { /** * Gets the first heading from a file * - * After P_GET_FIRST_HEADING_METADATA_LIMIT requests for different pages the title - * index will be loaded and used instead. Use METADATA_DONT_RENDER when you are - * requesting a lot of titles, METADATA_RENDER_USING_CACHE when you think - * rendering the page although it hasn't changed might be needed (or also - * want to influence rendering using events) and METADATA_RENDER_USING_SIMPLE_CACHE - * otherwise. Use METADATA_RENDER_USING_CACHE with care as it could cause - * parsing and rendering a lot of pages in one request. 
- * * @param string $id dokuwiki page id * @param int $render rerender if first heading not known * default: METADATA_RENDER_USING_SIMPLE_CACHE * Possible values: METADATA_DONT_RENDER, * METADATA_RENDER_USING_SIMPLE_CACHE, - * METADATA_RENDER_USING_CACHE + * METADATA_RENDER_USING_CACHE, + * METADATA_RENDER_UNLIMITED * * @author Andreas Gohr <andi@splitbrain.org> * @author Michael Hamann <michael@content-space.de> */ function p_get_first_heading($id, $render=METADATA_RENDER_USING_SIMPLE_CACHE){ - // counter how many titles have been requested using p_get_metadata - static $count = 1; - // the index of all titles, only loaded when many titles are requested - static $title_index = null; - // cache for titles requested using p_get_metadata - static $title_cache = array(); - - $id = cleanID($id); - - // check if this title has already been requested - if (isset($title_cache[$id])) - return $title_cache[$id]; - - // check if already too many titles have been requested and probably - // using the title index is better - if ($count > P_GET_FIRST_HEADING_METADATA_LIMIT) { - if (is_null($title_index)) { - $pages = array_map('rtrim', idx_getIndex('page', '')); - $titles = array_map('rtrim', idx_getIndex('title', '')); - // check for corrupt title index #FS2076 - if(count($pages) != count($titles)){ - $titles = array_fill(0,count($pages),''); - @unlink($conf['indexdir'].'/title.idx'); // will be rebuilt in inc/init.php - } else { - if (!empty($pages)) // array_combine throws a warning when the parameters are empty arrays - $title_index = array_combine($pages, $titles); - else - $title_index = array(); - } - } - if (!empty($title_index)) // don't use the index when it obviously isn't working - return $title_index[$id]; - } - - ++$count; - $title_cache[$id] = p_get_metadata($id,'title',$render); - return $title_cache[$id]; + return p_get_metadata(cleanID($id),'title',$render); } /** diff --git a/inc/template.php b/inc/template.php index e028fe39c..99ab8f35f 100644 --- 
a/inc/template.php +++ b/inc/template.php @@ -155,7 +155,7 @@ function tpl_toc($return=false){ $toc = $TOC; }elseif(($ACT == 'show' || substr($ACT,0,6) == 'export') && !$REV && $INFO['exists']){ // get TOC from metadata, render if neccessary - $meta = p_get_metadata($ID, false, true); + $meta = p_get_metadata($ID, false, METADATA_RENDER_USING_CACHE); if(isset($meta['internal']['toc'])){ $tocok = $meta['internal']['toc']; }else{ -- GitLab