diff --git a/conf/dokuwiki.php b/conf/dokuwiki.php
index 1b4a44a382485b7d05ae471a4f7bf3cc6be8baa4..1e4ec61d518c89ec229d11d14544bb9434601d69 100644
--- a/conf/dokuwiki.php
+++ b/conf/dokuwiki.php
@@ -81,6 +81,7 @@ $conf['pluginmanager'] = 0; //enable automated plugin management (r
 $conf['compress']    = 1;                //Strip whitespaces and comments from Styles and JavaScript? 1|0
 $conf['hidepages']   = '';               //Regexp for pages to be skipped from RSS, Search and Recent Changes
 $conf['send404']     = 0;                //Send a HTTP 404 status for non existing pages?
+$conf['sitemap']     = 0;                //Create a google sitemap? How often? In days. (0 disables it)
 $conf['rss_type']    = 'rss1';           //type of RSS feed to provide, by default:
                                          //  'rss'  - RSS 0.91
                                          //  'rss1' - RSS 1.0
diff --git a/lib/exe/indexer.php b/lib/exe/indexer.php
index 80dbb77f543a6aaab857ca6f7e74d5eba41d214c..2eea32f020d78576f76826f612a78957f9788d60 100644
--- a/lib/exe/indexer.php
+++ b/lib/exe/indexer.php
@@ -5,77 +5,148 @@
  * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
  * @author     Andreas Gohr <andi@splitbrain.org>
  */
-
-/**
- * Just send a 1x1 pixel blank gif to the browser and exit
- 
- */
-function sendGIF(){
-    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
-    header('Content-Type: image/gif');
-    header('Content-Length: '.strlen($img));
-    header('Connection: Close');
-    print $img;
-    // Browser should drop connection after this
-    // Thinks it's got the whole image
-}
-
-// Make sure image is sent to the browser immediately
-ob_implicit_flush(TRUE);
+if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
+require_once(DOKU_INC.'inc/init.php');
+require_once(DOKU_INC.'inc/auth.php');
+session_write_close();  //close session
 
 // keep running after browser closes connection
 @ignore_user_abort(true);
 
+// send gif
 sendGIF();
 
-// Switch off implicit flush again - we don't want to send any more output
-ob_implicit_flush(FALSE);
-
 // Catch any possible output (e.g. errors)
 // - probably not needed but better safe...
 ob_start();
 
-// Called to exit - we don't want any output going anywhere
-function indexer_stop() {
-    ob_end_clean();
-    exit();
-}
-
 // Now start work
-if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
-require_once(DOKU_INC.'inc/init.php');
-//close session
-session_write_close();
+require_once(DOKU_INC.'inc/utf8.php');
+require_once(DOKU_INC.'inc/auth.php');
 
-require_once(DOKU_INC.'inc/indexer.php');
+// run one of the jobs
+runIndexer() or runSitemapper();
 
-$ID = cleanID($_REQUEST['id']);
-if(!$ID) indexer_stop();
+ob_end_clean();
+exit;
 
-// check if indexing needed
-$last = @filemtime(metaFN($ID,'.indexed'));
-if($last > @filemtime(wikiFN($ID))) indexer_stop();
+// --------------------------------------------------------------------
 
-// try to aquire a lock
-$lock = $conf['lockdir'].'/_indexer.lock';
-while(!@mkdir($lock,0777)){
-    if(time()-@filemtime($lock) > 60*5){
-        // looks like a stale lock - remove it
-        @rmdir($lock);
-    }else{
-        indexer_stop();
+/**
+ * Runs the indexer for the current page
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @return bool true if the page was indexed, false if nothing was done
+ */
+function runIndexer(){
+    global $conf;
+
+    $ID = cleanID($_REQUEST['id']);
+    if(!$ID) return false;
+
+    // check if indexing needed
+    $last = @filemtime(metaFN($ID,'.indexed'));
+    if($last > @filemtime(wikiFN($ID))) return false;
+
+    // try to aquire a lock
+    $lock = $conf['lockdir'].'/_indexer.lock';
+    while(!@mkdir($lock,0777)){
+        if(time()-@filemtime($lock) > 60*5){
+            // looks like a stale lock - remove it
+            @rmdir($lock);
+        }else{
+            return false;
+        }
     }
+
+    require_once(DOKU_INC.'inc/indexer.php');
+
+    // do the work
+    idx_addPage($ID);
+
+    // we're finished - save and free lock
+    io_saveFile(metaFN($ID,'.indexed'),' ');
+    @rmdir($lock);
+    return true;
 }
 
-// do the work
-idx_addPage($ID);
+/**
+ * Builds a Google Sitemap of all public pages known to the indexer
+ *
+ * The map is placed in the root directory named sitemap.xml.gz (or
+ * sitemap.xml when $conf['usegzip'] is off) - This file needs to be writable!
+ *
+ * @author Andreas Gohr
+ * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
+ * @return bool true if a new sitemap was generated, false if the job was skipped
+ */
+function runSitemapper(){
+    global $conf;
+    if(!$conf['sitemap']) return false;
+    if(!defined('NL')) define('NL',"\n");
 
-// we're finished
-io_saveFile(metaFN($ID,'.indexed'),' ');
-@rmdir($lock);
-indexer_stop();
+    if($conf['usegzip']){
+        $sitemap = DOKU_INC.'sitemap.xml.gz';
+    }else{
+        $sitemap = DOKU_INC.'sitemap.xml';
+    }
 
-//Setup VIM: ex: et ts=4 enc=utf-8 :
+    if(!is_writable($sitemap)) return false;
+    if(@filesize($sitemap) &&
+       @filemtime($sitemap) > (time()-($conf['sitemap']*60*60*24))){
+        return false;
+    }
+
+    $pages = @file($conf['cachedir'].'/page.idx');
+    if(!is_array($pages)) return false; // no page index yet - nothing to map
+
+    ob_start();
+    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
+    print '<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">'.NL;
+    foreach($pages as $id){
+        $id = trim($id);
+        $file = wikiFN($id);
+
+        //skip hidden, non existing and restricted files - a skipped page
+        //must never abort the whole map, so always continue with the next id
+        if(isHiddenPage($id)) continue;
+        $date = @filemtime($file);
+        if(!$date) continue;
+        if(auth_aclcheck($id,'','') < AUTH_READ) continue;
+
+        // lastmod carries a time part, so emit UTC with an explicit 'Z' designator
+        print '  <url>'.NL;
+        print '    <loc>'.wl($id,'',true).'</loc>'.NL;
+        print '    <lastmod>'.gmdate('Y-m-d\TH:i:s\Z',$date).'</lastmod>'.NL;
+        print '  </url>'.NL;
+    }
+    print '</urlset>'.NL;
+
+    $data = ob_get_contents();
+    ob_end_clean();
+
+    io_saveFile($sitemap,$data);
+    return true;
+}
+
+/**
+ * Just send a 1x1 pixel blank gif to the browser
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @author Harry Fuecks <fuecks@gmail.com>
+ */
+function sendGIF(){
+    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
+    header('Content-Type: image/gif');
+    header('Content-Length: '.strlen($img));
+    header('Connection: Close');
+    print $img;
+    flush();
+    // Browser should drop connection after this
+    // Thinks it's got the whole image
+}
+
+//Setup VIM: ex: et ts=4 enc=utf-8 :
 
 // No trailing PHP closing tag - no output please!
 // See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php