diff --git a/inc/actions.php b/inc/actions.php index 3e0cb1207f6afdb3b7513d2c42c76168c651c4bd..2d70ac8ed6a6755c789063065145ec2dd1fab4ac 100644 --- a/inc/actions.php +++ b/inc/actions.php @@ -56,6 +56,10 @@ function act_dispatch(){ //check permissions $ACT = act_permcheck($ACT); + //sitemap + if ($ACT == 'sitemap') + $ACT = act_sitemap($ACT); + //register $nil = array(); if($ACT == 'register' && $_POST['save'] && register()){ @@ -205,7 +209,7 @@ function act_clean($act){ 'preview','search','show','check','index','revisions', 'diff','recent','backlink','admin','subscribe','revert', 'unsubscribe','profile','resendpwd','recover', - 'draftdel','subscribens','unsubscribens',)) && substr($act,0,7) != 'export_' ) { + 'draftdel','subscribens','unsubscribens','sitemap')) && substr($act,0,7) != 'export_' ) { msg('Command unknown: '.htmlspecialchars($act),-1); return 'show'; } @@ -233,7 +237,8 @@ function act_permcheck($act){ }else{ $permneed = AUTH_CREATE; } - }elseif(in_array($act,array('login','search','recent','profile','index'))){ + }elseif(in_array($act,array('login','search','recent','profile','index', 'sitemap'))){ + }elseif(in_array($act,array('login','search','recent','profile','sitemap'))){ $permneed = AUTH_NONE; }elseif($act == 'revert'){ $permneed = AUTH_ADMIN; @@ -586,6 +591,54 @@ function act_export($act){ return 'show'; } +/** + * Handle sitemap delivery + * + * @author Michael Hamann <michael@content-space.de> + */ +function act_sitemap($act) { + global $conf; + + if (!$conf['sitemap']) { + header("HTTP/1.0 404 Not Found"); + print "Sitemap generation is disabled."; + exit; + } + + $sitemap = $conf['cachedir'].'/sitemap.xml'; + if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){ + $mime = 'application/x-gzip'; + $sitemap .= '.gz'; + } else { + $mime = 'application/xml; charset=utf-8'; + } + + // Check if sitemap file exists, otherwise create it + if (!is_readable($sitemap)) { + require_once DOKU_INC.'inc/sitemap.php'; + sitemapGenerate(); + } + + if (is_readable($sitemap)) { + // Send headers + header('Content-Type: '.$mime); + + // Send file + //use x-sendfile header to pass the delivery to compatible webservers + if (http_sendfile($sitemap)) exit; + + $fp = @fopen($sitemap,"rb"); + if($fp){ + http_rangeRequest($fp,filesize($sitemap),$mime); + exit; + } + } + + header("HTTP/1.0 500 Internal Server Error"); + print "Could not read $sitemap - bad permissions?"; + exit; +} + /** * Handle page 'subscribe' * diff --git a/inc/common.php b/inc/common.php index bf5987c2843276ebca044146313acae9156cf5be..0816d9fbbcfc5defbceff7b30cfd04c7da849d74 100644 --- a/inc/common.php +++ b/inc/common.php @@ -1266,6 +1266,21 @@ function dformat($dt=null,$format=''){ return strftime($format,$dt); } +/** + * Formats a timestamp as ISO 8601 date + * + * @author <ungu at terong dot com> + * @link http://www.php.net/manual/en/function.date.php#54072 + */ +function date_iso8601($int_date) { + //$int_date: current date in UNIX timestamp + $date_mod = date('Y-m-d\TH:i:s', $int_date); + $pre_timezone = date('O', $int_date); + $time_zone = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2); + $date_mod .= $time_zone; + return $date_mod; +} + /** * return an obfuscated email address in line with $conf['mailguard'] setting * diff --git a/inc/sitemap.php b/inc/sitemap.php new file mode 100644 index 0000000000000000000000000000000000000000..bbed7d269501e2672c2cdb74a990ee38ae40edc2 --- /dev/null +++ b/inc/sitemap.php @@ -0,0 +1,101 @@ +<?php +/** + * Sitemap handling functions + * + * @license GPL 2 (http://www.gnu.org/licenses/gpl.html) + * @author Michael Hamann <michael@content-space.de> + */ + +if(!defined('DOKU_INC')) die('meh.'); + +/** + * Builds a Google Sitemap of all public pages known to the indexer + * + * The map is placed in the cache directory named sitemap.xml.gz - This + * file needs to be writable! + * + * @author Andreas Gohr + * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html + */ +function sitemapGenerate(){ + global $conf; + dbglog('sitemapGenerate(): started'); + if(!$conf['sitemap']) return false; + + $sitemap = sitemapGetFilePath(); + dbglog("runSitemapper(): using $sitemap"); + + if(@file_exists($sitemap)){ + if(!is_writable($sitemap)) return false; + }else{ + if(!is_writable(dirname($sitemap))) return false; + } + + if(@filesize($sitemap) && + @filemtime($sitemap) > (time()-($conf['sitemap']*60*60*24))){ + dbglog('runSitemapper(): Sitemap up to date'); + return false; + } + + $pages = idx_getIndex('page', ''); + dbglog('runSitemapper(): creating sitemap using '.count($pages).' pages'); + + // build the sitemap + ob_start(); + print '<?xml version="1.0" encoding="UTF-8"?>'.NL; + print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL; + foreach($pages as $id){ + $id = trim($id); + $file = wikiFN($id); + + //skip hidden, non existing and restricted files + if(isHiddenPage($id)) continue; + $date = @filemtime($file); + if(!$date) continue; + if(auth_aclcheck($id,'','') < AUTH_READ) continue; + + print ' <url>'.NL; + print ' <loc>'.wl($id,'',true).'</loc>'.NL; + print ' <lastmod>'.date_iso8601($date).'</lastmod>'.NL; + print ' </url>'.NL; + } + print '</urlset>'.NL; + $data = ob_get_contents(); + ob_end_clean(); + + //save the new sitemap + return io_saveFile($sitemap,$data); +} + +function sitemapGetFilePath() { + global $conf; + + $sitemap = $conf['cachedir'].'/sitemap.xml'; + if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){ + $sitemap .= '.gz'; + } + + return $sitemap; +} + +function sitemapPingSearchEngines() { + //ping search engines... + $http = new DokuHTTPClient(); + $http->timeout = 8; + + $encoded_sitemap_url = urlencode(wl('', array('do' => 'sitemap'), true, '&')); + $ping_urls = array( + 'google' => 'http://www.google.com/webmasters/sitemaps/ping?sitemap='.$encoded_sitemap_url, + 'yahoo' => 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url='.$encoded_sitemap_url, + 'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap='.$encoded_sitemap_url, + ); + + foreach ($ping_urls as $name => $url) { + dbglog("sitemapPingSearchEngines(): pinging $name"); + $resp = $http->get($url); + if($http->error) dbglog("runSitemapper(): $http->error"); + dbglog('runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp))); + } + + return true; +} diff --git a/lib/exe/indexer.php b/lib/exe/indexer.php index f8e2f7981cba4c684de6099158b93cbb83054a5f..63ad5931fdf394f78f2ef44d494f0ba159a55f37 100644 --- a/lib/exe/indexer.php +++ b/lib/exe/indexer.php @@ -232,88 +232,11 @@ function metaUpdate(){ * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html */ function runSitemapper(){ - global $conf; print "runSitemapper(): started".NL; - if(!$conf['sitemap']) return false; - - if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){ - $sitemap = 'sitemap.xml.gz'; - }else{ - $sitemap = 'sitemap.xml'; - } - print "runSitemapper(): using $sitemap".NL; - - if(@file_exists(DOKU_INC.$sitemap)){ - if(!is_writable(DOKU_INC.$sitemap)) return false; - }else{ - if(!is_writable(DOKU_INC)) return false; - } - - if(@filesize(DOKU_INC.$sitemap) && - @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){ - print 'runSitemapper(): Sitemap up to date'.NL; - return false; - } - - $pages = idx_getIndex('page', ''); - print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL; - - // build the sitemap - ob_start(); - print '<?xml version="1.0" encoding="UTF-8"?>'.NL; - print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL; - foreach($pages as $id){ - $id = trim($id); - $file = wikiFN($id); - - //skip hidden, non existing and restricted files - if(isHiddenPage($id)) continue; - $date = @filemtime($file); - if(!$date) continue; - if(auth_aclcheck($id,'','') < AUTH_READ) continue; - - print ' <url>'.NL; - print ' <loc>'.wl($id,'',true).'</loc>'.NL; - print ' <lastmod>'.date_iso8601($date).'</lastmod>'.NL; - print ' </url>'.NL; - } - print '</urlset>'.NL; - $data = ob_get_contents(); - ob_end_clean(); - - //save the new sitemap - io_saveFile(DOKU_INC.$sitemap,$data); - - //ping search engines... - $http = new DokuHTTPClient(); - $http->timeout = 8; - - //ping google - print 'runSitemapper(): pinging google'.NL; - $url = 'http://www.google.com/webmasters/sitemaps/ping?sitemap='; - $url .= urlencode(DOKU_URL.$sitemap); - $resp = $http->get($url); - if($http->error) print 'runSitemapper(): '.$http->error.NL; - print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL; - - //ping yahoo - print 'runSitemapper(): pinging yahoo'.NL; - $url = 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url='; - $url .= urlencode(DOKU_URL.$sitemap); - $resp = $http->get($url); - if($http->error) print 'runSitemapper(): '.$http->error.NL; - print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL; - - //ping microsoft - print 'runSitemapper(): pinging microsoft'.NL; - $url = 'http://www.bing.com/webmaster/ping.aspx?siteMap='; - $url .= urlencode(DOKU_URL.$sitemap); - $resp = $http->get($url); - if($http->error) print 'runSitemapper(): '.$http->error.NL; - print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL; - + require_once DOKU_INC.'inc/sitemap.php'; + $result = sitemapGenerate() && sitemapPingSearchEngines(); print 'runSitemapper(): finished'.NL; - return true; + return $result; } /** @@ -408,21 +331,6 @@ function sendDigest() { $_SERVER['REMOTE_USER'] = $olduser; } -/** - * Formats a timestamp as ISO 8601 date - * - * @author <ungu at terong dot com> - * @link http://www.php.net/manual/en/function.date.php#54072 - */ -function date_iso8601($int_date) { - //$int_date: current date in UNIX timestamp - $date_mod = date('Y-m-d\TH:i:s', $int_date); - $pre_timezone = date('O', $int_date); - $time_zone = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2); - $date_mod .= $time_zone; - return $date_mod; -} - /** * Just send a 1x1 pixel blank gif to the browser *