Skip to content
Snippets Groups Projects
Commit 6c062f5e authored by Michael Hamann
Browse files

Sitemapper code improved and documentation added

Removed some calls to dbglog, improved the code performance and added
documentation for all functions and classes of the Sitemapper.
parent 2b54e1e1
No related branches found
No related tags found
No related merge requests found
......@@ -8,6 +8,11 @@
if(!defined('DOKU_INC')) die('meh.');
/**
* A class for building sitemaps and pinging search engines with the sitemap URL.
*
* @author Michael Hamann
*/
class Sitemapper {
/**
* Builds a Google Sitemap of all public pages known to the indexer
......@@ -15,16 +20,16 @@ class Sitemapper {
* The map is placed in the cache directory named sitemap.xml.gz - This
* file needs to be writable!
*
* @author Michael Hamann
* @author Andreas Gohr
* @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
* @link http://www.sitemaps.org/
*/
public function generate(){
global $conf;
dbglog('sitemapGenerate(): started');
if(!$conf['sitemap']) return false;
if($conf['sitemap'] < 1 || !is_numeric($conf['sitemap'])) return false;
$sitemap = Sitemapper::getFilePath();
dbglog("runSitemapper(): using $sitemap");
if(@file_exists($sitemap)){
if(!is_writable($sitemap)) return false;
......@@ -33,13 +38,15 @@ class Sitemapper {
}
if(@filesize($sitemap) &&
@filemtime($sitemap) > (time()-($conf['sitemap']*60*60*24))){
dbglog('runSitemapper(): Sitemap up to date');
@filemtime($sitemap) > (time()-($conf['sitemap']*86400))){ // 60*60*24=86400
dbglog('Sitemapper::generate(): Sitemap up to date'); // FIXME: only in debug mode
return false;
}
dbglog("Sitemapper::generate(): using $sitemap"); // FIXME: Only in debug mode
$pages = idx_getIndex('page', '');
dbglog('runSitemapper(): creating sitemap using '.count($pages).' pages');
dbglog('Sitemapper::generate(): creating sitemap using '.count($pages).' pages');
$items = array();
// build the sitemap items
......@@ -61,31 +68,49 @@ class Sitemapper {
return $result;
}
/**
* Builds the sitemap XML string from the given array of SitemapItems.
*
* @param $items array The SitemapItems that shall be included in the sitemap.
* @return string The sitemap XML.
* @author Michael Hamann
*/
private function getXML($items) {
// Buffer everything written below so the XML can be returned as a string
// instead of being sent to the client.
ob_start();
// NOTE(review): this listing looks like a rendered diff without +/- markers:
// every `print` statement is followed by an equivalent `echo` statement, so
// the `print` lines are presumably the removed versions. Confirm against the
// repository; executed as shown, each fragment would be emitted twice.
print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
echo '<?xml version="1.0" encoding="UTF-8"?>'.NL;
echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
// Each item contributes its own XML fragment via toXML().
foreach ($items as $item) {
print $item->toXML();
echo $item->toXML();
}
print '</urlset>'.NL;
echo '</urlset>'.NL;
// Read the buffered XML, then discard the buffer without outputting it.
$result = ob_get_contents();
ob_end_clean();
return $result;
}
/**
* Helper function for getting the path to the sitemap file.
*
* @return string The path to the sitemap file.
* @author Michael Hamann
*/
public function getFilePath() {
global $conf;
// The sitemap is stored in the configured cache directory.
$sitemap = $conf['cachedir'].'/sitemap.xml';
// NOTE(review): the two `if` lines below differ only in `==` vs `===` and
// share a single closing brace, so they look like the removed and added
// sides of a diff. Presumably only the strict (`===`) line belongs in the
// real file; confirm against the repository.
if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
if($conf['compression'] === 'bz2' || $conf['compression'] === 'gz'){
// Both the 'bz2' and the 'gz' setting result in the same '.gz' suffix.
$sitemap .= '.gz';
}
return $sitemap;
}
/**
* Pings search engines with the sitemap url. Plugins can add or remove
* urls to ping using the SITEMAP_PING event.
*
* @author Michael Hamann
*/
public function pingSearchEngines() {
//ping search engines...
$http = new DokuHTTPClient();
......@@ -98,13 +123,16 @@ class Sitemapper {
'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap='.$encoded_sitemap_url,
);
$event = new Doku_Event('SITEMAP_PING', $ping_urls);
$data = array('ping_urls' => $ping_urls,
'encoded_sitemap_url' => $encoded_sitemap_url
);
$event = new Doku_Event('SITEMAP_PING', $data);
if ($event->advise_before(true)) {
foreach ($ping_urls as $name => $url) {
dbglog("sitemapPingSearchEngines(): pinging $name");
foreach ($data['ping_urls'] as $name => $url) {
dbglog("Sitemapper::PingSearchEngines(): pinging $name");
$resp = $http->get($url);
if($http->error) dbglog("runSitemapper(): $http->error");
dbglog('runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)));
if($http->error) dbglog("Sitemapper:pingSearchengines(): $http->error");
dbglog('Sitemapper:pingSearchengines(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)));
}
}
$event->advise_after();
......@@ -113,12 +141,25 @@ class Sitemapper {
}
}
/**
* An item of a sitemap.
*
* @author Michael Hamann
*/
class SitemapItem {
public $url;
public $lastmod;
public $changefreq;
public $priority;
/**
* Create a new item.
*
* @param $url string The url of the item
* @param $lastmod int Timestamp of the last modification
* @param $changefreq string How frequently the item is likely to change. Valid values: always, hourly, daily, weekly, monthly, yearly, never.
* @param $priority float|string The priority of the item relative to other URLs on your site. Valid values range from 0.0 to 1.0.
*/
public function __construct($url, $lastmod, $changefreq = null, $priority = null) {
$this->url = $url;
$this->lastmod = $lastmod;
......@@ -126,6 +167,14 @@ class SitemapItem {
$this->priority = $priority;
}
/**
* Helper function for creating an item for a wikipage id.
*
* @param $id string A wikipage id.
* @param $changefreq string How frequently the item is likely to change. Valid values: always, hourly, daily, weekly, monthly, yearly, never.
* @param $priority float|string The priority of the item relative to other URLs on your site. Valid values range from 0.0 to 1.0.
* @return The sitemap item.
*/
public static function createFromID($id, $changefreq = null, $priority = null) {
$id = trim($id);
$date = @filemtime(wikiFN($id));
......@@ -133,10 +182,15 @@ class SitemapItem {
return new SitemapItem(wl($id, '', true), $date, $changefreq, $priority);
}
/**
* Get the XML representation of the sitemap item.
*
* @return string The XML representation.
*/
public function toXML() {
$result = ' <url>'.NL;
$result .= ' <loc>'.hsc($this->url).'</loc>'.NL;
$result .= ' <lastmod>'.date_iso8601($this->lastmod).'</lastmod>'.NL;
$result = ' <url>'.NL
.' <loc>'.hsc($this->url).'</loc>'.NL
.' <lastmod>'.date_iso8601($this->lastmod).'</lastmod>'.NL;
if ($this->changefreq !== NULL)
$result .= ' <changefreq>'.hsc($this->changefreq).'</changefreq>'.NL;
if ($this->priority !== NULL)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment