diff --git a/feed.php b/feed.php index 9454a9b955dcd482534389a968422052c0f7ba04..c4c386261ed0dd22163881653026bc378646e9c7 100644 --- a/feed.php +++ b/feed.php @@ -98,19 +98,19 @@ function rssRecentChanges(&$rss,$num,$ltype,$ns){ //this can take some time if a lot of recaching has to be done @set_time_limit(90); // set max execution time - foreach(array_keys($recents) as $id){ + foreach($recents as $recent){ $item = new FeedItem(); - $item->title = $id; - $xhtml = p_wiki_xhtml($id,'',false); + $item->title = $recent['id']; + $xhtml = p_wiki_xhtml($recent['id'],'',false); if($conf['useheading']) { $matches = array(); if(preg_match('|<h([1-9])>(.*?)</h\1>|', $xhtml, $matches)) $item->title = trim($matches[2]); } - if(!empty($recents[$id]['sum'])){ - $item->title .= ' - '.strip_tags($recents[$id]['sum']); + if(!empty($recent['sum'])){ + $item->title .= ' - '.strip_tags($recent['sum']); } $desc = cleanDesc($xhtml); @@ -120,27 +120,26 @@ function rssRecentChanges(&$rss,$num,$ltype,$ns){ switch ($ltype){ case 'page': - $item->link = wl($id,'rev='.$recents[$id]['date'],true); + $item->link = wl($recent['id'],'rev='.$recent['date'],true); break; case 'rev': - $item->link = wl($id,'do=revisions&rev='.$recents[$id]['date'],true); + $item->link = wl($recent['id'],'do=revisions&rev='.$recent['date'],true); break; case 'current': - $item->link = wl($id, '', true); + $item->link = wl($recent['id'], '', true); break; case 'diff': default: - $item->link = wl($id,'do=diff&'.$recents[$id]['date'],true); + $item->link = wl($recent['id'],'do=diff&'.$recent['date'],true); } $item->description = $desc; - $item->date = date('r',$recents[$id]['date']); - if(strpos($id,':')!==false){ - $item->category = substr($id,0,strrpos($id,':')); - } + $item->date = date('r',$recent['date']); + $cat = getNS($recent['id']); + if($cat) $item->category = $cat; $user = null; - $user = @$recents[$id]['user']; // the @ spares time repeating lookup + $user = @$recent['user']; // the @ spares time repeating 
lookup $item->author = ''; if($user){ @@ -148,12 +147,12 @@ function rssRecentChanges(&$rss,$num,$ltype,$ns){ $item->author = $userInfo['name']; if($guardmail) { //cannot obfuscate because some RSS readers may check validity - $item->authorEmail = $user.'@'.$recents[$id]['ip']; + $item->authorEmail = $user.'@'.$recent['ip']; }else{ $item->authorEmail = $userInfo['mail']; } }else{ - $item->authorEmail = 'anonymous@'.$recents[$id]['ip']; + $item->authorEmail = 'anonymous@'.$recent['ip']; } $rss->addItem($item); } diff --git a/inc/common.php b/inc/common.php index eb4b560a63a8d256852f2f07bb8e614f9a2ebc87..4585634fcff428dc5833fddd71330214737a54b4 100644 --- a/inc/common.php +++ b/inc/common.php @@ -594,18 +594,71 @@ function addLogEntry($date,$id,$summary=""){ io_saveFile($conf['changelog'],$logline,true); } +/** + * Internal function used by getRecents + * + * don't call directly + * + * @see getRecents() + * @author Andreas Gohr <andi@splitbrain.org> + */ +function _handleRecent($line,$incdel,$ns,$subNS){ + static $seen = array(); //caches seen pages and skip them + if(empty($line)) return false; //skip empty lines + + // split the line into parts + list($dt,$ip,$id,$usr,$sum) = explode("\t",$line); + + // skip seen ones + if($seen[$id]) return false; + + // remember in seen to skip additional sights + $seen[$id] = 1; + + // filter namespace + if (($ns) && (strpos($id,$ns.':') !== 0)) return false; + + // exclude subnamespaces + if ((!$subNS) && (getNS($id) != $ns)) return false; + + // check existence + if(!@file_exists(wikiFN($id))){ + if(!$incdel){ + return false; + }else{ + $recent = array(); + $recent['del'] = true; + } + }else{ + $recent = array(); + $recent['del'] = false; + } + + $recent['id'] = $id; + $recent['date'] = $dt; + $recent['ip'] = $ip; + $recent['user'] = $usr; + $recent['sum'] = $sum; + + return $recent; +} + /** * returns an array of recently changed files using the * changelog - * first : first entry in array returned - * num : return 'num' 
entries + * + * @param int $first number of first entry returned (for paginating) + * @param int $num return $num entries + * @param bool $incdel include deleted pages? + * @param string $ns restrict to given namespace + * @param bool $subNS include subnamespaces * * @author Andreas Gohr <andi@splitbrain.org> */ function getRecents($first,$num,$incdel=false,$ns='',$subNS=true){ global $conf; $recent = array(); - $names = array(); + $count = 0; if(!$num) return $recent; @@ -615,37 +668,47 @@ function getRecents($first,$num,$incdel=false,$ns='',$subNS=true){ return $recent; } - $loglines = file($conf['changelog']); - rsort($loglines); //reverse sort on timestamp - - foreach ($loglines as $line){ - $line = rtrim($line); //remove newline - if(empty($line)) continue; //skip empty lines - $info = split("\t",$line); //split into parts - //add id if not in yet and file still exists and is allowed to read - if(!$names[$info[2]] && - (@file_exists(wikiFN($info[2])) || $incdel) && - (auth_quickaclcheck($info[2]) >= AUTH_READ) - ){ - // filter namespace - if (($ns) && (strpos($info[2],$ns.':') !== 0)) continue; - - // exclude subnamespaces - if ((!$subNS) && (getNS($info[2]) != $ns)) continue; + $fh = fopen($conf['changelog'],'r'); + $buf = ''; + $csz = 4096; //chunksize + fseek($fh,0,SEEK_END); // jump to the end + $pos = ftell($fh); // position pointer - $names[$info[2]] = 1; - if(--$first >= 0) continue; /* skip "first" entries */ - - $recent[$info[2]]['date'] = $info[0]; - $recent[$info[2]]['ip'] = $info[1]; - $recent[$info[2]]['user'] = $info[3]; - $recent[$info[2]]['sum'] = $info[4]; - $recent[$info[2]]['del'] = !@file_exists(wikiFN($info[2])); + // now read backwards into buffer + while($pos > 0){ + $pos -= $csz; // seek to previous chunk... 
+ if($pos < 0) $pos = 0; // ...or rest of file + fseek($fh,$pos); + + $buf = fread($fh,$csz).$buf; // prepend to buffer + + $lines = explode("\n",$buf); // split buffer into lines + + if($pos > 0){ + $buf = array_shift($lines); // first one may be still incomplete } - if(count($recent) >= $num){ - break; //finish if enough items found + + $cnt = count($lines); + if(!$cnt) continue; // no lines yet + + // handle lines + for($i = $cnt-1; $i >= 0; $i--){ + $rec = _handleRecent($lines[$i],$incdel,$ns,$subNS); + if($rec !== false){ + if(--$first >= 0) continue; // skip first entries + $recent[] = $rec; + $count++; + + // break while when we have enough entries + if($count >= $num){ + $pos = 0; // will break the while loop + break; // will break the for loop + } + } } - } + }// end of while + + fclose($fh); return $recent; } diff --git a/inc/html.php b/inc/html.php index 7c4e62cda75b92dbddc434ab244f804b5320ee59..1e48b548c3ad7558aad09385515168c187a74a4e 100644 --- a/inc/html.php +++ b/inc/html.php @@ -469,15 +469,13 @@ function html_recent($first=0){ print p_locale_xhtml('recent'); print '<ul>'; - $keys = array_keys($recents); - for ($n=0; $n < $cnt; $n++){ - $id = $keys[$n]; - $date = date($conf['dformat'],$recents[$id]['date']); + foreach($recents as $recent){ + $date = date($conf['dformat'],$recent['date']); print '<li>'; print $date.' 
'; - print '<a href="'.wl($id,"do=diff").'">'; + print '<a href="'.wl($recent['id'],"do=diff").'">'; $p = array(); $p['src'] = DOKU_BASE.'lib/images/diff.png'; $p['border'] = 0; @@ -489,7 +487,7 @@ function html_recent($first=0){ print "<img $att />"; print '</a> '; - print '<a href="'.wl($id,"do=revisions").'">'; + print '<a href="'.wl($recent['id'],"do=revisions").'">'; $p = array(); $p['src'] = DOKU_BASE.'lib/images/history.png'; $p['border'] = 0; @@ -501,14 +499,14 @@ function html_recent($first=0){ print "<img $att />"; print '</a> '; - print html_wikilink(":$id",$conf['useheading']?NULL:$id); + print html_wikilink(':'.$recent['id'],$conf['useheading']?NULL:$recent['id']); - print ' '.htmlspecialchars($recents[$id]['sum']); + print ' '.htmlspecialchars($recent['sum']); print ' <span class="user">'; - if($recents[$id]['user']){ - print $recents[$id]['user']; + if($recent['user']){ + print $recent['user']; }else{ - print $recents[$id]['ip']; + print $recent['ip']; } print '</span>'; diff --git a/inc/indexer.php b/inc/indexer.php index 2bc707269be55ee4725c83c9dbf8c955823d03b8..fe8e74bd9d1fac8a2042753dafe80aadff781bc2 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -34,7 +34,7 @@ function idx_getPageWords($page){ $body = strtr($body, "\r\n\t", ' '); $tokens = explode(' ', $body); $tokens = array_count_values($tokens); // count the frequency of each token - + $words = array(); foreach ($tokens as $word => $count) { @@ -54,7 +54,7 @@ function idx_getPageWords($page){ $words[$word] = $count + (isset($words[$word]) ? $words[$word] : 0); } } - + // arrive here with $words = array(word => frequency) $index = array(); //resulting index