Skip to content
Snippets Groups Projects
Commit a05e297a authored by Andreas Gohr's avatar Andreas Gohr
Browse files

use fulltext index to search for used media files FS#1336 FS#1275

This changes how DokuWiki looks for references to a media file which is
about to be deleted. Instead of doing a full grep through all pages it now
uses the fulltext index first, then does an exact match on the found
pages.

This speeds up the search significantly on larger wikis. However, the
fulltext search limits now apply: images with names shorter than 3 characters
may not be found.

This needs extensive testing!

darcs-hash:20080223205254-7ad00-486de0a4125d51b4e7999827f710d1d9de8bc60d.gz
parent b5742ced
No related branches found
No related tags found
No related merge requests found
......@@ -152,6 +152,56 @@ function ft_backlinks($id){
return $result;
}
/**
 * Returns the pages that use a given media file
 *
 * Does a quick lookup of the file name (namespace stripped) in the
 * fulltext index, then scans the raw wiki text of every candidate page
 * for {{...}} media syntax and compares each resolved media ID with $id.
 *
 * Aborts after $max found results
 *
 * @param string $id  media ID to look for; it is compared against the IDs
 *                    produced by resolve_mediaid(), so it is presumably
 *                    expected to be a cleaned, fully qualified ID
 * @param int    $max maximum number of pages to report; an empty value
 *                    is treated as 1
 * @return array sorted list of page IDs that reference the media file
 */
function ft_mediause($id,$max){
    global $conf;
    $swfile    = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt';
    $stopwords = @file_exists($swfile) ? file($swfile) : array();

    if(!$max) $max = 1; // need to find at least one

    $result = array();

    // quick lookup of the mediafile
    $media   = noNS($id);
    $indexed = idx_lookup(idx_tokenizer($media,$stopwords));
    $docs    = array_keys(ft_resultCombine(array_values($indexed)));
    if(!count($docs)) return $result;

    // Matches {{target}} and {{target|title}} where target contains the
    // file name. The pipe is escaped so it is a literal separator and not
    // a regex alternation; the title part may be empty ({{target|}}).
    $pcre    = preg_quote($media,'/');
    $pattern = '/\{\{([^|}]*'.$pcre.'[^|}]*)(?:\|[^}]*)?\}\}/i';

    // go through all found pages
    $found = 0;
    foreach($docs as $doc){
        $ns    = getNS($doc);
        $links = array();

        // nothing matched (0) or the regex engine failed (false): skip page
        if(!preg_match_all($pattern,rawWiki($doc),$links)) continue;

        foreach($links[1] as $img){
            $img = trim($img);
            if(preg_match('/^https?:\/\//i',$img)) continue; // skip external images
            list($img) = explode('?',$img);                  // remove any parameters
            resolve_mediaid($ns,$img,$exists);               // resolve the possibly relative img

            // strict comparison: IDs are strings and must not be compared
            // numerically (e.g. "1e3" vs. "1000")
            if($img === $id){ // we have a match
                $result[] = $doc;
                $found++;
                break; // one hit per page is enough
            }
        }
        if($found >= $max) break;
    }

    sort($result);
    return $result;
}
/**
* Quicksearch for pagenames
*
......
......@@ -27,13 +27,10 @@ function media_filesinuse($data,$id){
echo '<p>'.hsc($lang['ref_inuse']).'</p>';
$hidden=0; //count of hits without read permission
usort($data,'sort_search_fulltext');
foreach($data as $row){
if(auth_quickaclcheck($row['id']) >= AUTH_READ){
if(auth_quickaclcheck($row) >= AUTH_READ && isVisiblePage($row)){
echo '<div class="search_result">';
echo '<span class="mediaref_ref">'.$row['id'].'</span>';
echo ': <span class="search_cnt">'.$row['count'].' '.$lang['hits'].'</span><br />';
echo '<div class="search_snippet">'.$row['snippet'].'</div>';
echo '<span class="mediaref_ref">'.hsc($row).'</span>';
echo '</div>';
}else
$hidden++;
......@@ -142,6 +139,7 @@ function media_metaform($id,$auth){
echo '</form>'.NL;
}
/**
* Handles media file deletions
*
......@@ -156,9 +154,11 @@ function media_delete($id,$auth){
global $conf;
global $lang;
// check for references if needed
$mediareferences = array();
if($conf['refcheck']){
search($mediareferences,$conf['datadir'],'search_reference',array('query' => $id));
require_once(DOKU_INC.'inc/fulltext.php');
$mediareferences = ft_mediause($id,$conf['refshow']);
}
if(!count($mediareferences)){
......
......@@ -18,6 +18,7 @@
// get namespace to display (either direct or from deletion order)
if($_REQUEST['delete']){
$DEL = cleanID($_REQUEST['delete']);
$IMG = $DEL;
$NS = getNS($DEL);
}elseif($_REQUEST['edit']){
$IMG = cleanID($_REQUEST['edit']);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment