From a05e297acbd41dc059369b143e2cadf281a581a1 Mon Sep 17 00:00:00 2001 From: Andreas Gohr <andi@splitbrain.org> Date: Sat, 23 Feb 2008 21:52:54 +0100 Subject: [PATCH] use fulltext index to search for used media files FS#1336 FS#1275 This changes how DokuWiki looks for references to a media file which is about to be deleted. Instead of doing a full grep through all pages it now uses the fulltext index first, then does an exact match on the found pages. This speeds up the search significantly on larger wikis. However the fulltext search limits now apply: images with names shorter than 3 characters may not be found. This needs extensive testing! darcs-hash:20080223205254-7ad00-486de0a4125d51b4e7999827f710d1d9de8bc60d.gz --- inc/fulltext.php | 50 ++++++++++++++++++++++++++++++++++++++++ inc/media.php | 12 +++++----- lib/exe/mediamanager.php | 1 + 3 files changed, 57 insertions(+), 6 deletions(-) diff --git a/inc/fulltext.php b/inc/fulltext.php index b10cbde8e..a0be280bf 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -152,6 +152,56 @@ function ft_backlinks($id){ return $result; } +/** + * Returns the pages that use a given media file + * + * Does a quick lookup with the fulltext index, then + * evaluates the instructions of the found pages + * + * Aborts after $max found results + */ +function ft_mediause($id,$max){ + global $conf; + $swfile = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt'; + $stopwords = @file_exists($swfile) ? 
file($swfile) : array(); + + if(!$max) $max = 1; // need to find at least one + + $result = array(); + + // quick lookup of the mediafile + $media = noNS($id); + $matches = idx_lookup(idx_tokenizer($media,$stopwords)); + $docs = array_keys(ft_resultCombine(array_values($matches))); + if(!count($docs)) return $result; + + // go through all found pages + $found = 0; + $pcre = preg_quote($media,'/'); + foreach($docs as $doc){ + $ns = getNS($doc); + preg_match_all('/\{\{([^|}]*'.$pcre.'[^|}]*)(|[^}]+)?\}\}/i',rawWiki($doc),$matches); + foreach($matches[1] as $img){ + $img = trim($img); + if(preg_match('/^https?:\/\//i',$img)) continue; // skip external images + list($img) = explode('?',$img); // remove any parameters + resolve_mediaid($ns,$img,$exists); // resolve the possibly relative img + + if($img == $id){ // we have a match + $result[] = $doc; + $found++; + break; + } + } + if($found >= $max) break; + } + + sort($result); + return $result; +} + + + /** * Quicksearch for pagenames * diff --git a/inc/media.php b/inc/media.php index efa200d4c..663a35051 100644 --- a/inc/media.php +++ b/inc/media.php @@ -27,13 +27,10 @@ function media_filesinuse($data,$id){ echo '<p>'.hsc($lang['ref_inuse']).'</p>'; $hidden=0; //count of hits without read permission - usort($data,'sort_search_fulltext'); foreach($data as $row){ - if(auth_quickaclcheck($row['id']) >= AUTH_READ){ + if(auth_quickaclcheck($row) >= AUTH_READ && isVisiblePage($row)){ echo '<div class="search_result">'; - echo '<span class="mediaref_ref">'.$row['id'].'</span>'; - echo ': <span class="search_cnt">'.$row['count'].' 
'.$lang['hits'].'</span><br />'; - echo '<div class="search_snippet">'.$row['snippet'].'</div>'; + echo '<span class="mediaref_ref">'.hsc($row).'</span>'; echo '</div>'; }else $hidden++; @@ -142,6 +139,7 @@ function media_metaform($id,$auth){ echo '</form>'.NL; } + /** * Handles media file deletions * @@ -156,9 +154,11 @@ function media_delete($id,$auth){ global $conf; global $lang; + // check for references if needed $mediareferences = array(); if($conf['refcheck']){ - search($mediareferences,$conf['datadir'],'search_reference',array('query' => $id)); + require_once(DOKU_INC.'inc/fulltext.php'); + $mediareferences = ft_mediause($id,$conf['refshow']); } if(!count($mediareferences)){ diff --git a/lib/exe/mediamanager.php b/lib/exe/mediamanager.php index 7e71f800e..32849be62 100644 --- a/lib/exe/mediamanager.php +++ b/lib/exe/mediamanager.php @@ -18,6 +18,7 @@ // get namespace to display (either direct or from deletion order) if($_REQUEST['delete']){ $DEL = cleanID($_REQUEST['delete']); + $IMG = $DEL; $NS = getNS($DEL); }elseif($_REQUEST['edit']){ $IMG = cleanID($_REQUEST['edit']); -- GitLab