From e5d09fddcd17a2fe896650b64b81313a7d000975 Mon Sep 17 00:00:00 2001
From: Michael Hamann <michael@content-space.de>
Date: Wed, 31 Jul 2013 17:30:08 +0200
Subject: [PATCH] Index media file usage in the metadata index and use it in
 ft_mediause()

---
 _test/tests/inc/fulltext_mediause.test.php | 77 ++++++++++++++++++++++
 inc/fulltext.php                           | 48 +++++---------
 inc/indexer.php                            |  8 ++-
 inc/media.php                              |  2 +-
 inc/parser/metadata.php                    | 30 +++++++--
 5 files changed, 127 insertions(+), 38 deletions(-)
 create mode 100644 _test/tests/inc/fulltext_mediause.test.php

diff --git a/_test/tests/inc/fulltext_mediause.test.php b/_test/tests/inc/fulltext_mediause.test.php
new file mode 100644
index 000000000..9d5b2dc84
--- /dev/null
+++ b/_test/tests/inc/fulltext_mediause.test.php
@@ -0,0 +1,77 @@
+<?php
+
+// must be run within Dokuwiki
+if (!defined('DOKU_INC')) die();
+
+/**
+ * Test cases for the media usage index
+ *
+ * @author Michael Hamann <michael@content-space.de>
+ */
+class fultext_mediause_test extends DokuWikiTest {
+    /** Plain media syntax: a same-namespace id and a '..:' relative id containing a space are both found. */
+    public function test_internalmedia() {
+        saveWikiText('test:internalmedia_usage', '{{internalmedia.png}} {{..:internal media.png}}', 'Test initialization');
+        idx_addPage('test:internalmedia_usage');
+
+        $this->assertEquals(array('test:internalmedia_usage'), ft_mediause('internal_media.png'));
+        $this->assertEquals(array('test:internalmedia_usage'), ft_mediause('test:internalmedia.png'));
+    }
+    /** Media used as the title of an interwiki link and of an external link is found; size parameters are not part of the id. */
+    public function test_media_in_links() {
+        saveWikiText('test:medialinks', '[[doku>wiki:dokuwiki|{{wiki:logo.png}}]] [[http://www.example.com|{{example.png?200x800}}]]', 'Test init');
+        idx_addPage('test:medialinks');
+
+        $this->assertEquals(array('test:medialinks'), ft_mediause('wiki:logo.png'));
+        $this->assertEquals(array('test:medialinks'), ft_mediause('test:example.png'));
+    }
+    /** Media inside a footnote, including media used as a link title within the footnote, is found. */
+    public function test_media_in_footnotes() {
+        saveWikiText('test:media_footnotes', '(({{footnote.png?20x50}} [[foonote|{{:footlink.png}}]]))', 'Test initialization');
+        idx_addPage('test:media_footnotes');
+
+        $this->assertEquals(array('test:media_footnotes'), ft_mediause('test:footnote.png'));
+        $this->assertEquals(array('test:media_footnotes'), ft_mediause('footlink.png'));
+    }
+    /** A page matching $conf['hidepages'] is only returned when the second argument is true. */
+    public function test_media_in_hidden_pages() {
+        global $conf;
+        $conf['hidepages'] = 'hidden:.*';
+        saveWikiText('hidden:medias', '[[doku>wiki:dokuwiki|{{wiki:hiddenlogo.png}}]]', 'Test initialization');
+        idx_addPage('hidden:medias');
+
+        $this->assertEquals(array(), ft_mediause('wiki:hiddenlogo.png'));
+        $this->assertEquals(array(), ft_mediause('wiki:hiddenlogo.png', false));
+        $this->assertEquals(array('hidden:medias'), ft_mediause('wiki:hiddenlogo.png', true));
+    }
+    /** A page in an ACL-protected namespace is only returned to the non-superuser 'eve' when the second argument is true. */
+    public function test_media_in_protected_pages() {
+        global $conf;
+        global $AUTH_ACL;
+        $conf['superuser'] = 'alice';
+        $conf['useacl']    = 1;
+
+        $AUTH_ACL = array(
+            '*           @ALL           8',
+            'secret:*      @ALL           0',
+        );
+
+        $_SERVER['REMOTE_USER'] = 'eve';
+
+        saveWikiText('secret:medias', '[[doku>wiki:dokuwiki|{{wiki:secretlogo.png}}]]', 'Test initialization');
+        idx_addPage('secret:medias');
+
+        $this->assertEquals(array(), ft_mediause('wiki:secretlogo.png'));
+        $this->assertEquals(array(), ft_mediause('wiki:secretlogo.png', false));
+        $this->assertEquals(array('secret:medias'), ft_mediause('wiki:secretlogo.png', true));
+    }
+    /** A page that was indexed and afterwards saved with empty text (summary 'Deleted') is no longer reported. */
+    public function test_media_in_deleted_pages() {
+        saveWikiText('test:internalmedia_usage', '{{internalmedia.png}} {{..:internal media.png}}', 'Test initialization');
+        idx_addPage('test:internalmedia_usage');
+        saveWikiText('test:internalmedia_usage', '', 'Deleted');
+
+        $this->assertEquals(array(), ft_mediause('internal_media.png'));
+        $this->assertEquals(array(), ft_mediause('test:internalmedia.png'));
+    }
+}
diff --git a/inc/fulltext.php b/inc/fulltext.php
index 1afff25dd..c03126994 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -151,42 +151,28 @@ function ft_backlinks($id, $ignore_perms = false){
 /**
  * Returns the pages that use a given media file
  *
- * Does a quick lookup with the fulltext index, then
- * evaluates the instructions of the found pages
+ * Uses the relation media metadata property and the metadata index.
  *
- * Aborts after $max found results
+ * Note that before 2013-07-31 the second parameter was the maximum number of results and
+ * permissions were ignored. That's why the parameter is now checked to be explicitly set
+ * to true (with type bool) in order to be compatible with older uses of the function.
+ *
+ * @param string $id           The media id to look for
+ * @param bool   $ignore_perms If true, also return hidden pages and pages the user may not read (optional, default: false)
+ * @return array A list of pages that use the given media file
  */
-function ft_mediause($id,$max){
-    if(!$max) $max = 1; // need to find at least one
+function ft_mediause($id, $ignore_perms = false){
+    $result = idx_get_indexer()->lookupKey('relation_media', $id);
 
-    $result = array();
+    if(!count($result)) return $result;
 
-    // quick lookup of the mediafile
-    // FIXME use metadata key lookup
-    $media   = noNS($id);
-    $matches = idx_lookup(idx_tokenizer($media));
-    $docs    = array_keys(ft_resultCombine(array_values($matches)));
-    if(!count($docs)) return $result;
-
-    // go through all found pages
-    $found = 0;
-    $pcre  = preg_quote($media,'/');
-    foreach($docs as $doc){
-        $ns = getNS($doc);
-        preg_match_all('/\{\{([^|}]*'.$pcre.'[^|}]*)(|[^}]+)?\}\}/i',rawWiki($doc),$matches);
-        foreach($matches[1] as $img){
-            $img = trim($img);
-            if(media_isexternal($img)) continue; // skip external images
-                list($img) = explode('?',$img);                  // remove any parameters
-            resolve_mediaid($ns,$img,$exists);               // resolve the possibly relative img
-
-            if($img == $id){                                 // we have a match
-                $result[] = $doc;
-                $found++;
-                break;
-            }
+    // drop hidden or unreadable pages (unless $ignore_perms is true) and pages that no longer exist
+    foreach(array_keys($result) as $idx){
+        if(($ignore_perms !== true && (
+                    isHiddenPage($result[$idx]) || auth_quickaclcheck($result[$idx]) < AUTH_READ
+                )) || !page_exists($result[$idx], '', false)){
+            unset($result[$idx]);
         }
-        if($found >= $max) break;
     }
 
     sort($result);
diff --git a/inc/indexer.php b/inc/indexer.php
index 2f3ab25dc..8f0ba7ec6 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -10,7 +10,7 @@
 if(!defined('DOKU_INC')) die('meh.');
 
 // Version tag used to force rebuild on upgrade
-define('INDEXER_VERSION', 5);
+define('INDEXER_VERSION', 6);
 
 // set the minimum token length to use in the index (note, this doesn't apply to numeric tokens)
 if (!defined('IDX_MINWORDLENGTH')) define('IDX_MINWORDLENGTH',2);
@@ -1365,6 +1365,12 @@ function idx_addPage($page, $verbose=false, $force=false) {
         $metadata['relation_references'] = array_keys($references);
     else
         $metadata['relation_references'] = array();
+
+    if (($media = p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED)) !== null)
+        $metadata['relation_media'] = array_keys($media);
+    else
+        $metadata['relation_media'] = array();
+
     $data = compact('page', 'body', 'metadata', 'pid');
     $evt = new Doku_Event('INDEXER_PAGE_ADD', $data);
     if ($evt->advise_before()) $data['body'] = $data['body'] . " " . rawWiki($page);
diff --git a/inc/media.php b/inc/media.php
index fbe1363ec..c76f2986c 100644
--- a/inc/media.php
+++ b/inc/media.php
@@ -178,7 +178,7 @@ function media_inuse($id) {
     global $conf;
     $mediareferences = array();
     if($conf['refcheck']){
-        $mediareferences = ft_mediause($id,$conf['refshow']);
+        $mediareferences = ft_mediause($id,true);
         if(!count($mediareferences)) {
             return false;
         } else {
diff --git a/inc/parser/metadata.php b/inc/parser/metadata.php
index e17b82f8b..d64fe4d77 100644
--- a/inc/parser/metadata.php
+++ b/inc/parser/metadata.php
@@ -282,8 +282,10 @@ class Doku_Renderer_metadata extends Doku_Renderer {
   function internallink($id, $name = NULL){
     global $ID;
 
-    if(is_array($name))
+    if(is_array($name)) {
         $this->_firstimage($name['src']);
+        if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
+    }
 
     $default = $this->_simpleTitle($id);
 
@@ -304,8 +306,10 @@ class Doku_Renderer_metadata extends Doku_Renderer {
   }
 
   function externallink($url, $name = NULL){
-    if(is_array($name))
+    if(is_array($name)) {
         $this->_firstimage($name['src']);
+        if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
+    }
 
     if ($this->capture){
       $this->doc .= $this->_getLinkTitle($name, '<' . $url . '>');
@@ -313,8 +317,10 @@ class Doku_Renderer_metadata extends Doku_Renderer {
   }
 
   function interwikilink($match, $name = NULL, $wikiName, $wikiUri){
-    if(is_array($name))
+    if(is_array($name)) {
         $this->_firstimage($name['src']);
+        if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
+    }
 
     if ($this->capture){
       list($wikiUri, $hash) = explode('#', $wikiUri, 2);
@@ -324,8 +330,10 @@ class Doku_Renderer_metadata extends Doku_Renderer {
   }
 
   function windowssharelink($url, $name = NULL){
-    if(is_array($name))
+    if(is_array($name)) {
         $this->_firstimage($name['src']);
+        if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
+    }
 
     if ($this->capture){
       if ($name) $this->doc .= $name;
@@ -334,8 +342,10 @@ class Doku_Renderer_metadata extends Doku_Renderer {
   }
 
   function emaillink($address, $name = NULL){
-    if(is_array($name))
+    if(is_array($name)) {
         $this->_firstimage($name['src']);
+        if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
+    }
 
     if ($this->capture){
       if ($name) $this->doc .= $name;
@@ -347,6 +357,7 @@ class Doku_Renderer_metadata extends Doku_Renderer {
                          $height=NULL, $cache=NULL, $linking=NULL){
     if ($this->capture && $title) $this->doc .= '['.$title.']';
     $this->_firstimage($src);
+    $this->_recordMediaUsage($src);
   }
 
   function externalmedia($src, $title=NULL, $align=NULL, $width=NULL,
@@ -439,6 +450,15 @@ class Doku_Renderer_metadata extends Doku_Renderer {
         $this->firstimage = $src;
     }
   }
+
+  function _recordMediaUsage($src) { // store an internal media reference under $this->meta['relation']['media']
+      global $ID;
+      // cut off a '#fragment'; only element 0 is taken so a $src without '#' raises no undefined-offset notice
+      list ($src) = explode('#', $src, 2);
+      if (media_isexternal($src)) return; // external media is not recorded
+      resolve_mediaid(getNS($ID), $src, $exists); // resolves $src against the namespace of $ID and fills $exists
+      $this->meta['relation']['media'][$src] = $exists; // key: resolved media id, value: $exists as set above
+  }
 }
 
 //Setup VIM: ex: et ts=4 :
-- 
GitLab