From 60e91a171860bce870e3b3e9109d1313ed6bc071 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <gohr@cosmocode.de>
Date: Mon, 26 Oct 2009 10:23:59 +0100
Subject: [PATCH] added FULLTEXT_SNIPPET_CREATE event

Ignore-this: a0ebcdd129f4256e4be029e7fdf7ca45

darcs-hash:20091026092359-6e07b-4c41896825e091a3c8fbbeadc3bc7764d0735bf6.gz
---
 inc/fulltext.php | 153 ++++++++++++++++++++++++++---------------------
 1 file changed, 84 insertions(+), 69 deletions(-)

diff --git a/inc/fulltext.php b/inc/fulltext.php
index 06834f5ae..afb15528e 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -278,81 +278,96 @@ function ft_pagesorter($a, $b){
  * Creates a snippet extract
  *
  * @author Andreas Gohr <andi@splitbrain.org>
+ * @triggers FULLTEXT_SNIPPET_CREATE
  */
 function ft_snippet($id,$highlight){
-    $text     = rawWiki($id);
-    $match = array();
-    $snippets = array();
-    $utf8_offset = $offset = $end = 0;
-    $len = utf8_strlen($text);
-
-    // build a regexp from the phrases to highlight
-    $re1 = '('.join('|',array_map('preg_quote_cb',array_filter((array) $highlight))).')';
-    $re2 = "$re1.{0,75}(?!\\1)$re1";
-    $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1";
-
-    for ($cnt=4; $cnt--;) {
-      if (0) {
-      } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
-      } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
-      } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
-      } else {
-        break;
-      }
-
-      list($str,$idx) = $match[0];
-
-      // convert $idx (a byte offset) into a utf8 character offset
-      $utf8_idx = utf8_strlen(substr($text,0,$idx));
-      $utf8_len = utf8_strlen($str);
-
-      // establish context, 100 bytes surrounding the match string
-      // first look to see if we can go 100 either side,
-      // then drop to 50 adding any excess if the other side can't go to 50,
-      $pre = min($utf8_idx-$utf8_offset,100);
-      $post = min($len-$utf8_idx-$utf8_len,100);
-
-      if ($pre>50 && $post>50) {
-        $pre = $post = 50;
-      } else if ($pre>50) {
-        $pre = min($pre,100-$post);
-      } else if ($post>50) {
-        $post = min($post, 100-$pre);
-      } else {
-        // both are less than 50, means the context is the whole string
-        // make it so and break out of this loop - there is no need for the
-        // complex snippet calculations
-        $snippets = array($text);
-        break;
-      }
-
-      // establish context start and end points, try to append to previous
-      // context if possible
-      $start = $utf8_idx - $pre;
-      $append = ($start < $end) ? $end : false;  // still the end of the previous context snippet
-      $end = $utf8_idx + $utf8_len + $post;      // now set it to the end of this context
+    $text = rawWiki($id);
+    $evdata = array(
+                'id'        => $id,
+                'text'      => &$text,
+                'highlight' => &$highlight,
+                'snippet'   => '',
+              );
+
+    $evt = new Doku_Event('FULLTEXT_SNIPPET_CREATE',$evdata);
+    if ($evt->advise_before()) {
+        $match = array();
+        $snippets = array();
+        $utf8_offset = $offset = $end = 0;
+        $len = utf8_strlen($text);
+
+        // build a regexp from the phrases to highlight
+        $re1 = '('.join('|',array_map('preg_quote_cb',array_filter((array) $highlight))).')';
+        $re2 = "$re1.{0,75}(?!\\1)$re1";
+        $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1";
+
+        for ($cnt=4; $cnt--;) {
+          if (0) {
+          } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
+          } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
+          } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
+          } else {
+            break;
+          }
+
+          list($str,$idx) = $match[0];
+
+          // convert $idx (a byte offset) into a utf8 character offset
+          $utf8_idx = utf8_strlen(substr($text,0,$idx));
+          $utf8_len = utf8_strlen($str);
+
+          // establish context, 100 characters surrounding the match string
+          // first look to see if we can go 100 either side,
+          // then drop to 50 adding any excess if the other side can't go to 50,
+          $pre = min($utf8_idx-$utf8_offset,100);
+          $post = min($len-$utf8_idx-$utf8_len,100);
+
+          if ($pre>50 && $post>50) {
+            $pre = $post = 50;
+          } else if ($pre>50) {
+            $pre = min($pre,100-$post);
+          } else if ($post>50) {
+            $post = min($post, 100-$pre);
+          } else {
+            // both are less than 50, means the context is the whole string
+            // make it so and break out of this loop - there is no need for the
+            // complex snippet calculations
+            $snippets = array($text);
+            break;
+          }
+
+          // establish context start and end points, try to append to previous
+          // context if possible
+          $start = $utf8_idx - $pre;
+          $append = ($start < $end) ? $end : false;  // still the end of the previous context snippet
+          $end = $utf8_idx + $utf8_len + $post;      // now set it to the end of this context
+
+          if ($append) {
+            $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
+          } else {
+            $snippets[] = utf8_substr($text,$start,$end-$start);
+          }
+
+          // set $offset for next match attempt
+          //   subtract strlen to avoid splitting a potential search success,
+          //   this is an approximation as the search pattern may match strings
+          //   of varying length and it will fail if the context snippet
+          //   boundary breaks a matching string longer than the current match
+          $utf8_offset = $utf8_idx + $post;
+          $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post));
+          $offset = utf8_correctIdx($text,$offset);
+        }
 
-      if ($append) {
-        $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
-      } else {
-        $snippets[] = utf8_substr($text,$start,$end-$start);
-      }
+        $m = "\1";
+        $snippets = preg_replace('/'.$re1.'/iu',$m.'$1'.$m,$snippets);
+        $snippet = preg_replace('/'.$m.'([^'.$m.']*?)'.$m.'/iu','<strong class="search_hit">$1</strong>',hsc(join('... ',$snippets)));
 
-      // set $offset for next match attempt
-      //   substract strlen to avoid splitting a potential search success,
-      //   this is an approximation as the search pattern may match strings
-      //   of varying length and it will fail if the context snippet
-      //   boundary breaks a matching string longer than the current match
-      $utf8_offset = $utf8_idx + $post;
-      $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post));
-      $offset = utf8_correctIdx($text,$offset);
+        $evdata['snippet'] = $snippet;
     }
+    $evt->advise_after();
+    unset($evt);
 
-    $m = "\1";
-    $snippets = preg_replace('/'.$re1.'/iu',$m.'$1'.$m,$snippets);
-    $snippet = preg_replace('/'.$m.'([^'.$m.']*?)'.$m.'/iu','<strong class="search_hit">$1</strong>',hsc(join('... ',$snippets)));
-
-    return $snippet;
+    return $evdata['snippet'];
 }
 
 /**
-- 
GitLab