Skip to content
Snippets Groups Projects
Commit 60e91a17 authored by Andreas Gohr
Browse files

added FULLTEXT_SNIPPET_CREATE event

Ignore-this: a0ebcdd129f4256e4be029e7fdf7ca45

darcs-hash:20091026092359-6e07b-4c41896825e091a3c8fbbeadc3bc7764d0735bf6.gz
parent c155c65d
No related branches found
No related tags found
No related merge requests found
......@@ -278,81 +278,96 @@ function ft_pagesorter($a, $b){
* Creates a snippet extract
*
* @author Andreas Gohr <andi@splitbrain.org>
* @triggers FULLTEXT_SNIPPET_CREATE
*/
function ft_snippet($id,$highlight){
    // Builds an HTML snippet for page $id in which every phrase from
    // $highlight is wrapped in <strong class="search_hit">, and returns it.
    // The default generation is wrapped in a FULLTEXT_SNIPPET_CREATE event.
    $text = rawWiki($id);

    // 'text' and 'highlight' are passed by reference and 'snippet' starts out
    // empty; presumably this lets event handlers adjust the input or supply
    // their own snippet -- confirm against the event system documentation.
    $evdata = array(
        'id'        => $id,
        'text'      => &$text,
        'highlight' => &$highlight,
        'snippet'   => '',
    );

    $evt = new Doku_Event('FULLTEXT_SNIPPET_CREATE',$evdata);
    if ($evt->advise_before()) {
        $match = array();
        $snippets = array();
        $utf8_offset = $offset = $end = 0;
        $len = utf8_strlen($text);

        // build a regexp from the phrases to highlight
        //   $re1: a single phrase
        //   $re2: two phrases at most 75 characters apart
        //   $re3: three phrases, each at most 45 characters apart
        // the (?!\1) / (?!\2) lookaheads reject a following phrase that
        // starts with the text already captured by an earlier group
        $re1 = '('.join('|',array_map('preg_quote_cb',array_filter((array) $highlight))).')';
        $re2 = "$re1.{0,75}(?!\\1)$re1";
        $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1";

        // collect at most four context snippets
        for ($cnt=4; $cnt--;) {
            // try the richest pattern first and fall back to simpler ones;
            // the branches are intentionally empty, only $match is needed
            if (0) {
            } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
            } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
            } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
            } else {
                break;
            }

            list($str,$idx) = $match[0];

            // convert $idx (a byte offset) into a utf8 character offset
            $utf8_idx = utf8_strlen(substr($text,0,$idx));
            $utf8_len = utf8_strlen($str);

            // establish context, 100 characters surrounding the match string
            // first look to see if we can go 100 either side,
            // then drop to 50 adding any excess if the other side can't go to 50,
            $pre = min($utf8_idx-$utf8_offset,100);
            $post = min($len-$utf8_idx-$utf8_len,100);

            if ($pre>50 && $post>50) {
                $pre = $post = 50;
            } else if ($pre>50) {
                $pre = min($pre,100-$post);
            } else if ($post>50) {
                $post = min($post, 100-$pre);
            } else {
                // both are less than 50, means the context is the whole string
                // make it so and break out of this loop - there is no need for the
                // complex snippet calculations
                $snippets = array($text);
                break;
            }

            // establish context start and end points, try to append to previous
            // context if possible
            $start = $utf8_idx - $pre;
            $append = ($start < $end) ? $end : false;  // still the end of the previous context snippet
            $end = $utf8_idx + $utf8_len + $post;      // now set it to the end of this context

            if ($append) {
                $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
            } else {
                $snippets[] = utf8_substr($text,$start,$end-$start);
            }

            // set $offset for next match attempt
            //   subtract strlen to avoid splitting a potential search success,
            //   this is an approximation as the search pattern may match strings
            //   of varying length and it will fail if the context snippet
            //   boundary breaks a matching string longer than the current match
            $utf8_offset = $utf8_idx + $post;
            $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post));
            $offset = utf8_correctIdx($text,$offset);
        }

        // "\1" is the single control character 0x01 (octal escape); hits are
        // bracketed with it before hsc() runs and only afterwards turned into
        // <strong> tags, so the tags themselves are not passed through hsc()
        $m = "\1";
        $snippets = preg_replace('/'.$re1.'/iu',$m.'$1'.$m,$snippets);
        $snippet = preg_replace('/'.$m.'([^'.$m.']*?)'.$m.'/iu','<strong class="search_hit">$1</strong>',hsc(join('... ',$snippets)));

        $evdata['snippet'] = $snippet;
    }
    $evt->advise_after();
    unset($evt);

    // when advise_before() returned false this is whatever 'snippet' holds
    // in the event data at that point (the empty string unless changed)
    return $evdata['snippet'];
}
/**
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment