From ff8e5c090a534b4cf658804ca3fa36229f5f8876 Mon Sep 17 00:00:00 2001 From: "stephane.chazelas" <stephane.chazelas@emerson.com> Date: Fri, 4 Jan 2008 15:53:01 +0100 Subject: [PATCH] parenthesis_escaper_fix darcs-hash:20080104145301-3d17c-6ba186de4a2661f7d4f6b080e4cf74fed9ae1904.gz --- inc/parser/lexer.php | 99 ++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 55 deletions(-) diff --git a/inc/parser/lexer.php b/inc/parser/lexer.php index 0887dde93..88deb7fe9 100644 --- a/inc/parser/lexer.php +++ b/inc/parser/lexer.php @@ -138,60 +138,49 @@ class Doku_LexerParallelRegex { $cnt = count($this->_patterns); for ($i = 0; $i < $cnt; $i++) { - // Replace lookaheads / lookbehinds with marker - $m = "\1\1"; - $pattern = preg_replace( - array ( - '/\(\?(i|m|s|x|U)\)/U', - '/\(\?(\-[i|m|s|x|U])\)/U', - '/\(\?\=(.*)\)/sU', - '/\(\?\!(.*)\)/sU', - '/\(\?\<\=(.*)\)/sU', - '/\(\?\<\!(.*)\)/sU', - '/\(\?\:(.*)\)/sU', - ), - array ( - $m.'SO:\\1'.$m, - $m.'SOR:\\1'.$m, - $m.'LA:IS:\\1'.$m, - $m.'LA:NOT:\\1'.$m, - $m.'LB:IS:\\1'.$m, - $m.'LB:NOT:\\1'.$m, - $m.'GRP:\\1'.$m, - ), - $this->_patterns[$i] - ); - // Quote the rest - $pattern = str_replace( - array('/', '(', ')'), - array('\/', '\(', '\)'), - $pattern - ); - - // Restore lookaheads / lookbehinds - $pattern = preg_replace( - array ( - '/'.$m.'SO:(.{1})'.$m.'/', - '/'.$m.'SOR:(.{2})'.$m.'/', - '/'.$m.'LA:IS:(.*)'.$m.'/sU', - '/'.$m.'LA:NOT:(.*)'.$m.'/sU', - '/'.$m.'LB:IS:(.*)'.$m.'/sU', - '/'.$m.'LB:NOT:(.*)'.$m.'/sU', - '/'.$m.'GRP:(.*)'.$m.'/sU', - ), - array ( - '(?\\1)', - '(?\\1)', - '(?=\\1)', - '(?!\\1)', - '(?<=\\1)', - '(?<!\\1)', - '(?:\\1)', - ), - $pattern - ); - - $this->_patterns[$i] = '('.$pattern.')'; + /* + * decompose the input pattern into "(", "(?", ")", + * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"... + * elements. + */ + preg_match_all('/\\\\.|' . + '\(\?|' . + '[()]|' . + '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' . + '[^[()\\\\]+/', $this->_patterns[$i], $elts); + + $pattern = ""; + $level = 0; + + foreach ($elts[0] as $elt) { + /* + * for "(", ")" remember the nesting level, add "\" + * only to the non-"(?" ones. + */ + + switch($elt) { + case '(': + $pattern .= '\('; + break; + case ')': + if ($level > 0) + $level--; /* closing (? */ + else + $pattern .= '\\'; + $pattern .= ')'; + break; + case '(?': + $level++; + $pattern .= '(?'; + break; + default: + if (substr($elt, 0, 1) == '\\') + $pattern .= $elt; + else + $pattern .= str_replace('/', '\/', $elt); + } + } + $this->_patterns[$i] = "($pattern)"; } $this->_regex = "/" . implode("|", $this->_patterns) . "/" . $this->_getPerlMatchingFlags(); } @@ -591,4 +580,4 @@ function Doku_Lexer_Escape($str) { return preg_replace($chars, $escaped, $str); } -//Setup VIM: ex: et ts=4 enc=utf-8 : +//Setup VIM: ex: et ts=4 sw=4 enc=utf-8 : -- GitLab