From 409f26c8f6fa4e96f27030b5d38ffaf6acc83419 Mon Sep 17 00:00:00 2001
From: matthiasgrimm <matthiasgrimm@users.sourceforge.net>
Date: Sun, 12 Jun 2005 17:42:41 +0200
Subject: [PATCH] spellchecker fix for broken aspell

The current Aspell version has a bug that causes a corrupt output file.
Output lines beginning with '?' weren't terminated with a newline.
This patch fixes the broken output format.

It is not possible to detect automatically if a Aspell version handles
'?'-lines correctly. Therefore DokuWiki checks for Aspells version
number and corrects the output format accordingly if version <

darcs-hash:20050612154241-7ef76-dfa98470651d6701562ca51908e6f8c8392b28bd.gz
---
 inc/aspell.php         | 66 ++++++++++++++++++------------------------
 lib/exe/spellcheck.php | 22 +-------------
 2 files changed, 29 insertions(+), 59 deletions(-)

diff --git a/inc/aspell.php b/inc/aspell.php
index 62c3a2ae9..1cbbb2dc3 100644
--- a/inc/aspell.php
+++ b/inc/aspell.php
@@ -168,55 +168,45 @@ class Aspell{
         );
 
         $process = proc_open(ASPELL_BIN.' -a'.$this->args, $descspec, $pipes);
+        $terse   = 1;   // terse mode active
         if ($process) {
             // write specials if given
             if(is_array($specials)){
-                foreach($specials as $s) fwrite($pipes[0],"$s\n");
+                foreach($specials as $s){
+                    if ($s == '!') $terse = 0;
+                    fwrite($pipes[0],"$s\n");
+                }
             }
 
-            // write line and read answer
-            $data = explode("\n",$text);
-            foreach($data as $line){
-                fwrite($pipes[0],"^$line\n"); // aspell uses ^ to escape the line
-                fflush($pipes[0]);
-                do{
-                    $r = fgets($pipes[1],8192);
-                    
-                    // Aspell returns lines with preceding '?' like ispell do
-                    // but this lines are badly corrupted. We had to correct
-                    // those lines here due to not to break our result parser.
-                    if($r[0] == '?'){
-                        $pos = strpos($r, "&");
-                        if ($pos === false){
-                           // Is this the last spelling error in the source line,
-                           // then the result line is not terminated with a newline.
-                           // We add one here. The pipe is empty so we prepare
-                           // to leave the loop.
-                           $out .= $r."\n";
-                           $r = "\n";  // trick to exit the loop
-                        }else{
-                           // If another word in the source line is misspelled,
-                           // the result line is directly joined to the '?'
-                           // line. We divide them here and add the missing
-                           // newlines. After that we continue to read the pipe.
-                           $out .= str_replace("&", "\n&", $r);
-                           $r = "x";   // trick to loop again for sure
-                        }
-                    }else{
-                      $out .= $r;
-                    }
-
-                    if(feof($pipes[1])) break;
-                }while($r != "\n");
-            }
+            // prepare text for Aspell and handle it over
+            $string = "^".str_replace("\n", "\n^",$text)."^\n";
+            fwrite($pipes[0],$string);  // send text to Aspell
             fclose($pipes[0]);
-
-            // read remaining stdout (shouldn't be any)
+            
+            // read Aspells response from stdin
             while (!feof($pipes[1])) {
                 $out .= fread($pipes[1], 8192);
             }
             fclose($pipes[1]);
 
+            // Aspell has a bug that can't be autodetected because both versions
+            // might produce the same output but under different conditions. So
+            // we check Aspells version number here to divide broken and working
+            // versions of Aspell.
+            $tmp = array();
+            preg_match('/^\@.*Aspell (\d+)\.(\d+).(\d+)/',$out,$tmp);
+            $version = $tmp[1]*1000 + $tmp[2]*10 + $tmp[3];
+            
+            if ($version <= 603)  // version 0.60.3
+                $r = $terse ? "\n*\n\$1" : "\n\$1"; // replacement for broken Aspell
+            else
+                $r = $terse ? "\n*\n" : "\n";    // replacement for good Aspell
+
+            // lines starting with a '?' are no realy misspelled words and some
+            // Aspell versions doesn't produce usable output anyway so we filter
+            // them out here.
+            $out = preg_replace('/\n\? [^\n\&\*]*([\n]?)/',$r, $out);
+
             // read stderr
             while (!feof($pipes[2])) {
                 $err .= fread($pipes[2], 8192);
diff --git a/lib/exe/spellcheck.php b/lib/exe/spellcheck.php
index d8faa0291..d9086f9c6 100644
--- a/lib/exe/spellcheck.php
+++ b/lib/exe/spellcheck.php
@@ -141,29 +141,9 @@ function spell_check() {
       return;
     }
 
-
     $misspell = true;
     $len  = utf8_strlen($word);
-
-    // try to insert markup
-    // Aspell sometimes returns too few blank lines, the following loop
-    // tries to compensate by skipping to next line if Aspell's output
-    // doesn't match - we're skipping maximal 2 lines before giving up and
-    // throwing an error
-    for($x=0; $x<3; $x++){
-      $lcnt -= $x;
-      if(utf8_substr($data[$lcnt],$off,$len) == $word){
-        $data[$lcnt] = utf8_substr_replace($data[$lcnt],
-                                           spell_formatword($word,$sug),
-                                           $off, $len);
-        break;
-      }elseif($x == 2){
-        print '2';
-        print "The spellchecker output doesn't match the input data.\n";
-        print "Offending word: '$word' offset: $off, line $i";
-        return;
-      }
-    }
+    $data[$lcnt] = utf8_substr_replace($data[$lcnt],spell_formatword($word,$sug),$off, $len);
 
   }//end of output parsing
 
-- 
GitLab