From f03fd957525a714da1cde7e2957939046bd51bd5 Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Sun, 4 Apr 2010 20:28:39 +0200
Subject: [PATCH] new fnencode option FS#1649

This patch adds an option to choose how filenames are encoded
when saved to the file system. You can choose between URL encoding
(url), the new SafeFN method (safe) and storing real UTF-8 (utf-8).
---
 conf/dokuwiki.php                             |  1 +
 inc/load.php                                  |  1 +
 inc/pageutils.php                             | 50 +++++++++++++++++++
 inc/utf8.php                                  | 39 ---------------
 lib/plugins/config/lang/en/lang.php           |  1 +
 lib/plugins/config/settings/config.class.php  |  2 +-
 .../config/settings/config.metadata.php       |  1 +
 7 files changed, 55 insertions(+), 40 deletions(-)

diff --git a/conf/dokuwiki.php b/conf/dokuwiki.php
index d3823eb94..f2a843f96 100644
--- a/conf/dokuwiki.php
+++ b/conf/dokuwiki.php
@@ -89,6 +89,7 @@ $conf['usedraft']    = 1;                //automatically save a draft while edit
 $conf['sepchar']     = '_';              //word separator character in page names; may be a
                                          //  letter, a digit, '_', '-', or '.'.
 $conf['canonical']   = 0;                //Should all URLs use full canonical http://... style?
+$conf['fnencode']    = 'url';            //encode filenames (url|safe|utf-8)
 $conf['autoplural']  = 0;                //try (non)plural form of nonexisting files?
 $conf['compression'] = 'gz';             //compress old revisions: (0: off) ('gz': gnuzip) ('bz2': bzip)
                                          //  bz2 generates smaller files, but needs more cpu-power
diff --git a/inc/load.php b/inc/load.php
index faf4e9570..2f5be6d63 100644
--- a/inc/load.php
+++ b/inc/load.php
@@ -73,6 +73,7 @@ function load_autoload($name){
         'ZipLib'                => DOKU_INC.'inc/ZipLib.class.php',
         'DokuWikiFeedCreator'   => DOKU_INC.'inc/feedcreator.class.php',
         'Doku_Parser_Mode'      => DOKU_INC.'inc/parser/parser.php',
+        'SafeFN'                => DOKU_INC.'inc/SafeFN.class.php',
 
         'DokuWiki_Action_Plugin' => DOKU_PLUGIN.'action.php',
         'DokuWiki_Admin_Plugin'  => DOKU_PLUGIN.'admin.php',
diff --git a/inc/pageutils.php b/inc/pageutils.php
index cd3cf1fce..43c84038f 100644
--- a/inc/pageutils.php
+++ b/inc/pageutils.php
@@ -543,3 +543,53 @@ function prettyprint_id($id) {
     }
     return hsc($id);
 }
+
+/**
+ * Encode a UTF-8 filename for storage on the filesystem
+ *
+ * $conf['fnencode'] selects the method: 'utf-8' returns $file unchanged,
+ * 'safe' calls SafeFN::encode(), anything else urlencodes but keeps '/'.
+ *
+ * @param  string $file the filename to encode
+ * @param  bool   $safe if true (default), names made up only of
+ *                      a-z A-Z 0-9 / _ - . % are returned unchanged
+ * @return string the encoded filename
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @see    urlencode
+ */
+function utf8_encodeFN($file,$safe=true){
+    global $conf;
+    if($conf['fnencode'] == 'utf-8') return $file;
+    // presumably '%' is accepted below so urlencoded names are not encoded twice
+    if($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file)){
+        return $file;
+    }
+
+    if($conf['fnencode'] == 'safe'){
+        return SafeFN::encode($file);
+    }
+    // NOTE(review): urlencode() maps ' ' to '+', which fails the check above - verify callers
+    $file = urlencode($file);
+    $file = str_replace('%2F','/',$file); // slashes are not encoded
+    return $file;
+}
+
+/**
+ * Decode a filename back to UTF-8 (counterpart of utf8_encodeFN)
+ *
+ * @param  string $file filename as encoded under $conf['fnencode']
+ * @return string $file itself for 'utf-8', SafeFN::decode() for 'safe', else urldecode()
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @see    urldecode
+ */
+function utf8_decodeFN($file){
+    global $conf;
+    if($conf['fnencode'] == 'utf-8') return $file;
+
+    if($conf['fnencode'] == 'safe'){
+        return SafeFN::decode($file);
+    }
+
+    return urldecode($file);
+}
+
diff --git a/inc/utf8.php b/inc/utf8.php
index b078540d2..c10e33ffa 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -19,45 +19,6 @@ if(!defined('UTF8_MBSTRING')){
 
 if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }
 
-if(!function_exists('utf8_encodeFN')){
-    /**
-     * URL-Encode a filename to allow unicodecharacters
-     *
-     * Slashes are not encoded
-     *
-     * When the second parameter is true the string will
-     * be encoded only if non ASCII characters are detected -
-     * This makes it safe to run it multiple times on the
-     * same string (default is true)
-     *
-     * @author Andreas Gohr <andi@splitbrain.org>
-     * @see    urlencode
-     */
-    function utf8_encodeFN($file,$safe=true){
-        if($safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#',$file)){
-            return $file;
-        }
-        $file = urlencode($file);
-        $file = str_replace('%2F','/',$file);
-        return $file;
-    }
-}
-
-if(!function_exists('utf8_decodeFN')){
-    /**
-     * URL-Decode a filename
-     *
-     * This is just a wrapper around urldecode
-     *
-     * @author Andreas Gohr <andi@splitbrain.org>
-     * @see    urldecode
-     */
-    function utf8_decodeFN($file){
-        $file = urldecode($file);
-        return $file;
-    }
-}
-
 if(!function_exists('utf8_isASCII')){
     /**
      * Checks if a string contains 7bit ASCII only
diff --git a/lib/plugins/config/lang/en/lang.php b/lib/plugins/config/lang/en/lang.php
index 2bcd17c12..dd13464fe 100644
--- a/lib/plugins/config/lang/en/lang.php
+++ b/lib/plugins/config/lang/en/lang.php
@@ -116,6 +116,7 @@ $lang['useslash']    = 'Use slash as namespace separator in URLs';
 $lang['usedraft']    = 'Automatically save a draft while editing';
 $lang['sepchar']     = 'Page name word separator';
 $lang['canonical']   = 'Use fully canonical URLs';
+$lang['fnencode']    = 'Method for encoding non-ASCII filenames.';
 $lang['autoplural']  = 'Check for plural forms in links';
 $lang['compression'] = 'Compression method for attic files';
 $lang['cachetime']   = 'Maximum age for cache (sec)';
diff --git a/lib/plugins/config/settings/config.class.php b/lib/plugins/config/settings/config.class.php
index b7428bf6c..2a1d3a28f 100644
--- a/lib/plugins/config/settings/config.class.php
+++ b/lib/plugins/config/settings/config.class.php
@@ -343,7 +343,7 @@ if (!class_exists('setting')) {
     var $_cautionList = array(
         'basedir' => 'danger', 'baseurl' => 'danger', 'savedir' => 'danger', 'useacl' => 'danger', 'authtype' => 'danger', 'superuser' => 'danger', 'userewrite' => 'danger',
         'start' => 'warning', 'camelcase' => 'warning', 'deaccent' => 'warning', 'sepchar' => 'warning', 'compression' => 'warning', 'xsendfile' => 'warning', 'renderer_xhtml' => 'warning',
-        'allowdebug' => 'security', 'htmlok' => 'security', 'phpok' => 'security', 'iexssprotect' => 'security', 'xmlrpc' => 'security'
+        'allowdebug' => 'security', 'htmlok' => 'security', 'phpok' => 'security', 'iexssprotect' => 'security', 'xmlrpc' => 'security', 'fnencode' => 'warning'
     );
 
     function setting($key, $params=NULL) {
diff --git a/lib/plugins/config/settings/config.metadata.php b/lib/plugins/config/settings/config.metadata.php
index cea191f56..316b4d1c5 100644
--- a/lib/plugins/config/settings/config.metadata.php
+++ b/lib/plugins/config/settings/config.metadata.php
@@ -171,6 +171,7 @@ $meta['userewrite']  = array('multichoice','_choices' => array(0,1,2));
 $meta['useslash']    = array('onoff');
 $meta['sepchar']     = array('sepchar');
 $meta['canonical']   = array('onoff');
+$meta['fnencode']    = array('multichoice','_choices' => array('url','safe','utf-8'));
 $meta['autoplural']  = array('onoff');
 $meta['mailfrom']    = array('richemail');
 $meta['compress']    = array('onoff');
-- 
GitLab