From f03fd957525a714da1cde7e2957939046bd51bd5 Mon Sep 17 00:00:00 2001 From: Andreas Gohr <andi@splitbrain.org> Date: Sun, 4 Apr 2010 20:28:39 +0200 Subject: [PATCH] new fnencode option FS#1649 This patch adds an option to choose how filenames are encoded when saved to the file system. You can choose between urlencoding (url), the new SafeFN method (safe) and storing real UTF-8 (utf-8). --- conf/dokuwiki.php | 1 + inc/load.php | 1 + inc/pageutils.php | 50 +++++++++++++++++++ inc/utf8.php | 39 --------------- lib/plugins/config/lang/en/lang.php | 1 + lib/plugins/config/settings/config.class.php | 2 +- .../config/settings/config.metadata.php | 1 + 7 files changed, 55 insertions(+), 40 deletions(-) diff --git a/conf/dokuwiki.php b/conf/dokuwiki.php index d3823eb94..f2a843f96 100644 --- a/conf/dokuwiki.php +++ b/conf/dokuwiki.php @@ -89,6 +89,7 @@ $conf['usedraft'] = 1; //automatically save a draft while edit $conf['sepchar'] = '_'; //word separator character in page names; may be a // letter, a digit, '_', '-', or '.'. $conf['canonical'] = 0; //Should all URLs use full canonical http://... style? +$conf['fnencode'] = 'url'; //encode filenames (url|safe|utf-8) $conf['autoplural'] = 0; //try (non)plural form of nonexisting files? 
$conf['compression'] = 'gz'; //compress old revisions: (0: off) ('gz': gnuzip) ('bz2': bzip) // bz2 generates smaller files, but needs more cpu-power diff --git a/inc/load.php b/inc/load.php index faf4e9570..2f5be6d63 100644 --- a/inc/load.php +++ b/inc/load.php @@ -73,6 +73,7 @@ function load_autoload($name){ 'ZipLib' => DOKU_INC.'inc/ZipLib.class.php', 'DokuWikiFeedCreator' => DOKU_INC.'inc/feedcreator.class.php', 'Doku_Parser_Mode' => DOKU_INC.'inc/parser/parser.php', + 'SafeFN' => DOKU_INC.'inc/SafeFN.class.php', 'DokuWiki_Action_Plugin' => DOKU_PLUGIN.'action.php', 'DokuWiki_Admin_Plugin' => DOKU_PLUGIN.'admin.php', diff --git a/inc/pageutils.php b/inc/pageutils.php index cd3cf1fce..43c84038f 100644 --- a/inc/pageutils.php +++ b/inc/pageutils.php @@ -543,3 +543,53 @@ function prettyprint_id($id) { } return hsc($id); } + +/** + * Encode a UTF-8 filename to use on any filesystem + * + * Uses the 'fnencode' option to determine encoding + * + * When the second parameter is true the string will + * be encoded only if non ASCII characters are detected - + * This makes it safe to run it multiple times on the + * same string (default is true) + * + * @author Andreas Gohr <andi@splitbrain.org> + * @see urlencode + */ +function utf8_encodeFN($file,$safe=true){ + global $conf; + if($conf['fnencode'] == 'utf-8') return $file; + + if($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file)){ + return $file; + } + + if($conf['fnencode'] == 'safe'){ + return SafeFN::encode($file); + } + + $file = urlencode($file); + $file = str_replace('%2F','/',$file); + return $file; +} + +/** + * Decode a filename back to UTF-8 + * + * Uses the 'fnencode' option to determine encoding + * + * @author Andreas Gohr <andi@splitbrain.org> + * @see urldecode + */ +function utf8_decodeFN($file){ + global $conf; + if($conf['fnencode'] == 'utf-8') return $file; + + if($conf['fnencode'] == 'safe'){ + return SafeFN::decode($file); + } + + return urldecode($file); +} + diff --git a/inc/utf8.php 
b/inc/utf8.php index b078540d2..c10e33ffa 100644 --- a/inc/utf8.php +++ b/inc/utf8.php @@ -19,45 +19,6 @@ if(!defined('UTF8_MBSTRING')){ if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); } -if(!function_exists('utf8_encodeFN')){ - /** - * URL-Encode a filename to allow unicodecharacters - * - * Slashes are not encoded - * - * When the second parameter is true the string will - * be encoded only if non ASCII characters are detected - - * This makes it safe to run it multiple times on the - * same string (default is true) - * - * @author Andreas Gohr <andi@splitbrain.org> - * @see urlencode - */ - function utf8_encodeFN($file,$safe=true){ - if($safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#',$file)){ - return $file; - } - $file = urlencode($file); - $file = str_replace('%2F','/',$file); - return $file; - } -} - -if(!function_exists('utf8_decodeFN')){ - /** - * URL-Decode a filename - * - * This is just a wrapper around urldecode - * - * @author Andreas Gohr <andi@splitbrain.org> - * @see urldecode - */ - function utf8_decodeFN($file){ - $file = urldecode($file); - return $file; - } -} - if(!function_exists('utf8_isASCII')){ /** * Checks if a string contains 7bit ASCII only diff --git a/lib/plugins/config/lang/en/lang.php b/lib/plugins/config/lang/en/lang.php index 2bcd17c12..dd13464fe 100644 --- a/lib/plugins/config/lang/en/lang.php +++ b/lib/plugins/config/lang/en/lang.php @@ -116,6 +116,7 @@ $lang['useslash'] = 'Use slash as namespace separator in URLs'; $lang['usedraft'] = 'Automatically save a draft while editing'; $lang['sepchar'] = 'Page name word separator'; $lang['canonical'] = 'Use fully canonical URLs'; +$lang['fnencode'] = 'Method for encoding non-ASCII filenames.'; $lang['autoplural'] = 'Check for plural forms in links'; $lang['compression'] = 'Compression method for attic files'; $lang['cachetime'] = 'Maximum age for cache (sec)'; diff --git a/lib/plugins/config/settings/config.class.php b/lib/plugins/config/settings/config.class.php index 
b7428bf6c..2a1d3a28f 100644 --- a/lib/plugins/config/settings/config.class.php +++ b/lib/plugins/config/settings/config.class.php @@ -343,7 +343,7 @@ if (!class_exists('setting')) { var $_cautionList = array( 'basedir' => 'danger', 'baseurl' => 'danger', 'savedir' => 'danger', 'useacl' => 'danger', 'authtype' => 'danger', 'superuser' => 'danger', 'userewrite' => 'danger', 'start' => 'warning', 'camelcase' => 'warning', 'deaccent' => 'warning', 'sepchar' => 'warning', 'compression' => 'warning', 'xsendfile' => 'warning', 'renderer_xhtml' => 'warning', - 'allowdebug' => 'security', 'htmlok' => 'security', 'phpok' => 'security', 'iexssprotect' => 'security', 'xmlrpc' => 'security' + 'allowdebug' => 'security', 'htmlok' => 'security', 'phpok' => 'security', 'iexssprotect' => 'security', 'xmlrpc' => 'security', 'fnencode' => 'warning' ); function setting($key, $params=NULL) { diff --git a/lib/plugins/config/settings/config.metadata.php b/lib/plugins/config/settings/config.metadata.php index cea191f56..316b4d1c5 100644 --- a/lib/plugins/config/settings/config.metadata.php +++ b/lib/plugins/config/settings/config.metadata.php @@ -171,6 +171,7 @@ $meta['userewrite'] = array('multichoice','_choices' => array(0,1,2)); $meta['useslash'] = array('onoff'); $meta['sepchar'] = array('sepchar'); $meta['canonical'] = array('onoff'); +$meta['fnencode'] = array('multichoice','_choices' => array('url','safe','utf-8')); $meta['autoplural'] = array('onoff'); $meta['mailfrom'] = array('richemail'); $meta['compress'] = array('onoff'); -- GitLab