From 15fa0b4f1d7eb4b260ff5b8025c56fce3681148e Mon Sep 17 00:00:00 2001 From: Andreas Gohr <andi@splitbrain.org> Date: Mon, 17 Oct 2005 22:04:31 +0200 Subject: [PATCH] removed mb_string requirement in JSON.php #592 darcs-hash:20051017200431-7ad00-c9ed74326a44b078cc516e08515375d9d0fba811.gz --- _test/cases/inc/utf8_utf16be.test.php | 28 ++++++++++++++++++++++++ inc/JSON.php | 24 +++++++++++++++------ inc/utf8.php | 31 +++++++++++++++++++++++++-- 3 files changed, 74 insertions(+), 9 deletions(-) create mode 100644 _test/cases/inc/utf8_utf16be.test.php diff --git a/_test/cases/inc/utf8_utf16be.test.php b/_test/cases/inc/utf8_utf16be.test.php new file mode 100644 index 000000000..2d79ca35a --- /dev/null +++ b/_test/cases/inc/utf8_utf16be.test.php @@ -0,0 +1,28 @@ +<?php + +require_once DOKU_INC.'inc/utf8.php'; + +// use no mbstring help here +define('UTF8_NOMBSTRING',1); + +class utf8_utf16be_test extends UnitTestCase { + // some chars from various code regions + var $utf8 = '鈩ℵŁöx'; + var $utf16 = "\x92\x29\x21\x35\x1\x41\x0\xf6\x0\x78"; + + /** + * Convert from UTF-8 to UTF-16BE + */ + function test_to16be(){ + $this->assertEqual(utf8_to_utf16be($this->utf8), $this->utf16); + } + + /** + * Convert from UTF-16BE to UTF-8 + */ + function test_from16be(){ + $this->assertEqual(utf16be_to_utf8($this->utf16),$this->utf8); + } +} + +//Setup VIM: ex: et ts=2 enc=utf-8 : diff --git a/inc/JSON.php b/inc/JSON.php index 6345743fc..2958d3419 100644 --- a/inc/JSON.php +++ b/inc/JSON.php @@ -57,6 +57,10 @@ * @link http://pear.php.net/pepr/pepr-proposal-show.php?id=198 */ +// for DokuWiki +if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/'); +require_once(DOKU_INC.'inc/utf8.php'); + /** * Marker constant for JSON::decode(), used to flag stack state */ @@ -191,7 +195,8 @@ class JSON // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $char = pack('C*', $ord_var_c, ord($var{$c+1})); $c+=1; - $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); +
//$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + $utf16 = utf8_to_utf16be($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; @@ -202,7 +207,8 @@ class JSON ord($var{$c+1}), ord($var{$c+2})); $c+=2; - $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + $utf16 = utf8_to_utf16be($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; @@ -214,7 +220,8 @@ class JSON ord($var{$c+2}), ord($var{$c+3})); $c+=3; - $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + $utf16 = utf8_to_utf16be($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; @@ -227,7 +234,8 @@ class JSON ord($var{$c+3}), ord($var{$c+4})); $c+=4; - $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + $utf16 = utf8_to_utf16be($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; @@ -241,7 +249,8 @@ class JSON ord($var{$c+4}), ord($var{$c+5})); $c+=5; - $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + $utf16 = utf8_to_utf16be($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; } @@ -411,7 +420,8 @@ class JSON // single, escaped unicode character $utf16 = chr(hexdec(substr($chrs, ($c+2), 2))) . 
chr(hexdec(substr($chrs, ($c+4), 2))); - $utf8 .= mb_convert_encoding($utf16, 'UTF-8', 'UTF-16'); + //$utf8 .= mb_convert_encoding($utf16, 'UTF-8', 'UTF-16'); + $utf8 .= utf16be_to_utf8($utf16); $c+=5; } elseif(($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F)) { @@ -612,4 +622,4 @@ class JSON } -?> \ No newline at end of file +?> diff --git a/inc/utf8.php b/inc/utf8.php index 79baf2df7..7e82e7cd2 100644 --- a/inc/utf8.php +++ b/inc/utf8.php @@ -377,7 +377,7 @@ function utf8_tohtml ($str) { * @link http://www.randomchaos.com/document.php?source=php_and_unicode * @see unicode_to_utf8() */ -function utf8_to_unicode( $str ) { +function utf8_to_unicode( &$str ) { $unicode = array(); $values = array(); $lookingFor = 1; @@ -408,7 +408,7 @@ function utf8_to_unicode( $str ) { * @link http://www.randomchaos.com/document.php?source=php_and_unicode * @see utf8_to_unicode() */ -function unicode_to_utf8( $str ) { +function unicode_to_utf8( &$str ) { $utf8 = ''; foreach( $str as $unicode ) { if ( $unicode < 128 ) { @@ -425,6 +425,33 @@ function unicode_to_utf8( $str ) { return $utf8; } +/** + * UTF-8 to UTF-16BE conversion. + * + * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits + */ +function utf8_to_utf16be(&$str, $bom = false) { + $out = $bom ? "\xFE\xFF" : ''; + if(!defined('UTF8_NOMBSTRING') && function_exists('mb_convert_encoding')) + return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8'); + + $uni = utf8_to_unicode($str); + foreach($uni as $cp){ + $out .= pack('n',$cp); + } + return $out; +} + +/** + * UTF-16BE to UTF-8 conversion. + * + * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits + */ +function utf16be_to_utf8(&$str) { + $uni = unpack('n*',$str); + return unicode_to_utf8($uni); +} + /** * UTF-8 Case lookup table * -- GitLab