From 15fa0b4f1d7eb4b260ff5b8025c56fce3681148e Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Mon, 17 Oct 2005 22:04:31 +0200
Subject: [PATCH] removed mb_string requirement in JSON.php #592

darcs-hash:20051017200431-7ad00-c9ed74326a44b078cc516e08515375d9d0fba811.gz
---
 _test/cases/inc/utf8_utf16be.test.php | 28 ++++++++++++++++++++++++
 inc/JSON.php                          | 24 +++++++++++++++------
 inc/utf8.php                          | 31 +++++++++++++++++++++++++--
 3 files changed, 74 insertions(+), 9 deletions(-)
 create mode 100644 _test/cases/inc/utf8_utf16be.test.php

diff --git a/_test/cases/inc/utf8_utf16be.test.php b/_test/cases/inc/utf8_utf16be.test.php
new file mode 100644
index 000000000..2d79ca35a
--- /dev/null
+++ b/_test/cases/inc/utf8_utf16be.test.php
@@ -0,0 +1,28 @@
+<?php
+
+require_once DOKU_INC.'inc/utf8.php';
+
+// use no mbstring help here
+define('UTF8_NOMBSTRING',1);
+
+class utf8_utf16be_test extends UnitTestCase {
+    // some chars from various code regions
+    var $utf8  = '鈩ℵŁöx';
+    var $utf16 = "\x92\x29\x21\x35\x1\x41\x0\xf6\x0\x78";
+
+    /**
+     * Convert from UTF-8 to UTF-16BE
+     */
+    function test_to16be(){
+        $this->assertEqual(utf8_to_utf16be($this->utf8), $this->utf16);
+    }
+
+    /**
+     * Convert from UTF-16BE to UTF-8
+     */
+    function test_from16be(){
+        $this->assertEqual(utf16be_to_utf8($this->utf16),$this->utf8);
+    }
+}
+
+//Setup VIM: ex: et ts=2 enc=utf-8 :
diff --git a/inc/JSON.php b/inc/JSON.php
index 6345743fc..2958d3419 100644
--- a/inc/JSON.php
+++ b/inc/JSON.php
@@ -57,6 +57,10 @@
  * @link        http://pear.php.net/pepr/pepr-proposal-show.php?id=198
  */
 
+// for DokuWiki
+if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
+require_once(DOKU_INC.'inc/utf8.php');
+
 /**
  * Marker constant for JSON::decode(), used to flag stack state
  */
@@ -191,7 +195,8 @@ class JSON
                             // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                             $char = pack('C*', $ord_var_c, ord($var{$c+1}));
                             $c+=1;
-                            $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+                            //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+                            $utf16 = utf8_to_utf16be($char);
                             $ascii .= sprintf('\u%04s', bin2hex($utf16));
                             break;
     
@@ -202,7 +207,8 @@ class JSON
                                          ord($var{$c+1}),
                                          ord($var{$c+2}));
                             $c+=2;
-                            $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+                            //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+                            $utf16 = utf8_to_utf16be($char);
                             $ascii .= sprintf('\u%04s', bin2hex($utf16));
                             break;
     
@@ -214,7 +220,8 @@ class JSON
                                          ord($var{$c+2}),
                                          ord($var{$c+3}));
                             $c+=3;
-                            $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+                            //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+                            $utf16 = utf8_to_utf16be($char);
                             $ascii .= sprintf('\u%04s', bin2hex($utf16));
                             break;
     
@@ -227,7 +234,8 @@ class JSON
                                          ord($var{$c+3}),
                                          ord($var{$c+4}));
                             $c+=4;
-                            $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+                            //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+                            $utf16 = utf8_to_utf16be($char);
                             $ascii .= sprintf('\u%04s', bin2hex($utf16));
                             break;
     
@@ -241,7 +249,8 @@ class JSON
                                          ord($var{$c+4}),
                                          ord($var{$c+5}));
                             $c+=5;
-                            $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+                            //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+                            $utf16 = utf8_to_utf16be($char);
                             $ascii .= sprintf('\u%04s', bin2hex($utf16));
                             break;
                     }
@@ -411,7 +420,8 @@ class JSON
                                     // single, escaped unicode character
                                     $utf16 = chr(hexdec(substr($chrs, ($c+2), 2)))
                                            . chr(hexdec(substr($chrs, ($c+4), 2)));
-                                    $utf8 .= mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
+                                    //$utf8 .= mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
+                                    $utf8 .= utf16be_to_utf8($utf16);
                                     $c+=5;
         
                                 } elseif(($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F)) {
@@ -612,4 +622,4 @@ class JSON
     
 }
     
-?>
\ No newline at end of file
+?>
diff --git a/inc/utf8.php b/inc/utf8.php
index 79baf2df7..7e82e7cd2 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -377,7 +377,7 @@ function utf8_tohtml ($str) {
  * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
  * @see    unicode_to_utf8()
  */
-function utf8_to_unicode( $str ) {
+function utf8_to_unicode( &$str ) {
   $unicode = array();  
   $values = array();
   $lookingFor = 1;
@@ -408,7 +408,7 @@ function utf8_to_unicode( $str ) {
  * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
  * @see    utf8_to_unicode()
  */
-function unicode_to_utf8( $str ) {
+function unicode_to_utf8( &$str ) {
   $utf8 = '';
   foreach( $str as $unicode ) {
     if ( $unicode < 128 ) {
@@ -425,6 +425,33 @@ function unicode_to_utf8( $str ) {
   return $utf8;
 }
 
+/**
+ * UTF-8 to UTF-16BE conversion.
+ *
+ * Without mbstring the output is effectively UCS-2 (utf8_to_unicode limits, 16-bit packing)
+ */
+function utf8_to_utf16be(&$str, $bom = false) {
+  $out = $bom ? "\xFE\xFF" : '';
+  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_convert_encoding'))
+    return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');
+
+  $uni = utf8_to_unicode($str);
+  foreach($uni as $cp){
+    $out .= pack('n',$cp);
+  }
+  return $out;
+}
+
+/**
+ * UTF-16BE to UTF-8 conversion.
+ *
+ * Each 16-bit unit is decoded as one code point, so this is effectively UCS-2
+ */
+function utf16be_to_utf8(&$str) {
+  $uni = unpack('n*',$str);
+  return unicode_to_utf8($uni);
+}
+
 /**
  * UTF-8 Case lookup table
  *
-- 
GitLab