From 9b307a83ee61a4896227f453603a5991ad5a1f5f Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Wed, 31 Aug 2005 22:55:50 +0200
Subject: [PATCH] HTTP Client added

This patch adds an HTTP client written in PHP. It supports proxy
handling and SSL if PHP was compiled with it.

darcs-hash:20050831205550-7ad00-6dcdff0208d7f18a8ff731febb155d126742c768.gz
---
 inc/HTTPClient.php | 393 +++++++++++++++++++++++++++++++++++++++++++++
 inc/io.php         |  28 ++--
 2 files changed, 403 insertions(+), 18 deletions(-)
 create mode 100644 inc/HTTPClient.php

diff --git a/inc/HTTPClient.php b/inc/HTTPClient.php
new file mode 100644
index 000000000..8167952c6
--- /dev/null
+++ b/inc/HTTPClient.php
@@ -0,0 +1,393 @@
+<?php
+/**
+ * HTTP Client
+ *
+ * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
+ * @author     Andreas Goetz <cpuidle@gmx.de>
+ */
+
+if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/'); // fall back to the parent directory of this file
+require_once(DOKU_CONF.'dokuwiki.php'); // NOTE(review): DOKU_CONF is not defined in this file - presumably set by the including script; confirm
+
+define('HTTP_NL',"\r\n"); // line terminator used when building HTTP requests
+
+
+/**
+ * Adds DokuWiki specific configs to the HTTP client
+ *
+ * @author Andreas Goetz <cpuidle@gmx.de>
+ */
+class DokuHTTPClient extends HTTPClient {
+
+    /**
+     * Constructor: runs the parent constructor, then copies the proxy settings from $conf.
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    function DokuHTTPClient(){
+        global $conf;
+
+        // let the base class set up its defaults first
+        $this->HTTPClient();
+
+        // client property => key below $conf['proxy'] it is filled from
+        $map = array('proxy_host' => 'host', 'proxy_port' => 'port', 'proxy_user' => 'user',
+                     'proxy_pass' => 'pass', 'proxy_ssl'  => 'usessl');
+        foreach($map as $prop => $key){
+            $this->$prop = $conf['proxy'][$key];
+        }
+    }
+}
+
+/**
+ * This class implements a basic HTTP client
+ *
+ * It supports POST and GET, Proxy usage, basic authentication,
+ * handles cookies and referers. It is based upon the httpclient
+ * function from the VideoDB project.
+ *
+ * @link   http://www.splitbrain.org/go/videodb
+ * @author Andreas Goetz <cpuidle@gmx.de>
+ * @author Andreas Gohr <andi@splitbrain.org>
+ */
+class HTTPClient {
+    //set these if you like
+    var $agent;         // User agent
+    var $http;          // HTTP version defaults to 1.0
+    var $timeout;       // seconds, used for connecting and for reading
+    var $cookies;       // name => value pairs sent with each request
+    var $referer;
+    var $max_redirect;
+    var $max_bodysize;  // abort if the response body is bigger than this
+    var $headers;       // additional request headers, name => value
+    var $debug;         // print request and response when true
+
+    // don't set these, read on error
+    var $error;
+    var $redirect_count;
+
+    // read these after a successful request
+    var $status;        // HTTP status code of the last response
+    var $resp_status;   // always holds the same value as $status
+    var $resp_body;
+    var $resp_headers;
+
+    // set these to do basic authentication
+    var $user;
+    var $pass;
+
+    // set these if you need to use a proxy
+    var $proxy_host;
+    var $proxy_port;
+    var $proxy_user;
+    var $proxy_pass;
+    var $proxy_ssl; //boolean set to true if your proxy needs SSL
+
+    /**
+     * Constructor. Sets the defaults for all options.
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    function HTTPClient(){
+        $this->agent        = 'Mozilla/4.0 (compatible; DokuWiki HTTP Client; '.PHP_OS.')';
+        $this->timeout      = 15;
+        $this->cookies      = array();
+        $this->referer      = '';
+        $this->max_redirect = 3;
+        $this->redirect_count = 0;
+        $this->status       = $this->resp_status = 0;
+        $this->headers      = array();
+        $this->http         = '1.0';
+        $this->debug        = false;
+        $this->max_bodysize = 0;
+        if(extension_loaded('zlib')) $this->headers['Accept-encoding'] = 'gzip';
+        $this->headers['Accept'] = 'text/xml,application/xml,application/xhtml+xml,'.
+                                   'text/html,text/plain,image/png,image/jpeg,image/gif,*/*';
+        $this->headers['Accept-Language'] = 'en-us';
+    }
+
+    /**
+     * Simple function to do a GET request
+     *
+     * Returns the response body, or false on an error or a status other than 200
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    function get($url){
+        if(!$this->sendRequest($url)) return false;
+        if($this->status != 200) return false;
+        return $this->resp_body;
+    }
+
+    /**
+     * Simple function to do a POST request
+     *
+     * Returns the response body, or false on an error or a status other than 200
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    function post($url,$data){
+        if(!$this->sendRequest($url,$data,'POST')) return false;
+        if($this->status != 200) return false;
+        return $this->resp_body;
+    }
+
+    /**
+     * Do an HTTP request
+     *
+     * Returns true on success ($status, $resp_headers and $resp_body are set)
+     * or false on failure ($error is set). 301 and 302 redirects are followed.
+     *
+     * @author Andreas Goetz <cpuidle@gmx.de>
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    function sendRequest($url,$data=array(),$method='GET'){
+        $this->error = '';
+
+        // parse URL into bits
+        $uri    = parse_url($url);
+        $server = $uri['host'];
+        $path   = empty($uri['path']) ? '/' : $uri['path'];
+        if(!empty($uri['query'])) $path .= '?'.$uri['query'];
+        $port   = isset($uri['port']) ? $uri['port'] : 0;
+        $https  = (isset($uri['scheme']) && strtolower($uri['scheme']) == 'https');
+        if(!empty($uri['user'])) $this->user = $uri['user'];
+        if(!empty($uri['pass'])) $this->pass = $uri['pass'];
+
+        // proxy setup
+        if($this->proxy_host){
+            $request_url = $url;
+            $server      = $this->proxy_host;
+            $port        = $this->proxy_port;
+            if (empty($port)) $port = 8080;
+        }else{
+            $request_url = $path;
+            if (empty($port)) $port = $https ? 443 : 80;
+        }
+
+        // add SSL stream prefix if needed - needs SSL support in PHP
+        $use_ssl = $this->proxy_host ? $this->proxy_ssl : $https;
+        if($port == 443 || $use_ssl) $server = 'ssl://'.$server;
+
+        // prepare headers
+        $headers               = $this->headers;
+        $headers['Host']       = $uri['host'];
+        $headers['User-Agent'] = $this->agent;
+        $headers['Referer']    = $this->referer;
+        $headers['Connection'] = 'Close';
+        $post = ''; // request body, stays empty unless this is a POST
+        if($method == 'POST'){
+            $post = $this->_postEncode($data);
+            $headers['Content-Type']   = 'application/x-www-form-urlencoded';
+            $headers['Content-Length'] = strlen($post);
+        }
+        if($this->user) $headers['Authorization'] = 'BASIC '.base64_encode($this->user.':'.$this->pass);
+        if($this->proxy_user) $headers['Proxy-Authorization'] = 'BASIC '.base64_encode($this->proxy_user.':'.$this->proxy_pass);
+
+        // open socket (the timeout passed to fsockopen only covers connecting)
+        $socket = @fsockopen($server,$port,$errno, $errstr, $this->timeout);
+        if (!$socket){
+            $this->error = "Could not connect to $server:$port\n$errstr ($errno)";
+            return false;
+        }
+        stream_set_timeout($socket, (int) $this->timeout); // limit reads as well
+
+        // build request
+        $request  = "$method $request_url HTTP/".$this->http.HTTP_NL;
+        $request .= $this->_buildHeaders($headers);
+        $request .= $this->_getCookies();
+        $request .= HTTP_NL;
+        $request .= $post;
+        $this->_debug('request',$request);
+
+        // send request
+        fputs($socket, $request);
+
+        // read headers from socket line by line, an empty line ends them
+        $r_headers = '';
+        while(!feof($socket) && !preg_match('/\r?\n\r?\n$/',$r_headers)){
+            $line = fgets($socket,4096);
+            if($line === false) break; // nothing more to read
+            $r_headers .= $line;
+        }
+
+        //read body (with chunked encoding if needed)
+        $r_body    = '';
+        if(preg_match('/transfer\-(en)?coding:\s+chunked\r\n/i',$r_headers)){
+            do {
+                $size_line  = (string) fgets($socket,1024); // hex size [;extension] \r\n
+                $chunk_size = hexdec(preg_replace('/[^0-9a-f].*$/is','',trim($size_line)));
+                $remaining  = $chunk_size;
+                while($remaining > 0){ // one fread may return less than asked for
+                    $part = fread($socket,$remaining);
+                    if($part === false || $part === '') break 2; // EOF or timeout
+                    $r_body    .= $part;
+                    $remaining -= strlen($part);
+                }
+                if ($chunk_size) fgets($socket,3); // read trailing \r\n
+                if($this->max_bodysize && strlen($r_body) > $this->max_bodysize){
+                    fclose($socket);
+                    $this->error = 'Allowed response size exceeded';
+                    return false;
+                }
+            } while ($chunk_size);
+        }else{
+            // read entire socket
+            while (!feof($socket)) {
+                $r_body .= fread($socket,4096);
+                if($this->max_bodysize && strlen($r_body) > $this->max_bodysize){
+                    fclose($socket);
+                    $this->error = 'Allowed response size exceeded';
+                    return false;
+                }
+            }
+        }
+
+        // close socket
+        $status = socket_get_status($socket);
+        fclose($socket);
+        $this->_debug('response headers',$r_headers);
+
+        // check for timeout
+        if ($status['timed_out']){
+            $this->error = "Connection timed out";
+            return false;
+        }
+
+        // get Status
+        if (!preg_match('/^HTTP\/(\d\.\d)\s*(\d+).*?\n/', $r_headers, $m)) {
+            $this->error = 'Server returned bad answer';
+            return false;
+        }
+        $this->status = $this->resp_status = $m[2];
+
+        // handle headers and cookies (one Set-Cookie is a string, several an array)
+        $this->resp_headers = $this->_parseHeaders($r_headers);
+        if(isset($this->resp_headers['set-cookie'])){
+            foreach ((array) $this->resp_headers['set-cookie'] as $c){
+                list($c) = explode(';',$c,2); // drop attributes like path
+                $pair    = explode('=',$c,2);
+                if(count($pair) == 2) $this->cookies[trim($pair[0])] = trim($pair[1]);
+            }
+        }
+        $this->_debug('Object headers',$this->resp_headers);
+
+        // check server status code to follow redirect
+        if($this->status == 301 || $this->status == 302 ){
+            if (empty($this->resp_headers['location'])){
+                $this->error = 'Redirect but no Location Header found';
+                return false;
+            }elseif($this->redirect_count == $this->max_redirect){
+                $this->error = 'Maximum number of redirects exceeded';
+                return false;
+            }else{
+                $this->redirect_count++;
+                $this->referer = $url;
+                if (!preg_match('/^http/i', $this->resp_headers['location'])){
+                    // relative target: stay on the same scheme, host and port
+                    $base = $uri['scheme'].'://'.$uri['host'].(empty($uri['port']) ? '' : ':'.$uri['port']);
+                    $this->resp_headers['location'] = $base.$this->resp_headers['location'];
+                }
+                // perform redirected request, always via GET (required by RFC)
+                return $this->sendRequest($this->resp_headers['location'],array(),'GET');
+            }
+        }
+
+        // decode gzip if needed
+        $gzip = (isset($this->resp_headers['content-encoding']) && $this->resp_headers['content-encoding'] == 'gzip');
+        $this->resp_body = $gzip ? gzinflate(substr($r_body, 10)) : $r_body;
+
+        $this->_debug('response body',$this->resp_body);
+        $this->redirect_count = 0;
+        return true;
+    }
+
+    /**
+     * print debug info (HTML escaped), does nothing unless $debug is set
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    function _debug($info,$var){
+        if(!$this->debug) return;
+        print '<b>'.$info.'</b><br />';
+        ob_start();
+        print_r($var);
+        $content = htmlspecialchars(ob_get_contents());
+        ob_end_clean();
+        print '<pre>'.$content.'</pre>';
+    }
+
+    /**
+     * convert given header string to Header array
+     *
+     * All Keys are lowercased, a header sent more than once yields an array.
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    function _parseHeaders($string){
+        $headers = array();
+        $lines = explode("\n",$string);
+        foreach($lines as $line){
+            list($key,$val) = array_pad(explode(':',$line,2),2,''); // no colon: empty value
+            $key = strtolower(trim($key));
+            $val = trim($val);
+            if($val === '') continue; // skip valueless lines but keep a value of "0"
+            if(isset($headers[$key])){
+                if(is_array($headers[$key])){
+                    $headers[$key][] = $val;
+                }else{
+                    $headers[$key] = array($headers[$key],$val);
+                }
+            }else{
+                $headers[$key] = $val;
+            }
+        }
+        return $headers;
+    }
+
+    /**
+     * convert given header array to header string
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    function _buildHeaders($headers){
+        $string = '';
+        foreach($headers as $key => $value){
+            if((string) $value === '') continue; // skip unset headers, keep "0" (Content-Length: 0)
+            $string .= $key.': '.$value.HTTP_NL;
+        }
+        return $string;
+    }
+
+    /**
+     * get cookies as http header string
+     *
+     * @author Andreas Goetz <cpuidle@gmx.de>
+     */
+    function _getCookies(){
+        $headers = '';
+        foreach ($this->cookies as $key => $val){
+            if ($headers !== '') $headers .= '; ';
+            $headers .= $key.'='.$val;
+        }
+        if ($headers !== '') $headers = "Cookie: $headers".HTTP_NL;
+        return $headers;
+    }
+
+    /**
+     * Encode data for posting
+     *
+     * @todo handle mixed encoding for file uploads
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    function _postEncode($data){
+        $url = '';
+        foreach($data as $key => $val){
+            if($url !== '') $url .= '&';
+            $url .= urlencode($key).'='.urlencode($val);
+        }
+        return $url;
+    }
+}
+
+//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/inc/io.php b/inc/io.php
index 5cf50af8f..c25bd8975 100644
--- a/inc/io.php
+++ b/inc/io.php
@@ -8,6 +8,7 @@
 
   if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
   require_once(DOKU_INC.'inc/common.php');
+  require_once(DOKU_INC.'inc/HTTPClient.php');
 
 /**
  * Removes empty directories
@@ -261,30 +262,21 @@ function io_mkdir_ftp($dir){
  * downloads a file from the net and saves it to the given location
  *
  * @author Andreas Gohr <andi@splitbrain.org>
- * @todo   Add size limit
  */
 function io_download($url,$file){
-  $fp = @fopen($url,"rb");
-  if(!$fp) return false;
+  $http = new DokuHTTPClient();
+  $http->max_bodysize = 2*1024*1024; //max. 2MB
+  $http->timeout = 25; //max. 25 sec
 
-  $kb  = 0;
-  $now = time();
+  $data = $http->get($url); // response body, or false on failure or a status other than 200
+  if(!$data) return false;  // also gives up on an empty body
 
-  while(!feof($fp)){
-    if($kb++ > 2048 || (time() - $now) > 45){
-      //abort on 2 MB and timeout on 45 sec
-      return false;
-    }
-    $cont.= fread($fp,1024);
-  }
+  $fp = @fopen($file,"wb"); // binary mode, the downloaded data is not necessarily text
+  if(!$fp) return false;
+  fwrite($fp,$data);
   fclose($fp);
-
-  $fp2 = @fopen($file,"w");
-  if(!$fp2) return false;
-  fwrite($fp2,$cont);
-  fclose($fp2);
   return true;
-} 
+}
 
 /**
  * Runs an external command and returns it's output as string
-- 
GitLab