From 90a1db709d3590e849a5a4966fbdf8fb58ae75cd Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Sun, 4 Nov 2012 11:31:20 +0100
Subject: [PATCH] Tar: support for creating archives with long filenames

The library now creates either a POSIX ustar prefix or a GNU longlink
entry for files which have a name longer than 100 bytes
---
 _test/tests/inc/tar.test.php |  97 +++++++++++++++++++++-------
 inc/Tar.class.php            | 121 ++++++++++++++++++-----------------
 2 files changed, 137 insertions(+), 81 deletions(-)

diff --git a/_test/tests/inc/tar.test.php b/_test/tests/inc/tar.test.php
index 9abd27612..47851fd4c 100644
--- a/_test/tests/inc/tar.test.php
+++ b/_test/tests/inc/tar.test.php
@@ -8,7 +8,7 @@ class Tar_TestCase extends DokuWikiTest {
      *
      * No check for format correctness
      */
-    public function test_createdynamic(){
+    public function test_createdynamic() {
         $tar = new Tar();
 
         $dir = dirname(__FILE__).'/tar';
@@ -38,7 +38,7 @@ class Tar_TestCase extends DokuWikiTest {
      *
      * No check for format correctness
      */
-    public function test_createfile(){
+    public function test_createfile() {
         $tar = new Tar();
 
         $dir = dirname(__FILE__).'/tar';
@@ -70,10 +70,10 @@ class Tar_TestCase extends DokuWikiTest {
     /**
      * List the contents of the prebuilt TAR files
      */
-    public function test_tarcontent(){
+    public function test_tarcontent() {
         $dir = dirname(__FILE__).'/tar';
 
-        foreach(array('tar','tgz','tbz') as $ext){
+        foreach(array('tar', 'tgz', 'tbz') as $ext) {
             $tar  = new Tar();
             $file = "$dir/test.$ext";
 
@@ -92,11 +92,11 @@ class Tar_TestCase extends DokuWikiTest {
     /**
      * Extract the prebuilt tar files
      */
-    public function test_tarextract(){
+    public function test_tarextract() {
         $dir = dirname(__FILE__).'/tar';
         $out = sys_get_temp_dir().'/dwtartest'.md5(time());
 
-        foreach(array('tar', 'tgz', 'tbz') as $ext){
+        foreach(array('tar', 'tgz', 'tbz') as $ext) {
             $tar  = new Tar();
             $file = "$dir/test.$ext";
 
@@ -118,16 +118,16 @@ class Tar_TestCase extends DokuWikiTest {
     /**
      * Extract the prebuilt tar files with component stripping
      */
-    public function test_compstripextract(){
+    public function test_compstripextract() {
         $dir = dirname(__FILE__).'/tar';
         $out = sys_get_temp_dir().'/dwtartest'.md5(time());
 
-        foreach(array('tar', 'tgz', 'tbz') as $ext){
+        foreach(array('tar', 'tgz', 'tbz') as $ext) {
             $tar  = new Tar();
             $file = "$dir/test.$ext";
 
             $tar->open($file);
-            $tar->extract($out,1);
+            $tar->extract($out, 1);
 
             clearstatcache();
 
@@ -144,16 +144,16 @@ class Tar_TestCase extends DokuWikiTest {
     /**
      * Extract the prebuilt tar files with prefix stripping
      */
-    public function test_prefixstripextract(){
+    public function test_prefixstripextract() {
         $dir = dirname(__FILE__).'/tar';
         $out = sys_get_temp_dir().'/dwtartest'.md5(time());
 
-        foreach(array('tar', 'tgz', 'tbz') as $ext){
+        foreach(array('tar', 'tgz', 'tbz') as $ext) {
             $tar  = new Tar();
             $file = "$dir/test.$ext";
 
             $tar->open($file);
-            $tar->extract($out,'tar/foobar/');
+            $tar->extract($out, 'tar/foobar/');
 
             clearstatcache();
 
@@ -170,22 +170,21 @@ class Tar_TestCase extends DokuWikiTest {
     /**
      * Extract the prebuilt tar files with include regex
      */
-    public function test_includeextract(){
+    public function test_includeextract() {
         $dir = dirname(__FILE__).'/tar';
         $out = sys_get_temp_dir().'/dwtartest'.md5(time());
 
-        foreach(array('tar', 'tgz', 'tbz') as $ext){
+        foreach(array('tar', 'tgz', 'tbz') as $ext) {
             $tar  = new Tar();
             $file = "$dir/test.$ext";
 
             $tar->open($file);
-            $tar->extract($out,'','','/\/foobar\//');
+            $tar->extract($out, '', '', '/\/foobar\//');
 
             clearstatcache();
 
             $this->assertFileNotExists($out.'/tar/testdata1.txt', "Extracted $file");
 
-
             $this->assertFileExists($out.'/tar/foobar/testdata2.txt', "Extracted $file");
             $this->assertEquals(13, filesize($out.'/tar/foobar/testdata2.txt'), "Extracted $file");
 
@@ -196,16 +195,16 @@ class Tar_TestCase extends DokuWikiTest {
     /**
      * Extract the prebuilt tar files with exclude regex
      */
-    public function test_excludeextract(){
+    public function test_excludeextract() {
         $dir = dirname(__FILE__).'/tar';
         $out = sys_get_temp_dir().'/dwtartest'.md5(time());
 
-        foreach(array('tar', 'tgz', 'tbz') as $ext){
+        foreach(array('tar', 'tgz', 'tbz') as $ext) {
             $tar  = new Tar();
             $file = "$dir/test.$ext";
 
             $tar->open($file);
-            $tar->extract($out,'','/\/foobar\//');
+            $tar->extract($out, '', '/\/foobar\//');
 
             clearstatcache();
 
@@ -221,8 +220,8 @@ class Tar_TestCase extends DokuWikiTest {
     /**
      * Check the extension to compression guesser
      */
-    public function test_filetype(){
-        $tar  = new Tar();
+    public function test_filetype() {
+        $tar = new Tar();
         $this->assertEquals(Tar::COMPRESS_NONE, $tar->filetype('foo'));
         $this->assertEquals(Tar::COMPRESS_GZIP, $tar->filetype('foo.tgz'));
         $this->assertEquals(Tar::COMPRESS_GZIP, $tar->filetype('foo.tGZ'));
@@ -234,12 +233,12 @@ class Tar_TestCase extends DokuWikiTest {
         $this->assertEquals(Tar::COMPRESS_BZIP, $tar->filetype('foo.tar.bz2'));
     }
 
-    public function test_longpathextract(){
+    public function test_longpathextract() {
         $dir = dirname(__FILE__).'/tar';
         $out = sys_get_temp_dir().'/dwtartest'.md5(time());
 
-        foreach(array('ustar','gnu') as $format){
-            $tar  = new Tar();
+        foreach(array('ustar', 'gnu') as $format) {
+            $tar = new Tar();
             $tar->open("$dir/longpath-$format.tgz");
             $tar->extract($out);
 
@@ -249,4 +248,54 @@ class Tar_TestCase extends DokuWikiTest {
         }
     }
 
+    public function test_createlongpathustar() {
+        $tar = new Tar();
+        $tmp = tempnam(sys_get_temp_dir(), 'dwtartest');
+
+        $path = '';
+        for($i=0; $i<11; $i++) $path .= '1234567890/';
+        $path = rtrim($path,'/');
+
+        $tar->create($tmp, Tar::COMPRESS_NONE);
+        $tar->addData("$path/test.txt", 'testcontent1');
+        $tar->close();
+
+        $this->assertTrue(filesize($tmp) > 30); //arbitrary non-zero number
+        $data = file_get_contents($tmp);
+
+        // We should find the path and filename separated, no longlink entry
+        $this->assertTrue(strpos($data, 'testcontent1') !== false, 'content in TAR');
+        $this->assertTrue(strpos($data, 'test.txt') !== false, 'filename in TAR');
+        $this->assertTrue(strpos($data, $path) !== false, 'path in TAR');
+        $this->assertFalse(strpos($data, "$path/test.txt") !== false, 'full filename in TAR');
+        $this->assertFalse(strpos($data, '@LongLink') !== false, '@LongLink in TAR');
+
+        @unlink($tmp);
+    }
+
+    public function test_createlongpathgnu() {
+        $tar = new Tar();
+        $tmp = tempnam(sys_get_temp_dir(), 'dwtartest');
+
+        $path = '';
+        for($i=0; $i<20; $i++) $path .= '1234567890/';
+        $path = rtrim($path,'/');
+
+        $tar->create($tmp, Tar::COMPRESS_NONE);
+        $tar->addData("$path/test.txt", 'testcontent1');
+        $tar->close();
+
+        $this->assertTrue(filesize($tmp) > 30); //arbitrary non-zero number
+        $data = file_get_contents($tmp);
+
+        // We should find the complete path/filename and a longlink entry
+        $this->assertTrue(strpos($data, 'testcontent1') !== false, 'content in TAR');
+        $this->assertTrue(strpos($data, 'test.txt') !== false, 'filename in TAR');
+        $this->assertTrue(strpos($data, $path) !== false, 'path in TAR');
+        $this->assertTrue(strpos($data, "$path/test.txt") !== false, 'full filename in TAR');
+        $this->assertTrue(strpos($data, '@LongLink') !== false, '@LongLink in TAR');
+
+        @unlink($tmp);
+    }
+
 }
\ No newline at end of file
diff --git a/inc/Tar.class.php b/inc/Tar.class.php
index 8da30e736..59e14c705 100644
--- a/inc/Tar.class.php
+++ b/inc/Tar.class.php
@@ -4,6 +4,8 @@
  * To keep things simple, the modification of existing archives is not supported. It handles
  * uncompressed, gzip and bzip2 compressed tar files.
  *
+ * Long pathnames (>100 chars) are supported in POSIX ustar and GNU longlink formats.
+ *
  * To list the contents of an existing TAR archive, open() it and use contents() on it:
  *
  *     $tar = new Tar();
@@ -145,10 +147,10 @@ class Tar {
      * @throws TarIOException
      * @return array
      */
-    function extract($outdir, $strip='', $exclude='', $include='') {
+    function extract($outdir, $strip = '', $exclude = '', $include = '') {
         if($this->closed || !$this->file) throw(new TarIOException('Can not read from a closed archive'));
 
-        $outdir = rtrim($outdir,'/');
+        $outdir = rtrim($outdir, '/');
         io_mkdir_p($outdir);
         $striplen = strlen($strip);
 
@@ -164,32 +166,32 @@ class Tar {
             $filename = $this->cleanPath($header['filename']);
             if(is_int($strip)) {
                 // if $strip is an integer we strip this many path components
-                $parts = explode('/',$filename);
-                if(!$header['typeflag']){
+                $parts = explode('/', $filename);
+                if(!$header['typeflag']) {
                     $base = array_pop($parts); // keep filename itself
-                }else{
+                } else {
                     $base = '';
                 }
-                $filename = join('/',array_slice($parts,$strip));
+                $filename = join('/', array_slice($parts, $strip));
                 if($base) $filename .= "/$base";
-            }else{
+            } else {
                 // ifstrip is a string, we strip a prefix here
-                if(substr($filename,0,$striplen) == $strip) $filename = substr($filename,$striplen);
+                if(substr($filename, 0, $striplen) == $strip) $filename = substr($filename, $striplen);
             }
 
             // check if this should be extracted
             $extract = true;
-            if(!$filename){
+            if(!$filename) {
                 $extract = false;
-            }else{
-                if($include){
-                    if(preg_match($include, $filename)){
+            } else {
+                if($include) {
+                    if(preg_match($include, $filename)) {
                         $extract = true;
-                    }else{
+                    } else {
                         $extract = false;
                     }
                 }
-                if($exclude && preg_match($exclude, $filename)){
+                if($exclude && preg_match($exclude, $filename)) {
                     $extract = false;
                 }
             }
@@ -203,7 +205,7 @@ class Tar {
                 io_mkdir_p($directory);
 
                 // is this a file?
-                if(!$header['typeflag']){
+                if(!$header['typeflag']) {
                     $fp = fopen($output, "wb");
                     if(!$fp) throw(new TarIOException('Could not open file for writing: '.$output));
 
@@ -216,10 +218,10 @@ class Tar {
                     fclose($fp);
                     touch($output, $header['mtime']);
                     chmod($output, $header['perm']);
-                }else{
+                } else {
                     $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
                 }
-            }else{
+            } else {
                 $this->skipbytes(ceil($header['size'] / 512) * 512);
             }
         }
@@ -261,7 +263,7 @@ class Tar {
             if(!$this->fh) throw(new TarIOException('Could not open file for writing: '.$this->file));
         }
         $this->writeaccess = false;
-        $this->closed = false;
+        $this->closed      = false;
     }
 
     /**
@@ -270,7 +272,6 @@ class Tar {
      * @todo handle directory adding
      * @param string $file the original file
      * @param string $name the name to use for the file in the archive
-     * @throws TarBadFilename
      * @throws TarIOException
      */
     public function addFile($file, $name = '') {
@@ -279,9 +280,6 @@ class Tar {
         if(!$name) $name = $file;
         $name = $this->cleanPath($name);
 
-        // FIXME ustar should support up 256 chars
-        if(strlen($name) > 99) throw(new TarBadFilename('Filenames may not exceed 99 bytes: '.$name));
-
         $fp = fopen($file, 'rb');
         if(!$fp) throw(new TarIOException('Could not open file for reading: '.$file));
 
@@ -294,8 +292,7 @@ class Tar {
             $stat[5],
             fileperms($file),
             filesize($file),
-            filemtime($file),
-            false
+            filemtime($file)
         );
 
         while(!feof($fp)) {
@@ -306,26 +303,21 @@ class Tar {
     }
 
     /**
-     * Add a file to the current TAR archive using in memory data
+     * Add a file to the current TAR archive using the given $data as content
      *
-     * @param     $name
-     * @param     $data
-     * @param int $uid
-     * @param int $gid
-     * @param int $perm
-     * @param int $mtime
+     * @param string $name
+     * @param string $data
+     * @param int    $uid
+     * @param int    $gid
+     * @param int    $perm
+     * @param int    $mtime
      * @throws TarIOException
-     * @throws TarBadFilename
      */
     public function addData($name, $data, $uid = 0, $gid = 0, $perm = 0666, $mtime = 0) {
         if($this->closed) throw(new TarIOException('Archive has been closed, files can no longer be added'));
 
         $name = $this->cleanPath($name);
-
-        // FIXME ustar should support up 256 chars
-        if(strlen($name) > 99) throw(new TarBadFilename('Filenames may not exceed 99 bytes: '.$name));
-
-        $len = strlen($data);
+        $len  = strlen($data);
 
         $this->writeFileHeader(
             $name,
@@ -333,8 +325,7 @@ class Tar {
             $gid,
             $perm,
             $len,
-            ($mtime) ? $mtime : time(),
-            false
+            ($mtime) ? $mtime : time()
         );
 
         for($s = 0; $s < $len; $s += 512) {
@@ -357,23 +348,23 @@ class Tar {
         if($this->closed) return; // we did this already
 
         // write footer
-        if($this->writeaccess){
+        if($this->writeaccess) {
             $this->writebytes(pack("a512", ""));
             $this->writebytes(pack("a512", ""));
         }
 
         // close file handles
-        if($this->file){
-            if($this->comptype === Tar::COMPRESS_GZIP){
+        if($this->file) {
+            if($this->comptype === Tar::COMPRESS_GZIP) {
                 gzclose($this->fh);
-            }elseif($this->comptype === Tar::COMPRESS_BZIP){
+            } elseif($this->comptype === Tar::COMPRESS_BZIP) {
                 bzclose($this->fh);
-            }else{
+            } else {
                 fclose($this->fh);
             }
 
             $this->file = '';
-            $this->fh = 0;
+            $this->fh   = 0;
         }
 
         $this->closed = true;
@@ -460,12 +451,12 @@ class Tar {
      * @param int  $bytes seek to this position
      */
     function skipbytes($bytes) {
-        if($this->comptype === Tar::COMPRESS_GZIP){
+        if($this->comptype === Tar::COMPRESS_GZIP) {
             @gzseek($this->fh, $bytes, SEEK_CUR);
-        }elseif($this->comptype === Tar::COMPRESS_BZIP){
+        } elseif($this->comptype === Tar::COMPRESS_BZIP) {
             // there is no seek in bzip2, we simply read on
             @bzread($this->fh, $bytes);
-        }else{
+        } else {
             @fseek($this->fh, $bytes, SEEK_CUR);
         }
     }
@@ -479,19 +470,38 @@ class Tar {
      * @param int    $perm
      * @param int    $size
      * @param int    $mtime
-     * @param bool   $isdir
+     * @param string $typeflag Set to '5' for directories
      */
-    protected function writeFileHeader($name, $uid, $gid, $perm, $size, $mtime, $isdir = false) {
+    protected function writeFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '') {
+        // handle filename length restrictions
+        $prefix  = '';
+        $namelen = strlen($name);
+        if($namelen > 100) {
+            $file = basename($name);
+            $dir  = dirname($name);
+            if(strlen($file) > 100 || strlen($dir) > 155) {
+                // we're still too large, let's use GNU longlink
+                $this->writeFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
+                for($s = 0; $s < $namelen; $s += 512) {
+                    $this->writebytes(pack("a512", substr($name, $s, 512)));
+                }
+                $name = substr($name, 0, 100); // cut off name
+            } else {
+                // we're fine when splitting, use POSIX ustar
+                $prefix = $dir;
+                $name   = $file;
+            }
+        }
+
         // values are needed in octal
         $uid   = sprintf("%6s ", DecOct($uid));
         $gid   = sprintf("%6s ", DecOct($gid));
         $perm  = sprintf("%6s ", DecOct($perm));
         $size  = sprintf("%11s ", DecOct($size));
         $mtime = sprintf("%11s", DecOct($mtime));
-        $dir   = ($isdir) ? '5' : '';
 
         $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
-        $data_last  = pack("a1a100a6a2a32a32a8a8a155a12", $dir, '', '', '', '', '', '', '', '', "");
+        $data_last  = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");
 
         for($i = 0, $chks = 0; $i < 148; $i++)
             $chks += ord($data_first[$i]);
@@ -541,11 +551,11 @@ class Tar {
         if(trim($header['prefix'])) $return['filename'] = trim($header['prefix']).'/'.$return['filename'];
 
         // Handle Long-Link entries from GNU Tar
-        if($return['typeflag'] == 'L'){
+        if($return['typeflag'] == 'L') {
             // following data block(s) is the filename
             $filename = trim($this->readbytes(ceil($header['size'] / 512) * 512));
             // next block is the real header
-            $block = $this->readbytes(512);
+            $block  = $this->readbytes(512);
             $return = $this->parseHeader($block);
             // overwrite the filename
             $return['filename'] = $filename;
@@ -617,9 +627,6 @@ class Tar {
     }
 }
 
-class TarBadFilename extends Exception {
-}
-
 class TarIOException extends Exception {
 }
 
-- 
GitLab