/**
 * the fields of one tar header block that extraction needs
 */
class TarHeader {
    /** @var string entry path (ustar prefix or GNU long name already applied) */
    public $filename;
    /** @var int payload size in bytes, without the padding to a 512 byte boundary */
    public $size;
    /** @var int permission bits decoded from the octal mode field */
    public $perm;
    /** @var int checksum calculated over the header block */
    public $checksum;
    /** @var string raw one byte typeflag ('0' or "\0" = file, '5' = directory, ...) */
    public $type;
}

/**
 * extract tar archive into destination directory
 *
 * Regular files and directories are created below $destination (or relative
 * to the current working directory when $destination is empty). Every other
 * entry type (links, devices, pax / GNU metadata) is skipped, but its payload
 * is still consumed so the following header is read from the right offset.
 *
 * Entry names come from the archive and are therefore untrusted: a name with
 * a ".." component aborts the extraction, and leading slashes are dropped
 * when a destination is given.
 *
 * @param string $file        path of the (optionally gzip compressed) archive
 * @param string $destination directory to extract into
 * @return bool|null true once the end of the archive (or an unreadable
 *                   header) is reached; on failure whatever \TF\debug()
 *                   returns (defined outside this file, presumably null)
 */
function tar_extract(string $file, string $destination = "") : ?bool {
    $input = gzopen($file, 'rb');
    if ($input === false) { return \TF\debug("unable to open [%s]", $file); }

    $base = ($destination === "") ? "" : rtrim($destination, "/\\") . "/";

    try {
        while (($header = tar_read_header($input))) {
            $name = (string) $header->filename;
            $type = (string) $header->type;

            // "\0" and '' are the pre-POSIX markers for a regular file, '7' is a contiguous file
            $is_file = in_array($type, ['0', "\0", '', '7'], true);
            // old archives mark directories only by a trailing slash
            $is_dir = ($type === '5') || ($is_file && substr($name, -1) === '/');

            // $target stays null for entries that are skipped
            $target = null;
            if ($is_dir || $is_file) {
                if (in_array('..', explode('/', str_replace('\\', '/', $name)), true)) {
                    return \TF\debug("refusing unsafe path [%s]", $name);
                }
                $target = ($base === "") ? $name : $base . ltrim($name, "/\\");
            }

            $output = null;
            if ($is_dir) {
                // best effort, a failure shows up when a file below it is opened
                if (!is_dir($target)) { @mkdir($target, 0755, true); }
            } else if ($is_file) {
                $parent = dirname($target);
                if (!is_dir($parent)) { @mkdir($parent, 0755, true); }
                $output = @fopen($target, "wb");
                if (!$output) { return \TF\debug("unable to open [%s]", $target); }
            }

            // the payload is stored padded with NUL bytes to a multiple of 512
            $remaining = max(0, (int) $header->size);
            $padded = intdiv($remaining + 511, 512) * 512;
            try {
                while ($padded > 0) {
                    $chunk = gzread($input, min($padded, 8192));
                    if ($chunk === false || $chunk === '') {
                        return \TF\debug("unexpected end of archive [%s]", $name);
                    }
                    $padded -= strlen($chunk);

                    if ($output !== null && $remaining > 0) {
                        // never write the padding that follows the last data byte
                        $data = substr($chunk, 0, $remaining);
                        $len = strlen($data);
                        if ($len !== fwrite($output, $data)) {
                            return \TF\debug("unable to write %d bytes [%s]", $len, $target);
                        }
                        $remaining -= $len;
                    }
                }
            } finally {
                if ($output !== null) { fclose($output); }
            }

            // only plain permission bits, no setuid / setgid / sticky from the archive
            if ($output !== null) { @chmod($target, ((int) $header->perm) & 0777); }
        }
    } finally {
        gzclose($input);
    }

    return true;
}

/**
 * calculate a checksum for a header block
 *
 * Sums the unsigned byte values of the block, counting the 8 byte checksum
 * field (offsets 148-155) as if it were filled with spaces (8 * 32 = 256).
 * Bytes beyond offset 511 are ignored and missing bytes count as zero.
 *
 * @param string $block the raw header block
 * @return int the calculated checksum
 */
function tar_calc_checksum(string $block) : int {
    $checksum = 256;
    $length = min(strlen($block), 512);
    for ($i = 0; $i < $length; $i++) {
        if ($i >= 148 && $i < 156) { continue; }
        $checksum += ord($block[$i]);
    }
    return $checksum;
}

/**
 * parse a tar header
 *
 * Reads the next 512 byte header block from $fh and decodes it. A GNU long
 * name entry (typeflag 'L') is consumed here and its name is applied to the
 * entry that follows it.
 *
 * @param resource $fh handle opened with gzopen()
 * @return TarHeader|null the decoded header; null at the end of the archive
 *                        (end of stream or an all zero block); on a short
 *                        read or checksum mismatch whatever \TF\debug()
 *                        returns (defined outside this file, presumably null)
 */
function tar_read_header($fh) : ?TarHeader {
    $long_name = null;

    while (true) {
        $block = gzread($fh, 512);

        // regular end of the archive
        if ($block === '' || ($block !== false && strlen($block) === 512 && trim($block, "\0") === '')) {
            return null;
        }
        if ($block === false || strlen($block) !== 512) {
            return \TF\debug("unable to read header block");
        }

        $data = unpack(
            "a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix",
            $block
        );
        if ($data === false) {
            return \TF\debug("unable to read header block");
        }

        $checksum = tar_calc_checksum($block);
        $expected = (int) octdec(trim($data['checksum']));
        if ($checksum !== $expected) {
            return \TF\debug("calc checksum failed [%d] / [%d]", $checksum, $expected);
        }

        $size = (int) octdec(trim($data['size']));

        // GNU long name: the payload is the real name of the next entry
        if ($data['typeflag'] === 'L') {
            // the size is untrusted, refuse anything that is not a plausible path length
            if ($size < 1 || $size > 65536) {
                return \TF\debug("invalid long name size [%d]", $size);
            }
            $padded = intdiv($size + 511, 512) * 512;
            $raw = gzread($fh, $padded);
            if ($raw === false || strlen($raw) !== $padded) {
                return \TF\debug("unable to read long name");
            }
            $long_name = rtrim(substr($raw, 0, $size), "\0");
            continue;
        }

        $header = new TarHeader();
        $header->checksum = $checksum;
        $header->filename = trim($data['filename']);
        $header->perm = (int) octdec(trim($data['perm']));
        $header->size = $size;
        $header->type = $data['typeflag'];

        // POSIX ustar stores the leading part of long paths in the prefix field
        $prefix = trim($data['prefix']);
        if ($prefix !== '' && rtrim($data['magic'], "\0") === 'ustar') {
            $header->filename = $prefix . '/' . $header->filename;
        }

        if ($long_name !== null) {
            $header->filename = $long_name;
        }

        return $header;
    }
}