2011-12-23 06:38:02 +08:00
|
|
|
"""
|
2018-04-18 06:19:29 +08:00
|
|
|
Based on "python-archive" -- https://pypi.org/project/python-archive/
|
2011-12-23 06:38:02 +08:00
|
|
|
|
2012-05-12 01:03:51 +08:00
|
|
|
Copyright (c) 2010 Gary Wilson Jr. <gary.wilson@gmail.com> and contributors.
|
2011-12-23 06:38:02 +08:00
|
|
|
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
|
|
in the Software without restriction, including without limitation the rights
|
|
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
|
|
all copies or substantial portions of the Software.
|
|
|
|
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
THE SOFTWARE.
|
|
|
|
"""
|
|
|
|
import os
|
|
|
|
import shutil
|
2016-12-29 04:20:24 +08:00
|
|
|
import stat
|
2011-12-23 06:38:02 +08:00
|
|
|
import tarfile
|
|
|
|
import zipfile
|
|
|
|
|
2021-01-22 19:23:18 +08:00
|
|
|
from django.core.exceptions import SuspiciousOperation
|
|
|
|
|
2011-12-23 06:38:02 +08:00
|
|
|
|
|
|
|
class ArchiveException(Exception):
    """Common ancestor of the archive-related errors raised by this module."""
|
|
|
|
|
|
|
|
|
|
|
|
class UnrecognizedArchiveFormat(ArchiveException):
    """Raised when the given file does not match a recognized archive format."""
|
|
|
|
|
|
|
|
|
2019-02-16 07:33:21 +08:00
|
|
|
def extract(path, to_path):
    """
    Open the tar or zip archive found at ``path`` and unpack its contents
    into the directory ``to_path``. The archive is closed when unpacking
    finishes or fails.
    """
    archive = Archive(path)
    # Archive.__enter__ returns the instance itself, so entering the already
    # constructed object is equivalent to ``with Archive(path) as archive``.
    with archive:
        archive.extract(to_path)
|
2011-12-23 06:38:02 +08:00
|
|
|
|
|
|
|
|
2017-01-19 15:39:46 +08:00
|
|
|
class Archive:
    """
    Public facade: selects the archive implementation matching a file and
    forwards every operation to it. Usable as a context manager, in which
    case the underlying archive is closed on exit.
    """

    def __init__(self, file):
        implementation = self._archive_cls(file)
        self._archive = implementation(file)

    @staticmethod
    def _archive_cls(file):
        """
        Return the implementation class registered in ``extension_map`` for
        ``file``.

        ``file`` is either a path string or an object exposing a ``name``
        attribute. UnrecognizedArchiveFormat is raised when no name can be
        obtained or when no extension of the name is registered.
        """
        if isinstance(file, str):
            filename = file
        else:
            try:
                filename = file.name
            except AttributeError:
                raise UnrecognizedArchiveFormat(
                    "File object not a recognized archive format.")
        # Look up the last extension first; if that is unknown, fall back to
        # the extension preceding it (e.g. ".tar" in "name.tar.gz").
        stem, last_ext = os.path.splitext(filename.lower())
        handler = extension_map.get(last_ext)
        if not handler:
            inner_ext = os.path.splitext(stem)[1]
            handler = extension_map.get(inner_ext)
        if not handler:
            raise UnrecognizedArchiveFormat(
                "Path not a recognized archive format: %s" % filename)
        return handler

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so an in-flight exception keeps propagating.
        self.close()

    def extract(self, to_path):
        """Delegate extraction into ``to_path`` to the implementation."""
        self._archive.extract(to_path)

    def list(self):
        """Delegate listing of the archive contents to the implementation."""
        self._archive.list()

    def close(self):
        """Close the underlying archive implementation."""
        self._archive.close()
|
|
|
|
|
2011-12-23 06:38:02 +08:00
|
|
|
|
2017-01-19 15:39:46 +08:00
|
|
|
class BaseArchive:
    """
    Base Archive class. Implementations should inherit this class.
    """

    @staticmethod
    def _copy_permissions(mode, filename):
        """
        If the file in the archive has some permissions (this assumes a file
        won't be writable/executable without being readable), apply those
        permissions to the unarchived file.
        """
        if mode & stat.S_IROTH:
            os.chmod(filename, mode)

    def split_leading_dir(self, path):
        """
        Split ``path`` into its first component and the remainder.

        Leading '/' characters and then leading '\\' characters are stripped
        first. Both '/' and '\\' count as separators; the split happens at
        whichever occurs first. Return a two-item list ``[head, rest]`` when
        a separator is present, otherwise the tuple ``(path, '')``.
        """
        path = str(path)
        path = path.lstrip('/').lstrip('\\')
        if '/' in path and (('\\' in path and path.find('/') < path.find('\\')) or '\\' not in path):
            return path.split('/', 1)
        elif '\\' in path:
            return path.split('\\', 1)
        else:
            return path, ''

    def has_leading_dir(self, paths):
        """
        Return True if all the paths have the same leading path name
        (i.e., everything is in one subdirectory in an archive).
        """
        common_prefix = None
        for path in paths:
            prefix, rest = self.split_leading_dir(path)
            if not prefix:
                return False
            elif common_prefix is None:
                common_prefix = prefix
            elif prefix != common_prefix:
                return False
        return True

    def target_filename(self, to_path, name):
        """
        Return the absolute path at which the archive member ``name`` should
        be written below the directory ``to_path``.

        Raise SuspiciousOperation if the resolved path is not located inside
        ``to_path`` (for example because ``name`` contains ``..`` segments or
        is an absolute path).
        """
        target_path = os.path.abspath(to_path)
        filename = os.path.abspath(os.path.join(target_path, name))
        # Compare whole path components rather than raw string prefixes: a
        # plain startswith() check would accept a sibling directory such as
        # "/dest_evil" when extracting into "/dest".
        try:
            inside_target = os.path.commonpath([target_path, filename]) == target_path
        except ValueError:
            # commonpath() raises ValueError when the paths cannot share a
            # prefix, e.g. they live on different drives on Windows.
            inside_target = False
        if not inside_target:
            raise SuspiciousOperation("Archive contains invalid path: '%s'" % name)
        return filename

    def extract(self):
        """Unpack the archive; must be provided by subclasses."""
        raise NotImplementedError('subclasses of BaseArchive must provide an extract() method')

    def list(self):
        """List the archive contents; must be provided by subclasses."""
        raise NotImplementedError('subclasses of BaseArchive must provide a list() method')
|
2011-12-23 06:38:02 +08:00
|
|
|
|
|
|
|
|
|
|
|
class TarArchive(BaseArchive):
    """Archive implementation backed by the standard-library ``tarfile``."""

    def __init__(self, file):
        # tarfile.open() takes paths through ``name`` and already-open file
        # objects through ``fileobj``; passing a file object as ``name``
        # fails with TypeError.
        if hasattr(file, 'read'):
            self._archive = tarfile.open(fileobj=file)
        else:
            self._archive = tarfile.open(file)

    def list(self, *args, **kwargs):
        """Print the archive contents via ``TarFile.list()``."""
        self._archive.list(*args, **kwargs)

    def extract(self, to_path):
        """
        Unpack every member of the tar archive below ``to_path``.

        When all members share a single top-level directory, that directory
        is stripped from the extracted paths. Members whose data cannot be
        read (e.g. broken symlinks) are reported on stdout and skipped;
        members that carry no file data are skipped.
        """
        members = self._archive.getmembers()
        leading = self.has_leading_dir(x.name for x in members)
        for member in members:
            name = member.name
            if leading:
                name = self.split_leading_dir(name)[1]
            filename = self.target_filename(to_path, name)
            if member.isdir():
                if filename:
                    os.makedirs(filename, exist_ok=True)
            else:
                # Reset for every member so the ``finally`` clause never
                # sees an unbound name or a handle left over from a
                # previous iteration when extractfile() raises.
                extracted = None
                try:
                    extracted = self._archive.extractfile(member)
                except (KeyError, AttributeError) as exc:
                    # Some corrupt tar files seem to produce this
                    # (specifically bad symlinks)
                    print("In the tar file %s the member %s is invalid: %s" %
                          (name, member.name, exc))
                else:
                    # extractfile() returns None for members that are
                    # neither regular files nor links to one; there is no
                    # data to copy for those, so nothing is written.
                    if extracted is not None:
                        dirname = os.path.dirname(filename)
                        if dirname:
                            os.makedirs(dirname, exist_ok=True)
                        with open(filename, 'wb') as outfile:
                            shutil.copyfileobj(extracted, outfile)
                        self._copy_permissions(member.mode, filename)
                finally:
                    if extracted:
                        extracted.close()

    def close(self):
        """Close the underlying ``TarFile``."""
        self._archive.close()
|
|
|
|
|
2011-12-23 06:38:02 +08:00
|
|
|
|
|
|
|
class ZipArchive(BaseArchive):
    """Archive implementation backed by the standard-library ``zipfile``."""

    def __init__(self, file):
        self._archive = zipfile.ZipFile(file)

    def list(self, *args, **kwargs):
        """Print the archive contents via ``ZipFile.printdir()``."""
        self._archive.printdir(*args, **kwargs)

    def extract(self, to_path):
        """
        Unpack every entry of the zip archive below ``to_path``.

        When all entries share a single top-level directory, that directory
        is stripped from the extracted paths.
        """
        entries = self._archive.namelist()
        strip_top_dir = self.has_leading_dir(entries)
        for entry in entries:
            payload = self._archive.read(entry)
            info = self._archive.getinfo(entry)
            name = self.split_leading_dir(entry)[1] if strip_top_dir else entry
            if not name:
                # Nothing is left of the name (e.g. the stripped top-level
                # directory entry itself).
                continue
            filename = self.target_filename(to_path, name)
            if name.endswith(('/', '\\')):
                # A trailing separator marks a directory entry.
                os.makedirs(filename, exist_ok=True)
                continue
            parent = os.path.dirname(filename)
            if parent:
                os.makedirs(parent, exist_ok=True)
            with open(filename, 'wb') as outfile:
                outfile.write(payload)
            # The upper 16 bits of ZipInfo.external_attr hold the file mode.
            self._copy_permissions(info.external_attr >> 16, filename)

    def close(self):
        """Close the underlying ``ZipFile``."""
        self._archive.close()
|
|
|
|
|
2016-11-13 01:11:23 +08:00
|
|
|
|
2019-02-07 05:37:25 +08:00
|
|
|
# Maps lowercase file extensions to the archive class that handles them.
extension_map = {
    suffix: TarArchive
    for suffix in (
        '.tar',
        '.tar.bz2', '.tbz2', '.tbz', '.tz2',
        '.tar.gz', '.tgz', '.taz',
        '.tar.lzma', '.tlz',
        '.tar.xz', '.txz',
    )
}
extension_map['.zip'] = ZipArchive
|