summaryrefslogtreecommitdiff
path: root/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/distlib/_backport/tarfile.py
diff options
context:
space:
mode:
authorShubham Saini <shubham6405@gmail.com>2018-12-11 10:01:23 +0000
committerShubham Saini <shubham6405@gmail.com>2018-12-11 10:01:23 +0000
commit68df54d6629ec019142eb149dd037774f2d11e7c (patch)
tree345bc22d46b4e01a4ba8303b94278952a4ed2b9e /venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/distlib/_backport/tarfile.py
First commit
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/distlib/_backport/tarfile.py')
-rw-r--r--venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/distlib/_backport/tarfile.py2607
1 files changed, 2607 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/distlib/_backport/tarfile.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/distlib/_backport/tarfile.py
new file mode 100644
index 0000000..b0599bc
--- /dev/null
+++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/distlib/_backport/tarfile.py
@@ -0,0 +1,2607 @@
1#-------------------------------------------------------------------
2# tarfile.py
3#-------------------------------------------------------------------
4# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
5# All rights reserved.
6#
7# Permission is hereby granted, free of charge, to any person
8# obtaining a copy of this software and associated documentation
9# files (the "Software"), to deal in the Software without
10# restriction, including without limitation the rights to use,
11# copy, modify, merge, publish, distribute, sublicense, and/or sell
12# copies of the Software, and to permit persons to whom the
13# Software is furnished to do so, subject to the following
14# conditions:
15#
16# The above copyright notice and this permission notice shall be
17# included in all copies or substantial portions of the Software.
18#
19# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
21# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
26# OTHER DEALINGS IN THE SOFTWARE.
27#
28from __future__ import print_function
29
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34
35version = "0.9.0"
36__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
37__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
38__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
39__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
40
41#---------
42# Imports
43#---------
44import sys
45import os
46import stat
47import errno
48import time
49import struct
50import copy
51import re
52
53try:
54 import grp, pwd
55except ImportError:
56 grp = pwd = None
57
58# os.symlink on Windows prior to 6.0 raises NotImplementedError
59symlink_exception = (AttributeError, NotImplementedError)
60try:
61 # WindowsError (1314) will be raised if the caller does not hold the
62 # SeCreateSymbolicLinkPrivilege privilege
63 symlink_exception += (WindowsError,)
64except NameError:
65 pass
66
67# from tarfile import *
68__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
69
70if sys.version_info[0] < 3:
71 import __builtin__ as builtins
72else:
73 import builtins
74
75_open = builtins.open # Since 'open' is TarFile.open
76
77#---------------------------------------------------------
78# tar constants
79#---------------------------------------------------------
80NUL = b"\0" # the null character
81BLOCKSIZE = 512 # length of processing blocks
82RECORDSIZE = BLOCKSIZE * 20 # length of records
83GNU_MAGIC = b"ustar \0" # magic gnu tar string
84POSIX_MAGIC = b"ustar\x0000" # magic posix tar string
85
86LENGTH_NAME = 100 # maximum length of a filename
87LENGTH_LINK = 100 # maximum length of a linkname
88LENGTH_PREFIX = 155 # maximum length of the prefix field
89
90REGTYPE = b"0" # regular file
91AREGTYPE = b"\0" # regular file
92LNKTYPE = b"1" # link (inside tarfile)
93SYMTYPE = b"2" # symbolic link
94CHRTYPE = b"3" # character special device
95BLKTYPE = b"4" # block special device
96DIRTYPE = b"5" # directory
97FIFOTYPE = b"6" # fifo special device
98CONTTYPE = b"7" # contiguous file
99
100GNUTYPE_LONGNAME = b"L" # GNU tar longname
101GNUTYPE_LONGLINK = b"K" # GNU tar longlink
102GNUTYPE_SPARSE = b"S" # GNU tar sparse file
103
104XHDTYPE = b"x" # POSIX.1-2001 extended header
105XGLTYPE = b"g" # POSIX.1-2001 global header
106SOLARIS_XHDTYPE = b"X" # Solaris extended header
107
108USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
109GNU_FORMAT = 1 # GNU tar format
110PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
111DEFAULT_FORMAT = GNU_FORMAT
112
113#---------------------------------------------------------
114# tarfile constants
115#---------------------------------------------------------
116# File types that tarfile supports:
117SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
118 SYMTYPE, DIRTYPE, FIFOTYPE,
119 CONTTYPE, CHRTYPE, BLKTYPE,
120 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
121 GNUTYPE_SPARSE)
122
123# File types that will be treated as a regular file.
124REGULAR_TYPES = (REGTYPE, AREGTYPE,
125 CONTTYPE, GNUTYPE_SPARSE)
126
127# File types that are part of the GNU tar format.
128GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
129 GNUTYPE_SPARSE)
130
131# Fields from a pax header that override a TarInfo attribute.
132PAX_FIELDS = ("path", "linkpath", "size", "mtime",
133 "uid", "gid", "uname", "gname")
134
135# Fields from a pax header that are affected by hdrcharset.
136PAX_NAME_FIELDS = set(("path", "linkpath", "uname", "gname"))
137
138# Fields in a pax header that are numbers, all other fields
139# are treated as strings.
140PAX_NUMBER_FIELDS = {
141 "atime": float,
142 "ctime": float,
143 "mtime": float,
144 "uid": int,
145 "gid": int,
146 "size": int
147}
148
149#---------------------------------------------------------
150# Bits used in the mode field, values in octal.
151#---------------------------------------------------------
152S_IFLNK = 0o120000 # symbolic link
153S_IFREG = 0o100000 # regular file
154S_IFBLK = 0o060000 # block device
155S_IFDIR = 0o040000 # directory
156S_IFCHR = 0o020000 # character device
157S_IFIFO = 0o010000 # fifo
158
159TSUID = 0o4000 # set UID on execution
160TSGID = 0o2000 # set GID on execution
161TSVTX = 0o1000 # reserved
162
163TUREAD = 0o400 # read by owner
164TUWRITE = 0o200 # write by owner
165TUEXEC = 0o100 # execute/search by owner
166TGREAD = 0o040 # read by group
167TGWRITE = 0o020 # write by group
168TGEXEC = 0o010 # execute/search by group
169TOREAD = 0o004 # read by other
170TOWRITE = 0o002 # write by other
171TOEXEC = 0o001 # execute/search by other
172
173#---------------------------------------------------------
174# initialization
175#---------------------------------------------------------
176if os.name in ("nt", "ce"):
177 ENCODING = "utf-8"
178else:
179 ENCODING = sys.getfilesystemencoding()
180
181#---------------------------------------------------------
182# Some useful functions
183#---------------------------------------------------------
184
def stn(s, length, encoding, errors):
    """Encode string *s* and fit it into a NUL-padded field of
    *length* bytes.  Overlong values are silently truncated.
    """
    encoded = s.encode(encoding, errors)
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
190
def nts(s, encoding, errors):
    """Decode a NUL-terminated bytes field into a string.  Everything
    from the first NUL byte onwards is discarded.
    """
    end = s.find(b"\0")
    if end >= 0:
        s = s[:end]
    return s.decode(encoding, errors)
198
def nti(s):
    """Convert a tar number field (bytes) to a Python number.

    Supports both the POSIX octal representation and the GNU base-256
    representation marked by a leading 0o200 byte, see itn().

    Raises InvalidHeaderError if the field is not a valid number.
    """
    # Compare a one-byte slice instead of s[0] so the check works on
    # both Python 2 (indexing bytes yields a str) and Python 3
    # (indexing bytes yields an int).  The original `s[0] != chr(0o200)`
    # was always true on Python 3, making base-256 fields unreadable.
    if s[0:1] != b"\x80":
        try:
            n = int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    else:
        # GNU base-256: remaining bytes are a big-endian number.
        # bytearray() yields ints on both Python 2 and 3, whereas the
        # old `ord(s[i + 1])` raised TypeError on Python 3 ints.
        n = 0
        for byte in bytearray(s[1:]):
            n <<= 8
            n += byte
    return n
215
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of *digits* bytes.

    Returns bytes.  Raises ValueError if the value cannot be
    represented in the requested format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        s = ("%0*o" % (digits - 1, n)).encode("ascii") + NUL
    else:
        if format != GNU_FORMAT or n >= 256 ** (digits - 1):
            raise ValueError("overflow in number field")

        if n < 0:
            # XXX We mimic GNU tar's behaviour with negative numbers,
            # this could raise OverflowError.
            n = struct.unpack("L", struct.pack("l", n))[0]

        s = bytearray()
        for i in range(digits - 1):
            s.insert(0, n & 0o377)
            n >>= 8
        s.insert(0, 0o200)
        # Convert to bytes so both branches return the same type
        # (the original returned a bytearray from this branch only).
        s = bytes(s)
    return s
242
def calc_chksums(buf):
    """Calculate both checksums of a 512-byte header block.

    The 8-byte chksum field (offsets 148-155) is counted as if it
    contained spaces: eight 0x20 bytes sum to 256, which is added as a
    constant instead.  According to the GNU tar sources, some tars
    (Sun and NeXT) sum with signed chars, which differs whenever bytes
    with the high bit set are present -- so an (unsigned, signed) pair
    is returned for comparison.
    """
    head = buf[:148]
    tail = buf[156:512]
    unsigned = 256 + sum(struct.unpack("148B", head) + struct.unpack("356B", tail))
    signed = 256 + sum(struct.unpack("148b", head) + struct.unpack("356b", tail))
    return unsigned, signed
255
def copyfileobj(src, dst, length=None):
    """Copy *length* bytes from file object *src* to file object *dst*.

    If length is None, copy everything until EOF.  Raises IOError if
    *src* runs dry before *length* bytes were transferred.
    """
    BUFSIZE = 16 * 1024
    if length == 0:
        return
    if length is None:
        # Unbounded copy: shovel fixed-size chunks until EOF.
        while True:
            chunk = src.read(BUFSIZE)
            if not chunk:
                break
            dst.write(chunk)
        return

    full_blocks, leftover = divmod(length, BUFSIZE)
    for _ in range(full_blocks):
        chunk = src.read(BUFSIZE)
        if len(chunk) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(chunk)
    if leftover:
        chunk = src.read(leftover)
        if len(chunk) < leftover:
            raise IOError("end of file reached")
        dst.write(chunk)
    return
284
# Lookup table used by filemode(): one inner tuple per character of an
# "ls -l" style mode string.  Within each position the first matching
# (bit pattern, character) pair wins; filemode() falls back to "-".
filemode_table = (
    ((S_IFLNK, "l"),        # position 0: file type
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    ((TUREAD, "r"),),       # positions 1-3: owner read/write/execute
    ((TUWRITE, "w"),),
    ((TUEXEC|TSUID, "s"),   # setuid folds into the execute column
     (TSUID, "S"),
     (TUEXEC, "x")),

    ((TGREAD, "r"),),       # positions 4-6: group
    ((TGWRITE, "w"),),
    ((TGEXEC|TSGID, "s"),   # setgid folds into the execute column
     (TSGID, "S"),
     (TGEXEC, "x")),

    ((TOREAD, "r"),),       # positions 7-9: other
    ((TOWRITE, "w"),),
    ((TOEXEC|TSVTX, "t"),   # sticky bit folds into the execute column
     (TSVTX, "T"),
     (TOEXEC, "x"))
)
311
def filemode(mode):
    """Render *mode* as an ls-style permission string such as
    '-rwxrwxrwx'.  Used by TarFile.list().
    """
    def pick(options):
        # First matching bit pattern wins, "-" if nothing matches.
        for bit, char in options:
            if mode & bit == bit:
                return char
        return "-"

    return "".join(pick(options) for options in filemode_table)
326
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
360
361#---------------------------
362# internal stream interface
363#---------------------------
class _LowLevelFile(object):
    """Low-level file object. Supports reading and writing.
    It is used instead of a regular file object for streaming
    access.
    """

    def __init__(self, name, mode):
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY only exists on Windows; without it the CRLF
        # translation layer would corrupt archive data there.
        binary = getattr(os, "O_BINARY", 0)
        self.fd = os.open(name, flags | binary, 0o666)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)
387
class _Stream(object):
    """Class that serves as an adapter between TarFile and
    a stream-like object. The stream-like object only
    needs to have a read() or write() method and is accessed
    blockwise. Use of gzip or bzip2 compression is possible.
    A stream-like object could be for example: sys.stdin,
    sys.stdout, a socket, a tape device etc.

    _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

        name     -- archive name (may be empty)
        mode     -- "r" for reading, "w" for writing
        comptype -- "tar", "gz", "bz2", or "*" to auto-detect on read
        fileobj  -- external file object; opened from *name* if None
        bufsize  -- block size for raw reads/writes on the stream
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""          # raw (compressed) byte buffer
        self.pos = 0            # logical (uncompressed) stream position
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            if comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""     # decompressed byte buffer
                    self.cmp = bz2.BZ2Decompressor()
                else:
                    self.cmp = bz2.BZ2Compressor()
        except:
            # Bare except on purpose: whatever went wrong during setup,
            # close the fd we opened ourselves before re-raising.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # hasattr() guard: __init__ may have failed before self.closed
        # was ever assigned.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Raw deflate stream (negative wbits): the gzip header and
        # trailer are written by hand below and in close().
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        # Gzip header: magic, deflate method, FNAME flag, mtime,
        # extra flags, OS byte (0xff = unknown).
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        # self.pos tracks the uncompressed byte count.
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
        is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
        done on it afterwards.
        """
        if self.closed:
            return

        if self.mode == "w" and self.comptype != "tar":
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            self.fileobj.write(self.buf)
            self.buf = b""
            if self.comptype == "gz":
                # The native zlib crc is an unsigned 32-bit integer, but
                # the Python wrapper implicitly casts that to a signed C
                # long. So, on a 32-bit box self.crc may "look negative",
                # while the same crc on a 64-bit box may "look positive".
                # To avoid irksome warnings from the `struct` module, force
                # it to look positive on all boxes.
                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)      # skip mtime (4 bytes), XFL and OS byte

        if flag & 4:
            # FEXTRA: 2-byte little-endian length, then that much data.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            # FNAME: zero-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: zero-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)      # FHCRC: header CRC16

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
        is forbidden.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            # Forward seeking on a stream is emulated by reading and
            # discarding data.
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
        If size is not defined, return all bytes of the stream
        up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            # NOTE(review): joins bytes chunks with a str separator --
            # on Python 3 this looks like it would raise TypeError for
            # size=None; presumably this path is never hit. Confirm.
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # Decompress raw blocks until at least *size* uncompressed
        # bytes are buffered, or EOF is reached.
        c = len(self.dbuf)
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except IOError:
                raise ReadError("invalid compressed data")
            self.dbuf += buf
            c += len(buf)
        buf = self.dbuf[:size]
        self.dbuf = self.dbuf[size:]
        return buf

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
        read another block from the stream.
        """
        c = len(self.buf)
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            self.buf += buf
            c += len(buf)
        buf = self.buf[:size]
        self.buf = self.buf[size:]
        return buf
# class _Stream
619
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
    detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        # Sniff one block up front so getcomptype() can inspect the
        # magic bytes without consuming data.
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        # The first call returns the sniffed block; afterwards this
        # method is replaced so reads go straight to the file object.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Guess the compression type from the sniffed magic bytes."""
        for magic, comptype in ((b"\037\213\010", "gz"),
                                (b"BZh91", "bz2")):
            if self.buf.startswith(magic):
                return comptype
        return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy
643
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
    support for "r:bz2" and "w:bz2" modes. This is actually
    a workaround for a limitation in bz2 module's BZ2File
    class which (unlike gzip.GzipFile) has no support for
    a file object argument.
    """

    # Raw chunk size used when feeding the (de)compressor.
    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        # mode is "r" (decompress) or "w" (compress).
        self.fileobj = fileobj
        self.mode = mode
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        """(Re)start the (de)compressor at the beginning of the file."""
        import bz2
        self.pos = 0        # position in the uncompressed data
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = b""      # decompressed look-ahead buffer
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        """Return up to *size* uncompressed bytes."""
        x = len(self.buf)
        while x < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                break
            data = self.bz2obj.decompress(raw)
            self.buf += data
            x += len(data)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        """Seek to *pos*; seeking backwards restarts decompression
        from the beginning and reads forward to *pos*."""
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        return self.pos

    def write(self, data):
        """Compress *data* and write it to the external file object."""
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        # Flush remaining compressed data in write mode.
        if self.mode == "w":
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
# class _BZ2Proxy
703
704#------------------------
705# Extraction file object
706#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
    provides a part of its data as an individual file
    object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        """fileobj   -- underlying (seekable) file object
        offset    -- start of this member's data inside *fileobj*
        size      -- logical size of the member
        blockinfo -- list of (offset, size) data runs for sparse
                     members; None means one contiguous run
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.
        # Each entry is (is_data, logical start, logical stop,
        # real position in fileobj or None for a zero-filled hole).
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                # Gap before this data run: a hole.
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            # Trailing hole up to the logical end of the member.
            self.map.append((False, lastpos, self.size, None))

    def seekable(self):
        if not hasattr(self.fileobj, "seekable"):
            # XXX gzip.GzipFile and bz2.BZ2File
            return True
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.
        """
        # Clamp the request to the remaining logical size.
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        buf = b""
        while size > 0:
            # Find the map entry covering the current position; the
            # index wraps around since seeks may go backwards.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                buf += self.fileobj.read(length)
            else:
                buf += NUL * length     # hole: synthesize zero bytes
            size -= length
            self.position += length
        return buf
#class _FileInFile
780
781
class ExFileObject(object):
    """File-like object for reading an archive member.
    Is returned by TarFile.extractfile().
    """
    # Chunk size used by readline() when scanning for a newline.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   tarinfo.sparse)
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0       # logical read position
        self.buffer = b""       # read-ahead buffer filled by readline()

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
        present or None, read all data until EOF is reached.

        Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        buf = b""
        # Serve any data buffered by readline() first.
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = b""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    # XXX TextIOWrapper uses the read1() method.
    read1 = read

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
        and non-negative, return a string with at most that
        size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # pos is the index one past the newline, or 0 if none buffered.
        pos = self.buffer.find(b"\n") + 1
        if pos == 0:
            # no newline found.
            while True:
                buf = self.fileobj.read(self.blocksize)
                self.buffer += buf
                if not buf or b"\n" in buf:
                    pos = self.buffer.find(b"\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        if size != -1:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.

        Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.

        Raises ValueError on a closed file or an invalid *whence*.
        The resulting position is clamped to [0, size].
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # The read-ahead buffer no longer matches the new position.
        self.buffer = b""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
919
920#------------------
921# Exported Classes
922#------------------
923class TarInfo(object):
924 """Informational class which holds the details about an
925 archive member given by a tar header block.
926 TarInfo objects are returned by TarFile.getmember(),
927 TarFile.getmembers() and TarFile.gettarinfo() and are
928 usually created internally.
929 """
930
931 __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
932 "chksum", "type", "linkname", "uname", "gname",
933 "devmajor", "devminor",
934 "offset", "offset_data", "pax_headers", "sparse",
935 "tarfile", "_sparse_structs", "_link_target")
936
    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
        of the member.

        All other attributes are set to the defaults of an empty
        regular file owned by uid/gid 0.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information
960
    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    # "path" is an alias attribute mirroring "name" under its pax name.
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    # "linkpath" likewise mirrors "linkname".
    linkpath = property(_getlinkpath, _setlinkpath)
974
975 def __repr__(self):
976 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
977
978 def get_info(self):
979 """Return the TarInfo's attributes as a dictionary.
980 """
981 info = {
982 "name": self.name,
983 "mode": self.mode & 0o7777,
984 "uid": self.uid,
985 "gid": self.gid,
986 "size": self.size,
987 "mtime": self.mtime,
988 "chksum": self.chksum,
989 "type": self.type,
990 "linkname": self.linkname,
991 "uname": self.uname,
992 "gname": self.gname,
993 "devmajor": self.devmajor,
994 "devminor": self.devminor
995 }
996
997 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
998 info["name"] += "/"
999
1000 return info
1001
1002 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
1003 """Return a tar header as a string of 512 byte blocks.
1004 """
1005 info = self.get_info()
1006
1007 if format == USTAR_FORMAT:
1008 return self.create_ustar_header(info, encoding, errors)
1009 elif format == GNU_FORMAT:
1010 return self.create_gnu_header(info, encoding, errors)
1011 elif format == PAX_FORMAT:
1012 return self.create_pax_header(info, encoding)
1013 else:
1014 raise ValueError("invalid format")
1015
    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

        Raises ValueError if the linkname exceeds the 100-byte field
        or the name cannot be split into prefix and name parts.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            # ustar has no long-name extension; overflow goes into the
            # 155-byte prefix field instead.
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)
1028
    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

        Overlong names/linknames are emitted as extra GNUTYPE_LONGNAME
        / GNUTYPE_LONGLINK pseudo-member blocks preceding the real
        header.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
1042
    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
        represented this way, prepend a pax extended header sequence
        with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                # Non-ASCII value: must go into the extended header.
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                # Too long for the fixed-size ustar field.
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                # Store the exact value in the pax header and zero the
                # binary field so it cannot overflow.
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        # The ustar header itself is always ASCII with unrepresentable
        # characters replaced; the pax records carry the real values.
        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
1091
    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.

        A global header (type XGLTYPE) supplies defaults for all
        following archive members.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8")
1097
1098 def _posix_split_name(self, name):
1099 """Split a name longer than 100 chars into a prefix
1100 and a name part.
1101 """
1102 prefix = name[:LENGTH_PREFIX + 1]
1103 while prefix and prefix[-1] != "/":
1104 prefix = prefix[:-1]
1105
1106 name = name[len(prefix):]
1107 prefix = prefix[:-1]
1108
1109 if not prefix or len(name) > LENGTH_NAME:
1110 raise ValueError("name is too long")
1111 return prefix, name
1112
    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
        information, format must be one of the *_FORMAT constants.

        Missing keys fall back to sensible defaults, so a partially
        filled dictionary is acceptable.
        """
        # Field layout of a 512-byte ustar header, in order.
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            b"        ", # checksum field (blank while the checksum is computed)
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        # Pad the joined fields to a full 512-byte block.
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Patch the checksum into bytes 148..155 of the block:
        # 512 - 148 == 364 and 512 - 155 == 357, hence the slice bounds.
        buf = buf[:-364] + ("%06o\0" % chksum).encode("ascii") + buf[-357:]
        return buf
1140
1141 @staticmethod
1142 def _create_payload(payload):
1143 """Return the string payload filled with zero bytes
1144 up to the next 512 byte border.
1145 """
1146 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1147 if remainder > 0:
1148 payload += (BLOCKSIZE - remainder) * NUL
1149 return payload
1150
1151 @classmethod
1152 def _create_gnu_long_header(cls, name, type, encoding, errors):
1153 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1154 for name.
1155 """
1156 name = name.encode(encoding, errors) + NUL
1157
1158 info = {}
1159 info["name"] = "././@LongLink"
1160 info["type"] = type
1161 info["size"] = len(name)
1162 info["magic"] = GNU_MAGIC
1163
1164 # create extended header + name blocks.
1165 return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
1166 cls._create_payload(name)
1167
    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
        that contains a list of keyword, value pairs. The values
        must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for keyword, value in pax_headers.items():
            try:
                value.encode("utf8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf8")

            # A record is "<length> <keyword>=<value>\n" where <length>
            # counts the whole record including its own digits. Iterate
            # until the digit count of the total length stabilizes.
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)
1218
    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

        Raises EmptyHeaderError, TruncatedHeaderError, EOFHeaderError or
        InvalidHeaderError for buffers that are not a valid header block.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            # An all-zero block marks the end of the archive.
            raise EOFHeaderError("end of file header")

        # The checksum field occupies bytes 148..155.
        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        # Slice the fixed-offset ustar fields out of the block.
        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            # Byte 482 flags whether extension blocks follow; bytes
            # 483..494 hold the real (expanded) file size.
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj
1281
1282 @classmethod
1283 def fromtarfile(cls, tarfile):
1284 """Return the next TarInfo object from TarFile object
1285 tarfile.
1286 """
1287 buf = tarfile.fileobj.read(BLOCKSIZE)
1288 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1289 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1290 return obj._proc_member(tarfile)
1291
1292 #--------------------------------------------------------------------------
1293 # The following are methods that are called depending on the type of a
1294 # member. The entry point is _proc_member() which can be overridden in a
1295 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1296 # implement the following
1297 # operations:
1298 # 1. Set self.offset_data to the position where the data blocks begin,
1299 # if there is data that follows.
1300 # 2. Set tarfile.offset to the position where the next member's header will
1301 # begin.
1302 # 3. Return self or another valid TarInfo object.
1303 def _proc_member(self, tarfile):
1304 """Choose the right processing method depending on
1305 the type and call it.
1306 """
1307 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1308 return self._proc_gnulong(tarfile)
1309 elif self.type == GNUTYPE_SPARSE:
1310 return self._proc_sparse(tarfile)
1311 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1312 return self._proc_pax(tarfile)
1313 else:
1314 return self._proc_builtin(tarfile)
1315
1316 def _proc_builtin(self, tarfile):
1317 """Process a builtin type or an unknown type which
1318 will be treated as a regular file.
1319 """
1320 self.offset_data = tarfile.fileobj.tell()
1321 offset = self.offset_data
1322 if self.isreg() or self.type not in SUPPORTED_TYPES:
1323 # Skip the following data blocks.
1324 offset += self._block(self.size)
1325 tarfile.offset = offset
1326
1327 # Patch the TarInfo object with saved global
1328 # header information.
1329 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1330
1331 return self
1332
1333 def _proc_gnulong(self, tarfile):
1334 """Process the blocks that hold a GNU longname
1335 or longlink member.
1336 """
1337 buf = tarfile.fileobj.read(self._block(self.size))
1338
1339 # Fetch the next header and process it.
1340 try:
1341 next = self.fromtarfile(tarfile)
1342 except HeaderError:
1343 raise SubsequentHeaderError("missing or bad subsequent header")
1344
1345 # Patch the TarInfo object from the next header with
1346 # the longname information.
1347 next.offset = self.offset
1348 if self.type == GNUTYPE_LONGNAME:
1349 next.name = nts(buf, tarfile.encoding, tarfile.errors)
1350 elif self.type == GNUTYPE_LONGLINK:
1351 next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1352
1353 return next
1354
    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            # Each extension block holds up to 21 (offset, numbytes)
            # pairs of 12 octal bytes each (21 * 24 == 504 bytes).
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            # Byte 504 flags whether yet another extension block follows.
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        # The header's size field holds the archived (compacted) size;
        # expose the real expanded file size instead.
        self.size = origsize
        return self
1382
    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
        POSIX.1-2008.

        Returns the TarInfo object for the member that follows the
        pax header, patched with the header's information.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            # Global header: mutate the TarFile-wide dictionary in place.
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            # The value runs from just past '=' to just before the
            # trailing newline of the record.
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf8", "utf8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf8", "utf8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next
1484
1485 def _proc_gnusparse_00(self, next, pax_headers, buf):
1486 """Process a GNU tar extended sparse header, version 0.0.
1487 """
1488 offsets = []
1489 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1490 offsets.append(int(match.group(1)))
1491 numbytes = []
1492 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1493 numbytes.append(int(match.group(1)))
1494 next.sparse = list(zip(offsets, numbytes))
1495
1496 def _proc_gnusparse_01(self, next, pax_headers):
1497 """Process a GNU tar extended sparse header, version 0.1.
1498 """
1499 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1500 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1501
    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.

        The sparse map precedes the file data as newline-separated
        decimal numbers, padded out to full 512-byte blocks.
        """
        fields = None
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        # The first line holds the number of (offset, numbytes) pairs.
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            if b"\n" not in buf:
                # The map continues in the next 512-byte block.
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        # The actual file data starts right after the map blocks.
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1517
1518 def _apply_pax_info(self, pax_headers, encoding, errors):
1519 """Replace fields with supplemental information from a previous
1520 pax extended or global header.
1521 """
1522 for keyword, value in pax_headers.items():
1523 if keyword == "GNU.sparse.name":
1524 setattr(self, "path", value)
1525 elif keyword == "GNU.sparse.size":
1526 setattr(self, "size", int(value))
1527 elif keyword == "GNU.sparse.realsize":
1528 setattr(self, "size", int(value))
1529 elif keyword in PAX_FIELDS:
1530 if keyword in PAX_NUMBER_FIELDS:
1531 try:
1532 value = PAX_NUMBER_FIELDS[keyword](value)
1533 except ValueError:
1534 value = 0
1535 if keyword == "path":
1536 value = value.rstrip("/")
1537 setattr(self, keyword, value)
1538
1539 self.pax_headers = pax_headers.copy()
1540
1541 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1542 """Decode a single field from a pax record.
1543 """
1544 try:
1545 return value.decode(encoding, "strict")
1546 except UnicodeDecodeError:
1547 return value.decode(fallback_encoding, fallback_errors)
1548
1549 def _block(self, count):
1550 """Round up a byte count by BLOCKSIZE and return it,
1551 e.g. _block(834) => 1024.
1552 """
1553 blocks, remainder = divmod(count, BLOCKSIZE)
1554 if remainder:
1555 blocks += 1
1556 return blocks * BLOCKSIZE
1557
    def isreg(self):
        # True for regular files (including old-style AREGTYPE entries).
        return self.type in REGULAR_TYPES
    def isfile(self):
        # Alias for isreg().
        return self.isreg()
    def isdir(self):
        # True for directory entries.
        return self.type == DIRTYPE
    def issym(self):
        # True for symbolic links.
        return self.type == SYMTYPE
    def islnk(self):
        # True for hard links to members already in the archive.
        return self.type == LNKTYPE
    def ischr(self):
        # True for character device nodes.
        return self.type == CHRTYPE
    def isblk(self):
        # True for block device nodes.
        return self.type == BLKTYPE
    def isfifo(self):
        # True for FIFO (named pipe) entries.
        return self.type == FIFOTYPE
    def issparse(self):
        # True once sparse structures have been collected for this member.
        return self.sparse is not None
    def isdev(self):
        # True for any device-like entry (char, block or FIFO).
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1578# class TarInfo
1579
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.

    The class-level attributes below act as defaults; each has a
    matching __init__ keyword argument that overrides it per instance.
    """

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The default ExFileObject class to use.
1605
    def __init__(self, name=None, mode="r", fileobj=None, format=None,
            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
            errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
        read from an existing archive, 'a' to append data to an existing
        file or 'w' to create a new file overwriting an existing one. `mode'
        defaults to 'r'.
        If `fileobj' is given, it is used for reading or writing data. If it
        can be determined, `mode' is overridden by `fileobj's mode.
        `fileobj' is not closed, when TarFile is closed.

        The remaining keyword arguments override the class-level defaults
        of the same name when not None.
        """
        if len(mode) > 1 or mode not in "raw":
            raise ValueError("mode must be 'r', 'a' or 'w'")
        self.mode = mode
        # Map the logical mode to the binary open mode.
        self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

        if not fileobj:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode = "w"
                self._mode = "wb"
            fileobj = bltn_open(name, self._mode)
            self._extfileobj = False
        else:
            if name is None and hasattr(fileobj, "name"):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                self._mode = fileobj.mode
            # The caller owns the file object, so close() must not close it.
            self._extfileobj = True
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        # Init attributes.
        if format is not None:
            self.format = format
        if tarinfo is not None:
            self.tarinfo = tarinfo
        if dereference is not None:
            self.dereference = dereference
        if ignore_zeros is not None:
            self.ignore_zeros = ignore_zeros
        if encoding is not None:
            self.encoding = encoding
        self.errors = errors

        # pax_headers are only honored for PAX format archives.
        if pax_headers is not None and self.format == PAX_FORMAT:
            self.pax_headers = pax_headers
        else:
            self.pax_headers = {}

        if debug is not None:
            self.debug = debug
        if errorlevel is not None:
            self.errorlevel = errorlevel

        # Init datastructures.
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        try:
            if self.mode == "r":
                self.firstmember = None
                self.firstmember = self.next()

            if self.mode == "a":
                # Move to the end of the archive,
                # before the first empty block.
                while True:
                    self.fileobj.seek(self.offset)
                    try:
                        tarinfo = self.tarinfo.fromtarfile(self)
                        self.members.append(tarinfo)
                    except EOFHeaderError:
                        self.fileobj.seek(self.offset)
                        break
                    except HeaderError as e:
                        raise ReadError(str(e))

            if self.mode in "aw":
                self._loaded = True

                if self.pax_headers:
                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                    self.fileobj.write(buf)
                    self.offset += len(buf)
        except:
            # On any failure, close a file we opened ourselves before
            # propagating the error.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise
1701
1702 #--------------------------------------------------------------------------
1703 # Below are the classmethods which act as alternate constructors to the
1704 # TarFile class. The open() method is the only one that is needed for
1705 # public use; it is the "super"-constructor and is able to select an
1706 # adequate "sub"-constructor for a particular compression using the mapping
1707 # from OPEN_METH.
1708 #
1709 # This concept allows one to subclass TarFile without losing the comfort of
1710 # the super-constructor. A sub-constructor is registered and made available
1711 # by adding it to the mapping in OPEN_METH.
1712
    @classmethod
    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
        """Open a tar archive for reading, writing or appending. Return
        an appropriate TarFile class.

        mode:
        'r' or 'r:*' open for reading with transparent compression
        'r:'         open for reading exclusively uncompressed
        'r:gz'       open for reading with gzip compression
        'r:bz2'      open for reading with bzip2 compression
        'a' or 'a:'  open for appending, creating the file if necessary
        'w' or 'w:'  open for writing without compression
        'w:gz'       open for writing with gzip compression
        'w:bz2'      open for writing with bzip2 compression

        'r|*'        open a stream of tar blocks with transparent compression
        'r|'         open an uncompressed stream of tar blocks for reading
        'r|gz'       open a gzip compressed stream of tar blocks
        'r|bz2'      open a bzip2 compressed stream of tar blocks
        'w|'         open an uncompressed stream for writing
        'w|gz'       open a gzip compressed stream for writing
        'w|bz2'      open a bzip2 compressed stream for writing
        """

        if not name and not fileobj:
            raise ValueError("nothing to open")

        if mode in ("r", "r:*"):
            # Find out which *open() is appropriate for opening the file.
            for comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
                if fileobj is not None:
                    saved_pos = fileobj.tell()
                try:
                    return func(name, "r", fileobj, **kwargs)
                except (ReadError, CompressionError) as e:
                    # Rewind and let the next candidate have a try.
                    if fileobj is not None:
                        fileobj.seek(saved_pos)
                    continue
            raise ReadError("file could not be opened successfully")

        elif ":" in mode:
            filemode, comptype = mode.split(":", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            # Select the *open() function according to
            # given compression.
            if comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
            else:
                raise CompressionError("unknown compression type %r" % comptype)
            return func(name, filemode, fileobj, **kwargs)

        elif "|" in mode:
            filemode, comptype = mode.split("|", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            if filemode not in "rw":
                raise ValueError("mode must be 'r' or 'w'")

            # Stream mode wraps the raw file in a _Stream object that only
            # permits sequential access.
            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
            try:
                t = cls(name, filemode, stream, **kwargs)
            except:
                stream.close()
                raise
            # The stream was created here, so this TarFile owns it.
            t._extfileobj = False
            return t

        elif mode in "aw":
            return cls.taropen(name, mode, fileobj, **kwargs)

        raise ValueError("undiscernible mode")
1788
1789 @classmethod
1790 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1791 """Open uncompressed tar archive name for reading or writing.
1792 """
1793 if len(mode) > 1 or mode not in "raw":
1794 raise ValueError("mode must be 'r', 'a' or 'w'")
1795 return cls(name, mode, fileobj, **kwargs)
1796
    @classmethod
    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open gzip compressed tar archive name for reading or writing.
        Appending is not allowed.
        """
        if len(mode) > 1 or mode not in "rw":
            raise ValueError("mode must be 'r' or 'w'")

        try:
            import gzip
            gzip.GzipFile
        except (ImportError, AttributeError):
            raise CompressionError("gzip module is not available")

        # Remember whether the caller supplied the file object, so we
        # never close an object we did not create.
        extfileobj = fileobj is not None
        try:
            fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except IOError:
            # Not a readable gzip file: close the GzipFile wrapper we
            # created, but only re-raise as ReadError when we were
            # actually able to build one.
            if not extfileobj and fileobj is not None:
                fileobj.close()
            if fileobj is None:
                raise
            raise ReadError("not a gzip file")
        except:
            if not extfileobj and fileobj is not None:
                fileobj.close()
            raise
        t._extfileobj = extfileobj
        return t
1827
    @classmethod
    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
        Appending is not allowed.
        """
        if len(mode) > 1 or mode not in "rw":
            raise ValueError("mode must be 'r' or 'w'.")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        if fileobj is not None:
            # A caller-supplied file object is wrapped in a decompressing
            # proxy -- presumably because this bz2.BZ2File cannot wrap an
            # arbitrary file object directly (see _BZ2Proxy).
            fileobj = _BZ2Proxy(fileobj, mode)
        else:
            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (IOError, EOFError):
            fileobj.close()
            raise ReadError("not a bzip2 file")
        # NOTE(review): unlike gzopen(), _extfileobj is always False here,
        # so close() also closes the wrapper around a caller-supplied file
        # object -- confirm this matches _BZ2Proxy.close() semantics.
        t._extfileobj = False
        return t
1853
    # All *open() methods are registered here.
    # Maps the compression suffix used in mode strings (e.g. "r:gz")
    # to the name of the classmethod that implements it.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
1860
1861 #--------------------------------------------------------------------------
1862 # The public methods which TarFile provides:
1863
1864 def close(self):
1865 """Close the TarFile. In write-mode, two finishing zero blocks are
1866 appended to the archive.
1867 """
1868 if self.closed:
1869 return
1870
1871 if self.mode in "aw":
1872 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1873 self.offset += (BLOCKSIZE * 2)
1874 # fill up the end with zero-blocks
1875 # (like option -b20 for tar does)
1876 blocks, remainder = divmod(self.offset, RECORDSIZE)
1877 if remainder > 0:
1878 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1879
1880 if not self._extfileobj:
1881 self.fileobj.close()
1882 self.closed = True
1883
1884 def getmember(self, name):
1885 """Return a TarInfo object for member `name'. If `name' can not be
1886 found in the archive, KeyError is raised. If a member occurs more
1887 than once in the archive, its last occurrence is assumed to be the
1888 most up-to-date version.
1889 """
1890 tarinfo = self._getmember(name)
1891 if tarinfo is None:
1892 raise KeyError("filename %r not found" % name)
1893 return tarinfo
1894
1895 def getmembers(self):
1896 """Return the members of the archive as a list of TarInfo objects. The
1897 list has the same order as the members in the archive.
1898 """
1899 self._check()
1900 if not self._loaded: # if we want to obtain a list of
1901 self._load() # all members, we first have to
1902 # scan the whole archive.
1903 return self.members
1904
1905 def getnames(self):
1906 """Return the members of the archive as a list of their names. It has
1907 the same order as the list returned by getmembers().
1908 """
1909 return [tarinfo.name for tarinfo in self.getmembers()]
1910
    def gettarinfo(self, name=None, arcname=None, fileobj=None):
        """Create a TarInfo object for either the file `name' or the file
        object `fileobj' (using os.fstat on its file descriptor). You can
        modify some of the TarInfo's attributes before you add it using
        addfile(). If given, `arcname' specifies an alternative name for the
        file in the archive.

        Returns None for file types that cannot be archived (e.g. sockets).
        """
        self._check("aw")

        # When fileobj is given, replace name by
        # fileobj's real name.
        if fileobj is not None:
            name = fileobj.name

        # Building the name of the member in the archive.
        # Backward slashes are converted to forward slashes,
        # Absolute paths are turned to relative paths.
        if arcname is None:
            arcname = name
        drv, arcname = os.path.splitdrive(arcname)
        arcname = arcname.replace(os.sep, "/")
        arcname = arcname.lstrip("/")

        # Now, fill the TarInfo object with
        # information specific for the file.
        tarinfo = self.tarinfo()
        tarinfo.tarfile = self

        # Use os.stat or os.lstat, depending on platform
        # and if symlinks shall be resolved.
        if fileobj is None:
            if hasattr(os, "lstat") and not self.dereference:
                statres = os.lstat(name)
            else:
                statres = os.stat(name)
        else:
            statres = os.fstat(fileobj.fileno())
        linkname = ""

        # Derive the member type from the stat mode bits.
        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            inode = (statres.st_ino, statres.st_dev)
            if not self.dereference and statres.st_nlink > 1 and \
                    inode in self.inodes and arcname != self.inodes[inode]:
                # Is it a hardlink to an already
                # archived file?
                type = LNKTYPE
                linkname = self.inodes[inode]
            else:
                # The inode is added only if its valid.
                # For win32 it is always 0.
                type = REGTYPE
                if inode[0]:
                    self.inodes[inode] = arcname
        elif stat.S_ISDIR(stmd):
            type = DIRTYPE
        elif stat.S_ISFIFO(stmd):
            type = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            type = SYMTYPE
            linkname = os.readlink(name)
        elif stat.S_ISCHR(stmd):
            type = CHRTYPE
        elif stat.S_ISBLK(stmd):
            type = BLKTYPE
        else:
            # Sockets and other exotic types cannot be archived.
            return None

        # Fill the TarInfo object with all
        # information we can get.
        tarinfo.name = arcname
        tarinfo.mode = stmd
        tarinfo.uid = statres.st_uid
        tarinfo.gid = statres.st_gid
        if type == REGTYPE:
            tarinfo.size = statres.st_size
        else:
            tarinfo.size = 0
        tarinfo.mtime = statres.st_mtime
        tarinfo.type = type
        tarinfo.linkname = linkname
        # Resolve numeric ids to names where the platform provides
        # pwd/grp; unknown ids are simply left unnamed.
        if pwd:
            try:
                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
            except KeyError:
                pass
        if grp:
            try:
                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
            except KeyError:
                pass

        if type in (CHRTYPE, BLKTYPE):
            if hasattr(os, "major") and hasattr(os, "minor"):
                tarinfo.devmajor = os.major(statres.st_rdev)
                tarinfo.devminor = os.minor(statres.st_rdev)
        return tarinfo
2008
    def list(self, verbose=True):
        """Print a table of contents to sys.stdout. If `verbose' is False, only
        the names of the members are printed. If it is True, an `ls -l'-like
        output is produced.
        """
        self._check()

        for tarinfo in self:
            if verbose:
                # Mode, owner/group, size (or device numbers) and mtime.
                print(filemode(tarinfo.mode), end=' ')
                print("%s/%s" % (tarinfo.uname or tarinfo.uid,
                                 tarinfo.gname or tarinfo.gid), end=' ')
                if tarinfo.ischr() or tarinfo.isblk():
                    print("%10s" % ("%d,%d" \
                                    % (tarinfo.devmajor, tarinfo.devminor)), end=' ')
                else:
                    print("%10d" % tarinfo.size, end=' ')
                print("%d-%02d-%02d %02d:%02d:%02d" \
                      % time.localtime(tarinfo.mtime)[:6], end=' ')

            # Directories are shown with a trailing slash.
            print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ')

            if verbose:
                if tarinfo.issym():
                    print("->", tarinfo.linkname, end=' ')
                if tarinfo.islnk():
                    print("link to", tarinfo.linkname, end=' ')
            print()
2037
    def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
        """Add the file `name' to the archive. `name' may be any type of file
           (directory, fifo, symbolic link, etc.). If given, `arcname'
           specifies an alternative name for the file in the archive.
           Directories are added recursively by default. This can be avoided by
           setting `recursive' to False. `exclude' is a deprecated function
           that should return True for each filename to be excluded. `filter'
           is a function that expects a TarInfo object argument and returns
           the changed TarInfo object, if it returns None the TarInfo object
           will be excluded from the archive.
        """
        # Requires the archive to be open for appending or writing.
        self._check("aw")

        if arcname is None:
            arcname = name

        # Exclude pathnames (deprecated in favor of `filter').
        if exclude is not None:
            import warnings
            warnings.warn("use the filter argument instead",
                          DeprecationWarning, 2)
            if exclude(name):
                self._dbg(2, "tarfile: Excluded %r" % name)
                return

        # Skip if somebody tries to archive the archive...
        if self.name is not None and os.path.abspath(name) == self.name:
            self._dbg(2, "tarfile: Skipped %r" % name)
            return

        self._dbg(1, name)

        # Create a TarInfo object from the file.
        tarinfo = self.gettarinfo(name, arcname)

        # gettarinfo() returns None for file types tar cannot represent
        # (e.g. sockets).
        if tarinfo is None:
            self._dbg(1, "tarfile: Unsupported type %r" % name)
            return

        # Change or exclude the TarInfo object; a None result from the
        # filter drops the member entirely (subtree included, since we
        # return before the recursive walk below).
        if filter is not None:
            tarinfo = filter(tarinfo)
            if tarinfo is None:
                self._dbg(2, "tarfile: Excluded %r" % name)
                return

        # Append the tar header and data to the archive.
        if tarinfo.isreg():
            f = bltn_open(name, "rb")
            self.addfile(tarinfo, f)
            f.close()

        elif tarinfo.isdir():
            self.addfile(tarinfo)
            # Recurse into the directory, mirroring the on-disk layout
            # under `arcname' in the archive.
            if recursive:
                for f in os.listdir(name):
                    self.add(os.path.join(name, f), os.path.join(arcname, f),
                            recursive, exclude, filter=filter)

        else:
            # Symlinks, fifos, device nodes: header only, no data.
            self.addfile(tarinfo)
2099
2100 def addfile(self, tarinfo, fileobj=None):
2101 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2102 given, tarinfo.size bytes are read from it and added to the archive.
2103 You can create TarInfo objects using gettarinfo().
2104 On Windows platforms, `fileobj' should always be opened with mode
2105 'rb' to avoid irritation about the file size.
2106 """
2107 self._check("aw")
2108
2109 tarinfo = copy.copy(tarinfo)
2110
2111 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2112 self.fileobj.write(buf)
2113 self.offset += len(buf)
2114
2115 # If there's data to follow, append it.
2116 if fileobj is not None:
2117 copyfileobj(fileobj, self.fileobj, tarinfo.size)
2118 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2119 if remainder > 0:
2120 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2121 blocks += 1
2122 self.offset += blocks * BLOCKSIZE
2123
2124 self.members.append(tarinfo)
2125
    def extractall(self, path=".", members=None):
        """Extract all members from the archive to the current working
           directory and set owner, modification time and permissions on
           directories afterwards. `path' specifies a different directory
           to extract to. `members' is optional and must be a subset of the
           list returned by getmembers().
        """
        directories = []

        if members is None:
            members = self

        for tarinfo in members:
            if tarinfo.isdir():
                # Extract directories with a safe mode first; the real
                # mode is applied below, after all children are in place.
                directories.append(tarinfo)
                tarinfo = copy.copy(tarinfo)
                tarinfo.mode = 0o700
            # Do not set_attrs directories, as we will do that further down
            self.extract(tarinfo, path, set_attrs=not tarinfo.isdir())

        # Reverse sort directories, so that the deepest directories are
        # handled first -- setting a parent's (possibly read-only) mode
        # before its children would block extraction into it.
        directories.sort(key=lambda a: a.name)
        directories.reverse()

        # Set correct owner, mtime and filemode on directories.
        for tarinfo in directories:
            dirpath = os.path.join(path, tarinfo.name)
            try:
                self.chown(tarinfo, dirpath)
                self.utime(tarinfo, dirpath)
                self.chmod(tarinfo, dirpath)
            except ExtractError as e:
                # errorlevel <= 1: log non-fatal attribute errors instead
                # of raising.
                if self.errorlevel > 1:
                    raise
                else:
                    self._dbg(1, "tarfile: %s" % e)
2163
    def extract(self, member, path="", set_attrs=True):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a TarInfo object. You can
           specify a different directory using `path'. File attributes (owner,
           mtime, mode) are set unless `set_attrs' is False.
        """
        self._check("r")

        # Accept either a member name or a TarInfo object.
        if isinstance(member, str):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        # Prepare the link target for makelink().
        if tarinfo.islnk():
            tarinfo._link_target = os.path.join(path, tarinfo.linkname)

        # NOTE(review): tarinfo.name is joined to `path' as-is; a crafted
        # archive with ".." components could escape `path'. This mirrors
        # historical stdlib tarfile behavior -- confirm callers pass
        # trusted archives.
        try:
            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                                 set_attrs=set_attrs)
        except EnvironmentError as e:
            # errorlevel 0: OS errors are only logged, not raised.
            if self.errorlevel > 0:
                raise
            else:
                if e.filename is None:
                    self._dbg(1, "tarfile: %s" % e.strerror)
                else:
                    self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
        except ExtractError as e:
            # errorlevel <= 1: non-fatal extraction problems are logged.
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
2198
2199 def extractfile(self, member):
2200 """Extract a member from the archive as a file object. `member' may be
2201 a filename or a TarInfo object. If `member' is a regular file, a
2202 file-like object is returned. If `member' is a link, a file-like
2203 object is constructed from the link's target. If `member' is none of
2204 the above, None is returned.
2205 The file-like object is read-only and provides the following
2206 methods: read(), readline(), readlines(), seek() and tell()
2207 """
2208 self._check("r")
2209
2210 if isinstance(member, str):
2211 tarinfo = self.getmember(member)
2212 else:
2213 tarinfo = member
2214
2215 if tarinfo.isreg():
2216 return self.fileobject(self, tarinfo)
2217
2218 elif tarinfo.type not in SUPPORTED_TYPES:
2219 # If a member's type is unknown, it is treated as a
2220 # regular file.
2221 return self.fileobject(self, tarinfo)
2222
2223 elif tarinfo.islnk() or tarinfo.issym():
2224 if isinstance(self.fileobj, _Stream):
2225 # A small but ugly workaround for the case that someone tries
2226 # to extract a (sym)link as a file-object from a non-seekable
2227 # stream of tar blocks.
2228 raise StreamError("cannot extract (sym)link as file object")
2229 else:
2230 # A (sym)link's file object is its target's file object.
2231 return self.extractfile(self._find_link_target(tarinfo))
2232 else:
2233 # If there's no data associated with the member (directory, chrdev,
2234 # blkdev, etc.), return None instead of a file object.
2235 return None
2236
    def _extract_member(self, tarinfo, targetpath, set_attrs=True):
        """Extract the TarInfo object tarinfo to a physical
           file called targetpath. `set_attrs' controls whether owner,
           mode and mtime are applied afterwards.
        """
        # Fetch the TarInfo object for the given name
        # and build the destination pathname, replacing
        # forward slashes to platform specific separators.
        targetpath = targetpath.rstrip("/")
        targetpath = targetpath.replace("/", os.sep)

        # Create all upper directories.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            # Create directories that are not part of the archive with
            # default permissions.
            os.makedirs(upperdirs)

        if tarinfo.islnk() or tarinfo.issym():
            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
        else:
            self._dbg(1, tarinfo.name)

        # Dispatch on the member type; subclasses may override the
        # individual make*() methods. The final `else' treats any
        # remaining (but supported) type as a regular file.
        if tarinfo.isreg():
            self.makefile(tarinfo, targetpath)
        elif tarinfo.isdir():
            self.makedir(tarinfo, targetpath)
        elif tarinfo.isfifo():
            self.makefifo(tarinfo, targetpath)
        elif tarinfo.ischr() or tarinfo.isblk():
            self.makedev(tarinfo, targetpath)
        elif tarinfo.islnk() or tarinfo.issym():
            self.makelink(tarinfo, targetpath)
        elif tarinfo.type not in SUPPORTED_TYPES:
            self.makeunknown(tarinfo, targetpath)
        else:
            self.makefile(tarinfo, targetpath)

        if set_attrs:
            self.chown(tarinfo, targetpath)
            # chmod/utime would act on the symlink *target*, so skip
            # them for symbolic links.
            if not tarinfo.issym():
                self.chmod(tarinfo, targetpath)
                self.utime(tarinfo, targetpath)
2279
2280 #--------------------------------------------------------------------------
2281 # Below are the different file methods. They are called via
2282 # _extract_member() when extract() is called. They can be replaced in a
2283 # subclass to implement other functionality.
2284
2285 def makedir(self, tarinfo, targetpath):
2286 """Make a directory called targetpath.
2287 """
2288 try:
2289 # Use a safe mode for the directory, the real mode is set
2290 # later in _extract_member().
2291 os.mkdir(targetpath, 0o700)
2292 except EnvironmentError as e:
2293 if e.errno != errno.EEXIST:
2294 raise
2295
2296 def makefile(self, tarinfo, targetpath):
2297 """Make a file called targetpath.
2298 """
2299 source = self.fileobj
2300 source.seek(tarinfo.offset_data)
2301 target = bltn_open(targetpath, "wb")
2302 if tarinfo.sparse is not None:
2303 for offset, size in tarinfo.sparse:
2304 target.seek(offset)
2305 copyfileobj(source, target, size)
2306 else:
2307 copyfileobj(source, target, tarinfo.size)
2308 target.seek(tarinfo.size)
2309 target.truncate()
2310 target.close()
2311
2312 def makeunknown(self, tarinfo, targetpath):
2313 """Make a file from a TarInfo object with an unknown type
2314 at targetpath.
2315 """
2316 self.makefile(tarinfo, targetpath)
2317 self._dbg(1, "tarfile: Unknown file type %r, " \
2318 "extracted as regular file." % tarinfo.type)
2319
2320 def makefifo(self, tarinfo, targetpath):
2321 """Make a fifo called targetpath.
2322 """
2323 if hasattr(os, "mkfifo"):
2324 os.mkfifo(targetpath)
2325 else:
2326 raise ExtractError("fifo not supported by system")
2327
2328 def makedev(self, tarinfo, targetpath):
2329 """Make a character or block device called targetpath.
2330 """
2331 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2332 raise ExtractError("special devices not supported by system")
2333
2334 mode = tarinfo.mode
2335 if tarinfo.isblk():
2336 mode |= stat.S_IFBLK
2337 else:
2338 mode |= stat.S_IFCHR
2339
2340 os.mknod(targetpath, mode,
2341 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2342
2343 def makelink(self, tarinfo, targetpath):
2344 """Make a (symbolic) link called targetpath. If it cannot be created
2345 (platform limitation), we try to make a copy of the referenced file
2346 instead of a link.
2347 """
2348 try:
2349 # For systems that support symbolic and hard links.
2350 if tarinfo.issym():
2351 os.symlink(tarinfo.linkname, targetpath)
2352 else:
2353 # See extract().
2354 if os.path.exists(tarinfo._link_target):
2355 os.link(tarinfo._link_target, targetpath)
2356 else:
2357 self._extract_member(self._find_link_target(tarinfo),
2358 targetpath)
2359 except symlink_exception:
2360 if tarinfo.issym():
2361 linkpath = os.path.join(os.path.dirname(tarinfo.name),
2362 tarinfo.linkname)
2363 else:
2364 linkpath = tarinfo.linkname
2365 else:
2366 try:
2367 self._extract_member(self._find_link_target(tarinfo),
2368 targetpath)
2369 except KeyError:
2370 raise ExtractError("unable to resolve link inside archive")
2371
2372 def chown(self, tarinfo, targetpath):
2373 """Set owner of targetpath according to tarinfo.
2374 """
2375 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2376 # We have to be root to do so.
2377 try:
2378 g = grp.getgrnam(tarinfo.gname)[2]
2379 except KeyError:
2380 g = tarinfo.gid
2381 try:
2382 u = pwd.getpwnam(tarinfo.uname)[2]
2383 except KeyError:
2384 u = tarinfo.uid
2385 try:
2386 if tarinfo.issym() and hasattr(os, "lchown"):
2387 os.lchown(targetpath, u, g)
2388 else:
2389 if sys.platform != "os2emx":
2390 os.chown(targetpath, u, g)
2391 except EnvironmentError as e:
2392 raise ExtractError("could not change owner")
2393
2394 def chmod(self, tarinfo, targetpath):
2395 """Set file permissions of targetpath according to tarinfo.
2396 """
2397 if hasattr(os, 'chmod'):
2398 try:
2399 os.chmod(targetpath, tarinfo.mode)
2400 except EnvironmentError as e:
2401 raise ExtractError("could not change mode")
2402
2403 def utime(self, tarinfo, targetpath):
2404 """Set modification time of targetpath according to tarinfo.
2405 """
2406 if not hasattr(os, 'utime'):
2407 return
2408 try:
2409 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2410 except EnvironmentError as e:
2411 raise ExtractError("could not change modification time")
2412
2413 #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.
        """
        self._check("ra")
        # A member may have been read ahead (e.g. by open()); hand it
        # out first.
        if self.firstmember is not None:
            m = self.firstmember
            self.firstmember = None
            return m

        # Read the next block.
        self.fileobj.seek(self.offset)
        tarinfo = None
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError as e:
                # All-zero block: end-of-archive marker, unless
                # ignore_zeros tells us to skip it and keep scanning.
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError as e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    # An invalid very first header means this is no tar
                    # file at all; mid-archive it just ends iteration.
                    raise ReadError(str(e))
            except EmptyHeaderError:
                if self.offset == 0:
                    raise ReadError("empty file")
            except TruncatedHeaderError as e:
                if self.offset == 0:
                    raise ReadError(str(e))
            except SubsequentHeaderError as e:
                raise ReadError(str(e))
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            # No further members: remember that the archive is
            # completely read.
            self._loaded = True

        return tarinfo
2459
2460 #--------------------------------------------------------------------------
2461 # Little helper methods:
2462
2463 def _getmember(self, name, tarinfo=None, normalize=False):
2464 """Find an archive member by name from bottom to top.
2465 If tarinfo is given, it is used as the starting point.
2466 """
2467 # Ensure that all members have been loaded.
2468 members = self.getmembers()
2469
2470 # Limit the member search list up to tarinfo.
2471 if tarinfo is not None:
2472 members = members[:members.index(tarinfo)]
2473
2474 if normalize:
2475 name = os.path.normpath(name)
2476
2477 for member in reversed(members):
2478 if normalize:
2479 member_name = os.path.normpath(member.name)
2480 else:
2481 member_name = member.name
2482
2483 if name == member_name:
2484 return member
2485
2486 def _load(self):
2487 """Read through the entire archive file and look for readable
2488 members.
2489 """
2490 while True:
2491 tarinfo = self.next()
2492 if tarinfo is None:
2493 break
2494 self._loaded = True
2495
2496 def _check(self, mode=None):
2497 """Check if TarFile is still open, and if the operation's mode
2498 corresponds to TarFile's mode.
2499 """
2500 if self.closed:
2501 raise IOError("%s is closed" % self.__class__.__name__)
2502 if mode is not None and self.mode not in mode:
2503 raise IOError("bad operation for mode %r" % self.mode)
2504
2505 def _find_link_target(self, tarinfo):
2506 """Find the target member of a symlink or hardlink member in the
2507 archive.
2508 """
2509 if tarinfo.issym():
2510 # Always search the entire archive.
2511 linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname
2512 limit = None
2513 else:
2514 # Search the archive before the link, because a hard link is
2515 # just a reference to an already archived file.
2516 linkname = tarinfo.linkname
2517 limit = tarinfo
2518
2519 member = self._getmember(linkname, tarinfo=limit, normalize=True)
2520 if member is None:
2521 raise KeyError("linkname %r not found" % linkname)
2522 return member
2523
2524 def __iter__(self):
2525 """Provide an iterator object.
2526 """
2527 if self._loaded:
2528 return iter(self.members)
2529 else:
2530 return TarIter(self)
2531
2532 def _dbg(self, level, msg):
2533 """Write debugging output to sys.stderr.
2534 """
2535 if level <= self.debug:
2536 print(msg, file=sys.stderr)
2537
2538 def __enter__(self):
2539 self._check()
2540 return self
2541
2542 def __exit__(self, type, value, traceback):
2543 if type is None:
2544 self.close()
2545 else:
2546 # An exception occurred. We must not call close() because
2547 # it would try to write end-of-archive blocks and padding.
2548 if not self._extfileobj:
2549 self.fileobj.close()
2550 self.closed = True
2551# class TarFile
2552
class TarIter(object):
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object for `tarfile'.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def __next__(self):
        """Return the next item using TarFile's next() method.
           When all members have been read, set TarFile as _loaded.
        """
        # Fix for SF #1100429: if getmembers() is called during
        # iteration the archive becomes fully loaded; consult the
        # member list then so TarIter does not stop prematurely.
        if self.tarfile._loaded:
            try:
                tarinfo = self.tarfile.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = self.tarfile.next()
            if not tarinfo:
                self.tarfile._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo

    next = __next__ # for Python 2.x
2591
2592#--------------------
2593# exported functions
2594#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    # NOTE: `open' here is the module-level alias for TarFile.open
    # (rebound at the bottom of this module), not the builtin.
    try:
        archive = open(name)
        archive.close()
        return True
    except TarError:
        return False
2605
# Preserve the builtin open() under a private name before shadowing it;
# the module body calls bltn_open() wherever a real filesystem file must
# be opened.
bltn_open = open
# Shadow the builtin at module level so `tarfile.open(...)' is the
# canonical archive constructor, mirroring the stdlib tarfile API.
open = TarFile.open