# Source code for datalad_next.archive_operations.tarfile

"""TAR archive operation handler"""

# allow for |-type UnionType declarations
from __future__ import annotations

import logging
import tarfile
from contextlib import contextmanager
from pathlib import (
    Path,
    PurePath,
    PurePosixPath,
)
from typing import (
    Generator,
    IO,
)

from datalad_next.config import ConfigManager
# TODO we might just want to do it in reverse:
# move the code of `iter_tar` in here and have it call
# `TarArchiveOperations(path).__iter__()` instead.
# However, the flexibility to have `iter_tar()` behave
# differently depending on parameters (fp=True/False)
# is nice, and `__iter__()` only has `self`, such that
# any customization would need to be infused in the whole
# class. Potentially cumbersome.
from datalad_next.iter_collections.tarfile import (
    TarfileItem,
    iter_tar,
)

from . import ArchiveOperations

lgr = logging.getLogger('datalad.ext.next.archive_operations.tarfile')


class TarArchiveOperations(ArchiveOperations):
    """Handler for a TAR archive on a local file system

    Any methods that take an archive item/member name as an argument
    accept a POSIX path string, or a `PurePosixPath` instance.
    """

    def __init__(self, location: Path, *, cfg: ConfigManager | None = None):
        """
        Parameters
        ----------
        location: Path
          TAR archive location
        cfg: ConfigManager, optional
          A config manager instance that is consulted for any supported
          configuration items
        """
        # TODO expose `mode` other kwargs of `tarfile.TarFile`
        super().__init__(location, cfg=cfg)

        # Consider supporting file-like for `location`,
        # see tarfile.open(fileobj=)
        self._tarfile_path = location
        # opened lazily by the `tarfile` property, reset by `close()`
        self._tarfile: tarfile.TarFile | None = None

    @property
    def tarfile(self) -> tarfile.TarFile:
        """Returns `TarFile` instance, after creating it on-demand

        The instance is cached, and needs to be released by calling
        ``.close()`` if called outside a context manager.
        """
        if self._tarfile is None:
            self._tarfile = tarfile.open(self._tarfile_path, 'r')
        return self._tarfile

    def close(self) -> None:
        """Closes any opened TAR file handler

        Calling this method when no TAR file handler is open is a no-op.
        A subsequent access of the ``tarfile`` property opens the archive
        again.
        """
        if self._tarfile is not None:
            self._tarfile.close()
            self._tarfile = None

    @contextmanager
    def open(
        self, item: str | PurePosixPath
    ) -> Generator[IO | None, None, None]:
        """Get a file-like for a TAR archive item

        The file-like object allows to read from the archive-item specified
        by `item`.

        Parameters
        ----------
        item: str | PurePosixPath
          The identifier must be a POSIX path string, or a `PurePosixPath`
          instance.

        Yields
        ------
        IO | None
          A file-like object to read bytes from the item, if
          ``TarFile.extractfile()`` provides one for the item, else `None`
          (e.g., for a directory). The file-like object is closed when the
          context is left.
          (This is returned by the context manager that is created via the
          decorator `@contextmanager`.)

        Raises
        ------
        KeyError
          If no item with the name `item` can be found in the tar-archive
        """
        member_fp = self.tarfile.extractfile(_anyid2membername(item))
        if member_fp is None:
            # `extractfile()` returns `None` for members that have no
            # content to read. `None` cannot be used in a `with` statement,
            # hence hand it out directly -- nothing needs to be closed.
            yield None
            return
        with member_fp:
            yield member_fp

    def __contains__(self, item: str | PurePosixPath) -> bool:
        """Report whether the archive has a member named `item`"""
        try:
            self.tarfile.getmember(_anyid2membername(item))
        except KeyError:
            # `getmember()` signals a missing member with `KeyError`
            return False
        return True

    def __iter__(self) -> Generator[TarfileItem, None, None]:
        """Yield the items reported by `iter_tar()` for the archive

        Items are requested without file pointers (``fp=False``).
        """
        # if fp=True is needed, either `iter_tar()` can be used
        # directly, or `TarArchiveOperations.open`
        yield from iter_tar(self._tarfile_path, fp=False)
def _anyid2membername(item_id: str | PurePath) -> str:
    """Convert an archive item identifier into a TAR member name

    Parameters
    ----------
    item_id: str | PurePath
      Either a string, which is returned unchanged, or any `PurePath`
      instance (including `PurePosixPath`), which is converted to its
      POSIX (forward-slash) string representation.

    Returns
    -------
    str
      The name to look up in the TAR archive
    """
    # match on `PurePath`, not just `PurePosixPath`: otherwise
    # platform-specific path objects (e.g. `PureWindowsPath`) would be
    # passed on as-is, and not as the `str` that is promised
    if isinstance(item_id, PurePath):
        return item_id.as_posix()
    return item_id