""" The 'new' conda format, introduced in late 2018/early 2019. https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/packages.html """ from __future__ import annotations import json import os import tarfile from typing import Callable from zipfile import ZIP_STORED, ZipFile import zstandard from . import utils from .interface import AbstractBaseFormat from .streaming import _extract CONDA_PACKAGE_FORMAT_VERSION = 2 DEFAULT_COMPRESSION_TUPLE = (".tar.zst", "zstd", "zstd:compression-level=19") # increase to reduce speed and increase compression (22 = conda's default) ZSTD_COMPRESS_LEVEL = 19 # increase to reduce compression (slightly) and increase speed ZSTD_COMPRESS_THREADS = 1 class CondaFormat_v2(AbstractBaseFormat): """If there's another conda format or breaking changes, please create a new class and keep this one, so that handling of v2 stays working.""" @staticmethod def supported(fn): return fn.endswith(".conda") @staticmethod def extract(fn, dest_dir, **kw): components = utils.ensure_list(kw.get("components")) or ("info", "pkg") if not os.path.isabs(fn): fn = os.path.normpath(os.path.join(os.getcwd(), fn)) if not os.path.isdir(dest_dir): os.makedirs(dest_dir) _extract(str(fn), str(dest_dir), components=components) @staticmethod def extract_info(fn, dest_dir=None): return CondaFormat_v2.extract(fn, dest_dir, components=["info"]) @staticmethod def create( prefix, file_list, out_fn, out_folder=os.getcwd(), compressor: Callable[[], zstandard.ZstdCompressor] | None = None, compression_tuple=(None, None, None), ): if os.path.isabs(out_fn): out_folder = os.path.dirname(out_fn) out_fn = os.path.basename(out_fn) conda_pkg_fn = os.path.join(out_folder, out_fn) file_id = out_fn.replace(".conda", "") pkg_files = utils.filter_info_files(file_list, prefix) # preserve order pkg_files_set = set(pkg_files) info_files = list(f for f in file_list if f not in pkg_files_set) if compressor and (compression_tuple != (None, None, None)): raise ValueError("Supply one of compressor= or (deprecated) compression_tuple=") if compressor is None: compressor = lambda: zstandard.ZstdCompressor( level=ZSTD_COMPRESS_LEVEL, threads=ZSTD_COMPRESS_THREADS, ) # legacy libarchive-ish compatibility ext, comp_filter, filter_opts = compression_tuple if filter_opts and filter_opts.startswith("zstd:compression-level="): compressor = lambda: zstandard.ZstdCompressor( level=int(filter_opts.split("=", 1)[-1]), threads=ZSTD_COMPRESS_THREADS, ) class NullWriter: """ zstd uses less memory on extract if size is known. """ def __init__(self): self.size = 0 def write(self, bytes): self.size += len(bytes) return len(bytes) def tell(self): return self.size with ZipFile(conda_pkg_fn, "w", compression=ZIP_STORED) as conda_file, utils.tmp_chdir( prefix ): pkg_metadata = {"conda_pkg_format_version": CONDA_PACKAGE_FORMAT_VERSION} conda_file.writestr("metadata.json", json.dumps(pkg_metadata)) components_files = (f"pkg-{file_id}.tar.zst", pkg_files), ( f"info-{file_id}.tar.zst", info_files, ) # put the info last, for parity with updated transmute. compress = compressor() for component, files in components_files: # If size is known, the decompressor may be able to allocate less memory. # The compressor will error if size is not correct. with tarfile.TarFile(fileobj=NullWriter(), mode="w") as sizer: # type: ignore for file in files: sizer.add(file, filter=utils.anonymize_tarinfo) size = sizer.fileobj.size # type: ignore with conda_file.open(component, "w") as component_file: # only one stream_writer() per compressor() must be in use at a time component_stream = compress.stream_writer( component_file, size=size, closefd=False ) component_tar = tarfile.TarFile(fileobj=component_stream, mode="w") for file in files: component_tar.add(file, filter=utils.anonymize_tarinfo) component_tar.close() component_stream.close() return conda_pkg_fn @staticmethod def get_pkg_details(in_file): stat_result = os.stat(in_file) size = stat_result.st_size md5, sha256 = utils.checksums(in_file, ("md5", "sha256")) return {"size": size, "md5": md5, "sha256": sha256}