# Copyright (C) 2012 Anaconda, Inc # SPDX-License-Identifier: BSD-3-Clause """Tools for cross-OS portability.""" from __future__ import annotations import os import re import struct import subprocess from logging import getLogger from os.path import basename, realpath from ..auxlib.ish import dals from ..base.constants import PREFIX_PLACEHOLDER from ..base.context import context from ..common.compat import on_linux, on_mac, on_win from ..exceptions import BinaryPrefixReplacementError, CondaIOError from ..gateways.disk.update import CancelOperation, update_file_in_place_as_binary from ..models.enums import FileMode log = getLogger(__name__) # three capture groups: whole_shebang, executable, options SHEBANG_REGEX = ( rb"^(#!" # pretty much the whole match string rb"(?:[ ]*)" # allow spaces between #! and beginning of the executable path rb"(/(?:\\ |[^ \n\r\t])*)" # the executable is the next text block without an escaped space or non-space whitespace character # NOQA rb"(.*)" # the rest of the line can contain option flags rb")$" ) # end whole_shebang group MAX_SHEBANG_LENGTH = 127 if on_linux else 512 # Not used on Windows # These are the most common file encodings that we run across when having to replace our # PREFIX_PLACEHOLDER string. They apply to binary and text formats. # More information/discussion: https://github.com/conda/conda/pull/9946 POPULAR_ENCODINGS = ( "utf-8", "utf-16-le", "utf-16-be", "utf-32-le", "utf-32-be", ) class _PaddingError(Exception): pass def _subdir_is_win(subdir: str) -> bool: if "-" in subdir: os, _ = subdir.lower().split("-", 1) return os == "win" else: # For noarch, check that we are running on windows return on_win def update_prefix( path, new_prefix, placeholder=PREFIX_PLACEHOLDER, mode=FileMode.text, subdir=context.subdir, ): if _subdir_is_win(subdir) and mode == FileMode.text: # force all prefix replacements to forward slashes to simplify need to escape backslashes # replace with unix-style path separators new_prefix = new_prefix.replace("\\", "/") def _update_prefix(original_data): # Step 1. do all prefix replacement data = replace_prefix(mode, original_data, placeholder, new_prefix, subdir) # Step 2. if the shebang is too long or the new prefix contains spaces, shorten it using # /usr/bin/env trick -- NOTE: this trick assumes the environment WILL BE activated if not _subdir_is_win(subdir): data = replace_long_shebang(mode, data) # Step 3. if the before and after content is the same, skip writing if data == original_data: raise CancelOperation() # Step 4. if we have a binary file, make sure the byte size is the same before # and after the update if mode == FileMode.binary and len(data) != len(original_data): raise BinaryPrefixReplacementError( path, placeholder, new_prefix, len(original_data), len(data) ) return data updated = update_file_in_place_as_binary(realpath(path), _update_prefix) if updated and mode == FileMode.binary and subdir == "osx-arm64" and on_mac: # Apple arm64 needs signed executables subprocess.run( ["/usr/bin/codesign", "-s", "-", "-f", realpath(path)], capture_output=True ) def replace_prefix( mode: FileMode, data: bytes, placeholder: str, new_prefix: str, subdir: str = "noarch", ) -> bytes: """ Replaces `placeholder` text with the `new_prefix` provided. The `mode` provided can either be text or binary. We use the `POPULAR_ENCODINGS` module level constant defined above to make several passes at replacing the placeholder. We do this to account for as many encodings as possible. If this causes any performance problems in the future, it could potentially be removed (i.e. just using the most popular "utf-8" encoding"). More information/discussion available here: https://github.com/conda/conda/pull/9946 """ for encoding in POPULAR_ENCODINGS: if mode == FileMode.text: if not _subdir_is_win(subdir): # if new_prefix contains spaces, it might break the shebang! # handle this by escaping the spaces early, which will trigger a # /usr/bin/env replacement later on newline_pos = data.find(b"\n") if newline_pos > -1: shebang_line, rest_of_data = data[:newline_pos], data[newline_pos:] shebang_placeholder = f"#!{placeholder}".encode(encoding) if shebang_placeholder in shebang_line: escaped_shebang = f"#!{new_prefix}".replace(" ", "\\ ").encode( encoding ) shebang_line = shebang_line.replace( shebang_placeholder, escaped_shebang ) data = shebang_line + rest_of_data # the rest of the file can be replaced normally data = data.replace( placeholder.encode(encoding), new_prefix.encode(encoding) ) elif mode == FileMode.binary: data = binary_replace( data, placeholder.encode(encoding), new_prefix.encode(encoding), encoding=encoding, subdir=subdir, ) else: raise CondaIOError("Invalid mode: %r" % mode) return data def binary_replace( data: bytes, search: bytes, replacement: bytes, encoding: str = "utf-8", subdir: str = "noarch", ) -> bytes: """ Perform a binary replacement of `data`, where the placeholder `search` is replaced with `replacement` and the remaining string is padded with null characters. All input arguments are expected to be bytes objects. Parameters ---------- data: The bytes object that will be searched and replaced search: The bytes object to find replacement: The bytes object that will replace `search` encoding: str The encoding of the expected string in the binary. """ zeros = "\0".encode(encoding) if _subdir_is_win(subdir): # on Windows for binary files, we currently only replace a pyzzer-type entry point # we skip all other prefix replacement if has_pyzzer_entry_point(data): return replace_pyzzer_entry_point_shebang(data, search, replacement) else: return data def replace(match): occurrences = match.group().count(search) padding = (len(search) - len(replacement)) * occurrences if padding < 0: raise _PaddingError return match.group().replace(search, replacement) + b"\0" * padding original_data_len = len(data) pat = re.compile(re.escape(search) + b"(?:(?!(?:" + zeros + b")).)*" + zeros) data = pat.sub(replace, data) assert len(data) == original_data_len return data def has_pyzzer_entry_point(data): pos = data.rfind(b"PK\x05\x06") return pos >= 0 def replace_pyzzer_entry_point_shebang(all_data, placeholder, new_prefix): """Code adapted from pyzzer. This is meant to deal with entry point exe's created by distlib, which consist of a launcher, then a shebang, then a zip archive of the entry point code to run. We need to change the shebang. https://bitbucket.org/vinay.sajip/pyzzer/src/5d5740cb04308f067d5844a56fbe91e7a27efccc/pyzzer/__init__.py?at=default&fileviewer=file-view-default#__init__.py-112 # NOQA """ # Copyright (c) 2013 Vinay Sajip. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. launcher = shebang = None pos = all_data.rfind(b"PK\x05\x06") if pos >= 0: end_cdr = all_data[pos + 12 : pos + 20] cdr_size, cdr_offset = struct.unpack(" 0: pos = all_data.rfind(b"#!", 0, arc_pos) if pos >= 0: shebang = all_data[pos:arc_pos] if pos > 0: launcher = all_data[:pos] if data and shebang and launcher: if hasattr(placeholder, "encode"): placeholder = placeholder.encode("utf-8") if hasattr(new_prefix, "encode"): new_prefix = new_prefix.encode("utf-8") shebang = shebang.replace(placeholder, new_prefix) all_data = b"".join([launcher, shebang, data]) return all_data def replace_long_shebang(mode, data): # this function only changes a shebang line if it exists and is greater than 127 characters if mode == FileMode.text: if not isinstance(data, bytes): try: data = bytes(data, encoding="utf-8") except: data = data.encode("utf-8") shebang_match = re.match(SHEBANG_REGEX, data, re.MULTILINE) if shebang_match: whole_shebang, executable, options = shebang_match.groups() prefix, executable_name = executable.decode("utf-8").rsplit("/", 1) if len(whole_shebang) > MAX_SHEBANG_LENGTH or "\\ " in prefix: new_shebang = ( f"#!/usr/bin/env {executable_name}{options.decode('utf-8')}" ) data = data.replace(whole_shebang, new_shebang.encode("utf-8")) else: # TODO: binary shebangs exist; figure this out in the future if text works well pass return data def generate_shebang_for_entry_point(executable, with_usr_bin_env=False): """ This function can be used to generate a shebang line for Python entry points. Use cases: - At install/link time, to generate the `noarch: python` entry points. - conda init uses it to create its own entry point during conda-build """ shebang = f"#!{executable}\n" if os.environ.get("CONDA_BUILD") == "1" and "/_h_env_placehold" in executable: # This is being used during a conda-build process, # which uses long prefixes on purpose. This will be replaced # with the real environment prefix at install time. Do not # do nothing for now. return shebang # In principle, the naive shebang will work as long as the path # to the python executable does not contain spaces AND it's not # longer than 127 characters. Otherwise, we must fix it if len(shebang) > MAX_SHEBANG_LENGTH or " " in executable: if with_usr_bin_env: # This approach works well for all cases BUT it requires # the executable to be in PATH. In other words, the environment # needs to be activated! shebang = f"#!/usr/bin/env {basename(executable)}\n" else: # This approach follows a method inspired by `pypa/distlib` # https://github.com/pypa/distlib/blob/91aa92e64/distlib/scripts.py#L129 # Explanation: these lines are both valid Python and shell :) # 1. Python will read it as a triple-quoted string; end of story # 2. The shell will see: # * '' (empty string) # * 'exec' "path/with spaces/to/python" "this file" "arguments" # * # ''' (inline comment with three quotes, ignored by shell) # This method works well BUT in some shells, $PS1 is dropped, which # makes the prompt disappear. This is very problematic for the conda # entry point! Details: https://github.com/conda/conda/issues/11885 shebang = dals( f""" #!/bin/sh '''exec' "{executable}" "$0" "$@" #''' """ ) return shebang