""" Tools for converting Cran packages to conda recipes. """ import argparse import copy from itertools import chain from os import makedirs, listdir, sep, environ from os.path import (basename, commonprefix, exists, isabs, isdir, isfile, join, normpath, realpath, relpath) import re import subprocess import sys import hashlib import requests import tarfile import zipfile import unicodedata import yaml # try to import C dumper try: from yaml import CSafeDumper as SafeDumper except ImportError: from yaml import SafeDumper from conda_build import source, metadata from conda_build.config import get_or_merge_config from conda_build.conda_interface import text_type, iteritems, TemporaryDirectory, cc_conda_build from conda_build.license_family import allowed_license_families, guess_license_family from conda_build.utils import rm_rf, ensure_list from conda_build.variants import get_package_variants, DEFAULT_VARIANTS SOURCE_META = """\ {archive_keys} {git_url_key} {git_url} {git_tag_key} {git_tag} {patches} """ BINARY_META = """\ url: {cranurl}{sel} {hash_entry}{sel} """ VERSION_META = """\ {{% set version = '{cran_version}' %}}{sel}""" CRAN_META = """\ {version_source} {version_binary1} {version_binary2} {{% set posix = 'm2-' if win else '' %}} {{% set native = 'm2w64-' if win else '' %}} package: name: {packagename} version: {{{{ version|replace("-", "_") }}}} source: {source} {binary1} {binary2} build: merge_build_host: True{sel_src_and_win} # If this is a new build for the same version, increment the build number. number: {build_number} {skip_os} {noarch_generic} # This is required to make R link correctly on Linux. rpaths: - lib/R/lib/ - lib/ {script_env} {suggests} requirements: build:{build_depends} host:{host_depends} run:{run_depends} test: commands: # You can put additional test commands to be run here. - $R -e "library('{cran_packagename}')" # [not win] - "\\"%R%\\" -e \\"library('{cran_packagename}')\\"" # [win] # You can also put a file called run_test.py, run_test.sh, or run_test.bat # in the recipe that will be run at test time. # requires: # Put any additional test requirements here. about: {home_comment}home:{homeurl} license: {license} {summary_comment}summary:{summary} license_family: {license_family} {license_file} {extra_recipe_maintainers} # The original CRAN metadata for this package was: {cran_metadata} # See # https://docs.conda.io/projects/conda-build for # more information about meta.yaml """ CRAN_BUILD_SH_SOURCE = """\ #!/bin/bash # 'Autobrew' is being used by more and more packages these days # to grab static libraries from Homebrew bottles. These bottles # are fetched via Homebrew's --force-bottle option which grabs # a bottle for the build machine which may not be macOS 10.9. # Also, we want to use conda packages (and shared libraries) for # these 'system' dependencies. See: # https://github.com/jeroen/autobrew/issues/3 export DISABLE_AUTOBREW=1 # R refuses to build packages that mark themselves as Priority: Recommended mv DESCRIPTION DESCRIPTION.old grep -va '^Priority: ' DESCRIPTION.old > DESCRIPTION # shellcheck disable=SC2086 ${{R}} CMD INSTALL --build . ${{R_ARGS}} # Add more build steps here, if they are necessary. # See # https://docs.conda.io/projects/conda-build # for a list of environment variables that are set during the build process. 
""" CRAN_BUILD_SH_MIXED = """\ #!/bin/bash set -o errexit -o pipefail if {source_pf_bash}; then export DISABLE_AUTOBREW=1 mv DESCRIPTION DESCRIPTION.old grep -va '^Priority: ' DESCRIPTION.old > DESCRIPTION # shellcheck disable=SC2086 ${{R}} CMD INSTALL --build . ${{R_ARGS}} else mkdir -p "${{PREFIX}}"/lib/R/library/{cran_packagename} mv ./* "${{PREFIX}}"/lib/R/library/{cran_packagename} if [[ ${{target_platform}} == osx-64 ]]; then pushd "${{PREFIX}}" for libdir in lib/R/lib lib/R/modules lib/R/library lib/R/bin/exec sysroot/usr/lib; do pushd "${{libdir}}" || exit 1 while IFS= read -r -d '' SHARED_LIB do echo "fixing SHARED_LIB ${{SHARED_LIB}}" install_name_tool -change /Library/Frameworks/R.framework/Versions/3.5.0-MRO/Resources/lib/libR.dylib "${{PREFIX}}"/lib/R/lib/libR.dylib "${{SHARED_LIB}}" || true install_name_tool -change /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libR.dylib "${{PREFIX}}"/lib/R/lib/libR.dylib "${{SHARED_LIB}}" || true install_name_tool -change /usr/local/clang4/lib/libomp.dylib "${{PREFIX}}"/lib/libomp.dylib "${{SHARED_LIB}}" || true install_name_tool -change /usr/local/gfortran/lib/libgfortran.3.dylib "${{PREFIX}}"/lib/libgfortran.3.dylib "${{SHARED_LIB}}" || true install_name_tool -change /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libquadmath.0.dylib "${{PREFIX}}"/lib/libquadmath.0.dylib "${{SHARED_LIB}}" || true install_name_tool -change /usr/local/gfortran/lib/libquadmath.0.dylib "${{PREFIX}}"/lib/libquadmath.0.dylib "${{SHARED_LIB}}" || true install_name_tool -change /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libgfortran.3.dylib "${{PREFIX}}"/lib/libgfortran.3.dylib "${{SHARED_LIB}}" || true install_name_tool -change /usr/lib/libgcc_s.1.dylib "${{PREFIX}}"/lib/libgcc_s.1.dylib "${{SHARED_LIB}}" || true install_name_tool -change /usr/lib/libiconv.2.dylib "${{PREFIX}}"/sysroot/usr/lib/libiconv.2.dylib "${{SHARED_LIB}}" || true install_name_tool -change /usr/lib/libncurses.5.4.dylib "${{PREFIX}}"/sysroot/usr/lib/libncurses.5.4.dylib "${{SHARED_LIB}}" || true install_name_tool -change /usr/lib/libicucore.A.dylib "${{PREFIX}}"/sysroot/usr/lib/libicucore.A.dylib "${{SHARED_LIB}}" || true install_name_tool -change /usr/lib/libexpat.1.dylib "${{PREFIX}}"/lib/libexpat.1.dylib "${{SHARED_LIB}}" || true install_name_tool -change /usr/lib/libcurl.4.dylib "${{PREFIX}}"/lib/libcurl.4.dylib "${{SHARED_LIB}}" || true install_name_tool -change /usr/lib/libc++.1.dylib "${{PREFIX}}"/lib/libc++.1.dylib "${{SHARED_LIB}}" || true install_name_tool -change /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libc++.1.dylib "${{PREFIX}}"/lib/libc++.1.dylib "${{SHARED_LIB}}" || true done < <(find . \\( -type f -iname "*.dylib" -or -iname "*.so" -or -iname "R" \\) -print0) popd done popd fi fi """ CRAN_BUILD_SH_BINARY = """\ #!/bin/bash set -o errexit -o pipefail mkdir -p "${{PREFIX}}"/lib/R/library/{cran_packagename} mv ./* "${{PREFIX}}"/lib/R/library/{cran_packagename} """ CRAN_BLD_BAT_SOURCE = """\ "%R%" CMD INSTALL --build . %R_ARGS% IF %ERRORLEVEL% NEQ 0 exit /B 1 """ # We hardcode the fact that CRAN does not provide win32 binaries here. CRAN_BLD_BAT_MIXED = """\ if "%target_platform%" == "win-64" goto skip_source_build "%R%" CMD INSTALL --build . %R_ARGS% IF %ERRORLEVEL% NEQ 0 exit /B 1 exit 0 :skip_source_build mkdir %PREFIX%\\lib\\R\\library robocopy /E . 
"%PREFIX%\\lib\\R\\library\\{cran_packagename}" if %ERRORLEVEL% NEQ 1 exit /B 1 exit 0 """ INDENT = '\n - ' CRAN_KEYS = [ 'Site', 'Archs', 'Depends', 'Enhances', 'Imports', 'License', 'License_is_FOSS', 'License_restricts_use', 'LinkingTo', 'MD5sum', 'NeedsCompilation', 'OS_type', 'Package', 'Path', 'Priority', 'Suggests', 'Version', 'Title', 'Author', 'Maintainer', ] # The following base/recommended package names are derived from R's source # tree (R-3.0.2/share/make/vars.mk). Hopefully they don't change too much # between versions. R_BASE_PACKAGE_NAMES = ( 'base', 'compiler', 'datasets', 'graphics', 'grDevices', 'grid', 'methods', 'parallel', 'splines', 'stats', 'stats4', 'tcltk', 'tools', 'utils', ) R_RECOMMENDED_PACKAGE_NAMES = ( 'MASS', 'lattice', 'Matrix', 'nlme', 'survival', 'boot', 'cluster', 'codetools', 'foreign', 'KernSmooth', 'rpart', 'class', 'nnet', 'spatial', 'mgcv', ) # Stolen then tweaked from debian.deb822.PkgRelation.__dep_RE. VERSION_DEPENDENCY_REGEX = re.compile( r'^\s*(?P[a-zA-Z0-9.+\-]{1,})' r'(\s*\(\s*(?P[>=<]+)\s*' r'(?P[0-9a-zA-Z:\-+~.]+)\s*\))' r'?(\s*\[(?P[\s!\w\-]+)\])?\s*$' ) target_platform_bash_test_by_sel = {'linux': '=~ linux.*', 'linux32': '== linux-32', 'linux64': '== linux-64', 'win32': '== win-32', 'win64': '== win-64', 'osx': '== osx-64'} def package_exists(package_name): # TODO: how can we get cran to spit out package presence? # available.packages() is probably a start, but no channels are working on mac right now? return True # install_output = subprocess.check_output([join(sys.prefix, "r"), "-e", # # ind=2 arbitrarily chooses some CRAN mirror to try. # "chooseCRANmirror(ind=2);install.packages('{}')".format(package_name)]) def add_parser(repos): # for loading default variant info cran = repos.add_parser( "cran", help=""" Create recipe skeleton for packages hosted on the Comprehensive R Archive Network (CRAN) (cran.r-project.org). """, formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) cran.add_argument( "packages", nargs='+', help="""CRAN packages to create recipe skeletons for.""", ) cran.add_argument( "--output-dir", help="Directory to write recipes to (default: %(default)s).", default=".", ) cran.add_argument( "--output-suffix", help="Suffix to add to recipe dir, can contain other dirs (eg: -feedstock/recipe).", default="", ) cran.add_argument( "--add-maintainer", help="Add this github username as a maintainer if not already present.", ) cran.add_argument( "--version", help="Version to use. Applies to all packages.", ) cran.add_argument( "--git-tag", help="Git tag to use for GitHub recipes.", ) cran.add_argument( "--all-urls", action="store_true", help="""Look at all URLs, not just source URLs. Use this if it can't find the right URL.""", ) cran.add_argument( "--cran-url", help="URL to use for as source package repository", ) cran.add_argument( "--r-interp", default='r-base', help="Declare R interpreter package", ) cran.add_argument( "--use-binaries-ver", help=("Repackage binaries from version provided by argument instead of building " "from source."), ) cran.add_argument( "--use-when-no-binary", choices=('src', 'old', 'src-old', 'old-src', 'error'), default='src', help="""Sometimes binaries are not available at the correct version for a given platform (macOS). 
target_platform_bash_test_by_sel = {'linux': '=~ linux.*',
                                    'linux32': '== linux-32',
                                    'linux64': '== linux-64',
                                    'win32': '== win-32',
                                    'win64': '== win-64',
                                    'osx': '== osx-64'}


def package_exists(package_name):
    # TODO: how can we get cran to spit out package presence?
    # available.packages() is probably a start, but no channels are working on mac right now?
    return True
    # install_output = subprocess.check_output([join(sys.prefix, "r"), "-e",
    #     # ind=2 arbitrarily chooses some CRAN mirror to try.
    #     "chooseCRANmirror(ind=2);install.packages('{}')".format(package_name)])


def add_parser(repos):
    # for loading default variant info
    cran = repos.add_parser(
        "cran",
        help="""
    Create recipe skeleton for packages hosted on the Comprehensive R Archive
    Network (CRAN) (cran.r-project.org).
        """,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cran.add_argument(
        "packages",
        nargs='+',
        help="""CRAN packages to create recipe skeletons for.""",
    )
    cran.add_argument(
        "--output-dir",
        help="Directory to write recipes to (default: %(default)s).",
        default=".",
    )
    cran.add_argument(
        "--output-suffix",
        help="Suffix to add to recipe dir, can contain other dirs (eg: -feedstock/recipe).",
        default="",
    )
    cran.add_argument(
        "--add-maintainer",
        help="Add this github username as a maintainer if not already present.",
    )
    cran.add_argument(
        "--version",
        help="Version to use. Applies to all packages.",
    )
    cran.add_argument(
        "--git-tag",
        help="Git tag to use for GitHub recipes.",
    )
    cran.add_argument(
        "--all-urls",
        action="store_true",
        help="""Look at all URLs, not just source URLs. Use this if it can't
                find the right URL.""",
    )
    cran.add_argument(
        "--cran-url",
        help="URL to use as the source package repository.",
    )
    cran.add_argument(
        "--r-interp",
        default='r-base',
        help="Declare R interpreter package.",
    )
    cran.add_argument(
        "--use-binaries-ver",
        help=("Repackage binaries from version provided by argument instead of building "
              "from source."),
    )
    cran.add_argument(
        "--use-when-no-binary",
        choices=('src', 'old', 'src-old', 'old-src', 'error'),
        default='src',
        help="""Sometimes binaries are not available at the correct version for
                a given platform (macOS). Use this flag to specify the fallback:
                compile from source, use an older binary, or try one and then
                the other.""",
    )
    cran.add_argument(
        "--use-noarch-generic",
        action='store_true',
        dest='use_noarch_generic',
        help="Mark packages that do not need compilation as `noarch: generic`.",
    )
    cran.add_argument(
        "--use-rtools-win",
        action='store_true',
        help="Use Rtools when building from source on Windows.",
    )
    cran.add_argument(
        "--recursive",
        action='store_true',
        help="Create recipes for dependencies if they do not already exist.",
    )
    cran.add_argument(
        "--no-recursive",
        action='store_false',
        dest='recursive',
        help="Don't create recipes for dependencies if they do not already exist.",
    )
    cran.add_argument(
        '--no-archive',
        action='store_false',
        dest='archive',
        help="Don't include an Archive download url.",
    )
    cran.add_argument(
        '--allow-archived',
        action='store_true',
        dest='allow_archived',
        help="If the package has been archived, download the latest version.",
    )
    cran.add_argument(
        "--version-compare",
        action='store_true',
        help="""Compare the package version of the recipe with the one available
                on CRAN. Exits 1 if a newer version is available and 0 otherwise.""",
    )
    cran.add_argument(
        "--update-policy",
        action='store',
        choices=('error',
                 'skip-up-to-date',
                 'skip-existing',
                 'overwrite',
                 'merge-keep-build-num',
                 'merge-incr-build-num'),
        default='error',
        help="""Dictates what to do when existing packages are encountered in the
                output directory (set by --output-dir). In the present implementation,
                the merge options avoid overwriting bld.bat and build.sh and only
                manage copying across patches, and the `build/{number,script_env}`
                fields. When the version changes, both merge options reset
                `build/number` to 0. When the version does not change, they either
                keep the old `build/number` or increase it by one.""",
    )
    cran.add_argument(
        '-m', '--variant-config-files',
        default=cc_conda_build.get('skeleton_config_yaml', None),
        help="""Variant config file to add. These yaml files can contain
                keys such as `cran_mirror`. Only one can be provided here.""",
    )
    cran.add_argument(
        "--add-cross-r-base",
        action='store_true',
        default=False,
        help="Add cross-r-base to build requirements for cross-compiling.",
    )
    cran.add_argument(
        "--no-comments",
        action='store_true',
        default=False,
        help="Do not include instructional comments in recipe files.",
    )


def dict_from_cran_lines(lines):
    d = {}
    for line in lines:
        if not line:
            continue
        try:
            if ': ' in line:
                (k, v) = line.split(': ', 1)
            else:
                # Sometimes fields are included but left blank, e.g.:
                # - Enhances in data.tree
                # - Suggests in corpcor
                (k, v) = line.split(':', 1)
        except ValueError:
            sys.exit("Error: Could not parse metadata (%s)" % line)
        d[k] = v
        # if k not in CRAN_KEYS:
        #     print("Warning: Unknown key %s" % k)
    d['orig_lines'] = lines
    return d


def remove_package_line_continuations(chunk):
    """
    >>> chunk = [
        'Package: A3',
        'Version: 0.9.2',
        'Depends: R (>= 2.15.0), xtable, pbapply',
        'Suggests: randomForest, e1071',
        'Imports: MASS, R.methodsS3 (>= 1.5.2), R.oo (>= 1.15.8), R.utils (>=',
        '        1.27.1), matrixStats (>= 0.8.12), R.filesets (>= 2.3.0), ',
        '        sampleSelection, scatterplot3d, strucchange, systemfit',
        'License: GPL (>= 2)',
        'NeedsCompilation: no']
    >>> remove_package_line_continuations(chunk)
    ['Package: A3',
     'Version: 0.9.2',
     'Depends: R (>= 2.15.0), xtable, pbapply',
     'Suggests: randomForest, e1071',
     'Imports: MASS, R.methodsS3 (>= 1.5.2), R.oo (>= 1.15.8), R.utils (>= 1.27.1), matrixStats (>= 0.8.12), R.filesets (>= 2.3.0), sampleSelection, scatterplot3d, strucchange, systemfit',
     'License: GPL (>= 2)',
     'NeedsCompilation: no']
    """  # NOQA
    continuation = (' ', '\t')
    continued_ix = None
    continued_line = None
    had_continuation = False
    accumulating_continuations = False

    chunk.append('')

    for (i, line) in enumerate(chunk):
        if line.startswith(continuation):
            line = ' ' + line.lstrip()
            if accumulating_continuations:
                assert had_continuation
                continued_line += line
                chunk[i] = None
            else:
                accumulating_continuations = True
                continued_ix = i - 1
                continued_line = chunk[continued_ix] + line
                had_continuation = True
                chunk[i] = None
        else:
            if accumulating_continuations:
                assert had_continuation
                chunk[continued_ix] = continued_line
                accumulating_continuations = False
                continued_line = None
                continued_ix = None

    if had_continuation:
        # Remove the None(s).
        chunk = [c for c in chunk if c]

    chunk.append('')

    return chunk


def yaml_quote_string(string):
    """
    Quote a string for use in YAML.

    We can't just use yaml.dump because it adds ellipses to the end of the
    string, and it in general doesn't handle being placed inside an existing
    document very well.

    Note that this function is NOT general.
    """
    return (yaml.dump(string, Dumper=SafeDumper)
            .replace('\n...\n', '').replace('\n', '\n  ').rstrip('\n '))
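# Illustrative example (not part of the original module): values that YAML
# would otherwise misparse (e.g. ones containing ': ') come back quoted,
# ready to be embedded in the meta.yaml template:
#
#     >>> yaml_quote_string('Easy pre and post assertions: see the docs')
#     "'Easy pre and post assertions: see the docs'"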
# Due to how we render the metadata there can be significant areas of
# repeated newlines. This collapses them and also strips any trailing spaces.
def clear_whitespace(string):
    lines = []
    last_line = ''
    for line in string.splitlines():
        line = line.rstrip()
        if not (line == '' and last_line == ''):
            lines.append(line)
        last_line = line
    return '\n'.join(lines)


def read_description_contents(fp):
    bytes = fp.read()
    text = bytes.decode('utf-8', errors='replace')
    text = clear_whitespace(text)
    lines = remove_package_line_continuations(text.splitlines())
    return dict_from_cran_lines(lines)


def get_archive_metadata(path, verbose=True):
    if verbose:
        print('Reading package metadata from %s' % path)
    if basename(path) == 'DESCRIPTION':
        with open(path, 'rb') as fp:
            return read_description_contents(fp)
    elif tarfile.is_tarfile(path):
        with tarfile.open(path, 'r') as tf:
            for member in tf:
                if re.match(r'^[^/]+/DESCRIPTION$', member.name):
                    fp = tf.extractfile(member)
                    return read_description_contents(fp)
    elif path.endswith('.zip'):
        with zipfile.ZipFile(path, 'r') as zf:
            for member in zf.infolist():
                if re.match(r'^[^/]+/DESCRIPTION$', member.filename):
                    fp = zf.open(member, 'r')
                    return read_description_contents(fp)
    else:
        sys.exit('Cannot extract a DESCRIPTION from file %s' % path)
    sys.exit('%s does not seem to be a CRAN package (no DESCRIPTION file)' % path)


def get_latest_git_tag(config):
    # SO says to use taggerdate instead of committerdate, but that is invalid for lightweight tags.
    p = subprocess.Popen(['git', 'for-each-ref',
                          'refs/tags',
                          '--sort=-committerdate',
                          '--format=%(refname:short)',
                          '--count=1'],
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         cwd=config.work_dir)
    stdout, stderr = p.communicate()
    stdout = stdout.decode('utf-8')
    stderr = stderr.decode('utf-8')
    if stderr or p.returncode:
        sys.exit("Error: git tag failed (%s)" % stderr)
    tags = stdout.strip().splitlines()
    if not tags:
        sys.exit("Error: no tags found")
    print("Using tag %s" % tags[-1])
    return tags[-1]


def _ssl_no_verify():
    """Gets whether the SSL_NO_VERIFY environment variable is set to 1 or True.

    This provides a workaround for users in some corporate environments where
    MITM style proxies make it difficult to fetch data over HTTPS.
    """
    return environ.get('SSL_NO_VERIFY', '').strip().lower() in ('1', 'true')


def get_session(output_dir, verbose=True):
    session = requests.Session()
    # Certificate verification is disabled when SSL_NO_VERIFY is set.
    session.verify = not _ssl_no_verify()
    try:
        import cachecontrol
        import cachecontrol.caches
    except ImportError:
        if verbose:
            print("Tip: install CacheControl and lockfile (conda packages) to cache the "
                  "CRAN metadata")
    else:
        session = cachecontrol.CacheControl(session,
                                            cache=cachecontrol.caches.FileCache(
                                                join(output_dir, '.web_cache')))
    return session


def get_cran_archive_versions(cran_url, session, package, verbose=True):
    if verbose:
        print(f"Fetching archived versions for package {package} from {cran_url}")
    r = session.get(cran_url + "/src/contrib/Archive/" + package + "/")
    try:
        r.raise_for_status()
    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 404:
            print("No archive directory for package %s" % package)
            return []
        raise
    versions = []
    # Parse the Apache-style directory listing: a link cell followed by a
    # last-modified date cell.
    for p, dt in re.findall(r'<td><a href="([^"]+)">\1</a></td>\s*<td[^>]*>([^<]*)</td>', r.text):
        if p.endswith('.tar.gz') and '_' in p:
            name, version = p.rsplit('.', 2)[0].split('_', 1)
            versions.append((dt.strip(), version))
    return [v for dt, v in sorted(versions, reverse=True)]


def get_cran_index(cran_url, session, verbose=True):
    if verbose:
        print("Fetching main index from %s" % cran_url)
    r = session.get(cran_url + "/src/contrib/")
    r.raise_for_status()
    records = {}
    for p in re.findall(r'<td><a href="([^"]+)">\1</a></td>', r.text):
        if p.endswith('.tar.gz') and '_' in p:
            name, version = p.rsplit('.', 2)[0].split('_', 1)
            records[name.lower()] = (name, version)
    r = session.get(cran_url + "/src/contrib/Archive/")
    r.raise_for_status()
    for p in re.findall(r'<td><a href="([^"]+)/">\1/</a></td>', r.text):
        if re.match(r'^[A-Za-z]', p):
            records.setdefault(p.lower(), (p, None))
    return records
def make_array(m, key, allow_empty=False):
    result = []
    try:
        old_vals = m.get_value(key, [])
    except:
        old_vals = []
    if old_vals or allow_empty:
        result.append(key.split('/')[-1] + ":")
    for old_val in old_vals:
        result.append(f"{INDENT}{old_val}")
    return result


def existing_recipe_dir(output_dir, output_suffix, package, version):
    result = None
    if version:
        package = package + '-' + version.replace('-', '_')
    if exists(join(output_dir, package)):
        result = normpath(join(output_dir, package))
    elif exists(join(output_dir, package + output_suffix)):
        result = normpath(join(output_dir, package + output_suffix))
    elif exists(join(output_dir, 'r-' + package + output_suffix)):
        result = normpath(join(output_dir, 'r-' + package + output_suffix))
    return result


def strip_end(string, end):
    if string.endswith(end):
        return string[:-len(end)]
    return string


def package_to_inputs_dict(output_dir, output_suffix, git_tag, package, version=None):
    """
    Converts `package` (*) into a tuple of:

    pkg_name (without leading 'r-')
    location (in a subdir of output_dir - may not exist - or at GitHub)
    old_git_rev (from existing metadata, so corresponds to the *old* version)
    metadata or None (if a recipe does *not* already exist)

    (*) `package` could be:

    1. A package name beginning (or not) with 'r-'
    2. A GitHub URL
    3. A file:// URL to a tarball
    4. A relative path to a recipe from output_dir
    5. An absolute path to a recipe (fatal unless in the output_dir hierarchy)
    6. Any of the above ending (or not) in sep or '/'

    So this function cleans all that up:

    Some packages may be from GitHub but we'd like the user not to have to
    worry about that on the command-line (for pre-existing recipes). Also, we
    may want to get version information from them (or existing metadata to
    merge) so let's load *all* existing recipes (later we will add or replace
    this metadata with any that we create).
    """
    if isfile(package):
        return None
    print("Parsing input package %s:" % package)
    package = strip_end(package, '/')
    package = strip_end(package, sep)
    if 'github.com' in package:
        package = strip_end(package, '.git')
    pkg_name = basename(package).lower()
    pkg_name = strip_end(pkg_name, '-feedstock')
    if output_suffix:
        pkg_name = strip_end(pkg_name, output_suffix)
    if pkg_name.startswith('r-'):
        pkg_name = pkg_name[2:]
    if package.startswith('file://'):
        location = package.replace('file://', '')
        pkg_filename = basename(location)
        pkg_name = re.match(r'(.*)_(.*)', pkg_filename).group(1).lower()
        existing_location = existing_recipe_dir(output_dir, output_suffix,
                                                'r-' + pkg_name, version)
    elif isabs(package):
        commp = commonprefix((package, output_dir))
        if commp != output_dir:
            raise RuntimeError("package {} specified with abs path outside of output-dir {}".format(
                package, output_dir))
        location = package
        existing_location = existing_recipe_dir(output_dir, output_suffix,
                                                'r-' + pkg_name, version)
    elif 'github.com' in package:
        location = package
        existing_location = existing_recipe_dir(output_dir, output_suffix,
                                                'r-' + pkg_name, version)
    else:
        location = existing_location = existing_recipe_dir(output_dir, output_suffix,
                                                           package, version)
    if existing_location:
        try:
            m = metadata.MetaData(existing_location)
        except:
            # Happens when the folder exists but contains no recipe.
            m = None
    else:
        m = None

    # It can still be the case that a package without 'github.com' in the location does really
    # come from there, for that we need to inspect the existing metadata's source/git_url.
    old_git_rev = git_tag
    if location and m and 'github.com' not in location:
        git_url = m.get_value('source/git_url', '')
        if 'github.com' in git_url:
            location = git_url
            old_git_rev = m.get_value('source/git_rev', None)

    vstr = '-' + version.replace('-', '_') if version else ''
    new_location = join(output_dir, 'r-' + pkg_name + vstr + output_suffix)
    print(f".. name: {pkg_name} location: {location} new_location: {new_location}")

    return {'pkg-name': pkg_name,
            'location': location,
            'old-git-rev': old_git_rev,
            'old-metadata': m,
            'new-location': new_location,
            'version': version}


def get_available_binaries(cran_url, details):
    url = cran_url + '/' + details['dir']
    response = requests.get(url)
    response.raise_for_status()
    ext = details['ext']
    for filename in re.findall(r'<td><a href="([^"]+)">\1</a></td>', response.text):
        if filename.endswith(ext):
            pkg, _, ver = filename.rpartition('_')
            ver, _, _ = ver.rpartition(ext)
            details['binaries'].setdefault(pkg, []).append((ver, url + filename))


def remove_comments(template):
    re_comment = re.compile(r'^\s*#\s')
    lines = template.split('\n')
    lines_no_comments = [line for line in lines if not re_comment.match(line)]
    return '\n'.join(lines_no_comments)
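# Illustrative example (not part of the original module): remove_comments is
# applied to the templates when --no-comments is passed, stripping the
# instructional '# ...' lines:
#
#     >>> remove_comments('# If this is a new build ...\nnumber: 0\n')
#     'number: 0\n'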
def skeletonize(in_packages, output_dir=".", output_suffix="", add_maintainer=None, version=None,
                git_tag=None, cran_url=None, recursive=False, archive=True, version_compare=False,
                update_policy='', r_interp='r-base', use_binaries_ver=None,
                use_noarch_generic=False, use_when_no_binary='src', use_rtools_win=False,
                config=None, variant_config_files=None, allow_archived=False,
                add_cross_r_base=False, no_comments=False):

    if use_when_no_binary != 'error' and \
            use_when_no_binary != 'src' and \
            use_when_no_binary != 'old' and \
            use_when_no_binary != 'old-src':
        print(f"ERROR: --use_when_no_binary={use_when_no_binary} not yet implemented")
        sys.exit(1)

    output_dir = realpath(output_dir)
    config = get_or_merge_config(config, variant_config_files=variant_config_files)

    if allow_archived and not archive:
        print("ERROR: --no-archive and --allow-archived conflict")
        sys.exit(1)

    if not cran_url:
        with TemporaryDirectory() as t:
            _variant = get_package_variants(t, config)[0]
            cran_url = ensure_list(_variant.get('cran_mirror', DEFAULT_VARIANTS['cran_mirror']))[0]

    if len(in_packages) > 1 and version_compare:
        raise ValueError("--version-compare only works with one package at a time")
    if update_policy == 'error' and not in_packages:
        raise ValueError("At least one package must be supplied")

    package_dicts = {}
    package_list = []

    cran_url = cran_url.rstrip('/')

    # Get cran index lazily so we don't have to go to CRAN
    # for a github repo or a local tarball
    cran_index = None

    cran_layout_template = \
        {'source': {'selector': '{others}',
                    'dir': 'src/contrib/',
                    'ext': '.tar.gz',
                    # If we had platform filters we would change this to:
                    # build_for_linux or is_github_url or is_tarfile
                    'use_this': True},
         'win-64': {'selector': 'win64',
                    'dir': f'bin/windows/contrib/{use_binaries_ver}/',
                    'ext': '.zip',
                    'use_this': True if use_binaries_ver else False},
         'osx-64': {'selector': 'osx',
                    'dir': 'bin/macosx/el-capitan/contrib/{}/'.format(use_binaries_ver),
                    'ext': '.tgz',
                    'use_this': True if use_binaries_ver else False}}

    # Figure out what binaries are available once:
    for archive_type, archive_details in iteritems(cran_layout_template):
        archive_details['binaries'] = dict()
        if archive_type != 'source' and archive_details['use_this']:
            get_available_binaries(cran_url, archive_details)

    for package in in_packages:
        inputs_dict = package_to_inputs_dict(output_dir, output_suffix, git_tag, package, version)
        if inputs_dict:
            package_dicts.update({inputs_dict['pkg-name']: {'inputs': inputs_dict}})

    for package_name, package_dict in package_dicts.items():
        package_list.append(package_name)

    while package_list:
        inputs = package_dicts[package_list.pop()]['inputs']
        location = inputs['location']
        pkg_name = inputs['pkg-name']
        version = inputs['version']
        is_github_url = location and 'github.com' in location
        is_tarfile = location and isfile(location) and tarfile.is_tarfile(location)
        is_archive = False
        url = inputs['location']

        dir_path = inputs['new-location']
        print(f"Making/refreshing recipe for {pkg_name}")

        # Bodges GitHub packages into cran_metadata
        if is_tarfile:
            cran_package = get_archive_metadata(location)
        elif is_github_url or is_tarfile:
            rm_rf(config.work_dir)
            m = metadata.MetaData.fromdict({'source': {'git_url': location}}, config=config)
            source.git_source(m.get_section('source'), m.config.git_cache, m.config.work_dir)
            new_git_tag = git_tag if git_tag else get_latest_git_tag(config)
            p = subprocess.Popen(['git', 'checkout', new_git_tag], stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE, cwd=config.work_dir)
            stdout, stderr = p.communicate()
            stdout = stdout.decode('utf-8')
            stderr = stderr.decode('utf-8')
            if p.returncode:
                sys.exit("Error: 'git checkout %s' failed (%s).\nInvalid tag?" %
                         (new_git_tag, stderr.strip()))
            if stdout:
                print(stdout, file=sys.stdout)
            if stderr:
                print(stderr, file=sys.stderr)

            DESCRIPTION = join(config.work_dir, "DESCRIPTION")
            if not isfile(DESCRIPTION):
                sub_description_pkg = join(config.work_dir, 'pkg', "DESCRIPTION")
                sub_description_name = join(config.work_dir, location.split('/')[-1], "DESCRIPTION")
                if isfile(sub_description_pkg):
                    DESCRIPTION = sub_description_pkg
                elif isfile(sub_description_name):
                    DESCRIPTION = sub_description_name
                else:
                    sys.exit("%s does not appear to be a valid R package "
                             "(no DESCRIPTION file in %s, %s)"
                             % (location, sub_description_pkg, sub_description_name))
            cran_package = get_archive_metadata(DESCRIPTION)
        else:
            if cran_index is None:
                session = get_session(output_dir)
                cran_index = get_cran_index(cran_url, session)
            if pkg_name.lower() not in cran_index:
                sys.exit("Package %s not found" % pkg_name)
            package, cran_version = cran_index[pkg_name.lower()]
            if cran_version and (not version or version == cran_version):
                version = cran_version
            elif version and not archive:
                print(f'ERROR: Version {version} of package {package} is archived, '
                      'but --no-archive was selected')
                sys.exit(1)
            elif not version and not cran_version and not allow_archived:
                print("ERROR: Package %s is archived; to build, use --allow-archived or "
                      "a --version value" % pkg_name)
                sys.exit(1)
            else:
                is_archive = True
                all_versions = get_cran_archive_versions(cran_url, session, package)
                if cran_version:
                    all_versions = [cran_version] + all_versions
                if not version:
                    version = all_versions[0]
                elif version not in all_versions:
                    msg = (f'ERROR: Version {version} of package {package} not found.\n'
                           '  Available versions: ')
                    print(msg + ', '.join(all_versions))
                    sys.exit(1)
            cran_package = None

        if cran_package is not None:
            package = cran_package['Package']
            version = cran_package['Version']

        plower = package.lower()
        d = package_dicts[pkg_name]
        d.update({
            'cran_packagename': package,
            'cran_version': version,
            'packagename': 'r-' + plower,
            # Conda versions cannot have -. Conda (verlib) will treat _ as a .
            'conda_version': version.replace('-', '_'),
            'patches': '',
            'build_number': 0,
            'build_depends': '',
            'host_depends': '',
            'run_depends': '',
            # CRAN doesn't seem to have this metadata :(
            'home_comment': '#',
            'homeurl': '',
            'summary_comment': '#',
            'summary': '',
            'binary1': '',
            'binary2': ''
        })

        if version_compare:
            sys.exit(not version_compare(dir_path, d['conda_version']))

        patches = []
        script_env = []
        extra_recipe_maintainers = []
        build_number = 0
        if update_policy.startswith('merge') and inputs['old-metadata']:
            m = inputs['old-metadata']
            patches = make_array(m, 'source/patches')
            script_env = make_array(m, 'build/script_env')
            extra_recipe_maintainers = make_array(m, 'extra/recipe-maintainers', add_maintainer)
            if m.version() == d['conda_version']:
                build_number = int(m.get_value('build/number', 0))
                build_number += 1 if update_policy == 'merge-incr-build-num' else 0
        if add_maintainer:
            new_maintainer = "{indent}{add_maintainer}".format(indent=INDENT,
                                                               add_maintainer=add_maintainer)
            if new_maintainer not in extra_recipe_maintainers:
                if not len(extra_recipe_maintainers):
                    # We hit this case when there is no existing recipe.
                    extra_recipe_maintainers = make_array({}, 'extra/recipe-maintainers', True)
                extra_recipe_maintainers.append(new_maintainer)
        if len(extra_recipe_maintainers):
            # Sort the maintainer entries, keeping the "recipe-maintainers:" header first.
            extra_recipe_maintainers[1:] = sorted(extra_recipe_maintainers[1:])
            extra_recipe_maintainers.insert(0, "extra:\n  ")
        d['extra_recipe_maintainers'] = ''.join(extra_recipe_maintainers)
        d['patches'] = ''.join(patches)
        d['script_env'] = ''.join(script_env)
        d['build_number'] = build_number

        cached_path = None
        cran_layout = copy.deepcopy(cran_layout_template)
        available = {}
        description_path = None

        for archive_type, archive_details in iteritems(cran_layout):
            contrib_url = ''
            archive_details['cran_version'] = d['cran_version']
            archive_details['conda_version'] = d['conda_version']
            if is_archive and archive_type == 'source':
                archive_details['dir'] += 'Archive/' + package + '/'

            available_artefact = True if archive_type == 'source' else \
                package in archive_details['binaries'] and \
                any(d['cran_version'] == v for v, _ in archive_details['binaries'][package])
            if not available_artefact:
                if use_when_no_binary == 'error':
                    print("ERROR: --use-when-no-binary is error (and there is no binary)")
                    sys.exit(1)
                elif use_when_no_binary.startswith('old'):
                    if package not in archive_details['binaries']:
                        if use_when_no_binary.endswith('src'):
                            available_artefact = False
                            archive_details['use_this'] = False
                            continue
                        else:
                            print("ERROR: No binary nor old binary found "
                                  "(maybe pass --use-when-no-binary=old-src to fallback to source?)")
                            sys.exit(1)
                    # Version needs to be stored in archive_details.
                    archive_details['cranurl'] = archive_details['binaries'][package][-1][1]
                    archive_details['conda_version'] = archive_details['binaries'][package][-1][0]
                    archive_details['cran_version'] = \
                        archive_details['conda_version'].replace('_', '-')
                    available_artefact = True

            # We may need to inspect the file later to determine which compilers are needed.
            cached_path = None
            sha256 = hashlib.sha256()
            if archive_details['use_this'] and available_artefact:
                if is_tarfile:
                    filename = basename(location)
                    contrib_url = relpath(location, dir_path)
                    contrib_url_rendered = package_url = contrib_url
                    cached_path = location
                elif not is_github_url or archive_type != 'source':
                    filename_rendered = '{}_{}{}'.format(
                        package, archive_details['cran_version'], archive_details['ext'])
                    filename = f'{package}_{{{{ version }}}}' + archive_details['ext']
                    contrib_url = '{{{{ cran_mirror }}}}/{}'.format(archive_details['dir'])
                    contrib_url_rendered = cran_url + '/{}'.format(archive_details['dir'])
                    package_url = contrib_url_rendered + filename_rendered
                    print(f"Downloading {archive_type} from {package_url}")
                    try:
                        cached_path, _ = source.download_to_cache(
                            config.src_cache, '',
                            {'url': package_url, 'fn': archive_type + '-' + filename_rendered})
                    except:
                        print("logic error, file {} should exist, we found it in a dir listing earlier."
                              .format(package_url))
                        sys.exit(1)
                if description_path is None or archive_type == 'source':
                    description_path = cached_path

                available_details = {}
                available_details['selector'] = archive_details['selector']
                available_details['cran_version'] = archive_details['cran_version']
                available_details['conda_version'] = archive_details['conda_version']
                if cached_path:
                    sha256.update(open(cached_path, 'rb').read())
                    archive_details['cranurl'] = package_url
                available_details['filename'] = filename
                available_details['contrib_url'] = contrib_url
                available_details['contrib_url_rendered'] = contrib_url_rendered
                available_details['hash_entry'] = f'sha256: {sha256.hexdigest()}'
                available_details['cached_path'] = cached_path
                # This is rubbish; d[] should be renamed global[] and should be
                # merged into source and binaryN.
                if archive_type == 'source':
                    if is_github_url:
                        available_details['url_key'] = ''
                        available_details['git_url_key'] = 'git_url:'
                        available_details['git_tag_key'] = 'git_tag:'
                        hash_msg = '# You can add a hash for the file here, (md5, sha1 or sha256)'
                        available_details['hash_entry'] = hash_msg
                        available_details['filename'] = ''
                        available_details['cranurl'] = ''
                        available_details['git_url'] = url
                        available_details['git_tag'] = new_git_tag
                        available_details['archive_keys'] = ''
                    else:
                        available_details['url_key'] = 'url:'
                        available_details['git_url_key'] = ''
                        available_details['git_tag_key'] = ''
                        available_details['cranurl'] = ' ' + contrib_url + filename
                        available_details['git_url'] = ''
                        available_details['git_tag'] = ''
                else:
                    available_details['cranurl'] = archive_details['cranurl']

                available_details['patches'] = d['patches']
                available[archive_type] = available_details

        # Figure out the selectors according to what is available.
        _all = ['linux', 'win32', 'win64', 'osx']
        from_source = _all[:]
        binary_id = 1
        for archive_type, archive_details in iteritems(available):
            if archive_type == 'source':
                for k, v in iteritems(archive_details):
                    d[k] = v
            else:
                sel = archive_details['selector']
                # The file exists, so no need to build this platform from source.
                from_source.remove(sel)
                binary_id += 1
        if from_source == _all:
            sel_src = ""
            sel_src_and_win = '  # [win]'
            sel_src_not_win = '  # [not win]'
        else:
            sel_src = '  # [' + ' or '.join(from_source) + ']'
            sel_src_and_win = '  # [' + ' or '.join(fs for fs in from_source
                                                    if fs.startswith('win')) + ']'
            sel_src_not_win = '  # [' + ' or '.join(fs for fs in from_source
                                                    if not fs.startswith('win')) + ']'
        sel_cross = "  # [build_platform != target_platform]"
        d['sel_src'] = sel_src
        d['sel_src_and_win'] = sel_src_and_win
        d['sel_src_not_win'] = sel_src_not_win
        d['from_source'] = from_source

        if 'source' in available:
            available_details = available['source']
            available_details['sel'] = sel_src
            filename = available_details['filename']
            if 'contrib_url' in available_details:
                contrib_url = available_details['contrib_url']
                if archive:
                    if is_tarfile:
                        available_details['cranurl'] = (INDENT + contrib_url)
                    elif not is_archive:
                        available_details['cranurl'] = (INDENT + contrib_url +
                                                        filename + sel_src + INDENT + contrib_url +
                                                        f'Archive/{package}/' + filename + sel_src)
                else:
                    available_details['cranurl'] = ' ' + contrib_url + filename + sel_src
            if not is_github_url:
                available_details['archive_keys'] = '{url_key}{sel}' \
                                                    '    {cranurl}\n' \
                                                    '  {hash_entry}{sel}'.format(
                                                        **available_details)

        # Extract the DESCRIPTION data from the source
        if cran_package is None:
            cran_package = get_archive_metadata(description_path)

        d['cran_metadata'] = '\n'.join(['# %s' % line for line in
                                        cran_package['orig_lines'] if line])

        # Render the source and binaryN keys
        binary_id = 1
        d['version_binary1'] = d['version_binary2'] = ""
        for archive_type, archive_details in iteritems(available):
            if archive_type == 'source':
                d['source'] = SOURCE_META.format(**archive_details)
                d['version_source'] = VERSION_META.format(**archive_details)
            else:
                archive_details['sel'] = '  # [' + archive_details['selector'] + ']'
                d['binary' + str(binary_id)] = BINARY_META.format(**archive_details)
                d['version_binary' + str(binary_id)] = VERSION_META.format(**archive_details)
                binary_id += 1

        license_info = get_license_info(cran_package.get("License", "None"),
                                        allowed_license_families)
        d['license'], d['license_file'], d['license_family'] = license_info

        if 'License_is_FOSS' in cran_package:
            d['license'] += ' (FOSS)'
        if cran_package.get('License_restricts_use') == 'yes':
            d['license'] += ' (Restricts use)'

        if "URL" in cran_package:
            d['home_comment'] = ''
            d['homeurl'] = ' ' + yaml_quote_string(cran_package['URL'])
        else:
            # use CRAN page as homepage if nothing has been specified
            d['home_comment'] = ''
            if is_github_url:
                d['homeurl'] = f' {location}'
            else:
                d['homeurl'] = f' https://CRAN.R-project.org/package={package}'

        if not use_noarch_generic or cran_package.get("NeedsCompilation", 'no') == 'yes':
            d['noarch_generic'] = ''
        else:
            d['noarch_generic'] = 'noarch: generic'

        if 'Description' in cran_package:
            d['summary_comment'] = ''
            d['summary'] = ' ' + yaml_quote_string(cran_package['Description'])

        if "Suggests" in cran_package and not no_comments:
            d['suggests'] = "# Suggests: %s" % cran_package['Suggests']
        else:
            d['suggests'] = ''

        # Every package depends on at least R.
        # I'm not sure what the difference between depends and imports is.
        depends = [s.strip() for s in cran_package.get('Depends', '').split(',') if s.strip()]
        imports = [s.strip() for s in cran_package.get('Imports', '').split(',') if s.strip()]
        links = [s.strip() for s in cran_package.get("LinkingTo", '').split(',') if s.strip()]

        dep_dict = {}

        seen = set()
        for s in list(chain(imports, depends, links)):
            match = VERSION_DEPENDENCY_REGEX.match(s)
            if not match:
                sys.exit("Could not parse version from dependency of %s: %s" %
                         (package, s))
            name = match.group('name')
            if name in seen:
                continue
            seen.add(name)
            archs = match.group('archs')
            relop = match.group('relop') or ''
            ver = match.group('version') or ''
            ver = ver.replace('-', '_')
            # If there is a relop there should be a version
            assert not relop or ver

            if archs:
                sys.exit("Don't know how to handle archs from dependency of "
                         "package %s: %s" % (package, s))

            dep_dict[name] = f'{relop}{ver}'

        if 'R' not in dep_dict:
            dep_dict['R'] = ''

        os_type = cran_package.get("OS_type", '')
        if os_type != 'unix' and os_type != 'windows' and os_type != '':
            print(f"Unknown OS_type: {os_type} in CRAN package")
            os_type = ''
        if os_type == 'unix':
            d['skip_os'] = 'skip: True  # [not unix]'
            d["noarch_generic"] = ""
        if os_type == 'windows':
            d['skip_os'] = 'skip: True  # [not win]'
            d["noarch_generic"] = ""
        if os_type == '' and no_comments:
            d['skip_os'] = ''
        elif os_type == '':
            d['skip_os'] = '# no skip'

        need_git = is_github_url
        if cran_package.get("NeedsCompilation", 'no') == 'yes':
            with tarfile.open(available['source']['cached_path']) as tf:
                need_f = any([f.name.lower().endswith(('.f', '.f90', '.f77', '.f95', '.f03'))
                              for f in tf])
                # Fortran builds use CC to perform the link (they do not call the linker directly).
                need_c = True if need_f else \
                    any([f.name.lower().endswith('.c') for f in tf])
                need_cxx = any([f.name.lower().endswith(('.cxx', '.cpp', '.cc', '.c++'))
                                for f in tf])
                need_autotools = any([f.name.lower().endswith('/configure') for f in tf])
                need_make = True if any((need_autotools, need_f, need_cxx, need_c)) else \
                    any([f.name.lower().endswith(('/makefile', '/makevars')) for f in tf])
        else:
            need_c = need_cxx = need_f = need_autotools = need_make = False

        if 'Rcpp' in dep_dict or 'RcppArmadillo' in dep_dict:
            need_cxx = True

        if need_cxx:
            need_c = True

        for dep_type in ['build', 'host', 'run']:

            deps = []
            # Put non-R dependencies first.
            if dep_type == 'build':
                if need_c:
                    deps.append("{indent}{{{{ compiler('c') }}}} {sel}".format(
                        indent=INDENT, sel=sel_src_not_win))
                    deps.append("{indent}{{{{ compiler('m2w64_c') }}}} {sel}".format(
                        indent=INDENT, sel=sel_src_and_win))
                if need_cxx:
                    deps.append("{indent}{{{{ compiler('cxx') }}}} {sel}".format(
                        indent=INDENT, sel=sel_src_not_win))
                    deps.append("{indent}{{{{ compiler('m2w64_cxx') }}}} {sel}".format(
                        indent=INDENT, sel=sel_src_and_win))
                if need_f:
                    deps.append("{indent}{{{{ compiler('fortran') }}}} {sel}".format(
                        indent=INDENT, sel=sel_src_not_win))
                    deps.append("{indent}{{{{ compiler('m2w64_fortran') }}}}{sel}".format(
                        indent=INDENT, sel=sel_src_and_win))
                if use_rtools_win:
                    need_c = need_cxx = need_f = need_autotools = need_make = False
                    deps.append("{indent}rtools {sel}".format(
                        indent=INDENT, sel=sel_src_and_win))
                    # extsoft is legacy. R packages will download rwinlib subprojects
                    # as necessary according to Jeroen Ooms. (may need to disable that
                    # for non-MRO builds or maybe switch to Jeroen's toolchain?)
                    # deps.append("{indent}{{{{native}}}}extsoft {sel}".format(
                    #     indent=INDENT, sel=sel_src_and_win))
                if need_autotools or need_make or need_git:
                    deps.append("{indent}{{{{ posix }}}}filesystem {sel}".format(
                        indent=INDENT, sel=sel_src_and_win))
                if need_git:
                    deps.append(f"{INDENT}{{{{ posix }}}}git")
                if need_autotools:
                    deps.append("{indent}{{{{ posix }}}}sed {sel}".format(
                        indent=INDENT, sel=sel_src_and_win))
                    deps.append("{indent}{{{{ posix }}}}grep {sel}".format(
                        indent=INDENT, sel=sel_src_and_win))
                    deps.append("{indent}{{{{ posix }}}}autoconf {sel}".format(
                        indent=INDENT, sel=sel_src))
                    deps.append("{indent}{{{{ posix }}}}automake {sel}".format(
                        indent=INDENT, sel=sel_src_not_win))
                    deps.append("{indent}{{{{ posix }}}}automake-wrapper{sel}".format(
                        indent=INDENT, sel=sel_src_and_win))
                    deps.append(f"{INDENT}{{{{ posix }}}}pkg-config")
                if need_make:
                    deps.append("{indent}{{{{ posix }}}}make {sel}".format(
                        indent=INDENT, sel=sel_src))
                    if not need_autotools:
                        deps.append("{indent}{{{{ posix }}}}sed {sel}".format(
                            indent=INDENT, sel=sel_src_and_win))
                    deps.append("{indent}{{{{ posix }}}}coreutils {sel}".format(
                        indent=INDENT, sel=sel_src_and_win))
                deps.append("{indent}{{{{ posix }}}}zip {sel}".format(
                    indent=INDENT, sel=sel_src_and_win))
                if add_cross_r_base:
                    deps.append(f"{INDENT}cross-r-base {{{{ r_base }}}} {sel_cross}")
            elif dep_type == 'run':
                if need_c or need_cxx or need_f:
                    deps.append("{indent}{{{{native}}}}gcc-libs {sel}".format(
                        indent=INDENT, sel=sel_src_and_win))

            if dep_type == 'host' or dep_type == 'run':
                for name in sorted(dep_dict):
                    if name in R_BASE_PACKAGE_NAMES:
                        continue
                    if name == 'R':
                        # Put R first
                        # Regardless of build or run, and whether this is a
                        # recommended package or not, it can only depend on
                        # r_interp since anything else can and will cause
                        # cycles in the dependency graph. The cran metadata
                        # lists all dependencies anyway, even those packages
                        # that are in the recommended group.
                        # We don't include any R version restrictions because
                        # conda-build always pins r-base and mro-base version.
                        deps.insert(0, f'{INDENT}{r_interp}')
                    else:
                        conda_name = 'r-' + name.lower()
                        if dep_dict[name]:
                            deps.append('{indent}{name} {version}'.format(name=conda_name,
                                                                          version=dep_dict[name],
                                                                          indent=INDENT))
                        else:
                            deps.append('{indent}{name}'.format(name=conda_name, indent=INDENT))
                        if recursive:
                            lower_name = name.lower()
                            if lower_name not in package_dicts:
                                inputs_dict = package_to_inputs_dict(output_dir, output_suffix,
                                                                     git_tag, lower_name, None)
                                assert lower_name == inputs_dict['pkg-name'], \
                                    "name {} != inputs_dict['pkg-name'] {}".format(
                                        name, inputs_dict['pkg-name'])
                                assert lower_name not in package_list
                                package_dicts.update({lower_name: {'inputs': inputs_dict}})
                                package_list.append(lower_name)

            d['%s_depends' % dep_type] = ''.join(deps)

    if no_comments:
        global CRAN_BUILD_SH_SOURCE, CRAN_META
        CRAN_BUILD_SH_SOURCE = remove_comments(CRAN_BUILD_SH_SOURCE)
        CRAN_META = remove_comments(CRAN_META)

    for package in package_dicts:
        d = package_dicts[package]
        dir_path = d['inputs']['new-location']
        if exists(dir_path) and not version_compare:
            if update_policy == 'error':
                raise RuntimeError("directory already exists "
                                   "(and --update-policy is 'error'): %s" % dir_path)
            elif update_policy == 'overwrite':
                rm_rf(dir_path)
            elif update_policy == 'skip-up-to-date':
                if cran_index is None:
                    session = get_session(output_dir)
                    cran_index = get_cran_index(cran_url, session)
                if up_to_date(cran_index, d['inputs']['old-metadata']):
                    continue
            elif update_policy == 'skip-existing' and d['inputs']['old-metadata']:
                continue

        from_sources = d['from_source']
        # Normalize the metadata values
        d = {k: unicodedata.normalize("NFKD", text_type(v)).encode('ascii', 'ignore').decode()
             for k, v in iteritems(d)}
        try:
            makedirs(join(dir_path))
        except:
            pass
        print("Writing recipe for %s" % package.lower())
        with open(join(dir_path, 'meta.yaml'), 'w') as f:
            f.write(clear_whitespace(CRAN_META.format(**d)))
        if not exists(join(dir_path, 'build.sh')) or update_policy == 'overwrite':
            with open(join(dir_path, 'build.sh'), 'wb') as f:
                if from_sources == _all:
                    f.write(CRAN_BUILD_SH_SOURCE.format(**d).encode('utf-8'))
                elif from_sources == []:
                    f.write(CRAN_BUILD_SH_BINARY.format(**d).encode('utf-8'))
                else:
                    tpbt = [target_platform_bash_test_by_sel[t] for t in from_sources]
                    d['source_pf_bash'] = ' || '.join(['[[ ${target_platform} ' + s + ' ]]'
                                                      for s in tpbt])
                    f.write(CRAN_BUILD_SH_MIXED.format(**d).encode('utf-8'))
        if not exists(join(dir_path, 'bld.bat')) or update_policy == 'overwrite':
            with open(join(dir_path, 'bld.bat'), 'wb') as f:
                if len([fs for fs in from_sources if fs.startswith('win')]) == 2:
                    f.write(CRAN_BLD_BAT_SOURCE.format(**d).replace('\n', '\r\n').encode('utf-8'))
                else:
                    f.write(CRAN_BLD_BAT_MIXED.format(**d).replace('\n', '\r\n').encode('utf-8'))


def version_compare(recipe_dir, newest_conda_version):
    m = metadata.MetaData(recipe_dir)
    local_version = m.version()
    package = basename(recipe_dir)

    print(f"Local recipe for {package} has version {local_version}.")
    print(f"The version on CRAN for {package} is {newest_conda_version}.")

    return local_version == newest_conda_version


def get_outdated(output_dir, cran_index, packages=()):
    to_update = []
    recipes = listdir(output_dir)
    for recipe in recipes:
        if not recipe.startswith('r-') or not isdir(recipe):
            continue

        recipe_name = recipe[2:]

        if packages and not (recipe_name in packages or recipe in packages):
            continue

        if recipe_name not in cran_index:
            print("Skipping %s, not found on CRAN" % recipe)
            continue

        version_compare(join(output_dir, recipe),
                        cran_index[recipe_name][1].replace('-', '_'))

        print("Updating %s" % recipe)
        to_update.append(recipe_name)

    return to_update
def get_existing(output_dir, cran_index, packages=()):
    existing = []
    recipes = listdir(output_dir)
    for recipe in recipes:
        if not recipe.startswith('r-') or not isdir(recipe):
            continue

        recipe_name = recipe[2:]

        if packages and not (recipe_name in packages or recipe in packages):
            continue

        existing.append(recipe_name)

    return existing


def up_to_date(cran_index, package):
    r_pkg_name, location, old_git_rev, m = package
    cran_pkg_name = r_pkg_name[2:]

    # Does not exist, so is not up to date.
    if not m:
        return False

    # For now. We can do better; need to collect *all* information upfront.
    if 'github.com' in location:
        return False
    else:
        if cran_pkg_name not in cran_index:
            return False
        name, version = cran_index[cran_pkg_name]

    if version and m.version() != version:
        return False

    return True


def get_license_info(license_text, allowed_license_families):
    """
    Most R packages on CRAN do not include a license file. Instead, to avoid
    duplication, R base ships with common software licenses:

        complete: AGPL-3, Artistic-2.0, GPL-2, GPL-3, LGPL-2, LGPL-2.1, LGPL-3
        template: BSD_2_clause, BSD_3_clause, MIT

    The complete licenses can be included in conda binaries by pointing to the
    license file shipped with R base. The template files are more complicated
    because they would need to be combined with the license information
    provided by the package authors. In this case, the template file and the
    license information file are both packaged.

    All optional ('|' separated) licenses are included, if they are matching.

    This function returns the path to the license file for the unambiguous
    cases.
    """
    # The list order matters. The first element should be the name of the
    # license file shipped with r-base.
    d_license = {'agpl3': ['AGPL-3', 'AGPL (>= 3)', 'AGPL',
                           'GNU Affero General Public License'],
                 'artistic2': ['Artistic-2.0', 'Artistic License 2.0'],
                 'gpl2': ['GPL-2', 'GPL (>= 2)', 'GNU General Public License (>= 2)'],
                 'gpl3': ['GPL-3', 'GPL (>= 3)', 'GNU General Public License (>= 3)',
                          'GPL', 'GNU General Public License'],
                 'lgpl2': ['LGPL-2', 'LGPL (>= 2)'],
                 'lgpl21': ['LGPL-2.1', 'LGPL (>= 2.1)'],
                 'lgpl3': ['LGPL-3', 'LGPL (>= 3)', 'LGPL',
                           'GNU Lesser General Public License'],
                 'bsd2': ['BSD_2_clause', 'BSD_2_Clause', 'BSD 2-clause License'],
                 'bsd3': ['BSD_3_clause', 'BSD_3_Clause', 'BSD 3-clause License'],
                 'mit': ['MIT'],
                 }

    license_file_template = \
        '\'{{{{ environ["PREFIX"] }}}}/lib/R/share/licenses/{license_id}\''

    license_texts = []
    license_files = []
    # split license_text by "|" and "+" into parts for further matching
    license_text_parts = [l_opt.strip() for l_opt in re.split(r'\||\+', license_text)]
    for l_opt in license_text_parts:
        # the file case
        if l_opt.startswith("file "):
            license_files.append(l_opt[5:])
            continue
        # license id string to match for
        for license_id in d_license.keys():
            if l_opt in d_license[license_id]:
                l_opt_text = d_license[license_id][0]
                license_texts.append(l_opt_text)
                license_files.append(license_file_template.format(license_id=l_opt_text))
                break

    # Join the matches, or fall back to the original license_text if nothing matched.
    license_text = " | ".join(license_texts) or license_text
    # Build the license_file entry, leaving it empty if there is no license file.
    license_file = ("license_file:\n    - " + "\n    - ".join(license_files)
                    if license_files else "")

    # Only one license family is allowed, so guess it once from the joined text.
    license_family = guess_license_family(license_text, allowed_license_families)

    return license_text, license_file, license_family
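# Illustrative example (not part of the original module): a multi-license
# entry with a shipped license file, assuming guess_license_family() maps
# 'GPL-2' to the 'GPL2' family:
#
#     >>> text, file_entry, family = get_license_info(
#     ...     'GPL-2 | file LICENSE', allowed_license_families)
#     >>> text
#     'GPL-2'
#     >>> print(file_entry)
#     license_file:
#         - '{{ environ["PREFIX"] }}/lib/R/share/licenses/GPL-2'
#         - LICENSE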