# TODO: various stuff just pasted together in one place; needs clean up # -*- coding: utf-8 -*- from __future__ import print_function, absolute_import, division import os import importlib import inspect import argparse import distutils.dir_util import shutil def _find_examples(name): module_path = os.path.dirname(inspect.getfile(importlib.import_module(name))) candidates = [ # installed package os.path.join(module_path,"examples"), # git repo os.path.join(module_path,"..","examples")] for candidate in candidates: if os.path.exists(candidate): return candidate raise ValueError("Could not find examples for %s at any of %s"%(name,candidates)) def examples(name,path,verbose=False,use_test_data=False,force=False): """ Copy examples and fetch data (if any) to the supplied path. See copy-examples and fetch-data for more flexibility. NOTE: force operates both on example and data over-writing pre-existing files. """ copy_examples(name, path, verbose, force) fetch_data(name,path,require_datasets=False,use_test_data=use_test_data,force=force) def copy_examples(name,path,verbose=False,force=False): """Copy examples to the supplied path.""" source = _find_examples(name) path = os.path.abspath(path) if os.path.exists(path) and not force: raise ValueError("Path %s already exists; please move it away, choose a different path, or use force."%path) if verbose: print("Copying examples from %s"%source) distutils.dir_util.copy_tree(source, path, verbose=verbose) print("Copied examples to %s"%path) """ Copyright (c) 2011, Kenneth Reitz Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. clint.textui.progress ~~~~~~~~~~~~~~~~~ This module provides the progressbar functionality. """ from collections import OrderedDict import glob import sys import tarfile import time import zipfile import yaml try: import requests except ImportError: requests = None # TODO # if requests is None: # print('this download script requires the requests module: conda install requests') # sys.exit(1) STREAM = sys.stderr BAR_TEMPLATE = '%s[%s%s] %i/%i - %s\r' MILL_TEMPLATE = '%s %s %i/%i\r' DOTS_CHAR = '.' BAR_FILLED_CHAR = '#' BAR_EMPTY_CHAR = ' ' MILL_CHARS = ['|', '/', '-', '\\'] # How long to wait before recalculating the ETA ETA_INTERVAL = 1 # How many intervals (excluding the current one) to calculate the simple moving # average ETA_SMA_WINDOW = 9 DATA_DIR = 'data' DATA_STUBS_DIR = '.data_stubs' class Bar(object): def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.done() return False # we're not suppressing exceptions def __init__(self, label='', width=32, hide=None, empty_char=BAR_EMPTY_CHAR, filled_char=BAR_FILLED_CHAR, expected_size=None, every=1): '''Bar is a class for printing the status of downloads''' self.label = label self.width = width self.hide = hide # Only show bar in terminals by default (better for piping, logging etc.) 
if hide is None: try: self.hide = not STREAM.isatty() except AttributeError: # output does not support isatty() self.hide = True self.empty_char = empty_char self.filled_char = filled_char self.expected_size = expected_size self.every = every self.start = time.time() self.ittimes = [] self.eta = 0 self.etadelta = time.time() self.etadisp = self.format_time(self.eta) self.last_progress = 0 if (self.expected_size): self.show(0) def show(self, progress, count=None): if count is not None: self.expected_size = count if self.expected_size is None: raise Exception("expected_size not initialized") self.last_progress = progress if (time.time() - self.etadelta) > ETA_INTERVAL: self.etadelta = time.time() self.ittimes = \ self.ittimes[-ETA_SMA_WINDOW:] + \ [-(self.start - time.time()) / (progress+1)] self.eta = \ sum(self.ittimes) / float(len(self.ittimes)) * \ (self.expected_size - progress) self.etadisp = self.format_time(self.eta) x = int(self.width * progress / self.expected_size) if not self.hide: if ((progress % self.every) == 0 or # True every "every" updates (progress == self.expected_size)): # And when we're done STREAM.write(BAR_TEMPLATE % ( self.label, self.filled_char * x, self.empty_char * (self.width - x), progress, self.expected_size, self.etadisp)) STREAM.flush() def done(self): self.elapsed = time.time() - self.start elapsed_disp = self.format_time(self.elapsed) if not self.hide: # Print completed bar with elapsed time STREAM.write(BAR_TEMPLATE % ( self.label, self.filled_char * self.width, self.empty_char * 0, self.last_progress, self.expected_size, elapsed_disp)) STREAM.write('\n') STREAM.flush() def format_time(self, seconds): return time.strftime('%H:%M:%S', time.gmtime(seconds)) def bar(it, label='', width=32, hide=None, empty_char=BAR_EMPTY_CHAR, filled_char=BAR_FILLED_CHAR, expected_size=None, every=1): """Progress iterator. 
Wrap your iterables with it.""" count = len(it) if expected_size is None else expected_size with Bar(label=label, width=width, hide=hide, empty_char=BAR_EMPTY_CHAR, filled_char=BAR_FILLED_CHAR, expected_size=count, every=every) \ as bar: for i, item in enumerate(it): yield item bar.show(i + 1) def ordered_load(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict): class OrderedLoader(Loader): pass def construct_mapping(loader, node): loader.flatten_mapping(node) return object_pairs_hook(loader.construct_pairs(node)) OrderedLoader.add_constructor( yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping) return yaml.load(stream, OrderedLoader) class DirectoryContext(object): """ Context Manager for changing directories """ def __init__(self, path): self.old_dir = os.getcwd() self.new_dir = path def __enter__(self): os.chdir(self.new_dir) def __exit__(self, *args): os.chdir(self.old_dir) def _url_to_binary_write(url, output_path, title): '''Given a url, output_path and title, write the contents of a requests get operation to the url in binary mode and print the title of operation''' print('Downloading {0}'.format(title)) resp = requests.get(url, stream=True) try: with open(output_path, 'wb') as f: total_length = int(resp.headers.get('content-length')) for chunk in bar(resp.iter_content(chunk_size=1024), expected_size=(total_length/1024) + 1, every=1000): if chunk: f.write(chunk) f.flush() except: # Don't leave a half-written zip file if os.path.exists(output_path): os.remove(output_path) raise def _extract_downloaded_archive(output_path): '''Extract a local archive, e.g. 
zip or tar, then delete the archive''' if output_path.endswith("tar.gz"): with tarfile.open(output_path, "r:gz") as tar: tar.extractall() os.remove(output_path) elif output_path.endswith("tar"): with tarfile.open(output_path, "r:") as tar: tar.extractall() os.remove(output_path) elif output_path.endswith("tar.bz2"): with tarfile.open(output_path, "r:bz2") as tar: tar.extractall() os.remove(output_path) elif output_path.endswith("zip"): with zipfile.ZipFile(output_path, 'r') as zipf: zipf.extractall() os.remove(output_path) def _process_dataset(dataset, output_dir, here, use_test_data=False, force=False): '''Process each download spec in datasets.yml Typically each dataset list entry in the yml has "files" and "url" and "title" keys/values to show local files that must be present / extracted from a decompression of contents downloaded from the url. If a url endswith '/', then all files given are assumed to be added to the url pattern at the end ''' if not os.path.exists(output_dir): os.makedirs(output_dir) with DirectoryContext(output_dir): requires_download = False for f in dataset.get('files', []): if not os.path.exists(f): requires_download = True break if force is False and not requires_download: print('Skipping {0}'.format(dataset['title'])) return url = dataset['url'] title_fmt = dataset['title'] + ' {} of {}' if url.endswith('/'): urls = [url + f for f in dataset['files']] output_paths = [os.path.join(here, DATA_DIR, fname) for fname in dataset['files']] unpacked = ['.'.join(output_path.split('.')[:(-2 if output_path.endswith('gz') else -1)]) + '*' for output_path in output_paths] else: urls = [url] output_paths = [os.path.split(url)[1]] unpacked = dataset['files'] if not isinstance(unpacked, (tuple, list)): unpacked = [unpacked] zipped = zip(urls, output_paths, unpacked) for idx, (url, output_path, unpack) in enumerate(zipped): running_title = title_fmt.format(idx + 1, len(urls)) if force is False and (glob.glob(unpack) or 
os.path.exists(unpack.replace('*',''))): # Skip a file if a similar one is downloaded: # i.e. one that has same name but dif't extension print('Skipping {0}'.format(running_title)) continue test = os.path.join(output_dir, DATA_STUBS_DIR, unpack) if use_test_data and os.path.exists(test): target = os.path.join(output_dir, unpack) print("Copying test data file '{0}' to '{1}'".format(test, target)) shutil.copyfile(test, target) continue elif use_test_data and not os.path.exists(test): print("No test file found for: {}. Using regular file instead".format(test)) _url_to_binary_write(url, output_path, running_title) _extract_downloaded_archive(output_path) if requests is None: print('this download script requires the requests module: conda install requests') sys.exit(1) def fetch_data(name,path,datasets="datasets.yml",require_datasets=True,use_test_data=False,force=False): '''Fetch sample datasets as defined by path/datasets if it exists or else module's own examples/datasets otherwise. Datasets are placed in path/data ''' path = os.path.abspath(path) info_file = os.path.join(path,datasets) if not os.path.exists(info_file): info_file = os.path.join(_find_examples(name),datasets) if not os.path.exists(info_file) and require_datasets is False: print("No datasets to download") return print("Fetching data defined in %s and placing in %s"%(info_file,os.path.join(path,DATA_DIR))) # data is added later... 
with open(info_file) as f: info = ordered_load(f.read()) for topic, downloads in info.items(): output_dir = os.path.join(path, topic) for d in downloads: _process_dataset(d, output_dir, path, use_test_data=use_test_data, force=force) def clean_data(name, path): '''Remove up any data files that are copied from test files ''' path = os.path.abspath(path) if not os.path.exists(path): path = _find_examples(name) data_dir = os.path.join(path, DATA_DIR) test_dir = os.path.join(data_dir, DATA_STUBS_DIR) if not os.path.exists(test_dir) or len(os.listdir(test_dir)) == 0: print("No test files found") return for f in os.listdir(test_dir): data_file = os.path.join(data_dir, f) if not os.path.isfile(data_file): print("Test file was not copied to data:", f) continue test_file = os.path.join(test_dir, f) if os.path.isfile(test_file): data_s = os.path.getsize(data_file) test_s = os.path.getsize(test_file) if data_s == test_s: print("Removing copied test file:", f) os.remove(data_file) else: print("Size of test file {:.2e} did not match " "size of data file {:.2e}".format(test_s, data_s)) # TODO: cmds=None defaults to 'all', basically, which is a bit confusing # the alternative is a plugin system? 
# Commands registered when the caller does not restrict them.
# TODO: use a dict/registry of commands instead of the if-chain below.
_DEFAULT_CMDS = ('examples', 'copy-examples', 'fetch-data', 'clean-data')

_USE_TEST_DATA_HELP = ("Use data's test files, if any, instead of fetching full data. "
                       "If test file not in '.data_stubs', fall back to fetching full data.")


def add_commands(parser, name, cmds=None, args=None):
    """Add the example/data sub-commands for module ``name`` to ``parser``.

    Use to add commands to an existing parser (see substitute_main for an
    alternative).

    Args:
        parser: the object returned by ``ArgumentParser.add_subparsers()``.
        name: module name; handed to the command functions and used to build
            the default ``--path`` (``<name>-examples``).
        cmds: iterable of command names to register; ``None`` registers all
            of 'examples', 'copy-examples', 'fetch-data' and 'clean-data'.
            Names other than these four are ignored.
        args: unused; accepted for backward compatibility.

    Each registered sub-parser stores a ``func`` default that takes the
    parsed namespace and runs the command.
    """
    # TODO: should be cleaned up
    if cmds is None:
        cmds = list(_DEFAULT_CMDS)

    default_path = '%s-examples' % name

    if 'copy-examples' in cmds:
        eg_parser = parser.add_parser('copy-examples', help=inspect.getdoc(copy_examples))
        eg_parser.set_defaults(func=lambda ns: copy_examples(name, ns.path, ns.verbose, ns.force))
        eg_parser.add_argument('--path', type=str, help='where to copy examples', default=default_path)
        eg_parser.add_argument('-v', '--verbose', action='count', default=0)
        eg_parser.add_argument('--force', action='store_true',
                               help='if PATH already exists, force overwrite existing files if older than source files')

    if 'fetch-data' in cmds:
        d_parser = parser.add_parser('fetch-data', help=inspect.getdoc(fetch_data))
        d_parser.set_defaults(func=lambda ns: fetch_data(name, ns.path, ns.datasets,
                                                         use_test_data=ns.use_test_data,
                                                         force=ns.force))
        d_parser.add_argument('--path', type=str, help='where to put data', default=default_path)
        d_parser.add_argument('--datasets', type=str,
                              help='*name* of datasets file; must exist either in path specified by --path or in package/examples/',
                              default='datasets.yml')
        # Accepted for symmetry with the other commands; not passed on to fetch_data.
        d_parser.add_argument('-v', '--verbose', action='count', default=0)
        d_parser.add_argument('--force', action='store_true',
                              help='Force any existing data files to be replaced')
        d_parser.add_argument('--use-test-data', action='store_true', help=_USE_TEST_DATA_HELP)

    if 'examples' in cmds:
        egd_parser = parser.add_parser('examples', help=inspect.getdoc(examples))
        egd_parser.set_defaults(func=lambda ns: examples(name, ns.path, ns.verbose,
                                                         ns.use_test_data, ns.force))
        egd_parser.add_argument('--path', type=str, help='location to place examples and data', default=default_path)
        egd_parser.add_argument('-v', '--verbose', action='count', default=0)
        egd_parser.add_argument('--force', action='store_true',
                                help=('if PATH already exists, force overwrite existing examples if older '
                                      'than source examples. ALSO force any existing data files to be replaced'))
        egd_parser.add_argument('--use-test-data', action='store_true', help=_USE_TEST_DATA_HELP)

    if 'clean-data' in cmds:
        cd_parser = parser.add_parser('clean-data', help=inspect.getdoc(clean_data))
        cd_parser.set_defaults(func=lambda ns: clean_data(name, ns.path))
        cd_parser.add_argument('--path', type=str, help='where to clean data', default=default_path)


def substitute_main(name, cmds=None, args=None):
    """Run a complete command-line interface for module ``name``.

    Can be used as ``main`` if your module has no other commands. Builds a
    parser with a ``--version`` option (taken from the module's
    ``__version__``) plus the sub-commands from add_commands, parses the
    arguments and runs the selected command.

    Args:
        name: importable module name; the module must define ``__version__``.
        cmds: command names to offer; ``None`` offers all of them.
        args: list of argument strings to parse; ``None`` (the default)
            parses ``sys.argv[1:]``.

    Exits via ``parser.error`` if no command was supplied.
    """
    if cmds is None:
        cmds = list(_DEFAULT_CMDS)

    mod = importlib.import_module(name)
    parser = argparse.ArgumentParser(description="%s commands" % name)
    parser.add_argument('--version', action='version', version='%(prog)s ' + mod.__version__)
    subparsers = parser.add_subparsers(title='available commands')
    add_commands(subparsers, name, cmds, args)

    # Honor the caller-supplied argument list; parse_args(None) falls back to
    # sys.argv[1:], so the default behavior is unchanged.
    parsed = parser.parse_args(args)
    if hasattr(parsed, 'func'):
        parsed.func(parsed)
    else:
        parser.error("must supply command to run")