""" Utility functions that are unrelated to datashape Do not import datashape modules into this module. See util.py in that case """ from __future__ import print_function, division, absolute_import import keyword import re class IndexCallable(object): """ Provide getitem syntax for functions >>> def inc(x): ... return x + 1 >>> I = IndexCallable(inc) >>> I[3] 4 """ __slots__ = 'fn', def __init__(self, fn): self.fn = fn def __getitem__(self, key): return self.fn(key) def remove(predicate, seq): return filter(lambda x: not predicate(x), seq) # Taken from theano/theano/gof/sched.py # Avoids licensing issues because this was written by Matthew Rocklin def reverse_dict(d): """Reverses direction of dependence dict >>> d = {'a': (1, 2), 'b': (2, 3), 'c':()} >>> reverse_dict(d) # doctest: +SKIP {1: ('a',), 2: ('a', 'b'), 3: ('b',)} :note: dict order are not deterministic. As we iterate on the input dict, it make the output of this function depend on the dict order. So this function output order should be considered as undeterministic. """ result = {} for key in d: for val in d[key]: result[val] = result.get(val, tuple()) + (key, ) return result # Taken from theano/theano/gof/sched.py # Avoids licensing issues because this was written by Matthew Rocklin def _toposort(edges): """ Topological sort algorithm by Kahn [1] - O(nodes + vertices) inputs: edges - a dict of the form {a: {b, c}} where b and c depend on a outputs: L - an ordered list of nodes that satisfy the dependencies of edges >>> _toposort({1: (2, 3), 2: (3, )}) [1, 2, 3] Closely follows the wikipedia page [2] [1] Kahn, Arthur B. (1962), "Topological sorting of large networks", Communications of the ACM [2] http://en.wikipedia.org/wiki/Toposort#Algorithms """ incoming_edges = reverse_dict(edges) incoming_edges = dict((k, set(val)) for k, val in incoming_edges.items()) S = set((v for v in edges if v not in incoming_edges)) L = [] while S: n = S.pop() L.append(n) for m in edges.get(n, ()): assert n in incoming_edges[m] incoming_edges[m].remove(n) if not incoming_edges[m]: S.add(m) if any(incoming_edges.get(v) for v in edges): raise ValueError("Input has cycles") return L # Taken from toolz # Avoids licensing issues because this version was authored by Matthew Rocklin def groupby(func, seq): """ Group a collection by a key function >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank'] >>> groupby(len, names) # doctest: +SKIP {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']} >>> iseven = lambda x: x % 2 == 0 >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8]) {False: [1, 3, 5, 7], True: [2, 4, 6, 8]} See Also: ``countby`` """ d = dict() for item in seq: key = func(item) if key not in d: d[key] = list() d[key].append(item) return d def isidentifier(s): return (keyword.iskeyword(s) or re.match(r'^[_a-zA-Z][_a-zA-Z0-9]*$', s) is not None)