from collections import defaultdict

from .sympify import sympify, SympifyError
from sympy.utilities.iterables import iterable, uniq


__all__ = ['default_sort_key', 'ordered']


def default_sort_key(item, order=None):
    """Return a key that can be used for sorting.

    The key has the structure:

    (class_key, (len(args), args), exponent.sort_key(), coefficient)

    This key is supplied by the sort_key routine of Basic objects when
    ``item`` is a Basic object or an object (other than a string) that
    sympifies to a Basic object. Otherwise, this function produces the
    key.

    The ``order`` argument is passed along to the sort_key routine and is
    used to determine how the terms *within* an expression are ordered.
    (See examples below) ``order`` options are: 'lex', 'grlex', 'grevlex',
    and reversed values of the same (e.g. 'rev-lex'). The default order
    value is None (which translates to 'lex').

    Unordered containers (dict, set, frozenset) have the keys of their
    elements sorted so that the resulting key does not depend on the
    iteration order of the container.

    Examples
    ========

    >>> from sympy import S, I, default_sort_key, sin, cos, sqrt
    >>> from sympy.core.function import UndefinedFunction
    >>> from sympy.abc import x

    The following are equivalent ways of getting the key for an object:

    >>> x.sort_key() == default_sort_key(x)
    True

    Here are some examples of the key that is produced:

    >>> default_sort_key(UndefinedFunction('f'))
    ((0, 0, 'UndefinedFunction'), (1, ('f',)), ((1, 0, 'Number'),
        (0, ()), (), 1), 1)
    >>> default_sort_key('1')
    ((0, 0, 'str'), (1, ('1',)), ((1, 0, 'Number'), (0, ()), (), 1), 1)
    >>> default_sort_key(S.One)
    ((1, 0, 'Number'), (0, ()), (), 1)
    >>> default_sort_key(2)
    ((1, 0, 'Number'), (0, ()), (), 2)

    While sort_key is a method only defined for SymPy objects,
    default_sort_key will accept anything as an argument so it is
    more robust as a sorting key. For the following, using key=
    lambda i: i.sort_key() would fail because 2 doesn't have a sort_key
    method; that's why default_sort_key is used. Note that it also
    handles sympification of non-string items like ints:

    >>> a = [2, I, -I]
    >>> sorted(a, key=default_sort_key)
    [2, -I, I]

    The returned key can be used anywhere that a key can be specified for
    a function, e.g. sort, min, max, etc...:

    >>> a.sort(key=default_sort_key); a[0]
    2
    >>> min(a, key=default_sort_key)
    2

    Note
    ----

    The key returned is useful for getting items into a canonical order
    that will be the same across platforms. It is not directly useful for
    sorting lists of expressions:

    >>> a, b = x, 1/x

    Since ``a`` has only 1 term, its value of sort_key is unaffected by
    ``order``:

    >>> a.sort_key() == a.sort_key('rev-lex')
    True

    If ``a`` and ``b`` are combined then the key will differ because there
    are terms that can be ordered:

    >>> eq = a + b
    >>> eq.sort_key() == eq.sort_key('rev-lex')
    False
    >>> eq.as_ordered_terms()
    [x, 1/x]
    >>> eq.as_ordered_terms('rev-lex')
    [1/x, x]

    But since the keys for each of these terms are independent of ``order``'s
    value, they do not sort differently when they appear separately in a list:

    >>> sorted(eq.args, key=default_sort_key)
    [1/x, x]
    >>> sorted(eq.args, key=lambda i: default_sort_key(i, order='rev-lex'))
    [1/x, x]

    The order of terms obtained when using these keys is the order that would
    be obtained if those terms were *factors* in a product.

    Although it is useful for quickly putting expressions in canonical order,
    it does not sort expressions based on their complexity defined by the
    number of operations, power of variables and others:

    >>> sorted([sin(x)*cos(x), sin(x)], key=default_sort_key)
    [sin(x)*cos(x), sin(x)]
    >>> sorted([x, x**2, sqrt(x), x**3], key=default_sort_key)
    [sqrt(x), x, x**2, x**3]

    See Also
    ========

    ordered, sympy.core.expr.Expr.as_ordered_factors, sympy.core.expr.Expr.as_ordered_terms

    """
    # Imported here rather than at module level, as in the original code
    # (presumably to avoid circular imports within the core package).
    from .basic import Basic
    from .singleton import S

    if isinstance(item, Basic):
        return item.sort_key(order=order)

    if iterable(item, exclude=str):
        # ``exclude=str`` (instead of the default exclusions) lets dicts
        # through so they can be keyed by their items.
        if isinstance(item, dict):
            args = item.items()
            unordered = True
        elif isinstance(item, (set, frozenset)):
            # frozenset iteration order is just as arbitrary as set's, so
            # it must be canonicalized the same way.
            args = item
            unordered = True
        else:
            # e.g. tuple, list
            args = list(item)
            unordered = False

        args = [default_sort_key(arg, order=order) for arg in args]

        if unordered:
            # e.g. dict, set, frozenset: make the key independent of
            # iteration order
            args = sorted(args)

        cls_index, args = 10, (len(args), tuple(args))
    else:
        if not isinstance(item, str):
            try:
                item = sympify(item, strict=True)
            except SympifyError:
                # e.g. lambda x: x
                pass
            else:
                if isinstance(item, Basic):
                    # e.g int -> Integer; forward ``order`` so that the
                    # sympified object is keyed exactly as a Basic
                    # argument would have been.
                    return item.sort_key(order=order)
                # e.g. UndefinedFunction

        # e.g. str
        cls_index, args = 0, (1, (str(item),))

    return (cls_index, 0, item.__class__.__name__
            ), args, S.One.sort_key(), S.One


def _node_count(e):
    """Return the weighted number of nodes in the expression tree of ``e``.

    Every node counts as 1 except a Float, which counts as 0.5 and is not
    descended into.
    """
    # this not only counts nodes, it affirms that the
    # args are Basic (i.e. have an args property). If
    # some object has a non-Basic arg, it needs to be
    # fixed since it is intended that all Basic args
    # are of Basic type (though this is not easy to enforce).
    if e.is_Float:
        return 0.5
    return 1 + sum(map(_node_count, e.args))


def _nodes(e):
    """
    A helper for ordered() which returns the node count of ``e`` which
    for Basic objects is the number of Basic nodes in the expression tree
    but for other objects is 1 (unless the object is an iterable or dict
    for which the sum of nodes is returned).

    For a Derivative the count is the node count of the differentiated
    expression plus, for each ``(variable, count)`` pair, the count itself
    when it is a Number and otherwise the node count of that count.
    """
    from .basic import Basic
    from .function import Derivative

    if isinstance(e, Basic):
        if isinstance(e, Derivative):
            return _nodes(e.expr) + sum(
                i[1] if i[1].is_Number else _nodes(i[1])
                for i in e.variable_count)
        return _node_count(e)
    elif iterable(e):
        return 1 + sum(_nodes(ei) for ei in e)
    elif isinstance(e, dict):
        # dicts are excluded by ``iterable`` by default, so they get here
        return 1 + sum(_nodes(k) + _nodes(v) for k, v in e.items())
    else:
        return 1


def ordered(seq, keys=None, default=True, warn=False):
    """Return an iterator of the seq where keys are used to break ties
    in a conservative fashion: if, after applying a key, there are no
    ties then no other keys will be computed.

    Two default keys will be applied if 1) keys are not provided or
    2) the given keys do not resolve all ties (but only if ``default``
    is True). The two keys are ``_nodes`` (which places smaller
    expressions before large) and ``default_sort_key`` which (if the
    ``sort_key`` for an object is defined properly) should resolve
    any ties.

    If ``warn`` is True then an error will be raised if there were no
    keys remaining to break ties. This can be used if it was expected that
    there should be no ties between items that are not identical.

    A ValueError is also raised if ``default`` is False and no keys are
    given.

    Examples
    ========

    >>> from sympy import ordered, count_ops
    >>> from sympy.abc import x, y

    The count_ops is not sufficient to break ties in this list and the first
    two items appear in their original order (i.e. the sorting is stable):

    >>> list(ordered([y + 2, x + 2, x**2 + y + 3],
    ...    count_ops, default=False, warn=False))
    ...
    [y + 2, x + 2, x**2 + y + 3]

    The default_sort_key allows the tie to be broken:

    >>> list(ordered([y + 2, x + 2, x**2 + y + 3]))
    ...
    [x + 2, y + 2, x**2 + y + 3]

    Here, sequences are sorted by length, then sum:

    >>> seq, keys = [[[1, 2, 1], [0, 3, 1], [1, 1, 3], [2], [1]], [
    ...    lambda x: len(x),
    ...    lambda x: sum(x)]]
    ...
    >>> list(ordered(seq, keys, default=False, warn=False))
    [[1], [2], [1, 2, 1], [0, 3, 1], [1, 1, 3]]

    If ``warn`` is True, an error will be raised if there were not
    enough keys to break ties:

    >>> list(ordered(seq, keys, default=False, warn=True))
    Traceback (most recent call last):
    ...
    ValueError: not enough keys to break ties

    Notes
    =====

    The decorated sort is one of the fastest ways to sort a sequence for
    which special item comparison is desired: the sequence is decorated,
    sorted on the basis of the decoration (e.g. making all letters lower
    case) and then undecorated. If one wants to break ties for items that
    have the same decorated value, a second key can be used. But if the
    second key is expensive to compute then it is inefficient to decorate
    all items with both keys: only those items having identical first key
    values need to be decorated. This function applies keys successively
    only when needed to break ties. By yielding an iterator, use of the
    tie-breaker is delayed as long as possible.

    This function is best used in cases when use of the first key is
    expected to be a good hashing function; if there are no unique hashes
    from application of a key, then that key should not have been used. The
    exception, however, is that even if there are many collisions, if the
    first group is small and one does not need to process all items in the
    list then time will not be wasted sorting what one was not interested
    in. For example, if one were looking for the minimum in a list and
    there were several criteria used to define the sort order, then this
    function would be good at returning that quickly if the first group
    of candidates is small relative to the number of items being processed.

    """
    # Bucket the items by the value of the first key; only buckets holding
    # more than one item need any further key applied.
    d = defaultdict(list)
    if keys:
        if not isinstance(keys, (list, tuple)):
            # a single callable was given
            keys = [keys]
        # copy so the caller's sequence is not consumed by the pop below
        keys = list(keys)
        f = keys.pop(0)
        for a in seq:
            d[f(a)].append(a)
    else:
        if not default:
            raise ValueError('if default=False then keys must be provided')
        d[None].extend(seq)

    for k in sorted(d):
        # Remove the bucket as soon as it is handled so that already
        # yielded items are not kept alive by ``d``.
        group = d.pop(k)
        if len(group) > 1:
            if keys:
                group = ordered(group, keys, default, warn)
            elif default:
                group = ordered(group, (_nodes, default_sort_key,),
                                default=False, warn=warn)
            elif warn:
                # identical items are allowed to tie; distinct ones are not
                u = list(uniq(group))
                if len(u) > 1:
                    raise ValueError(
                        'not enough keys to break ties: %s' % u)
        yield from group