"""Utility functions, mostly for internal use.""" import os import re import inspect import warnings import colorsys from urllib.request import urlopen, urlretrieve import numpy as np from scipy import stats import pandas as pd import matplotlib as mpl import matplotlib.colors as mplcol import matplotlib.pyplot as plt from matplotlib.cbook import normalize_kwargs __all__ = ["desaturate", "saturate", "set_hls_values", "move_legend", "despine", "get_dataset_names", "get_data_home", "load_dataset"] def sort_df(df, *args, **kwargs): """Wrapper to handle different pandas sorting API pre/post 0.17.""" msg = "This function is deprecated and will be removed in a future version" warnings.warn(msg) try: return df.sort_values(*args, **kwargs) except AttributeError: return df.sort(*args, **kwargs) def ci_to_errsize(cis, heights): """Convert intervals to error arguments relative to plot heights. Parameters ---------- cis : 2 x n sequence sequence of confidence interval limits heights : n sequence sequence of plot heights Returns ------- errsize : 2 x n array sequence of error size relative to height values in correct format as argument for plt.bar """ cis = np.atleast_2d(cis).reshape(2, -1) heights = np.atleast_1d(heights) errsize = [] for i, (low, high) in enumerate(np.transpose(cis)): h = heights[i] elow = h - low ehigh = high - h errsize.append([elow, ehigh]) errsize = np.asarray(errsize).T return errsize def pmf_hist(a, bins=10): """Return arguments to plt.bar for pmf-like histogram of an array. DEPRECATED: will be removed in a future version. Parameters ---------- a: array-like array to make histogram of bins: int number of bins Returns ------- x: array left x position of bars h: array height of bars w: float width of bars """ msg = "This function is deprecated and will be removed in a future version" warnings.warn(msg, FutureWarning) n, x = np.histogram(a, bins) h = n / n.sum() w = x[1] - x[0] return x[:-1], h, w def _draw_figure(fig): """Force draw of a matplotlib figure, accounting for back-compat.""" # See https://github.com/matplotlib/matplotlib/issues/19197 for context fig.canvas.draw() if fig.stale: try: fig.draw(fig.canvas.get_renderer()) except AttributeError: pass def desaturate(color, prop): """Decrease the saturation channel of a color by some percent. Parameters ---------- color : matplotlib color hex, rgb-tuple, or html color name prop : float saturation channel of color will be multiplied by this value Returns ------- new_color : rgb tuple desaturated color code in RGB tuple representation """ # Check inputs if not 0 <= prop <= 1: raise ValueError("prop must be between 0 and 1") # Get rgb tuple rep rgb = mplcol.colorConverter.to_rgb(color) # Convert to hls h, l, s = colorsys.rgb_to_hls(*rgb) # Desaturate the saturation channel s *= prop # Convert back to rgb new_color = colorsys.hls_to_rgb(h, l, s) return new_color def saturate(color): """Return a fully saturated color with the same hue. Parameters ---------- color : matplotlib color hex, rgb-tuple, or html color name Returns ------- new_color : rgb tuple saturated color code in RGB tuple representation """ return set_hls_values(color, s=1) def set_hls_values(color, h=None, l=None, s=None): # noqa """Independently manipulate the h, l, or s channels of a color. Parameters ---------- color : matplotlib color hex, rgb-tuple, or html color name h, l, s : floats between 0 and 1, or None new values for each channel in hls space Returns ------- new_color : rgb tuple new color code in RGB tuple representation """ # Get an RGB tuple representation rgb = mplcol.colorConverter.to_rgb(color) vals = list(colorsys.rgb_to_hls(*rgb)) for i, val in enumerate([h, l, s]): if val is not None: vals[i] = val rgb = colorsys.hls_to_rgb(*vals) return rgb def axlabel(xlabel, ylabel, **kwargs): """Grab current axis and label it. DEPRECATED: will be removed in a future version. """ msg = "This function is deprecated and will be removed in a future version" warnings.warn(msg, FutureWarning) ax = plt.gca() ax.set_xlabel(xlabel, **kwargs) ax.set_ylabel(ylabel, **kwargs) def remove_na(vector): """Helper method for removing null values from data vectors. Parameters ---------- vector : vector object Must implement boolean masking with [] subscript syntax. Returns ------- clean_clean : same type as ``vector`` Vector of data with null values removed. May be a copy or a view. """ return vector[pd.notnull(vector)] def get_color_cycle(): """Return the list of colors in the current matplotlib color cycle Parameters ---------- None Returns ------- colors : list List of matplotlib colors in the current cycle, or dark gray if the current color cycle is empty. """ cycler = mpl.rcParams['axes.prop_cycle'] return cycler.by_key()['color'] if 'color' in cycler.keys else [".15"] def despine(fig=None, ax=None, top=True, right=True, left=False, bottom=False, offset=None, trim=False): """Remove the top and right spines from plot(s). fig : matplotlib figure, optional Figure to despine all axes of, defaults to the current figure. ax : matplotlib axes, optional Specific axes object to despine. Ignored if fig is provided. top, right, left, bottom : boolean, optional If True, remove that spine. offset : int or dict, optional Absolute distance, in points, spines should be moved away from the axes (negative values move spines inward). A single value applies to all spines; a dict can be used to set offset values per side. trim : bool, optional If True, limit spines to the smallest and largest major tick on each non-despined axis. Returns ------- None """ # Get references to the axes we want if fig is None and ax is None: axes = plt.gcf().axes elif fig is not None: axes = fig.axes elif ax is not None: axes = [ax] for ax_i in axes: for side in ["top", "right", "left", "bottom"]: # Toggle the spine objects is_visible = not locals()[side] ax_i.spines[side].set_visible(is_visible) if offset is not None and is_visible: try: val = offset.get(side, 0) except AttributeError: val = offset ax_i.spines[side].set_position(('outward', val)) # Potentially move the ticks if left and not right: maj_on = any( t.tick1line.get_visible() for t in ax_i.yaxis.majorTicks ) min_on = any( t.tick1line.get_visible() for t in ax_i.yaxis.minorTicks ) ax_i.yaxis.set_ticks_position("right") for t in ax_i.yaxis.majorTicks: t.tick2line.set_visible(maj_on) for t in ax_i.yaxis.minorTicks: t.tick2line.set_visible(min_on) if bottom and not top: maj_on = any( t.tick1line.get_visible() for t in ax_i.xaxis.majorTicks ) min_on = any( t.tick1line.get_visible() for t in ax_i.xaxis.minorTicks ) ax_i.xaxis.set_ticks_position("top") for t in ax_i.xaxis.majorTicks: t.tick2line.set_visible(maj_on) for t in ax_i.xaxis.minorTicks: t.tick2line.set_visible(min_on) if trim: # clip off the parts of the spines that extend past major ticks xticks = np.asarray(ax_i.get_xticks()) if xticks.size: firsttick = np.compress(xticks >= min(ax_i.get_xlim()), xticks)[0] lasttick = np.compress(xticks <= max(ax_i.get_xlim()), xticks)[-1] ax_i.spines['bottom'].set_bounds(firsttick, lasttick) ax_i.spines['top'].set_bounds(firsttick, lasttick) newticks = xticks.compress(xticks <= lasttick) newticks = newticks.compress(newticks >= firsttick) ax_i.set_xticks(newticks) yticks = np.asarray(ax_i.get_yticks()) if yticks.size: firsttick = np.compress(yticks >= min(ax_i.get_ylim()), yticks)[0] lasttick = np.compress(yticks <= max(ax_i.get_ylim()), yticks)[-1] ax_i.spines['left'].set_bounds(firsttick, lasttick) ax_i.spines['right'].set_bounds(firsttick, lasttick) newticks = yticks.compress(yticks <= lasttick) newticks = newticks.compress(newticks >= firsttick) ax_i.set_yticks(newticks) def move_legend(obj, loc, **kwargs): """ Recreate a plot's legend at a new location. The name is a slight misnomer. Matplotlib legends do not expose public control over their position parameters. So this function creates a new legend, copying over the data from the original object, which is then removed. Parameters ---------- obj : the object with the plot This argument can be either a seaborn or matplotlib object: - :class:`seaborn.FacetGrid` or :class:`seaborn.PairGrid` - :class:`matplotlib.axes.Axes` or :class:`matplotlib.figure.Figure` loc : str or int Location argument, as in :meth:`matplotlib.axes.Axes.legend`. kwargs Other keyword arguments are passed to :meth:`matplotlib.axes.Axes.legend`. Examples -------- .. include:: ../docstrings/move_legend.rst """ # This is a somewhat hackish solution that will hopefully be obviated by # upstream improvements to matplotlib legends that make them easier to # modify after creation. from seaborn.axisgrid import Grid # Avoid circular import # Locate the legend object and a method to recreate the legend if isinstance(obj, Grid): old_legend = obj.legend legend_func = obj.figure.legend elif isinstance(obj, mpl.axes.Axes): old_legend = obj.legend_ legend_func = obj.legend elif isinstance(obj, mpl.figure.Figure): if obj.legends: old_legend = obj.legends[-1] else: old_legend = None legend_func = obj.legend else: err = "`obj` must be a seaborn Grid or matplotlib Axes or Figure instance." raise TypeError(err) if old_legend is None: err = f"{obj} has no legend attached." raise ValueError(err) # Extract the components of the legend we need to reuse handles = old_legend.legendHandles labels = [t.get_text() for t in old_legend.get_texts()] # Extract legend properties that can be passed to the recreation method # (Vexingly, these don't all round-trip) legend_kws = inspect.signature(mpl.legend.Legend).parameters props = {k: v for k, v in old_legend.properties().items() if k in legend_kws} # Delegate default bbox_to_anchor rules to matplotlib props.pop("bbox_to_anchor") # Try to propagate the existing title and font properties; respect new ones too title = props.pop("title") if "title" in kwargs: title.set_text(kwargs.pop("title")) title_kwargs = {k: v for k, v in kwargs.items() if k.startswith("title_")} for key, val in title_kwargs.items(): title.set(**{key[6:]: val}) kwargs.pop(key) # Try to respect the frame visibility kwargs.setdefault("frameon", old_legend.legendPatch.get_visible()) # Remove the old legend and create the new one props.update(kwargs) old_legend.remove() new_legend = legend_func(handles, labels, loc=loc, **props) new_legend.set_title(title.get_text(), title.get_fontproperties()) # Let the Grid object continue to track the correct legend object if isinstance(obj, Grid): obj._legend = new_legend def _kde_support(data, bw, gridsize, cut, clip): """Establish support for a kernel density estimate.""" support_min = max(data.min() - bw * cut, clip[0]) support_max = min(data.max() + bw * cut, clip[1]) support = np.linspace(support_min, support_max, gridsize) return support def percentiles(a, pcts, axis=None): """Like scoreatpercentile but can take and return array of percentiles. DEPRECATED: will be removed in a future version. Parameters ---------- a : array data pcts : sequence of percentile values percentile or percentiles to find score at axis : int or None if not None, computes scores over this axis Returns ------- scores: array array of scores at requested percentiles first dimension is length of object passed to ``pcts`` """ msg = "This function is deprecated and will be removed in a future version" warnings.warn(msg, FutureWarning) scores = [] try: n = len(pcts) except TypeError: pcts = [pcts] n = 0 for i, p in enumerate(pcts): if axis is None: score = stats.scoreatpercentile(a.ravel(), p) else: score = np.apply_along_axis(stats.scoreatpercentile, axis, a, p) scores.append(score) scores = np.asarray(scores) if not n: scores = scores.squeeze() return scores def ci(a, which=95, axis=None): """Return a percentile range from an array of values.""" p = 50 - which / 2, 50 + which / 2 return np.nanpercentile(a, p, axis) def sig_stars(p): """Return a R-style significance string corresponding to p values. DEPRECATED: will be removed in a future version. """ msg = "This function is deprecated and will be removed in a future version" warnings.warn(msg, FutureWarning) if p < 0.001: return "***" elif p < 0.01: return "**" elif p < 0.05: return "*" elif p < 0.1: return "." return "" def iqr(a): """Calculate the IQR for an array of numbers. DEPRECATED: will be removed in a future version. """ msg = "This function is deprecated and will be removed in a future version" warnings.warn(msg, FutureWarning) a = np.asarray(a) q1 = stats.scoreatpercentile(a, 25) q3 = stats.scoreatpercentile(a, 75) return q3 - q1 def get_dataset_names(): """Report available example datasets, useful for reporting issues. Requires an internet connection. """ url = "https://github.com/mwaskom/seaborn-data" with urlopen(url) as resp: html = resp.read() pat = r"/mwaskom/seaborn-data/blob/master/(\w*).csv" datasets = re.findall(pat, html.decode()) return datasets def get_data_home(data_home=None): """Return a path to the cache directory for example datasets. This directory is then used by :func:`load_dataset`. If the ``data_home`` argument is not specified, it tries to read from the ``SEABORN_DATA`` environment variable and defaults to ``~/seaborn-data``. """ if data_home is None: data_home = os.environ.get('SEABORN_DATA', os.path.join('~', 'seaborn-data')) data_home = os.path.expanduser(data_home) if not os.path.exists(data_home): os.makedirs(data_home) return data_home def load_dataset(name, cache=True, data_home=None, **kws): """Load an example dataset from the online repository (requires internet). This function provides quick access to a small number of example datasets that are useful for documenting seaborn or generating reproducible examples for bug reports. It is not necessary for normal usage. Note that some of the datasets have a small amount of preprocessing applied to define a proper ordering for categorical variables. Use :func:`get_dataset_names` to see a list of available datasets. Parameters ---------- name : str Name of the dataset (``{name}.csv`` on https://github.com/mwaskom/seaborn-data). cache : boolean, optional If True, try to load from the local cache first, and save to the cache if a download is required. data_home : string, optional The directory in which to cache data; see :func:`get_data_home`. kws : keys and values, optional Additional keyword arguments are passed to passed through to :func:`pandas.read_csv`. Returns ------- df : :class:`pandas.DataFrame` Tabular data, possibly with some preprocessing applied. """ # A common beginner mistake is to assume that one's personal data needs # to be passed through this function to be usable with seaborn. # Let's provide a more helpful error than you would otherwise get. if isinstance(name, pd.DataFrame): err = ( "This function accepts only strings (the name of an example dataset). " "You passed a pandas DataFrame. If you have your own dataset, " "it is not necessary to use this function before plotting." ) raise TypeError(err) url = f"https://raw.githubusercontent.com/mwaskom/seaborn-data/master/{name}.csv" if cache: cache_path = os.path.join(get_data_home(data_home), os.path.basename(url)) if not os.path.exists(cache_path): if name not in get_dataset_names(): raise ValueError(f"'{name}' is not one of the example datasets.") urlretrieve(url, cache_path) full_path = cache_path else: full_path = url df = pd.read_csv(full_path, **kws) if df.iloc[-1].isnull().all(): df = df.iloc[:-1] # Set some columns as a categorical type with ordered levels if name == "tips": df["day"] = pd.Categorical(df["day"], ["Thur", "Fri", "Sat", "Sun"]) df["sex"] = pd.Categorical(df["sex"], ["Male", "Female"]) df["time"] = pd.Categorical(df["time"], ["Lunch", "Dinner"]) df["smoker"] = pd.Categorical(df["smoker"], ["Yes", "No"]) if name == "flights": months = df["month"].str[:3] df["month"] = pd.Categorical(months, months.unique()) if name == "exercise": df["time"] = pd.Categorical(df["time"], ["1 min", "15 min", "30 min"]) df["kind"] = pd.Categorical(df["kind"], ["rest", "walking", "running"]) df["diet"] = pd.Categorical(df["diet"], ["no fat", "low fat"]) if name == "titanic": df["class"] = pd.Categorical(df["class"], ["First", "Second", "Third"]) df["deck"] = pd.Categorical(df["deck"], list("ABCDEFG")) if name == "penguins": df["sex"] = df["sex"].str.title() if name == "diamonds": df["color"] = pd.Categorical( df["color"], ["D", "E", "F", "G", "H", "I", "J"], ) df["clarity"] = pd.Categorical( df["clarity"], ["IF", "VVS1", "VVS2", "VS1", "VS2", "SI1", "SI2", "I1"], ) df["cut"] = pd.Categorical( df["cut"], ["Ideal", "Premium", "Very Good", "Good", "Fair"], ) return df def axis_ticklabels_overlap(labels): """Return a boolean for whether the list of ticklabels have overlaps. Parameters ---------- labels : list of matplotlib ticklabels Returns ------- overlap : boolean True if any of the labels overlap. """ if not labels: return False try: bboxes = [l.get_window_extent() for l in labels] overlaps = [b.count_overlaps(bboxes) for b in bboxes] return max(overlaps) > 1 except RuntimeError: # Issue on macos backend raises an error in the above code return False def axes_ticklabels_overlap(ax): """Return booleans for whether the x and y ticklabels on an Axes overlap. Parameters ---------- ax : matplotlib Axes Returns ------- x_overlap, y_overlap : booleans True when the labels on that axis overlap. """ return (axis_ticklabels_overlap(ax.get_xticklabels()), axis_ticklabels_overlap(ax.get_yticklabels())) def locator_to_legend_entries(locator, limits, dtype): """Return levels and formatted levels for brief numeric legends.""" raw_levels = locator.tick_values(*limits).astype(dtype) # The locator can return ticks outside the limits, clip them here raw_levels = [l for l in raw_levels if l >= limits[0] and l <= limits[1]] class dummy_axis: def get_view_interval(self): return limits if isinstance(locator, mpl.ticker.LogLocator): formatter = mpl.ticker.LogFormatter() else: formatter = mpl.ticker.ScalarFormatter() formatter.axis = dummy_axis() # TODO: The following two lines should be replaced # once pinned matplotlib>=3.1.0 with: # formatted_levels = formatter.format_ticks(raw_levels) formatter.set_locs(raw_levels) formatted_levels = [formatter(x) for x in raw_levels] return raw_levels, formatted_levels def relative_luminance(color): """Calculate the relative luminance of a color according to W3C standards Parameters ---------- color : matplotlib color or sequence of matplotlib colors Hex code, rgb-tuple, or html color name. Returns ------- luminance : float(s) between 0 and 1 """ rgb = mpl.colors.colorConverter.to_rgba_array(color)[:, :3] rgb = np.where(rgb <= .03928, rgb / 12.92, ((rgb + .055) / 1.055) ** 2.4) lum = rgb.dot([.2126, .7152, .0722]) try: return lum.item() except ValueError: return lum def to_utf8(obj): """Return a string representing a Python object. Strings (i.e. type ``str``) are returned unchanged. Byte strings (i.e. type ``bytes``) are returned as UTF-8-decoded strings. For other objects, the method ``__str__()`` is called, and the result is returned as a string. Parameters ---------- obj : object Any Python object Returns ------- s : str UTF-8-decoded string representation of ``obj`` """ if isinstance(obj, str): return obj try: return obj.decode(encoding="utf-8") except AttributeError: # obj is not bytes-like return str(obj) def _normalize_kwargs(kws, artist): """Wrapper for mpl.cbook.normalize_kwargs that supports <= 3.2.1.""" _alias_map = { 'color': ['c'], 'linewidth': ['lw'], 'linestyle': ['ls'], 'facecolor': ['fc'], 'edgecolor': ['ec'], 'markerfacecolor': ['mfc'], 'markeredgecolor': ['mec'], 'markeredgewidth': ['mew'], 'markersize': ['ms'] } try: kws = normalize_kwargs(kws, artist) except AttributeError: kws = normalize_kwargs(kws, _alias_map) return kws def _check_argument(param, options, value): """Raise if value for param is not in options.""" if value not in options: raise ValueError( f"`{param}` must be one of {options}, but {value} was passed.`" ) def _assign_default_kwargs(kws, call_func, source_func): """Assign default kwargs for call_func using values from source_func.""" # This exists so that axes-level functions and figure-level functions can # both call a Plotter method while having the default kwargs be defined in # the signature of the axes-level function. # An alternative would be to have a decorator on the method that sets its # defaults based on those defined in the axes-level function. # Then the figure-level function would not need to worry about defaults. # I am not sure which is better. needed = inspect.signature(call_func).parameters defaults = inspect.signature(source_func).parameters for param in needed: if param in defaults and param not in kws: kws[param] = defaults[param].default return kws def adjust_legend_subtitles(legend): """Make invisible-handle "subtitles" entries look more like titles.""" # Legend title not in rcParams until 3.0 font_size = plt.rcParams.get("legend.title_fontsize", None) hpackers = legend.findobj(mpl.offsetbox.VPacker)[0].get_children() for hpack in hpackers: draw_area, text_area = hpack.get_children() handles = draw_area.get_children() if not all(artist.get_visible() for artist in handles): draw_area.set_width(0) for text in text_area.get_children(): if font_size is not None: text.set_size(font_size)