Source code for betterplotlib.axes_bpl

from matplotlib.axes import Axes
from matplotlib import colors as mpl_colors
from matplotlib import path, rcParams, ticker
import matplotlib.patheffects as PathEffects
from scipy import optimize, integrate
import numpy as np

from . import colors
from . import tools
from . import type_checking



[docs]
class Axes_bpl(Axes):
    name = "bpl"

    def __init__(self, *args, **kwargs):
        """
        Create the axes, forwarding every argument to the parent initializer.

        The one piece of state added by this subclass is `scatter_color_idx`,
        the position within the color cycle that `scatter` reads and advances
        when it has to choose a color on its own.
        """
        # Begin at the first color of the cycle; this is set before the
        # parent initializer is invoked.
        self.scatter_color_idx = 0
        super(Axes_bpl, self).__init__(*args, **kwargs)


[docs]
    def make_ax_dark(self, grid=True, minor_ticks=False):
        """
        Give the axes a shaded background with white gridlines.

        The background becomes a light gray, and (optionally) thin white
        gridlines are drawn underneath the plotted data. Every spine is removed
        so the axes have no outline, and the ticks are removed along with them.

        :param grid: Whether or not to draw the grid. Defaults to True.
        :type grid: bool
        :param minor_ticks: Whether or not to also draw a minor grid. The minor
                            gridlines are dotted, while the major ones are
                            solid. Ignored when `grid` is False.
        :type minor_ticks: bool
        :return: None

        .. plot::
            :include-source:

            import betterplotlib as bpl
            bpl.set_style()

            fig, (ax0, ax1) = bpl.subplots(figsize=[12, 5], ncols=2)
            ax1.make_ax_dark()
            ax0.set_title("Regular")
            ax1.set_title("Dark")


        """
        self.set_facecolor(colors.light_gray)

        # major and minor gridlines share everything except the line style
        white_line = {"color": "w", "linewidth": 0.5}
        if grid:
            self.grid(which="major", linestyle="-", **white_line)
            if minor_ticks:
                self.minorticks_on()
                self.grid(which="minor", linestyle=":", **white_line)

        # keep the gridlines underneath the plotted points
        self.set_axisbelow(True)

        # drop the outline of the axes entirely
        self.remove_spines("all")



[docs]
    def remove_ticks(self, *ticks_to_remove):
        """
        Remove the ticks from the requested sides of the axes.

        Ticks are sometimes unwanted clutter. This only changes where the ticks
        are placed; the spines themselves are left alone.

        Note that this can break when used with the various `remove_*()`
        functions. Order matters with these calls, presumably due to something
        with the way matplotlib works under the hood. Mess around with it if
        you're having trouble.

        :param ticks_to_remove: sides to remove the ticks from. Choose from:
                                "all", "top", "bottom", "left", or "right",
                                and pass in as many as you'd like. Any other
                                value is ignored.
        :return: None

        .. plot::
            :include-source:

            import betterplotlib as bpl
            bpl.set_style()

            fig, (ax0, ax1) = bpl.subplots(ncols=2, figsize=[10, 5])

            ax0.plot([0, 1, 2], [0, 1, 2])
            ax1.plot([0, 1, 2], [0, 1, 2])

            ax0.remove_ticks("top", "right")
            ax1.remove_ticks("all")

            ax0.set_title("removed top/right ticks")
            ax1.set_title("removed all ticks")
        """
        # Collapse duplicates, then expand the "all" shorthand into the four
        # individual sides.
        sides = set(ticks_to_remove)
        if "all" in sides:
            sides.discard("all")
            sides.update(("left", "right", "top", "bottom"))

        # matplotlib only lets us say which side the ticks are *on*, so for
        # each axis: dropping both sides means "none", while dropping a single
        # side means placing the ticks on the opposite one.
        axis_sides = (
            (self.yaxis, "left", "right"),
            (self.xaxis, "top", "bottom"),
        )
        for axis, first_side, second_side in axis_sides:
            drop_first = first_side in sides
            drop_second = second_side in sides
            if drop_first and drop_second:
                axis.set_ticks_position("none")
            elif drop_first:
                axis.set_ticks_position(second_side)
            elif drop_second:
                axis.set_ticks_position(first_side)



[docs]
    def remove_spines(self, *spines_to_remove):
        """
        Hide the requested spines of the axes.

        Spines are the lines that border the plotting area. They are often
        unnecessary, and this makes the chosen ones invisible. The ticks on the
        same sides are removed too (via `remove_ticks`), but tick labels that
        are visible for that axis are not removed.

        Note that this function can mess up if you call this function multiple
        times with the same axes object, due to the way matplotlib works under
        the hood. I haven't really tested it extensively (since I have never
        wanted to call it more than once), but I think the last function call
        is the one that counts. Calling this multiple times on the same axes
        would be pointless, though, since you can specify multiple spines in
        one call. If you really need to call it multiple times and it is
        breaking, let me know and I can try to fix it. This also can break when
        used with the various `remove_*()` functions. Order matters with these
        calls, for some reason.

        :param spines_to_remove: The desired spines to remove. Can
                                 choose from "all", "top", "bottom", "left",
                                 or "right".
        :return: None

        .. plot::
            :include-source:

            import betterplotlib as bpl
            bpl.set_style()

            fig, (ax0, ax1) = bpl.subplots(ncols=2, figsize=[10, 5])

            ax0.plot([0, 1, 2], [0, 1, 2])
            ax1.plot([0, 1, 2], [0, 1, 2])

            ax0.remove_spines("top", "right")
            ax1.remove_spines("all")

            ax0.set_title("removed top/right spines")
            ax1.set_title("removed all spines")

        """
        # A set drops duplicates; "all" is shorthand for the four sides.
        targets = set(spines_to_remove)
        if "all" in targets:
            targets = (targets - {"all"}) | {"left", "right", "top", "bottom"}

        # hide each requested spine
        for side in targets:
            self.spines[side].set_visible(False)

        # the ticks on those same sides go away as well
        self.remove_ticks(*targets)



[docs]
    def scatter(self, *args, **kwargs):
        """
        Makes a scatter plot that looks nicer than the matplotlib default.

        The call works just like a call to plt.scatter. It will set a few
        default parameters, but anything you pass in will override the default
        parameters. This function also uses the color cycle, unlike the default
        scatter.

        It also automatically determines a guess at the proper alpha
        (transparency) of the points in the plot, based on the number of
        points. This is only done when `alpha` is not passed in.

        When a `label` is given, the legend entry is produced by a second,
        data-free scatter call (NaN points) that uses the same parameters but
        an alpha of one, so that the legend marker is always opaque.

        NOTE: the `c` parameter tells just the facecolor of the points, while
        `color` specifies the whole color of the point, including the edge line
        color. This follows the default matplotlib scatter implementation.

        :param args: non-keyword arguments that will be passed on to the
                     plt.scatter function. These will typically be the x and y
                     lists.
        :param kwargs: keyword arguments that will be passed on to plt.scatter.
        :return: the output of the plt.scatter call is returned directly.
        :raises KeyError: if no positional arguments are given and `x` or `y`
                          is not passed as a keyword.

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np
            bpl.set_style()

            x = np.random.normal(0, scale=0.5, size=500)
            y = np.random.normal(0, scale=0.5, size=500)

            for dx in [0, 0.5, 1]:
                bpl.scatter(x + dx, y + dx)
            bpl.equal_scale()

        """
        # put x and y in args if they're not already there
        if len(args) == 0:
            args = kwargs.pop("x"), kwargs.pop("y")

        # Only pick a color from the cycle when the caller has not chosen one.
        # The color can arrive by keyword (under several names) or as the
        # fourth positional argument (x, y, s, c); in the latter case adding a
        # `c` keyword would make matplotlib see the argument twice.
        color_keywords = ("color", "c", "facecolor", "facecolors")
        color_given = len(args) >= 4 or any(key in kwargs for key in color_keywords)
        if not color_given:
            # get the default color cycle, and get the next color. The modulo
            # on the lookup guards against the cycle having been replaced by a
            # shorter one since the index was last advanced.
            color_cycle = rcParams["axes.prop_cycle"].by_key()["color"]
            kwargs["c"] = color_cycle[self.scatter_color_idx % len(color_cycle)]
            # move to the next
            self.scatter_color_idx = (self.scatter_color_idx + 1) % len(color_cycle)

        # Guess an alpha from the number of points, but only when the caller
        # did not supply one, so nothing is computed needlessly. np.size also
        # copes with a scalar x and counts every element of nested input.
        if "alpha" not in kwargs:
            kwargs["alpha"] = tools._alpha(np.size(args[0]))

        # we want to make the points in the legend opaque always. To do this
        # we plot nans with all the same parameters, but with alpha of one.
        if "label" in kwargs:
            # we don't want to plot any data here, so drop the x and y
            # positional arguments. Slicing gives an empty tuple when fewer
            # than two positional arguments were passed (e.g. y as a keyword).
            label_args = args[2:]
            # we need to process the kwargs a little before plotting the fake
            # data, so make a copy of them
            label_kwargs = kwargs.copy()
            # exclude any plotted data, if it is in a kwarg
            label_kwargs.pop("x", None)
            label_kwargs.pop("y", None)
            # set the alpha to one, which is the whole point
            label_kwargs["alpha"] = 1.0
            # we can then plot the fake data. Due to weirdness in matplotlib, we
            # have to plot a two element NaN list.
            super(Axes_bpl, self).scatter(
                [np.nan, np.nan], [np.nan, np.nan], *label_args, **label_kwargs
            )
            # in the main plotting we don't want to have a label, so we pop it.
            kwargs.pop("label")

        # we then plot the main data
        return super(Axes_bpl, self).scatter(*args, **kwargs)



[docs]
    def hist(self, *args, **kwargs):
        """
        A better histogram function. Also supports relative frequency plots, bin
        size, and hatching better than the default matplotlib implementation.

        Everything is the same as the default matplotlib implementation, with
        the exception a few keyword parameters. `rel_freq` makes the histogram a
        relative frequency plot and `bin_size` controls the width of each bin.

        When this function has to compute weights or bins itself, it takes the
        `len`, `min` and `max` of the data directly, so the data is treated as a
        single flat sequence of values.

        :param args: non-keyword arguments that will be passed on to the
                     plt.hist() function. These will typically be the list of
                     values. The data may also be passed as the `x` keyword.
        :keyword rel_freq: Whether or not to plot the histogram as a relative
                           frequency histogram. Note that this plots the
                           relative frequency of each bin compared to the whole
                           sample. Even if your range excludes some of the data,
                           it will still be included in the relative frequency
                           calculation.
        :type rel_freq: bool
        :keyword bin_size: The width of the bins in the histogram. The bin
                           boundaries will start at zero, and will be integer
                           multiples of bin_size from there. Specify either
                           this, or bins, but not both.
        :type bin_size: float
        :keyword kwargs: additional controls that will be passed on through to
                         the plt.hist() function.
        :return: same output as plt.hist()
        :raises ValueError: if `rel_freq` is combined with `weights` or
                            `density`, or if `bin_size` is combined with `bins`.
        :raises TypeError: if the data is needed to compute weights or bins but
                           was not passed in.

        Examples:

        The basic histogram should look nicer than the default histogram.

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import matplotlib.pyplot as plt
            import numpy as np

            bpl.set_style()

            data = np.random.normal(0, 2, 10000)

            fig = plt.figure(figsize=[15, 7])
            ax1 = fig.add_subplot(121)
            ax2 = fig.add_subplot(122, projection="bpl")

            ax1.hist(data)
            ax2.hist(data)

            ax1.set_title("matplotlib")
            ax2.add_labels(title="betterplotlib")

        There are also plenty of options that make other histograms look nice
        too.

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np
            bpl.set_style()

            data1 = np.random.normal(-6, 1, size=10000)
            data2 = np.random.normal(-2, 1, size=10000)
            data3 = np.random.normal(2, 1, size=10000)
            data4 = np.random.normal(6, 1, size=10000)
            bin_size = 0.5
            bpl.hist(
                data1,
                rel_freq=True,
                bin_size=bin_size,
            )
            bpl.hist(
                data2,
                rel_freq=True,
                bin_size=bin_size,
                histtype="step",
                linewidth=5,
            )
            bpl.hist(
                data3,
                rel_freq=True,
                bin_size=bin_size,
                histtype="stepfilled",
                hatch="o",
                alpha=0.8,
            )
            bpl.hist(
                data4,
                rel_freq=True,
                bin_size=bin_size,
                histtype="step",
                hatch="x",
                linewidth=4,
            )

            bpl.add_labels(y_label="Relative Frequency")

        """
        # I like white as an edgecolor if we use bars.
        if kwargs.get("histtype") != "step":
            kwargs.setdefault("edgecolor", "white")

        # `rel_freq` is our own keyword, so it must not reach matplotlib.
        rel_freq = kwargs.pop("rel_freq", False)
        if rel_freq:
            # check that they didn't set weights, since that's what I'll change
            if "weights" in kwargs:
                raise ValueError(
                    "The `weights` keyword can't be used with "
                    "`rel_freq`, since `rel_freq` works by "
                    "modifying the weights."
                )
            # an explicit density=False is harmless, so only reject it when
            # density is actually switched on.
            if kwargs.get("density"):
                raise ValueError("The `rel_freq` keyword can't be used with `density`")

        # The bins may be given by keyword or as the second positional
        # argument. In the positional case adding a `bins` keyword would hand
        # matplotlib the same argument twice.
        bins_given = "bins" in kwargs or len(args) >= 2
        if "bin_size" in kwargs and bins_given:
            raise ValueError(
                "The `bins` and `bin_size` keywords cannot be "
                "used together. Use `bins` if you want to "
                "pass your own bins, or use `bin_size` to "
                "have the code determine its own bins. "
            )

        # We only need to look at the data ourselves when computing weights or
        # bins; otherwise everything is passed straight through.
        if rel_freq or not bins_given:
            if args:
                data = args[0]
            elif "x" in kwargs:
                data = kwargs["x"]
            else:
                raise TypeError("hist() is missing the data to plot.")

        if rel_freq:
            # we weight each item by 1/total items. Empty data gets empty
            # weights rather than a division by zero.
            n_items = len(data)
            kwargs["weights"] = [1.0 / n_items] * n_items if n_items else []

        # if they didn't specify the binning, use our binning. The default bin
        # width is only computed when it is needed, since the rounded bin width
        # function will raise an error if the user's data has no IQR.
        if not bins_given:
            bin_size = kwargs.pop("bin_size", None)
            if bin_size is None:
                bin_size = tools.rounded_bin_width(data)
            kwargs["bins"] = tools._binning(min(data), max(data), bin_size)

        # plot the histogram, and keep the results
        return super(Axes_bpl, self).hist(*args, **kwargs)



[docs]
    def add_labels(self, x_label=None, y_label=None, title=None, *args, **kwargs):
        """
        Set the x label, the y label, and the title in a single call.

        Any label left as None is skipped. Extra positional and keyword
        arguments are handed to every label that is created, so they apply to
        all of them. If you want the title styled differently, for example, set
        it separately rather than here.

        :param x_label: label for the x axis
        :type x_label: str
        :param y_label: label for the y axis
        :type y_label: str
        :param title: title for the given axis
        :type title: str
        :param args: additional properties that will be passed on to all the
                     labels you asked for.
        :param kwargs: additional keyword arguments that will be passed on to
                       all the labels you make.
        :return: None

        Example:

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np
            bpl.set_style()

            xs = np.arange(0, 10, 0.1)
            ys = xs**2

            fig, ax = bpl.subplots()
            ax.plot(xs, ys)
            ax.add_labels("X value", "Y value", "Title")
        """
        # each piece of text is paired with the method that applies it
        label_setters = (
            (x_label, self.set_xlabel),
            (y_label, self.set_ylabel),
            (title, self.set_title),
        )
        for label_text, apply_label in label_setters:
            if label_text is not None:
                apply_label(label_text, *args, **kwargs)



[docs]
    def set_limits(self, x_min=None, x_max=None, y_min=None, y_max=None, **kwargs):
        """
        Set the limits of the x and y axes with one call.

        Additional keyword arguments are forwarded to both of the matplotlib
        calls that set the limits (`set_xlim` and `set_ylim`), so refer to their
        documentation for the allowed parameters.

        :param x_min: minimum x value to be plotted
        :type x_min: int, float
        :param x_max: maximum x value to be plotted
        :type x_max: int, float
        :param y_min: minimum y value to be plotted
        :type y_min: int, float
        :param y_max: maximum y value to be plotted
        :type y_max: int, float
        :param kwargs: Kwargs for the set_limits() functions.
        :return: none.

        Example:

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np
            bpl.set_style()

            xs = np.arange(0, 10, 0.01)
            ys = np.cos(xs)

            fig, [ax1, ax2] = bpl.subplots(ncols=2)

            ax1.plot(xs, ys)

            ax2.plot(xs, ys)
            ax2.set_limits(0, 2*np.pi, -1.1, 1.1)
        """
        # Limits left as None are meant to leave that side of the plot alone.
        x_range = [x_min, x_max]
        y_range = [y_min, y_max]
        self.set_xlim(x_range, **kwargs)
        self.set_ylim(y_range, **kwargs)



[docs]
    def add_text(
        self, x, y, text, coords="data", border_color=None, border_linewidth=3, **kwargs
    ):
        """
        Place text on the axes, in either data or axes coordinates.

        Matplotlib can position text in data or axes coordinates, but the
        incantation for choosing between them is hard to remember. Here the
        `coords` parameter takes care of it.

        With data coords, the text is placed at that data point. With axes
        coords, the text is placed relative to the axes: (0,0) is the bottom
        left and (1,1) is the top right. The text is centered on the given
        location unless you pass the horizontal or vertical alignment
        yourself, so use those parameters if it isn't quite in the spot you
        expect.

        Also consider using easy_add_text, which gives 9 possible location to
        add text with minimal consternation.

        :param x: x location of the text to be added.
        :type x: int, float
        :param y: y location of the text to be added.
        :type y: int, float
        :param text: text to be added
        :type text: str
        :param coords: type of coordinates. This parameter can be either 'data'
                       or 'axes'. 'data' puts the text at that data point.
                       'axes' puts the text in that location relative the axes.
                       See above.
        :type coords: str
        :param border_color: An optional color to add a border around the text added.
                             This is useful for making text more easily visible against
                             a colorful background
        :type border_color: str
        :param border_linewidth: the width of the border added around the text
        :type border_linewidth: int
        :param kwargs: any additional keyword arguments to pass on the text
                       function. Pass things you would pass to plt.text()
        :return: Same as output of plt.text().
        :raises ValueError: if `transform` is passed, or if `coords` is not
                            'data' or 'axes'.

        Example:

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np
            bpl.set_style()

            xs = np.random.normal(0, 1, 1000)
            ys = np.random.normal(0, 1, 1000)

            bpl.density_contourf(xs, ys, bin_size=0.1, smoothing=0.5)
            bpl.set_limits(-4, 4, -4, 4)
            bpl.equal_scale()
            bpl.add_text(-3, 3, "(-3, 3) data")
            bpl.add_text(0.7, 0.1, "70% across, 10% up", "axes")
            bpl.add_text(
                0,
                0,
                "(0, 0) data, black border",
                color="white",
                border_color=bpl.almost_black,
                border_linewidth=3,
            )

        """
        # The transform is derived from `coords`, so the caller may not
        # supply their own.
        if "transform" in kwargs:
            raise ValueError(
                "add_text takes care of the transform for you when"
                " you specify coords. \n"
                "Don't specify transform in this function."
            )
        if coords not in ("data", "axes"):
            raise ValueError("`coords` must be either 'data' or 'axes'")

        # Stash the matching transform in kwargs so it rides along with
        # everything else handed to the text call.
        kwargs["transform"] = self.transData if coords == "data" else self.transAxes

        # Center the text on the location by default. Each alignment has two
        # spellings, so both must be absent before the default is applied.
        if kwargs.keys().isdisjoint(("ha", "horizontalalignment")):
            kwargs["ha"] = "center"
        if kwargs.keys().isdisjoint(("va", "verticalalignment")):
            kwargs["va"] = "center"

        text_artist = self.text(x, y, text, **kwargs)

        # without a border color there is nothing more to do
        if border_color is None:
            return text_artist

        # outline the text so it stands out from whatever is behind it
        outline = PathEffects.withStroke(
            linewidth=border_linewidth, foreground=border_color
        )
        text_artist.set_path_effects([outline])
        return text_artist



[docs]
    def remove_labels(self, labels_to_remove):
        """
        Turn off the tick marks and tick labels of the chosen axis.

        This is useful for making conceptual plots where the numbers on the axis
        don't matter. Axes labels still work, also.

        Note that this can break when used with the various `remove_*()`
        functions. Order matters with these calls, presumably due to something
        with the way matplotlib works under the hood. Mess around with it if
        you're having trouble.

        :param labels_to_remove: location of labels to remove. Choose from:
                                 "both", "x", or "y".
        :type labels_to_remove: str
        :return: None
        :raises ValueError: if `labels_to_remove` is not one of the values
                            listed above.

        Example:

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np
            bpl.set_style()

            xs = np.arange(0, 5, 0.1)
            ys = xs**2

            fig, ax = bpl.subplots()

            ax.plot(xs, ys)

            ax.remove_labels("y")
            ax.remove_ticks("top")
            ax.add_labels("Conceptual plot", "Axes labels still work")

        """
        # reject anything that isn't a recognized axis choice
        if labels_to_remove not in ("both", "x", "y"):
            raise ValueError('Please pass in either "x", "y", or "both".')

        # Switch off both the tick marks and the tick labels on all four
        # sides; the `axis` argument is what restricts this to the chosen axis.
        sides = ("bottom", "top", "left", "right")
        switches = {side: False for side in sides}
        switches.update({"label" + side: False for side in sides})
        self.tick_params(axis=labels_to_remove, **switches)



[docs]
    def legend(self, linewidth=0, *args, **kwargs):
        """
        Create a nicer looking legend.

        Works by calling the ax.legend() function with the given args and
        kwargs. If some are not specified, they will be filled with values that
        make the legend look nice.

        Note that `linewidth` is the first parameter of this function, so the
        first positional argument is always taken as the linewidth rather than
        being forwarded to ax.legend(). Pass `linewidth` by keyword to avoid
        surprises.

        :param linewidth: linewidth of the border of the legend. Defaults to
                          zero.
        :type linewidth: float
        :param args: non-keyword arguments passed on to the ax.legend() function.
        :param kwargs: keyword arguments that will be passed on to the
                       ax.legend() function. This will be things like loc,
                       and title, etc. If `title` is given without
                       `title_fontsize`, the title is drawn 1.2 times larger
                       than the legend's font size.
        :return: legend object returned by the ax.legend() function.

        The default legend is a transparent background with no border, like so.

        .. plot::
            :include-source:

            import numpy as np
            import betterplotlib as bpl
            import matplotlib.pyplot as plt
            bpl.set_style()

            x = np.arange(0, 5, 0.1)

            fig = plt.figure(figsize=[15, 7])
            ax1 = fig.add_subplot(121)
            ax2 = fig.add_subplot(122, projection="bpl")  # bpl subplot.

            for ax in [ax1, ax2]:
                ax.plot(x, x, label="x")
                ax.plot(x, 2*x, label="2x")
                ax.plot(x, 3*x, label="3x")
                ax.legend(loc=2)

            ax1.set_title("matplotlib")
            ax2.set_title("betterplotlib")

        You can still pass in any kwargs to the legend function you want.

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np

            bpl.set_style()

            x = np.arange(0, 5, 0.1)

            bpl.plot(x, x, label="x")
            bpl.plot(x, 2*x, label="2x")
            bpl.plot(x, 3*x, label="3x")
            bpl.legend(fontsize=20, loc=6, title="Title")
        """
        # Imported here since it is only needed to resolve font sizes below.
        from matplotlib.font_manager import FontProperties

        # push the legend a little farther away from the edge.
        kwargs.setdefault("borderaxespad", 0.75)
        kwargs.setdefault("fontsize", rcParams["legend.fontsize"])

        leg = super(Axes_bpl, self).legend(*args, **kwargs)

        # Make the title a bit larger than the entries, unless the caller
        # picked a title size themselves. The font size may be a name such as
        # "medium" rather than a number, so it is resolved to points before
        # being scaled.
        if "title" in kwargs and "title_fontsize" not in kwargs:
            fontsize = kwargs["fontsize"]
            if fontsize is None:
                fontsize = rcParams["legend.fontsize"]
            size_in_points = FontProperties(size=fontsize).get_size_in_points()
            leg.get_title().set_fontsize(size_in_points * 1.2)

        # adjust the size of points within the legend
        for handle in leg.legend_handles:
            # handles that don't come from a scatter plot (lines, for example)
            # have no `set_sizes` method, so they are skipped.
            try:
                handle.set_sizes([100])
            except AttributeError:
                continue

        # set the width of the border drawn around the legend
        leg.get_frame().set_linewidth(linewidth)

        return leg



[docs]
    def equal_scale(self):
        """
        Give the x and y axes the same scale.

        Useful for plotting things like ra and dec, something with the same
        quantity on both axes, or anytime the x and y axis have the same scale. It also
        works when both axes are in log (making one dex the same on both axes) or when
        only one axis is in log (one dex = one unit in linear space).

        It's really only one command, but it's one I have a hard time
        remembering.

        Note that this keeps the range the same from the plot as before, so you
        may want to adjust the limits to make the plot look better. It will
        keep the axes adjusted the same, though, no matter how you change the
        limits afterward.

        :return: None

        Examples:

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np

            bpl.set_style()

            # make a Gaussian with more spread in y direction
            xs = np.random.normal(0, 1, 1000)
            ys = np.random.normal(0, 2, 1000)

            fig, [ax1, ax2] = bpl.subplots(figsize=[12, 5], ncols=2)

            ax1.scatter(xs, ys)
            ax2.scatter(xs, ys)

            ax2.equal_scale()

            ax1.add_labels(title="Looks symmetric")
            ax2.add_labels(title="Shows true shape")

        Here is proof that changing the limits doesn't change the scaling
        between the axes.

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np

            bpl.set_style()

            # make a Gaussian with more spread in y direction
            xs = np.random.normal(0, 1, 1000)
            ys = np.random.normal(0, 2, 1000)

            fig, [ax1, ax2] = bpl.subplots(figsize=[12, 5], ncols=2)

            ax1.scatter(xs, ys)
            ax2.scatter(xs, ys)

            ax1.equal_scale()
            ax2.equal_scale()

            ax1.set_limits(-10, 10, -4, 4)
            ax2.set_limits(-5, 5, -10, 10)

        And here's a demonstration of using this with log scaled axes

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np

            bpl.set_style()

            xs = np.random.normal(0, 1, 1000)
            ys = 10 ** np.random.normal(0, 0.5, 1000)

            fig, ax = bpl.subplots()

            ax.scatter(xs, ys)
            ax.log("y")
            ax.set_limits(-3, 3, 10**-3, 10**3)
            ax.equal_scale()

        """
        # "box" asks matplotlib to adjust the axes box, not the data limits,
        # to achieve the equal aspect.
        self.set_aspect(aspect="equal", adjustable="box")



[docs]
    def easy_add_text(self, text, location, **kwargs):
        """
        Adds text in common spots easily.

        This was inspired by the plt.legend() function and its loc parameter,
        which allows for easy placement of legends. This does a similar thing,
        but just for text.

        This can take any additional keyword arguments that can be passed to
        `add_text`, other than `coords`.

        VERY IMPORTANT NOTE: Although this works similar to plt.legend()'s loc
        parameter, the numbering is NOT the same. My numbering is based on the
        keypad. 1 is in the bottom left, 5 in the center, and 9 in the top
        right. You can also specify words that tell the location.

        :param text: Text to add to the axes.
        :type text: str
        :param location: Location to add the text. This can be specified in
                         two possible ways. You can pass an integer, which
                         puts the text at the location corresponding to that
                         number's location on a standard keyboard numpad.
                         You can also pass a string that describes the
                         location. 'upper', 'center', and 'lower' describe the
                         vertical location, and 'left', 'center', and 'right'
                         describe the horizontal location. You need to specify
                         vertical, then horizontal, like 'upper right'. Note
                         that 'center' is the code for the center, not
                         'center center'.
        :type location: str, int
        :param kwargs: additional text parameters that will be passed on to
                       `add_text`. Note that this function controls the
                       x and y location, the coordinate system (`coords`), as
                       well as the horizontal and vertical alignment, so do
                       not pass those parameters.
        :return: The return value of `add_text`.
        :raises ValueError: If `ha`, `va`, `horizontalalignment`,
                            `verticalalignment`, or `coords` is passed in, or
                            if `location` is not one of the recognized values.

        Example:

        There are two ways to specify the location, and we will demo both.

        .. plot::
            :include-source:

            import betterplotlib as bpl
            bpl.set_style()

            bpl.easy_add_text("1", 1)
            bpl.easy_add_text("2", 2)
            bpl.easy_add_text("3", 3)
            bpl.easy_add_text("4", 4)
            bpl.easy_add_text("5", 5)
            bpl.easy_add_text("6", 6)
            bpl.easy_add_text("7", 7)
            bpl.easy_add_text("8", 8)
            bpl.easy_add_text("9", 9)

        .. plot::
            :include-source:

            import betterplotlib as bpl
            bpl.set_style()

            bpl.easy_add_text("upper left", "upper left")
            bpl.easy_add_text("upper center", "upper center")
            bpl.easy_add_text("upper right", "upper right")
            bpl.easy_add_text("center left", "center left")
            bpl.easy_add_text("center", "center")
            bpl.easy_add_text("center right", "center right")
            bpl.easy_add_text("lower left", "lower left")
            bpl.easy_add_text("lower center", "lower center")
            bpl.easy_add_text("lower right", "lower right")


        """
        # Check that the user didn't specify parameters this function sets
        # itself, and tell them exactly which ones were the problem.
        reserved = (
            "ha",
            "va",
            "horizontalalignment",
            "verticalalignment",
            "coords",
        )
        clashes = [key for key in reserved if key in kwargs]
        if clashes:
            raise ValueError(
                "This function controls the alignment and the coordinate "
                "system. Do not pass in: {}.".format(", ".join(clashes))
            )

        # Positions handed to `add_text` together with coords="axes".
        near_edge, middle, far_edge = 0.04, 0.5, 0.96
        # Each row: (numpad digit, name, x, y, horizontal align, vertical align)
        placements = (
            (1, "lower left", near_edge, near_edge, "left", "bottom"),
            (2, "lower center", middle, near_edge, "center", "bottom"),
            (3, "lower right", far_edge, near_edge, "right", "bottom"),
            (4, "center left", near_edge, middle, "left", "center"),
            (5, "center", middle, middle, "center", "center"),
            (6, "center right", far_edge, middle, "right", "center"),
            (7, "upper left", near_edge, far_edge, "left", "top"),
            (8, "upper center", middle, far_edge, "center", "top"),
            (9, "upper right", far_edge, far_edge, "right", "top"),
        )

        # Compare with == (rather than a dict lookup) so that anything that
        # compares equal to the digit or the name is accepted, and unhashable
        # values simply fall through to the error below.
        for number, name, x_value, y_value, h_align, v_align in placements:
            if location == number or location == name:
                kwargs["horizontalalignment"] = h_align
                kwargs["verticalalignment"] = v_align
                return self.add_text(x_value, y_value, text, coords="axes", **kwargs)

        raise ValueError(
            "location was not specified properly: {!r}. Use an integer from "
            "1 to 9 or a string such as 'upper right'.".format(location)
        )


    def _density_contour_core(
        self,
        xs,
        ys,
        bin_size=None,
        percent_levels=None,
        smoothing=0,
        weights=None,
        log=False,
        labels=False,
        filled=False,
        **kwargs,
    ):
        """
        The underlying function to do both filled and unfilled contours. Call
        `density_contour` or `density_contourf` instead of this.

        :param xs: List of x values
        :type xs: list, np.ndarray
        :param ys: List of y values
        :type ys: list, np.ndarray
        :param bin_size: Bin size to use for the underlying 2D histogram. This
                         can either be a scalar, in which case the bin size will
                         be the same in both the x and y dimensions, or else a
                         two element list, where the first element will be the
                         bin size in the x dimension, and the second will be
                         the bin size in the y dimension.
        :type bin_size: int, float, list
        :param percent_levels: A list describing the levels of the contours that
                               will be drawn. Each value in this list contains
                               a float between zero and 1 (inclusive) that
                               describes how much of that data will be enclosed
                               by a contour. So if you pass [0.25, 0.5, 0.75],
                               there will be three contours drawn, that enclose
                               25%, 50%, and 75% of the data. If this is not
                               passed in, the default is
                               [0.25, 0.5, 0.75, 0.95]. A level of 0 is always
                               added internally on top of the requested ones.
        :type percent_levels: float, list
        :param smoothing: Optional parameter that will allow the contours to be
                          smoothed. Pass in a nonzero value, which will be the
                          standard deviation of the Gaussian kernel used to
                          smooth the histogram. When using this, often choosing
                          smaller bin sizes is advantageous to make a less
                          grainy plot. Has the same format as padding and
                          bin_size, so different smoothing kernels are possible
                          in the x and y directions.
        :type smoothing: int, float, list
        :param weights: A list containing weights for each data point. If these
                        are not passed, all data points will be weighted
                        equally.
        :type weights: list, np.ndarray
        :param log: Whether or not to do the smoothing and bin creation in log
                    space. This should be used if the plot will be done on
                    log-scaled axes. Can either be a single bool, in which case
                    the x and y scales will both be log (or not), or a two
                    element array, where the first is whether the x axis is
                    log, and the second is y. If this is used, the bin_size and
                    smoothing parameters will be interpreted as dex, rather
                    than raw values.
        :type log: bool, list
        :param labels: Whether or not to label the individual contour lines
                       with their percentage level.
        :type labels: bool
        :param filled: True will use filled contours, or False to use hollow ones.
        :type filled: bool
        :param kwargs: Additional keyword arguments to pass on to the original
                       matplotlib contour function. `levels` may not be passed,
                       and any `norm` passed in is replaced by the one computed
                       here.
        :return: output of the matplotlib contour (or contourf, if `filled`)
                 function.
        :raises ValueError: If all points are identical and no smoothing is
                            requested, if `levels` is passed in `kwargs`, or if
                            the requested percent levels produce duplicate
                            contour levels.
        """
        # error check weird error matplotlib has when all x and y data are same.
        if len(set(xs)) == len(set(ys)) == 1 and smoothing == 0:
            raise ValueError(
                "All points are identical. This breaks matplotlib "
                "contours for some reason. "
                "Try other data, or smooth."
            )
        # levels is set by this function, so it can't be in there
        if "levels" in kwargs:
            raise ValueError(
                "The levels parameter is set by this function. Do not pass it in. "
            )
        # if smoothing is not specified, we still want some padding on the
        # outside so the contours aren't cut off.
        if smoothing == 0:
            try:
                padding = [
                    2 * tools._freedman_diaconis(xs),
                    2 * tools._freedman_diaconis(ys),
                ]
            except ValueError:  # data length too short, will handle in 2d hist
                padding = 0
        else:
            padding = tools._padding_from_smoothing(smoothing)
        hist, x_e, y_e = tools.smart_hist_2d(
            xs,
            ys,
            bin_size,
            padding=padding,
            weights=weights,
            smoothing=smoothing,
            log=log,
        )
        x_cen = tools.bin_centers(x_e)
        y_cen = tools.bin_centers(y_e)

        # then get the levels of the contours
        if percent_levels is None:
            percent_levels = [0, 0.25, 0.5, 0.75, 0.95]
        else:
            not_list_msg = "Percent_levels needs to be a numeric list."
            percent_levels = type_checking.numeric_list_1d(percent_levels, not_list_msg)
            # add zero level to have center region full
            percent_levels = np.insert(percent_levels, 0, 0)

        levels = tools.percentile_level(hist.flatten(), percent_levels)
        # Only duplicates are detected here; matplotlib additionally needs the
        # levels to be increasing.
        if len(set(levels)) < len(levels):
            raise ValueError(
                "The percent levels chosen lead to duplicate "
                "levels.\nContour levels must be increasing."
            )
        kwargs["levels"] = levels

        # set the normalization to ignore the central dummy level. But I don't want
        # either level to be at the edge of the colormap, since those are often white
        # or black. The margin is computed once from the unpadded range so that
        # both ends are padded by the same amount (updating vmin first and then
        # reusing it would make the upper margin 10% larger than the lower one).
        vmin = levels[0]
        vmax = levels[-2]
        margin = 0.1 * (vmax - vmin)
        kwargs["norm"] = mpl_colors.Normalize(vmin=vmin - margin, vmax=vmax + margin)

        if not filled:
            kwargs.setdefault("zorder", 3)
            kwargs.setdefault("linewidths", 2)
            contours = super(Axes_bpl, self).contour(x_cen, y_cen, hist, **kwargs)
        else:
            kwargs.setdefault("zorder", 2)
            contours = super(Axes_bpl, self).contourf(x_cen, y_cen, hist, **kwargs)

        if labels:
            # need to order the percent_levels properly (from high to low)
            percent_levels = sorted(percent_levels, reverse=True)
            # needed since there is one hidden contour at the very center.
            # zip() stops at the shorter sequence, so this trailing 0 is only
            # used if there are more levels than percent levels.
            label_percents = percent_levels + [0]
            label_dict = {
                level: "{:.1f}%".format(percent * 100)
                for level, percent in zip(levels, label_percents)
            }

            self.clabel(contours, fmt=label_dict, fontsize=16)

        return contours


[docs]
    def density_contour(
        self,
        xs,
        ys,
        bin_size=None,
        percent_levels=None,
        smoothing=0,
        weights=None,
        log=False,
        labels=False,
        **kwargs,
    ):
        """
        Draws contour lines over a 2D histogram of data density.

        You pass in the locations (and optionally weights) of all data points.
        A 2D histogram of those points is built, and contours are drawn on top
        of that histogram.

        The contours drawn here are lines only, not filled regions. Use
        `density_contourf()` if you want filled regions instead.

        :param xs: list of x values
        :type xs: list, ndarray
        :param ys: list of y values
        :type ys: list, ndarray
        :param bin_size: Bin size to use for the underlying 2D histogram. This
                         can either be a scalar, in which case the bin size will
                         be the same in both the x and y dimensions, or else a
                         two element list, where the first element will be the
                         bin size in the x dimension, and the second will be
                         the bin size in the y dimension.
        :type bin_size: int, float, list
        :param percent_levels: A list describing the levels of the contours that
                               will be drawn. Each value in this list contains
                               a float between zero and 1 (inclusive) that
                               describes how much of that data will be enclosed
                               by a contour. So if you pass [0.25, 0.5, 0.75],
                               there will be three contours drawn, that enclose
                               25%, 50%, and 75% of the data. If this is not
                               passed in, the default is
                               [0.25, 0.5, 0.75, 0.95].
        :type percent_levels: float, list
        :param smoothing: Optional parameter that will allow the contours to be
                          smoothed. Pass in a nonzero value, which will be the
                          standard deviation of the Gaussian kernel used to
                          smooth the histogram. When using this, often choosing
                          smaller bin sizes is advantageous to make a less
                          grainy plot. Has the same format as padding and
                          bin_size, so different smoothing kernels are possible
                          in the x and y directions.
        :type smoothing: int, float, list
        :param weights: A list containing weights for each data point. If these
                        are not passed, all data points will be weighted
                        equally.
        :type weights: list, np.ndarray
        :param log: Whether or not to do the smoothing and bin creation in log
                    space. This should be used if the plot will be done on
                    log-scaled axes. Can either be a single bool, in which case
                    the x and y scales will both be log (or not), or a two
                    element array, where the first is whether the x axis is
                    log, and the second is y. If this is used, the bin_size and
                    smoothing parameters will be interpreted as dex, rather
                    than raw values.
        :type log: bool, list
        :param labels: Whether or not to label the individual contour lines
                       with their percentage level.
        :type labels: bool
        :param kwargs: Additional keyword arguments to pass on to the original
                       matplotlib contour function.
        :return: output of the matplotlib.contour function.

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np

            bpl.set_style()

            xs = np.concatenate(
                [
                    np.random.normal(3, 2, 1000),
                    np.random.normal(7, 2, 1000),
                ]
            )
            ys = np.concatenate(
                [
                    np.random.normal(7, 2, 1000),
                    np.random.normal(3, 2, 1000),
                ]
            )

            bpl.density_contour(xs, ys, bin_size=0.01, smoothing=0.5, cmap="inferno")
            bpl.set_limits(0, 10, 0, 10)
            bpl.equal_scale()

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np

            bpl.set_style()

            xs = np.concatenate(
                [
                    np.random.normal(3, 2, 1000),
                    np.random.normal(7, 2, 1000),
                ]
            )
            ys = 10 ** np.concatenate(
                [
                    np.random.normal(7, 2, 1000),
                    np.random.normal(3, 2, 1000),
                ]
            )

            fig, ax = bpl.subplots()
            ax.density_contour(
                xs, ys, bin_size=0.01, smoothing=0.5, log=[False, True], cmap="inferno"
            )
            ax.log("y")
            ax.set_limits(0, 10, 1, 1e10)
            bpl.equal_scale()
        """
        # All the work happens in the shared core; this wrapper only pins
        # `filled` to False so that contour lines are drawn.
        core_options = {
            "bin_size": bin_size,
            "percent_levels": percent_levels,
            "smoothing": smoothing,
            "weights": weights,
            "log": log,
            "labels": labels,
            "filled": False,
        }
        return self._density_contour_core(xs, ys, **core_options, **kwargs)



[docs]
    def density_contourf(
        self,
        xs,
        ys,
        bin_size=None,
        percent_levels=None,
        smoothing=0,
        weights=None,
        log=False,
        **kwargs,
    ):
        """
        Draws filled contours over a 2D histogram of data density.

        You pass in the locations (and optionally weights) of all data points.
        A 2D histogram of those points is built, and filled contours are drawn
        on top of that histogram.

        The contours drawn here are filled regions with no lines. Use
        `density_contour()` if you want contour lines instead.

        :param xs: list of x values
        :type xs: list, ndarray
        :param ys: list of y values
        :type ys: list, ndarray
        :param bin_size: Bin size to use for the underlying 2D histogram. This
                         can either be a scalar, in which case the bin size will
                         be the same in both the x and y dimensions, or else a
                         two element list, where the first element will be the
                         bin size in the x dimension, and the second will be
                         the bin size in the y dimension.
        :type bin_size: int, float, list
        :param percent_levels: A list describing the levels of the contours that
                               will be drawn. Each value in this list contains
                               a float between zero and 1 (inclusive) that
                               describes how much of that data will be enclosed
                               by a contour. So if you pass [0.25, 0.5, 0.75],
                               there will be three contours drawn, that enclose
                               25%, 50%, and 75% of the data. If this is not
                               passed in, the default is
                               [0.25, 0.5, 0.75, 0.95].
        :type percent_levels: float, list
        :param smoothing: Optional parameter that will allow the contours to be
                          smoothed. Pass in a nonzero value, which will be the
                          standard deviation of the Gaussian kernel used to
                          smooth the histogram. When using this, often choosing
                          smaller bin sizes is advantageous to make a less
                          grainy plot. Has the same format as padding and
                          bin_size, so different smoothing kernels are possible
                          in the x and y directions.
        :type smoothing: int, float, list
        :param weights: A list containing weights for each data point. If these
                        are not passed, all data points will be weighted
                        equally.
        :type weights: list, np.ndarray
        :param log: Whether or not to do the smoothing and bin creation in log
                    space. This should be used if the plot will be done on
                    log-scaled axes. Can either be a single bool, in which case
                    the x and y scales will both be log (or not), or a two
                    element array, where the first is whether the x axis is
                    log, and the second is y. If this is used, the bin_size and
                    smoothing parameters will be interpreted as dex, rather
                    than raw values.
        :type log: bool, list
        :param kwargs: Additional keyword arguments to pass on to the original
                       matplotlib contour function. `labels` is not accepted.
        :return: output of the matplotlib.contourf function.
        :raises ValueError: If `labels` is passed as a keyword argument.

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np

            bpl.set_style()

            xs = np.concatenate(
                [
                    np.random.normal(3, 2, 1000),
                    np.random.normal(7, 2, 1000),
                ]
            )
            ys = np.concatenate(
                [
                    np.random.normal(7, 2, 1000),
                    np.random.normal(3, 2, 1000),
                ]
            )

            bpl.density_contourf(xs, ys, bin_size=0.01, smoothing=0.5, cmap="inferno")
            bpl.set_limits(0, 10, 0, 10)
            bpl.equal_scale()

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np

            bpl.set_style()

            xs = 10 ** np.concatenate(
                [
                    np.random.normal(3, 2, 1000),
                    np.random.normal(7, 2, 1000),
                ]
            )
            ys = np.concatenate(
                [
                    np.random.normal(7, 2, 1000),
                    np.random.normal(3, 2, 1000),
                ]
            )

            fig, ax = bpl.subplots()
            ax.density_contourf(
                xs, ys, bin_size=0.01, smoothing=0.5, log=[True, False], cmap="inferno"
            )
            ax.log("x")
            ax.set_limits(1, 1e10, 0, 10)
            bpl.equal_scale()
        """
        # Unlike `density_contour`, there is no `labels` option here, so a
        # caller who tries to pass it gets an explicit error.
        if "labels" in kwargs:
            raise ValueError("Filled contours cannot have labels.")

        # The shared core does the work; this wrapper pins `filled` to True
        # and switches labelling off.
        core_options = {
            "bin_size": bin_size,
            "percent_levels": percent_levels,
            "smoothing": smoothing,
            "weights": weights,
            "log": log,
            "labels": False,
            "filled": True,
        }
        return self._density_contour_core(xs, ys, **core_options, **kwargs)



[docs]
    def contour_scatter(
        self,
        xs,
        ys,
        bin_size=None,
        percent_levels=None,
        smoothing=0,
        weights=None,
        labels=False,
        fill_cmap="white",
        scatter_kwargs=None,
        contour_kwargs=None,
        contourf_kwargs=None,
    ):
        """
        Create a contour plot with scatter points in the sparse regions.

        When a dataset is large, plotting a scatterplot is often really hard to
        understand, due to many points overlapping and the high density of
        points overall. A contour or hexbin plot solves many of these problems,
        but these still have the disadvantage of making outliers less obvious.
        A simple solution is to plot contours in the dense regions, while
        plotting individual points where the density is low. That is what
        this function does.

        Here's how this works under the hood. Skip this paragraph if you don't
        care; it won't affect how you use this. This function uses the numpy
        2D histogram function to create an array representing the density in
        each region. If no binning info is specified by the user, the
        Freedman-Diaconis algorithm is used in both dimensions to find the
        ideal bin size for the data. First, an opaque filled contour is
        plotted, then the contour lines are put on top. Then the outermost
        contour is made into a matplotlib path object, which lets us
        check which of the points are outside of this contour. Only the points
        that are outside are plotted.

        :param xs: list of x values
        :type xs: list, ndarray
        :param ys: list of y values
        :type ys: list, ndarray
        :param bin_size: Bin size to use for the underlying 2D histogram. This
                         can either be a scalar, in which case the bin size will
                         be the same in both the x dimensions, or else a two
                         element list, where the first element will be the
                         bin size in the x dimension, and the second will be
                         the bin size in the y dimension.
        :type bin_size: int, float, list
        :param percent_levels: A list describing the levels of the contours that
                               will be drawn. Each value in this list contains
                               a float between zero and 1 (inclusive) that
                               describes how much of that data will be enclosed
                               by a contour. So if you pass [0.25, 0.5, 0.75],
                               there will be three contours drawn, that enclose
                               25%, 50%, and 75% of the data. If this is not
                               passed in, the default is
                               [0.25, 0.5, 0.75, 0.95].
        :type percent_levels: float, list
        :param smoothing: Optional parameter that will allow the contours to be
                          smoothed. Pass in a nonzero value, which will be the
                          standard deviation of the Gaussian kernel use to smooth
                          the histogram. When using this, often choosing smaller
                          bin sizes is advantageous to make a less grainy plot.
                          Has the same format as padding and bin_size, so different
                          smoothing kernels are possible in the x and y directions.
        :type smoothing: int, float, list
        :param weights: A list containing weights for each data point. If these
                        are not passed, all data points will be weighted
                        equally.
        :type weights: list, np.ndarray
        :param labels: Whether or not to label the individual contour lines
                       with their percentage level.
        :type labels: bool
        :param fill_cmap: The colormap used for the filled regions. Can be
                          a strong with any named matplotlib colormap or a
                          colormap object. In addition, there are some special
                          strings that can be used. "white", which is just a
                          solid white fill, is the default.  "background_grey"
                          gives a solid fill that is the same color as the
                          make_ax_dark() background. "modified_greys" is a
                          colormap that starts at the "background_grey" color,
                          then transitions to black.
        :type fill_cmap: str, matplotlib.colors.LinearSegmentedColormap
        :param scatter_kwargs: Dictionary of additional parameters that will be
                               passed to the underlying matplotlib scatter
                               function used for points in the outer regions.
        :type scatter_kwargs: dict
        :param contour_kwargs: Dictionary of additional parameters that will be
                               passed to the underlying matplotlib contour
                               function.
        :type contour_kwargs: dict
        :param contourf_kwargs: Dictionary of additional parameters that will be
                                passed to the underlying matplotlib contourf
                                function.
        :type contourf_kwargs: dict

        Examples

        First, we'll show why this plot is useful. This won't use any of the
        fancy settings, other than `bin_size`, which is used to make the
        contours look nicer.

        .. plot::
            :include-source:

            import numpy as np
            import betterplotlib as bpl

            bpl.set_style()

            xs = np.concatenate(
                [
                    np.random.normal(0, 1, 100000),
                    np.random.normal(3, 1, 100000),
                    np.random.normal(0, 1, 100000),
                ]
            )
            ys = np.concatenate(
                [
                    np.random.normal(0, 1, 100000),
                    np.random.normal(3, 1, 100000),
                    np.random.normal(3, 1, 100000),
                ]
            )

            fig, (ax1, ax2) = bpl.subplots(ncols=2, figsize=[10, 5])

            ax1.scatter(xs, ys)
            ax2.contour_scatter(xs, ys, bin_size=0.3)

        The scatter plot is okay, but the contour makes things easier to see.

        We'll now mess with some of the other parameters. This plot shows how
        the  `bin_size` parameter changes things.

        .. plot::
            :include-source:

            import numpy as np
            import betterplotlib as bpl

            bpl.set_style()

            xs = np.concatenate(
                [
                    np.random.normal(0, 1, 10000),
                    np.random.normal(3, 1, 10000),
                    np.random.normal(0, 1, 10000),
                ]
            )
            ys = np.concatenate(
                [
                    np.random.normal(0, 1, 10000),
                    np.random.normal(3, 1, 10000),
                    np.random.normal(3, 1, 10000),
                ]
            )

            fig, (ax1, ax2, ax3) = bpl.subplots(ncols=3, figsize=[15, 5])

            ax1.contour_scatter(xs, ys, bin_size=0.2)
            ax2.contour_scatter(xs, ys, bin_size=0.3)
            ax3.contour_scatter(xs, ys, bin_size=0.5)

        You can see how small values of `bin_size` lead to more noisy contours.
        The code will attempt to choose its own value of `bin_size` if nothing
        is specified, but it's normally not a very good choice.

        Adjusting the smoothing is often the better way to control the
        noise.

        .. plot::
            :include-source:

            import numpy as np
            import betterplotlib as bpl

            bpl.set_style()

            xs = np.concatenate(
                [
                    np.random.normal(0, 1, 10000),
                    np.random.normal(3, 1, 10000),
                    np.random.normal(0, 1, 10000),
                ]
            )
            ys = np.concatenate(
                [
                    np.random.normal(0, 1, 10000),
                    np.random.normal(3, 1, 10000),
                    np.random.normal(3, 1, 10000),
                ]
            )

            fig, (ax1, ax2, ax3) = bpl.subplots(ncols=3, figsize=[15, 5])

            ax1.contour_scatter(xs, ys, bin_size=0.1, smoothing=0.1)
            ax2.contour_scatter(xs, ys, bin_size=0.1, smoothing=0.2)
            ax3.contour_scatter(xs, ys, bin_size=0.1, smoothing=0.3)

        The weights behave exactly the same as they do in the other density
        contour functions. Here we just have 4 points, but with different
        weights. We also show the different smoothing for different axes, and
        the labels.

        .. plot::
            :include-source:

            import betterplotlib as bpl

            bpl.set_style()

            xs = [1, 2, 3, 4]
            ys = [1, 2, 3, 4]
            weights = [1, 2, 3, 4]
            bpl.contour_scatter(
                xs,
                ys,
                weights=weights,
                bin_size=0.01,
                smoothing=[0.8, 0.3],
                fill_cmap="Blues",
                labels=True,
                contour_kwargs={"colors": "k"},
            )
            bpl.equal_scale()

        Now we can mess with the fun stuff, which is the `fill_cmap` param and
        the kwargs that get passed to the `scatter`, `contour`, and `contourf`
        function calls. There is a lot of stuff going on here, just for
        demonstration purposes. Note that the code has some default parameters
        that it will choose if you don't specify anything.

        .. plot::
            :include-source:

            import numpy as np
            import betterplotlib as bpl

            bpl.set_style()

            xs = np.concatenate(
                [
                    np.random.normal(0, 1, 10000),
                    np.random.normal(3, 1, 10000),
                    np.random.normal(0, 1, 10000),
                ]
            )
            ys = np.concatenate(
                [
                    np.random.normal(0, 1, 10000),
                    np.random.normal(3, 1, 10000),
                    np.random.normal(3, 1, 10000),
                ]
            )

            fig, axs = bpl.subplots(nrows=2, ncols=2)
            [ax1, ax2], [ax3, ax4] = axs

            percent_levels = [0.99, 0.7, 0.3]
            smoothing = 0.2
            bin_size = 0.1

            ax1.contour_scatter(
                xs,
                ys,
                bin_size=bin_size,
                percent_levels=percent_levels,
                smoothing=smoothing,
                fill_cmap="background_grey",
                contour_kwargs={"cmap": "magma"},
                scatter_kwargs={"s": 10, "c": bpl.almost_black},
            )
            ax1.make_ax_dark()

            # or we can choose our own `fill_cmap`
            ax2.contour_scatter(
                xs,
                ys,
                bin_size=bin_size,
                smoothing=smoothing,
                fill_cmap="viridis",
                percent_levels=percent_levels,
                contour_kwargs={"linewidths": 1, "colors": "white"},
                scatter_kwargs={"s": 50, "c": bpl.color_cycle[3], "alpha": 0.3},
            )

            # There are also my colormaps that work with the dark axes
            ax3.contour_scatter(
                xs,
                ys,
                bin_size=bin_size,
                smoothing=smoothing,
                fill_cmap="modified_greys",
                percent_levels=percent_levels,
                scatter_kwargs={"c": bpl.color_cycle[0]},
                contour_kwargs={
                    "linewidths": [2, 0, 0, 0, 0, 0, 0],
                    "colors": bpl.almost_black,
                },
            )
            ax3.make_ax_dark()

            # the default `fill_cmap` is white.
            new_linestyles = ["solid", "dashed", "dashed", "dashed"]
            ax4.contour_scatter(
                xs,
                ys,
                bin_size=bin_size,
                smoothing=smoothing,
                percent_levels=percent_levels,
                scatter_kwargs={
                    "marker": "^",
                    "linewidth": 0.2,
                    "c": bpl.color_cycle[1],
                    "s": 20,
                },
                contour_kwargs={
                    "linestyles": new_linestyles,
                    "colors": bpl.almost_black,
                },
            )

        Note that the contours will work appropriately for datasets with
        "holes", as demonstrated here.

        .. plot::
            :include-source:

            import numpy as np
            import betterplotlib as bpl
            bpl.set_style()

            rad1 = np.random.normal(10, 0.75, 10000)
            theta1 = np.random.uniform(0, 2 * np.pi, 10000)
            x1 = [r * np.cos(t) for r, t in zip(rad1, theta1)]
            y1 = [r * np.sin(t) for r, t in zip(rad1, theta1)]

            rad2 = np.random.normal(20, 0.75, 20000)
            theta2 = np.random.uniform(0, 2 * np.pi, 20000)
            x2 = [r * np.cos(t) for r, t in zip(rad2, theta2)]
            y2 = [r * np.sin(t) for r, t in zip(rad2, theta2)]

            rad3 = np.random.normal(12, 0.75, 12000)
            theta3 = np.random.uniform(0, 2 * np.pi, 12000)
            x3 = [r * np.cos(t) + 10 for r, t in zip(rad3, theta3)]
            y3 = [r * np.sin(t) + 10 for r, t in zip(rad3, theta3)]

            x4 = np.random.uniform(-20, 20, 3500)
            y4 = x4 + np.random.normal(0, 0.5, 3500)

            y5 = y4 * (-1)

            xs = np.concatenate([x1, x2, x3, x4, x4])
            ys = np.concatenate([y1, y2, y3, y4, y5])

            fig, ax = bpl.subplots()

            ax.contour_scatter(xs, ys, smoothing=0.5, bin_size=0.5)
            ax.equal_scale()
        """

        if scatter_kwargs is None:
            scatter_kwargs = dict()
        if contour_kwargs is None:
            contour_kwargs = dict()
        if contourf_kwargs is None:
            contourf_kwargs = dict()

        # determine what our colormap for the fill will be
        if fill_cmap == "white":
            # colormap with one color: white
            fill_cmap = mpl_colors.ListedColormap(colors="white", N=1)
        elif fill_cmap == "background_grey":
            # colormap with one color: the light grey used in backgrounds
            fill_cmap = mpl_colors.ListedColormap(colors=colors.light_gray, N=1)
        elif fill_cmap == "modified_greys":
            # make one that transitions from light grey to black
            new_colors = [colors.light_gray, "black"]
            fill_cmap = mpl_colors.LinearSegmentedColormap.from_list(
                "mod_gray", new_colors
            )

        # then we can set a bunch of default parameters for the contours
        contour_kwargs.setdefault("linewidths", 2)
        contour_kwargs.setdefault("zorder", 3)
        if "colors" not in contour_kwargs:
            contour_kwargs.setdefault("cmap", "viridis")

        # we can then go ahead and plot the filled contours, then the contour lines
        if fill_cmap is not None:
            self.density_contourf(
                xs,
                ys,
                bin_size=bin_size,
                percent_levels=percent_levels,
                smoothing=smoothing,
                weights=weights,
                cmap=fill_cmap,
                **contourf_kwargs,
            )
        contours = self.density_contour(
            xs,
            ys,
            bin_size=bin_size,
            percent_levels=percent_levels,
            smoothing=smoothing,
            weights=weights,
            labels=labels,
            **contour_kwargs,
        )

        # we saved the output from the contour, since it has information about the
        # shape of the contours we can use to figure out which points are outside
        # and therefore need to be plotted. There may be multiple outside contours,
        # especially if the shape is complicated, so we test to see how many
        # each point is inside. We only do this if the user actually wants to
        # plot these points
        if scatter_kwargs.get("s") != 0:
            shapes_in = np.zeros(len(xs))
            for line in contours.allsegs[0]:  # zero index is lowest level
                # make a closed shape with the line
                polygon = path.Path(line, closed=True)
                shapes_in += polygon.contains_points(list(zip(xs, ys)))

            # the ones that need to be hidden are inside an odd number of
            # shapes. This sounds weird, but actually works. If we have a ring
            # of points, the outliers in the middle will be inside the outermost
            # and innermost contours, so they are inside two shapes. We want to
            # plot these. So we plot the ones that are divisible by two.
            plot_idx = np.where(shapes_in % 2 == 0)

            # We then get these elements. The multiple indexing is only supported for
            # numpy arrays, not Python lists, so convert our values to that first.
            outside_xs = np.array(xs)[plot_idx]
            outside_ys = np.array(ys)[plot_idx]

            # now we can do our scatterplot.
            scatter_kwargs.setdefault("alpha", 1.0)
            scatter_kwargs.setdefault("s", 10)
            if "c" not in scatter_kwargs:
                scatter_kwargs.setdefault("color", colors.almost_black)
            scatter_kwargs.setdefault("zorder", 1)
            self.scatter(outside_xs, outside_ys, **scatter_kwargs)

        return contours



[docs]
    def data_ticks(self, x_data, y_data, extent=0.015, *args, **kwargs):
        """
        Puts tiny ticks on the axis borders marking the location of each point.

        :param x_data: list of values to mark on the x-axis.
        :type x_data: list
        :param y_data: list of values to mark on the y-axis. This doesn't have
                       to be the same length as `x_data`, necessarily.
        :type y_data: list
        :param extent: How far the ticks go up from the x-axis. The default is
                       0.015, meaning the ticks go 1.5% of the way to the top
                       of the plot. Note that the ticks created by this
                       function will have the same physical size on both axes.
                       Since in general the x and y axes aren't the same
                       physical size, the ticks on the y-axis will be scaled to
                       match the physical size of the x ticks. This means that
                       in the default case, the y ticks won't cover 1.5% of the
                       axis, but again will be the same physical size as the x
                       ticks.
        :type extent: float
        :param args: Additional arguments to pass to the `axvline` and `axhline`
                     functions, which is what is used to make each tick.
        :param kwargs: Additional keyword arguments to pass to the `axvline` and
                       `axhline` functions. `color` (or its alias `c`) is an
                       important one here, and it defaults to `almost_black`.
                       `linewidth` (or its alias `lw`) defaults to 0.5.
        :return: None


        Example

        .. plot::
            :include-source:

            import numpy as np
            import betterplotlib as bpl
            bpl.set_style()

            xs = np.random.normal(0, 1, 100)
            ys = np.random.normal(0, 1, 100)

            bpl.scatter(xs, ys)
            bpl.data_ticks(xs, ys)
        """
        # Matplotlib has two keywords each for the color and the line width.
        # Only fill in a default when the caller used neither spelling, so the
        # two spellings of one property are never forwarded together.
        if "c" not in kwargs and "color" not in kwargs:
            kwargs["color"] = colors.almost_black
        if "lw" not in kwargs and "linewidth" not in kwargs:
            kwargs["linewidth"] = 0.5

        # NOTE(review): any extra positional `args` are forwarded right after
        # the coordinate, where they would presumably bind to the same
        # ymin/xmin slots that are set by keyword here -- confirm whether
        # positional extras are ever usable.
        for x in x_data:
            self.axvline(x, *args, ymin=0, ymax=extent, **kwargs)

        # Since the matplotlib command to ax(h/v)line uses an extent based on
        # percentage of the way to the end, to get the same physical size for
        # both axes, we have to scale based on the size of the axes
        h_extent = (self.bbox.height / self.bbox.width) * extent
        for y in y_data:
            self.axhline(y, *args, xmin=0, xmax=h_extent, **kwargs)



[docs]
    def plot(self, *args, **kwargs):
        """
        Draw lines like the regular `plot` function, with a thicker default.

        Use this for plotting lines; the `scatter()` function is the better
        choice for plotting points.

        The only change from the regular `plt.plot()` or `ax.plot()` functions
        is that the line width defaults to 3 when the caller specifies neither
        `lw` nor `linewidth`. All parameters are otherwise the exact same and
        are passed along untouched.

        :param args: Additional arguments to pass to the `plot` function
        :param kwargs: Additional keyword arguments to pass to the `plot` function
        :return: The output of the parent `plot` function call.

        .. plot::
            :include-source:

            import numpy as np
            import betterplotlib as bpl
            import matplotlib.pyplot as plt
            bpl.set_style()

            xs = np.arange(0, 1, 0.01)
            ys_1 = xs
            ys_2 = xs**2

            fig = plt.figure(figsize=[15, 7])
            ax1 = fig.add_subplot(121)
            ax2 = fig.add_subplot(122, projection="bpl")  # bpl subplot.

            ax1.plot(xs, ys_1)
            ax1.plot(xs, ys_2)

            ax2.plot(xs, ys_1)
            ax2.plot(xs, ys_2)

            ax1.set_title("matplotlib")
            ax2.set_title("betterplotlib")

        """
        # The line width can be spelled either `lw` or `linewidth`. Only apply
        # the thicker default when the caller used neither spelling.
        width_given = "lw" in kwargs or "linewidth" in kwargs
        if not width_given:
            kwargs["lw"] = 3

        return super(Axes_bpl, self).plot(*args, **kwargs)



[docs]
    def axvline(self, x=0, *args, **kwargs):
        """
        Draw a vertical line at a given x value on the axes.

        The line is colored `almost_black` unless the caller picks a color
        with either the `c` or `color` keyword.

        :param x: Data value on the x-axis to place the line.
        :type x: float
        :param args: Additional parameters that will be passed on to the
                     regular `plt.axvline` function. See its documentation
                     for details.
        :param kwargs: Similarly, additional keyword arguments that will be
                       passed on to the regular `plt.axvline` function.
        :return: The output of the parent `axvline` function call.

        .. plot::
            :include-source:

            import numpy as np
            import betterplotlib as bpl
            bpl.set_style()

            left_xs = np.arange(-20, 1, 0.01)
            right_xs = np.arange(1.001, 20, 0.01)
            left_ys = left_xs / (left_xs - 1)
            right_ys = right_xs / (right_xs - 1)

            bpl.make_ax_dark()
            bpl.plot(left_xs, left_ys, c=bpl.color_cycle[2])
            bpl.plot(right_xs, right_ys, c=bpl.color_cycle[2])
            bpl.axvline(1.0, linestyle="--")
            bpl.axhline(1.0, linestyle="--")
            bpl.set_limits(-10, 10, -10, 10)

        """

        # Matplotlib accepts the color under two keywords. Leave the caller's
        # choice alone if either one is present, otherwise use almost black.
        if "c" not in kwargs and "color" not in kwargs:
            kwargs["c"] = colors.almost_black

        return super(Axes_bpl, self).axvline(x, *args, **kwargs)



[docs]
    def axhline(self, y=0, *args, **kwargs):
        """
        Draw a horizontal line at a given y value on the axes.

        The line is colored `almost_black` unless the caller picks a color
        with either the `c` or `color` keyword.

        :param y: Data value on the y-axis to place the line.
        :type y: float
        :param args: Additional parameters that will be passed on to the
                     regular `plt.axhline` function. See its documentation
                     for details.
        :param kwargs: Similarly, additional keyword arguments that will be
                       passed on to the regular `plt.axhline` function.
        :return: The output of the parent `axhline` function call.

        .. plot::
            :include-source:

            import numpy as np
            import betterplotlib as bpl
            bpl.set_style()

            left_xs = np.arange(-20, 1, 0.01)
            right_xs = np.arange(1.001, 20, 0.01)
            left_ys = left_xs / (left_xs - 1)
            right_ys = right_xs / (right_xs - 1)

            bpl.make_ax_dark()
            bpl.plot(left_xs, left_ys, c=bpl.color_cycle[2])
            bpl.plot(right_xs, right_ys, c=bpl.color_cycle[2])
            bpl.axvline(1.0, linestyle="--")
            bpl.axhline(1.0, linestyle="--")
            bpl.set_limits(-10, 10, -10, 10)

        """

        # Matplotlib accepts the color under two keywords. Leave the caller's
        # choice alone if either one is present, otherwise use almost black.
        if "c" not in kwargs and "color" not in kwargs:
            kwargs["c"] = colors.almost_black

        return super(Axes_bpl, self).axhline(y, *args, **kwargs)



[docs]
    def errorbar(self, *args, **kwargs):
        """
        Wrapper for the plt.errorbar() function.

        Style changes: capsize is automatically zero, and the format is
        automatically a scatter plot, rather than the connected lines that
        are used by default otherwise. It also adds a black marker edge to
        distinguish the markers when there are lots of data points. Otherwise
        everything blends together.

        All of these are only defaults. Anything the caller passes explicitly
        is kept, including the short aliases `mew` (for `markeredgewidth`) and
        `mec` (for `markeredgecolor`).

        :param args: Additional arguments to pass to the `errorbar` function
        :param kwargs: Additional keyword arguments to pass to the `errorbar` function
        :return: The output of the parent `errorbar` function call.

        .. plot::
            :include-source:

            import numpy as np
            import betterplotlib as bpl
            import matplotlib.pyplot as plt
            bpl.set_style()

            xs = np.random.normal(0, 1, 100)
            ys = np.random.normal(0, 1, 100)
            yerr = np.random.uniform(0.3, 0.8, 100)
            xerr = np.random.uniform(0.3, 0.8, 100)

            fig = plt.figure(figsize=[15, 7])
            ax1 = fig.add_subplot(121)
            ax2 = fig.add_subplot(122, projection="bpl")  # bpl subplot.

            for ax in [ax1, ax2]:
                ax.errorbar(xs,   ys,   xerr=xerr, yerr=yerr, label="set 1")
                ax.errorbar(xs+1, ys+1, xerr=xerr, yerr=yerr, label="set 2")
                ax.legend()
            ax1.set_title("matplotlib")
            ax2.set_title("betterplotlib")

        """

        kwargs.setdefault("capsize", 0)
        kwargs.setdefault("fmt", "o")

        # The marker edge properties each have a short alias in matplotlib.
        # Only add our default when the caller used neither spelling, so the
        # two spellings of one property are never forwarded together.
        if "mew" not in kwargs and "markeredgewidth" not in kwargs:
            kwargs["markeredgewidth"] = 0.25
        if "mec" not in kwargs and "markeredgecolor" not in kwargs:
            kwargs["markeredgecolor"] = colors.almost_black

        return super(Axes_bpl, self).errorbar(*args, **kwargs)



[docs]
    def twin_axis_simple(self, axis, lower_lim, upper_lim, label="", log=False):
        """
        Creates a differently scaled axis on either the top or the right.

        Note that this only does simple scalings of the new axes, which will
        still only be linear or log scaled axes. If you want labels placed
        according to a function that takes one set of axes values to another
        (in a potentially nonlinear way), use `twin_axis` instead.

        :param axis: Which axis gets a differently scaled twin. Must either
                     be "x" or "y".
        :type axis: str
        :param lower_lim: Value to be put on the left/bottom of the newly
                          created axis.
        :type lower_lim: float
        :param upper_lim: Value to be put on the right/top of the newly
                          created axis.
        :type upper_lim: float
        :param label: The label to put on this new axis.
        :type label: str
        :param log: Whether or not to log scale this axis.
        :type log: bool
        :returns: the new axes
        :raises ValueError: if `axis` is neither "x" nor "y".

        .. plot::
            :include-source:

            import betterplotlib as bpl
            bpl.set_style()

            bpl.set_limits(0, 10, 0, 5)
            bpl.add_labels("x", "y")
            bpl.twin_axis_simple("x", 0, 100, "$10 x$")
            bpl.twin_axis_simple("y", 1, 10**5, "$10^y$", log=True)

        Note that for a slightly more complicated version of this plot, say if
        we wanted the top x axis to be x^2 rather than 10x, the limits would
        still be the same, but since the new axis will always be a linear or log
        scale the new axis won't represent the true relationship between the
        variables on the twin axes. See `twin_axis` for that.


        """

        # Create the twin and pick out the setters for the requested direction,
        # so the actual configuration below is written only once.
        if axis == "x":
            twin = super(Axes_bpl, self).twiny()
            apply_limits = twin.set_xlim
            apply_scale = twin.set_xscale
            apply_label = twin.set_xlabel
        elif axis == "y":
            twin = super(Axes_bpl, self).twinx()
            apply_limits = twin.set_ylim
            apply_scale = twin.set_yscale
            apply_label = twin.set_ylabel
        else:
            raise ValueError("Axis must be either 'x' or 'y'. ")

        apply_limits(lower_lim, upper_lim)
        if log:
            # not a bpl subplot, so we can't use log()
            apply_scale("log")
        apply_label(label)

        return twin



[docs]
    def twin_axis(
        self, axis, new_ticks, label, old_to_new_func=None, new_to_old_func=None
    ):
        """
        Create a twin axis, where the new axis values are an arbitrary function
        of the old values.

        This is used when you want to put two related quantities on the axis,
        for example distance/redshift in astronomy, where one isn't a simple
        scaling of the other. If you want a simple linear or log scale, use the
        `twin_axis_simple` function. This one will create a new axis that is
        an arbitrary scale.

        :param axis: Whether the new axis labels will be on the "x" or "y" axis.
                     If "x" is chosen this will place the markers on the top
                     border of the plot, while "y" will place the values on the
                     right border of the plot. "x" and "y" are the only
                     allowed values.
        :type axis: str
        :param new_ticks: List of locations (in the new data values) to place
                          ticks. Any values outside the range of the plot
                          will be ignored.
        :type new_ticks: list, np.ndarray
        :param label: The label given to the newly created axis.
        :type label: str
        :param old_to_new_func: Function that takes values on the original axis
                                and transforms them to corresponding values
                                on the soon-to-be created axis. Either this
                                parameter or `new_to_old_func` can be used, but
                                not both.
        :param new_to_old_func: Function that takes values on the
                                soon-to-be-created axis and transforms them to
                                corresponding values on the original axis.
                                Either this parameter or `old_to_new_func` can
                                be used, but not both.
        :return: New axis object that was created, containing the newly
                 created labels.
        :raises ValueError: if neither or both of the transformation functions
                            are given, or if `axis` is not "x" or "y".

        .. plot::
            :include-source:

            import betterplotlib as bpl
            bpl.set_style()

            def square(x):
                return x**2

            def cubed(x):
                return x**3

            fig, ax = bpl.subplots(figsize=[5, 5], tight_layout=True)
            ax.set_limits(0, 10, 0, 10.0001)  # to avoid floating point errors
            ax.add_labels("x", "y")
            ax.twin_axis("y", [0, 10, 30, 60, 100], "$y^2$", square)
            ax.twin_axis("x", [0, 10, 100, 400, 1000], "$x^3$", cubed)

        Note that we had to be careful with floating point errors when one of
        the markers we want is exactly on the edge. Make the borders slightly
        larger to ensure that all labels fit on the plot.

        There are two ways to give the function that transforms the values from
        one axis to the other. The parameter `old_to_new_func` (used as the
        last parameter in the plot above) takes values on the original axis and
        transforms them to values on the newly created axis. However, the
        parameter `new_to_old_func` does the inverse, taking values on the
        new axis and transforming them to the currently existing one. Only one
        of these two parameters can be provided. Identical plots can be created
        with either function, but due to specifics of the implementation, using
        the `new_to_old_func` parameter is slightly more computationally
        efficient. Here's an example of an identical plot to the first created
        with `new_to_old_func` instead.


        .. plot::
            :include-source:

            import numpy as np
            import betterplotlib as bpl

            bpl.set_style()

            def cube_root(x):
                return x ** (1.0 / 3.0)

            fig, ax = bpl.subplots(figsize=[5, 5], tight_layout=True)
            ax.set_limits(0, 10, 0, 10.0001)  # to avoid floating point errors
            ax.add_labels("x", "y")
            ax.twin_axis("y", [0, 10, 30, 60, 100], "$y^2$", new_to_old_func=np.sqrt)
            ax.twin_axis(
                "x", [0, 10, 100, 400, 1000], "$x^3$", new_to_old_func=cube_root
            )

        This function will ignore values for the ticks that are outside the
        limits of the plot. The following plot isn't the most useful, since
        it could be done with `twin_axis_simple`, but it gets the idea
        across.

        .. plot::
            :include-source:

            import numpy as np
            import betterplotlib as bpl
            bpl.set_style()

            xs = np.logspace(0, 3, 100)

            fig, ax = bpl.subplots(figsize=[5, 5], tight_layout=True)
            ax.plot(xs, xs)
            ax.log("both")
            ax.add_labels("x", "y")
            # extraneous values are ignored.
            ax.twin_axis("x", [-1, 0, 1, 2, 3, 4, 5], "log(x)", np.log10)
            ax.twin_axis("y", [-1, 0, 1, 2, 3, 4, 5], "log(y)", np.log10)

        """

        # Automatically choosing the tick locations is not supported; the
        # caller has to pass `new_ticks` in.

        # implementation details: The data values for the old axes will be used
        # as the data values for the new scaled axis. This ensures that they
        # will line up with each other. However, we will set the label text
        # to be the values the user passes in.

        if old_to_new_func is None and new_to_old_func is None:
            raise ValueError(
                "Either `old_to_new_func` or `new_to_old_func` must be provided."
            )
        if old_to_new_func is not None and new_to_old_func is not None:
            raise ValueError(
                "Don't provide both `old_to_new_func` and "
                "`new_to_old_func`.\nUsing `new_to_old_func` is "
                "more efficient, so provide only that."
            )

        # depending on which axis the user wants to use, we have to get
        # different things.
        if axis == "y":
            new_ax = self.twinx()  # shares x axis, gets its own y axis
            old_min, old_max = self.get_ylim()
            lim_func = new_ax.set_ylim  # function to set limits
            new_axis = new_ax.yaxis
            new_ax.set_ylabel(label)
            # the new axis needs to share the same scaling as the old
            if self.get_yscale() == "log":
                new_ax.set_yscale("log")  # not a bpl axis, so we can't use log()
                # if we have log in old, we don't want minor ticks on the new
                new_axis.set_tick_params(which="minor", length=0)
        elif axis == "x":
            new_ax = self.twiny()  # shares y axis, gets its own x axis
            old_min, old_max = self.get_xlim()
            lim_func = new_ax.set_xlim  # function to set limits
            new_axis = new_ax.xaxis
            new_ax.set_xlabel(label)
            # the new axis needs to share the same scaling as the old
            if self.get_xscale() == "log":
                new_ax.set_xscale("log")  # not a bpl axis, so we can't use log()
                # if we have log in old, we don't want minor ticks on the new
                new_axis.set_tick_params(which="minor", length=0)
        else:
            raise ValueError("`axis` must either be 'x' or 'y'. ")

        # set the limits using the function we got earlier. We use the values
        # of the old axes for the underlying data, in their original order so
        # that an inverted axis stays inverted on the twin.
        lim_func(old_min, old_max)

        # On an inverted axis the limits come back as (larger, smaller). Sort
        # them for the "is this tick visible" test below, otherwise no tick
        # could ever pass it.
        range_low = min(old_min, old_max)
        range_high = max(old_min, old_max)

        def _invert_numerically(new_value):
            # We only have the function going from old values to new ones, so
            # find the old value whose image is closest to `new_value`.
            def distance(x):
                return abs(old_to_new_func(x) - new_value)

            # ignore numpy warnings here, everything is fine.
            with np.errstate(all="ignore"):
                return optimize.minimize_scalar(distance).x

        # pick how to go from a new tick value to its location on the old axis
        if new_to_old_func is not None:
            locate_in_old = new_to_old_func
        else:
            locate_in_old = _invert_numerically

        # then determine the locations to put the new ticks, in terms of the
        # old values, keeping only those within the original axis range
        tick_locs_in_old = []
        new_ticks_good = []
        for new_value in new_ticks:
            old_data_loc = locate_in_old(new_value)
            if range_low <= old_data_loc <= range_high:
                tick_locs_in_old.append(old_data_loc)
                new_ticks_good.append(new_value)

        # then put the ticks at the locations of the old data, but label them
        # with the value of the transformed data.
        new_axis.set_ticks(tick_locs_in_old)
        new_axis.set_ticklabels(new_ticks_good)

        return new_ax



[docs]
    def shaded_density(
        self,
        xs,
        ys,
        bin_size=None,
        smoothing=0,
        cmap="Greys",
        weights=None,
        log_xy=False,
        log_hist=False,
    ):
        """
        Shades the axes according to the density of the data points.

        This is essentially a 2D histogram that also supports smoothing. The
        histogram comes from `tools.smart_hist_2d`, and it is drawn with the
        pcolormesh function in matplotlib.

        :param xs: list of x values
        :type xs: list, ndarray
        :param ys: list of y values
        :type ys: list, ndarray
        :param bin_size: Bin size to use for the underlying 2D histogram. This
                         can either be a scalar, in which case the bin size will
                         be the same in both the x and y dimensions, or else a
                         two element list, where the first element will be the
                         bin size in the x dimension, and the second will be
                         the bin size in the y dimension.
        :type bin_size: int, float, list
        :param smoothing: Optional parameter that will smooth the shaded density.
                          Pass in a nonzero value, which will be the
                          standard deviation of the Gaussian kernel used to
                          smooth the histogram. When using this, often choosing
                          smaller bin sizes is advantageous to make a less
                          grainy plot. Has the same format as bin_size, so
                          different smoothing kernels are possible in the x and
                          y directions.
        :type smoothing: int, float, list
        :param cmap: The colormap to use for the shading
        :type cmap: str
        :param weights: A list containing weights for each data point. If these
                        are not passed, all data points will be weighted
                        equally.
        :type weights: list, np.ndarray
        :param log_xy: Whether or not to do the smoothing and bin creation in log
                       space. This should be used if the plot will be done on
                       log-scaled axes. Can either be a single bool, in which
                       case the x and y scales will both be log (or not), or a
                       two element array, where the first is whether the x axis
                       is log, and the second is y. If this is used, the
                       bin_size and smoothing parameters will be interpreted as
                       dex, rather than raw values.
        :type log_xy: bool, list
        :param log_hist: Whether or not to use the log of the histogram values to
                         compute the shading, or just the values of the histogram.
        :type log_hist: bool
        :return: output of the pcolormesh function call.

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np
            bpl.set_style()

            xs = np.concatenate(
                [np.random.normal(3, 2, 1000), np.random.normal(7, 2, 1000)]
            )
            ys = np.concatenate(
                [np.random.normal(7, 2, 1000), np.random.normal(3, 2, 1000)]
            )

            bpl.shaded_density(xs, ys, bin_size=0.01, smoothing=0.5, cmap="inferno")
            bpl.set_limits(0, 10, 0, 10)
            bpl.equal_scale()

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np

            bpl.set_style()

            xs = np.concatenate(
                [np.random.normal(3, 2, 1000), np.random.normal(7, 2, 1000)]
            )
            ys = 10 ** np.concatenate(
                [np.random.normal(7, 2, 1000), np.random.normal(3, 2, 1000)]
            )

            fig, ax = bpl.subplots()
            bpl.shaded_density(
                xs,
                ys,
                bin_size=0.01,
                smoothing=0.5,
                cmap="inferno",
                log_xy=[False, True],
            )
            ax.log("y")
            bpl.set_limits(0, 10, 1, 1e10)
            bpl.equal_scale()

        """
        # build the (optionally smoothed) 2D histogram that gets shaded
        pad = tools._padding_from_smoothing(smoothing)
        density, edges_x, edges_y = tools.smart_hist_2d(
            xs,
            ys,
            bin_size,
            padding=pad,
            smoothing=smoothing,
            weights=weights,
            log=log_xy,
        )

        # color limits for the linear case: the top is the densest bin, the
        # bottom is the first percentile of the non-negative bins
        color_max = np.max(density)
        color_min = np.percentile(density[density >= 0], 1)

        if log_hist:
            # In log space only strictly positive bins give a usable floor.
            # The bottom of the color range is also kept within 3 dex of the
            # top.
            positive_floor = np.percentile(density[density > 0], 1)
            density = np.log10(density)
            color_max = np.log10(color_max)
            color_min = max(np.log10(positive_floor), color_max - 3)

        return super(Axes_bpl, self).pcolormesh(
            edges_x, edges_y, density, cmap=cmap, vmax=color_max, vmin=color_min
        )


    # Function to use to set the ticks
    @staticmethod
    @ticker.FuncFormatter
    def _nice_log_formatter(x, pos):
        """
        Format numbers to be placed on a log axis. Numbers near 1 will not be
        written as exponents.

        For example, we'll have 10^-3, 0.01, 0.1, 1, 10, 100, 10^3...

        Only exact powers of ten are labeled. Any other tick location (e.g. minor
        ticks), as well as non-positive or non-finite values whose logarithm is
        undefined, gets an empty label. The user can add their own labels if they
        want.

        :param x: number to format
        :type x: float
        :param pos: tick position. Required by the formatter call signature, but
                    not used here.
        :return: string with formatted number, or an empty string if the tick
                 should not be labeled
        :rtype: str
        """
        # log10 is undefined here; returning early also avoids converting
        # inf / nan to an integer below
        if not np.isfinite(x) or x <= 0:
            return ""

        exp = np.log10(x)
        # round to the *nearest* integer, rather than truncating, so that floating
        # point error just below a power of ten (2.9999999...) is still recognized
        power = int(np.rint(exp))
        # explicit check rather than an `assert`, since asserts are removed when
        # Python runs with -O
        if not np.isclose(exp, power):
            return ""

        # for values between 0.01 and 100, just use that value.
        # Otherwise use the log.
        if abs(power) <= 2:
            return f"{x:g}"
        return "$10^{" + f"{power}" + "}$"


[docs]
    def log(self, axes, nice_format=True):
        """
        Switch the x axis, the y axis, or both to a logarithmic scale.

        :param axes: which axes to log scale. Pass "x" for the x axis, "y" for the y
                     axis, or "both". The comparison is case-insensitive.
        :type axes: str
        :param nice_format: whether to format numbers near 1 as regular numbers,
                            instead of exponential notation. For example, passing True
                            will show 1 as 1, while False will show 1 as 10^0. Defaults
                            to True.
        :type nice_format: bool
        :returns: None
        :raises ValueError: if `axes` is not one of "x", "y", or "both".

        .. plot::
            :include-source:

            import betterplotlib as bpl

            bpl.set_style()

            fig, axs = bpl.subplots(ncols=2, figsize=[12, 6])

            for ax, nice_format in zip(axs, [True, False]):
                ax.log("both", nice_format)
                ax.set_limits(1e-3, 1e3, 1e-3, 1e3)
                ax.equal_scale()
                ax.add_labels(title=f"nice_format = {str(nice_format)}")

        """
        choice = axes.lower()
        if choice not in ("x", "y", "both"):
            raise ValueError('`axes` must be either "x", "y", or "both".')

        # "both" simply means handling "x" and then "y"
        letters = "xy" if choice == "both" else choice

        for letter in letters:
            # resolves to set_xscale / set_yscale
            getattr(self, f"set_{letter}scale")("log")
            if not nice_format:
                continue
            # resolves to self.xaxis / self.yaxis
            axis_obj = getattr(self, f"{letter}axis")
            axis_obj.set_major_formatter(self._nice_log_formatter)
            axis_obj.set_minor_formatter(self._nice_log_formatter)



[docs]
    def set_ticks(self, axis, ticks, labels=None, minor=False):
        """
        Place tick marks on one axis, optionally giving each one a label.

        This simply forwards to `ax.xaxis.set_ticks()` or `ax.yaxis.set_ticks()`,
        and exists because that syntax is easy to forget.

        :param axis: which axis gets the ticks, either "x" or "y"
        :type axis: str
        :param ticks: The list of locations to put ticks
        :type ticks: list
        :param labels: The labels to put for each of the ticks passed in
        :type labels: list
        :param minor: Whether these ticks should be minor ticks (True) or major
                      ticks (False)
        :type minor: bool
        :raises ValueError: if `axis` is neither "x" nor "y".

        .. plot::
            :include-source:

            import betterplotlib as bpl

            bpl.set_style()

            bpl.set_limits(1, 2, 1, 2)
            bpl.equal_scale()
            bpl.set_ticks("x", [1, 1.5, 1.7, 2.0])
            bpl.set_ticks("y", [1, 1.2, 1.5, 2.0], ["A", "B", "C", "D"])

        .. plot::
            :include-source:

            import betterplotlib as bpl

            bpl.set_style()

            bpl.log("both")
            bpl.set_limits(1, 10, 1, 10)
            bpl.equal_scale()
            bpl.set_ticks("x", [1, 10], ["A", "D"])
            bpl.set_ticks("x", [3, 5], ["b", "c"], minor=True)
            bpl.set_ticks("y", [1, 10])
            bpl.set_ticks(
                "y",
                [1, 2, 3, 4, 5, 6, 7, 8, 9],
                ["", "2", "3", "", "5", "", "7", "", ""],
                minor=True,
            )

        """
        # reject anything other than the two supported axis names up front
        if axis not in ("x", "y"):
            raise ValueError('`axis` must be either "x" or "y"')

        target_axis = self.xaxis if axis == "x" else self.yaxis
        target_axis.set_ticks(ticks, labels=labels, minor=minor)



[docs]
    def kde(self, xs, smoothing, norm=False, log=False, **kwargs):
        """
        Visualize a distribution in 1D with kernel density estimation

        A Gaussian is placed at each data point, the Gaussians are summed on a
        regular grid of sample points, and the resulting curve is drawn with
        `plot`.

        :param xs: The data to visualize
        :type xs: list, ndarray
        :param smoothing: The smoothing to apply to each data point. If a single value
                          is supplied, that will be applied to all data points. You
                          can also supply a list with length equal to `xs` to use
                          different smoothing for each data point. All values must
                          be positive.
        :type smoothing: float, list, ndarray
        :param norm: Whether to normalize the distribution so that it integrates
                     to 1.
        :type norm: bool
        :param log: Whether to do the KDE creation in log space. If this is used,
                    the value for `smoothing` will be interpreted as dex. If `norm` is
                    also used, the integration will be done in log space, meaning we
                    integrate dlogx rather than dx. All of `xs` must be positive
                    when this is used.
        :type log: bool
        :param kwargs: additional keyword arguments to pass to the `plot` function
        :return: None
        :raises ValueError: if `smoothing` is neither a scalar nor the same length
                            as `xs`, if `xs` is empty, if any smoothing value is
                            not positive, or if `log` is used with non-positive
                            data.

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np

            bpl.set_style()

            data = np.random.normal(0, 1, 100)
            bpl.kde(data, 0.5)
            bpl.set_limits(-3, 3, 0)

        You can also use different smoothing for each data point. Note that the area
        contributed by each point is equal, so points with smaller smoothing give
        higher peaks.

        .. plot::
            :include-source:

            import betterplotlib as bpl

            bpl.set_style()

            bpl.kde([1, 2, 3], [0.1, 0.2, 0.3])
            bpl.set_limits(0, 4, 0)

        Finally, the normalization parameter allows comparison of datasets of
        different size. Here I also demonstrate that any additional keyword arguments
        get passed along to `plot`.

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np

            bpl.set_style()

            d1 = np.random.normal(0, 1, 10000)
            d2 = np.random.normal(0.5, 1.5, 50)
            bpl.kde(d1, 0.5, norm=True, lw=3, c=bpl.color_cycle[1], label="N=10,000")
            bpl.kde(d2, 0.5, norm=True, lw=10, c=bpl.almost_black, label="N=50")
            bpl.set_limits(-3, 3, 0)
            bpl.legend()

        When normalization is used with log, the integration will be done in log space
        as well. Also, when log is used, smoothing
        will be interpreted in dex.

        .. plot::
            :include-source:

            import betterplotlib as bpl
            import numpy as np

            bpl.set_style()

            d1 = 10**np.random.normal(0, 1, 10000)
            d2 = 10**np.random.normal(0.5, 1.5, 50)
            bpl.kde(d1, 0.5, norm=True, log=True)
            bpl.kde(d2, 0.5, norm=True, log=True)
            bpl.log("x")
            bpl.set_limits(1e-3, 1e3, 0)

        """
        smoothing = type_checking.numeric_list_1d(
            smoothing, "smoothing must be a scalar or an array"
        )
        # if it's a single value, turn it into a list with the same dimension as xs
        if len(smoothing) == 1:
            smoothing = np.repeat(smoothing, len(xs))
        if len(smoothing) != len(xs):
            raise ValueError(
                "`smoothing` must be a scalar or an array with the same size as `xs`."
            )

        # fail early with a clear message, instead of an opaque numpy error from
        # the min/max/arange calls below
        if len(xs) == 0:
            raise ValueError("`xs` must contain at least one data point.")
        # the smoothing sets both the width of each Gaussian and the spacing of the
        # sample grid, so zero or negative values cannot produce a valid KDE
        if np.min(smoothing) <= 0:
            raise ValueError("`smoothing` values must all be positive.")

        # to do the KDE in log space, just take the log of the data, do the regular
        # KDE, then undo the log on the data before plotting
        if log:
            xs = np.asarray(xs)
            if np.any(xs <= 0):
                raise ValueError("`xs` must all be positive when `log` is True.")
            xs = np.log10(xs)

        # get the points used to sample the KDE. We'll base the spacing on the smallest
        # smoothing, but the padding on the largest smoothing
        spacing = 0.1 * np.min(smoothing)
        padding = 10 * np.max(smoothing)
        points = np.arange(
            np.min(xs) - padding,
            np.max(xs) + padding,
            spacing,
        )

        # then go through the data points and construct the sum
        result = np.zeros(points.shape)
        for x, smooth in zip(xs, smoothing):
            result += tools.gaussian(points, x, smooth)

        # apply normalization
        if norm:
            # `integrate.trapz` no longer exists in recent SciPy releases, where
            # `integrate.trapezoid` is its replacement. Only fall back to the old
            # name on SciPy versions that predate `trapezoid`.
            trapezoid = getattr(integrate, "trapezoid", None) or integrate.trapz
            result = result / trapezoid(x=points, y=result)

        # undo the log to data before plotting
        if log:
            points = 10**points

        self.plot(points, result, **kwargs)
Source code for betterplotlib.axes_bpl

betterplotlib

Navigation

Related Topics