import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
def savefig(fig, *args, **kwargs):
    """Save a figure, using a tight bounding box unless told otherwise.

    Adapted from
    https://github.com/mwaskom/seaborn/blob/master/seaborn/axisgrid.py#L33

    :param fig: object exposing a ``savefig`` method, e.g. a matplotlib figure
    :param args: positional arguments forwarded unchanged to ``fig.savefig``
    :param kwargs: keyword arguments forwarded to ``fig.savefig``;
        ``bbox_inches`` is set to ``"tight"`` only if the caller did not supply it
    """
    # ``**kwargs`` is already a new dict for this call, so adding the default
    # here cannot leak back into the caller's own dictionary.
    kwargs.setdefault("bbox_inches", "tight")
    fig.savefig(*args, **kwargs)
def umap_scatter(
    data,
    umap_1_key,
    umap_2_key,
    hue_key,
    continuous_hue=False,
    label_key=None,
    marker_size=15,
    figsize=(8, 8),
    discrete_palette=None,
    continuous_cmap="viridis",
    label_z_order=10,
    label_color="k",
    label_alpha=0.5,
    label_size=20,
):
    """Simple umap scatter plot, with legend outside figure.

    Note, for discrete hues (continuous_hue=False):
    Figure size will grow beyond the figsize parameter setting, because the legend is pulled out of figure.
    So you must use fig.savefig('filename', bbox_inches='tight').
    This is provided automatically by genetools.plots.savefig(fig, 'filename')

    If using with scanpy, to get umap data from adata.obsm into adata.obs, try:

    > data = helpers.horizontal_concat(adata.obs, adata.obsm.to_df()[['X_umap1', 'X_umap2']])

    :param data: input data, e.g. anndata.obs
    :type data: pandas.DataFrame
    :param umap_1_key: column name with first dimension of UMAP
    :type umap_1_key: string
    :param umap_2_key: column name with second dimension of UMAP
    :type umap_2_key: string
    :param hue_key: column name with hue that will be used to color points
    :type hue_key: string
    :param continuous_hue: whether hue column takes continuous values and colorbar should be shown, defaults to False
    :type continuous_hue: bool, optional
    :param label_key: column name with optional cluster labels, defaults to None
    :type label_key: string, optional
    :param marker_size: marker size, defaults to 15
    :type marker_size: int, optional
    :param figsize: figure size, defaults to (8, 8)
    :type figsize: tuple, optional
    :param discrete_palette: color palette for discrete hues, defaults to None
        (in which case a "Spectral" palette with one color per hue value is used).
        A list of colors must hold at least one color per hue value; a palette
        name or a dict is handed to seaborn unchanged.
    :type discrete_palette: matplotlib palette name, list of colors, or dict mapping hue values to colors, optional
    :param continuous_cmap: colormap for continuous hues, defaults to "viridis"
    :type continuous_cmap: matplotlib.colors.Colormap or colormap name, optional
    :param label_z_order: z-index for cluster labels, defaults to 10
    :type label_z_order: int, optional
    :param label_color: color for cluster labels, defaults to 'k'
    :type label_color: str, optional
    :param label_alpha: opacity for cluster labels, defaults to 0.5
    :type label_alpha: float, optional
    :param label_size: size of cluster labels, defaults to 20
    :type label_size: int, optional
    :raises ValueError: if a list-like discrete palette has fewer colors than there are hue values
    :return: matplotlib figure and axes
    :rtype: (matplotlib.Figure, matplotlib.Axes)
    """
    # Create the figure inside the style context (as horizontal_stacked_bar_plot
    # does): the style is applied through rc parameters, which need to be active
    # when the axes are created.
    with sns.axes_style("white"):
        fig, ax = plt.subplots(figsize=figsize)

        if continuous_hue:
            # plot continuous variable with a colorbar
            points = ax.scatter(
                data[umap_1_key].values,
                data[umap_2_key].values,
                c=data[hue_key].values,
                cmap=continuous_cmap,
                s=marker_size,
            )
            # color bar, attached explicitly to our axes
            # see also https://stackoverflow.com/a/44642014/130164
            fig.colorbar(points, ax=ax)
        else:
            # plot discrete hues
            n_colors = data[hue_key].nunique()
            if discrete_palette is None or len(discrete_palette) == 0:
                discrete_palette = sns.color_palette("Spectral", n_colors=n_colors)
            elif not isinstance(discrete_palette, (str, dict)):
                # List-like palette: check its size and trim it to the exact
                # number of colors we need (otherwise seaborn throws error).
                # Palette names and dicts are left for seaborn to resolve;
                # taking len() or a slice of them would be meaningless.
                if len(discrete_palette) < n_colors:
                    raise ValueError("Not enough colors in palette")
                discrete_palette = discrete_palette[:n_colors]

            sns.scatterplot(
                data=data,
                x=umap_1_key,
                y=umap_2_key,
                hue=hue_key,
                palette=discrete_palette,
                ax=ax,
                legend="full",
                alpha=1,
                s=marker_size,
            )

        # equal aspect ratio
        ax.set_aspect("equal", "datalim")

        # add cluster labels at the centroid of each cluster
        if label_key is not None:
            for label, grp in data.groupby(label_key):
                if len(grp) == 0:
                    # e.g. an unused categorical level: there is no centroid to label
                    continue
                centroid = grp[[umap_1_key, umap_2_key]].mean()  # mean of x and y
                ax.annotate(
                    str(label),
                    (centroid[umap_1_key], centroid[umap_2_key]),
                    horizontalalignment="center",
                    verticalalignment="center",
                    size=label_size,
                    weight="bold",
                    alpha=label_alpha,
                    color=label_color,
                    zorder=label_z_order,
                )

        sns.despine(ax=ax)

        # pull legend outside figure to the right
        # https://stackoverflow.com/a/34579525/130164
        # https://matplotlib.org/tutorials/intermediate/legend_guide.html#legend-location
        # note: this expands figsize so you have to savefig with bbox_inches='tight'
        if not continuous_hue:
            ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)

    return fig, ax
def horizontal_stacked_bar_plot(
    data, index_key, hue_key, value_key, palette=None, figsize=(8, 8), normalize=True
):
    """Horizontal stacked bar chart.

    Note, figure size will grow beyond the figsize parameter setting, because the legend is pulled out of figure.
    So you must use fig.savefig('filename', bbox_inches='tight').
    This is provided automatically by genetools.plots.savefig(fig, 'filename')

    See https://observablehq.com/@d3/stacked-normalized-horizontal-bar for inspiration and colors.

    :param data: Plot data containing at minimum the columns identified by [index_key], [hue_key], and [value_key].
    :type data: pandas.DataFrame
    :param index_key: Column name defining the rows.
    :type index_key: str
    :param hue_key: Column name defining the horizontal bar categories.
    :type hue_key: str
    :param value_key: Column name defining the bar sizes.
    :type value_key: str
    :param palette: Color palette, defaults to None (in which case default palette used)
    :type palette: matplotlib palette name, list of colors, or dict mapping hue values to colors, optional
    :param figsize: figure size, defaults to (8, 8)
    :type figsize: tuple, optional
    :param normalize: Normalize each row's frequencies to sum to 1, defaults to True
    :type normalize: bool, optional
    :raises ValueError: Must specify correct number of colors if supplying a custom palette
    :return: matplotlib figure and axes
    :rtype: (matplotlib.Figure, matplotlib.Axes)
    """
    plot_df = data[[index_key, value_key, hue_key]].copy()

    if normalize:
        # Normalize values to sum to 1 per row. ``transform`` returns the row
        # totals aligned to plot_df's own index, so the division is element-wise
        # regardless of how groupby would index an ``apply`` result.
        row_totals = plot_df.groupby(index_key)[value_key].transform("sum")
        plot_df[value_key] = plot_df[value_key] / row_totals

    # Sort so we maintain consistent order before we calculate cumulative value
    plot_df = plot_df.sort_values([index_key, hue_key])

    # Accumulate value with every subsequent box/hue as we go across each index/row
    # These will become row-level "left offsets" for each hue
    cum_value_key = value_key + "_cumulative_value"
    plot_df[cum_value_key] = plot_df.groupby(index_key)[value_key].cumsum()

    # Drop empty groups (e.g. unused categorical levels) so that colors are
    # paired only with hues that are actually drawn.
    hue_groups = [
        (hue_name, hue_data)
        for hue_name, hue_data in plot_df.groupby(hue_key)
        if len(hue_data) > 0
    ]
    colors = _resolve_hue_colors(palette, [hue_name for hue_name, _ in hue_groups])

    with sns.axes_style("white"):
        fig, ax = plt.subplots(figsize=figsize)

        # Go hue-by-hue, and plot down the rows
        for color, (hue_name, hue_data) in zip(colors, hue_groups):
            ax.barh(
                hue_data[index_key].values,
                hue_data[value_key].values,
                align="center",
                height=0.25,
                # each box starts where the previous hue's box ended in that row
                left=(hue_data[cum_value_key] - hue_data[value_key]).values,
                label=hue_name,
                color=color,
            )

        # pull legend outside figure
        # https://stackoverflow.com/a/34579525/130164
        ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, title=hue_key)
        ax.set_xlabel("Frequency")

    return fig, ax


def _resolve_hue_colors(palette, hue_levels):
    """Return one color per entry of ``hue_levels``, in the same order.

    ``palette`` may be None/empty (default "muted" palette), a palette name,
    a dict mapping hue values to colors, or a list-like of colors.

    :raises ValueError: if a dict lacks a hue value or a list has too few colors
    """
    n_colors = len(hue_levels)
    if palette is None or len(palette) == 0:
        return sns.color_palette("muted", n_colors=n_colors)
    if isinstance(palette, str):
        # palette name: let seaborn generate the required number of colors
        return sns.color_palette(palette, n_colors=n_colors)
    if isinstance(palette, dict):
        missing = [level for level in hue_levels if level not in palette]
        if missing:
            raise ValueError(
                "Palette is missing colors for hue values: " + str(missing)
            )
        return [palette[level] for level in hue_levels]
    if len(palette) < n_colors:
        raise ValueError("Not enough colors in palette")
    return list(palette)[:n_colors]
####
# def stacked_density_plot(
# data,
# row_var,
# hue_var,
# value_var,
# col_var=None,
# overlap=False,
# suptitle=None,
# figsize=None,
# hue_order=None,
# row_order=None,
# palette=None,
# ):
# """
# Multiple density plot.
# Adapted from old work at https://github.com/hammerlab/infino/blob/develop/analyze_cut.py#L912
# For row_order, consider row_order=reversed(list(range(data.ylevel.values.max()+1)))
# """
# with sns.plotting_context("notebook"):
# with sns.axes_style("white", rc={"axes.facecolor": (0, 0, 0, 0)}):
# g = sns.FacetGrid(
# data,
# row=row_var,
# hue=hue_var,
# col=col_var,
# row_order=row_order,
# hue_order=hue_order,
# aspect=15,
# height=0.5,
# palette=palette,
# sharey=False, # important -- they don't share y ranges.
# )
# ## Draw the densities in a few steps
# # this is the shaded area
# g.map(sns.kdeplot, value_var, clip_on=False, shade=True, alpha=0.8, lw=2)
# # this is the dividing horizontal line
# g.map(plt.axhline, y=0, lw=2, clip_on=False, ls="dashed")
# ### Add label for each facet.
# def label(**kwargs):
# """
# kwargs is e.g.: {'color': (0.4918017777777778, 0.25275644444444445, 0.3333333333333333), 'label': 'Name of the row'}
# """
# color = kwargs["color"]
# label = kwargs["label"]
# ax = plt.gca() # map() changes current axis repeatedly
# # x=1 if plot_on_right else 0; ha="right" if plot_on_right else "left",
# ax.text(
# 1.25,
# 0.5,
# label,
# # fontweight="bold",
# color=color,
# # ha="right",
# ha="left",
# va="center",
# transform=ax.transAxes,
# fontsize="x-small",
# # fontsize='x-large', #15,
# # bbox=dict(facecolor='yellow', alpha=0.3)
# )
# g.map(label)
# ## Beautify the plot.
# g.set(xlim=(-0.01, 1.01))
# # seems to do the trick along with sharey=False
# g.set(ylim=(0, None))
# # Some `subplots_adjust` line is necessary. without this, nothing appears
# if not overlap:
# g.fig.subplots_adjust(hspace=0)
# # Remove axes details that don't play well with overlap
# g.set_titles("")
# # g.set_titles(col_template="{col_name}", row_template="")
# g.set(yticks=[], ylabel="")
# g.despine(bottom=True, left=True)
# # fix x axis
# g.set_xlabels("Pseudotime")
# # resize
# if figsize:
# g.fig.set_size_inches(figsize[0], figsize[1])
# else:
# cur_size = g.fig.get_size_inches()
# increase_vertical = 3 # 7 #4 # 3
# g.fig.set_size_inches(cur_size[0], cur_size[1] + increase_vertical)
# if suptitle is not None:
# g.fig.suptitle(suptitle, fontsize="medium")
# # tighten
# g.fig.tight_layout()
# # then reoverlap
# if overlap:
# g.fig.subplots_adjust(hspace=-0.1)
# return g, g.fig
# TODO: density umap plot
# TODO: two class density plots.