# -*- coding: utf-8 -*-
# !/usr/bin/env python3
import os, sys
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pylab as plt
from .utils import mm2inch, plot_legend_list, despine, get_colormap
from .clustermap import ClusterMapPlotter
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
# =============================================================================
def scale(values, vmin=None, vmax=None):
    """
    Min-max scale `values` relative to `vmin` and `vmax`.

    Parameters
    ----------
    values : array-like
        Numeric values to rescale (a list, numpy array or pandas Series).
    vmin : float, optional
        Value that is mapped to 0. Defaults to the NaN-ignoring minimum of
        `values`.
    vmax : float, optional
        Value that is mapped to 1. Defaults to the NaN-ignoring maximum of
        `values`.

    Returns
    -------
    list
        ``(v - vmin) / (vmax - vmin)`` for every element of `values`.
        Results are not clipped, so inputs outside ``[vmin, vmax]`` map
        outside ``[0, 1]``. NaN inputs stay NaN. When ``vmax == vmin`` every
        element maps to 1, and an empty input gives an empty list.
    """
    arr = np.asarray(values)
    # np.nanmin / np.nanmax raise on empty input, so answer that case directly.
    if arr.size == 0:
        return []
    if vmin is None:
        vmin = np.nanmin(arr)
    if vmax is None:
        vmax = np.nanmax(arr)
    if vmax == vmin:
        # Degenerate range: avoid dividing by zero and use the full size.
        return [1 for _ in arr]
    return ((arr - vmin) / (vmax - vmin)).tolist()
# =============================================================================
def _values_at_dots(frame, row_labels, col_labels, row_pos, col_pos):
    """Align `frame` to the heatmap labels and return its entries at the dot positions."""
    aligned = frame.reindex(index=row_labels, columns=col_labels)
    return aligned.to_numpy()[row_pos, col_pos]


def dotHeatmap2d(
    data,
    hue=None,
    vmin=None,
    vmax=None,
    ax=None,
    colors=None,
    cmap=None,
    max_s=None,
    spines=False,
    **kwargs
):
    """
    Plot dot heatmap using a dataframe matrix as input.

    One dot is drawn per non-NaN cell of `data`; rows are laid out from top
    to bottom and columns from left to right.

    Parameters
    ----------
    data : pd.DataFrame
        input matrix (pandas.DataFrame). Cells containing NaN get no dot.
    hue : pd.DataFrame
        hue to control the colors and cmap of the dot; it is aligned to the
        index and columns of `data`.
    vmin : float
        value mapped to the smallest dot when sizes are derived from `data`
        (`s` not given); also the lower bound of the default color `norm`.
    vmax : float
        value mapped to the largest dot when sizes are derived from `data`
        (`s` not given); also the upper bound of the default color `norm`.
    ax : ax
        axes to draw on, defaults to ``plt.gca()``.
    colors : dict or str
        if a dict, keys should be the values in hue and values are colors.
        If colors is a str, it is used as the color of every dot and
        overrides the parameter `c`.
    cmap : str or dict
        control the colormap of the dot. If `cmap` is a str (such as 'Set1')
        and `hue` is given, each hue category gets one color taken from the
        colormap unless `colors` provides them. If `cmap` is a dict, keys
        should be the values from the hue dataframe and values are
        colormaps; dots are then drawn per hue category and their color
        values must be given with `c` (or one color str in `colors`).
    max_s : float
        marker area (in points squared) of a dot whose scaled size is 1.
        If None, it is inferred from the axes size so that the largest dot
        fits into one cell.
    spines : bool
        whether to keep the spines of the axes [False].
    s : int, float, or dataframe
        control the sizes of dot. A dataframe is expected to be scaled
        already (its values are multiplied by `max_s`); if `s` is not given,
        the absolute values of `data` are scaled with `vmin` and `vmax`.
    c : dataframe, or str
        control the colors of dots.
    marker : str, dataframe or dict
        when marker is a dict, hue must not be None, and keys are categorical
        values from hue, values should be marker.
    kwargs : dict
        such as s, c, marker (see above); other kwargs are passed to
        plt.scatter.

    Returns
    -------
    axes:
        the axes the dots were drawn on.

    Raises
    ------
    ValueError
        if `s`, `marker`, `colors` or `cmap` has an unsupported type, if
        `marker` is a dict while `hue` is None, or if `cmap` is a dict and
        no dot colors were given.
    """
    row_labels = data.index.tolist()
    col_labels = data.columns.tolist()
    if ax is None:
        ax = plt.gca()

    # Locate the dots positionally (row-major order). This avoids one
    # `list.index` lookup per cell, works with repeated labels and does not
    # rely on DataFrame.stack() dropping NaN cells.
    matrix = data.to_numpy()
    row_pos, col_pos = np.nonzero(~pd.isna(matrix))
    df = pd.DataFrame(
        {"X": col_pos + 1, "Y": row_pos + 1, "Value": matrix[row_pos, col_pos]}
    )

    if max_s is None:
        # Scatter sizes are areas in points squared (1 point = 1/72 inch),
        # so the largest dot is the square of the smaller cell side in points.
        extent = ax.get_window_extent()
        dpi = ax.figure.dpi
        w, h = extent.width / dpi, extent.height / dpi  # unit is inch
        r = min(w * 72 / len(col_labels), h * 72 / len(row_labels))
        max_s = r**2

    # sizes
    s = kwargs.pop("s", None)
    if s is None:
        df["S"] = scale(df["Value"].abs().values, vmin=vmin, vmax=vmax)
    elif isinstance(s, pd.DataFrame):  # expected to be scaled already
        df["S"] = _values_at_dots(s, row_labels, col_labels, row_pos, col_pos)
    elif isinstance(s, (int, float, np.integer, np.floating)):
        df["S"] = s
    else:
        raise ValueError("s must be None, a number or a dataframe")

    # hue
    if hue is not None:
        df["Hue"] = _values_at_dots(hue, row_labels, col_labels, row_pos, col_pos)

    # marker
    marker = kwargs.pop("marker", "o")
    if isinstance(marker, pd.DataFrame):
        df["Markers"] = _values_at_dots(
            marker, row_labels, col_labels, row_pos, col_pos
        )
    elif isinstance(marker, str):
        df["Markers"] = marker
    elif isinstance(marker, dict):  # keys are values from hue
        if hue is None:
            raise ValueError("when marker is a dict, hue must not be None")
        df["Markers"] = df.Hue.map(marker)
    else:
        raise ValueError("marker must be string, dataframe or dict")

    # colors
    # Always take `c` out of kwargs: scatter() below receives `c=` explicitly,
    # so a leftover entry would be passed twice when `colors` overrides it.
    has_c = "c" in kwargs
    c = kwargs.pop("c", None)
    c_ready = False
    if isinstance(colors, str):
        df["C"] = colors
        c_ready = True
    elif has_c:  # c: dataframe or color
        if isinstance(c, pd.DataFrame):
            df["C"] = _values_at_dots(c, row_labels, col_labels, row_pos, col_pos)
        else:
            df["C"] = c
        c_ready = True
    elif hue is None:
        # Without hue the scaled size doubles as the color value.
        df["C"] = df.S.tolist()
        kwargs.setdefault("cmap", cmap)
        c_ready = True
    elif isinstance(cmap, str):
        # One color per hue category, most frequent category first.
        if colors is None:
            categories = df["Hue"].value_counts().index.tolist()
            palette = get_colormap(cmap)
            color_dict = {
                category: matplotlib.colors.to_hex(palette(i))
                for i, category in enumerate(categories)
            }
        elif isinstance(colors, dict):
            color_dict = colors
        else:
            raise ValueError("colors must be string or dict")
        df["C"] = df["Hue"].map(color_dict)
        c_ready = True

    # NOTE(review): indentation was lost in the reviewed source; this default
    # is assumed to apply to every color mode -- confirm against upstream.
    kwargs.setdefault(
        "norm", matplotlib.colors.Normalize(vmin=vmin, vmax=vmax, clip=True)
    )

    if c_ready and isinstance(cmap, str):
        kwargs["cmap"] = cmap
        # scatter() takes a single marker per call, so draw marker by marker.
        for mk in df.Markers.unique():
            subset = df.loc[df.Markers == mk]
            if subset.empty:
                continue
            kwargs["marker"] = mk
            ax.scatter(
                x=subset.X.values,
                y=subset.Y.values,
                s=subset.S * max_s,
                c=subset.C.values,
                **kwargs
            )
    elif isinstance(cmap, dict) and hue is not None:
        if "C" not in df.columns:
            raise ValueError(
                "when cmap is a dict, the dot colors must be given with `c` "
                "(or a single color string in `colors`)"
            )
        for hue_value, hue_cmap in cmap.items():  # keys are hue, values are cmap
            hue_subset = df.loc[df.Hue == hue_value]
            if hue_subset.empty:
                continue
            kwargs["cmap"] = hue_cmap
            for mk in hue_subset.Markers.unique():
                subset = hue_subset.loc[hue_subset.Markers == mk]
                kwargs["marker"] = mk
                ax.scatter(
                    x=subset.X.values,
                    y=subset.Y.values,
                    s=subset.S * max_s,
                    c=subset.C.values,
                    **kwargs
                )
    else:
        raise ValueError("cmap must be string or dict")

    # One unit cell per dot: major ticks on the dot centers, minor ticks on
    # the cell borders, first row at the top.
    ax.set_ylim([0.5, len(row_labels) + 0.5])
    ax.set_xlim(0.5, len(col_labels) + 0.5)
    y_locater = list(range(1, len(row_labels) + 1))
    x_locater = list(range(1, len(col_labels) + 1))
    ax.yaxis.set_major_locator(plt.FixedLocator(y_locater))
    ax.yaxis.set_minor_locator(plt.FixedLocator(np.array(y_locater) - 0.5))
    ax.xaxis.set_major_locator(plt.FixedLocator(x_locater))
    ax.xaxis.set_minor_locator(plt.FixedLocator(np.array(x_locater) - 0.5))
    ax.invert_yaxis()
    ax.set_yticklabels(row_labels)
    ax.set_xticklabels(col_labels)
    if not spines:
        despine(ax=ax, left=True, bottom=True, right=True, top=True)
    return ax
# =============================================================================
class DotClustermapPlotter(ClusterMapPlotter):
    """
    DotClustermap (Heatmap) plotter, inherited from ClusterMapPlotter.
    Plot dot heatmap (clustermap) with annotation and legends.

    Parameters
    ----------
    data : dataframe
        pandas dataframe or numpy array.
    x : str
        The column name in data.columns to be shown on the columns of heatmap / clustermap.
    y : str
        The column name in data.columns to be shown on the rows of heatmap / clustermap.
    value : str
        The column name in data.columns to control the sizes, or color of scatter (dot).
    hue : str, optional.
        The column name in data.columns to control the color, cmap, markers of scatter (dot).
    s : str or int, optional.
        The column name in data.columns to control the size of scatter (dot). If `s` is None,
        `value` will be used to control the sizes of dot. This parameter will overwrite value.
    c : str, optional.
        The column name in data.columns to control the color of scatter (dot).
        `c` can also be one color str, such as 'red'. If `c` is not given, colors of the dot
        will be determined by `cmap` or `colors`.
    marker : str or dict, optional.
        Please go to: https://matplotlib.org/stable/api/markers_api.html to see all available markers.
        Such as '.',',','o','v','^','<','>','1','2','3','4','8','s','p','P','*','h','H','+','x','X','D','d','|','_',
        default marker is 'o'.
        If marker is a string, it should be a marker to control the markers of scatter (dot).
        marker could also be a name of the column from data.columns.tolist()
        If marker is a dict, the keys should be the values from data[hue].values, and values should be marker.
    alpha : float [0,1]
        coefficient to scale the size of dot in figure legend, valid for marker and dot in legend.
    colors : dict.
        Keys should be the values from data[hue].values, and values should be color.
        It will be only used to control the colors of markers in figure legend.
    cmap : str or dict, optional.
        If cmap is a dict, the keys should be the values from data[hue].values, and values should be cmap.
        If cmap is a string, it should be colormap, such as 'Set1'.
    color_legend_kws : dict
        legend_kws passed to plot_color_dict_legend
    cmap_legend_kws : dict
        legend_kws passed to plot_cmap_legend
    dot_legend_kws : dict
        legend_kws passed to plot_marker_legend
    dot_legend_marker : str
        marker used for the entries of the dot size legend, default is 'o'.
    value_na : float or int
        used to fill na for data.pivot_table(index=self.y,columns=self.x,values=self.value,aggfunc=self.aggfunc).fillna(self.value_na)
    hue_na : float, str or int
        used to fill na for data.pivot_table(index=self.y,columns=self.x,values=self.hue,aggfunc=self.aggfunc).fillna(self.hue_na)
    s_na : float or int
        used to fill na for data.pivot_table(index=self.y,columns=self.x,values=self.s,aggfunc=self.aggfunc).fillna(self.s_na)
    c_na : float, int or str
        used to fill na for data.pivot_table(index=self.y,columns=self.x,values=self.c,aggfunc=self.aggfunc).fillna(self.c_na)
    aggfunc : function
        when there are multiple values for the same x and y, using aggfunc (default is np.mean) to aggregate them.
        aggfunc will be called in data.pivot(index=y,columns=x,values=value,aggfunc=aggfunc)
    spines : bool
        Whether show spines of the axes or not [False]
    max_s : float
        max size of the dot in scatter, default is None, will be inferred automatically.
    kwargs : dict
        Other kwargs passed to ClusterMapPlotter and dotHeatmap2d, such as max_s, vmin, vmax.

    Returns
    -------
    DotClustermapPlotter.
    """

    def __init__(
        self,
        data=None,
        x=None,
        y=None,
        value=None,
        hue=None,
        s=None,
        c=None,
        marker="o",
        alpha=1,
        color_legend_kws=None,
        cmap_legend_kws=None,
        dot_legend_kws=None,
        dot_legend_marker="o",
        aggfunc=np.mean,
        value_na=0,
        hue_na="NA",
        s_na=0,
        c_na=0,
        spines=False,
        max_s=None,
        **kwargs
    ):
        """Store the dot-specific settings, then defer to ClusterMapPlotter."""
        kwargs["data"] = data
        self.x = x
        self.y = y
        self.value = value
        self.hue = hue
        self.s = s
        self.c = c
        self.marker = marker
        self.alpha = alpha
        self.aggfunc = aggfunc
        self.value_na = value_na
        self.hue_na = hue_na
        self.s_na = s_na
        self.c_na = c_na
        # Fresh dicts per instance: a shared `{}` default would leak legend
        # settings between plotters if it were ever mutated.
        self.color_legend_kws = {} if color_legend_kws is None else color_legend_kws
        self.cmap_legend_kws = {} if cmap_legend_kws is None else cmap_legend_kws
        self.spines = spines
        self.dot_legend_kws = {} if dot_legend_kws is None else dot_legend_kws
        self.dot_legend_marker = dot_legend_marker
        self.max_s = max_s
        # Attributes are set first because they must exist before the parent
        # constructor runs.
        super().__init__(**kwargs)

    def plot_matrix(self, row_order, col_order):
        """
        Draw the dot heatmap, one axes per (row group, column group) block.

        Parameters
        ----------
        row_order : list of lists
            row labels of each row group, in plotting order.
        col_order : list of lists
            column labels of each column group, in plotting order.
        """
        if self.verbose >= 1:
            print("Plotting matrix..")
        nrows = len(row_order)
        ncols = len(col_order)
        ratio = self.kwargs.pop("ratio", None)
        if ratio is not None:
            print("Warning: ratio is deprecated, please use max_s instead")
            if self.max_s is None:
                self.max_s = ratio
        heatmap_extent = self.ax_heatmap.get_window_extent()
        if self.max_s is None:
            # Scatter sizes are areas in points squared (1 point = 1/72 inch),
            # so the largest dot is the square of the smaller cell side in
            # points, measured on the whole heatmap axes.
            dpi = self.ax_heatmap.figure.dpi
            w, h = heatmap_extent.width / dpi, heatmap_extent.height / dpi  # inch
            r = min(w * 72 / self.data2d.shape[1], h * 72 / self.data2d.shape[0])
            max_s = r ** 2
            if self.verbose >= 1:
                print(f"Inferred max_s (max size of scatter point) is: {max_s}")
        else:
            max_s = self.max_s
            if self.verbose >= 1:
                print(f"Using user provided max_s: {max_s}")
        # Every block uses the same max_s; collect_legends reads it back too.
        self.kwargs["max_s"] = max_s

        # Convert the split gaps (mm) to pixels, then to the relative
        # wspace / hspace that GridSpec expects.
        self.col_split_gap_pixel = self.col_split_gap * mm2inch * self.ax.figure.dpi
        self.wspace = (self.col_split_gap_pixel * ncols) / (
            heatmap_extent.width
            + self.col_split_gap_pixel
            - self.col_split_gap_pixel * ncols
        )
        self.row_split_gap_pixel = self.row_split_gap * mm2inch * self.ax.figure.dpi
        self.hspace = (self.row_split_gap_pixel * nrows) / (
            heatmap_extent.height
            + self.row_split_gap_pixel
            - self.row_split_gap_pixel * nrows
        )
        self.heatmap_gs = matplotlib.gridspec.GridSpecFromSubplotSpec(
            nrows,
            ncols,
            hspace=self.hspace,
            wspace=self.wspace,
            subplot_spec=self.gs[1, 1],
            height_ratios=[len(rows) for rows in row_order],
            width_ratios=[len(cols) for cols in col_order],
        )
        self.heatmap_axes = np.empty(shape=(nrows, ncols), dtype=object)
        self.ax_heatmap.set_axis_off()
        for i, rows in enumerate(row_order):
            for j, cols in enumerate(col_order):
                # The first axes of each column / row is still None here, so
                # it becomes the one later blocks share their axis with.
                ax1 = self.ax_heatmap.figure.add_subplot(
                    self.heatmap_gs[i, j],
                    sharex=self.heatmap_axes[0, j],
                    sharey=self.heatmap_axes[i, 0],
                )
                kwargs = self.kwargs.copy()
                dotHeatmap2d(
                    self.data2d.loc[rows, cols],
                    cmap=kwargs.pop("cmap", self.cmap),
                    ax=ax1,
                    spines=self.spines,
                    **kwargs
                )
                self.heatmap_axes[i, j] = ax1
                ax1.yaxis.label.set_visible(False)
                ax1.xaxis.label.set_visible(False)
                ax1.tick_params(
                    which="both",
                    left=False,
                    right=False,
                    labelleft=False,
                    labelright=False,
                    top=False,
                    bottom=False,
                    labeltop=False,
                    labelbottom=False,
                )

    def collect_legends(self):
        """
        Gather the legends of the annotations and of the dot heatmap.

        Fills ``self.legend_list`` with entries of the form
        ``[content, title, legend_kws, n_items, kind]`` sorted by ``n_items``,
        and updates ``self.label_max_width``.
        """
        if self.verbose >= 1:
            print("Collecting legends..")
        self.legend_list = []
        self.label_max_width = 0
        for annotation in [
            self.top_annotation,
            self.bottom_annotation,
            self.left_annotation,
            self.right_annotation,
        ]:
            if annotation is None:
                continue
            annotation.collect_legends()
            if annotation.plot_legend and len(annotation.legend_list) > 0:
                self.legend_list.extend(annotation.legend_list)
            if annotation.label_max_width > self.label_max_width:
                self.label_max_width = annotation.label_max_width
        if self.legend:
            if isinstance(self.cmap, str) and self.hue is not None and self.c is None:
                # One color per hue category, most frequent category first.
                # NOTE(review): dotHeatmap2d derives its palette from the
                # block it is given, so with split heatmaps the per-block
                # order may differ from this global one -- confirm.
                col_list = self.kwargs["hue"].unstack().value_counts().index.tolist()
                palette = get_colormap(self.cmap)
                color_dict = {
                    category: matplotlib.colors.to_hex(palette(i))
                    for i, category in enumerate(col_list)
                }
                self.legend_list.append(
                    [
                        color_dict,
                        self.hue,
                        self.color_legend_kws,
                        len(color_dict),
                        "color_dict",
                    ]
                )
            cmap = self.cmap
            c = self.kwargs.get("c", None)
            cmap_legend_kws = self.cmap_legend_kws.copy()
            cmap_legend_kws.setdefault("vmin", self.kwargs.get("vmin"))
            cmap_legend_kws.setdefault("vmax", self.kwargs.get("vmax"))
            # A colorbar only makes sense when the dot colors come from
            # values rather than from one fixed color string.
            if isinstance(cmap, str) and c is not None and not isinstance(c, str):
                self.legend_list.append([cmap, self.value, cmap_legend_kws, 4, "cmap"])
            if isinstance(cmap, dict):
                for key, sub_cmap in cmap.items():
                    self.legend_list.append([sub_cmap, key, cmap_legend_kws, 4, "cmap"])
            marker = self.kwargs.get("marker", None)
            max_s = self.kwargs["max_s"]  # set by plot_matrix
            if isinstance(marker, dict) and self.hue is not None:
                # Scatter sizes are areas, legend marker sizes are lengths,
                # hence the square root.
                self.legend_list.append(
                    [
                        (
                            marker,
                            self.kwargs.get("colors", None),
                            np.sqrt(max_s) * self.alpha,
                        ),
                        self.hue,
                        self.dot_legend_kws,
                        len(marker),
                        "markers",
                    ]
                )
            # dot size legend:
            if isinstance(self.s, str):
                markers1 = {}
                ms = {}
                for f in [1, 0.8, 0.6, 0.4, 0.2]:
                    k = str(round(f * self.smax, 2))
                    markers1[k] = self.dot_legend_marker
                    ms[k] = f * np.sqrt(max_s) * self.alpha
                self.legend_list.append(
                    [
                        (markers1, None, ms),
                        self.s,
                        self.dot_legend_kws,
                        len(markers1),
                        "markers",
                    ]
                )
            # NOTE(review): indentation was lost in the reviewed source; the
            # label-width update is assumed to belong to the `if self.legend`
            # branch -- confirm against upstream.
            heatmap_label_max_width = max(
                (label.get_window_extent().width for label in self.yticklabels),
                default=0,
            )
            if (
                heatmap_label_max_width >= self.label_max_width
                or self.legend_anchor == "ax_heatmap"
            ):
                self.label_max_width = heatmap_label_max_width * 1.1
        if len(self.legend_list) > 1:
            self.legend_list = sorted(self.legend_list, key=lambda x: x[3])

    def post_processing(self):
        """Remove the spines of every heatmap axes unless `spines` was requested."""
        if not self.spines:
            for ax in self.heatmap_axes.ravel():
                despine(ax=ax, left=True, bottom=True, right=True, top=True)
# Nothing to run when the module is executed as a script; it only provides
# the plotting helpers defined above.
if __name__ == "__main__":
    pass