[1]:
import os,sys
%matplotlib inline
import matplotlib.pylab as plt
import pickle
import numpy as np
plt.rcParams['figure.dpi'] = 100
plt.rcParams['savefig.dpi']=300
# sys.path.append(os.path.expanduser("~/Projects/Github/PyComplexHeatmap"))
from PyComplexHeatmap import *
use_pch_style() # or plt.style.use('default') to restore default style
# plt.rcParams
# import matplotlib; print(matplotlib.__version__)
[2]:
# Font setup in one call: Arial as the sans-serif face, and pdf.fonttype set to 42.
plt.rcParams.update({
    'font.family': 'sans serif',
    'font.sans-serif': 'Arial',
    'pdf.fonttype': 42,
})

Generate dataset

[3]:
# Generate the example dataset (random values).
# Seed NumPy's global RNG so that Restart & Run All reproduces the same data.
np.random.seed(0)

# All per-sample tables share these ten labels as their index
# (they are also the column names of the heatmap matrix).
n_samples = 10
sample_names = ['sample' + str(i) for i in range(1, n_samples + 1)]

# Per-sample metadata: three categorical columns and one numeric column.
df = pd.DataFrame(['GroupA'] * 5 + ['GroupB'] * 5, columns=['AB'])
df['CD'] = ['C'] * 3 + ['D'] * 3 + ['G'] * 4
df['EF'] = ['E'] * 6 + ['F'] * 2 + ['H'] * 2
df['F'] = np.random.normal(0, 1, n_samples)
df.index = sample_names

# Per-sample numeric tables used by the boxplot / barplot / scatterplot annotations.
df_box = pd.DataFrame(np.random.randn(n_samples, 4),
                      columns=['Gene' + str(i) for i in range(1, 5)],
                      index=sample_names)
df_bar = pd.DataFrame(np.random.uniform(0, 10, (n_samples, 2)),
                      columns=['TMB1', 'TMB2'], index=sample_names)
df_scatter = pd.DataFrame(np.random.uniform(0, 10, n_samples),
                          columns=['Scatter'], index=sample_names)

# Heatmap matrix: 30 features (rows) x 10 samples (columns), with one missing value.
df_heatmap = pd.DataFrame(np.random.randn(30, n_samples), columns=sample_names)
df_heatmap.index = ["Fea" + str(i) for i in range(1, df_heatmap.shape[0] + 1)]
df_heatmap.iloc[1, 2] = np.nan
[4]:
# Blank out one sample4 entry in the heatmap matrix and one in the boxplot table,
# then display the boxplot table.
df_heatmap.at['Fea4', 'sample4'] = np.nan
df_box.at['sample4', 'Gene4'] = np.nan
df_box
[4]:
Gene1 Gene2 Gene3 Gene4
sample1 2.187513 -1.380853 -0.480167 0.085030
sample2 0.601946 -0.323006 -0.860937 -0.205367
sample3 0.337548 -0.900402 0.148900 1.439385
sample4 -0.471896 -0.337046 2.190533 NaN
sample5 0.849067 1.225516 -0.338290 1.243937
sample6 -1.045769 0.290131 -0.659531 -0.068667
sample7 -0.161038 -1.058872 -0.484389 0.783456
sample8 -0.178389 0.448634 -0.365703 0.369459
sample9 0.748063 0.673198 -0.249985 1.274346
sample10 -1.711610 -0.307186 1.698132 0.678074

Add selected rows labels

[5]:
# Label only the rows whose sample4 value is greater than 0.5 (all other rows get None).
df_rows = (
    df_heatmap
    .apply(lambda row: row.name if row.sample4 > 0.5 else None, axis=1)
    .to_frame(name='Selected')
)
# Group the features by their number: Fea15 and above -> 'A', the rest -> 'B'.
df_rows['XY'] = df_rows.index.to_series().apply(
    lambda fea: 'A' if int(fea.replace('Fea', '')) >= 15 else 'B'
)

# Row annotation (axis=0): scatter, line and bar tracks of the rounded sample4 values,
# followed by the text labels built above.
row_ha = HeatmapAnnotation(
    Scatter=anno_scatterplot(
        df_heatmap.sample4.apply(lambda v: round(v, 2)),
        height=12, cmap='jet', legend=False, grid=True,
        legend_kws={'color': 'red'},
    ),
    Line=anno_lineplot(
        df_heatmap.sample4.apply(lambda v: round(v, 2)),
        height=12, colors='red', linewidth=2, legend=False,
    ),
    Bar=anno_barplot(
        df_heatmap.sample4.apply(lambda v: round(v, 2)),
        height=15, cmap='rainbow', legend=False, ylim=(-5, 5),
    ),
    selected=anno_label(df_rows, colors='red', relpos=(-0.05, 0.4)),
    label_kws={
        'rotation': 30,
        'horizontalalignment': 'left',
        'verticalalignment': 'bottom',
    },
    axis=0,
    verbose=0,
)

# Column annotation (axis=1): merged group labels, three categorical tracks and a boxplot.
col_ha = HeatmapAnnotation(
    label=anno_label(
        df.AB, merge=True, rotation=10,
        arrowprops={'visible': False},
    ),
    AB=anno_simple(df.AB, add_text=True),
    CD=anno_simple(df.CD, add_text=True),
    EF=anno_simple(df.EF, add_text=True, legend_kws={'frameon': True}),
    G=anno_boxplot(df_box, cmap='jet', legend=False, grid=True),
    axis=1,
    verbose=0,
)

# Raw value range of the matrix, for comparison with the vmin/vmax passed below.
print(np.nanmin(df_heatmap), np.nanmax(df_heatmap))

plt.figure(figsize=(5.5, 6.5))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    right_annotation=row_ha,
    # clustering / splitting
    col_cluster=True,
    row_cluster=True,
    col_split=df.AB,
    row_split=2,
    col_split_gap=0.5,
    row_split_gap=0.8,
    row_dendrogram=True,
    col_dendrogram=False,
    row_dendrogram_size=15,
    tree_kws={'row_cmap': 'Set1'},
    bezier=True,
    dotsize=2,
    # color scale and legend
    z_score=0,
    vmin=-2.2,
    vmax=2.3,
    cmap='RdYlBu_r',
    legend_kws={'label': 'test'},  # label='values',
    legend_vgap=5,
    # tick labels
    show_rownames=False,
    show_colnames=True,
    xticklabels_kws={
        'labelrotation': 90,
        'labelcolor': 'blue',
        'labelsize': 14,
        'grid_color': 'red',
        'bottom': True,
    },
    verbose=0,
)
# for ax in cm.top_annotation.axes[-1,:]:
#     ax.cla()
plt.savefig("example0.pdf", bbox_inches='tight')
plt.show()
# Show the vmin/vmax stored on the plotter and the legend settings it ended up with.
print(cm.kwargs['vmin'], cm.kwargs['vmax'], cm.legend_kws)
-3.7087202627688773 2.7682545505502714
../_images/notebooks_advanced_usage_6_1.png
-2.2 2.3 {'label': 'test', 'vmin': -2.2, 'vmax': 2.3, 'center': None}
[6]:
# Inspect the heatmap axes kept on the plotter from the previous cell
# (the output below shows a 2x2 object array of Axes).
cm.heatmap_axes
[6]:
array([[<Axes: >, <Axes: >],
       [<Axes: >, <Axes: >]], dtype=object)

Add annotations on the top of heatmap cells

[7]:
# Label only the rows whose sample4 value is greater than 0.5 (all other rows get None).
df_rows = (
    df_heatmap
    .apply(lambda row: row.name if row.sample4 > 0.5 else None, axis=1)
    .to_frame(name='Selected')
)
# Group the features by their number: Fea15 and above -> 'A', the rest -> 'B'.
# This column is used as row_split below.
df_rows['XY'] = df_rows.index.to_series().apply(
    lambda fea: 'A' if int(fea.replace('Fea', '')) >= 15 else 'B'
)

# Row annotation (axis=0): sample4 values shown as text (missing values become ''),
# a bar track of the same values, and the selected-row labels.
row_ha = HeatmapAnnotation(
    S4=anno_simple(
        df_heatmap.sample4.apply(lambda v: '' if pd.isna(v) else round(v, 2)),
        add_text=True, height=10, legend=False,
        text_kws=dict(rotation=0, fontsize=10, color='blue'),
    ),
    # Scatter=anno_scatterplot(df_heatmap.sample4.apply(lambda x:round(x,2)),
    #                  height=10),
    Test=anno_barplot(
        df_heatmap.sample4.apply(lambda v: round(v, 2)),
        height=18, cmap='rainbow', grid=True,
    ),
    selected=anno_label(df_rows, colors='red'),
    label_kws=dict(
        rotation=0,
        horizontalalignment='left',
        verticalalignment='bottom',
    ),
    axis=0,
    verbose=0,  # wgap=4,
)

# Column annotation (axis=1); verbose=0 will turn off the log.
col_ha = HeatmapAnnotation(
    label=anno_label(df.AB, merge=True, rotation=15),
    AB=anno_simple(df.AB, add_text=True),
    CD=anno_simple(
        df.CD, add_text=True,
        text_kws={
            'bbox': {'boxstyle': 'Circle', 'edgecolor': 'white', 'fill': False},
            'fontsize': 8,
        },
        height=4.5,
    ),
    EF=anno_simple(df.EF, add_text=True, legend_kws=dict(frameon=False)),
    Exp=anno_boxplot(df_box, cmap='turbo', grid=True),
    axis=1,
    verbose=0,  # hgap=2
)

# Quick look at the inputs before plotting.
print(df.head())
print(df_box.mean(axis=1))
print(df_heatmap.head())

plt.figure(figsize=(6, 8))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    right_annotation=row_ha,
    # splitting / clustering (optional: col_split_gap=3.5, row_split_gap=2.5)
    col_split=df.AB,
    row_split=df_rows.XY,
    col_cluster=True,
    row_cluster=True,
    row_dendrogram=False,
    # cell appearance
    annot=False,
    fmt='.1g',
    linewidths=0.05,
    linecolor='gold',
    cmap='RdYlBu_r',
    # legend (optional: legend_hpad=10, legend_vpad=5)
    label='values',
    legend_vgap=5,
    # tick labels
    show_rownames=True,
    show_colnames=True,
    xticklabels_kws=dict(labelrotation=-45, labelcolor='blue'),
    yticklabels_kws={'labelcolor': 'red'},
    # subplot_gap=8  # subplot_gap controls the gap between the main heatmap and
    #                # the column or row annotations
    verbose=0,
)

plt.show()
# Row and column order after clustering, one list per split group (see the output below).
print(cm.row_order)
print(cm.col_order)
             AB CD EF         F
sample1  GroupA  C  E  0.045435
sample2  GroupA  C  E -0.820792
sample3  GroupA  C  E -0.381520
sample4  GroupA  D  E  1.558057
sample5  GroupA  D  E -0.065420
sample1     0.102881
sample2    -0.196841
sample3     0.256358
sample4     0.460530
sample5     0.745058
sample6    -0.370959
sample7    -0.230211
sample8     0.068500
sample9     0.611406
sample10    0.089352
dtype: float64
       sample1   sample2   sample3   sample4   sample5   sample6   sample7  \
Fea1 -0.369817 -0.334013 -1.133481  0.283969 -0.220920 -1.615043  0.408399
Fea2  0.736265 -0.167285       NaN  0.021201 -0.192741 -1.376010  0.084740
Fea3  1.878100 -0.998456  1.944019 -0.655073 -0.420349 -1.213828  0.090542
Fea4  0.722164 -1.294405  0.398772       NaN  0.302977 -1.010801  0.045174
Fea5  1.922654  1.329243 -0.039535  0.731163 -0.613262 -0.224573 -0.333527

       sample8   sample9  sample10
Fea1 -1.767529  0.077708 -1.155438
Fea2 -0.848125 -0.334524 -0.401966
Fea3 -0.021023  0.140168  0.145610
Fea4 -1.692063 -0.908440  0.587826
Fea5  0.129036  0.410582 -0.157031
../_images/notebooks_advanced_usage_9_1.png
[['Fea11', 'Fea12', 'Fea8', 'Fea4', 'Fea1', 'Fea2', 'Fea6', 'Fea9', 'Fea10', 'Fea13', 'Fea7', 'Fea14', 'Fea3', 'Fea5'], ['Fea16', 'Fea18', 'Fea22', 'Fea25', 'Fea20', 'Fea21', 'Fea28', 'Fea17', 'Fea24', 'Fea26', 'Fea27', 'Fea30', 'Fea15', 'Fea23', 'Fea19', 'Fea29']]
[['sample5', 'sample3', 'sample1', 'sample2', 'sample4'], ['sample10', 'sample8', 'sample9', 'sample6', 'sample7']]

Only plot the annotations

[8]:
# Rebuild the example tables for the annotation-only demos that follow.
# Every table is indexed by the same ten sample labels (sample1 ... sample10).
df = pd.DataFrame(['AAAA1'] * 5 + ['BBBBB2'] * 5, columns=['AB'])
df['CD'] = ['C'] * 3 + ['D'] * 3 + ['G'] * 4
df['F'] = np.random.normal(0, 1, 10)
df.index = [f'sample{i}' for i in range(1, len(df) + 1)]

df_box = pd.DataFrame(
    np.random.randn(10, 4),
    index=df.index.tolist(),
    columns=[f'Gene{i}' for i in range(1, 5)],
)
df_bar = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df.index.tolist(), columns=['TMB1', 'TMB2'],
)
df_scatter = pd.DataFrame(
    np.random.uniform(0, 10, 10),
    index=df.index.tolist(), columns=['Scatter'],
)
df_bar1 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df.index.tolist(), columns=['T1-A', 'T1-B'],
)
df_bar2 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df.index.tolist(), columns=['T2-A', 'T2-B'],
)
# The last two bar tables each get one missing value.
df_bar3 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df.index.tolist(), columns=['T3-A', 'T3-B'],
)
df_bar3.iloc[5, 0] = np.nan
df_bar4 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 1)),
    index=df.index.tolist(), columns=['T4'],
)
df_bar4.iloc[7, 0] = np.nan

print(df)
print(df_box.head())
print(df_scatter)
print(df_bar)
print(df_bar1)
print(df_bar2)
print(df_bar3)
print(df_bar4)
              AB CD         F
sample1    AAAA1  C  0.857937
sample2    AAAA1  C -0.870811
sample3    AAAA1  C -0.897133
sample4    AAAA1  D  0.293353
sample5    AAAA1  D -0.404538
sample6   BBBBB2  D -1.232761
sample7   BBBBB2  G  0.951264
sample8   BBBBB2  G -0.361155
sample9   BBBBB2  G  0.451165
sample10  BBBBB2  G  0.213719
            Gene1     Gene2     Gene3     Gene4
sample1 -0.709382 -0.106931 -0.424066  1.362913
sample2  0.723315  0.800076  0.067149  1.829474
sample3  0.238146 -0.169403 -0.319038  0.718999
sample4  1.361002  1.085507  0.379784  0.441183
sample5  0.334794  0.217863 -0.558056 -1.826657
           Scatter
sample1   8.216554
sample2   8.482098
sample3   7.604668
sample4   1.605833
sample5   4.903368
sample6   1.348598
sample7   0.023861
sample8   8.493241
sample9   4.575743
sample10  3.844640
              TMB1      TMB2
sample1   5.965623  2.443817
sample2   8.546646  2.510858
sample3   0.782676  0.895993
sample4   6.942029  8.824860
sample5   2.555597  5.106013
sample6   4.029492  5.522492
sample7   7.194071  9.541492
sample8   8.234834  4.642008
sample9   6.957621  9.646708
sample10  8.722453  2.258993
              T1-A      T1-B
sample1   8.974470  4.920883
sample2   9.248793  5.524964
sample3   7.138581  9.183642
sample4   1.760228  4.954536
sample5   6.039348  2.486589
sample6   8.050732  7.901286
sample7   3.258294  2.482795
sample8   3.950215  4.546003
sample9   9.166755  0.442683
sample10  1.989800  6.856756
              T2-A      T2-B
sample1   1.260332  0.054708
sample2   8.630146  9.714847
sample3   0.809092  5.798171
sample4   1.511556  6.270546
sample5   2.486518  0.510064
sample6   9.579535  4.952142
sample7   9.292721  8.212768
sample8   3.728597  9.083941
sample9   6.995197  2.644324
sample10  6.332444  5.953887
              T3-A      T3-B
sample1   9.333338  6.984959
sample2   1.364911  1.636278
sample3   6.091161  4.116743
sample4   7.673169  7.839617
sample5   2.357917  1.026705
sample6        NaN  0.051534
sample7   7.400061  8.654729
sample8   5.270826  3.245087
sample9   0.386638  0.432719
sample10  4.159563  8.533462
                T4
sample1   1.965158
sample2   0.990276
sample3   3.860812
sample4   3.309407
sample5   1.689017
sample6   7.998592
sample7   4.415705
sample8        NaN
sample9   8.981833
sample10  1.133716
[9]:
# Draw the annotation tracks on their own (plot=True), without a heatmap.
# Tracks are listed in the same keyword order as before; settings come last.
plt.figure(figsize=(4, 8))
col_ha = HeatmapAnnotation(
    label=anno_label(df.AB, merge=True, rotation=15),
    AB=anno_simple(df.AB, add_text=True),
    CD=anno_simple(df.CD, add_text=True, text_kws=dict(color='black')),
    Exp=anno_boxplot(df_box, cmap='turbo', grid=True),
    Scatter=anno_scatterplot(df_scatter, legend=True, grid=True),
    Line=anno_lineplot(
        df_bar2, linewidth=4,
        colors={'T2-B': 'orangered', 'T2-A': 'yellowgreen'},  # or colors=['orangered','yellowgreen']
        marker='D', legend=True,
    ),
    TMB_bar=anno_barplot(df_bar, legend=True, cmap='Set1'),
    # colors can be str, list, tuple or dict
    Bar1=anno_barplot(df_bar1, legend=True, colors=['red', 'black']),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Bar2=anno_barplot(
        df_bar2, legend=True,
        colors={'T2-B': 'orangered', 'T2-A': 'yellowgreen'},
    ),
    Bar3=anno_barplot(df_bar3, legend=True, cmap='Paired'),
    axis=1,
    plot=True,
    legend=True,
    legend_vgap=5,
    hgap=4,
    # legend_order=False,
    # legend_order=['AB','CD','Line','Bar1'],legend_width=20
)
col_ha.show_ticklabels(df.index.tolist(), fontdict=dict(color='blue'), rotation=-30)
plt.show()
Plotting HeatmapAnnotations
Collecting annotation legends..
../_images/notebooks_advanced_usage_12_1.png

Change orientation to down and add extra space

[10]:
# Same tracks in reverse order, with the merged labels listed last.
# hgap (when axis=1) or wgap (when axis=0) controls the height or width of the
# space between different annotations.
plt.figure(figsize=(4, 8))
row_ha = HeatmapAnnotation(
    TMB_bar=anno_barplot(df_bar, legend=True, cmap='Set1'),
    Bar1=anno_barplot(df_bar1, legend=True, cmap='Dark2'),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Bar2=anno_barplot(df_bar2, legend=True, cmap='tab10'),
    Bar3=anno_barplot(df_bar3, legend=True, cmap='Paired'),
    Scatter=anno_scatterplot(df_scatter),
    Exp=anno_boxplot(df_box, cmap='turbo', legend=True, grid=True),
    CD=anno_simple(
        df.CD,
        colors={'C': 'red', 'D': 'gray', 'G': 'yellow'},
        add_text=True, legend=True,
        text_kws=dict(color='black'),
    ),
    AB=anno_simple(df.AB, add_text=True, legend=True),
    label=anno_label(df.AB, merge=True, rotation=-15),
    axis=1,
    hgap=1,
    plot=True,
    plot_legend=False,
    legend_hpad=13,
)
row_ha.show_ticklabels(df.index.tolist(), fontdict=dict(color='blue'), rotation=30)
plt.show()
Plotting HeatmapAnnotations
../_images/notebooks_advanced_usage_14_1.png

Change orientation to the left

[11]:
# Row-wise annotation (axis=0) with the merged labels listed first;
# wgap sets the spacing between tracks when axis=0.
plt.figure(figsize=(8, 4))
row_ha = HeatmapAnnotation(
    label=anno_label(df.AB, merge=True, rotation=15),
    AB=anno_simple(
        df.AB, add_text=True, legend=True,
        # text_kws=dict(bbox={"pad":0},va='center',ha='center',rotation_mode='anchor')
    ),
    CD=anno_simple(df.CD, add_text=True, legend=True),
    Exp=anno_boxplot(df_box, cmap='turbo', legend=True),
    Scatter=anno_scatterplot(df_scatter),
    TMB_bar=anno_barplot(df_bar, legend=True, cmap='Set1'),
    Bar1=anno_barplot(df_bar1, legend=True, cmap='Dark2'),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Bar2=anno_barplot(df_bar2, legend=True, cmap='tab10'),
    Bar3=anno_barplot(df_bar3, legend=True, cmap='Paired'),
    axis=0,
    wgap=3,
    label_side='bottom',
    plot=True,
    legend=True,
    legend_vgap=5,
    legend_hpad=20,
)
row_ha.show_ticklabels(df.index.tolist(), fontdict=dict(color='blue'), rotation=0)
plt.show()
Plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
../_images/notebooks_advanced_usage_16_1.png

Change orientation to the right

[12]:
# Row-wise annotation (axis=0) with the merged labels listed last;
# the categorical tracks draw their text rotated by -90 degrees.
plt.figure(figsize=(8, 4))
row_ha = HeatmapAnnotation(
    TMB_bar=anno_barplot(df_bar, legend=True, cmap='Set1'),
    Bar1=anno_barplot(df_bar1, legend=True, cmap='Dark2'),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Bar2=anno_barplot(df_bar2, legend=True, cmap='tab10'),
    Bar3=anno_barplot(df_bar3, legend=True, cmap='Paired'),
    Scatter=anno_scatterplot(df_scatter, grid=True),
    Exp=anno_boxplot(df_box, cmap='turbo', legend=True, grid=True),
    CD=anno_simple(
        df.CD,
        colors={'C': 'red', 'D': 'gray', 'G': 'green'},
        add_text=True, legend=True,
        text_kws=dict(rotation=-90),
    ),
    AB=anno_simple(
        df.AB, add_text=True, legend=True,
        text_kws=dict(rotation=-90, color='black'),
    ),
    label=anno_label(df.AB, merge=True, rotation=15),
    axis=0,
    wgap=3,
    plot=True,
    legend=True,
    legend_hpad=13,
    legend_vgap=5,
)
row_ha.show_ticklabels(df.index.tolist(), fontdict=dict(color='black'), rotation=0)
plt.show()
Plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
../_images/notebooks_advanced_usage_18_1.png

Changing orientation using parameter orientation

By default, if there is no anno_label in the annotation, the orientation is determined by the `orientation` parameter.

[13]:
# No anno_label track here: the orientation is requested explicitly with orientation='left'.
plt.figure(figsize=(8, 4))
col_ha = HeatmapAnnotation(
    AB=anno_simple(df.AB, add_text=True, legend=True),
    CD=anno_simple(df.CD, add_text=True, legend=True),
    Exp=anno_boxplot(df_box, cmap='turbo', legend=True),
    Scatter=anno_scatterplot(df_scatter, grid=True),
    TMB_bar=anno_barplot(df_bar, legend=True, cmap='Set1'),
    Bar1=anno_barplot(df_bar1, legend=True, cmap='Dark2'),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Bar2=anno_barplot(df_bar2, legend=True, cmap='tab10'),
    Bar3=anno_barplot(df_bar3, legend=True, cmap='Paired'),
    axis=0,
    orientation='left',
    plot=True,
    legend=True,
    legend_vgap=5,
)
plt.show()
Plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
../_images/notebooks_advanced_usage_20_1.png
[14]:
# Same tracks with orientation='right'; the categorical tracks use larger, rotated text.
plt.figure(figsize=(8, 4))
col_ha = HeatmapAnnotation(
    AB=anno_simple(
        df.AB, add_text=True, legend=True,
        text_kws=dict(rotation=-90, fontsize=14, color='black'),
    ),
    CD=anno_simple(
        df.CD, add_text=True, legend=True,
        text_kws=dict(rotation=-90, fontsize=14, color='white'),
    ),
    Exp=anno_boxplot(df_box, cmap='turbo', legend=True),
    Scatter=anno_scatterplot(df_scatter),
    TMB_bar=anno_barplot(df_bar, legend=True, cmap='Set1'),
    Bar1=anno_barplot(df_bar1, legend=True, cmap='Dark2'),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Bar2=anno_barplot(df_bar2, legend=True, cmap='tab10'),
    Bar3=anno_barplot(df_bar3, legend=True, cmap='Paired'),
    axis=0,
    wgap=3,
    orientation='right',
    plot=True,
    legend=True,
    legend_vgap=5,
)
plt.show()
Plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
../_images/notebooks_advanced_usage_21_1.png

Add multiple heatmap annotations using for loop

Typically, we can create a heatmap annotation using the following code:

col_ha = HeatmapAnnotation(
                        Group=anno_simple(df_cols.hypomethylated_samples,colors=sample_group_color_dict,legend=True),
                        CellType=anno_simple(df_cols.CellType,colors=ct_color_dict,legend=ct_legend),
                        M1=anno_simple(df_cols['M1'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M1'}),
                        verbose=0,label_side='right',label_kws={'horizontalalignment':'left'})

But what if we have many annotations, for example:

col_ha = HeatmapAnnotation(
                        Group=anno_simple(df_cols.hypomethylated_samples,colors=sample_group_color_dict,legend=True),
                        CellType=anno_simple(df_cols.CellType,colors=ct_color_dict,legend=ct_legend),
                        M1=anno_simple(df_cols['M1'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M1'}),
                        M2=anno_simple(df_cols['M2'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M2'}),
                        M3=anno_simple(df_cols['M3'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M3'}),
                        .....
                        verbose=0,label_side='right',label_kws={'horizontalalignment':'left'})

In this case, we can create a dict that maps each annotation name (key) to its annotation (value), and fill it in a for loop:

col_ha_dict={
                'Group':anno_simple(df_cols.hypomethylated_samples,colors=sample_group_color_dict,legend=True),
                'CellType':anno_simple(df_cols.CellType,colors=ct_color_dict,legend=ct_legend)
                }


for col in sample_cols:
    col_ha_dict[col]=anno_simple(df_cols[col],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':col})

col_ha = HeatmapAnnotation(**col_ha_dict,
                           verbose=0,label_side='right',label_kws={'horizontalalignment':'left'})

Cluster between groups and cluster within groups

Similar to cluster_between_groups and cluster_within_groups in R (https://jokergoo.github.io/2021/03/05/cluster-groups-in-complexheatmap/)

cluster within groups: col_split=*, col_cluster=True

[15]:
# Assign each of the ten samples to one of five groups (G1 ... G5).
df['Groups'] = ['G1', 'G2'] + ['G3'] * 5 + ['G4'] + ['G5'] * 2

# Column annotation; verbose=0 will turn off the log.
col_ha = HeatmapAnnotation(
    Groups=anno_simple(df.Groups, add_text=True, text_kws=dict(color='black')),
    AB=anno_simple(df.AB, add_text=True),
    Exp=anno_boxplot(df_box, cmap='turbo'),
    axis=1,
    verbose=0,
)

plt.figure(figsize=(6, 8))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    # split the columns by group and cluster the columns as well
    col_split=df.Groups,
    col_split_gap=2,
    col_cluster=True,
    row_cluster=True,
    col_dendrogram=True,
    tree_kws=dict(col_cmap='Set1'),
    # cell appearance
    annot=True,
    fmt='.1g',
    linewidths=0.05,
    linecolor='gold',
    cmap='RdYlBu_r',
    # labels and legends
    label='values',
    show_rownames=True,
    show_colnames=True,
    xticklabels_kws=dict(labelrotation=-45, labelcolor='blue'),
    ylabel='Features',
    legend_vgap=7,
    legend_order=['AB', 'Groups', 'Exp', 'values'],  # change legend order
    verbose=0,
)
plt.show()
../_images/notebooks_advanced_usage_25_0.png

cluster_between_groups: col_split=*, col_split_order="cluster_between_groups",col_cluster=False

[16]:
# Order the column groups with col_split_order="cluster_between_groups",
# with col_cluster turned off.
plt.figure(figsize=(8, 10))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    # columns
    col_split=df.Groups,
    col_split_order="cluster_between_groups",
    col_split_gap=2,
    col_cluster=False,
    col_dendrogram=True,
    col_dendrogram_size=25,
    # rows
    row_split=2,
    row_split_gap=1,
    row_cluster=True,
    row_dendrogram=True,
    row_dendrogram_size=35,
    # dendrogram styling
    tree_kws=dict(colors='blue', row_cmap='Set1', col_cmap='Paired'),
    bezier=True,
    dotsize=8,
    # cell appearance
    linewidths=0.05,
    linecolor='gold',
    cmap='RdYlBu_r',
    # labels and legends
    label='values',
    show_rownames=True,
    show_colnames=True,
    xticklabels_kws=dict(labelrotation=-45, labelcolor='blue'),
    ylabel='Features',
    legend_vgap=7,
    verbose=0,
)
plt.show()
../_images/notebooks_advanced_usage_27_0.png

cluster_within_groups && cluster_between_groups: col_split=*, col_split_order="cluster_between_groups",col_cluster=True

[17]:
# Split the columns by two columns of df (AB and Groups), use
# col_split_order="cluster_between_groups", and keep col_cluster on.
plt.figure(figsize=(6, 8))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    # columns
    col_split=df.loc[:, ['AB', 'Groups']],
    col_split_order="cluster_between_groups",
    col_split_gap=2,
    col_cluster=True,
    col_dendrogram=True,
    # rows (optional: row_split_order='cluster_between_groups')
    row_split=3,
    row_split_gap=1.5,
    row_cluster=True,
    row_dendrogram=True,
    tree_kws=dict(colors='blue'),
    # cell appearance
    annot=True,
    fmt='.1g',
    linewidths=0.05,
    linecolor='gold',
    cmap='RdYlBu_r',
    # labels and legends
    label='values',
    show_rownames=True,
    show_colnames=True,
    xticklabels_kws=dict(labelrotation=-45, labelcolor='blue'),
    ylabel='Features',
    legend_vgap=7,
    verbose=0,
)
plt.show()
../_images/notebooks_advanced_usage_29_0.png
[18]:
# `label_kws` in `HeatmapAnnotation` controls the heatmap annotation labels
col_ha = HeatmapAnnotation(
    Groups=anno_simple(df.Groups, add_text=True, text_kws=dict(color='black')),
    AB=anno_simple(df.AB, add_text=True),
    Exp=anno_boxplot(df_box, cmap='turbo', grid=True),
    axis=1,
    label_side='right',
    verbose=0,
)

# `xticklabels_kws` and `yticklabels_kws` control the ticklabels for the heatmap.
plt.figure(figsize=(6, 8))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    # columns: explicit group order
    col_split=df.Groups,
    col_split_order=['G2', 'G1', 'G5', 'G4', 'G3'],
    col_split_gap=4.5,
    col_cluster=True,
    col_dendrogram=True,
    row_cluster=True,
    tree_kws=dict(col_cmap='Set1'),
    # cell appearance
    linewidths=0.05,
    linecolor='gold',
    cmap='RdYlBu_r',
    # tick labels
    show_rownames=True,
    show_colnames=True,
    row_names_side='left',
    # more parameters for [x/y]_ticklabels_kws, see: matplotlib.axes.Axes.tick_params or ?ax.tick_params
    xticklabels_kws={'labelrotation': -45, 'labelcolor': 'purple', 'labelsize': 14},
    # axis labels
    xlabel='Samples',
    ylabel="Features",
    xlabel_kws={'color': 'white', 'fontsize': 14},
    # increase the label padding manually using labelpad (points)
    ylabel_kws={'color': 'blue', 'fontsize': 14, 'labelpad': 45},
    xlabel_bbox_kws={'facecolor': 'black'},
    ylabel_bbox_kws={'facecolor': 'chocolate', 'edgecolor': 'red'},
    # legend
    label='values',
    legend_vgap=7,
    verbose=0,
)
plt.savefig("test.pdf", bbox_inches='tight')
plt.show()
../_images/notebooks_advanced_usage_30_0.png

Custom annotation

[19]:
# Build the per-cell text shown on the heatmap: '∗∗∗' for values >= 2, '∗∗' for >= 1,
# '∗' for > 0 and '' otherwise (NaN fails every comparison, so it also maps to '').
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map, so use
# `map` when this pandas version provides it and fall back to `applymap` otherwise.
annot = (df_heatmap.map if hasattr(df_heatmap, 'map') else df_heatmap.applymap)(
    lambda x: '∗∗∗' if x >= 2 else '∗∗' if x >= 1 else '∗' if x > 0 else ''
)
# To make asterisk located at center in vertical, use ∗ ASTERISK OPERATOR. instead of normal *; see: https://unicode-explorer.com/c/2217
plt.figure(figsize=(5, 6.5))
cm = ClusterMapPlotter(
        data=df_heatmap, top_annotation=col_ha,
        # fmt=None because `annot` already holds the final strings
        annot=annot,fmt=None,annot_kws={'color':'white','fontname':'Courier'},
        col_split=df.Groups, col_split_order="cluster_between_groups",
        col_cluster=True,row_cluster=True,
        label='values',
        tree_kws={'col_cmap': 'Set1'},verbose=0,legend_vgap=7,
        linewidths=0.05,linecolor='white',cmap='RdYlBu_r',
        xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'})
plt.show()
../_images/notebooks_advanced_usage_32_0.png

Custom linkage

[20]:
import fastcluster

# Custom column linkage: transpose so that each row is one sample, replace that sample's
# missing values with its own median, then link with average linkage on Canberra distance.
linkage = fastcluster.linkage(
    df_heatmap.T.apply(lambda sample: sample.fillna(sample.median()), axis=1),
    method='average',
    metric='canberra',
)
print("df_heatmap shape:", df_heatmap.shape, "\nlinkage shape:", linkage.shape, "\n", linkage)

plt.figure(figsize=(4, 6))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    z_score=0,
    # clustering: the precomputed linkage is handed over through col_dendrogram_kws
    col_cluster=True,
    row_cluster=True,
    col_dendrogram=True,
    col_dendrogram_kws={'linkage': linkage},
    tree_kws=dict(col_cmap='Set1'),
    # cell appearance
    linewidths=0.05,
    linecolor='white',
    cmap='RdYlBu_r',
    # labels and legends
    label='values',
    show_rownames=True,
    show_colnames=True,
    xticklabels_kws=dict(labelrotation=-45, labelcolor='blue'),
    legend_vgap=7,
    verbose=0,
)
plt.show()
df_heatmap shape: (30, 10)
linkage shape: (9, 4)
 [[ 2.          9.         18.27233765  2.        ]
 [ 0.          1.         18.48177502  2.        ]
 [ 4.          5.         19.6472128   2.        ]
 [ 7.         10.         19.93972364  3.        ]
 [ 8.         12.         20.24478861  3.        ]
 [ 3.         11.         20.3717095   3.        ]
 [14.         15.         21.93488246  6.        ]
 [ 6.         13.         21.99672826  4.        ]
 [16.         17.         22.35515103 10.        ]]
../_images/notebooks_advanced_usage_34_1.png
[21]:
# Re-assign the five sample groups, then reuse the custom column linkage from the
# previous cell together with a two-way row split.
df['Groups'] = ['G1', 'G2'] + ['G3'] * 5 + ['G4'] + ['G5'] * 2

plt.figure(figsize=(4, 6))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    # columns: precomputed linkage
    col_cluster=True,
    col_dendrogram=True,
    col_dendrogram_kws={'linkage': linkage},
    # rows
    row_cluster=True,
    row_split=2,
    row_split_gap=3,
    row_dendrogram=True,
    tree_kws=dict(col_cmap='Set1', row_cmap='Dark2'),
    # cell appearance
    linewidths=0.05,
    linecolor='white',
    cmap='RdYlBu_r',
    # labels and legends
    label='values',
    show_rownames=True,
    show_colnames=True,
    xticklabels_kws=dict(labelrotation=-45, labelcolor='blue'),
    legend_vgap=7,
    verbose=0,
)
plt.show()
../_images/notebooks_advanced_usage_35_0.png

Image annotation

[29]:
# Rebuild the per-sample example tables and add a table of image paths / URLs.
# Every table is indexed by the same ten sample labels (sample1 ... sample10).
df = pd.DataFrame(['AAAA1'] * 5 + ['BBBBB2'] * 5, columns=['AB'])
df['CD'] = ['C'] * 3 + ['D'] * 3 + ['G'] * 4
df['F'] = np.random.normal(0, 1, 10)
df.index = [f'sample{i}' for i in range(1, len(df) + 1)]

df_box = pd.DataFrame(
    np.random.randn(10, 4),
    index=df.index.tolist(),
    columns=[f'Gene{i}' for i in range(1, 5)],
)
df_bar = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df.index.tolist(), columns=['TMB1', 'TMB2'],
)
df_scatter = pd.DataFrame(
    np.random.uniform(0, 10, 10),
    index=df.index.tolist(), columns=['Scatter'],
)
df_bar1 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df.index.tolist(), columns=['T1-A', 'T1-B'],
)
df_bar2 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df.index.tolist(), columns=['T2-A', 'T2-B'],
)
# The last two bar tables each get one missing value.
df_bar3 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df.index.tolist(), columns=['T3-A', 'T3-B'],
)
df_bar3.iloc[5, 0] = np.nan
df_bar4 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 1)),
    index=df.index.tolist(), columns=['T4'],
)
df_bar4.iloc[7, 0] = np.nan

# One image per sample: mostly remote URLs plus two local file names.
df_img = pd.DataFrame(
    [
        'https://motifcollections.aertslab.org/v10nr_clust/logos/metacluster_136.1.png',
        'https://motifcollections.aertslab.org/v10nr_clust/logos/metacluster_135.7.png',
        'https://cdn3.iconfinder.com/data/icons/family-member-flat-happy-family-day/512/Brother-512.png',
        'https://cdn3.iconfinder.com/data/icons/family-member-flat-happy-family-day/512/Sister-512.png',
        'https://img.freepik.com/free-vector/sticker-design-with-cute-mouse-isolated_1308-59360.jpg',
        'https://motifcollections.aertslab.org/v10nr_clust/logos/metacluster_131.8.png',
        'https://img.freepik.com/premium-vector/vector-illustration-gorilla-isolated-white-background-cartoon-style_1151-66575.jpg',
        "2.png",
        '1.jpeg',
        'https://cdn3.iconfinder.com/data/icons/family-member-flat-happy-family-day/512/Brother-520.png',
    ],
    index=df.index.tolist(),
    columns=['path'],
)
[30]:
# Annotation-only plot that ends with an image track built from the paths in df_img.
plt.figure(figsize=(16, 4))
col_ha = HeatmapAnnotation(
    label=anno_label(df.AB, merge=True, rotation=15),
    AB=anno_simple(df.AB, add_text=True, legend=True),
    CD=anno_simple(df.CD, add_text=True, legend=True, text_kws=dict(color='black')),
    Exp=anno_boxplot(df_box, cmap='turbo', legend=True),
    Scatter=anno_scatterplot(df_scatter),
    Bar1=anno_barplot(df_bar1, legend=True, cmap='Dark2'),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Img=anno_img(df_img.path, border_width=5, border_color=255, height=15),
    axis=1,
    hgap=0.5,
    plot=True,
    legend=True,
    legend_vgap=5,
)
col_ha.show_ticklabels(df.index.tolist(), fontdict=dict(color='blue'), rotation=-30)
plt.show()
Plotting HeatmapAnnotations
Collecting annotation legends..
../_images/notebooks_advanced_usage_38_1.png

How to force display all row/col ticklabels?

When the height or width is not big enough to display all xticklabels and yticklabels, some ticklabels will be hidden to avoid overlapping. For example:

[31]:
# Small figure with default tick-label handling (no xticklabels/yticklabels override).
plt.figure(figsize=(3.5, 5))
cm = ClusterMapPlotter(
    data=df_heatmap,
    # clustering / splitting
    col_cluster=True,
    row_cluster=True,
    col_split=df.AB,
    row_split=2,
    col_split_gap=0.5,
    row_split_gap=0.8,
    row_dendrogram=True,
    tree_kws=dict(row_cmap='Set1'),
    # colors and legend
    cmap='meth2',
    label='values',
    legend_vgap=5,
    # tick labels
    show_rownames=True,
    show_colnames=True,
    row_names_side='right',
    xticklabels_kws=dict(labelrotation=-90, labelcolor='blue'),
    verbose=0,
)
plt.show()
../_images/notebooks_advanced_usage_40_0.png

To force display all ticklabels no matter whether the height or width is big enough, set parameters xticklabels/yticklabels to True:

[32]:
# Same plot as the previous cell, with yticklabels=True added to force every row label.
plt.figure(figsize=(3.5, 5))
cm = ClusterMapPlotter(
    data=df_heatmap,
    # clustering / splitting
    col_cluster=True,
    row_cluster=True,
    col_split=df.AB,
    row_split=2,
    col_split_gap=0.5,
    row_split_gap=0.8,
    row_dendrogram=True,
    tree_kws=dict(row_cmap='Set1'),
    # colors and legend
    cmap='meth2',
    label='values',
    legend_vgap=5,
    # tick labels
    show_rownames=True,
    show_colnames=True,
    row_names_side='right',
    yticklabels=True,
    xticklabels_kws=dict(labelrotation=-90, labelcolor='blue'),
    verbose=0,
)
plt.show()
../_images/notebooks_advanced_usage_42_0.png