[61]:
import os,sys
%matplotlib inline
import matplotlib.pylab as plt
import pickle
plt.rcParams['figure.dpi'] = 100
plt.rcParams['savefig.dpi']=300
sys.path.append(os.path.expanduser("~/Projects/Github/PyComplexHeatmap"))
import PyComplexHeatmap
print(PyComplexHeatmap.__version__)
from PyComplexHeatmap import *
1.7.2.dev0+g8abf70a.d20240415
[62]:
# Font configuration in one place: Arial as the sans-serif face for all text,
# and pdf.fonttype 42 for saved PDF files.
plt.rcParams.update({
    'font.family': 'sans serif',
    'font.sans-serif': 'Arial',
    'pdf.fonttype': 42,
})

Generate dataset

[63]:
# Generate the example dataset (random).
# Seed NumPy's global RNG first so the tables below, and every figure derived
# from them, are reproducible on Restart & Run All.
np.random.seed(0)

# Every per-sample table shares the same ten labels: sample1 ... sample10.
sample_names = ['sample' + str(i) for i in range(1, 11)]

# Sample metadata: three categorical columns (AB, CD, EF) and one numeric column (F).
df = pd.DataFrame(['GroupA'] * 5 + ['GroupB'] * 5, columns=['AB'])
df['CD'] = ['C'] * 3 + ['D'] * 3 + ['G'] * 4
df['EF'] = ['E'] * 6 + ['F'] * 2 + ['H'] * 2
df['F'] = np.random.normal(0, 1, 10)
df.index = sample_names

# Per-sample numeric tables used by the box / bar / scatter annotations.
df_box = pd.DataFrame(np.random.randn(10, 4), columns=['Gene' + str(i) for i in range(1, 5)])
df_box.index = sample_names
df_bar = pd.DataFrame(np.random.uniform(0, 10, (10, 2)), columns=['TMB1', 'TMB2'])
df_bar.index = sample_names
df_scatter = pd.DataFrame(np.random.uniform(0, 10, 10), columns=['Scatter'])
df_scatter.index = sample_names

# Heatmap matrix: 30 features (rows Fea1..Fea30) x 10 samples (columns).
df_heatmap = pd.DataFrame(np.random.randn(30, 10), columns=sample_names)
df_heatmap.index = ["Fea" + str(i) for i in range(1, df_heatmap.shape[0] + 1)]
# Insert one missing value (row Fea2, column sample3) so the examples include a NaN.
df_heatmap.iloc[1, 2] = np.nan

Add selected rows labels

[64]:
#Label the rows whose value in column sample4 is > 0.5: those rows keep their own name, all others get None
df_rows = df_heatmap.apply(lambda x:x.name if x.sample4 > 0.5 else None,axis=1)
df_rows=df_rows.to_frame(name='Selected')
#Extra column XY: 'A' for Fea15 and above, 'B' for Fea1..Fea14
df_rows['XY']=df_rows.index.to_series().apply(lambda x:'A' if int(x.replace('Fea',''))>=15 else 'B')

#Row annotations (axis=0): the sample4 values rounded to 2 decimals, passed to a scatter plot,
#a line plot and a bar plot, followed by the text labels built in df_rows
row_ha = HeatmapAnnotation(
            Scatter=anno_scatterplot(df_heatmap.sample4.apply(lambda x:round(x,2)),
                            height=12,cmap='jet',legend=False,grid=True),
            Line=anno_lineplot(df_heatmap.sample4.apply(lambda x:round(x,2)),
                            height=12,colors='red',linewidth=2,legend=False),
            Bar=anno_barplot(df_heatmap.sample4.apply(lambda x:round(x,2)),
                            height=15,cmap='rainbow',legend=False),
            selected=anno_label(df_rows,colors='red',relpos=(-0.05,0.4)),
            label_kws={'rotation':30,'horizontalalignment':'left','verticalalignment':'bottom'},
            axis=0,verbose=0)

#Column annotations (axis=1): sample metadata from df plus the per-sample values in df_box
col_ha = HeatmapAnnotation(
            label=anno_label(df.AB, merge=True,rotation=10,
                             arrowprops = dict(visible=False,)
                            ), #visible in arrowprops can control whether to show the arrow
            AB=anno_simple(df.AB,add_text=True),axis=1,
            CD=anno_simple(df.CD,add_text=True),
            EF=anno_simple(df.EF,add_text=True,
                            legend_kws={'frameon':True}),
            G=anno_boxplot(df_box, cmap='jet',legend=False,grid=True),
            verbose=0)

#Print the smallest and largest values of the heatmap matrix, ignoring the NaN
print(np.nanmin(df_heatmap),np.nanmax(df_heatmap))

#Clustered heatmap with both annotations attached; columns are split by df.AB and rows into 2 groups.
#The figure is written to example0.pdf in the current working directory before being shown.
plt.figure(figsize=(5.5, 6.5))
cm = ClusterMapPlotter(
        data=df_heatmap, top_annotation=col_ha,right_annotation=row_ha,
        col_cluster=True,row_cluster=True,
        col_split=df.AB,row_split=2, z_score=0,
        col_split_gap=0.5,row_split_gap=0.8,
        label='values',row_dendrogram=True,col_dendrogram=False,row_dendrogram_size=15,
        show_rownames=False,show_colnames=True,
        tree_kws={'row_cmap': 'Set1'},verbose=0,legend_gap=5,
        cmap='RdYlBu_r',xticklabels_kws={'labelrotation':-90,'labelcolor':'blue'})
plt.savefig("example0.pdf", bbox_inches='tight')
plt.show()
-2.6321021225640213 2.8811702820956677
../_images/notebooks_advanced_usage_5_1.png

Add annotations on the top of heatmap cells

[65]:
#Label the rows whose value in column sample4 is > 0.5: those rows keep their own name, all others get None
df_rows = df_heatmap.apply(lambda x:x.name if x.sample4 > 0.5 else None,axis=1)
df_rows=df_rows.to_frame(name='Selected')
#Extra column XY ('A' for Fea15 and above, otherwise 'B'); used below as row_split
df_rows['XY']=df_rows.index.to_series().apply(lambda x:'A' if int(x.replace('Fea',''))>=15 else 'B')

#Row annotations (axis=0): the rounded sample4 values as a simple annotation with text and as a bar plot, plus the labels
row_ha = HeatmapAnnotation(
            S4=anno_simple(df_heatmap.sample4.apply(lambda x:round(x,2)),
                       add_text=True,height=10,
                       text_kws={'rotation':0,'fontsize':10,'color':'black'}),
            # Scatter=anno_scatterplot(df_heatmap.sample4.apply(lambda x:round(x,2)),
            #                  height=10),
            Test=anno_barplot(df_heatmap.sample4.apply(lambda x:round(x,2)),
                        height=18,cmap='rainbow'),
            selected=anno_label(df_rows,colors='red'),
            axis=0,verbose=0,#wgap=4,
            label_kws={'rotation':0,'horizontalalignment':'left',
                                        'verticalalignment':'bottom'})

#Column annotations (axis=1): sample metadata from df plus the per-sample values in df_box
col_ha = HeatmapAnnotation(
            label=anno_label(df.AB, merge=True,rotation=15),
            AB=anno_simple(df.AB,add_text=True),axis=1,
            CD=anno_simple(df.CD,add_text=True,text_kws=dict(bbox={'boxstyle':'Circle','edgecolor':'white','fill':False},fontsize=8),
                           height=4.5),
            EF=anno_simple(df.EF,add_text=True,
                            legend_kws={'frameon':False}),
            Exp=anno_boxplot(df_box, cmap='turbo',grid=True),
            verbose=0,#hgap=2
            ) #verbose=0 will turn off the log.

#Show the inputs: first rows of the metadata, per-sample mean of df_box, first rows of the matrix
print(df.head())
print(df_box.mean(axis=1))
print(df_heatmap.head())
#annot=True with fmt='.1g' is what requests the values written on top of the heatmap cells;
#rows are split by the XY column and columns by df.AB
plt.figure(figsize=(6, 8))
cm = ClusterMapPlotter(
            data=df_heatmap, top_annotation=col_ha,right_annotation=row_ha,
            col_split=df.AB,
            row_split=df_rows.XY,
            #col_split_gap=3.5,row_split_gap=2.5,
            col_cluster=True,row_cluster=True,
            label='values',row_dendrogram=False,
            show_rownames=True,show_colnames=True,
            verbose=0,legend_gap=5,#legend_hpad=10,legend_vpad=5,
            annot=True,fmt='.1g',linewidths=0.05,linecolor='gold',cmap='RdYlBu_r',
            xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'},
            yticklabels_kws=dict(labelcolor='red'),#subplot_gap=8
)
#subplot_gap controls the gap between main heatmap and column or row annotations

plt.show()
#Row and column names in plotted order; printed as one list per split group
print(cm.row_order)
print(cm.col_order)
             AB CD EF         F
sample1  GroupA  C  E -0.210367
sample2  GroupA  C  E -0.764762
sample3  GroupA  C  E  1.665616
sample4  GroupA  D  E -1.545737
sample5  GroupA  D  E  3.145600
sample1     0.002185
sample2    -0.004763
sample3     0.325769
sample4     1.436755
sample5     0.348452
sample6    -0.356842
sample7     0.111622
sample8    -0.557544
sample9    -0.094792
sample10    0.020230
dtype: float64
       sample1   sample2   sample3   sample4   sample5   sample6   sample7  \
Fea1 -0.131741 -0.162128 -0.328656  0.588910  0.113312  0.466366  0.040714
Fea2  0.081925 -0.916003       NaN  0.598650 -0.771747  1.746257  0.375753
Fea3 -0.902882 -0.120029  0.172726  0.302578  0.395246 -1.188965 -1.224670
Fea4 -0.974029 -1.019292 -0.147058 -0.385203  0.931348  0.210785  1.821199
Fea5  0.082241 -0.627536 -0.373517  1.569405  0.037562 -0.828194 -0.177589

       sample8   sample9  sample10
Fea1  0.304473  0.497133 -0.414034
Fea2 -0.125438  0.165879 -1.002507
Fea3  1.157401 -0.241850  0.352401
Fea4 -0.892112 -0.233361 -1.032746
Fea5  0.875993  1.381788  1.279071
../_images/notebooks_advanced_usage_7_1.png
[['Fea28', 'Fea21', 'Fea29', 'Fea27', 'Fea26', 'Fea15', 'Fea16', 'Fea19', 'Fea30', 'Fea17', 'Fea23', 'Fea22', 'Fea24', 'Fea18', 'Fea20', 'Fea25'], ['Fea11', 'Fea14', 'Fea9', 'Fea4', 'Fea8', 'Fea3', 'Fea5', 'Fea12', 'Fea1', 'Fea2', 'Fea7', 'Fea10', 'Fea6', 'Fea13']]
[['sample1', 'sample4', 'sample3', 'sample2', 'sample5'], ['sample6', 'sample7', 'sample8', 'sample9', 'sample10']]

Only plot the annotations

[66]:
# Rebuild the example tables for the annotation-only figures. All of them are
# indexed by the same ten labels, sample1 ... sample10.
sample_ids = ['sample' + str(k) for k in range(1, 11)]

# Sample metadata: two categorical columns and one random numeric column.
df = pd.DataFrame(
    {
        'AB': ['AAAA1'] * 5 + ['BBBBB2'] * 5,
        'CD': ['C'] * 3 + ['D'] * 3 + ['G'] * 4,
        'F': np.random.normal(0, 1, 10),
    },
    index=sample_ids,
)

# Random per-sample values for the box, bar and scatter annotations.
df_box = pd.DataFrame(np.random.randn(10, 4), index=sample_ids,
                      columns=['Gene' + str(k) for k in range(1, 5)])
df_bar = pd.DataFrame(np.random.uniform(0, 10, (10, 2)), index=sample_ids,
                      columns=['TMB1', 'TMB2'])
df_scatter = pd.DataFrame(np.random.uniform(0, 10, 10), index=sample_ids,
                          columns=['Scatter'])
df_bar1 = pd.DataFrame(np.random.uniform(0, 10, (10, 2)), index=sample_ids,
                       columns=['T1-A', 'T1-B'])
df_bar2 = pd.DataFrame(np.random.uniform(0, 10, (10, 2)), index=sample_ids,
                       columns=['T2-A', 'T2-B'])

# df_bar3 and df_bar4 each get a single missing value (sample6 / sample8).
df_bar3 = pd.DataFrame(np.random.uniform(0, 10, (10, 2)), index=sample_ids,
                       columns=['T3-A', 'T3-B'])
df_bar3.iat[5, 0] = np.nan
df_bar4 = pd.DataFrame(np.random.uniform(0, 10, (10, 1)), index=sample_ids,
                       columns=['T4'])
df_bar4.iat[7, 0] = np.nan

# Print every table (only the first rows of df_box).
for table in (df, df_box.head(), df_scatter, df_bar,
              df_bar1, df_bar2, df_bar3, df_bar4):
    print(table)
              AB CD         F
sample1    AAAA1  C  0.586862
sample2    AAAA1  C  1.246191
sample3    AAAA1  C -1.118067
sample4    AAAA1  D  1.186581
sample5    AAAA1  D  0.648017
sample6   BBBBB2  D -0.740133
sample7   BBBBB2  G  1.237010
sample8   BBBBB2  G -1.679448
sample9   BBBBB2  G  1.454375
sample10  BBBBB2  G -0.008855
            Gene1     Gene2     Gene3     Gene4
sample1 -0.051486 -0.698328 -1.563904  1.145194
sample2  0.397775  0.563337 -0.554522 -0.283961
sample3  1.911433 -0.178098 -0.449513  0.436575
sample4  0.998321 -1.547375  0.153069  0.659142
sample5 -0.857548  0.045073 -0.116103 -1.709895
           Scatter
sample1   2.312420
sample2   8.700443
sample3   2.594622
sample4   0.995570
sample5   6.346088
sample6   8.784254
sample7   4.548412
sample8   9.312224
sample9   9.279522
sample10  2.176052
              TMB1      TMB2
sample1   3.283453  1.029316
sample2   8.081410  9.157657
sample3   4.618796  5.994200
sample4   4.024821  5.061905
sample5   6.009279  3.291284
sample6   5.844126  1.899835
sample7   6.129225  0.940908
sample8   1.691643  3.311886
sample9   1.342168  1.987989
sample10  1.568968  6.577185
              T1-A      T1-B
sample1   9.389534  7.654098
sample2   3.917390  2.732671
sample3   6.469047  3.691846
sample4   1.152763  8.272298
sample5   2.207963  4.467439
sample6   9.777025  4.860341
sample7   2.062939  5.930794
sample8   4.574324  4.723417
sample9   7.000079  2.968817
sample10  2.355413  0.396133
              T2-A      T2-B
sample1   3.330243  8.382224
sample2   0.536375  8.244936
sample3   2.155200  6.579543
sample4   9.374440  0.161295
sample5   2.301849  4.527089
sample6   6.317955  7.000506
sample7   4.266394  1.553810
sample8   4.835425  9.188559
sample9   5.556356  4.475371
sample10  3.264468  3.981251
              T3-A      T3-B
sample1   2.009180  0.472453
sample2   0.474146  3.230050
sample3   5.015374  3.327011
sample4   4.303717  5.684264
sample5   0.078473  2.806307
sample6        NaN  1.584169
sample7   4.086410  6.818322
sample8   8.464331  9.508005
sample9   3.717501  6.782938
sample10  5.872283  4.300459
                T4
sample1   0.816047
sample2   2.419108
sample3   9.955314
sample4   2.457227
sample5   8.029317
sample6   5.447036
sample7   8.975613
sample8        NaN
sample9   4.026585
sample10  3.807470
[67]:
# Annotation-only figure: no ClusterMapPlotter is created in this cell; the
# HeatmapAnnotation is built with plot=True and legend=True on a fresh figure.
plt.figure(figsize=(4, 8))
# Column annotations (axis=1), listed with the merged AB text label first.
# Line and Bar2 both use df_bar2 with an explicit column -> color dict.
col_ha = HeatmapAnnotation(
            label=anno_label(df.AB, merge=True,rotation=15),
            AB=anno_simple(df.AB,add_text=True), axis=1,
            CD=anno_simple(df.CD, add_text=True,text_kws={'color':'black'}),
            Exp=anno_boxplot(df_box, cmap='turbo',grid=True),
            Scatter=anno_scatterplot(df_scatter,legend=True),
            Line=anno_lineplot(df_bar2,linewidth=4,colors={'T2-B':'orangered','T2-A':'yellowgreen'},
                              marker='D',legend=True),  #colors=['orangered','yellowgreen']
            TMB_bar=anno_barplot(df_bar,legend=True,cmap='Set1'),
            Bar1=anno_barplot(df_bar1,legend=True,colors=['red','black']), #colors can be str, list, tuple or dict
            Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
            Bar2=anno_barplot(df_bar2,legend=True,colors={'T2-B':'orangered','T2-A':'yellowgreen'}),
            Bar3=anno_barplot(df_bar3,legend=True,cmap='Paired'),
            plot=True,legend=True,legend_gap=5,hgap=4)
# Add the sample names as tick labels (blue, rotated by -30).
col_ha.show_ticklabels(df.index.tolist(),fontdict={'color':'blue'},rotation=-30)
plt.show()
Starting plotting HeatmapAnnotations
Collecting annotation legends..
../_images/notebooks_advanced_usage_10_1.png

Change orientation to down and add extra space

[68]:
# Same annotations as the previous figure but listed in the opposite order,
# with the anno_label entry last; still axis=1.
plt.figure(figsize=(4, 8))
row_ha = HeatmapAnnotation(
            TMB_bar=anno_barplot(df_bar,legend=True,cmap='Set1'),
            Bar1=anno_barplot(df_bar1,legend=True,cmap='Dark2'),
            Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
            Bar2=anno_barplot(df_bar2,legend=True,cmap='tab10'),
            Bar3=anno_barplot(df_bar3,legend=True,cmap='Paired'),
            Scatter=anno_scatterplot(df_scatter),
            Exp=anno_boxplot(df_box, cmap='turbo',legend=True,grid=True),
            CD=anno_simple(df.CD, colors={'C': 'red', 'D': 'gray', 'G': 'yellow'},
                           add_text=True,legend=True,text_kws={'color':'black'}),
            AB=anno_simple(df.AB,add_text=True,legend=True),
            label=anno_label(df.AB, merge=True,rotation=-15),
            plot=True,plot_legend=False,legend_hpad=13,axis=1,hgap=1
            )
# Add the sample names as tick labels (blue, rotated by 30).
row_ha.show_ticklabels(df.index.tolist(),fontdict={'color':'blue'},rotation=30)
plt.show()
# Here, we can use hgap (when axis=1) or wgap (when axis=0) to control the width or height of the space between different annotations.
Starting plotting HeatmapAnnotations
../_images/notebooks_advanced_usage_12_1.png

Change orientation to the left

[69]:
# Row-wise layout (axis=0) on a wide figure, with the anno_label entry listed first.
# wgap is the gap setting for axis=0; annotation names are placed with label_side='bottom'.
plt.figure(figsize=(8, 4))
row_ha = HeatmapAnnotation(
            label=anno_label(df.AB, merge=True,rotation=15),
            AB=anno_simple(df.AB,add_text=True,legend=True,
                           #text_kws=dict(bbox={"pad":0},va='center',ha='center',rotation_mode='anchor')
                          ),
            CD=anno_simple(df.CD,add_text=True,legend=True),
            Exp=anno_boxplot(df_box, cmap='turbo',legend=True),
            Scatter=anno_scatterplot(df_scatter),
            TMB_bar=anno_barplot(df_bar,legend=True,cmap='Set1'),
            Bar1=anno_barplot(df_bar1,legend=True,cmap='Dark2'),
            Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
            Bar2=anno_barplot(df_bar2,legend=True,cmap='tab10'),
            Bar3=anno_barplot(df_bar3,legend=True,cmap='Paired'),
            plot=True,legend=True,legend_gap=5,
            axis=0,legend_hpad=20,label_side='bottom',wgap=3,
            )
# Add the sample names as unrotated blue tick labels.
row_ha.show_ticklabels(df.index.tolist(),fontdict={'color':'blue'},rotation=0)
plt.show()
Starting plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
../_images/notebooks_advanced_usage_14_1.png

Change orientation to the right

[70]:
# Row-wise layout (axis=0) again, but with the annotations in the opposite order
# and the anno_label entry listed last.
# The CD and AB text is rotated by -90 through text_kws.
plt.figure(figsize=(8, 4))
row_ha = HeatmapAnnotation(
            TMB_bar=anno_barplot(df_bar,legend=True,cmap='Set1'),
            Bar1=anno_barplot(df_bar1,legend=True,cmap='Dark2'),
            Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
            Bar2=anno_barplot(df_bar2,legend=True,cmap='tab10'),
            Bar3=anno_barplot(df_bar3,legend=True,cmap='Paired'),
            Scatter=anno_scatterplot(df_scatter),
            Exp=anno_boxplot(df_box, cmap='turbo',legend=True,grid=True),
            CD=anno_simple(df.CD, colors={'C': 'red', 'D': 'gray', 'G': 'green'},
                           add_text=True,legend=True,
                           text_kws={'rotation':-90}),
            AB=anno_simple(df.AB,add_text=True,legend=True,
                           text_kws={'rotation':-90,'color':'black'}),
            label=anno_label(df.AB, merge=True,rotation=15),
            plot=True,legend=True,legend_hpad=13,legend_gap=5,axis=0,wgap=3,
            )
# Add the sample names as unrotated black tick labels.
row_ha.show_ticklabels(df.index.tolist(),fontdict={'color':'black'},rotation=0)
plt.show()
Starting plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
../_images/notebooks_advanced_usage_16_1.png

Changing orientation using parameter orientation

By default, if there is no anno_label in the annotation, the orientation is determined by the parameter orientation.

[71]:
# No anno_label entry here, so the direction is chosen explicitly with orientation='left' (axis=0).
plt.figure(figsize=(8, 4))
col_ha = HeatmapAnnotation(
            AB=anno_simple(df.AB,add_text=True,legend=True),
            CD=anno_simple(df.CD,add_text=True,legend=True),
            Exp=anno_boxplot(df_box, cmap='turbo',legend=True),
            Scatter=anno_scatterplot(df_scatter),
            TMB_bar=anno_barplot(df_bar,legend=True,cmap='Set1'),
            Bar1=anno_barplot(df_bar1,legend=True,cmap='Dark2'),
            Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
            Bar2=anno_barplot(df_bar2,legend=True,cmap='tab10'),
            Bar3=anno_barplot(df_bar3,legend=True,cmap='Paired'),
            plot=True,legend=True,axis=0,
            legend_gap=5,orientation='left',
            )
plt.show()
Starting plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
../_images/notebooks_advanced_usage_18_1.png
[72]:
# Same annotations with orientation='right'; the AB and CD text is rotated by -90
# and enlarged through text_kws, and wgap=3 is passed for the gap setting.
plt.figure(figsize=(8, 4))
col_ha = HeatmapAnnotation(
            AB=anno_simple(df.AB,add_text=True,legend=True,
                          text_kws={'rotation':-90,'fontsize':14,'color':'black'}),
            CD=anno_simple(df.CD,add_text=True,legend=True,
                          text_kws={'rotation':-90,'fontsize':14,'color':'white'}),
            Exp=anno_boxplot(df_box, cmap='turbo',legend=True),
            Scatter=anno_scatterplot(df_scatter),
            TMB_bar=anno_barplot(df_bar,legend=True,cmap='Set1'),
            Bar1=anno_barplot(df_bar1,legend=True,cmap='Dark2'),
            Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
            Bar2=anno_barplot(df_bar2,legend=True,cmap='tab10'),
            Bar3=anno_barplot(df_bar3,legend=True,cmap='Paired'),
            plot=True,legend=True,axis=0,wgap=3,
            legend_gap=5,orientation='right',
            )
plt.show()
Starting plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
../_images/notebooks_advanced_usage_19_1.png

Add multiple heatmap annotations using for loop

Typically, we can create a heatmap annotation using the following code:

col_ha = HeatmapAnnotation(
                        Group=anno_simple(df_cols.hypomethylated_samples,colors=sample_group_color_dict,legend=True),
                        CellType=anno_simple(df_cols.CellType,colors=ct_color_dict,legend=ct_legend),
                        M1=anno_simple(df_cols['M1'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M1'}),
                        verbose=0,label_side='right',label_kws={'horizontalalignment':'left'})

But what if we have many annotations, for example:

col_ha = HeatmapAnnotation(
                        Group=anno_simple(df_cols.hypomethylated_samples,colors=sample_group_color_dict,legend=True),
                        CellType=anno_simple(df_cols.CellType,colors=ct_color_dict,legend=ct_legend),
                        M1=anno_simple(df_cols['M1'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M1'}),
                        M2=anno_simple(df_cols['M2'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M2'}),
                        M3=anno_simple(df_cols['M3'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M3'}),
                        .....
                        verbose=0,label_side='right',label_kws={'horizontalalignment':'left'})

In this case, we can create a dict with the annotation names as keys and the annotations as values:

col_ha_dict={
                'Group':anno_simple(df_cols.hypomethylated_samples,colors=sample_group_color_dict,legend=True),
                'CellType':anno_simple(df_cols.CellType,colors=ct_color_dict,legend=ct_legend)
                }


for col in sample_cols:
    col_ha_dict[col]=anno_simple(df_cols[col],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':col})

col_ha = HeatmapAnnotation(**col_ha_dict,
                           verbose=0,label_side='right',label_kws={'horizontalalignment':'left'})

Cluster between groups and cluster within groups

Similar to cluster_between_groups and cluster_within_groups in R (https://jokergoo.github.io/2021/03/05/cluster-groups-in-complexheatmap/)

cluster_within_groups: col_split=*, col_cluster=True

[73]:
# Assign the ten samples to five groups of sizes 1, 1, 5, 1 and 2 (adds a column to df in place).
df['Groups']=['G1']+['G2']+['G3']*5+['G4']+['G5']*2
col_ha = HeatmapAnnotation(
            Groups=anno_simple(df.Groups,add_text=True,text_kws={'color':'black'}),
            AB=anno_simple(df.AB,add_text=True),axis=1,
            Exp=anno_boxplot(df_box, cmap='turbo'),
            verbose=0) #verbose=0 will turn off the log.


# Columns are split by df.Groups and col_cluster=True is passed (the "within groups" case).
# df_heatmap is still the matrix created in the "Generate dataset" cell.
plt.figure(figsize=(6, 8))
cm = ClusterMapPlotter(
        data=df_heatmap, top_annotation=col_ha,
        col_split=df.Groups,col_split_gap=2,
        col_cluster=True,row_cluster=True,col_dendrogram=True,
        label='values',show_rownames=True,show_colnames=True,
        tree_kws={'col_cmap': 'Set1'},verbose=0,legend_gap=7,
        annot=True,fmt='.1g',linewidths=0.05,linecolor='gold',cmap='RdYlBu_r',
        xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'},
        ylabel='Features')
plt.show()
../_images/notebooks_advanced_usage_23_0.png

cluster_between_groups: col_split=*, col_split_order="cluster_between_groups",col_cluster=False

[74]:
# "Between groups" case: col_split_order="cluster_between_groups" with col_cluster=False.
# Rows are additionally split into 2 groups with a row dendrogram.
# Reuses col_ha and df['Groups'] from the previous cell.
plt.figure(figsize=(6, 8))
cm = ClusterMapPlotter(
        data=df_heatmap, top_annotation=col_ha,
        col_split=df.Groups, col_split_order="cluster_between_groups",
        col_split_gap=2,col_cluster=False,
        row_cluster=True,col_dendrogram=True,
        row_split=2,row_split_gap=1,row_dendrogram=True,
        label='values',show_rownames=True,show_colnames=True,
        tree_kws={'colors':'blue','row_cmap':'Set1','col_cmap':'Paired'},
        verbose=0,legend_gap=7,
        linewidths=0.05,linecolor='gold',cmap='RdYlBu_r',
        xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'},
        ylabel='Features')
plt.show()
../_images/notebooks_advanced_usage_25_0.png

cluster_within_groups && cluster_between_groups: col_split=*, col_split_order="cluster_between_groups",col_cluster=True

[75]:
# Both at once: col_split_order="cluster_between_groups" together with col_cluster=True.
# Here col_split is a two-column frame (AB and Groups) rather than a single column.
plt.figure(figsize=(6, 8))
cm = ClusterMapPlotter(
        data=df_heatmap, top_annotation=col_ha,
        col_split=df.loc[:,['AB','Groups']], col_split_order="cluster_between_groups",
        col_split_gap=2,col_cluster=True,row_split_gap=1.5,
        row_split=3,#row_split_order='cluster_between_groups',
        row_cluster=True,col_dendrogram=True,row_dendrogram=True,
        label='values',show_rownames=True,show_colnames=True,
        tree_kws={'colors':'blue'},verbose=0,legend_gap=7,
        annot=True,fmt='.1g',linewidths=0.05,linecolor='gold',cmap='RdYlBu_r',
        xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'},
        ylabel='Features')
plt.show()
../_images/notebooks_advanced_usage_27_0.png
[76]:
# `label_kws` in `HeatmapAnnotation` controls the heatmap annotation labels
# (this cell only sets label_side='right')
col_ha = HeatmapAnnotation(
                Groups=anno_simple(df.Groups,add_text=True,text_kws={'color':'black'}),
                AB=anno_simple(df.AB,add_text=True),axis=1,
                Exp=anno_boxplot(df_box, cmap='turbo',grid=True),
                verbose=0,label_side='right'
)

# `xticklabels_kws` and `yticklabels_kws` control the ticklabels for the heatmap.
# col_split_order is given here as an explicit list of group names.
plt.figure(figsize=(6, 8))
cm = ClusterMapPlotter(data=df_heatmap, top_annotation=col_ha,
                       col_split=df.Groups,col_split_order=['G2','G1','G5','G4','G3'],
                       col_split_gap=4.5,col_cluster=True,
                       row_cluster=True,col_dendrogram=True,
                       label='values',show_rownames=True,show_colnames=True,
                       row_names_side='left',
                       tree_kws={'col_cmap':'Set1'},verbose=0,legend_gap=7,
                       linewidths=0.05,linecolor='gold',cmap='RdYlBu_r',
                       xticklabels_kws=dict(labelrotation=-45,labelcolor='purple',labelsize=14),
                       #more parameters for [x/y]_ticklabels_kws, see: matplotlib.axes.Axes.tick_params or ?ax.tick_params
                       xlabel='Samples',ylabel="Features",
                       xlabel_kws=dict(color='white',fontsize=14),
                       ylabel_kws=dict(color='blue',fontsize=14,labelpad=45), #increase the label padding manually using labelpad (points)
                       xlabel_bbox_kws=dict(facecolor='black'),
                       ylabel_bbox_kws=dict(facecolor='chocolate',edgecolor='red'),
                      )
# Write the figure to test.pdf in the current working directory, then display it.
plt.savefig("test.pdf",bbox_inches='tight')
plt.show()
../_images/notebooks_advanced_usage_28_0.png

Custom annotation

[77]:
# Build a frame of text markers with the same shape as df_heatmap:
# >= 2 -> '∗∗∗', >= 1 -> '∗∗', > 0 -> '∗', anything else (including the NaN cell) -> ''.
annot=df_heatmap.applymap(lambda x:'∗∗∗' if x >= 2 else '∗∗' if x >=1 else '∗' if x >0 else '')
# To center the asterisk vertically, use ∗ (ASTERISK OPERATOR) instead of the normal *; see: https://unicode-explorer.com/c/2217
# The marker frame is passed as annot with fmt=None; col_ha comes from the previous cell.
plt.figure(figsize=(4, 6.5))
cm = ClusterMapPlotter(
        data=df_heatmap, top_annotation=col_ha,
        annot=annot,fmt=None,annot_kws={'color':'black','fontname':'Courier'},
        col_split=df.Groups, col_split_order="cluster_between_groups",
        col_cluster=True,row_cluster=True,
        label='values',
        tree_kws={'col_cmap': 'Set1'},verbose=0,legend_gap=7,
        linewidths=0.05,linecolor='white',cmap='RdYlBu_r',
        xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'})
plt.show()
../_images/notebooks_advanced_usage_30_0.png

Custom linkage

[78]:
# fastcluster is a separate package that is only needed for this section.
import fastcluster
# custom column linkage
# Transpose so each row is a sample, fill that sample's missing values with its own
# median, then compute an average-linkage tree with the Canberra metric.
linkage = fastcluster.linkage(df_heatmap.T.apply(lambda x:x.fillna(x.median()),axis=1), method='average', metric='canberra')
# Print the matrix shape, the linkage shape and the linkage matrix itself.
print("df_heatmap shape:",df_heatmap.shape,"\nlinkage shape:",linkage.shape,"\n",linkage)

# The precomputed linkage is handed over through col_dendrogram_kws.
plt.figure(figsize=(4, 6))
cm = ClusterMapPlotter(
        data=df_heatmap, top_annotation=col_ha,z_score=0,
        col_cluster=True,row_cluster=True,show_rownames=True,show_colnames=True,
        label='values',col_dendrogram_kws=dict(linkage=linkage),col_dendrogram=True,
        tree_kws={'col_cmap': 'Set1'},verbose=0,legend_gap=7,
        linewidths=0.05,linecolor='white',cmap='RdYlBu_r',
        xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'})
plt.show()
df_heatmap shape: (30, 10)
linkage shape: (9, 4)
 [[ 3.          7.         16.30063564  2.        ]
 [ 4.          9.         19.98077013  2.        ]
 [ 0.          8.         20.13691502  2.        ]
 [ 5.         11.         20.98548079  3.        ]
 [10.         12.         21.51448534  4.        ]
 [ 2.         13.         21.61319985  4.        ]
 [ 6.         14.         21.88357858  5.        ]
 [15.         16.         22.2212842   9.        ]
 [ 1.         17.         22.43105863 10.        ]]
../_images/notebooks_advanced_usage_32_1.png
[79]:
# Same group assignment as in the earlier clustering cell (sizes 1, 1, 5, 1, 2).
df['Groups']=['G1']+['G2']+['G3']*5+['G4']+['G5']*2
# Reuse the custom column linkage from the previous cell, now with the rows split
# into 2 groups and a row dendrogram, and without z_score.
plt.figure(figsize=(4, 6))
cm = ClusterMapPlotter(
        data=df_heatmap, top_annotation=col_ha,
        col_cluster=True,row_cluster=True,show_rownames=True,show_colnames=True,
        row_split=2,row_split_gap=3,row_dendrogram=True,
        label='values',col_dendrogram_kws=dict(linkage=linkage),col_dendrogram=True,
        tree_kws={'col_cmap': 'Set1','row_cmap':'Dark2'},verbose=0,legend_gap=7,
        linewidths=0.05,linecolor='white',cmap='RdYlBu_r',
        xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'})
plt.show()
../_images/notebooks_advanced_usage_33_0.png

Image annotation

[80]:
# Rebuild the example tables for the image-annotation figure. All of them are
# indexed by the same ten labels, sample1 ... sample10.
sample_ids = ['sample' + str(k) for k in range(1, 11)]

# Sample metadata: two categorical columns and one random numeric column.
df = pd.DataFrame(
    {
        'AB': ['AAAA1'] * 5 + ['BBBBB2'] * 5,
        'CD': ['C'] * 3 + ['D'] * 3 + ['G'] * 4,
        'F': np.random.normal(0, 1, 10),
    },
    index=sample_ids,
)

# Random per-sample values for the box, bar and scatter annotations.
df_box = pd.DataFrame(np.random.randn(10, 4), index=sample_ids,
                      columns=['Gene' + str(k) for k in range(1, 5)])
df_bar = pd.DataFrame(np.random.uniform(0, 10, (10, 2)), index=sample_ids,
                      columns=['TMB1', 'TMB2'])
df_scatter = pd.DataFrame(np.random.uniform(0, 10, 10), index=sample_ids,
                          columns=['Scatter'])
df_bar1 = pd.DataFrame(np.random.uniform(0, 10, (10, 2)), index=sample_ids,
                       columns=['T1-A', 'T1-B'])
df_bar2 = pd.DataFrame(np.random.uniform(0, 10, (10, 2)), index=sample_ids,
                       columns=['T2-A', 'T2-B'])

# df_bar3 and df_bar4 each get a single missing value (sample6 / sample8).
df_bar3 = pd.DataFrame(np.random.uniform(0, 10, (10, 2)), index=sample_ids,
                       columns=['T3-A', 'T3-B'])
df_bar3.iat[5, 0] = np.nan
df_bar4 = pd.DataFrame(np.random.uniform(0, 10, (10, 1)), index=sample_ids,
                       columns=['T4'])
df_bar4.iat[7, 0] = np.nan

# Every sample points at the same image file, 1.jpeg (a relative path).
df_img = pd.DataFrame({'path': ["1.jpeg"] * 10}, index=sample_ids)
# print(df)
# print(df_box.head())
# print(df_scatter)
print(df_img)
            path
sample1   1.jpeg
sample2   1.jpeg
sample3   1.jpeg
sample4   1.jpeg
sample5   1.jpeg
sample6   1.jpeg
sample7   1.jpeg
sample8   1.jpeg
sample9   1.jpeg
sample10  1.jpeg
[81]:
# Annotation-only figure on a wide canvas; the last entry, Img, is an anno_img
# built from the file paths in df_img.path.
plt.figure(figsize=(16, 4))
col_ha = HeatmapAnnotation(
            label=anno_label(df.AB, merge=True,rotation=15),
            AB=anno_simple(df.AB,add_text=True,legend=True), axis=1,
            CD=anno_simple(df.CD, add_text=True,legend=True,text_kws={'color':'black'}),
            Exp=anno_boxplot(df_box, cmap='turbo',legend=True),
            Scatter=anno_scatterplot(df_scatter),
            Bar1=anno_barplot(df_bar1,legend=True,cmap='Dark2'),
            Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
            Img=anno_img(df_img.path,border_width=5,border_color=255,height=15),
            plot=True,legend=True,legend_gap=5,hgap=0.5)
# Add the sample names as tick labels (blue, rotated by -30).
col_ha.show_ticklabels(df.index.tolist(),fontdict={'color':'blue'},rotation=-30)
plt.show()
Starting plotting HeatmapAnnotations
Collecting annotation legends..
../_images/notebooks_advanced_usage_36_1.png

Choosing a built-in colormap in PyComplexHeatmap

Diverging

  • exp1

  • exp2

  • meth1

  • meth2

  • diverging1

  • parula

[82]:
from PyComplexHeatmap.utils import get_colormap
[83]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# Registry filled by plot_color_gradients: category name -> list of colormap names.
cmaps = {}

# Two identical rows running linearly from 0 to 1 in 256 steps; drawn with
# imshow to render one horizontal strip per colormap.
gradient = np.vstack([np.linspace(0, 1, 256)] * 2)


def plot_color_gradients(category, cmap_list):
    """Draw one horizontal gradient strip per colormap name in ``cmap_list``.

    The figure is titled ``'<category> colormaps'``. Names are looked up in
    ``mpl.colormaps``. As a side effect, ``cmap_list`` is stored in the
    module-level ``cmaps`` dict under ``category``.
    """
    n_maps = len(cmap_list)
    # Figure height: fixed top (0.35) and bottom (0.15) margins plus 0.22 per
    # strip, with a gap of a tenth of a strip between neighbours.
    strips = n_maps + (n_maps - 1) * 0.1
    fig_height = 0.35 + 0.15 + strips * 0.22

    # One axes more than there are colormaps; the title goes on the first one.
    fig, axs = plt.subplots(nrows=n_maps + 1, figsize=(6.4, fig_height))
    fig.subplots_adjust(left=0.2, right=0.99,
                        top=1 - 0.35 / fig_height, bottom=0.15 / fig_height)
    axs[0].set_title(f'{category} colormaps', fontsize=14)

    # zip stops at the shorter sequence, so the last axes receives no strip.
    for strip_ax, cmap_name in zip(axs, cmap_list):
        strip_ax.imshow(gradient, aspect='auto', cmap=mpl.colormaps[cmap_name])
        strip_ax.text(-0.01, 0.5, cmap_name, va='center', ha='right', fontsize=10,
                      transform=strip_ax.transAxes)

    # Hide ticks and spines on every axes, including the unused one.
    for strip_ax in axs:
        strip_ax.set_axis_off()

    # Remember which colormaps were shown for this category.
    cmaps[category] = cmap_list
[84]:
# Draw a gradient strip for each of the colormap names listed in this section.
# NOTE(review): the section heading says "Diverging" but the figure title produced
# here reads "Sequential colormaps" - confirm which label is intended.
plot_color_gradients('Sequential',
                     ['exp1', 'exp2', 'meth1', 'meth2', 'diverging1', 'parula'])
../_images/notebooks_advanced_usage_41_0.png

Qualitative

  • cmap50

[85]:
# get_colormap("cmap50") # 50 different colors
[86]:
# Draw the gradient strip for the single qualitative colormap, 'cmap50'.
plot_color_gradients('Qualitative',
                     ['cmap50'])
../_images/notebooks_advanced_usage_44_0.png

How to use the built-in cmap?

Just pass the colormap's name to cmap, for example cmap="parula".

[87]:
# Recreate the sample metadata (df) and a fresh random 30 x 10 heatmap matrix;
# the cells above had replaced df with the AAAA1/BBBBB2 version.
df = pd.DataFrame(['GroupA'] * 5 + ['GroupB'] * 5, columns=['AB'])
df['CD'] = ['C'] * 3 + ['D'] * 3 + ['G'] * 4
df['EF'] = ['E'] * 6 + ['F'] * 2 + ['H'] * 2
df['F'] = np.random.normal(0, 1, 10)
df.index = ['sample' + str(i) for i in range(1, df.shape[0] + 1)]

df_heatmap = pd.DataFrame(np.random.randn(30, 10), columns=['sample' + str(i) for i in range(1, 11)])
df_heatmap.index = ["Fea" + str(i) for i in range(1, df_heatmap.shape[0] + 1)]
# One missing value at row Fea2, column sample3.
df_heatmap.iloc[1, 2] = np.nan

# Heatmap without annotations, colored with the built-in colormap name 'parula'.
plt.figure(figsize=(3.5, 5))
cm = ClusterMapPlotter(
        data=df_heatmap,
        col_cluster=True,row_cluster=True,
        col_split=df.AB,row_split=2,
        col_split_gap=0.5,row_split_gap=0.8,
        label='values',row_dendrogram=True,
        show_rownames=False,show_colnames=True,
        tree_kws={'row_cmap': 'Set1'},verbose=0,legend_gap=5,
        cmap='parula',xticklabels_kws={'labelrotation':-90,'labelcolor':'blue'},
        ylabel="Features",xlabel="Samples")
# plt.savefig("example0.pdf", bbox_inches='tight')
plt.show()
../_images/notebooks_advanced_usage_46_0.png
[88]:
# Same heatmap with the built-in colormap name 'meth2', now with row names shown on the right.
plt.figure(figsize=(3.5, 5))
cm = ClusterMapPlotter(
        data=df_heatmap,
        col_cluster=True,row_cluster=True,
        col_split=df.AB,row_split=2,
        col_split_gap=0.5,row_split_gap=0.8,
        label='values',row_dendrogram=True,
        show_rownames=True,show_colnames=True,row_names_side='right',
        tree_kws={'row_cmap': 'Set1'},verbose=0,legend_gap=5,
        cmap='meth2',xticklabels_kws={'labelrotation':-90,'labelcolor':'blue'})
plt.show()
../_images/notebooks_advanced_usage_47_0.png

How to force display all row/col ticklabels?

When the height or width is not big enough to display all xticklabels and yticklabels, some ticklabels will be hidden to avoid overlapping. For example:

[89]:
# Baseline for the tick-label example: yticklabels is not passed, and the figure
# is small (3.5 x 5) for 30 row names.
plt.figure(figsize=(3.5, 5))
cm = ClusterMapPlotter(
        data=df_heatmap,
        col_cluster=True,row_cluster=True,
        col_split=df.AB,row_split=2,
        col_split_gap=0.5,row_split_gap=0.8,
        label='values',row_dendrogram=True,
        show_rownames=True,show_colnames=True,row_names_side='right',
        tree_kws={'row_cmap': 'Set1'},verbose=0,legend_gap=5,
        cmap='meth2',xticklabels_kws={'labelrotation':-90,'labelcolor':'blue'})
plt.show()
../_images/notebooks_advanced_usage_49_0.png

To force display all ticklabels no matter whether the height or width is big enough, set parameters xticklabels/yticklabels to True:

[90]:
# Same call as the previous cell; the only difference is yticklabels=True.
plt.figure(figsize=(3.5, 5))
cm = ClusterMapPlotter(
        data=df_heatmap,
        col_cluster=True,row_cluster=True,
        col_split=df.AB,row_split=2,
        col_split_gap=0.5,row_split_gap=0.8,
        label='values',row_dendrogram=True,
        show_rownames=True,show_colnames=True,
        row_names_side='right',yticklabels=True,
        tree_kws={'row_cmap': 'Set1'},verbose=0,legend_gap=5,
        cmap='meth2',xticklabels_kws={'labelrotation':-90,'labelcolor':'blue'})
plt.show()
../_images/notebooks_advanced_usage_51_0.png