[1]:
import os,sys
%matplotlib inline
import matplotlib.pylab as plt
import pickle
import numpy as np
plt.rcParams['figure.dpi'] = 100
plt.rcParams['savefig.dpi']=300
# sys.path.append(os.path.expanduser("~/Projects/Github/PyComplexHeatmap"))
from PyComplexHeatmap import *
use_pch_style() # or plt.style.use('default') to restore default style
# plt.rcParams
# import matplotlib; print(matplotlib.__version__)
[2]:
# Font setup in a single rcParams update:
#   - use the sans-serif family, with Arial as its face
#   - set pdf.fonttype to 42
plt.rcParams.update({
    'font.family': 'sans serif',
    'font.sans-serif': 'Arial',
    'pdf.fonttype': 42,
})
Generate dataset¶
[3]:
# Generate the example dataset (random).
# Seed the legacy NumPy generator first so that "Restart Kernel -> Run All"
# reproduces the same numbers (and therefore the same figures) every time.
np.random.seed(0)

# Per-sample metadata: three categorical groupings and one numeric column.
df = pd.DataFrame(['GroupA'] * 5 + ['GroupB'] * 5, columns=['AB'])
df['CD'] = ['C'] * 3 + ['D'] * 3 + ['G'] * 4
df['EF'] = ['E'] * 6 + ['F'] * 2 + ['H'] * 2
df['F'] = np.random.normal(0, 1, 10)
df.index = ['sample' + str(i) for i in range(1, df.shape[0] + 1)]

# Per-sample numeric tables used by the boxplot / barplot / scatterplot annotations.
# Each table derives its index from its own row count instead of borrowing df_box's.
df_box = pd.DataFrame(np.random.randn(10, 4), columns=['Gene' + str(i) for i in range(1, 5)])
df_box.index = ['sample' + str(i) for i in range(1, df_box.shape[0] + 1)]
df_bar = pd.DataFrame(np.random.uniform(0, 10, (10, 2)), columns=['TMB1', 'TMB2'])
df_bar.index = ['sample' + str(i) for i in range(1, df_bar.shape[0] + 1)]
df_scatter = pd.DataFrame(np.random.uniform(0, 10, 10), columns=['Scatter'])
df_scatter.index = ['sample' + str(i) for i in range(1, df_scatter.shape[0] + 1)]

# Main matrix: 30 features (rows) x 10 samples (columns), with one missing value.
df_heatmap = pd.DataFrame(np.random.randn(30, 10), columns=['sample' + str(i) for i in range(1, 11)])
df_heatmap.index = ["Fea" + str(i) for i in range(1, df_heatmap.shape[0] + 1)]
df_heatmap.iloc[1, 2] = np.nan
[4]:
# Blank out one more value for sample4 in both the heatmap matrix and the
# boxplot table, then display the boxplot table.
df_heatmap.at['Fea4', 'sample4'] = np.nan
df_box.at['sample4', 'Gene4'] = np.nan
df_box
[4]:
| Gene1 | Gene2 | Gene3 | Gene4 | |
|---|---|---|---|---|
| sample1 | 2.187513 | -1.380853 | -0.480167 | 0.085030 |
| sample2 | 0.601946 | -0.323006 | -0.860937 | -0.205367 |
| sample3 | 0.337548 | -0.900402 | 0.148900 | 1.439385 |
| sample4 | -0.471896 | -0.337046 | 2.190533 | NaN |
| sample5 | 0.849067 | 1.225516 | -0.338290 | 1.243937 |
| sample6 | -1.045769 | 0.290131 | -0.659531 | -0.068667 |
| sample7 | -0.161038 | -1.058872 | -0.484389 | 0.783456 |
| sample8 | -0.178389 | 0.448634 | -0.365703 | 0.369459 |
| sample9 | 0.748063 | 0.673198 | -0.249985 | 1.274346 |
| sample10 | -1.711610 | -0.307186 | 1.698132 | 0.678074 |
Add selected rows labels¶
[5]:
# Label only the rows whose sample4 value is > 0.5; every other row gets None.
df_rows = df_heatmap.apply(lambda x:x.name if x.sample4 > 0.5 else None,axis=1)
df_rows=df_rows.to_frame(name='Selected')
# 'XY' tags each feature by its number: 'A' for Fea15 and above, otherwise 'B'.
df_rows['XY']=df_rows.index.to_series().apply(lambda x:'A' if int(x.replace('Fea',''))>=15 else 'B')
# Row annotations (axis=0): scatter, line and bar views of the rounded sample4
# values, followed by an anno_label built from df_rows.
row_ha = HeatmapAnnotation(
Scatter=anno_scatterplot(df_heatmap.sample4.apply(lambda x:round(x,2)),
height=12,cmap='jet',legend=False,grid=True,
legend_kws=dict(color='red')),
Line=anno_lineplot(df_heatmap.sample4.apply(lambda x:round(x,2)),
height=12,colors='red',linewidth=2,legend=False),
Bar=anno_barplot(df_heatmap.sample4.apply(lambda x:round(x,2)),
height=15,cmap='rainbow',legend=False,ylim=(-5,5)),
selected=anno_label(df_rows,colors='red',relpos=(-0.05,0.4)),
label_kws={'rotation':30,'horizontalalignment':'left','verticalalignment':'bottom'},
axis=0,verbose=0)
# Column annotations (axis=1): merged group labels, three categorical tracks
# with text, and a boxplot track from df_box.
col_ha = HeatmapAnnotation(
label=anno_label(df.AB, merge=True,rotation=10,
arrowprops = dict(visible=False),
),
AB=anno_simple(df.AB,add_text=True),
axis=1,
CD=anno_simple(df.CD,add_text=True),
EF=anno_simple(df.EF,add_text=True,
legend_kws={'frameon':True}),
G=anno_boxplot(df_box, cmap='jet',legend=False,grid=True),
verbose=0)
# Raw data range, printed for comparison with the vmin/vmax passed below.
print(np.nanmin(df_heatmap),np.nanmax(df_heatmap))
plt.figure(figsize=(5.5, 6.5))
# Clustered heatmap: columns split by df.AB, rows split into 2 groups.
cm = ClusterMapPlotter(
data=df_heatmap, top_annotation=col_ha,right_annotation=row_ha,
col_cluster=True,row_cluster=True,
col_split=df.AB,row_split=2, z_score=0,vmin=-2.2,vmax=2.3,
col_split_gap=0.5,row_split_gap=0.8,
row_dendrogram=True,col_dendrogram=False,row_dendrogram_size=15,
show_rownames=False,show_colnames=True,
tree_kws={'row_cmap': 'Set1'},verbose=0,legend_vgap=5,
cmap='RdYlBu_r',bezier=True,dotsize=2,
legend_kws=dict(label='test'), #label='values',
xticklabels_kws=dict(labelrotation=90,labelcolor='blue',labelsize=14,grid_color='red',bottom=True))
# for ax in cm.top_annotation.axes[-1,:]:
# ax.cla()
# Save the figure to a PDF in the working directory before showing it.
plt.savefig("example0.pdf", bbox_inches='tight')
plt.show()
# Show the vmin/vmax and legend settings stored on the plotter.
print(cm.kwargs['vmin'],cm.kwargs['vmax'],cm.legend_kws)
-3.7087202627688773 2.7682545505502714
-2.2 2.3 {'label': 'test', 'vmin': -2.2, 'vmax': 2.3, 'center': None}
[6]:
# Inspect the heatmap axes kept on the plotter (displayed as a 2x2 object array of Axes).
cm.heatmap_axes
[6]:
array([[<Axes: >, <Axes: >],
[<Axes: >, <Axes: >]], dtype=object)
Add annotations on the top of heatmap cells¶
[7]:
# Label only the rows whose sample4 value is > 0.5; every other row gets None.
df_rows = df_heatmap.apply(lambda x:x.name if x.sample4 > 0.5 else None,axis=1)
df_rows=df_rows.to_frame(name='Selected')
# 'XY' tags each feature by its number: 'A' for Fea15 and above, otherwise 'B'.
# It is used as row_split further down.
df_rows['XY']=df_rows.index.to_series().apply(lambda x:'A' if int(x.replace('Fea',''))>=15 else 'B')
row_ha = HeatmapAnnotation(
# S4 passes the rounded sample4 values with add_text=True; missing values become ''.
S4=anno_simple(df_heatmap.sample4.apply(lambda x:round(x,2) if not pd.isna(x) else ''),
add_text=True,height=10,legend=False,
text_kws={'rotation':0,'fontsize':10,'color':'blue'}),
# Scatter=anno_scatterplot(df_heatmap.sample4.apply(lambda x:round(x,2)),
# height=10),
Test=anno_barplot(df_heatmap.sample4.apply(lambda x:round(x,2)),
height=18,cmap='rainbow',grid=True),
selected=anno_label(df_rows,colors='red'),
axis=0,verbose=0,#wgap=4,
label_kws={'rotation':0,'horizontalalignment':'left',
'verticalalignment':'bottom'})
col_ha = HeatmapAnnotation(
label=anno_label(df.AB, merge=True,rotation=15),
AB=anno_simple(df.AB,add_text=True),axis=1,
# The CD text is given a bbox with a circular, unfilled, white-edged box style.
CD=anno_simple(df.CD,add_text=True,text_kws=dict(bbox={'boxstyle':'Circle','edgecolor':'white','fill':False},fontsize=8),
height=4.5),
EF=anno_simple(df.EF,add_text=True,
legend_kws={'frameon':False}),
Exp=anno_boxplot(df_box, cmap='turbo',grid=True),
verbose=0,#hgap=2
) #verbose=0 will turn off the log.
# Print the inputs so the figure can be checked against the underlying data.
print(df.head())
print(df_box.mean(axis=1))
print(df_heatmap.head())
plt.figure(figsize=(6, 8))
# Columns are split by df.AB and rows by the 'XY' tag computed above.
cm = ClusterMapPlotter(
data=df_heatmap, top_annotation=col_ha,right_annotation=row_ha,
col_split=df.AB,
row_split=df_rows.XY,
#col_split_gap=3.5,row_split_gap=2.5,
col_cluster=True,row_cluster=True,
label='values',row_dendrogram=False,
show_rownames=True,show_colnames=True,
verbose=0,legend_vgap=5,#legend_hpad=10,legend_vpad=5,
annot=False,fmt='.1g',linewidths=0.05,linecolor='gold',cmap='RdYlBu_r',
xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'},
yticklabels_kws=dict(labelcolor='red'),#subplot_gap=8
)
#subplot_gap controls the gap between main heatmap and column or row annotations
plt.show()
# Row and column orders after clustering; printed as one list per split group.
print(cm.row_order)
print(cm.col_order)
AB CD EF F
sample1 GroupA C E 0.045435
sample2 GroupA C E -0.820792
sample3 GroupA C E -0.381520
sample4 GroupA D E 1.558057
sample5 GroupA D E -0.065420
sample1 0.102881
sample2 -0.196841
sample3 0.256358
sample4 0.460530
sample5 0.745058
sample6 -0.370959
sample7 -0.230211
sample8 0.068500
sample9 0.611406
sample10 0.089352
dtype: float64
sample1 sample2 sample3 sample4 sample5 sample6 sample7 \
Fea1 -0.369817 -0.334013 -1.133481 0.283969 -0.220920 -1.615043 0.408399
Fea2 0.736265 -0.167285 NaN 0.021201 -0.192741 -1.376010 0.084740
Fea3 1.878100 -0.998456 1.944019 -0.655073 -0.420349 -1.213828 0.090542
Fea4 0.722164 -1.294405 0.398772 NaN 0.302977 -1.010801 0.045174
Fea5 1.922654 1.329243 -0.039535 0.731163 -0.613262 -0.224573 -0.333527
sample8 sample9 sample10
Fea1 -1.767529 0.077708 -1.155438
Fea2 -0.848125 -0.334524 -0.401966
Fea3 -0.021023 0.140168 0.145610
Fea4 -1.692063 -0.908440 0.587826
Fea5 0.129036 0.410582 -0.157031
[['Fea11', 'Fea12', 'Fea8', 'Fea4', 'Fea1', 'Fea2', 'Fea6', 'Fea9', 'Fea10', 'Fea13', 'Fea7', 'Fea14', 'Fea3', 'Fea5'], ['Fea16', 'Fea18', 'Fea22', 'Fea25', 'Fea20', 'Fea21', 'Fea28', 'Fea17', 'Fea24', 'Fea26', 'Fea27', 'Fea30', 'Fea15', 'Fea23', 'Fea19', 'Fea29']]
[['sample5', 'sample3', 'sample1', 'sample2', 'sample4'], ['sample10', 'sample8', 'sample9', 'sample6', 'sample7']]
Only plot the annotations¶
[8]:
# Rebuild the example tables for the annotation-only plots.
# Every table is indexed by the same labels: sample1 ... sample10.

# Sample metadata: two categorical columns and one random numeric column.
df = pd.DataFrame({
    'AB': ['AAAA1'] * 5 + ['BBBBB2'] * 5,
    'CD': ['C'] * 3 + ['D'] * 3 + ['G'] * 4,
})
df['F'] = np.random.normal(0, 1, 10)
df.index = [f'sample{i}' for i in range(1, len(df) + 1)]

# Boxplot input: 10 samples x 4 genes.
df_box = pd.DataFrame(
    np.random.randn(10, 4),
    columns=[f'Gene{i}' for i in range(1, 5)],
)
df_box.index = [f'sample{i}' for i in range(1, len(df_box) + 1)]

# Barplot / scatterplot / lineplot inputs, all re-using df_box's sample labels.
df_bar = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df_box.index.tolist(), columns=['TMB1', 'TMB2'],
)
df_scatter = pd.DataFrame(
    np.random.uniform(0, 10, 10),
    index=df_box.index.tolist(), columns=['Scatter'],
)
df_bar1 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df_box.index.tolist(), columns=['T1-A', 'T1-B'],
)
df_bar2 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df_box.index.tolist(), columns=['T2-A', 'T2-B'],
)
df_bar3 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df_box.index.tolist(), columns=['T3-A', 'T3-B'],
)
df_bar3.iat[5, 0] = np.nan  # one missing value: sample6 / T3-A
df_bar4 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 1)),
    index=df_box.index.tolist(), columns=['T4'],
)
df_bar4.iat[7, 0] = np.nan  # one missing value: sample8 / T4

# Dump every table, one after another (only the head of df_box).
print(
    df,
    df_box.head(),
    df_scatter,
    df_bar,
    df_bar1,
    df_bar2,
    df_bar3,
    df_bar4,
    sep='\n',
)
AB CD F
sample1 AAAA1 C 0.857937
sample2 AAAA1 C -0.870811
sample3 AAAA1 C -0.897133
sample4 AAAA1 D 0.293353
sample5 AAAA1 D -0.404538
sample6 BBBBB2 D -1.232761
sample7 BBBBB2 G 0.951264
sample8 BBBBB2 G -0.361155
sample9 BBBBB2 G 0.451165
sample10 BBBBB2 G 0.213719
Gene1 Gene2 Gene3 Gene4
sample1 -0.709382 -0.106931 -0.424066 1.362913
sample2 0.723315 0.800076 0.067149 1.829474
sample3 0.238146 -0.169403 -0.319038 0.718999
sample4 1.361002 1.085507 0.379784 0.441183
sample5 0.334794 0.217863 -0.558056 -1.826657
Scatter
sample1 8.216554
sample2 8.482098
sample3 7.604668
sample4 1.605833
sample5 4.903368
sample6 1.348598
sample7 0.023861
sample8 8.493241
sample9 4.575743
sample10 3.844640
TMB1 TMB2
sample1 5.965623 2.443817
sample2 8.546646 2.510858
sample3 0.782676 0.895993
sample4 6.942029 8.824860
sample5 2.555597 5.106013
sample6 4.029492 5.522492
sample7 7.194071 9.541492
sample8 8.234834 4.642008
sample9 6.957621 9.646708
sample10 8.722453 2.258993
T1-A T1-B
sample1 8.974470 4.920883
sample2 9.248793 5.524964
sample3 7.138581 9.183642
sample4 1.760228 4.954536
sample5 6.039348 2.486589
sample6 8.050732 7.901286
sample7 3.258294 2.482795
sample8 3.950215 4.546003
sample9 9.166755 0.442683
sample10 1.989800 6.856756
T2-A T2-B
sample1 1.260332 0.054708
sample2 8.630146 9.714847
sample3 0.809092 5.798171
sample4 1.511556 6.270546
sample5 2.486518 0.510064
sample6 9.579535 4.952142
sample7 9.292721 8.212768
sample8 3.728597 9.083941
sample9 6.995197 2.644324
sample10 6.332444 5.953887
T3-A T3-B
sample1 9.333338 6.984959
sample2 1.364911 1.636278
sample3 6.091161 4.116743
sample4 7.673169 7.839617
sample5 2.357917 1.026705
sample6 NaN 0.051534
sample7 7.400061 8.654729
sample8 5.270826 3.245087
sample9 0.386638 0.432719
sample10 4.159563 8.533462
T4
sample1 1.965158
sample2 0.990276
sample3 3.860812
sample4 3.309407
sample5 1.689017
sample6 7.998592
sample7 4.415705
sample8 NaN
sample9 8.981833
sample10 1.133716
[9]:
# Draw the annotations on their own (plot=True); no heatmap is created in this cell.
plt.figure(figsize=(4, 8))
col_ha = HeatmapAnnotation(
label=anno_label(df.AB, merge=True,rotation=15),
AB=anno_simple(df.AB,add_text=True), axis=1,
CD=anno_simple(df.CD, add_text=True,text_kws={'color':'black'}),
Exp=anno_boxplot(df_box, cmap='turbo',grid=True),
Scatter=anno_scatterplot(df_scatter,legend=True,grid=True),
# Colors given as a dict keyed by the df_bar2 column names.
Line=anno_lineplot(df_bar2,linewidth=4,colors={'T2-B':'orangered','T2-A':'yellowgreen'},
marker='D',legend=True), #colors=['orangered','yellowgreen']
TMB_bar=anno_barplot(df_bar,legend=True,cmap='Set1'),
Bar1=anno_barplot(df_bar1,legend=True,colors=['red','black']), #colors can be str, list, tuple or dict
Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
Bar2=anno_barplot(df_bar2,legend=True,colors={'T2-B':'orangered','T2-A':'yellowgreen'}),
Bar3=anno_barplot(df_bar3,legend=True,cmap='Paired'),
plot=True,legend=True,legend_vgap=5,hgap=4,
# legend_order=False,
# legend_order=['AB','CD','Line','Bar1'],legend_width=20
)
# Add the sample names as tick labels (blue, rotated by -30).
col_ha.show_ticklabels(df.index.tolist(),fontdict={'color':'blue'},rotation=-30)
plt.show()
Plotting HeatmapAnnotations
Collecting annotation legends..
Change orientation to down and add extra space¶
[10]:
# Same tracks as a column annotation (axis=1), but with the anno_label listed last.
# NOTE(review): the downward orientation appears to follow from the anno_label
# position in the keyword order - confirm against the PyComplexHeatmap docs.
plt.figure(figsize=(4, 8))
row_ha = HeatmapAnnotation(
TMB_bar=anno_barplot(df_bar,legend=True,cmap='Set1'),
Bar1=anno_barplot(df_bar1,legend=True,cmap='Dark2'),
Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
Bar2=anno_barplot(df_bar2,legend=True,cmap='tab10'),
Bar3=anno_barplot(df_bar3,legend=True,cmap='Paired'),
Scatter=anno_scatterplot(df_scatter),
Exp=anno_boxplot(df_box, cmap='turbo',legend=True,grid=True),
# Explicit category -> color mapping for the CD track.
CD=anno_simple(df.CD, colors={'C': 'red', 'D': 'gray', 'G': 'yellow'},
add_text=True,legend=True,text_kws={'color':'black'}),
AB=anno_simple(df.AB,add_text=True,legend=True),
label=anno_label(df.AB, merge=True,rotation=-15),
plot=True,plot_legend=False,legend_hpad=13,axis=1,hgap=1
)
row_ha.show_ticklabels(df.index.tolist(),fontdict={'color':'blue'},rotation=30)
plt.show()
# Here, we can use hgap (when axis=1) or wgap (when axis=0) to control the width or height of the space between different annotations.
Plotting HeatmapAnnotations
Change orientation to the left¶
[11]:
# Row annotation (axis=0) with the anno_label listed first and the track labels
# placed at the bottom (label_side='bottom').
plt.figure(figsize=(8, 4))
row_ha = HeatmapAnnotation(
label=anno_label(df.AB, merge=True,rotation=15),
AB=anno_simple(df.AB,add_text=True,legend=True,
#text_kws=dict(bbox={"pad":0},va='center',ha='center',rotation_mode='anchor')
),
CD=anno_simple(df.CD,add_text=True,legend=True),
Exp=anno_boxplot(df_box, cmap='turbo',legend=True),
Scatter=anno_scatterplot(df_scatter),
TMB_bar=anno_barplot(df_bar,legend=True,cmap='Set1'),
Bar1=anno_barplot(df_bar1,legend=True,cmap='Dark2'),
Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
Bar2=anno_barplot(df_bar2,legend=True,cmap='tab10'),
Bar3=anno_barplot(df_bar3,legend=True,cmap='Paired'),
plot=True,legend=True,legend_vgap=5,
axis=0,legend_hpad=20,label_side='bottom',wgap=3,
)
row_ha.show_ticklabels(df.index.tolist(),fontdict={'color':'blue'},rotation=0)
plt.show()
Plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
Change orientation to the right¶
[12]:
# Row annotation (axis=0) with the anno_label listed last; the text inside the
# CD and AB tracks is rotated by -90.
plt.figure(figsize=(8, 4))
row_ha = HeatmapAnnotation(
TMB_bar=anno_barplot(df_bar,legend=True,cmap='Set1'),
Bar1=anno_barplot(df_bar1,legend=True,cmap='Dark2'),
Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
Bar2=anno_barplot(df_bar2,legend=True,cmap='tab10'),
Bar3=anno_barplot(df_bar3,legend=True,cmap='Paired'),
Scatter=anno_scatterplot(df_scatter,grid=True),
Exp=anno_boxplot(df_box, cmap='turbo',legend=True,grid=True),
CD=anno_simple(df.CD, colors={'C': 'red', 'D': 'gray', 'G': 'green'},
add_text=True,legend=True,
text_kws={'rotation':-90}),
AB=anno_simple(df.AB,add_text=True,legend=True,
text_kws={'rotation':-90,'color':'black'}),
label=anno_label(df.AB, merge=True,rotation=15),
plot=True,legend=True,legend_hpad=13,legend_vgap=5,axis=0,wgap=3,
)
row_ha.show_ticklabels(df.index.tolist(),fontdict={'color':'black'},rotation=0)
plt.show()
Plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
Changing orientation using parameter orientation¶
By default, if there is no anno_label in the annotation, the orientation is determined by the parameter orientation.
[13]:
# No anno_label here, so the orientation is set explicitly with orientation='left'.
plt.figure(figsize=(8, 4))
col_ha = HeatmapAnnotation(
AB=anno_simple(df.AB,add_text=True,legend=True),
CD=anno_simple(df.CD,add_text=True,legend=True),
Exp=anno_boxplot(df_box, cmap='turbo',legend=True),
Scatter=anno_scatterplot(df_scatter,grid=True),
TMB_bar=anno_barplot(df_bar,legend=True,cmap='Set1'),
Bar1=anno_barplot(df_bar1,legend=True,cmap='Dark2'),
Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
Bar2=anno_barplot(df_bar2,legend=True,cmap='tab10'),
Bar3=anno_barplot(df_bar3,legend=True,cmap='Paired'),
plot=True,legend=True,axis=0,
legend_vgap=5,orientation='left',
)
plt.show()
Plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
[14]:
# No anno_label here, so the orientation is set explicitly with orientation='right'.
# The text inside the AB and CD tracks is rotated by -90 and enlarged to fontsize 14.
plt.figure(figsize=(8, 4))
col_ha = HeatmapAnnotation(
AB=anno_simple(df.AB,add_text=True,legend=True,
text_kws={'rotation':-90,'fontsize':14,'color':'black'}),
CD=anno_simple(df.CD,add_text=True,legend=True,
text_kws={'rotation':-90,'fontsize':14,'color':'white'}),
Exp=anno_boxplot(df_box, cmap='turbo',legend=True),
Scatter=anno_scatterplot(df_scatter),
TMB_bar=anno_barplot(df_bar,legend=True,cmap='Set1'),
Bar1=anno_barplot(df_bar1,legend=True,cmap='Dark2'),
Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
Bar2=anno_barplot(df_bar2,legend=True,cmap='tab10'),
Bar3=anno_barplot(df_bar3,legend=True,cmap='Paired'),
plot=True,legend=True,axis=0,wgap=3,
legend_vgap=5,orientation='right',
)
plt.show()
Plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
Add multiple heatmap annotations using for loop¶
Typically, we can create a heatmap annotation using the following code:
col_ha = HeatmapAnnotation(
Group=anno_simple(df_cols.hypomethylated_samples,colors=sample_group_color_dict,legend=True),
CellType=anno_simple(df_cols.CellType,colors=ct_color_dict,legend=ct_legend),
M1=anno_simple(df_cols['M1'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M1'}),
verbose=0,label_side='right',label_kws={'horizontalalignment':'left'})
But what if we have many annotations, for example:
col_ha = HeatmapAnnotation(
Group=anno_simple(df_cols.hypomethylated_samples,colors=sample_group_color_dict,legend=True),
CellType=anno_simple(df_cols.CellType,colors=ct_color_dict,legend=ct_legend),
M1=anno_simple(df_cols['M1'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M1'}),
M2=anno_simple(df_cols['M2'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M2'}),
M3=anno_simple(df_cols['M3'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M3'}),
.....
verbose=0,label_side='right',label_kws={'horizontalalignment':'left'})
In this case, we can create a dict with the annotation names as keys and the annotations as values:
col_ha_dict={
'Group':anno_simple(df_cols.hypomethylated_samples,colors=sample_group_color_dict,legend=True),
'CellType':anno_simple(df_cols.CellType,colors=ct_color_dict,legend=ct_legend)
}
for col in sample_cols:
col_ha_dict[col]=anno_simple(df_cols[col],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':col})
col_ha = HeatmapAnnotation(**col_ha_dict,
verbose=0,label_side='right',label_kws={'horizontalalignment':'left'})
Cluster between groups and cluster within groups¶
Similar to cluster_between_groups and cluster_within_groups in R (https://jokergoo.github.io/2021/03/05/cluster-groups-in-complexheatmap/)
cluster within groups: col_split=*, col_cluster=True¶
[15]:
# Add a five-level grouping (G1..G5) of the ten samples to the metadata frame.
# Note: this adds a column to df in place.
df['Groups']=['G1']+['G2']+['G3']*5+['G4']+['G5']*2
col_ha = HeatmapAnnotation(
Groups=anno_simple(df.Groups,add_text=True,text_kws={'color':'black'}),
AB=anno_simple(df.AB,add_text=True),axis=1,
Exp=anno_boxplot(df_box, cmap='turbo'),
verbose=0) #verbose=0 will turn off the log.
plt.figure(figsize=(6, 8))
# Columns are split by df.Groups and col_cluster=True (the "cluster within groups" case).
cm = ClusterMapPlotter(
data=df_heatmap, top_annotation=col_ha,
col_split=df.Groups,col_split_gap=2,
col_cluster=True,row_cluster=True,col_dendrogram=True,
label='values',show_rownames=True,show_colnames=True,
tree_kws={'col_cmap': 'Set1'},verbose=0,legend_vgap=7,
annot=True,fmt='.1g',linewidths=0.05,linecolor='gold',cmap='RdYlBu_r',
xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'},
ylabel='Features',
legend_order=['AB','Groups','Exp','values'] #change legend order
)
plt.show()
cluster_between_groups: col_split=*, col_split_order="cluster_between_groups",col_cluster=False¶
[16]:
# Columns are split by df.Groups with col_split_order="cluster_between_groups"
# and col_cluster=False; rows are split into 2 groups.
plt.figure(figsize=(8, 10))
cm = ClusterMapPlotter(
data=df_heatmap, top_annotation=col_ha,
col_split=df.Groups, col_split_order="cluster_between_groups",
col_split_gap=2,col_cluster=False,
row_cluster=True,col_dendrogram=True,row_dendrogram_size=35,col_dendrogram_size=25,
row_split=2,row_split_gap=1,row_dendrogram=True,
label='values',show_rownames=True,show_colnames=True,bezier=True,dotsize=8,
tree_kws={'colors':'blue','row_cmap':'Set1','col_cmap':'Paired'},
verbose=0,legend_vgap=7,
linewidths=0.05,linecolor='gold',cmap='RdYlBu_r',
xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'},
ylabel='Features')
plt.show()
cluster_within_groups && cluster_between_groups: col_split=*, col_split_order="cluster_between_groups",col_cluster=True¶
[17]:
# Columns are split by two metadata columns at once (AB and Groups), with
# col_split_order="cluster_between_groups" and col_cluster=True; rows are split into 3 groups.
plt.figure(figsize=(6, 8))
cm = ClusterMapPlotter(
data=df_heatmap, top_annotation=col_ha,
col_split=df.loc[:,['AB','Groups']], col_split_order="cluster_between_groups",
col_split_gap=2,col_cluster=True,row_split_gap=1.5,
row_split=3,#row_split_order='cluster_between_groups',
row_cluster=True,col_dendrogram=True,row_dendrogram=True,
label='values',show_rownames=True,show_colnames=True,
tree_kws={'colors':'blue'},verbose=0,legend_vgap=7,
annot=True,fmt='.1g',linewidths=0.05,linecolor='gold',cmap='RdYlBu_r',
xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'},
ylabel='Features')
plt.show()
[18]:
# `label_kws` in `HeatmapAnnotation` controls the heatmap annotation labels
col_ha = HeatmapAnnotation(
Groups=anno_simple(df.Groups,add_text=True,text_kws={'color':'black'}),
AB=anno_simple(df.AB,add_text=True),axis=1,
Exp=anno_boxplot(df_box, cmap='turbo',grid=True),
verbose=0,label_side='right'
)
# `xticklabels_kws` and `yticklabels_kws` control the ticklabels for the heatmap.
plt.figure(figsize=(6, 8))
# The column groups are placed in an explicit order via the col_split_order list.
cm = ClusterMapPlotter(data=df_heatmap, top_annotation=col_ha,
col_split=df.Groups,col_split_order=['G2','G1','G5','G4','G3'],
col_split_gap=4.5,col_cluster=True,
row_cluster=True,col_dendrogram=True,
label='values',show_rownames=True,show_colnames=True,
row_names_side='left',
tree_kws={'col_cmap':'Set1'},verbose=0,legend_vgap=7,
linewidths=0.05,linecolor='gold',cmap='RdYlBu_r',
xticklabels_kws=dict(labelrotation=-45,labelcolor='purple',labelsize=14),
#more parameters for [x/y]_ticklabels_kws, see: matplotlib.axes.Axes.tick_params or ?ax.tick_params
xlabel='Samples',ylabel="Features",
xlabel_kws=dict(color='white',fontsize=14),
ylabel_kws=dict(color='blue',fontsize=14,labelpad=45), #increase labelpad manually using labelpad (points)
xlabel_bbox_kws=dict(facecolor='black'),
ylabel_bbox_kws=dict(facecolor='chocolate',edgecolor='red'),
)
# Save the figure to a PDF in the working directory before showing it.
plt.savefig("test.pdf",bbox_inches='tight')
plt.show()
Custom annotation¶
[19]:
def _significance_stars(value):
    """Return the star label shown on top of one heatmap cell.

    '∗∗∗' for value >= 2, '∗∗' for value >= 1, '∗' for value > 0 and '' otherwise.
    NaN fails every comparison, so missing cells also get ''.
    """
    if value >= 2:
        return '∗∗∗'
    if value >= 1:
        return '∗∗'
    if value > 0:
        return '∗'
    return ''

# pandas 2.1 deprecated DataFrame.applymap in favour of DataFrame.map; use the new
# name when the installed pandas has it and fall back to applymap otherwise.
_elementwise = df_heatmap.map if hasattr(type(df_heatmap), 'map') else df_heatmap.applymap
annot = _elementwise(_significance_stars)
# To center the asterisks vertically, use ∗ (ASTERISK OPERATOR, U+2217) instead of the normal *; see: https://unicode-explorer.com/c/2217
plt.figure(figsize=(5, 6.5))
# annot receives the pre-built `annot` frame instead of a boolean, and fmt=None is passed with it.
cm = ClusterMapPlotter(
data=df_heatmap, top_annotation=col_ha,
annot=annot,fmt=None,annot_kws={'color':'white','fontname':'Courier'},
col_split=df.Groups, col_split_order="cluster_between_groups",
col_cluster=True,row_cluster=True,
label='values',
tree_kws={'col_cmap': 'Set1'},verbose=0,legend_vgap=7,
linewidths=0.05,linecolor='white',cmap='RdYlBu_r',
xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'})
plt.show()
Custom linkage¶
[20]:
import fastcluster
# custom column linkage
# The matrix is transposed so each row is one sample; a sample's NaNs are filled
# with that sample's median before the linkage is computed.
linkage = fastcluster.linkage(df_heatmap.T.apply(lambda x:x.fillna(x.median()),axis=1), method='average', metric='canberra')
print("df_heatmap shape:",df_heatmap.shape,"\nlinkage shape:",linkage.shape,"\n",linkage)
plt.figure(figsize=(4, 6))
# The precomputed linkage is handed to the column dendrogram through col_dendrogram_kws.
cm = ClusterMapPlotter(
data=df_heatmap, top_annotation=col_ha,z_score=0,
col_cluster=True,row_cluster=True,show_rownames=True,show_colnames=True,
label='values',col_dendrogram_kws=dict(linkage=linkage),col_dendrogram=True,
tree_kws={'col_cmap': 'Set1'},verbose=0,legend_vgap=7,
linewidths=0.05,linecolor='white',cmap='RdYlBu_r',
xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'})
plt.show()
df_heatmap shape: (30, 10)
linkage shape: (9, 4)
[[ 2. 9. 18.27233765 2. ]
[ 0. 1. 18.48177502 2. ]
[ 4. 5. 19.6472128 2. ]
[ 7. 10. 19.93972364 3. ]
[ 8. 12. 20.24478861 3. ]
[ 3. 11. 20.3717095 3. ]
[14. 15. 21.93488246 6. ]
[ 6. 13. 21.99672826 4. ]
[16. 17. 22.35515103 10. ]]
[21]:
# (Re)assign the five-level grouping (G1..G5) of the ten samples on df.
df['Groups']=['G1']+['G2']+['G3']*5+['G4']+['G5']*2
plt.figure(figsize=(4, 6))
# Same precomputed column linkage as before, now combined with a 2-way row split.
cm = ClusterMapPlotter(
data=df_heatmap, top_annotation=col_ha,
col_cluster=True,row_cluster=True,show_rownames=True,show_colnames=True,
row_split=2,row_split_gap=3,row_dendrogram=True,
label='values',col_dendrogram_kws=dict(linkage=linkage),col_dendrogram=True,
tree_kws={'col_cmap': 'Set1','row_cmap':'Dark2'},verbose=0,legend_vgap=7,
linewidths=0.05,linecolor='white',cmap='RdYlBu_r',
xticklabels_kws={'labelrotation':-45,'labelcolor':'blue'})
plt.show()
Image annotation¶
[29]:
# Rebuild the example tables for the image-annotation demo.
# Every table is indexed by the same labels: sample1 ... sample10.

# Sample metadata: two categorical columns and one random numeric column.
df = pd.DataFrame({
    'AB': ['AAAA1'] * 5 + ['BBBBB2'] * 5,
    'CD': ['C'] * 3 + ['D'] * 3 + ['G'] * 4,
})
df['F'] = np.random.normal(0, 1, 10)
df.index = [f'sample{i}' for i in range(1, len(df) + 1)]

# Boxplot input: 10 samples x 4 genes.
df_box = pd.DataFrame(
    np.random.randn(10, 4),
    columns=[f'Gene{i}' for i in range(1, 5)],
)
df_box.index = [f'sample{i}' for i in range(1, len(df_box) + 1)]

# Barplot / scatterplot inputs, all re-using df_box's sample labels.
df_bar = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df_box.index.tolist(), columns=['TMB1', 'TMB2'],
)
df_scatter = pd.DataFrame(
    np.random.uniform(0, 10, 10),
    index=df_box.index.tolist(), columns=['Scatter'],
)
df_bar1 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df_box.index.tolist(), columns=['T1-A', 'T1-B'],
)
df_bar2 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df_box.index.tolist(), columns=['T2-A', 'T2-B'],
)
df_bar3 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 2)),
    index=df_box.index.tolist(), columns=['T3-A', 'T3-B'],
)
df_bar3.iat[5, 0] = np.nan  # one missing value: sample6 / T3-A
df_bar4 = pd.DataFrame(
    np.random.uniform(0, 10, (10, 1)),
    index=df_box.index.tolist(), columns=['T4'],
)
df_bar4.iat[7, 0] = np.nan  # one missing value: sample8 / T4

# One image location per sample: mostly remote URLs, plus two bare file names.
df_img = pd.DataFrame(
    {
        'path': [
            'https://motifcollections.aertslab.org/v10nr_clust/logos/metacluster_136.1.png',
            'https://motifcollections.aertslab.org/v10nr_clust/logos/metacluster_135.7.png',
            'https://cdn3.iconfinder.com/data/icons/family-member-flat-happy-family-day/512/Brother-512.png',
            'https://cdn3.iconfinder.com/data/icons/family-member-flat-happy-family-day/512/Sister-512.png',
            'https://img.freepik.com/free-vector/sticker-design-with-cute-mouse-isolated_1308-59360.jpg',
            'https://motifcollections.aertslab.org/v10nr_clust/logos/metacluster_131.8.png',
            'https://img.freepik.com/premium-vector/vector-illustration-gorilla-isolated-white-background-cartoon-style_1151-66575.jpg',
            '2.png',
            '1.jpeg',
            'https://cdn3.iconfinder.com/data/icons/family-member-flat-happy-family-day/512/Brother-520.png',
        ],
    },
    index=df_box.index.tolist(),
)
[30]:
# Annotation-only figure (plot=True) that ends with an image track built from df_img.path.
plt.figure(figsize=(16, 4))
col_ha = HeatmapAnnotation(
label=anno_label(df.AB, merge=True,rotation=15),
AB=anno_simple(df.AB,add_text=True,legend=True), axis=1,
CD=anno_simple(df.CD, add_text=True,legend=True,text_kws={'color':'black'}),
Exp=anno_boxplot(df_box, cmap='turbo',legend=True),
Scatter=anno_scatterplot(df_scatter),
Bar1=anno_barplot(df_bar1,legend=True,cmap='Dark2'),
Bar4=anno_barplot(df_bar4,legend=True,cmap='turbo'),
# NOTE(review): border_color=255 is presumably a pixel intensity (white) - confirm in the anno_img docs.
Img=anno_img(df_img.path,border_width=5,border_color=255,height=15),
plot=True,legend=True,legend_vgap=5,hgap=0.5)
col_ha.show_ticklabels(df.index.tolist(),fontdict={'color':'blue'},rotation=-30)
plt.show()
Plotting HeatmapAnnotations
Collecting annotation legends..
How to force display all row/col ticklabels?¶
When the height or width is not big enough to display all xticklabels and yticklabels, some ticklabels will be hidden to avoid overlapping. For example:
[31]:
# Small figure with default tick-label handling: labels that would overlap are hidden.
plt.figure(figsize=(3.5, 5))
cm = ClusterMapPlotter(
data=df_heatmap,
col_cluster=True,row_cluster=True,
col_split=df.AB,row_split=2,
col_split_gap=0.5,row_split_gap=0.8,
label='values',row_dendrogram=True,
show_rownames=True,show_colnames=True,row_names_side='right',
tree_kws={'row_cmap': 'Set1'},verbose=0,legend_vgap=5,
cmap='meth2',xticklabels_kws={'labelrotation':-90,'labelcolor':'blue'})
plt.show()
To force display all ticklabels no matter whether the height or width is big enough, set parameters xticklabels/yticklabels to True:
[32]:
# Same small figure, but yticklabels=True forces every row label to be drawn
# even when there is not enough height for all of them.
plt.figure(figsize=(3.5, 5))
cm = ClusterMapPlotter(
data=df_heatmap,
col_cluster=True,row_cluster=True,
col_split=df.AB,row_split=2,
col_split_gap=0.5,row_split_gap=0.8,
label='values',row_dendrogram=True,
show_rownames=True,show_colnames=True,
row_names_side='right',yticklabels=True,
tree_kws={'row_cmap': 'Set1'},verbose=0,legend_vgap=5,
cmap='meth2',xticklabels_kws={'labelrotation':-90,'labelcolor':'blue'})
plt.show()