[61]:
# Standard-library modules; os and sys are used below to extend the import path.
import os,sys
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): matplotlib.pylab is the legacy pylab interface; a later cell
# re-imports matplotlib.pyplot under the same `plt` name.
import matplotlib.pylab as plt
# NOTE(review): pickle is not referenced anywhere in the visible cells.
import pickle
# Resolution of figures shown in the notebook vs. figures written by savefig.
plt.rcParams['figure.dpi'] = 100
plt.rcParams['savefig.dpi']=300
# Put a local checkout of PyComplexHeatmap on the import path.
# NOTE(review): this path is machine-specific; an installed package would not need it.
sys.path.append(os.path.expanduser("~/Projects/Github/PyComplexHeatmap"))
import PyComplexHeatmap
print(PyComplexHeatmap.__version__)
# NOTE(review): later cells use `pd` and `np` without importing them, so they
# presumably arrive through this star import — confirm.
from PyComplexHeatmap import *
1.7.2.dev0+g8abf70a.d20240415
[62]:
# Font configuration, applied in one call:
#   * draw text with the sans-serif family, preferring Arial
#   * write PDFs with pdf.fonttype 42
plt.rcParams.update({
    'font.family': 'sans serif',
    'font.sans-serif': 'Arial',
    'pdf.fonttype': 42,
})
Generate dataset¶
[63]:
# Generate the example dataset (random values).
# The global NumPy RNG is seeded first so that "Restart Kernel -> Run All"
# reproduces the same numbers, and therefore the same figures, every time.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

# All per-sample tables describe the same ten samples, so they share one list
# of row labels instead of each deriving its index from another frame's shape.
sample_ids = ['sample' + str(i) for i in range(1, 11)]

# Sample metadata: three categorical groupings plus one continuous column.
df = pd.DataFrame(['GroupA'] * 5 + ['GroupB'] * 5, columns=['AB'], index=sample_ids)
df['CD'] = ['C'] * 3 + ['D'] * 3 + ['G'] * 4
df['EF'] = ['E'] * 6 + ['F'] * 2 + ['H'] * 2
df['F'] = np.random.normal(0, 1, 10)

# Per-sample numeric tables used by the boxplot / barplot / scatterplot annotations.
df_box = pd.DataFrame(np.random.randn(10, 4),
                      columns=['Gene' + str(i) for i in range(1, 5)],
                      index=sample_ids)
df_bar = pd.DataFrame(np.random.uniform(0, 10, (10, 2)),
                      columns=['TMB1', 'TMB2'], index=sample_ids)
df_scatter = pd.DataFrame(np.random.uniform(0, 10, 10),
                          columns=['Scatter'], index=sample_ids)

# Main heatmap matrix: 30 features (rows) x 10 samples (columns),
# with one missing value planted at row 1, column 2.
df_heatmap = pd.DataFrame(np.random.randn(30, 10), columns=sample_ids)
df_heatmap.index = ["Fea" + str(i) for i in range(1, df_heatmap.shape[0] + 1)]
df_heatmap.iloc[1, 2] = np.nan
Add selected rows labels¶
[64]:
# Row labels: a feature keeps its own name when its sample4 value is above 0.5,
# every other feature gets None.
df_rows = (
    df_heatmap
    .apply(lambda row: row.name if row.sample4 > 0.5 else None, axis=1)
    .to_frame(name='Selected')
)
# Two-way split of the features by number: Fea15 and above -> 'A', the rest -> 'B'.
df_rows['XY'] = df_rows.index.to_series().apply(
    lambda fea: 'A' if int(fea.replace('Fea', '')) >= 15 else 'B'
)

# Row annotations (axis=0): scatter, line and bar views of the rounded sample4
# values, followed by the selected-row labels.
row_ha = HeatmapAnnotation(
    Scatter=anno_scatterplot(
        df_heatmap['sample4'].apply(lambda v: round(v, 2)),
        height=12, cmap='jet', legend=False, grid=True,
    ),
    Line=anno_lineplot(
        df_heatmap['sample4'].apply(lambda v: round(v, 2)),
        height=12, colors='red', linewidth=2, legend=False,
    ),
    Bar=anno_barplot(
        df_heatmap['sample4'].apply(lambda v: round(v, 2)),
        height=15, cmap='rainbow', legend=False,
    ),
    selected=anno_label(df_rows, colors='red', relpos=(-0.05, 0.4)),
    label_kws={
        'rotation': 30,
        'horizontalalignment': 'left',
        'verticalalignment': 'bottom',
    },
    axis=0,
    verbose=0,
)

# Column annotations (axis=1).
col_ha = HeatmapAnnotation(
    # `visible` inside arrowprops controls whether the arrow is shown
    label=anno_label(df.AB, merge=True, rotation=10,
                     arrowprops={'visible': False}),
    AB=anno_simple(df.AB, add_text=True),
    CD=anno_simple(df.CD, add_text=True),
    EF=anno_simple(df.EF, add_text=True, legend_kws={'frameon': True}),
    G=anno_boxplot(df_box, cmap='jet', legend=False, grid=True),
    axis=1,
    verbose=0,
)

# Value range of the matrix, ignoring the NaN entry.
print(np.nanmin(df_heatmap), np.nanmax(df_heatmap))

plt.figure(figsize=(5.5, 6.5))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    right_annotation=row_ha,
    # clustering and splitting
    col_cluster=True,
    row_cluster=True,
    col_split=df.AB,
    row_split=2,
    z_score=0,
    col_split_gap=0.5,
    row_split_gap=0.8,
    # dendrograms
    row_dendrogram=True,
    col_dendrogram=False,
    row_dendrogram_size=15,
    tree_kws={'row_cmap': 'Set1'},
    # labels, legend and colors
    label='values',
    show_rownames=False,
    show_colnames=True,
    legend_gap=5,
    cmap='RdYlBu_r',
    xticklabels_kws={'labelrotation': -90, 'labelcolor': 'blue'},
    verbose=0,
)
plt.savefig("example0.pdf", bbox_inches='tight')
plt.show()
-2.6321021225640213 2.8811702820956677
Add annotations on the top of heatmap cells¶
[65]:
# Row labels: a feature keeps its own name when its sample4 value is above 0.5,
# every other feature gets None.
df_rows = (
    df_heatmap
    .apply(lambda row: row.name if row.sample4 > 0.5 else None, axis=1)
    .to_frame(name='Selected')
)
# Row grouping used for row_split below: Fea15 and above -> 'A', the rest -> 'B'.
df_rows['XY'] = df_rows.index.to_series().apply(
    lambda fea: 'A' if int(fea.replace('Fea', '')) >= 15 else 'B'
)

# Row annotations (axis=0). A scatterplot of the same values could be added
# with anno_scatterplot(..., height=10); `wgap` is another option available here.
row_ha = HeatmapAnnotation(
    S4=anno_simple(
        df_heatmap['sample4'].apply(lambda v: round(v, 2)),
        add_text=True,
        height=10,
        text_kws={'rotation': 0, 'fontsize': 10, 'color': 'black'},
    ),
    Test=anno_barplot(
        df_heatmap['sample4'].apply(lambda v: round(v, 2)),
        height=18, cmap='rainbow',
    ),
    selected=anno_label(df_rows, colors='red'),
    axis=0,
    verbose=0,
    label_kws={
        'rotation': 0,
        'horizontalalignment': 'left',
        'verticalalignment': 'bottom',
    },
)

# Column annotations (axis=1); verbose=0 will turn off the log.
# `hgap` is another option available here.
col_ha = HeatmapAnnotation(
    label=anno_label(df.AB, merge=True, rotation=15),
    AB=anno_simple(df.AB, add_text=True),
    CD=anno_simple(
        df.CD,
        add_text=True,
        text_kws={
            'bbox': {'boxstyle': 'Circle', 'edgecolor': 'white', 'fill': False},
            'fontsize': 8,
        },
        height=4.5,
    ),
    EF=anno_simple(df.EF, add_text=True, legend_kws={'frameon': False}),
    Exp=anno_boxplot(df_box, cmap='turbo', grid=True),
    axis=1,
    verbose=0,
)

# Show the inputs before plotting.
print(df.head())
print(df_box.mean(axis=1))
print(df_heatmap.head())

plt.figure(figsize=(6, 8))
# Further options not used here: col_split_gap / row_split_gap,
# legend_hpad / legend_vpad, and subplot_gap (controls the gap between the
# main heatmap and the column or row annotations).
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    right_annotation=row_ha,
    # splitting and clustering
    col_split=df.AB,
    row_split=df_rows.XY,
    col_cluster=True,
    row_cluster=True,
    row_dendrogram=False,
    # names, legend
    label='values',
    show_rownames=True,
    show_colnames=True,
    legend_gap=5,
    verbose=0,
    # numbers written on top of the cells, cell borders and colormap
    annot=True,
    fmt='.1g',
    linewidths=0.05,
    linecolor='gold',
    cmap='RdYlBu_r',
    # tick label styling
    xticklabels_kws={'labelrotation': -45, 'labelcolor': 'blue'},
    yticklabels_kws={'labelcolor': 'red'},
)
plt.show()

# Row / column order after splitting and clustering.
print(cm.row_order)
print(cm.col_order)
AB CD EF F
sample1 GroupA C E -0.210367
sample2 GroupA C E -0.764762
sample3 GroupA C E 1.665616
sample4 GroupA D E -1.545737
sample5 GroupA D E 3.145600
sample1 0.002185
sample2 -0.004763
sample3 0.325769
sample4 1.436755
sample5 0.348452
sample6 -0.356842
sample7 0.111622
sample8 -0.557544
sample9 -0.094792
sample10 0.020230
dtype: float64
sample1 sample2 sample3 sample4 sample5 sample6 sample7 \
Fea1 -0.131741 -0.162128 -0.328656 0.588910 0.113312 0.466366 0.040714
Fea2 0.081925 -0.916003 NaN 0.598650 -0.771747 1.746257 0.375753
Fea3 -0.902882 -0.120029 0.172726 0.302578 0.395246 -1.188965 -1.224670
Fea4 -0.974029 -1.019292 -0.147058 -0.385203 0.931348 0.210785 1.821199
Fea5 0.082241 -0.627536 -0.373517 1.569405 0.037562 -0.828194 -0.177589
sample8 sample9 sample10
Fea1 0.304473 0.497133 -0.414034
Fea2 -0.125438 0.165879 -1.002507
Fea3 1.157401 -0.241850 0.352401
Fea4 -0.892112 -0.233361 -1.032746
Fea5 0.875993 1.381788 1.279071
[['Fea28', 'Fea21', 'Fea29', 'Fea27', 'Fea26', 'Fea15', 'Fea16', 'Fea19', 'Fea30', 'Fea17', 'Fea23', 'Fea22', 'Fea24', 'Fea18', 'Fea20', 'Fea25'], ['Fea11', 'Fea14', 'Fea9', 'Fea4', 'Fea8', 'Fea3', 'Fea5', 'Fea12', 'Fea1', 'Fea2', 'Fea7', 'Fea10', 'Fea6', 'Fea13']]
[['sample1', 'sample4', 'sample3', 'sample2', 'sample5'], ['sample6', 'sample7', 'sample8', 'sample9', 'sample10']]
Only plot the annotations¶
[66]:
# Dataset for the annotation-only examples.
# All tables describe the same ten samples, so they share one list of row
# labels; each constructor receives it directly instead of every index being
# rebuilt from df_box's shape.
sample_ids = ['sample' + str(i) for i in range(1, 11)]

# Sample metadata: two categorical groupings plus one continuous column.
df = pd.DataFrame(['AAAA1'] * 5 + ['BBBBB2'] * 5, columns=['AB'], index=sample_ids)
df['CD'] = ['C'] * 3 + ['D'] * 3 + ['G'] * 4
df['F'] = np.random.normal(0, 1, 10)

# Numeric per-sample tables (the random draws happen in the same order as before).
df_box = pd.DataFrame(np.random.randn(10, 4),
                      columns=['Gene' + str(i) for i in range(1, 5)],
                      index=sample_ids)
df_bar = pd.DataFrame(np.random.uniform(0, 10, (10, 2)),
                      columns=['TMB1', 'TMB2'], index=sample_ids)
df_scatter = pd.DataFrame(np.random.uniform(0, 10, 10),
                          columns=['Scatter'], index=sample_ids)
df_bar1 = pd.DataFrame(np.random.uniform(0, 10, (10, 2)),
                       columns=['T1-A', 'T1-B'], index=sample_ids)
df_bar2 = pd.DataFrame(np.random.uniform(0, 10, (10, 2)),
                       columns=['T2-A', 'T2-B'], index=sample_ids)
df_bar3 = pd.DataFrame(np.random.uniform(0, 10, (10, 2)),
                       columns=['T3-A', 'T3-B'], index=sample_ids)
df_bar3.iloc[5, 0] = np.nan  # one missing value in a two-column bar table
df_bar4 = pd.DataFrame(np.random.uniform(0, 10, (10, 1)),
                       columns=['T4'], index=sample_ids)
df_bar4.iloc[7, 0] = np.nan  # one missing value in a single-column bar table

# Show every input table.
print(df)
print(df_box.head())
print(df_scatter)
print(df_bar)
print(df_bar1)
print(df_bar2)
print(df_bar3)
print(df_bar4)
AB CD F
sample1 AAAA1 C 0.586862
sample2 AAAA1 C 1.246191
sample3 AAAA1 C -1.118067
sample4 AAAA1 D 1.186581
sample5 AAAA1 D 0.648017
sample6 BBBBB2 D -0.740133
sample7 BBBBB2 G 1.237010
sample8 BBBBB2 G -1.679448
sample9 BBBBB2 G 1.454375
sample10 BBBBB2 G -0.008855
Gene1 Gene2 Gene3 Gene4
sample1 -0.051486 -0.698328 -1.563904 1.145194
sample2 0.397775 0.563337 -0.554522 -0.283961
sample3 1.911433 -0.178098 -0.449513 0.436575
sample4 0.998321 -1.547375 0.153069 0.659142
sample5 -0.857548 0.045073 -0.116103 -1.709895
Scatter
sample1 2.312420
sample2 8.700443
sample3 2.594622
sample4 0.995570
sample5 6.346088
sample6 8.784254
sample7 4.548412
sample8 9.312224
sample9 9.279522
sample10 2.176052
TMB1 TMB2
sample1 3.283453 1.029316
sample2 8.081410 9.157657
sample3 4.618796 5.994200
sample4 4.024821 5.061905
sample5 6.009279 3.291284
sample6 5.844126 1.899835
sample7 6.129225 0.940908
sample8 1.691643 3.311886
sample9 1.342168 1.987989
sample10 1.568968 6.577185
T1-A T1-B
sample1 9.389534 7.654098
sample2 3.917390 2.732671
sample3 6.469047 3.691846
sample4 1.152763 8.272298
sample5 2.207963 4.467439
sample6 9.777025 4.860341
sample7 2.062939 5.930794
sample8 4.574324 4.723417
sample9 7.000079 2.968817
sample10 2.355413 0.396133
T2-A T2-B
sample1 3.330243 8.382224
sample2 0.536375 8.244936
sample3 2.155200 6.579543
sample4 9.374440 0.161295
sample5 2.301849 4.527089
sample6 6.317955 7.000506
sample7 4.266394 1.553810
sample8 4.835425 9.188559
sample9 5.556356 4.475371
sample10 3.264468 3.981251
T3-A T3-B
sample1 2.009180 0.472453
sample2 0.474146 3.230050
sample3 5.015374 3.327011
sample4 4.303717 5.684264
sample5 0.078473 2.806307
sample6 NaN 1.584169
sample7 4.086410 6.818322
sample8 8.464331 9.508005
sample9 3.717501 6.782938
sample10 5.872283 4.300459
T4
sample1 0.816047
sample2 2.419108
sample3 9.955314
sample4 2.457227
sample5 8.029317
sample6 5.447036
sample7 8.975613
sample8 NaN
sample9 4.026585
sample10 3.807470
[67]:
plt.figure(figsize=(4, 8))

# Draw only the annotations (plot=True), without a heatmap.
col_ha = HeatmapAnnotation(
    label=anno_label(df.AB, merge=True, rotation=15),
    AB=anno_simple(df.AB, add_text=True),
    CD=anno_simple(df.CD, add_text=True, text_kws={'color': 'black'}),
    Exp=anno_boxplot(df_box, cmap='turbo', grid=True),
    Scatter=anno_scatterplot(df_scatter, legend=True),
    # colors may also be given as a list, e.g. ['orangered', 'yellowgreen']
    Line=anno_lineplot(
        df_bar2,
        linewidth=4,
        colors={'T2-B': 'orangered', 'T2-A': 'yellowgreen'},
        marker='D',
        legend=True,
    ),
    TMB_bar=anno_barplot(df_bar, legend=True, cmap='Set1'),
    # colors can be str, list, tuple or dict
    Bar1=anno_barplot(df_bar1, legend=True, colors=['red', 'black']),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Bar2=anno_barplot(
        df_bar2,
        legend=True,
        colors={'T2-B': 'orangered', 'T2-A': 'yellowgreen'},
    ),
    Bar3=anno_barplot(df_bar3, legend=True, cmap='Paired'),
    axis=1,
    plot=True,
    legend=True,
    legend_gap=5,
    hgap=4,
)

# Label each column with its sample name.
col_ha.show_ticklabels(
    df.index.tolist(),
    fontdict={'color': 'blue'},
    rotation=-30,
)
plt.show()
Starting plotting HeatmapAnnotations
Collecting annotation legends..
Change orientation to down
and add extra space¶
[68]:
plt.figure(figsize=(4, 8))

# Same annotations in reverse stacking order, with anno_label last.
# hgap (when axis=1) or wgap (when axis=0) controls the width or height of the
# space between different annotations.
row_ha = HeatmapAnnotation(
    TMB_bar=anno_barplot(df_bar, legend=True, cmap='Set1'),
    Bar1=anno_barplot(df_bar1, legend=True, cmap='Dark2'),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Bar2=anno_barplot(df_bar2, legend=True, cmap='tab10'),
    Bar3=anno_barplot(df_bar3, legend=True, cmap='Paired'),
    Scatter=anno_scatterplot(df_scatter),
    Exp=anno_boxplot(df_box, cmap='turbo', legend=True, grid=True),
    CD=anno_simple(
        df.CD,
        colors={'C': 'red', 'D': 'gray', 'G': 'yellow'},
        add_text=True,
        legend=True,
        text_kws={'color': 'black'},
    ),
    AB=anno_simple(df.AB, add_text=True, legend=True),
    label=anno_label(df.AB, merge=True, rotation=-15),
    axis=1,
    hgap=1,
    plot=True,
    plot_legend=False,
    legend_hpad=13,
)

row_ha.show_ticklabels(
    df.index.tolist(),
    fontdict={'color': 'blue'},
    rotation=30,
)
plt.show()
Starting plotting HeatmapAnnotations
Change orientation to the left¶
[69]:
plt.figure(figsize=(8, 4))

# Row-wise annotations (axis=0) with anno_label listed first.
row_ha = HeatmapAnnotation(
    label=anno_label(df.AB, merge=True, rotation=15),
    # optional: text_kws=dict(bbox={"pad":0},va='center',ha='center',rotation_mode='anchor')
    AB=anno_simple(df.AB, add_text=True, legend=True),
    CD=anno_simple(df.CD, add_text=True, legend=True),
    Exp=anno_boxplot(df_box, cmap='turbo', legend=True),
    Scatter=anno_scatterplot(df_scatter),
    TMB_bar=anno_barplot(df_bar, legend=True, cmap='Set1'),
    Bar1=anno_barplot(df_bar1, legend=True, cmap='Dark2'),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Bar2=anno_barplot(df_bar2, legend=True, cmap='tab10'),
    Bar3=anno_barplot(df_bar3, legend=True, cmap='Paired'),
    axis=0,
    wgap=3,
    label_side='bottom',
    plot=True,
    legend=True,
    legend_gap=5,
    legend_hpad=20,
)

row_ha.show_ticklabels(
    df.index.tolist(),
    fontdict={'color': 'blue'},
    rotation=0,
)
plt.show()
Starting plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
Change orientation to the right¶
[70]:
plt.figure(figsize=(8, 4))

# Row-wise annotations (axis=0) with anno_label listed last; the text inside
# the simple annotations is rotated by -90 degrees.
row_ha = HeatmapAnnotation(
    TMB_bar=anno_barplot(df_bar, legend=True, cmap='Set1'),
    Bar1=anno_barplot(df_bar1, legend=True, cmap='Dark2'),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Bar2=anno_barplot(df_bar2, legend=True, cmap='tab10'),
    Bar3=anno_barplot(df_bar3, legend=True, cmap='Paired'),
    Scatter=anno_scatterplot(df_scatter),
    Exp=anno_boxplot(df_box, cmap='turbo', legend=True, grid=True),
    CD=anno_simple(
        df.CD,
        colors={'C': 'red', 'D': 'gray', 'G': 'green'},
        add_text=True,
        legend=True,
        text_kws={'rotation': -90},
    ),
    AB=anno_simple(
        df.AB,
        add_text=True,
        legend=True,
        text_kws={'rotation': -90, 'color': 'black'},
    ),
    label=anno_label(df.AB, merge=True, rotation=15),
    axis=0,
    wgap=3,
    plot=True,
    legend=True,
    legend_hpad=13,
    legend_gap=5,
)

row_ha.show_ticklabels(
    df.index.tolist(),
    fontdict={'color': 'black'},
    rotation=0,
)
plt.show()
Starting plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
Changing orientation using parameter orientation
¶
By default, if there is no anno_label in the annotation, the orientation is determined by the parameter orientation.
[71]:
plt.figure(figsize=(8, 4))

# No anno_label here: the orientation is requested explicitly via `orientation`.
col_ha = HeatmapAnnotation(
    AB=anno_simple(df.AB, add_text=True, legend=True),
    CD=anno_simple(df.CD, add_text=True, legend=True),
    Exp=anno_boxplot(df_box, cmap='turbo', legend=True),
    Scatter=anno_scatterplot(df_scatter),
    TMB_bar=anno_barplot(df_bar, legend=True, cmap='Set1'),
    Bar1=anno_barplot(df_bar1, legend=True, cmap='Dark2'),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Bar2=anno_barplot(df_bar2, legend=True, cmap='tab10'),
    Bar3=anno_barplot(df_bar3, legend=True, cmap='Paired'),
    axis=0,
    orientation='left',
    plot=True,
    legend=True,
    legend_gap=5,
)
plt.show()
Starting plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
[72]:
plt.figure(figsize=(8, 4))

# Same annotations with orientation='right'; the simple annotations carry
# larger text rotated by -90 degrees.
col_ha = HeatmapAnnotation(
    AB=anno_simple(
        df.AB,
        add_text=True,
        legend=True,
        text_kws={'rotation': -90, 'fontsize': 14, 'color': 'black'},
    ),
    CD=anno_simple(
        df.CD,
        add_text=True,
        legend=True,
        text_kws={'rotation': -90, 'fontsize': 14, 'color': 'white'},
    ),
    Exp=anno_boxplot(df_box, cmap='turbo', legend=True),
    Scatter=anno_scatterplot(df_scatter),
    TMB_bar=anno_barplot(df_bar, legend=True, cmap='Set1'),
    Bar1=anno_barplot(df_bar1, legend=True, cmap='Dark2'),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Bar2=anno_barplot(df_bar2, legend=True, cmap='tab10'),
    Bar3=anno_barplot(df_bar3, legend=True, cmap='Paired'),
    axis=0,
    wgap=3,
    orientation='right',
    plot=True,
    legend=True,
    legend_gap=5,
)
plt.show()
Starting plotting HeatmapAnnotations
Collecting annotation legends..
Incresing ncol
Incresing ncol
Incresing ncol
More than 3 cols is not supported
Legend too long, generating a new column..
Add multiple heatmap annotations using for
loop¶
Typically, we can create a heatmap annotation using the following code:
col_ha = HeatmapAnnotation(
Group=anno_simple(df_cols.hypomethylated_samples,colors=sample_group_color_dict,legend=True),
CellType=anno_simple(df_cols.CellType,colors=ct_color_dict,legend=ct_legend),
M1=anno_simple(df_cols['M1'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M1'}),
verbose=0,label_side='right',label_kws={'horizontalalignment':'left'})
But what if we have many annotations, for example:
col_ha = HeatmapAnnotation(
Group=anno_simple(df_cols.hypomethylated_samples,colors=sample_group_color_dict,legend=True),
CellType=anno_simple(df_cols.CellType,colors=ct_color_dict,legend=ct_legend),
M1=anno_simple(df_cols['M1'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M1'}),
M2=anno_simple(df_cols['M2'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M2'}),
M3=anno_simple(df_cols['M3'],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':'M3'}),
.....
verbose=0,label_side='right',label_kws={'horizontalalignment':'left'})
In this case, we can create a dict that maps each annotation name (key) to its annotation object (value):
col_ha_dict={
'Group':anno_simple(df_cols.hypomethylated_samples,colors=sample_group_color_dict,legend=True),
'CellType':anno_simple(df_cols.CellType,colors=ct_color_dict,legend=ct_legend)
}
for col in sample_cols:
col_ha_dict[col]=anno_simple(df_cols[col],cmap='jet',legend=lgd,vmax=1,vmin=0,legend_kws={'label':col})
col_ha = HeatmapAnnotation(**col_ha_dict,
verbose=0,label_side='right',label_kws={'horizontalalignment':'left'})
Cluster between groups and cluster within groups¶
Similar to cluster_between_groups
and cluster_within_groups
in R (https://jokergoo.github.io/2021/03/05/cluster-groups-in-complexheatmap/)
cluster within groups: col_split=*, col_cluster=True
¶
[73]:
# Five column groups of unequal size: G1 (1), G2 (1), G3 (5), G4 (1), G5 (2).
df['Groups'] = ['G1', 'G2'] + ['G3'] * 5 + ['G4'] + ['G5'] * 2

# verbose=0 will turn off the log.
col_ha = HeatmapAnnotation(
    Groups=anno_simple(df.Groups, add_text=True, text_kws={'color': 'black'}),
    AB=anno_simple(df.AB, add_text=True),
    Exp=anno_boxplot(df_box, cmap='turbo'),
    axis=1,
    verbose=0,
)

plt.figure(figsize=(6, 8))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    # split the columns by group and cluster the columns
    col_split=df.Groups,
    col_split_gap=2,
    col_cluster=True,
    row_cluster=True,
    col_dendrogram=True,
    tree_kws={'col_cmap': 'Set1'},
    # names and legend
    label='values',
    show_rownames=True,
    show_colnames=True,
    legend_gap=7,
    verbose=0,
    # cell text, borders and colormap
    annot=True,
    fmt='.1g',
    linewidths=0.05,
    linecolor='gold',
    cmap='RdYlBu_r',
    xticklabels_kws={'labelrotation': -45, 'labelcolor': 'blue'},
    ylabel='Features',
)
plt.show()
cluster_between_groups: col_split=*, col_split_order="cluster_between_groups",col_cluster=False
¶
[74]:
plt.figure(figsize=(6, 8))

# col_split_order="cluster_between_groups" together with col_cluster=False.
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    # columns
    col_split=df.Groups,
    col_split_order="cluster_between_groups",
    col_split_gap=2,
    col_cluster=False,
    col_dendrogram=True,
    # rows
    row_cluster=True,
    row_split=2,
    row_split_gap=1,
    row_dendrogram=True,
    # dendrogram colors
    tree_kws={'colors': 'blue', 'row_cmap': 'Set1', 'col_cmap': 'Paired'},
    # names and legend
    label='values',
    show_rownames=True,
    show_colnames=True,
    legend_gap=7,
    verbose=0,
    # cell borders and colormap
    linewidths=0.05,
    linecolor='gold',
    cmap='RdYlBu_r',
    xticklabels_kws={'labelrotation': -45, 'labelcolor': 'blue'},
    ylabel='Features',
)
plt.show()
cluster_within_groups && cluster_between_groups: col_split=*, col_split_order="cluster_between_groups",col_cluster=True
¶
[75]:
plt.figure(figsize=(6, 8))

# col_split_order="cluster_between_groups" together with col_cluster=True;
# the columns are split by two variables (AB and Groups) at once.
# row_split_order='cluster_between_groups' is also available for the rows.
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    # columns
    col_split=df.loc[:, ['AB', 'Groups']],
    col_split_order="cluster_between_groups",
    col_split_gap=2,
    col_cluster=True,
    col_dendrogram=True,
    # rows
    row_split=3,
    row_split_gap=1.5,
    row_cluster=True,
    row_dendrogram=True,
    tree_kws={'colors': 'blue'},
    # names and legend
    label='values',
    show_rownames=True,
    show_colnames=True,
    legend_gap=7,
    verbose=0,
    # cell text, borders and colormap
    annot=True,
    fmt='.1g',
    linewidths=0.05,
    linecolor='gold',
    cmap='RdYlBu_r',
    xticklabels_kws={'labelrotation': -45, 'labelcolor': 'blue'},
    ylabel='Features',
)
plt.show()
[76]:
# `label_kws` in `HeatmapAnnotation` controls the heatmap annotation labels;
# here only `label_side` is set.
col_ha = HeatmapAnnotation(
    Groups=anno_simple(df.Groups, add_text=True, text_kws={'color': 'black'}),
    AB=anno_simple(df.AB, add_text=True),
    Exp=anno_boxplot(df_box, cmap='turbo', grid=True),
    axis=1,
    verbose=0,
    label_side='right',
)

# `xticklabels_kws` and `yticklabels_kws` control the ticklabels for the heatmap.
# More parameters for [x/y]ticklabels_kws: see matplotlib.axes.Axes.tick_params
# or ?ax.tick_params
plt.figure(figsize=(6, 8))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    # columns: split by group, in an explicitly given group order
    col_split=df.Groups,
    col_split_order=['G2', 'G1', 'G5', 'G4', 'G3'],
    col_split_gap=4.5,
    col_cluster=True,
    col_dendrogram=True,
    row_cluster=True,
    tree_kws={'col_cmap': 'Set1'},
    # names and legend
    label='values',
    show_rownames=True,
    show_colnames=True,
    row_names_side='left',
    legend_gap=7,
    verbose=0,
    # cell borders and colormap
    linewidths=0.05,
    linecolor='gold',
    cmap='RdYlBu_r',
    # tick labels
    xticklabels_kws={'labelrotation': -45, 'labelcolor': 'purple', 'labelsize': 14},
    # axis titles and their boxes
    xlabel='Samples',
    ylabel="Features",
    xlabel_kws={'color': 'white', 'fontsize': 14},
    # labelpad (in points) is increased manually here
    ylabel_kws={'color': 'blue', 'fontsize': 14, 'labelpad': 45},
    xlabel_bbox_kws={'facecolor': 'black'},
    ylabel_bbox_kws={'facecolor': 'chocolate', 'edgecolor': 'red'},
)
plt.savefig("test.pdf", bbox_inches='tight')
plt.show()
Custom annotation¶
[77]:
def _significance_stars(value):
    """Return the asterisk string drawn on top of one heatmap cell.

    >= 2 -> three asterisks, >= 1 -> two, > 0 -> one, anything else
    (including NaN, for which every comparison is False) -> empty string.

    To make the asterisk sit at the vertical center of the cell, the
    ASTERISK OPERATOR (U+2217) is used instead of the normal *;
    see: https://unicode-explorer.com/c/2217
    """
    if value >= 2:
        return '∗∗∗'
    if value >= 1:
        return '∗∗'
    if value > 0:
        return '∗'
    return ''


# DataFrame.applymap is deprecated since pandas 2.1 in favor of DataFrame.map;
# use the new name when this pandas has it and fall back on older versions.
_elementwise = df_heatmap.map if hasattr(pd.DataFrame, 'map') else df_heatmap.applymap
annot = _elementwise(_significance_stars)

plt.figure(figsize=(4, 6.5))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    # custom cell text: a frame of ready-made strings, so no number format
    annot=annot,
    fmt=None,
    annot_kws={'color': 'black', 'fontname': 'Courier'},
    # splitting and clustering
    col_split=df.Groups,
    col_split_order="cluster_between_groups",
    col_cluster=True,
    row_cluster=True,
    tree_kws={'col_cmap': 'Set1'},
    # legend, borders and colormap
    label='values',
    legend_gap=7,
    verbose=0,
    linewidths=0.05,
    linecolor='white',
    cmap='RdYlBu_r',
    xticklabels_kws={'labelrotation': -45, 'labelcolor': 'blue'},
)
plt.show()
Custom linkage¶
[78]:
import fastcluster

# Custom column linkage.
# The matrix is transposed so that each row is one sample, and a sample's
# missing values are replaced by that sample's median before the linkage is
# computed.
linkage = fastcluster.linkage(
    df_heatmap.T.apply(lambda profile: profile.fillna(profile.median()), axis=1),
    method='average',
    metric='canberra',
)
print("df_heatmap shape:", df_heatmap.shape,
      "\nlinkage shape:", linkage.shape,
      "\n", linkage)

plt.figure(figsize=(4, 6))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    z_score=0,
    # clustering; the precomputed linkage is handed to the column dendrogram
    col_cluster=True,
    row_cluster=True,
    col_dendrogram=True,
    col_dendrogram_kws={'linkage': linkage},
    tree_kws={'col_cmap': 'Set1'},
    # names and legend
    label='values',
    show_rownames=True,
    show_colnames=True,
    legend_gap=7,
    verbose=0,
    # cell borders and colormap
    linewidths=0.05,
    linecolor='white',
    cmap='RdYlBu_r',
    xticklabels_kws={'labelrotation': -45, 'labelcolor': 'blue'},
)
plt.show()
df_heatmap shape: (30, 10)
linkage shape: (9, 4)
[[ 3. 7. 16.30063564 2. ]
[ 4. 9. 19.98077013 2. ]
[ 0. 8. 20.13691502 2. ]
[ 5. 11. 20.98548079 3. ]
[10. 12. 21.51448534 4. ]
[ 2. 13. 21.61319985 4. ]
[ 6. 14. 21.88357858 5. ]
[15. 16. 22.2212842 9. ]
[ 1. 17. 22.43105863 10. ]]
[79]:
# Same group assignment as before: G1 (1), G2 (1), G3 (5), G4 (1), G5 (2).
df['Groups'] = ['G1', 'G2'] + ['G3'] * 5 + ['G4'] + ['G5'] * 2

plt.figure(figsize=(4, 6))
cm = ClusterMapPlotter(
    data=df_heatmap,
    top_annotation=col_ha,
    # columns: reuse the precomputed custom linkage
    col_cluster=True,
    col_dendrogram=True,
    col_dendrogram_kws={'linkage': linkage},
    # rows: split in two, with a dendrogram
    row_cluster=True,
    row_split=2,
    row_split_gap=3,
    row_dendrogram=True,
    tree_kws={'col_cmap': 'Set1', 'row_cmap': 'Dark2'},
    # names and legend
    label='values',
    show_rownames=True,
    show_colnames=True,
    legend_gap=7,
    verbose=0,
    # cell borders and colormap
    linewidths=0.05,
    linecolor='white',
    cmap='RdYlBu_r',
    xticklabels_kws={'labelrotation': -45, 'labelcolor': 'blue'},
)
plt.show()
Image annotation¶
[80]:
# Dataset for the image-annotation example.
# All tables describe the same ten samples, so they share one list of row
# labels; each constructor receives it directly instead of every index being
# rebuilt from df_box's shape.
sample_ids = ['sample' + str(i) for i in range(1, 11)]

# Sample metadata: two categorical groupings plus one continuous column.
df = pd.DataFrame(['AAAA1'] * 5 + ['BBBBB2'] * 5, columns=['AB'], index=sample_ids)
df['CD'] = ['C'] * 3 + ['D'] * 3 + ['G'] * 4
df['F'] = np.random.normal(0, 1, 10)

# Numeric per-sample tables (the random draws happen in the same order as before).
df_box = pd.DataFrame(np.random.randn(10, 4),
                      columns=['Gene' + str(i) for i in range(1, 5)],
                      index=sample_ids)
df_bar = pd.DataFrame(np.random.uniform(0, 10, (10, 2)),
                      columns=['TMB1', 'TMB2'], index=sample_ids)
df_scatter = pd.DataFrame(np.random.uniform(0, 10, 10),
                          columns=['Scatter'], index=sample_ids)
df_bar1 = pd.DataFrame(np.random.uniform(0, 10, (10, 2)),
                       columns=['T1-A', 'T1-B'], index=sample_ids)
df_bar2 = pd.DataFrame(np.random.uniform(0, 10, (10, 2)),
                       columns=['T2-A', 'T2-B'], index=sample_ids)
df_bar3 = pd.DataFrame(np.random.uniform(0, 10, (10, 2)),
                       columns=['T3-A', 'T3-B'], index=sample_ids)
df_bar3.iloc[5, 0] = np.nan  # one missing value in a two-column bar table
df_bar4 = pd.DataFrame(np.random.uniform(0, 10, (10, 1)),
                       columns=['T4'], index=sample_ids)
df_bar4.iloc[7, 0] = np.nan  # one missing value in a single-column bar table

# Every sample points at the same image file, "1.jpeg", given as a relative path.
df_img = pd.DataFrame(['1.jpeg'] * len(sample_ids), columns=['path'], index=sample_ids)
print(df_img)
path
sample1 1.jpeg
sample2 1.jpeg
sample3 1.jpeg
sample4 1.jpeg
sample5 1.jpeg
sample6 1.jpeg
sample7 1.jpeg
sample8 1.jpeg
sample9 1.jpeg
sample10 1.jpeg
[81]:
plt.figure(figsize=(16, 4))

# Annotation-only figure whose last track shows one image per sample.
col_ha = HeatmapAnnotation(
    label=anno_label(df.AB, merge=True, rotation=15),
    AB=anno_simple(df.AB, add_text=True, legend=True),
    CD=anno_simple(df.CD, add_text=True, legend=True, text_kws={'color': 'black'}),
    Exp=anno_boxplot(df_box, cmap='turbo', legend=True),
    Scatter=anno_scatterplot(df_scatter),
    Bar1=anno_barplot(df_bar1, legend=True, cmap='Dark2'),
    Bar4=anno_barplot(df_bar4, legend=True, cmap='turbo'),
    Img=anno_img(df_img.path, border_width=5, border_color=255, height=15),
    axis=1,
    hgap=0.5,
    plot=True,
    legend=True,
    legend_gap=5,
)

col_ha.show_ticklabels(
    df.index.tolist(),
    fontdict={'color': 'blue'},
    rotation=-30,
)
plt.show()
Starting plotting HeatmapAnnotations
Collecting annotation legends..
Choosing built-in colormaps in PyComplexHeatmap¶
Diverging¶
exp1
exp2
meth1
meth2
diverging1
parula
[82]:
from PyComplexHeatmap.utils import get_colormap
[83]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# Colormap names drawn so far, keyed by category; filled by plot_color_gradients.
cmaps = {}

# Two identical rows of 256 evenly spaced values from 0 to 1, drawn as an
# image to show a colormap as a horizontal bar.
gradient = np.vstack([np.linspace(0, 1, 256)] * 2)


def plot_color_gradients(category, cmap_list):
    """Draw one horizontal color bar per colormap name in ``cmap_list``.

    Parameters
    ----------
    category : str
        Used in the figure title (``"<category> colormaps"``) and as the key
        under which ``cmap_list`` is stored in the module-level ``cmaps`` dict.
    cmap_list : list of str
        Colormap names, looked up in ``mpl.colormaps``.
    """
    # Figure height grows with the number of colormaps.
    n_maps = len(cmap_list)
    fig_height = 0.35 + 0.15 + (n_maps + (n_maps - 1) * 0.1) * 0.22

    # One axes more than there are colormaps is created.
    fig, axes = plt.subplots(nrows=n_maps + 1, figsize=(6.4, fig_height))
    fig.subplots_adjust(
        top=1 - 0.35 / fig_height,
        bottom=0.15 / fig_height,
        left=0.2,
        right=0.99,
    )
    axes[0].set_title(f'{category} colormaps', fontsize=14)

    # zip stops at the shorter sequence, so the extra axes stays empty.
    for axis, cmap_name in zip(axes, cmap_list):
        axis.imshow(gradient, aspect='auto', cmap=mpl.colormaps[cmap_name])
        axis.text(
            -0.01, 0.5, cmap_name,
            va='center', ha='right', fontsize=10,
            transform=axis.transAxes,
        )

    # Turn off *all* ticks & spines, not just the ones with colormaps.
    for axis in axes:
        axis.set_axis_off()

    # Save colormap list for later.
    cmaps[category] = cmap_list
[84]:
# Show the six colormaps listed above, under the title "Sequential colormaps".
plot_color_gradients(
    category='Sequential',
    cmap_list=['exp1', 'exp2', 'meth1', 'meth2', 'diverging1', 'parula'],
)
Qualitative¶
cmap50
[85]:
# get_colormap("cmap50") # 50 different colors
[86]:
# Show the single qualitative colormap, cmap50.
plot_color_gradients(
    category='Qualitative',
    cmap_list=['cmap50'],
)
How to use the built-in cmap?¶
Just set cmap to the name of a built-in colormap, for example cmap='parula':
[87]:
# Sample metadata: three categorical groupings plus one continuous column,
# indexed sample1 .. sample10.
df = pd.DataFrame({'AB': ['GroupA'] * 5 + ['GroupB'] * 5})
df['CD'] = ['C'] * 3 + ['D'] * 3 + ['G'] * 4
df['EF'] = ['E'] * 6 + ['F'] * 2 + ['H'] * 2
df['F'] = np.random.normal(0, 1, 10)
df.index = [f'sample{n}' for n in range(1, len(df) + 1)]

# Heatmap matrix: 30 features (rows) x 10 samples (columns), with one missing
# value planted at row 1, column 2.
df_heatmap = pd.DataFrame(
    np.random.randn(30, 10),
    columns=[f'sample{n}' for n in range(1, 11)],
)
df_heatmap.index = [f'Fea{n}' for n in range(1, len(df_heatmap) + 1)]
df_heatmap.iloc[1, 2] = np.nan
plt.figure(figsize=(3.5, 5))

# Heatmap drawn with the 'parula' colormap, selected by name.
cm = ClusterMapPlotter(
    data=df_heatmap,
    # clustering and splitting
    col_cluster=True,
    row_cluster=True,
    col_split=df.AB,
    row_split=2,
    col_split_gap=0.5,
    row_split_gap=0.8,
    row_dendrogram=True,
    tree_kws={'row_cmap': 'Set1'},
    # names, axis titles and legend
    label='values',
    show_rownames=False,
    show_colnames=True,
    xlabel="Samples",
    ylabel="Features",
    legend_gap=5,
    verbose=0,
    # colormap and tick labels
    cmap='parula',
    xticklabels_kws={'labelrotation': -90, 'labelcolor': 'blue'},
)
# To keep a copy on disk: plt.savefig("example0.pdf", bbox_inches='tight')
plt.show()
[88]:
plt.figure(figsize=(3.5, 5))

# Same heatmap with the 'meth2' colormap and row names shown on the right.
cm = ClusterMapPlotter(
    data=df_heatmap,
    # clustering and splitting
    col_cluster=True,
    row_cluster=True,
    col_split=df.AB,
    row_split=2,
    col_split_gap=0.5,
    row_split_gap=0.8,
    row_dendrogram=True,
    tree_kws={'row_cmap': 'Set1'},
    # names and legend
    label='values',
    show_rownames=True,
    show_colnames=True,
    row_names_side='right',
    legend_gap=5,
    verbose=0,
    # colormap and tick labels
    cmap='meth2',
    xticklabels_kws={'labelrotation': -90, 'labelcolor': 'blue'},
)
plt.show()
How to force display all row/col ticklabels?¶
When the height or width is not big enough to display all xticklabels and yticklabels, some ticklabels will be hidden to avoid overlapping. For example:
[89]:
plt.figure(figsize=(3.5, 5))

# Default tick label behavior: yticklabels is not set here.
cm = ClusterMapPlotter(
    data=df_heatmap,
    # clustering and splitting
    col_cluster=True,
    row_cluster=True,
    col_split=df.AB,
    row_split=2,
    col_split_gap=0.5,
    row_split_gap=0.8,
    row_dendrogram=True,
    tree_kws={'row_cmap': 'Set1'},
    # names and legend
    label='values',
    show_rownames=True,
    show_colnames=True,
    row_names_side='right',
    legend_gap=5,
    verbose=0,
    # colormap and tick labels
    cmap='meth2',
    xticklabels_kws={'labelrotation': -90, 'labelcolor': 'blue'},
)
plt.show()
To force all ticklabels to be displayed, regardless of whether the height or width is big enough, set the parameters xticklabels/yticklabels
to True:
[90]:
plt.figure(figsize=(3.5, 5))

# Same figure as the previous cell, with yticklabels=True added.
cm = ClusterMapPlotter(
    data=df_heatmap,
    # clustering and splitting
    col_cluster=True,
    row_cluster=True,
    col_split=df.AB,
    row_split=2,
    col_split_gap=0.5,
    row_split_gap=0.8,
    row_dendrogram=True,
    tree_kws={'row_cmap': 'Set1'},
    # names and legend
    label='values',
    show_rownames=True,
    show_colnames=True,
    row_names_side='right',
    yticklabels=True,
    legend_gap=5,
    verbose=0,
    # colormap and tick labels
    cmap='meth2',
    xticklabels_kws={'labelrotation': -90, 'labelcolor': 'blue'},
)
plt.show()