[2]:

import os,sys
%matplotlib inline
import numpy as np
import matplotlib.pylab as plt
import pickle
plt.rcParams['figure.dpi'] = 100
plt.rcParams['savefig.dpi']=300
plt.rcParams['font.family']='sans serif'
plt.rcParams['font.sans-serif']='Arial'
plt.rcParams['pdf.fonttype']=42
sys.path.append(os.path.expanduser("~/Projects/Github/PyComplexHeatmap/"))
import PyComplexHeatmap
from PyComplexHeatmap import *
use_pch_style() # or plt.style.use('default') to restore default style

Load an example brain networks dataset from seaborn¶

[3]:

import seaborn as sns

# Networks kept for this example.
used_networks = [1, 5, 6, 7, 8, 12, 13, 17]

# Read the brain networks table: three header rows, first column as the index.
df = sns.load_dataset("brain_networks", header=[0, 1, 2], index_col=0)

# Boolean mask selecting the columns whose "network" level is in used_networks.
network_ids = df.columns.get_level_values("network").astype(int)
used_columns = network_ids.isin(used_networks)
df = df.loc[:, used_columns]

# Collapse the multi-level column labels into single "-"-joined strings.
df.columns = df.columns.map("-".join)


def _correlation_level(r):
    """Bin a correlation value: 'High' (>= 0.7), 'Middle' (>= 0.3), else 'Low'."""
    if r >= 0.7:
        return 'High'
    if r >= 0.3:
        return 'Middle'
    return 'Low'


# Long-form table of pairwise correlations, plus a categorical 'Level' column.
corr_mat = df.corr().stack().reset_index(name="correlation")
corr_mat['Level'] = corr_mat['correlation'].apply(_correlation_level)

# Wide-form (level_0 x level_1) matrix of the same correlations.
data = corr_mat.pivot(index='level_0', columns='level_1', values='correlation')

[4]:

# Preview the first rows of the wide-form correlation matrix produced by the pivot.
data.head()

[4]:

level_1	1-1-lh	1-1-rh	12-1-lh	12-1-rh	12-2-lh	12-2-rh	12-3-lh	13-1-lh	13-1-rh	13-2-lh	...	7-2-lh	7-2-rh	7-3-lh	7-3-rh	8-1-lh	8-1-rh	8-2-lh	8-2-rh	8-3-lh	8-3-rh
level_0
1-1-lh	1.000000	0.881516	-0.049793	0.026902	-0.144335	-0.141253	0.119250	-0.261589	-0.272701	-0.370021	...	-0.366065	-0.325680	-0.196770	-0.144566	-0.366818	-0.388756	-0.352529	-0.363982	-0.341524	-0.350452
1-1-rh	0.881516	1.000000	-0.112697	-0.036909	-0.144277	-0.189683	0.084633	-0.324230	-0.332886	-0.374322	...	-0.361036	-0.274151	-0.142392	-0.070452	-0.358625	-0.402173	-0.302286	-0.339989	-0.315931	-0.343379
12-1-lh	-0.049793	-0.112697	1.000000	0.343464	0.470239	0.100802	0.438449	0.339667	0.089811	0.272394	...	-0.036493	-0.171179	-0.043298	-0.158039	0.005598	-0.060007	0.079078	-0.040060	0.027878	-0.075781
12-1-rh	0.026902	-0.036909	0.343464	1.000000	0.130549	0.278569	0.127621	-0.014404	0.051249	-0.090130	...	-0.170053	-0.124278	-0.112148	-0.063705	-0.172007	-0.040629	-0.079687	0.024864	-0.092263	-0.068858
12-2-lh	-0.144335	-0.144277	0.470239	0.130549	1.000000	0.521377	0.506652	0.320966	0.141884	0.608392	...	-0.075986	-0.095015	0.012966	-0.082816	0.023340	0.058718	0.034181	0.033355	-0.022982	0.025638

5 rows × 38 columns

[5]:

# Distinct 'Level' labels, ordered by how often each one occurs.
level_counts = corr_mat['Level'].value_counts()
level_counts.index.tolist()

[5]:

['Low', 'Middle', 'High']

[6]:

# Preview the long-form table: one row per (level_0, level_1) pair with its
# correlation and the derived 'Level' category.
corr_mat.head()

[6]:

	level_0	level_1	correlation	Level
0	1-1-lh	1-1-lh	1.000000	High
1	1-1-lh	1-1-rh	0.881516	High
2	1-1-lh	5-1-lh	0.431619	Middle
3	1-1-lh	5-1-rh	0.418708	Middle
4	1-1-lh	6-1-lh	-0.084634	Low

Dot Heatmap¶

Plot a traditional heatmap using the square marker `marker='s'`¶

[7]:

# Square markers (marker='s') with a fixed size give a traditional-looking heatmap.
plt.figure(figsize=(8, 8))
square_kws = dict(
    x='level_0', y='level_1', value='correlation',
    c='correlation', cmap='jet', vmin=0, vmax=1,
    s=0.7, marker='s', spines=True,
)
cm = DotClustermapPlotter(data=corr_mat, **square_kws)
# Dashed white lines on the minor grid of the heatmap axes.
cm.ax_heatmap.grid(which='minor', color='white', linestyle='--', linewidth=1)
plt.show()

Starting plotting..
Starting calculating row orders..
Reordering rows..
Starting calculating col orders..
Reordering cols..
Plotting matrix..
Inferred max_s (max size of scatter point) is: 132.808800071564
Collecting legends..
Plotting legends..
Estimated legend width: 7.5 mm

../_images/notebooks_dotHeatmap_8_1.png

Simple dot heatmap using fixed dot size¶

By default, a circle marker is used: `marker='o'`

[9]:

# Every dot gets the same size (s=0.5); colour encodes the correlation.
plt.figure(figsize=(8, 8))
fixed_size_kws = dict(
    x='level_0', y='level_1', value='correlation',
    c='correlation', cmap='Oranges', vmin=0, vmax=1,
    s=0.5,
)
cm = DotClustermapPlotter(corr_mat, **fixed_size_kws)
plt.show()

Starting plotting..
Starting calculating row orders..
Reordering rows..
Starting calculating col orders..
Reordering cols..
Plotting matrix..
Inferred max_s (max size of scatter point) is: 132.808800071564
Collecting legends..
Plotting legends..
Estimated legend width: 7.5 mm

../_images/notebooks_dotHeatmap_10_1.png

Changing the size of point¶

By default, if the parameter `s` is not given, the size of the points is determined from the `value` column.

[20]:

# Both dot size (s) and dot colour (c) are taken from the 'correlation' column;
# each dot is outlined with a thin black edge.
plt.figure(figsize=(8, 8))
sized_dot_kws = dict(
    x='level_0', y='level_1', value='correlation',
    s='correlation', c='correlation', cmap='RedYellowBlue_r',
    vmin=0, vmax=1,
    edgecolor='black', linewidth=0.5,
)
cm = DotClustermapPlotter(corr_mat, **sized_dot_kws)
plt.show()

Starting plotting..
Starting calculating row orders..
Reordering rows..
Starting calculating col orders..
Reordering cols..
Plotting matrix..
Inferred max_s (max size of scatter point) is: 132.808800071564
Collecting legends..
Plotting legends..
Estimated legend width: 28.22361111111111 mm

../_images/notebooks_dotHeatmap_12_1.png

Add parameter `hue` and use different `colors` for different groups¶

[16]:

# Group the dots by 'Level' (hue) and give each group its own fixed colour.
level_colors = {'High': 'red', 'Middle': 'purple', 'Low': 'green'}

plt.figure(figsize=(8, 8))
cm = DotClustermapPlotter(
    corr_mat,
    x='level_0', y='level_1', value='correlation',
    hue='Level', colors=level_colors,
    s='correlation',
    vmin=0, vmax=1,
)
plt.show()

Starting plotting..
Starting calculating row orders..
Reordering rows..
Starting calculating col orders..
Reordering cols..
Plotting matrix..
Inferred max_s (max size of scatter point) is: 132.808800071564
Collecting legends..
Plotting legends..
Estimated legend width: 28.22361111111111 mm

../_images/notebooks_dotHeatmap_14_1.png

Add parameter `hue` and use different `cmap` and `marker` for different groups¶

[17]:

# One marker shape per 'Level' group.
level_markers = {'High': 'P', 'Middle': '*', 'Low': 'D'}
# In this case, `colors` is only used to control the colour in the legend.
level_colors = {'High': 'red', 'Middle': 'purple', 'Low': 'green'}

plt.figure(figsize=(8, 8))
cm = DotClustermapPlotter(
    corr_mat,
    x='level_0', y='level_1', value='correlation',
    hue='Level',
    colors=level_colors,
    marker=level_markers,
    spines=True,
    vmin=0, vmax=1,
    legend_width=18,
)
plt.show()

Starting plotting..
Starting calculating row orders..
Reordering rows..
Starting calculating col orders..
Reordering cols..
Plotting matrix..
Inferred max_s (max size of scatter point) is: 132.808800071564
Collecting legends..
Plotting legends..

../_images/notebooks_dotHeatmap_16_1.png

[18]:

# One colormap and one marker shape per 'Level' group.
level_cmaps = {'High': 'Reds', 'Middle': 'Purples', 'Low': 'Greens'}
level_markers = {'High': 'P', 'Middle': '*', 'Low': 'D'}
# In this case, `colors` is only used to control the colour in the legend.
level_colors = {'High': 'red', 'Middle': 'purple', 'Low': 'green'}

plt.figure(figsize=(8, 8))
cm = DotClustermapPlotter(
    corr_mat,
    x='level_0', y='level_1', value='correlation',
    hue='Level',
    cmap=level_cmaps,
    colors=level_colors,
    marker=level_markers,
    spines=True,
    vmin=0, vmax=1,
    legend_width=18,
)
plt.show()

Starting plotting..
Starting calculating row orders..
Reordering rows..
Starting calculating col orders..
Reordering cols..
Plotting matrix..
Inferred max_s (max size of scatter point) is: 132.808800071564
Collecting legends..
Plotting legends..

../_images/notebooks_dotHeatmap_17_1.png

Dot Clustermap¶

Plot clustermap using seaborn brain networks dataset¶

[19]:

# Reminder of the long-form input used for the clustermap below.
corr_mat.head()

[19]:

	level_0	level_1	correlation	Level
0	1-1-lh	1-1-lh	1.000000	High
1	1-1-lh	1-1-rh	0.881516	High
2	1-1-lh	5-1-lh	0.431619	Middle
3	1-1-lh	5-1-rh	0.418708	Middle
4	1-1-lh	6-1-lh	-0.084634	Low

[20]:

def _label_prefix(label):
    """Return the part of a '-'-separated label before the first '-'."""
    return label.split('-')[0]


# One row per distinct level_0 label, indexed by that label, with its group prefix.
df_row = (
    corr_mat[['level_0']]
    .drop_duplicates()
    .assign(RowGroup=lambda frame: frame['level_0'].apply(_label_prefix))
    .set_index('level_0')
)

# Same construction for the distinct level_1 labels.
df_col = (
    corr_mat[['level_1']]
    .drop_duplicates()
    .assign(ColGroup=lambda frame: frame['level_1'].apply(_label_prefix))
    .set_index('level_1')
)

for frame in (df_row, df_col):
    print(frame.head())

        RowGroup
level_0
1-1-lh         1
1-1-rh         1
5-1-lh         5
5-1-rh         5
6-1-lh         6
        ColGroup
level_1
1-1-lh         1
1-1-rh         1
5-1-lh         5
5-1-rh         5
6-1-lh         6

[21]:

# Row annotation (passed below as right_annotation): a coloured RowGroup bar
# with the group text drawn on it.
row_group_bar = anno_simple(
    df_row.RowGroup, cmap='Set1',
    add_text=True, text_kws={'color': 'black', 'rotation': -90},
    legend=False,
)
row_ha = HeatmapAnnotation(
    Row=row_group_bar,
    axis=0, verbose=0,
    label_kws={'rotation': 45, 'horizontalalignment': 'left'},
)

# Column annotation (passed below as top_annotation): merged ColGroup text
# labels first, then a coloured ColGroup bar. The keyword order is kept as-is.
col_group_label = anno_label(df_col.ColGroup, merge=True, rotation=45)
col_group_bar = anno_simple(df_col.ColGroup, cmap='Dark2', legend=False, add_text=True)
col_ha = HeatmapAnnotation(
    label=col_group_label,
    Col=col_group_bar,
    verbose=0, label_side='left',
    label_kws={'horizontalalignment': 'right'},
)

plt.figure(figsize=(9, 8))
cm = DotClustermapPlotter(
    data=corr_mat, x='level_0', y='level_1', value='correlation',
    # Per-'Level' colormap, legend colour and marker shape.
    hue='Level',
    cmap={'High': 'Reds', 'Middle': 'Purples', 'Low': 'Greens'},
    colors={'High': 'red', 'Middle': 'purple', 'Low': 'green'},
    marker={'High': 'P', 'Middle': '*', 'Low': 'D'},
    top_annotation=col_ha, right_annotation=row_ha,
    # Split rows and columns into two groups each, separated by gaps.
    col_split=2, row_split=2,
    col_split_gap=0.5, row_split_gap=1,
    show_rownames=True, show_colnames=True,
    row_dendrogram=True, tree_kws={'row_cmap': 'Set1'},
    verbose=0, legend_gap=7, spines=True,
)
plt.show()

../_images/notebooks_dotHeatmap_22_0.png

Visualize up to five-dimensional data using DotClustermapPlotter¶

Plot enrichment analysis results using an example dataset with sample annotations

[22]:

# Load the tab-separated enrichment results and keep three annotation categories.
# .copy() makes the filtered frame independent, so the column assignments below
# modify `data` itself rather than a slice of the table that was read.
data = pd.read_csv("../data/kycg_result.txt", sep='\t')
data = data.loc[data.Category.isin(['rmsk1', 'ChromHMM', 'EnsRegBuild'])].copy()

# Shorten the dataset names. The result is assigned back to the column:
# calling replace(..., inplace=True) on `data.SampleID` is a chained in-place
# update, which pandas warns about and which does not modify `data` when
# Copy-on-Write is active.
data['SampleID'] = data['SampleID'].replace(
    {'Clark2018_Argelaguet2019': 'Dataset1', 'Luo2022': 'Dataset2'}
)

# Fill missing -log10(Pval) with the largest observed (non-NaN) value;
# assigned back for the same reason as above.
max_p = np.nanmax(data['-log10(Pval)'].values)
data['-log10(Pval)'] = data['-log10(Pval)'].fillna(max_p)

# Column identifier used on the x axis: "<SampleID>-<CpGType>".
data['ID'] = data.SampleID + '-' + data.CpGType

# Keep only the terms that occur in at least two distinct SampleIDs.
vc = data.groupby('Term').SampleID.nunique()
data = data.loc[data.Term.isin(vc[vc >= 2].index.tolist())]

# Optional min-max scaling of -log10(Pval) (left disabled):
# p_max=data['-log10(Pval)'].max()
# p_min=data['-log10(Pval)'].min()
# data['-log10(Pval)']=data['-log10(Pval)'].apply(lambda x:(x-p_min)/(p_max-p_min))

# Column annotations: one row per ID with its dataset and CpG type. They are
# read from the original columns rather than by re-splitting ID on '-', so a
# '-' inside a SampleID cannot break them.
df_col = (
    data.loc[:, ['ID', 'SampleID', 'CpGType']]
    .drop_duplicates()
    .rename(columns={'SampleID': 'Dataset', 'CpGType': 'Correlation'})
    .set_index('ID')
)

# Row annotations: the Category of each Term.
df_row = data.loc[:, ['Term', 'Category']].drop_duplicates().set_index('Term')

[23]:

# Preview the filtered enrichment table, including the derived 'ID' column.
data.head()

[23]:

	Term	odds_ratio	Category	SampleID	CpGType	pvalue	EnrichType	-log10(Pval)	ID
49	Het	1.061	ChromHMM	Dataset1	Negative	2.020000e-07	Enrich	26.0	Dataset1-Negative
55	Tx	1.029	ChromHMM	Dataset1	Negative	4.580000e-07	Enrich	26.0	Dataset1-Negative
65	TssFlnk	1.056	ChromHMM	Dataset1	Negative	2.370000e-06	Enrich	26.0	Dataset1-Negative
112	TxWk	1.022	ChromHMM	Dataset1	Negative	8.660000e-05	Enrich	26.0	Dataset1-Negative
346	DNA?	1.350	rmsk1	Dataset1	Negative	2.760000e-02	Enrich	26.0	Dataset1-Negative

[24]:

# Summary statistics of -log10(Pval) after the missing values were filled.
data['-log10(Pval)'].describe()

[24]:

count    64.000000
mean     25.356918
std       3.632073
min       3.119186
25%      26.000000
50%      26.000000
75%      26.000000
max      26.000000
Name: -log10(Pval), dtype: float64

[25]:

# Distinct values of the two categorical columns used in the plots below.
for column in ('CpGType', 'EnrichType'):
    print(data[column].unique())

['Negative' 'Positive']
['Enrich' 'Depletion']

[26]:

# Column annotation table (indexed by ID) passed to the column annotations below.
df_col

[26]:

	Dataset	Correlation
ID
Dataset1-Negative	Dataset1	Negative
Dataset1-Positive	Dataset1	Positive
Dataset2-Negative	Dataset2	Negative
Dataset2-Positive	Dataset2	Positive

[27]:

# Row annotation table (indexed by Term) passed to the row annotations below.
df_row

[27]:

	Category
Term
Het	ChromHMM
Tx	ChromHMM
TssFlnk	ChromHMM
TxWk	ChromHMM
DNA?	rmsk1
srpRNA	rmsk1
DNA	rmsk1
Unknown	rmsk1
Satellite	rmsk1
Simple_repeat	rmsk1
Low_complexity	rmsk1
LINE	rmsk1
Quies	ChromHMM
ReprPCWk	ChromHMM
TssBiv	ChromHMM
ReprPC	ChromHMM
LTR	rmsk1
snRNA	rmsk1
SINE	rmsk1

[28]:

# Row annotations (passed below as right_annotation): a coloured Category bar
# followed by merged Category text labels. The keyword order is kept as-is.
category_bar = anno_simple(df_row.Category, cmap='Set1', add_text=False, legend=False)
category_label = anno_label(df_row.Category, merge=True, rotation=0)
row_ha = HeatmapAnnotation(
    Category=category_bar,
    label=category_label,
    axis=0, verbose=0,
    label_kws={'rotation': 45, 'horizontalalignment': 'left'},
)

# Column annotations (passed below as top_annotation): Dataset bar, then Correlation bar.
dataset_bar = anno_simple(df_col.Dataset, cmap='Set1', legend=False, add_text=True)
correlation_bar = anno_simple(df_col.Correlation, cmap='Dark2', legend=False, add_text=True)
col_ha = HeatmapAnnotation(
    Dataset=dataset_bar,
    Correlation=correlation_bar,
    verbose=0, label_side='left',
    label_kws={'horizontalalignment': 'right'},
)

plt.figure(figsize=(3, 5))
cm = DotClustermapPlotter(
    data=data, x='ID', y='Term',
    # Colour from -log10(Pval), dot size from odds_ratio, grouping by EnrichType.
    value='-log10(Pval)', c='-log10(Pval)', s='odds_ratio',
    hue='EnrichType',
    row_cluster=False, col_cluster=False,
    cmap={'Enrich': 'RdYlGn_r', 'Depletion': 'coolwarm_r'},
    colors={'Enrich': 'red', 'Depletion': 'blue'},
    # Optional per-group markers: marker={'Enrich':'^','Depletion':'v'}
    top_annotation=col_ha, right_annotation=row_ha,
    col_split=df_col.Dataset, row_split=df_row.Category,
    col_split_gap=0.5, row_split_gap=1,
    show_rownames=True, show_colnames=False, row_dendrogram=False,
    verbose=1, legend_gap=7,
)
# If the dots in the legend are too large, use alpha to control them, e.g. alpha=0.8.
plt.savefig("dotHeatmap1.pdf", bbox_inches='tight')
plt.show()

Starting plotting..
Starting calculating row orders..
Reordering rows..
Starting calculating col orders..
Reordering cols..
Plotting matrix..
Inferred max_s (max size of scatter point) is: 180.09263168093761
Collecting legends..
Plotting legends..
Estimated legend width: 25.930555555555557 mm

../_images/notebooks_dotHeatmap_30_1.png

[29]:

# Same data, now with a different marker and colormap per EnrichType; dot size
# and colour both come from -log10(Pval). Reuses the existing col_ha / row_ha.
enrich_markers = {'Enrich': '$\\ast$', 'Depletion': 'X'}
enrich_cmaps = {'Enrich': 'Reds', 'Depletion': 'Blues'}
enrich_colors = {'Enrich': 'red', 'Depletion': 'blue'}

plt.figure(figsize=(3.5, 5))
cm = DotClustermapPlotter(
    data=data, x='ID', y='Term',
    value='odds_ratio', s='-log10(Pval)', c='-log10(Pval)',
    hue='EnrichType',
    row_cluster=False,
    cmap=enrich_cmaps, colors=enrich_colors, marker=enrich_markers,
    value_na=25, c_na=25,
    top_annotation=col_ha, right_annotation=row_ha,
    col_split=df_col.Dataset, row_split=df_row.Category,
    col_split_gap=0.5, row_split_gap=1,
    show_rownames=True,
    verbose=1, legend_gap=7, dot_legend_marker='o',
)
# plt.savefig(os.path.expanduser("~/Gallery/20230227_kycg.pdf"),bbox_inches='tight')
plt.show()

Starting plotting..
Starting calculating row orders..
Reordering rows..
Starting calculating col orders..
Reordering cols..
Plotting matrix..
Inferred max_s (max size of scatter point) is: 180.09263168093761
Collecting legends..
Plotting legends..
Estimated legend width: 30.516666666666666 mm

../_images/notebooks_dotHeatmap_31_1.png

[30]:

# Re-check the distribution of -log10(Pval) before the final plot.
data['-log10(Pval)'].describe()

[30]:

count    64.000000
mean     25.356918
std       3.632073
min       3.119186
25%      26.000000
50%      26.000000
75%      26.000000
max      26.000000
Name: -log10(Pval), dtype: float64

[31]:

# One shared colormap ('jet') for all EnrichType groups; the dot legend is drawn
# with a diamond marker. Reuses the existing col_ha / row_ha.
plt.figure(figsize=(3.5, 4))
single_cmap_kws = dict(
    x='ID', y='Term',
    value='-log10(Pval)', c='-log10(Pval)', s='odds_ratio',
    hue='EnrichType',
    row_cluster=False, col_cluster=False,
    cmap='jet',
    colors={'Enrich': 'red', 'Depletion': 'blue'},
    # Optional per-group markers: marker={'Enrich':'P','Depletion':'*'}
    value_na=25, c_na=25,
    top_annotation=col_ha, right_annotation=row_ha,
    col_split=df_col.Dataset, row_split=df_row.Category,
    col_split_gap=0.5, row_split_gap=1,
    show_rownames=True,
    verbose=1, legend_gap=7, spines=True, dot_legend_marker='D',
)
cm = DotClustermapPlotter(data=data, **single_cmap_kws)
plt.show()

Starting plotting..
Starting calculating row orders..
Reordering rows..
Starting calculating col orders..
Reordering cols..
Plotting matrix..
Inferred max_s (max size of scatter point) is: 110.41377726332676
Collecting legends..
Plotting legends..
Estimated legend width: 25.930555555555557 mm

../_images/notebooks_dotHeatmap_33_1.png

[ ]: