Visualizing categorical variables using oncoPrint
[1]:
import os,sys
# sys.path.append(os.path.expanduser("~/Projects/Github/PyComplexHeatmap/"))
from PyComplexHeatmap import *
use_pch_style() # or plt.style.use('default') to restore default style
import pandas as pd
import random
[2]:
# Create toy dataset
# Seed the stdlib RNG first so the toy data (and therefore every table and
# figure derived from it) is identical on each "Restart Kernel -> Run All".
SEED = 42
random.seed(SEED)

samples = [f'Sample_{i}' for i in range(1, 11)]
genes = [f'Gene_{i}' for i in range(1, 11)]

# Long-format alteration table: one record per (sample, gene) pair, with
# exactly one of the amp / neut / del flags set to 1.
alts_lol = list()
for sample in samples:
    for gene in genes:
        amp_value = random.randint(0, 1)
        # A deletion is only drawn when the gene is not amplified, so the
        # two alterations are mutually exclusive.
        del_value = random.randint(0, 1) if amp_value == 0 else 0
        # Neutral means neither amplified nor deleted.
        neut_value = int(amp_value == 0 and del_value == 0)
        alts_lol.append([sample, gene, amp_value, neut_value, del_value])
alts_df = pd.DataFrame(alts_lol, columns=['sample', 'gene', 'amp', 'neut', 'del'])

# Prepare column annotations
# Each frame is indexed by sample name while also keeping the 'sample'
# column (drop=False), matching the layout shown in the cells below.
annot_1_df = pd.DataFrame(
    [[i, random.randint(0, 100)] for i in samples],
    columns=['sample', 'annot1'],
).set_index('sample', drop=False)
annot_2_df = pd.DataFrame(
    [[i, random.randint(500, 5000)] for i in samples],
    columns=['sample', 'annot2'],
).set_index('sample', drop=False)
annot_3_df = pd.DataFrame(
    [[i, 'patient' + str(random.randint(1, 5))] for i in samples],
    columns=['sample', 'patient'],
).set_index('sample', drop=False)
[3]:
# Display the long-format alteration table (one row per sample/gene pair).
alts_df
[3]:
sample | gene | amp | neut | del | |
---|---|---|---|---|---|
0 | Sample_1 | Gene_1 | 1 | 0 | 0 |
1 | Sample_1 | Gene_2 | 1 | 0 | 0 |
2 | Sample_1 | Gene_3 | 0 | 1 | 0 |
3 | Sample_1 | Gene_4 | 0 | 0 | 1 |
4 | Sample_1 | Gene_5 | 1 | 0 | 0 |
... | ... | ... | ... | ... | ... |
95 | Sample_10 | Gene_6 | 1 | 0 | 0 |
96 | Sample_10 | Gene_7 | 1 | 0 | 0 |
97 | Sample_10 | Gene_8 | 1 | 0 | 0 |
98 | Sample_10 | Gene_9 | 0 | 1 | 0 |
99 | Sample_10 | Gene_10 | 1 | 0 | 0 |
100 rows × 5 columns
[4]:
# Display the per-sample patient labels (later used to split the columns).
annot_3_df
[4]:
sample | patient | |
---|---|---|
sample | ||
Sample_1 | Sample_1 | patient4 |
Sample_2 | Sample_2 | patient3 |
Sample_3 | Sample_3 | patient5 |
Sample_4 | Sample_4 | patient1 |
Sample_5 | Sample_5 | patient1 |
Sample_6 | Sample_6 | patient3 |
Sample_7 | Sample_7 | patient3 |
Sample_8 | Sample_8 | patient4 |
Sample_9 | Sample_9 | patient3 |
Sample_10 | Sample_10 | patient2 |
[5]:
# Display the first numeric per-sample annotation (random integers in 0-100).
annot_1_df
[5]:
sample | annot1 | |
---|---|---|
sample | ||
Sample_1 | Sample_1 | 31 |
Sample_2 | Sample_2 | 5 |
Sample_3 | Sample_3 | 26 |
Sample_4 | Sample_4 | 96 |
Sample_5 | Sample_5 | 17 |
Sample_6 | Sample_6 | 32 |
Sample_7 | Sample_7 | 4 |
Sample_8 | Sample_8 | 96 |
Sample_9 | Sample_9 | 3 |
Sample_10 | Sample_10 | 92 |
[6]:
# Display the second numeric per-sample annotation (random integers in 500-5000).
annot_2_df
[6]:
sample | annot2 | |
---|---|---|
sample | ||
Sample_1 | Sample_1 | 4870 |
Sample_2 | Sample_2 | 4572 |
Sample_3 | Sample_3 | 2378 |
Sample_4 | Sample_4 | 4970 |
Sample_5 | Sample_5 | 4662 |
Sample_6 | Sample_6 | 1196 |
Sample_7 | Sample_7 | 3065 |
Sample_8 | Sample_8 | 2553 |
Sample_9 | Sample_9 | 4390 |
Sample_10 | Sample_10 | 4068 |
[7]:
# Build the top annotation from the three per-sample frames: patient labels
# (anno_label with merge=True), a patient strip (anno_simple), and one bar
# plot each for annot1 and annot2.
top_annotation=HeatmapAnnotation(label=anno_label(annot_3_df.patient, merge=True,rotation=45),
A3=anno_simple(annot_3_df['patient']),
A1=anno_barplot(annot_1_df['annot1'],height=5),
A2=anno_barplot(annot_2_df['annot2']))
# Plot the oncoprint from the long-format table: y='gene', x='sample', and the
# three flag columns in `values`; columns are split by patient.
# NOTE(review): `colors` presumably maps positionally onto `values`
# (amp -> red, neut -> white, del -> green) - confirm against the library docs.
# NOTE(review): `plt` is never imported explicitly in this notebook; it is
# presumably provided by `from PyComplexHeatmap import *` - confirm.
plt.figure(figsize=(4,6))
op=oncoPrintPlotter(data=alts_df, y='gene', x='sample', values=['amp', 'neut', 'del'],
show_rownames=True, show_colnames=True, colors=['red', 'white', 'green'],
top_annotation=top_annotation, col_split=annot_3_df['patient'],
col_split_gap=0.2,width=0.8)
# `width` controls the width of the bar in each cell;
# there are other plot_kws, such as 'align'.
# Disabled alternative, kept for reference: the same clean-up done by
# iterating over the annotation objects instead of the axes array.
# for annotation in op.top_annotation.annotations:
# ax=annotation.ax
# ax.grid(False)
# #remove spines for top annotation and right annotation
# despine(ax=ax,left=False, bottom=True, right=False, top=True)
# despine(ax=ax,left=True, bottom=False, right=True, top=False)
# Remove the grid from every axes of the top annotation.
# NOTE(review): indentation was lost in this export, so which of the
# statements below belong to the loop body cannot be seen here - confirm
# against the original notebook.
for ax in op.top_annotation.axes.flatten():
ax.grid(False)
# Remove spines for the top annotation; the two calls pass complementary
# left/bottom/right/top flags.
despine(ax=ax,left=False, bottom=True, right=False, top=True)
despine(ax=ax,left=True, bottom=False, right=True, top=False)
Starting plotting..
Starting calculating row orders..
Reordering rows..
Starting calculating col orders..
Reordering cols..
Plotting matrix..
Starting plotting HeatmapAnnotations
Collecting legends..
Collecting annotation legends..
Plotting legends..
Estimated legend width: 21.344444444444445 mm