Visualizing categorical variables using oncoPrint
[1]:
from PyComplexHeatmap import *
import pandas as pd
import random
[3]:
# Create toy dataset
# Seed the stdlib RNG so that "Restart Kernel -> Run All" regenerates exactly
# the same toy data on every run (without a seed each run draws new values).
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

samples = [f'Sample_{i}' for i in range(1, 11)]
genes = [f'Gene_{i}' for i in range(1, 11)]

# One record per (sample, gene) pair; exactly one of amp / neut / del is 1.
alts_lol = list()
for sample in samples:
    for gene in genes:
        amp_value = random.randint(0, 1)
        # A deletion is only drawn when the gene is not amplified, so the
        # two alteration flags can never both be set.
        del_value = random.randint(0, 1) if amp_value == 0 else 0
        # Neutral means neither amplified nor deleted.
        neut_value = 1 if (amp_value == 0 and del_value == 0) else 0
        alts_lol.append([sample, gene, amp_value, neut_value, del_value])
alts_df = pd.DataFrame(alts_lol, columns=['sample', 'gene', 'amp', 'neut', 'del'])

# Prepare column annotations
# Each annotation frame keeps 'sample' as a regular column AND uses it as the
# index (drop=False), matching what the plotting cell looks up by sample name.
annot_1_df = pd.DataFrame(
    [[i, random.randint(0, 100)] for i in samples],
    columns=['sample', 'annot1'],
).set_index('sample', drop=False)
annot_2_df = pd.DataFrame(
    [[i, random.randint(500, 5000)] for i in samples],
    columns=['sample', 'annot2'],
).set_index('sample', drop=False)
annot_3_df = pd.DataFrame(
    [[i, 'patient' + str(random.randint(1, 5))] for i in samples],
    columns=['sample', 'patient'],
).set_index('sample', drop=False)
[4]:
# Display the long-format alteration table: one row per (sample, gene) pair
# with the 0/1 flags 'amp', 'neut' and 'del'.
alts_df
[4]:
sample | gene | amp | neut | del | |
---|---|---|---|---|---|
0 | Sample_1 | Gene_1 | 1 | 0 | 0 |
1 | Sample_1 | Gene_2 | 0 | 0 | 1 |
2 | Sample_1 | Gene_3 | 1 | 0 | 0 |
3 | Sample_1 | Gene_4 | 0 | 0 | 1 |
4 | Sample_1 | Gene_5 | 0 | 1 | 0 |
... | ... | ... | ... | ... | ... |
95 | Sample_10 | Gene_6 | 0 | 0 | 1 |
96 | Sample_10 | Gene_7 | 1 | 0 | 0 |
97 | Sample_10 | Gene_8 | 0 | 0 | 1 |
98 | Sample_10 | Gene_9 | 1 | 0 | 0 |
99 | Sample_10 | Gene_10 | 1 | 0 | 0 |
100 rows × 5 columns
[9]:
# Display the per-sample 'patient' annotation (indexed by sample name).
annot_3_df
[9]:
sample | patient | |
---|---|---|
sample | ||
Sample_1 | Sample_1 | patient2 |
Sample_2 | Sample_2 | patient5 |
Sample_3 | Sample_3 | patient3 |
Sample_4 | Sample_4 | patient4 |
Sample_5 | Sample_5 | patient1 |
Sample_6 | Sample_6 | patient4 |
Sample_7 | Sample_7 | patient5 |
Sample_8 | Sample_8 | patient5 |
Sample_9 | Sample_9 | patient5 |
Sample_10 | Sample_10 | patient4 |
[10]:
# Display the per-sample numeric annotation 'annot1' (random integers in 0..100).
annot_1_df
[10]:
sample | annot1 | |
---|---|---|
sample | ||
Sample_1 | Sample_1 | 77 |
Sample_2 | Sample_2 | 89 |
Sample_3 | Sample_3 | 14 |
Sample_4 | Sample_4 | 78 |
Sample_5 | Sample_5 | 80 |
Sample_6 | Sample_6 | 27 |
Sample_7 | Sample_7 | 32 |
Sample_8 | Sample_8 | 61 |
Sample_9 | Sample_9 | 19 |
Sample_10 | Sample_10 | 81 |
[11]:
# Display the per-sample numeric annotation 'annot2' (random integers in 500..5000).
annot_2_df
[11]:
sample | annot2 | |
---|---|---|
sample | ||
Sample_1 | Sample_1 | 1114 |
Sample_2 | Sample_2 | 2698 |
Sample_3 | Sample_3 | 4718 |
Sample_4 | Sample_4 | 4633 |
Sample_5 | Sample_5 | 3444 |
Sample_6 | Sample_6 | 1984 |
Sample_7 | Sample_7 | 2088 |
Sample_8 | Sample_8 | 2316 |
Sample_9 | Sample_9 | 2939 |
Sample_10 | Sample_10 | 1373 |
[8]:
# Column (top) annotation: a merged patient label, a simple patient
# annotation, and one bar plot for each numeric annotation.
top_annotation = HeatmapAnnotation(
    label=anno_label(annot_3_df['patient'], merge=True, rotation=45),
    A3=anno_simple(annot_3_df['patient']),
    A1=anno_barplot(annot_1_df['annot1'], height=5),
    A2=anno_barplot(annot_2_df['annot2']),
)

# Plot oncoprint
plt.figure(figsize=(4, 6))
op = oncoPrintPlotter(
    data=alts_df,
    y='gene',
    x='sample',
    values=['amp', 'neut', 'del'],
    show_rownames=True,
    show_colnames=True,
    colors=['red', 'white', 'green'],
    top_annotation=top_annotation,
    col_split=annot_3_df['patient'],
    col_split_gap=0.2,
    width=0.8,  # width controls the width of the bar in each cell
)
# There are other plot_kws, such as 'align'.

# Disabled alternative kept from the original cell: iterate over
# `op.top_annotation.annotations` and apply the same grid/despine calls to
# each annotation's `.ax` instead of going through `.axes`.

# Remove the grid and adjust the spines on every top-annotation axis.
# The two despine passes are applied in this order, one after the other.
spine_passes = (
    dict(left=False, bottom=True, right=False, top=True),
    dict(left=True, bottom=False, right=True, top=False),
)
for ax in op.top_annotation.axes.flatten():
    ax.grid(False)
    # remove spines for top annotation and right annotation
    for spine_kwargs in spine_passes:
        despine(ax=ax, **spine_kwargs)
Starting plotting..
Starting calculating row orders..
Reordering rows..
Starting calculating col orders..
Reordering cols..
Plotting matrix..
Starting plotting HeatmapAnnotations
Collecting legends..
Collecting annotation legends..
Plotting legends..