Visualizing categorical variables using oncoPrint

[1]:
import os,sys
# Presumably for running against a local checkout of PyComplexHeatmap; left disabled.
# sys.path.append(os.path.expanduser("~/Projects/Github/PyComplexHeatmap/"))
# NOTE: the star import is what brings into scope the names used unqualified in the
# plotting cell of this notebook (plt, HeatmapAnnotation, anno_label, anno_simple,
# anno_barplot, oncoPrintPlotter, despine) -- no other import here provides them.
from PyComplexHeatmap import *
use_pch_style() # or plt.style.use('default') to restore default style
import pandas as pd
import random
[2]:
# Create toy dataset
# Seed the generator so the toy data -- and every table and figure derived from
# it -- is identical on each "Restart Kernel -> Run All".
SEED = 42
random.seed(SEED)

samples = [f'Sample_{i}' for i in range(1, 11)]
genes = [f'Gene_{i}' for i in range(1, 11)]

# Long-format alteration records: one row per (sample, gene) pair with three
# mutually exclusive 0/1 flags, so exactly one of amp / neut / del is 1.
alts_lol = list()

for sample in samples:
    for gene in genes:
        amp_value = random.randint(0, 1)
        # A deletion is only drawn when the gene is not amplified.
        del_value = random.randint(0, 1) if amp_value == 0 else 0
        # Neutral means neither amplified nor deleted.
        neut_value = int(amp_value == 0 and del_value == 0)

        alts_lol.append([sample, gene, amp_value, neut_value, del_value])

alts_df = pd.DataFrame(alts_lol, columns=['sample', 'gene', 'amp', 'neut', 'del'])

# Prepare column annotations
# Each annotation frame keeps 'sample' as a column and also uses it as the index.
annot_1_df = pd.DataFrame([[i, random.randint(0, 100)] for i in samples],
                          columns=['sample', 'annot1'])
annot_1_df.index = annot_1_df['sample']

annot_2_df = pd.DataFrame([[i, random.randint(500, 5000)] for i in samples],
                          columns=['sample', 'annot2'])
annot_2_df.index = annot_2_df['sample']

# Categorical annotation: each sample is assigned one of patient1..patient5.
annot_3_df = pd.DataFrame([[i, f'patient{random.randint(1, 5)}'] for i in samples],
                          columns=['sample', 'patient'])
annot_3_df.index = annot_3_df['sample']
[3]:
# Long-format alteration table: one row per (sample, gene) pair with 0/1 amp/neut/del flags.
alts_df
[3]:
sample gene amp neut del
0 Sample_1 Gene_1 1 0 0
1 Sample_1 Gene_2 1 0 0
2 Sample_1 Gene_3 0 1 0
3 Sample_1 Gene_4 0 0 1
4 Sample_1 Gene_5 1 0 0
... ... ... ... ... ...
95 Sample_10 Gene_6 1 0 0
96 Sample_10 Gene_7 1 0 0
97 Sample_10 Gene_8 1 0 0
98 Sample_10 Gene_9 0 1 0
99 Sample_10 Gene_10 1 0 0

100 rows × 5 columns

[4]:
# Categorical annotation: the patient label for each sample, indexed by sample.
annot_3_df
[4]:
sample patient
sample
Sample_1 Sample_1 patient4
Sample_2 Sample_2 patient3
Sample_3 Sample_3 patient5
Sample_4 Sample_4 patient1
Sample_5 Sample_5 patient1
Sample_6 Sample_6 patient3
Sample_7 Sample_7 patient3
Sample_8 Sample_8 patient4
Sample_9 Sample_9 patient3
Sample_10 Sample_10 patient2
[5]:
# Numeric annotation 'annot1' (random integer in 0..100) for each sample, indexed by sample.
annot_1_df
[5]:
sample annot1
sample
Sample_1 Sample_1 31
Sample_2 Sample_2 5
Sample_3 Sample_3 26
Sample_4 Sample_4 96
Sample_5 Sample_5 17
Sample_6 Sample_6 32
Sample_7 Sample_7 4
Sample_8 Sample_8 96
Sample_9 Sample_9 3
Sample_10 Sample_10 92
[6]:
# Numeric annotation 'annot2' (random integer in 500..5000) for each sample, indexed by sample.
annot_2_df
[6]:
sample annot2
sample
Sample_1 Sample_1 4870
Sample_2 Sample_2 4572
Sample_3 Sample_3 2378
Sample_4 Sample_4 4970
Sample_5 Sample_5 4662
Sample_6 Sample_6 1196
Sample_7 Sample_7 3065
Sample_8 Sample_8 2553
Sample_9 Sample_9 4390
Sample_10 Sample_10 4068
[7]:
# Build the top annotation from the per-sample tables: patient labels,
# a simple patient strip, and one barplot each for annot1 and annot2.
top_annotation = HeatmapAnnotation(
    label=anno_label(annot_3_df.patient, merge=True, rotation=45),
    A3=anno_simple(annot_3_df['patient']),
    A1=anno_barplot(annot_1_df['annot1'], height=5),
    A2=anno_barplot(annot_2_df['annot2']),
)

# Plot oncoprint
plt.figure(figsize=(4, 6))

# `width` controls the width of the bar in each cell; there are other
# plot_kws as well, such as 'align'.
op = oncoPrintPlotter(
    data=alts_df,
    y='gene',
    x='sample',
    values=['amp', 'neut', 'del'],
    show_rownames=True,
    show_colnames=True,
    colors=['red', 'white', 'green'],
    top_annotation=top_annotation,
    col_split=annot_3_df['patient'],
    col_split_gap=0.2,
    width=0.8,
)

# Remove the grid on every axes of the top annotation.
# (A variant noted alongside this loop instead iterated over
# op.top_annotation.annotations and used each annotation's `.ax`.)
for ax in op.top_annotation.axes.flatten():
    ax.grid(False)
    # remove spines for top annotation and right annotation
    # NOTE(review): both despine calls are kept in their original order;
    # confirm against the library whether the second overrides the first.
    despine(ax=ax, left=False, bottom=True, right=False, top=True)
    despine(ax=ax, left=True, bottom=False, right=True, top=False)
Starting plotting..
Starting calculating row orders..
Reordering rows..
Starting calculating col orders..
Reordering cols..
Plotting matrix..
Starting plotting HeatmapAnnotations
Collecting legends..
Collecting annotation legends..
Plotting legends..
Estimated legend width: 21.344444444444445 mm
../_images/notebooks_oncoPrint2_7_1.png