Visualizing categorical variables using oncoPrint

[1]:
from PyComplexHeatmap import *
import pandas as pd
import random
[3]:
# Create toy dataset
# Seed the RNG so that Restart & Run All regenerates the same toy data
# (and therefore the same figure) on every run.
SEED = 42
random.seed(SEED)

samples = [f'Sample_{i}' for i in range(1, 11)]
genes = [f'Gene_{i}' for i in range(1, 11)]

# Long-format records: one [sample, gene, amp, neut, del] row per (sample, gene)
# pair. The three 0/1 flags are mutually exclusive.
alts_lol = list()

for sample in samples:
    for gene in genes:
        amp_value = random.randint(0, 1)
        # A deletion is only drawn when the pair is not amplified
        del_value = random.randint(0, 1) if amp_value == 0 else 0
        # Neutral exactly when the pair is neither amplified nor deleted
        neut_value = int(amp_value == 0 and del_value == 0)

        alts_lol.append([sample, gene, amp_value, neut_value, del_value])

alts_df = pd.DataFrame(alts_lol, columns=['sample', 'gene', 'amp', 'neut', 'del'])

# Sanity check: every (sample, gene) row carries exactly one alteration state
assert (alts_df[['amp', 'neut', 'del']].sum(axis=1) == 1).all()

# Prepare column annotations
# Each annotation frame is indexed by sample name while also keeping the
# 'sample' column (drop=False), matching the layout used by the cells below.
annot_1_df = pd.DataFrame(
    {'sample': samples, 'annot1': [random.randint(0, 100) for _ in samples]}
).set_index('sample', drop=False)

annot_2_df = pd.DataFrame(
    {'sample': samples, 'annot2': [random.randint(500, 5000) for _ in samples]}
).set_index('sample', drop=False)

# Categorical annotation: each sample is assigned to one of patient1..patient5
annot_3_df = pd.DataFrame(
    {'sample': samples, 'patient': ['patient' + str(random.randint(1, 5)) for _ in samples]}
).set_index('sample', drop=False)
[4]:
# Inspect the long-format alteration table: one row per (sample, gene) pair
# with 0/1 flags in the 'amp', 'neut' and 'del' columns.
alts_df
[4]:
sample gene amp neut del
0 Sample_1 Gene_1 1 0 0
1 Sample_1 Gene_2 0 0 1
2 Sample_1 Gene_3 1 0 0
3 Sample_1 Gene_4 0 0 1
4 Sample_1 Gene_5 0 1 0
... ... ... ... ... ...
95 Sample_10 Gene_6 0 0 1
96 Sample_10 Gene_7 1 0 0
97 Sample_10 Gene_8 0 0 1
98 Sample_10 Gene_9 1 0 0
99 Sample_10 Gene_10 1 0 0

100 rows × 5 columns

[9]:
# Inspect the categorical annotation: a 'patient' label per sample, indexed by sample name.
annot_3_df
[9]:
sample patient
sample
Sample_1 Sample_1 patient2
Sample_2 Sample_2 patient5
Sample_3 Sample_3 patient3
Sample_4 Sample_4 patient4
Sample_5 Sample_5 patient1
Sample_6 Sample_6 patient4
Sample_7 Sample_7 patient5
Sample_8 Sample_8 patient5
Sample_9 Sample_9 patient5
Sample_10 Sample_10 patient4
[10]:
# Inspect the first numeric annotation ('annot1', random integers in 0..100), indexed by sample name.
annot_1_df
[10]:
sample annot1
sample
Sample_1 Sample_1 77
Sample_2 Sample_2 89
Sample_3 Sample_3 14
Sample_4 Sample_4 78
Sample_5 Sample_5 80
Sample_6 Sample_6 27
Sample_7 Sample_7 32
Sample_8 Sample_8 61
Sample_9 Sample_9 19
Sample_10 Sample_10 81
[11]:
# Inspect the second numeric annotation ('annot2', random integers in 500..5000), indexed by sample name.
annot_2_df
[11]:
sample annot2
sample
Sample_1 Sample_1 1114
Sample_2 Sample_2 2698
Sample_3 Sample_3 4718
Sample_4 Sample_4 4633
Sample_5 Sample_5 3444
Sample_6 Sample_6 1984
Sample_7 Sample_7 2088
Sample_8 Sample_8 2316
Sample_9 Sample_9 2939
Sample_10 Sample_10 1373
[8]:
# Build the column (top) annotation from the per-sample frames:
#   label - text labels taken from the patient column
#           (presumably merge=True collapses adjacent identical labels - confirm in the PyComplexHeatmap docs)
#   A3    - simple annotation of the patient category
#   A1/A2 - bar plots of the two numeric annotations
top_annotation=HeatmapAnnotation(label=anno_label(annot_3_df.patient, merge=True,rotation=45),
                                 A3=anno_simple(annot_3_df['patient']),
                                 A1=anno_barplot(annot_1_df['annot1'],height=5),
                                 A2=anno_barplot(annot_2_df['annot2']))

# Plot oncoprint
plt.figure(figsize=(4,6))

# Genes go on the y axis and samples on the x axis; the three 0/1 columns listed
# in `values` are drawn with the entries of `colors` (presumably matched by
# position: amp=red, neut=white, del=green - confirm). Columns are split into
# groups by patient.
op=oncoPrintPlotter(data=alts_df, y='gene', x='sample', values=['amp', 'neut', 'del'],
                    show_rownames=True, show_colnames=True, colors=['red', 'white', 'green'],
                    top_annotation=top_annotation, col_split=annot_3_df['patient'],
                    col_split_gap=0.2,width=0.8)
# `width` controls the width of the bar in each cell.
# There are other plot_kws, such as 'align'.

# Alternative (disabled): remove the grid by iterating over the individual
# annotation objects instead of the annotation axes array used below.
# for annotation in op.top_annotation.annotations:
#     ax=annotation.ax
#     ax.grid(False)
#     #remove spines for top annotation and right annotation
#     despine(ax=ax,left=False, bottom=True, right=False, top=True)
#     despine(ax=ax,left=True, bottom=False, right=True, top=False)

# Remove the grid and the spines from every axes of the top annotation
for ax in op.top_annotation.axes.flatten():
    ax.grid(False)
    # First call requests removal of the bottom and top spines, second call of
    # the left and right spines.
    # NOTE(review): if `despine` sets the visibility of all four sides on every
    # call (as seaborn's despine does), the second call re-shows top/bottom and
    # only left/right end up hidden - confirm the intended result before merging
    # these into a single call.
    despine(ax=ax,left=False, bottom=True, right=False, top=True)
    despine(ax=ax,left=True, bottom=False, right=True, top=False)
Starting plotting..
Starting calculating row orders..
Reordering rows..
Starting calculating col orders..
Reordering cols..
Plotting matrix..
Starting plotting HeatmapAnnotations
Collecting legends..
Collecting annotation legends..
Plotting legends..
../_images/notebooks_oncoPrint2_7_1.png