Tutorial 2: Testing this out. Did we need a title after all?

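To demonstrate some of the main functionalities of ImmunoPheno, here we perform a basic analysis of a titration dataset with paired protein (ADT) and RNA counts: we load the data, fit and normalize all antibodies, and visualize the cells with UMAP before and after normalization.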

[9]:
!python -m pip uninstall immunopheno --yes
WARNING: Skipping immunopheno as it is not installed.

[10]:
!pip install git+https://github.com/CamaraLab/ImmunoPheno.git@package-dev --q
[2]:
pip show immunopheno
Name: ImmunoPheno
Version: 0.1.0
Summary: My package description
Home-page:
Author:
Author-email: Pablo Cámara <pcamara@pennmedicine.upenn.edu>
License: MIT
Location: /opt/conda/lib/python3.10/site-packages
Requires: matplotlib, numpy, pandas, plotly, pytest-cov, pytest-mock, scipy, seaborn, statsmodels, umap-learn
Required-by:
Note: you may need to restart the kernel to use updated packages.
[11]:
from pathlib import Path

import pandas as pd

from immunopheno.data_processing import ImmunoPhenoData
from immunopheno.plots import plot_UMAP
[25]:
# Directory holding the titration inputs; change this one line to point at your own copy of the data.
DATA_DIR = Path('/home/jovyan/Dropbox/Projects/PGC019_ITCR/Analyses/PGC019.a1')

# Load the titration count tables into pandas, using the first CSV column as the row index.
# (A comma is already read_csv's default separator, so `sep` is omitted.)
titr_protein = pd.read_csv(DATA_DIR / 'ADT_count_titration_128.csv', index_col=[0])
titr_rna = pd.read_csv(DATA_DIR / 'RNA_count_titration.csv', index_col=[0])
[26]:
# Optional: per-cell annotations (skip this step if you do not have any).
# The file is read with header=None, so the resulting columns are labelled 0 and 1.
titr_annotations = pd.read_csv(
    '/home/jovyan/Dropbox/Projects/PGC019_ITCR/Analyses/PGC019.a1/titration_cluster_labels_updated.csv',
    header=None,
)
[33]:
# Preview the annotation table: column 0 holds the cell barcodes, column 1 the cell-type label.
titr_annotations
[33]:
0 1
0 AAACCTGAGATCGATA.1 CD4T
1 AAACCTGAGGTGCTAG.1 NK
2 AAACCTGAGTGTCTCA.1 CD4T
3 AAACCTGAGTTAACGA.1 CD4T
4 AAACCTGCACCGAATT.1 CD8T
... ... ...
4597 TTTGTCAGTGTATGGG.1 NK
4598 TTTGTCAGTTGTGGCC.1 CD4T
4599 TTTGTCATCAGGCGAA.1 CD8T
4600 TTTGTCATCAGTGCAT.1 CD4T
4601 TTTGTCATCTTATCTG.1 CD4T

4602 rows × 2 columns

[27]:
# Bundle the protein counts, RNA counts and cell annotations into a single ImmunoPhenoData object
titr_IPD = ImmunoPhenoData(
    protein_matrix=titr_protein,
    gene_matrix=titr_rna,
    cell_labels=titr_annotations,
)
[34]:
# Check how the cell labels are stored on the object.
# NOTE(review): `_cell_labels` is a private attribute; prefer a public accessor if the class provides one.
titr_IPD._cell_labels
[34]:
1
0
AAACCTGAGATCGATA.1 CD4T
AAACCTGAGGTGCTAG.1 NK
AAACCTGAGTGTCTCA.1 CD4T
AAACCTGAGTTAACGA.1 CD4T
AAACCTGCACCGAATT.1 CD8T
... ...
TTTGTCAGTGTATGGG.1 NK
TTTGTCAGTTGTGGCC.1 CD4T
TTTGTCATCAGGCGAA.1 CD8T
TTTGTCATCAGTGCAT.1 CD4T
TTTGTCATCTTATCTG.1 CD4T

4602 rows × 1 columns

[28]:
# Fit a model to every antibody in the dataset
# NOTE(review): "nb" presumably selects a negative binomial model; confirm against the ImmunoPheno docs.
ab_fits = titr_IPD.fit_all_antibodies(model="nb")
[29]:
# Normalize the counts of every fitted antibody
normalized_counts = titr_IPD.normalize_all_antibodies(
    p_threshold=1,
    sig_expr_threshold=1,
    bg_expr_threshold=0,
)
WARNING:root: 1 cells with 0% or 100% expression have been automatically filtered out.
[30]:
# Preview the normalized counts (rows: cells, columns: antibodies).
normalized_counts
[30]:
CD86 CD11b CD155 CD47 CD70 CD30 CD40 CD154 CD52 CD3 ... CD85j CD23 Ig_light_chain_lambda GARP CD328 CD82 CD101 CD360 CD88 CD224
AAACCTGAGATCGATA.1 -10.0 -10.000000 -10.000000 -0.460284 -10.000000 -10.000000 -10.000000 -10.0 -0.174112 -1.093664 ... -10.0 -10.000000 -0.199029 -10.0 -10.000000 0.668037 -10.000000 -10.0 -10.0 -0.316886
AAACCTGAGGTGCTAG.1 -10.0 -0.301102 -10.000000 -10.000000 -10.000000 0.956764 -10.000000 -10.0 -10.000000 -10.000000 ... -10.0 -10.000000 -10.000000 -10.0 -0.804102 -0.641263 -10.000000 -10.0 -10.0 -10.000000
AAACCTGAGTGTCTCA.1 -10.0 -10.000000 -10.000000 -10.000000 -10.000000 -10.000000 -10.000000 -10.0 -1.010289 -10.000000 ... -10.0 -0.151787 -10.000000 -10.0 -10.000000 -10.000000 -10.000000 -10.0 -10.0 -10.000000
AAACCTGAGTTAACGA.1 -10.0 0.107471 -0.305559 2.425378 -0.209423 0.050867 -0.444389 -10.0 0.515340 1.389753 ... -10.0 -10.000000 -0.188508 -10.0 -0.900573 0.177272 0.123500 -10.0 -10.0 -0.133478
AAACCTGCACCGAATT.1 -10.0 -10.000000 -10.000000 -0.600946 -10.000000 -10.000000 -10.000000 -10.0 -0.964027 -0.825876 ... -10.0 -10.000000 -10.000000 -10.0 -10.000000 -10.000000 -10.000000 -10.0 -10.0 -0.416079
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
TTTGTCAGTGTATGGG.1 -10.0 -0.337266 -10.000000 -10.000000 -10.000000 -10.000000 -10.000000 -10.0 -10.000000 -10.000000 ... -10.0 -10.000000 -10.000000 -10.0 -0.611211 -10.000000 -10.000000 -10.0 -10.0 -10.000000
TTTGTCAGTTGTGGCC.1 -10.0 -10.000000 -10.000000 0.038870 -10.000000 -10.000000 -10.000000 -10.0 -0.060641 -0.368290 ... -10.0 -10.000000 -10.000000 -10.0 -10.000000 -0.002836 -10.000000 -10.0 -10.0 -0.271702
TTTGTCATCAGGCGAA.1 -10.0 -10.000000 -10.000000 -10.000000 -10.000000 -10.000000 -10.000000 -10.0 -10.000000 -10.000000 ... -10.0 -10.000000 -0.221565 -10.0 -10.000000 -10.000000 -0.312163 -10.0 -10.0 -10.000000
TTTGTCATCAGTGCAT.1 -10.0 -10.000000 -0.760652 -0.526684 -10.000000 -0.526821 -10.000000 -10.0 0.519523 -0.650985 ... -10.0 -10.000000 -0.199306 -10.0 -10.000000 0.666830 -10.000000 -10.0 -10.0 0.807605
TTTGTCATCTTATCTG.1 -10.0 -10.000000 -10.000000 -10.000000 -10.000000 -10.000000 -10.000000 -10.0 -10.000000 -10.000000 ... -10.0 -0.473411 -10.000000 -10.0 -0.900560 -10.000000 -10.000000 -10.0 -10.0 -10.000000

4601 rows × 128 columns

[31]:
# Plot UMAP with normalized=False
plot_UMAP(titr_IPD, normalized=False)
[32]:
# Plot UMAP again with normalized=True, for comparison with the previous plot
plot_UMAP(titr_IPD, normalized=True)