Tutorial 2: Testing this out. Did we need a title after all?
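To demonstrate some of the main functionalities of ImmunoPheno, here we perform some basic analysis on a set of antibody titration data: we load the protein and RNA count tables, fit and normalize all antibodies, and plot UMAPs.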
[9]:
!python -m pip uninstall immunopheno --yes
WARNING: Skipping immunopheno as it is not installed.
[10]:
!pip install git+https://github.com/CamaraLab/ImmunoPheno.git@package-dev --q
[2]:
pip show immunopheno
Name: ImmunoPheno
Version: 0.1.0
Summary: My package description
Home-page:
Author:
Author-email: Pablo Cámara <pcamara@pennmedicine.upenn.edu>
License: MIT
Location: /opt/conda/lib/python3.10/site-packages
Requires: matplotlib, numpy, pandas, plotly, pytest-cov, pytest-mock, scipy, seaborn, statsmodels, umap-learn
Required-by:
Note: you may need to restart the kernel to use updated packages.
[11]:
import pandas as pd
from immunopheno.data_processing import ImmunoPhenoData
from immunopheno.plots import plot_UMAP
[25]:
# Load titration data into pandas.
# Directory holding the titration CSVs; edit this single line to run on another machine.
TITRATION_DIR = '/home/jovyan/Dropbox/Projects/PGC019_ITCR/Analyses/PGC019.a1'

# The first CSV column becomes the row index of each table.
titr_protein = pd.read_csv(f'{TITRATION_DIR}/ADT_count_titration_128.csv', sep=',', index_col=[0])
titr_rna = pd.read_csv(f'{TITRATION_DIR}/RNA_count_titration.csv', sep=',', index_col=[0])
[26]:
# Optional step: per-cell annotations. Skip this cell if you have no labels.
# The file has no header row, so pandas names the two columns 0 and 1.
titr_annotations = pd.read_csv(
    '/home/jovyan/Dropbox/Projects/PGC019_ITCR/Analyses/PGC019.a1/titration_cluster_labels_updated.csv',
    sep=',',
    header=None,
)
[33]:
# Inspect the raw annotation table: column 0 holds the cell identifiers, column 1 the labels.
titr_annotations
[33]:
| 0 | 1 | |
|---|---|---|
| 0 | AAACCTGAGATCGATA.1 | CD4T |
| 1 | AAACCTGAGGTGCTAG.1 | NK |
| 2 | AAACCTGAGTGTCTCA.1 | CD4T |
| 3 | AAACCTGAGTTAACGA.1 | CD4T |
| 4 | AAACCTGCACCGAATT.1 | CD8T |
| ... | ... | ... |
| 4597 | TTTGTCAGTGTATGGG.1 | NK |
| 4598 | TTTGTCAGTTGTGGCC.1 | CD4T |
| 4599 | TTTGTCATCAGGCGAA.1 | CD8T |
| 4600 | TTTGTCATCAGTGCAT.1 | CD4T |
| 4601 | TTTGTCATCTTATCTG.1 | CD4T |
4602 rows × 2 columns
[27]:
# Create the ImmunoPhenoData object from the protein counts, RNA counts and cell annotations.
titr_IPD = ImmunoPhenoData(
    protein_matrix=titr_protein,
    gene_matrix=titr_rna,
    cell_labels=titr_annotations,
)
[34]:
# Check how the annotations are stored on the object: input column 0 is now the index.
# NOTE(review): `_cell_labels` is a private attribute; prefer a public accessor if the API offers one.
titr_IPD._cell_labels
[34]:
| 1 | |
|---|---|
| 0 | |
| AAACCTGAGATCGATA.1 | CD4T |
| AAACCTGAGGTGCTAG.1 | NK |
| AAACCTGAGTGTCTCA.1 | CD4T |
| AAACCTGAGTTAACGA.1 | CD4T |
| AAACCTGCACCGAATT.1 | CD8T |
| ... | ... |
| TTTGTCAGTGTATGGG.1 | NK |
| TTTGTCAGTTGTGGCC.1 | CD4T |
| TTTGTCATCAGGCGAA.1 | CD8T |
| TTTGTCATCAGTGCAT.1 | CD4T |
| TTTGTCATCTTATCTG.1 | CD4T |
4602 rows × 1 columns
[28]:
# Fit a model to every antibody held by `titr_IPD`.
# NOTE(review): "nb" presumably selects a negative binomial model; confirm in the ImmunoPheno docs.
ab_fits = titr_IPD.fit_all_antibodies(model="nb")
[29]:
# Normalize all antibodies with the thresholds listed below.
# NOTE(review): the meaning of each threshold is defined by ImmunoPheno; confirm these values are intended.
normalized_counts = titr_IPD.normalize_all_antibodies(
    p_threshold=1,
    sig_expr_threshold=1,
    bg_expr_threshold=0,
)
WARNING:root: 1 cells with 0% or 100% expression have been automatically filtered out.
[30]:
# Inspect the normalized table: one row per cell, one column per antibody.
normalized_counts
[30]:
| CD86 | CD11b | CD155 | CD47 | CD70 | CD30 | CD40 | CD154 | CD52 | CD3 | ... | CD85j | CD23 | Ig_light_chain_lambda | GARP | CD328 | CD82 | CD101 | CD360 | CD88 | CD224 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AAACCTGAGATCGATA.1 | -10.0 | -10.000000 | -10.000000 | -0.460284 | -10.000000 | -10.000000 | -10.000000 | -10.0 | -0.174112 | -1.093664 | ... | -10.0 | -10.000000 | -0.199029 | -10.0 | -10.000000 | 0.668037 | -10.000000 | -10.0 | -10.0 | -0.316886 |
| AAACCTGAGGTGCTAG.1 | -10.0 | -0.301102 | -10.000000 | -10.000000 | -10.000000 | 0.956764 | -10.000000 | -10.0 | -10.000000 | -10.000000 | ... | -10.0 | -10.000000 | -10.000000 | -10.0 | -0.804102 | -0.641263 | -10.000000 | -10.0 | -10.0 | -10.000000 |
| AAACCTGAGTGTCTCA.1 | -10.0 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.0 | -1.010289 | -10.000000 | ... | -10.0 | -0.151787 | -10.000000 | -10.0 | -10.000000 | -10.000000 | -10.000000 | -10.0 | -10.0 | -10.000000 |
| AAACCTGAGTTAACGA.1 | -10.0 | 0.107471 | -0.305559 | 2.425378 | -0.209423 | 0.050867 | -0.444389 | -10.0 | 0.515340 | 1.389753 | ... | -10.0 | -10.000000 | -0.188508 | -10.0 | -0.900573 | 0.177272 | 0.123500 | -10.0 | -10.0 | -0.133478 |
| AAACCTGCACCGAATT.1 | -10.0 | -10.000000 | -10.000000 | -0.600946 | -10.000000 | -10.000000 | -10.000000 | -10.0 | -0.964027 | -0.825876 | ... | -10.0 | -10.000000 | -10.000000 | -10.0 | -10.000000 | -10.000000 | -10.000000 | -10.0 | -10.0 | -0.416079 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| TTTGTCAGTGTATGGG.1 | -10.0 | -0.337266 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.0 | -10.000000 | -10.000000 | ... | -10.0 | -10.000000 | -10.000000 | -10.0 | -0.611211 | -10.000000 | -10.000000 | -10.0 | -10.0 | -10.000000 |
| TTTGTCAGTTGTGGCC.1 | -10.0 | -10.000000 | -10.000000 | 0.038870 | -10.000000 | -10.000000 | -10.000000 | -10.0 | -0.060641 | -0.368290 | ... | -10.0 | -10.000000 | -10.000000 | -10.0 | -10.000000 | -0.002836 | -10.000000 | -10.0 | -10.0 | -0.271702 |
| TTTGTCATCAGGCGAA.1 | -10.0 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.0 | -10.000000 | -10.000000 | ... | -10.0 | -10.000000 | -0.221565 | -10.0 | -10.000000 | -10.000000 | -0.312163 | -10.0 | -10.0 | -10.000000 |
| TTTGTCATCAGTGCAT.1 | -10.0 | -10.000000 | -0.760652 | -0.526684 | -10.000000 | -0.526821 | -10.000000 | -10.0 | 0.519523 | -0.650985 | ... | -10.0 | -10.000000 | -0.199306 | -10.0 | -10.000000 | 0.666830 | -10.000000 | -10.0 | -10.0 | 0.807605 |
| TTTGTCATCTTATCTG.1 | -10.0 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.0 | -10.000000 | -10.000000 | ... | -10.0 | -0.473411 | -10.000000 | -10.0 | -0.900560 | -10.000000 | -10.000000 | -10.0 | -10.0 | -10.000000 |
4601 rows × 128 columns
[31]:
# Plot UMAP with normalized=False.
# NOTE(review): presumably this uses the un-normalized protein counts; confirm against plot_UMAP's docs.
plot_UMAP(titr_IPD, normalized=False)
[32]:
# Plot UMAP again with normalized=True, for comparison with the previous plot.
plot_UMAP(titr_IPD, normalized=True)