sigc tutorials
Contents
sigc tutorials
[86]:
import sigc
import scanpy as sc
import anndata as ad
import pandas as pd
import numpy as np
import gseapy
import warnings
warnings.filterwarnings("ignore")
[15]:
# Load the dataset stored in the .h5ad file into an AnnData object.
adata = ad.read_h5ad("kidney_tumors_convert_symbol.h5ad")
[4]:
# Fetch sigc's default KEGG metabolism signatures as a long-format DataFrame
# (one row per signature/gene pair) and preview the first five rows.
kegg_metabolism = sigc.metabolism_sigs(resources='KEGG')
display(kegg_metabolism.head(5))
| name | description | member | |
|---|---|---|---|
| 0 | Glycolysis / Gluconeogenesis | 00010 | HK3 |
| 1 | Glycolysis / Gluconeogenesis | 00010 | HK1 |
| 2 | Glycolysis / Gluconeogenesis | 00010 | HK2 |
| 3 | Glycolysis / Gluconeogenesis | 00010 | HKDC1 |
| 4 | Glycolysis / Gluconeogenesis | 00010 | GCK |
[17]:
# Export the expression matrix as a DataFrame; sigc_score expects cells in
# rows and genes in columns (n_cells x n_genes).
df = adata.to_df()
print(df.shape)
# Score every KEGG metabolism signature in every cell with AUCell; the result
# is a DataFrame of shape n_cells x n_signatures.
sig_mtx = sigc.sigc_score(df, kegg_metabolism, method="AUCell")
(4636, 32922)
[18]:
adata
[18]:
AnnData object with n_obs × n_vars = 4636 × 32922
obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'seurat_clusters', 'mtFrac', 'age', 'CnT', 'DTLH', 'UBCD', 'SSBpr', 'End', 'MSC', 'RVCSB', 'SSBpod', 'SSBm.d', 'ICa', 'ErPrT', 'ICb', 'NPC', 'Pod', 'author_cell_type', 'cell_type_ontology_term_id', 'disease_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'development_stage_ontology_term_id', 'tissue_ontology_term_id', 'sex_ontology_term_id', 'organism_ontology_term_id', 'is_primary_data', 'assay_ontology_term_id', 'donor_id', 'suspension_type', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage'
var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype', 'external_gene_name'
obsm: 'X_pca', 'X_umap'
[19]:
sig_mtx.shape
[19]:
(4636, 85)
[20]:
# Disabled: copy embeddings from alternative obsm keys. Not needed for this
# dataset, whose obsm already provides 'X_pca' and 'X_umap'.
#adata.obsm["umap"] = adata.obsm["X_bothumap"]
#adata.obsm["tsne"] = adata.obsm["X_bothtsne"]
# Wrap the signature score matrix in its own AnnData, reusing the cell
# metadata (obs) and embeddings (obsm) of the expression object so that scanpy
# functions can operate on signature scores.
sig_adata = ad.AnnData(sig_mtx, obs=adata.obs, obsm=adata.obsm)
[21]:
sig_mtx.head(2)
[21]:
| Regulon | Glycolysis / Gluconeogenesis | Citrate cycle (TCA cycle) | Pentose phosphate pathway | Pentose and glucuronate interconversions | Fructose and mannose metabolism | Galactose metabolism | Ascorbate and aldarate metabolism | Starch and sucrose metabolism | Amino sugar and nucleotide sugar metabolism | Pyruvate metabolism | ... | One carbon pool by folate | Retinol metabolism | Porphyrin and chlorophyll metabolism | Ubiquinone and other terpenoid-quinone biosynthesis | Terpenoid backbone biosynthesis | Caffeine metabolism | Neomycin, kanamycin and gentamicin biosynthesis | Metabolism of xenobiotics by cytochrome P450 | Drug metabolism - cytochrome P450 | Drug metabolism - other enzymes |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Cell | |||||||||||||||||||||
| NB8350521_AAACCCAAGTGATGGC | 0.999293 | 0.999311 | 0.999332 | 0.999373 | 0.999373 | 0.999392 | 0.999368 | 0.999392 | 0.999379 | 0.999268 | ... | 0.999332 | 0.999373 | 0.999392 | 0.999392 | 0.999337 | 0.999392 | 0.999392 | 0.999322 | 0.999336 | 0.999303 |
| NB8350521_AAACGAAGTATTGAGA | 0.999284 | 0.999392 | 0.999352 | 0.999373 | 0.999334 | 0.999392 | 0.999392 | 0.999392 | 0.999379 | 0.999330 | ... | 0.999362 | 0.999373 | 0.999377 | 0.999392 | 0.999392 | 0.999392 | 0.999392 | 0.999375 | 0.999374 | 0.999360 |
2 rows × 85 columns
[29]:
import matplotlib.pyplot as plt
# Create the figure and a single Axes up front so the scanpy plot can be drawn
# into it through the ax= argument.
fig = plt.figure(figsize=(8,5),constrained_layout=True)
ax = fig.add_gridspec(top=0.75, right=0.75).subplots()
# The variables of sig_adata are signatures rather than genes, so this plot
# reports the 20 top-ranked signatures (n_top=20).
sc.pl.highest_expr_genes(sig_adata, n_top=20, ax=ax)
Visualization
Because the signature scores are wrapped in an AnnData object, scanpy's plotting functions can be applied to them directly.
[30]:
# Color the UMAP embedding by each cell's score for one signature.
sc.pl.umap(sig_adata, color='Glycolysis / Gluconeogenesis')
[37]:
# Rank signatures for each 'cell_type' group using the Wilcoxon method; the
# result is stored under sig_adata.uns['rank_genes_groups'].
sc.tl.rank_genes_groups(sig_adata, 'cell_type', method='wilcoxon')
[53]:
# Collect the distinct cell-type labels as a plain Python list and display it.
celltypes = sig_adata.obs["cell_type"].unique().to_list()
celltypes
[53]:
['epithelial cell of nephron',
'mesenchymal stem cell',
'leukocyte',
'native cell']
[77]:
# Disabled: alternative figure/axes setups kept for reference.
#import matplotlib.pyplot as plt
#fig = plt.figure(figsize=(8,9),constrained_layout=True)
#ax = fig.add_gridspec(nrows=2, ncols=2, top=0.8, right=0.8).subplots()
#fig, ax = plt.subplots(1,1, figsize=(4,4),constrained_layout=True)
# Plot the 15 top-ranked signatures for the first cell type only; show=False
# tells scanpy not to call show() itself.
sc.pl.rank_genes_groups(sig_adata, groups=[celltypes[0]], n_genes=15, show=False)
[78]:
# Tabulate the five top-ranked signature names, one column per cell-type group.
pd.DataFrame(sig_adata.uns['rank_genes_groups']['names']).head(5)
[78]:
| epithelial cell of nephron | leukocyte | mesenchymal stem cell | native cell | |
|---|---|---|---|---|
| 0 | Arachidonic acid metabolism | Retinol metabolism | Steroid biosynthesis | Phenylalanine metabolism |
| 1 | Histidine metabolism | One carbon pool by folate | Arginine and proline metabolism | Oxidative phosphorylation |
| 2 | Ascorbate and aldarate metabolism | Glycine, serine and threonine metabolism | Glutathione metabolism | Tyrosine metabolism |
| 3 | Glycosphingolipid biosynthesis - ganglio series | Propanoate metabolism | Drug metabolism - other enzymes | Fructose and mannose metabolism |
| 4 | Valine, leucine and isoleucine biosynthesis | Taurine and hypotaurine metabolism | Drug metabolism - cytochrome P450 | Arachidonic acid metabolism |
[94]:
# Collect the five top-ranked signatures of every cell-type group into one
# de-duplicated list. dict.fromkeys keeps first-seen order (row-major over the
# table: rank 1 of each group, then rank 2, ...), so the list -- and therefore
# marker_metabolisms[0] and the row order of later plots -- is identical on
# every run, unlike list(set(...)), whose order depends on string hashing.
top_ranked = pd.DataFrame(sig_adata.uns['rank_genes_groups']['names']).head(5)
marker_metabolisms = list(dict.fromkeys(top_ranked.values.ravel()))
[100]:
# Violin plot of the first marker signature's score, grouped by cell type.
sc.pl.violin(sig_adata, marker_metabolisms[0], groupby='cell_type', ncols=5)
[107]:
# Pre-size a single Axes and draw the stacked violin plot of all marker
# signatures, grouped by cell type, into it.
fig, ax = plt.subplots(1,1, figsize=(9,6),constrained_layout=True)
# The trailing semicolon keeps the notebook from echoing the return value.
sc.pl.stacked_violin(sig_adata, marker_metabolisms, groupby='cell_type', rotation=45, ax=ax);
[108]:
# Dot plot of the marker signatures, grouped by cell type.
sc.pl.dotplot(sig_adata, marker_metabolisms, groupby='cell_type')
[ ]:
[19]:
help(sigc)
Help on package sigc:
NAME
sigc
PACKAGE CONTENTS
core
FUNCTIONS
genesets2GeneSig(df: pandas.core.frame.DataFrame) -> Sequence[Type[ctxcore.genesig.GeneSignature]]
Convert dataframe to GeneSig for AUCell.
:param df: A dataframe with columns ["name", "member", "description"].
name member description
signature1 gene1 signature1 description
signature1 gene2 signature1 description
signature2 gene4 signature2 description
:return: GeneSignature list.
metabolism_sigs(resources='KEGG') -> pandas.core.frame.DataFrame
Get a set of default metabolism signatures from KEGG or REACTOME.
:param resources: KEGG or REACTOME, default KEGG
sigc_score(ex_mtx: pandas.core.frame.DataFrame, GeneSigs: pandas.core.frame.DataFrame, method='AUCell', num_workers=4) -> pandas.core.frame.DataFrame
Get a set of signature scores for a given gene expression matrix.
:param ex_mtx: The expression profile matrix.
The rows should correspond to different cells,
the columns to different genes (n_cells x n_genes).
:param GeneSigs: A dataframe with columns ["name", "member", "description"].
name member description
signature1 gene1 signature1 description
signature1 gene2 signature1 description
signature2 gene4 signature2 description
:param method: signature score method [AUCell, GSVA, ssGSEA, ...] (default: AUCell).
:param num_workers: The number of cores to use in AUCell method (default: 4).
:return: A dataframe with cell signature score (n_cells x n_signatures).
DATA
__all__ = ['genesets2GeneSig', 'metabolism_sigs', 'sigc_score']
VERSION
0.1.15
FILE
/root/anaconda3/lib/python3.9/site-packages/sigc/__init__.py
[ ]: