sigc tutorials

[86]:
import warnings

import anndata as ad
import gseapy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc

import sigc
# Suppress every warning for the rest of the session (this also hides
# deprecation notices from the libraries imported above); narrow or remove
# this filter when debugging.
warnings.filterwarnings("ignore")
[15]:
# Read the expression data; the relative path is resolved against the
# notebook's working directory.
adata = ad.read_h5ad(filename="kidney_tumors_convert_symbol.h5ad")
[4]:
# Fetch the default KEGG metabolism signatures as a long-format table
# (one row per signature/gene pair) and preview the first rows.
kegg_metabolism = sigc.metabolism_sigs(resources="KEGG")
display(kegg_metabolism.head(n=5))
name description member
0 Glycolysis / Gluconeogenesis 00010 HK3
1 Glycolysis / Gluconeogenesis 00010 HK1
2 Glycolysis / Gluconeogenesis 00010 HK2
3 Glycolysis / Gluconeogenesis 00010 HKDC1
4 Glycolysis / Gluconeogenesis 00010 GCK
[17]:
# Expression matrix as a DataFrame (cells in rows, genes in columns).
df = adata.to_df()
print(df.shape)

# Score every KEGG metabolism signature in every cell with AUCell;
# the result has one row per cell and one column per signature.
sig_mtx = sigc.sigc_score(
    ex_mtx=df,
    GeneSigs=kegg_metabolism,
    method="AUCell",
)
(4636, 32922)
[18]:
# Show the AnnData summary: dimensions plus the available obs/var/obsm keys.
adata
[18]:
AnnData object with n_obs × n_vars = 4636 × 32922
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'seurat_clusters', 'mtFrac', 'age', 'CnT', 'DTLH', 'UBCD', 'SSBpr', 'End', 'MSC', 'RVCSB', 'SSBpod', 'SSBm.d', 'ICa', 'ErPrT', 'ICb', 'NPC', 'Pod', 'author_cell_type', 'cell_type_ontology_term_id', 'disease_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'development_stage_ontology_term_id', 'tissue_ontology_term_id', 'sex_ontology_term_id', 'organism_ontology_term_id', 'is_primary_data', 'assay_ontology_term_id', 'donor_id', 'suspension_type', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype', 'external_gene_name'
    obsm: 'X_pca', 'X_umap'
[19]:
# Sanity check: the score table should be (n_cells, n_signatures).
sig_mtx.shape
[19]:
(4636, 85)
[20]:
# Wrap the score table in its own AnnData object, reusing the cell metadata
# (obs) and the embeddings (obsm) of the expression data so that scanpy can
# group and plot the signature scores.
sig_adata = ad.AnnData(
    X=sig_mtx,
    obs=adata.obs,
    obsm=adata.obsm,
)
[21]:
# Preview the scores of the first two cells.
# NOTE(review): every score in the saved output is ~0.999; confirm that such a
# narrow range is expected for this input before interpreting the plots below.
sig_mtx.head(2)
[21]:
Regulon Glycolysis / Gluconeogenesis Citrate cycle (TCA cycle) Pentose phosphate pathway Pentose and glucuronate interconversions Fructose and mannose metabolism Galactose metabolism Ascorbate and aldarate metabolism Starch and sucrose metabolism Amino sugar and nucleotide sugar metabolism Pyruvate metabolism ... One carbon pool by folate Retinol metabolism Porphyrin and chlorophyll metabolism Ubiquinone and other terpenoid-quinone biosynthesis Terpenoid backbone biosynthesis Caffeine metabolism Neomycin, kanamycin and gentamicin biosynthesis Metabolism of xenobiotics by cytochrome P450 Drug metabolism - cytochrome P450 Drug metabolism - other enzymes
Cell
NB8350521_AAACCCAAGTGATGGC 0.999293 0.999311 0.999332 0.999373 0.999373 0.999392 0.999368 0.999392 0.999379 0.999268 ... 0.999332 0.999373 0.999392 0.999392 0.999337 0.999392 0.999392 0.999322 0.999336 0.999303
NB8350521_AAACGAAGTATTGAGA 0.999284 0.999392 0.999352 0.999373 0.999334 0.999392 0.999392 0.999392 0.999379 0.999330 ... 0.999362 0.999373 0.999377 0.999392 0.999392 0.999392 0.999392 0.999375 0.999374 0.999360

2 rows × 85 columns

[29]:
# Plot the 20 top-ranked signatures with scanpy's highest_expr_genes on a
# manually created axis. `plt` comes from the import cell at the top of the
# notebook, so this cell and the later plotting cells do not depend on each
# other's execution order.
fig = plt.figure(figsize=(8, 5), constrained_layout=True)
ax = fig.add_gridspec(top=0.75, right=0.75).subplots()

sc.pl.highest_expr_genes(sig_adata, n_top=20, ax=ax)

_images/sigc_main_9_0.png

Visualization

Because the signature scores are stored in an AnnData object, scanpy's plotting functions can be applied to them directly.

[30]:
# Colour the stored UMAP embedding by the score of a single signature.
sc.pl.umap(sig_adata, color="Glycolysis / Gluconeogenesis")
_images/sigc_main_11_0.png
[37]:
# Rank the signatures of each cell type against all other cells (Wilcoxon
# test); results are stored in sig_adata.uns["rank_genes_groups"].
sc.tl.rank_genes_groups(sig_adata, groupby="cell_type", method="wilcoxon")
[53]:
# Cell-type labels in order of first appearance in obs. Note that this is not
# the column order of the rank_genes_groups result.
celltypes = list(sig_adata.obs["cell_type"].unique())
celltypes
[53]:
['epithelial cell of nephron',
 'mesenchymal stem cell',
 'leukocyte',
 'native cell']
[77]:
# Show the 15 top-ranked signatures of the first cell type.
# show=False makes scanpy return the axes instead of calling plt.show(); the
# trailing semicolon keeps that return value out of the cell output.
sc.pl.rank_genes_groups(sig_adata, groups=[celltypes[0]], n_genes=15, show=False);

_images/sigc_main_14_0.png
[78]:
# Top five ranked signatures per cell type (one column per group).
ranked_names = sig_adata.uns["rank_genes_groups"]["names"]
pd.DataFrame(ranked_names).head(5)
[78]:
epithelial cell of nephron leukocyte mesenchymal stem cell native cell
0 Arachidonic acid metabolism Retinol metabolism Steroid biosynthesis Phenylalanine metabolism
1 Histidine metabolism One carbon pool by folate Arginine and proline metabolism Oxidative phosphorylation
2 Ascorbate and aldarate metabolism Glycine, serine and threonine metabolism Glutathione metabolism Tyrosine metabolism
3 Glycosphingolipid biosynthesis - ganglio series Propanoate metabolism Drug metabolism - other enzymes Fructose and mannose metabolism
4 Valine, leucine and isoleucine biosynthesis Taurine and hypotaurine metabolism Drug metabolism - cytochrome P450 Arachidonic acid metabolism
[94]:
# Collect the top five ranked signatures of every cell type without duplicates.
# dict.fromkeys keeps first-seen (row-major) order, so the list -- and therefore
# marker_metabolisms[0] and the row order of the plots below -- is identical on
# every run. A set would order the strings differently per Python process
# because of string hash randomization.
top_ranked = pd.DataFrame(sig_adata.uns["rank_genes_groups"]["names"]).head(5)
marker_metabolisms = list(dict.fromkeys(top_ranked.to_numpy().ravel()))
[100]:
# Distribution of the first marker signature across cell types.
# `ncols` was dropped: sc.pl.violin has no such parameter and forwards unknown
# keywords to seaborn's violinplot, which has no use for it either.
sc.pl.violin(sig_adata, marker_metabolisms[0], groupby="cell_type")
_images/sigc_main_17_0.png
[107]:
# Compare all marker signatures across cell types in one stacked violin plot
# drawn on an explicitly sized axis.
fig, ax = plt.subplots(figsize=(9, 6), constrained_layout=True)

sc.pl.stacked_violin(
    sig_adata,
    var_names=marker_metabolisms,
    groupby="cell_type",
    rotation=45,
    ax=ax,
);
_images/sigc_main_18_0.png
[108]:
# Same marker signatures summarised per cell type as a dot plot.
sc.pl.dotplot(sig_adata, var_names=marker_metabolisms, groupby="cell_type")
_images/sigc_main_19_0.png
[ ]:

[19]:
# Print the package-level help: public functions, their parameters and the
# installed sigc version.
help(sigc)
Help on package sigc:

NAME
    sigc

PACKAGE CONTENTS
    core

FUNCTIONS
    genesets2GeneSig(df: pandas.core.frame.DataFrame) -> Sequence[Type[ctxcore.genesig.GeneSignature]]
        Conver dataframe to GeneSig for AUCell.

        :param df: A dataframe with columns ["name", "member", "description"].
                   name        member     description
                   signature1  gene1      signature1 description
                   signature1  gene2      signature1 description
                   signature2  gene4      signature2 description
        :return: GeneSignature list.

    metabolism_sigs(resources='KEGG') -> pandas.core.frame.DataFrame
        Get a set of default metabolism signature from KEGG or REACTOME.
        :param resources: KEGG or REACTOME, default KEGG

    sigc_score(ex_mtx: pandas.core.frame.DataFrame, GeneSigs: pandas.core.frame.DataFrame, method='AUCell', num_workers=4) -> pandas.core.frame.DataFrame
        Get a set of signature score of a given gene expression matrix.

        :param ex_mtx: The expression profile matrix.
                       The rows should correspond to different cells,
                       the columns to different genes (n_cells x n_genes).
        :param GeneSigs: A dataframe with columns ["name", "member", "description"].
                           name        member     description
                           signature1  gene1      signature1 description
                           signature1  gene2      signature1 description
                           signature2  gene4      signature2 description
        :param method: sinature score method [AUCell, GSVA, ssGSEA, ...] (default: AUCell).
        :param num_workers: The number of cores to use in AUCell method (default: 4).
        :return: A dataframe with cell signature score (n_cells x n_signatures).

DATA
    __all__ = ['genesets2GeneSig', 'metabolism_sigs', 'sigc_score']

VERSION
    0.1.15

FILE
    /root/anaconda3/lib/python3.9/site-packages/sigc/__init__.py


[ ]: