Contents

sigc tutorials

Contents

sigc tutorials

[86]:

import warnings

import anndata as ad
import gseapy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import sigc
warnings.filterwarnings("ignore")

[15]:

# Load the example dataset; the relative path means the .h5ad file must sit in
# the notebook's working directory.
adata = ad.read_h5ad("kidney_tumors_convert_symbol.h5ad")

[4]:

# Fetch sigc's default metabolism signatures (KEGG flavour) and preview them.
# The table is in long format: one row per (signature name, member gene) pair.
kegg_metabolism = sigc.metabolism_sigs(resources='KEGG')
display(kegg_metabolism.head(5))

	name	description	member
0	Glycolysis / Gluconeogenesis	00010	HK3
1	Glycolysis / Gluconeogenesis	00010	HK1
2	Glycolysis / Gluconeogenesis	00010	HK2
3	Glycolysis / Gluconeogenesis	00010	HKDC1
4	Glycolysis / Gluconeogenesis	00010	GCK

[17]:

# Expression matrix as a DataFrame; sigc_score expects cells in rows and genes
# in columns (n_cells x n_genes), which the printed shape lets us confirm.
df = adata.to_df()
print(df.shape)

# Score every KEGG metabolism signature in every cell with the AUCell method.
# The result is a DataFrame of shape (n_cells x n_signatures).
sig_mtx = sigc.sigc_score(df, kegg_metabolism, method="AUCell")

(4636, 32922)

[18]:

# Inspect the AnnData object: dimensions plus the available obs/var/obsm fields.
adata

[18]:

AnnData object with n_obs × n_vars = 4636 × 32922
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'seurat_clusters', 'mtFrac', 'age', 'CnT', 'DTLH', 'UBCD', 'SSBpr', 'End', 'MSC', 'RVCSB', 'SSBpod', 'SSBm.d', 'ICa', 'ErPrT', 'ICb', 'NPC', 'Pod', 'author_cell_type', 'cell_type_ontology_term_id', 'disease_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'development_stage_ontology_term_id', 'tissue_ontology_term_id', 'sex_ontology_term_id', 'organism_ontology_term_id', 'is_primary_data', 'assay_ontology_term_id', 'donor_id', 'suspension_type', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype', 'external_gene_name'
    obsm: 'X_pca', 'X_umap'

[19]:

# Sanity check: the row count should equal the number of cells in `adata`.
sig_mtx.shape

[19]:

(4636, 85)

[20]:

# Wrap the signature scores in their own AnnData object, reusing the cell
# annotations (obs) and embeddings (obsm) of the expression data so that
# scanpy's plotting and ranking functions can treat signatures like genes.
sig_adata = ad.AnnData(sig_mtx, obs=adata.obs, obsm=adata.obsm)

[21]:

# Preview the score matrix: one row per cell, one column per signature.
sig_mtx.head(2)

[21]:

Regulon	Glycolysis / Gluconeogenesis	Citrate cycle (TCA cycle)	Pentose phosphate pathway	Pentose and glucuronate interconversions	Fructose and mannose metabolism	Galactose metabolism	Ascorbate and aldarate metabolism	Starch and sucrose metabolism	Amino sugar and nucleotide sugar metabolism	Pyruvate metabolism	...	One carbon pool by folate	Retinol metabolism	Porphyrin and chlorophyll metabolism	Ubiquinone and other terpenoid-quinone biosynthesis	Terpenoid backbone biosynthesis	Caffeine metabolism	Neomycin, kanamycin and gentamicin biosynthesis	Metabolism of xenobiotics by cytochrome P450	Drug metabolism - cytochrome P450	Drug metabolism - other enzymes
Cell
NB8350521_AAACCCAAGTGATGGC	0.999293	0.999311	0.999332	0.999373	0.999373	0.999392	0.999368	0.999392	0.999379	0.999268	...	0.999332	0.999373	0.999392	0.999392	0.999337	0.999392	0.999392	0.999322	0.999336	0.999303
NB8350521_AAACGAAGTATTGAGA	0.999284	0.999392	0.999352	0.999373	0.999334	0.999392	0.999392	0.999392	0.999379	0.999330	...	0.999362	0.999373	0.999377	0.999392	0.999392	0.999392	0.999392	0.999375	0.999374	0.999360

2 rows × 85 columns

[29]:

# matplotlib.pyplot (plt) is imported with the other dependencies in the first
# cell, so this cell no longer carries its own import.
# Create the axes explicitly so figure size and margins are under our control,
# then hand them to scanpy.
fig = plt.figure(figsize=(8, 5), constrained_layout=True)
ax = fig.add_gridspec(top=0.75, right=0.75).subplots()

# Plot the 20 top-ranked signatures according to scanpy's highest_expr_genes.
sc.pl.highest_expr_genes(sig_adata, n_top=20, ax=ax)

_images/sigc_main_9_0.png

Visualization

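Because the signature scores are stored in an AnnData object (`sig_adata`), scanpy's plotting functions can be applied to them directly, just as they would be to gene expression.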

[30]:

# Colour the UMAP embedding (copied over from `adata.obsm`) by one signature's score.
sc.pl.umap(sig_adata, color='Glycolysis / Gluconeogenesis')

_images/sigc_main_11_0.png

[37]:

# Rank signatures per cell type with a Wilcoxon test; the results are read back
# from sig_adata.uns['rank_genes_groups'] in later cells.
sc.tl.rank_genes_groups(sig_adata, 'cell_type', method='wilcoxon')

[53]:

# Distinct cell-type labels, as a plain Python list.
celltypes = sig_adata.obs["cell_type"].unique().to_list()
celltypes

[53]:

['epithelial cell of nephron',
 'mesenchymal stem cell',
 'leukocyte',
 'native cell']

[77]:

# Plot the 15 top-ranked signatures for the first cell type only.
# show=False stops scanpy from calling plt.show() itself.
sc.pl.rank_genes_groups(sig_adata, groups=[celltypes[0]], n_genes=15, show=False)

_images/sigc_main_14_0.png

[78]:

# Top-ranked signature names: one column per cell type, rows ordered by rank.
pd.DataFrame(sig_adata.uns['rank_genes_groups']['names']).head(5)

[78]:

	epithelial cell of nephron	leukocyte	mesenchymal stem cell	native cell
0	Arachidonic acid metabolism	Retinol metabolism	Steroid biosynthesis	Phenylalanine metabolism
1	Histidine metabolism	One carbon pool by folate	Arginine and proline metabolism	Oxidative phosphorylation
2	Ascorbate and aldarate metabolism	Glycine, serine and threonine metabolism	Glutathione metabolism	Tyrosine metabolism
3	Glycosphingolipid biosynthesis - ganglio series	Propanoate metabolism	Drug metabolism - other enzymes	Fructose and mannose metabolism
4	Valine, leucine and isoleucine biosynthesis	Taurine and hypotaurine metabolism	Drug metabolism - cytochrome P450	Arachidonic acid metabolism

[94]:

# Collect the 5 top-ranked signatures of every cell type into one flat list.
# dict.fromkeys de-duplicates while keeping first-seen order, so the list (and
# therefore marker_metabolisms[0] and the panel order in the plots below) is
# identical on every run. Going through a set() would order the strings
# differently between kernel sessions because of string hash randomisation.
top_signatures = pd.DataFrame(sig_adata.uns['rank_genes_groups']['names']).head(5)
marker_metabolisms = list(dict.fromkeys(top_signatures.values.ravel()))

[100]:

# Score distribution of the first marker signature, split by cell type.
sc.pl.violin(sig_adata, marker_metabolisms[0], groupby='cell_type', ncols=5)

_images/sigc_main_17_0.png

[107]:

# Create the axes up front so the figure size can be set, then let scanpy draw into it.
fig, ax = plt.subplots(1,1, figsize=(9,6),constrained_layout=True)

# One violin per (cell type, marker signature); the trailing ';' hides the returned object's repr.
sc.pl.stacked_violin(sig_adata, marker_metabolisms, groupby='cell_type', rotation=45, ax=ax);

_images/sigc_main_18_0.png

[108]:

# Dot-plot summary of the marker signatures across cell types.
sc.pl.dotplot(sig_adata, marker_metabolisms, groupby='cell_type')

_images/sigc_main_19_0.png

[ ]:

[19]:

# Print the package-level API reference (public functions, version, install path).
help(sigc)

Help on package sigc:

NAME
    sigc

PACKAGE CONTENTS
    core

FUNCTIONS
    genesets2GeneSig(df: pandas.core.frame.DataFrame) -> Sequence[Type[ctxcore.genesig.GeneSignature]]
        Conver dataframe to GeneSig for AUCell.

        :param df: A dataframe with columns ["name", "member", "description"].
                   name        member     description
                   signature1  gene1      signature1 description
                   signature1  gene2      signature1 description
                   signature2  gene4      signature2 description
        :return: GeneSignature list.

    metabolism_sigs(resources='KEGG') -> pandas.core.frame.DataFrame
        Get a set of default metabolism signature from KEGG or REACTOME.
        :param resources: KEGG or REACTOME, default KEGG

    sigc_score(ex_mtx: pandas.core.frame.DataFrame, GeneSigs: pandas.core.frame.DataFrame, method='AUCell', num_workers=4) -> pandas.core.frame.DataFrame
        Get a set of signature score of a given gene expression matrix.

        :param ex_mtx: The expression profile matrix.
                       The rows should correspond to different cells,
                       the columns to different genes (n_cells x n_genes).
        :param GeneSigs: A dataframe with columns ["name", "member", "description"].
                           name        member     description
                           signature1  gene1      signature1 description
                           signature1  gene2      signature1 description
                           signature2  gene4      signature2 description
        :param method: sinature score method [AUCell, GSVA, ssGSEA, ...] (default: AUCell).
        :param num_workers: The number of cores to use in AUCell method (default: 4).
        :return: A dataframe with cell signature score (n_cells x n_signatures).

DATA
    __all__ = ['genesets2GeneSig', 'metabolism_sigs', 'sigc_score']

VERSION
    0.1.15

FILE
    /root/anaconda3/lib/python3.9/site-packages/sigc/__init__.py

[ ]: