import pandas as pd
import numpy as np
import scanpy as sc

from NSForest_v3dot9_1 import *


data_folder = "demo data/"
adata = sc.read_h5ad(data_folder + "adata_layer1.h5ad") #<---


adata #quick look of the data

AnnData object with n_obs × n_vars = 871 × 16497
    obs: 'cluster'


## assign the header name for the cluster labels
cluster_header = "cluster" #<---


## check the clusters
set(adata.obs[cluster_header])

{'e1_e299_SLC17A7_L5b_Cdh13',
 'g1_g48_GLI3_Astro_Gja1',
 'g2_g27_APBB1IP_Micro_Ctss',
 'g3_g18_GPNMB_OPC_Pdgfra',
 'g4_g9_MOG_Oligo_Opalin',
 'i10_i16_TSPAN12_Vip_Mybpc1',
 'i11_i6_EGF_Vip_Mybpc1',
 'i1_i90_COL5A2_Ndnf_Car4',
 'i2_i77_LHX6_Sst_Cbln4',
 'i3_i56_BAGE2_Ndnf_Cxcl14',
 'i4_i54_MC4R_Ndnf_Cxcl14',
 'i5_i47_TRPC3_Ndnf_Car4',
 'i6_i44_GPR149_Vip_Mybpc1',
 'i7_i31_CLMP_Ndnf_Cxcl14',
 'i8_i27_SNCG_Vip_Mybpc1',
 'i9_i22_TAC3_Vip_Mybpc1'}


## run over a selected list of clusters
NSForest_results_selected = NSForest(adata, cluster_header=cluster_header, n_trees=100, n_genes_eval=6,
                                     cluster_list=['e1_e299_SLC17A7_L5b_Cdh13','g1_g48_GLI3_Astro_Gja1']) #<---

Preparing data...
--- 0.005445003509521484 seconds ---
Calculating medians...
--- 0.40106868743896484 seconds ---
Number of clusters to evaluate: 2
1 out of 2:
	e1_e299_SLC17A7_L5b_Cdh13
	['TESPA1', 'SV2B']
	0.9828178694158075
2 out of 2:
	g1_g48_GLI3_Astro_Gja1
	['EZR', 'FGFR3']
	0.9308510638297872
--- 1.2347710132598877 seconds ---


## look at the results
NSForest_results_selected


## by default, run over all clusters
NSForest_results = NSForest(adata, cluster_header=cluster_header, n_trees=100, n_genes_eval=6) #<---

Preparing data...
--- 0.004155874252319336 seconds ---
Calculating medians...
--- 0.35634779930114746 seconds ---
Number of clusters to evaluate: 16
1 out of 16:
	e1_e299_SLC17A7_L5b_Cdh13
	['TESPA1', 'SV2B']
	0.9828178694158075
2 out of 16:
	g1_g48_GLI3_Astro_Gja1
	['EZR', 'FGFR3']
	0.9308510638297872
3 out of 16:
	g2_g27_APBB1IP_Micro_Ctss
	['PLCG2']
	0.9565217391304346
4 out of 16:
	g3_g18_GPNMB_OPC_Pdgfra
	['GPNMB', 'VCAN']
	0.9090909090909091
5 out of 16:
	g4_g9_MOG_Oligo_Opalin
	['ST18']
	1.0
6 out of 16:
	i10_i16_TSPAN12_Vip_Mybpc1
	['CHRNB3', 'HCRTR2', 'COL12A1']
	0.7142857142857143
7 out of 16:
	i11_i6_EGF_Vip_Mybpc1
	['EGF', 'COPZ1']
	0.5555555555555555
8 out of 16:
	i1_i90_COL5A2_Ndnf_Car4
	['COL5A2', 'SV2C']
	0.9049079754601227
9 out of 16:
	i2_i77_LHX6_Sst_Cbln4
	['SYTL5', 'SOX6']
	0.9501557632398755
10 out of 16:
	i3_i56_BAGE2_Ndnf_Cxcl14
	['BAGE2', 'THSD7B']
	0.8430232558139534
11 out of 16:
	i4_i54_MC4R_Ndnf_Cxcl14
	['ADAM33', 'BCAS1']
	0.8426966292134832
12 out of 16:
	i5_i47_TRPC3_Ndnf_Car4
	['NTNG1', 'EYA4']
	0.9064327485380117
13 out of 16:
	i6_i44_GPR149_Vip_Mybpc1
	['FLT1', 'GPR149']
	0.7916666666666666
14 out of 16:
	i7_i31_CLMP_Ndnf_Cxcl14
	['PAX6', 'TGFBR2']
	0.9009009009009009
15 out of 16:
	i8_i27_SNCG_Vip_Mybpc1
	['SNCG', 'LINC01109']
	0.7831325301204819
16 out of 16:
	i9_i22_TAC3_Vip_Mybpc1
	['MCTP2', 'IQGAP2']
	0.7926829268292683
--- 10.439274072647095 seconds ---


## look at the results
NSForest_results


## store cluster-marker information in a dictionary
markers = dict(zip(NSForest_results['clusterName'], NSForest_results['NSForest_markers']))


## use scanpy plot function to plot the marker genes
sc.pl.stacked_violin(adata, markers, groupby=cluster_header, dendrogram=True)

WARNING: dendrogram data not found (using key=dendrogram_cluster). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently.
WARNING: You’re trying to run this on 16497 dimensions of `.X`, if you really want this, set `use_rep='X'`.
         Falling back to preprocessing with `sc.pp.pca` and default params.


# binary_genes = dict(zip(NSForest_results['clusterName'], NSForest_results['binary_genes']))


# sc.pl.stacked_violin(adata, binary_genes, groupby=cluster_header, dendrogram=True, swap_axes=True)


!jupyter nbconvert --to html NS-Forest_tutorial.ipynb #<---

[NbConvertApp] Converting notebook NS-Forest_tutorial.ipynb to html
[NbConvertApp] Writing 827761 bytes to NS-Forest_tutorial.html


!python --version

Python 3.8.8


sc.__version__

'1.8.2'

	clusterName	f_score	PPV	TN	FP	FN	TP	marker_count	NSForest_markers	binary_genes
0	e1_e299_SLC17A7_L5b_Cdh13	0.982818	0.989619	569	3	13	286	2	[TESPA1, SV2B]	[KIAA1211L, HOPX, TESPA1, CUX2, CYP1B1_AS1, SV...
1	g1_g48_GLI3_Astro_Gja1	0.930851	1.000000	823	0	13	35	2	[EZR, FGFR3]	[EMX2OS, FAM189A2, EZR, PAPLN, FGFR3, SLCO1C1,...

	clusterName	f_score	PPV	TN	FP	FN	TP	marker_count	NSForest_markers	binary_genes
0	e1_e299_SLC17A7_L5b_Cdh13	0.982818	0.989619	569	3	13	286	2	[TESPA1, SV2B]	[KIAA1211L, HOPX, TESPA1, CUX2, CYP1B1_AS1, SV...
1	g1_g48_GLI3_Astro_Gja1	0.930851	1.000000	823	0	13	35	2	[EZR, FGFR3]	[EMX2OS, FAM189A2, EZR, PAPLN, FGFR3, SLCO1C1,...
2	g2_g27_APBB1IP_Micro_Ctss	0.956522	1.000000	844	0	5	22	1	[PLCG2]	[PLCG2, MS4A14, ITGAX, RBM47, LINC01141, CD53,...
3	g3_g18_GPNMB_OPC_Pdgfra	0.909091	1.000000	853	0	6	12	2	[GPNMB, VCAN]	[STK32A, COL9A2, GPNMB, CSPG4, LHFPL3_AS1, VCA...
4	g4_g9_MOG_Oligo_Opalin	1.000000	1.000000	862	0	0	9	1	[ST18]	[MOG, MYRF, ST18, OPALIN, CD22, ERMN, DOCK5, B...
5	i10_i16_TSPAN12_Vip_Mybpc1	0.714286	0.800000	853	2	8	8	3	[CHRNB3, HCRTR2, COL12A1]	[ZNF223, LINC01539, CHRNB3, HCRTR2, CRH, COL12...
6	i11_i6_EGF_Vip_Mybpc1	0.555556	0.666667	864	1	4	2	2	[EGF, COPZ1]	[EGF, SNHG25, KCNIP2_AS1, DPF2, COPZ1, TMED5, ...
7	i1_i90_COL5A2_Ndnf_Car4	0.904908	1.000000	781	0	31	59	2	[COL5A2, SV2C]	[COL5A2, TRPC3, CBLN4, NPNT, FREM1, SV2C, LINC...
8	i2_i77_LHX6_Sst_Cbln4	0.950156	1.000000	794	0	16	61	2	[SYTL5, SOX6]	[LHX6, SYTL5, RASGRF2, PLCH1, SOX6, TENM1, KIA...
9	i3_i56_BAGE2_Ndnf_Cxcl14	0.843023	1.000000	815	0	27	29	2	[BAGE2, THSD7B]	[BAGE2, GREM2, GRB14, ADAMTSL1, SEMA3C, THSD7B...
10	i4_i54_MC4R_Ndnf_Cxcl14	0.842697	0.967742	816	1	24	30	2	[ADAM33, BCAS1]	[MC4R, LINC01435, ADAM33, FAM19A4, BCAS1, NTN4...
11	i5_i47_TRPC3_Ndnf_Car4	0.906433	1.000000	824	0	16	31	2	[NTNG1, EYA4]	[PRSS12, SSTR2, NTNG1, TARID, EYA4, CA2, NRG1_...
12	i6_i44_GPR149_Vip_Mybpc1	0.791667	1.000000	827	0	25	19	2	[FLT1, GPR149]	[FLT1, CXCL12, SLC22A3, GPR149, SHISA8, IGFBP5...
13	i7_i31_CLMP_Ndnf_Cxcl14	0.900901	1.000000	840	0	11	20	2	[PAX6, TGFBR2]	[KIAA1644, CLMP, PAX6, CPLX3, TGFBR2, WIF1, AB...
14	i8_i27_SNCG_Vip_Mybpc1	0.783133	0.928571	843	1	14	13	2	[SNCG, LINC01109]	[SNCG, MMRN2, EDNRA, LBH, KCNK2, LINC01109, AR...
15	i9_i22_TAC3_Vip_Mybpc1	0.792683	0.866667	847	2	9	13	2	[MCTP2, IQGAP2]	[BSPRY, MCTP2, VWC2L_IT1, IQGAP2, CCNG2, VIP, ...

NS-Forest v3.9.1¶

Readme¶

Tutorial¶

Data¶

Run NS-Forest¶

Run over a selected list of clusters¶

Run over all clusters (by default)¶

Plots¶

NS-Forest marker genes¶

Binary genes¶

Save report to html¶