https://github.com/theislab/single-cell-tutorial/blob/master/supplementary_scripts/Splatter-marker-genes-random-data.ipynb
import scanpy as sc
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb
import warnings
from rpy2.rinterface import RRuntimeWarning
from rpy2.robjects import pandas2ri
%load_ext rpy2.ipython
# ignore R warning messages
warnings.filterwarnings("ignore", category=RRuntimeWarning)
# Automatically convert rpy2 outputs to pandas dataframes
pandas2ri.activate()
plt.rcParams['figure.figsize']=(8,8) #rescale figures
sc.settings.verbosity = 3
#sc.set_figure_params(dpi=200, dpi_save=300)
#sc.logging.print_versions()
%%R -o data
#suppressMessages(library(splatter))
suppressMessages(library(splatter))
n_cells <- 1000
n_genes <- 10000
params <- newSplatParams(seed=12345,
nGenes = n_genes,
batchCells = n_cells,
dropout.shape = 0.95,
de.prob = 0)
sim <- splatSimulate(params)
data <- counts(sim)
import pandas as pd
data_df = pd.DataFrame(data)
adata = sc.AnnData(data_df.transpose())
adata.obs_names = adata.obs_names.astype(str)
adata.var_names = adata.var_names.astype(str)
#Basic QC filtering
sc.pp.filter_cells(adata, min_counts=1000)
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=20)
#Normalize the data
sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
sc.pp.log1p(adata)
#Detect highly variable genes
sc.pp.filter_genes_dispersion(adata, flavor='cell_ranger', n_top_genes=4000, log=False, subset=False)
# Pre-process and visualize
sc.pp.pca(adata, svd_solver='arpack', use_highly_variable=True, n_comps=10)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
sc.pl.pca_variance_ratio(adata)
#Plot visualizations
sc.pl.pca(adata)
sc.pl.umap(adata)
#Cluster the data
sc.tl.louvain(adata, resolution=0.5, key_added='louvain')
sc.pl.pca(adata, color='louvain')#, save='_splatter_pca_louvain.pdf')
sc.pl.umap(adata, color='louvain')
Marker genes
sc.tl.rank_genes_groups(adata, groupby='louvain', use_raw=False, method='t-test', key_added='rank_louvain', n_genes=9000)
sc.pl.rank_genes_groups(adata, key='rank_louvain', groups=['0','1'], fontsize=12)
P-value analysis
#Extract p-value data
pval_clust0 = [i[0] for i in adata.uns['rank_louvain']['pvals']]
pval_clust1 = [i[1] for i in adata.uns['rank_louvain']['pvals']]
#Plot expected P-value distributions
lab_size = 25
tick_size = 15
p1 = sb.distplot(pval_clust0, kde=False, bins=100)
p1.set_xlabel("P-values",fontsize=lab_size)
p1.set_ylabel("Frequency",fontsize=lab_size)
p1.tick_params(labelsize=tick_size)
f1 = p1.get_figure()
#f1.savefig('figures/hist_pvals_clust0.pdf')
plt.show()
p2 = sb.distplot(pval_clust1, kde=False, bins=100)
p2.set_xlabel("P-values",fontsize=lab_size)
p2.set_ylabel("Frequency",fontsize=lab_size)
p2.tick_params(labelsize=tick_size)
f2 = p2.get_figure()
#f2.savefig('figures/hist_pvals_clust1.pdf')
plt.show()
文章来源:https://www.toymoban.com/news/detail-679050.html
#Number of 'significant' marker gene after multiple testing correcting
signif_thresh = 0.05
n_signif_markers0 = np.sum([i[0] < signif_thresh for i in adata.uns['rank_louvain']['pvals_adj']])
n_signif_markers1 = np.sum([i[1] < signif_thresh for i in adata.uns['rank_louvain']['pvals_adj']])
print(n_signif_markers0)
print(n_signif_markers1)
文章来源地址https://www.toymoban.com/news/detail-679050.html
到了这里,关于splatter marker gene random data的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!