trendsceek || 識別基因空間表達趨勢

空間轉錄組技術使我們能夠在組織成像的基礎上考察基因表達情況,但同時也需要新的分析策略。trendsceek 是一種基於標記點過程(marked point process)的方法,用於識別具有顯著空間表達趨勢的基因。trendsceek 在空間轉錄組和順序熒光原位雜交(seqFISH)數據中都能很好地發現空間差異基因,並且能在單細胞 RNA-seq 數據的低維投影(t-SNE/UMAP)中揭示顯著的基因表達梯度和熱點。

library(trendsceek)
library(Seurat)
library(SeuratData)


## Show the datasets that SeuratData reports.
AvailableData()

## Bind the `anterior1` object shipped in the stxBrain.SeuratData package to
## `sto`, then preview the coordinate table stored with its `anterior1` image.
sto = stxBrain.SeuratData::anterior1
head(sto@images$anterior1@coordinates)

                   tissue row col imagerow imagecol
AAACAAGTATCTCCCA-1      1  50 102     7475     8501
AAACACCAATAACTGC-1      1  59  19     8553     2788
AAACAGAGCGACTCCT-1      1  14  94     3164     7950
AAACAGCTTTCAGAAG-1      1  43   9     6637     2099
AAACAGGGTCTATATT-1      1  47  13     7116     2375
AAACATGGTGAGAGGA-1      1  62   0     8913     1480
## Build the point pattern from columns 2 and 3 of the image coordinate table
## (the `row` and `col` columns).
pp = pos2pp(sto@images$anterior1@coordinates[, c(2, 3)])

## Transformation handed to set_marks() via its `log.fcn` argument.
log.fcn = log10

## `counts_sub` does not exist yet at this point in the script (it is only
## created further down), so preview the count matrix that is actually
## attached as marks instead of failing with "object 'counts_sub' not found".
sto@assays$Spatial@counts[1:2, 1:4]

## Attach the Spatial assay counts as marks of the point pattern.
pp = set_marks(pp, as.matrix(sto@assays$Spatial@counts), log.fcn = log.fcn)

## Thresholds passed positionally to genefilter_exprmat().
## NOTE(review): presumably a gene is kept when it reaches `min.expr` in at
## least `min.ncells.expr` columns -- confirm against the trendsceek docs.
min.expr = 5
min.ncells.expr = 3

## Filter the dense Spatial count matrix and report the size of what is left.
counts_filt = genefilter_exprmat(
  as.matrix(sto@assays$Spatial@counts),
  min.expr,
  min.ncells.expr
)
dim(counts_filt)


## Settings for the variability statistics.
method = 'glm' ##For (robust) linear regression set to 'rlm'
quantile.cutoff = 0.9 ##filter out the most lowly expressed genes from the fitting

## The filtered matrix is deliberately supplied for both of the first two
## arguments of calc_varstats().
vargenes_stats = calc_varstats(
  counts_filt,
  counts_filt,
  quant.cutoff = quantile.cutoff,
  method = method
)

## How many genes to carry forward, and the flag forwarded to
## plot_cv2vsmean() as `plot.ercc.points`.
n.top2plot = 10
plot.ercc.points = FALSE

## Take the first `n.top2plot` row names of the 'real.stats' table.
## NOTE(review): this assumes the table is already ordered by variability.
topvar.genes = rownames(vargenes_stats[['real.stats']])[seq_len(n.top2plot)]

## Restrict the marked point pattern to those genes, then draw the
## CV2-vs-mean plot with the same genes passed in.
pp2plot = pp_select(pp, topvar.genes)
plot_cv2vsmean(
  vargenes_stats,
  topvar.genes,
  plot.ercc.points = plot.ercc.points
)

## Normalise the dense Spatial counts with deseq_norm(); `min.count` is its
## second positional argument.
min.count = 1
counts_norm = deseq_norm(
  as.matrix(sto@assays$Spatial@counts),
  min.count
)

## Keep only the rows of the selected genes and report the resulting size.
## NOTE(review): in the visible script `counts_norm` / `counts_sub` are only
## inspected; the point pattern plotted below still carries the marks that
## were attached earlier via set_marks() -- confirm this is intended.
counts_sub = counts_norm[topvar.genes, ]
dim(counts_sub)

## Scatter plot of the selected genes on the point pattern.
plot_pp_scatter(
  pp2plot,
  log_marks = FALSE,
  scale_marks = FALSE,
  pal.direction = -1
)
## Second and third positional arguments of trendsceek_test().
## NOTE(review): presumably the number of random resamplings and the number
## of worker cores -- confirm against the trendsceek documentation.
ncores = 1
nrand = 100

##run
trendstat_list = trendsceek_test(pp2plot, nrand, ncores)

## Print the full result, then only the first rows of the Vmark table.
trendstat_list

head(trendstat_list$sig_genes_list$Vmark)
           gene  test earlystop max.env.rel.dev max.rel.dev   min.pval nsim_max nsim_stop      p.bh      p.bo rank
S100a5   S100a5 Vmark         0        6.898791  0.29728032 0.00990099        2         2 0.0110011 0.0990099    1
Fabp7     Fabp7 Vmark         0        5.392828  0.12836321 0.00990099        2         2 0.0110011 0.0990099    2
Ptgds     Ptgds Vmark         0        3.491384  0.09823452 0.00990099        2         2 0.0110011 0.0990099    3
Clca3a1 Clca3a1 Vmark         0        3.075842  0.35753230 0.00990099        2         2 0.0110011 0.0990099    4
Ttr         Ttr Vmark         0        2.962141  0.10187457 0.00990099        2         2 0.0110011 0.0990099    5
Kl           Kl Vmark         0        1.762761  0.11802672 0.00990099        2         2 0.0110011 0.0990099    6
## Significance level handed to extract_sig_genes().
alpha = 0.05 ##Benjamini-Hochberg

## Extract the significant genes and show how many rows each element of the
## returned list has.
sig_list = extract_sig_genes(trendstat_list, alpha)
lapply(sig_list, nrow)

## Gene names from the 'markcorr' element; plot the statistics of the first.
sig_genes = sig_list[['markcorr']][, 'gene']
plot_trendstats(trendstat_list, sig_genes[1])

## `pp_sig` must be created before it is plotted, otherwise the call below
## fails with "object 'pp_sig' not found": restrict the marked point pattern
## to the significant genes.
pp_sig = pp_select(pp, sig_genes)
plot_pp_scatter(
  pp_sig,
  log_marks = FALSE,
  scale_marks = FALSE,
  pal.direction = -1,
  pointsize.factor = 1
)


https://github.com/edsgard/trendsceek
Edsgärd D. et al., Identification of spatial expression trends in single-cell gene expression data, Nature Methods, 2018
doi:10.1038/nmeth.4634

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章