適合用於FPKM數據求差異基因的ballgown算法

對於FPKM表達數據,edgeR、limma和DESeq等算法並不合適;而ballgown是針對FPKM數據開發的差異基因分析算法,可以嘗試。
示例數據如下所示:


# --------------------------------------------------------
# 
# 
# 
# --------------------------------------------------------

# Every relative path below (the input .Rda and the CSV outputs) resolves
# against this directory.
setwd("D:\\SCIwork\\F5\\DEG")

# tidyr supplies separate() and the %>% pipe used below; the stattest() call
# further down is expected to come from ballgown.
library("tidyr")
library("ballgown")

# Restore the saved workspace objects; the script expects this file to define
# a data frame called `mRNA_exprSet`.
load("mRNA_exprSet.Rda")

# Quick look at the top-left corner of the expression table.
mRNA_exprSet[1:4, 1:4]

# --------------------------------------------------------
# 
# 
# 
# --------------------------------------------------------

# The `gene_id` column packs three fields separated by " | ".
# Split it into one column per field, in place of the original column.
id_fields <- c("gene_name", "gene_id", "gene_biotype")
mRNA_exprSet <- tidyr::separate(
  mRNA_exprSet,
  gene_id,
  into = id_fields,
  sep = " \\| "
)

# Drop columns 2 and 3 by position.
# NOTE(review): this assumes the split fields occupy columns 1-3, so that
# columns 2-3 are `gene_id` and `gene_biotype` -- confirm against the data.
mRNA_exprSet <- mRNA_exprSet[, -c(2, 3)]

# Keep only the first row for each gene name.
index <- duplicated(mRNA_exprSet$gene_name)
mRNA.data <- mRNA_exprSet[!index, ]
dim(mRNA.data)


# --------------------------------------------------------
# 
# 
# 
# --------------------------------------------------------

# Move the gene names (first column) into the row names so that the table
# holds only per-sample expression values.
BLCA_fpkm_data <- mRNA.data
rownames(BLCA_fpkm_data) <- BLCA_fpkm_data[, 1]
BLCA_fpkm_data <- BLCA_fpkm_data[-1]

# Build the sample grouping table.
# Sample IDs are taken from the expression table itself rather than from a
# second load() of the .Rda file, so the metadata rows are guaranteed to be in
# the same order as the expression columns.
sample_ids <- colnames(BLCA_fpkm_data)

# Characters 14-15 of each sample ID hold a two-digit code:
# 1-9 is labelled "T" and 10-29 is labelled "N".
# NOTE(review): presumably these are TCGA barcode sample-type codes
# (tumor vs. normal/control) -- confirm against the barcode specification.
sample_code <- as.numeric(substring(sample_ids, 14, 15))

# Vectorized assignment; also behaves correctly when there are zero samples.
group <- rep(NA_character_, length(sample_ids))
group[sample_code %in% 1:9] <- "T"
group[sample_code %in% 10:29] <- "N"

# Surface samples that matched neither range instead of silently leaving NA.
if (anyNA(group)) {
  warning(
    sum(is.na(group)),
    " sample(s) have a code outside 1-29 in characters 14-15; ",
    "their group is NA",
    call. = FALSE
  )
}

metadata <- data.frame(TCGA_id = sample_ids, group = group)
metadata$group <- as.factor(metadata$group)


# --------------------------------------------------------
# 
# 
# 
# --------------------------------------------------------


# Run stattest() on the gene-level FPKM table, comparing samples by the
# `group` column of `metadata` and requesting fold-change estimates.
result_diff <- stattest(
  gowntable = BLCA_fpkm_data,
  pData = metadata,
  feature = "gene",
  meas = "FPKM",
  covariate = "group",
  log = TRUE,
  getFC = TRUE
)

# Derive the log2 fold change and its magnitude from the `fc` column.
result_diff$LogFC <- log2(result_diff$fc)
result_diff$LogFC_abs <- abs(result_diff$LogFC)

# Save the full result table, without a row-name column.
write.csv(result_diff, "mRNA_BLCA_fpkm_diff.csv", row.names = FALSE)

# --------------------------------------------------------
# 
# 
# 
# --------------------------------------------------------

# Selection thresholds: minimum |log2 fold change| and maximum p-value.
foldChange <- 0.5
padj <- 0.05

# NOTE(review): the threshold is named `padj` but is compared against the raw
# `pval` column, not an adjusted p-value -- confirm which was intended.
is_sig <- result_diff$pval < padj & result_diff$LogFC_abs > foldChange

# which() drops rows where the comparison is NA.
diffSig <- result_diff[which(is_sig), ]

dim(diffSig)

# Row names are written as the first CSV column (write.csv default).
write.csv(diffSig, file = "diffSig_mRNA_BLCA.csv")
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章