多分組差異分析解決方案（1）循環T檢驗

主要方法：將其中某一組設置爲實驗組，其餘幾組統一設置爲對照組。

第一步讀取數據，合併表達矩陣和分組文件

#===========================================================================


#===========================================================================


# Start from an empty global environment (hidden objects included).
rm(list = ls(all.names = TRUE))

# Keep character columns as plain strings when data frames are built.
options(stringsAsFactors = FALSE)

library(Matrix)

# The expression matrix is read from the s2 working directory.
setwd("D:\\SCIwork\\F38KRT\\s2")

# The first CSV column supplies the row names; the table is then transposed
# so that the original columns become rows.
data <- as.data.frame(
  t(read.csv("cdata.csv", header = TRUE, row.names = 1))
)

# Preview the top-left corner of the transposed table.
data[1:4, 1:4]


# Min-max scale the non-missing entries of a vector to the range [0, 1].
#
# Args:
#   y: numeric vector; NA entries are allowed and are left untouched.
#
# Returns:
#   A vector of the same length as `y` in which every non-NA entry has been
#   replaced by (value - min) / (max - min), computed over the non-NA entries.
#   If all non-NA entries are equal they are mapped to 0 (instead of the NaN
#   that 0 / 0 would produce). If `y` has no non-NA entries it is returned
#   unchanged.
normalized <- function(y) {
  observed <- !is.na(y)

  # Nothing to scale; also avoids min()/max() warnings on an empty vector.
  if (!any(observed)) {
    return(y)
  }

  x <- y[observed]
  lo <- min(x)
  span <- max(x) - lo

  # isTRUE() keeps a NaN span (e.g. Inf - Inf) on the ordinary code path
  # rather than raising an error inside `if`.
  if (isTRUE(span == 0)) {
    y[observed] <- 0
  } else {
    y[observed] <- (x - lo) / span
  }

  y
}

# Min-max scale every column of `data` with normalized().
db <- as.data.frame(apply(data, 2, normalized))
data <- db

# Expose the row names as a regular `sample` column, with dots replaced by
# hyphens (the same replacement is applied to the group table below).
data$sample <- chartr(old = ".", new = "-", x = rownames(data))

# The group assignment table is read from the s3 working directory.
setwd("D:\\SCIwork\\F38KRT\\s3")

group <- read.csv("group2.csv", header = TRUE)
names(group)[1] <- "sample"
group$sample <- chartr(old = ".", new = "-", x = group$sample)

# Keep only the sample ID and its group label, stored as `subtype`.
group <- group[, c("sample", "group"), drop = FALSE]
names(group)[2] <- "subtype"

# Attach the subtype label to each sample's values.
dt <- merge(group, data, by = "sample")

dt[1:4, 1:4]

# The sample ID is not needed after the merge.
dt$sample <- NULL

table(dt$subtype)

# Untouched copy that later comparisons restart from.
dt_total <- dt

輸入文件cdata經過T轉置爲data後如下所示：

上面的代碼包括了對表達量進行歸一化的代碼。

# Min-max scale the non-missing entries of a vector to the range [0, 1].
#
# Args:
#   y: numeric vector; NA entries are allowed and are left untouched.
#
# Returns:
#   A vector of the same length as `y` in which every non-NA entry has been
#   replaced by (value - min) / (max - min), computed over the non-NA entries.
#   If all non-NA entries are equal they are mapped to 0 (instead of the NaN
#   that 0 / 0 would produce). If `y` has no non-NA entries it is returned
#   unchanged.
normalized <- function(y) {
  observed <- !is.na(y)

  # Nothing to scale; also avoids min()/max() warnings on an empty vector.
  if (!any(observed)) {
    return(y)
  }

  x <- y[observed]
  lo <- min(x)
  span <- max(x) - lo

  # isTRUE() keeps a NaN span (e.g. Inf - Inf) on the ordinary code path
  # rather than raising an error inside `if`.
  if (isTRUE(span == 0)) {
    y[observed] <- 0
  } else {
    y[observed] <- (x - lo) / span
  }

  y
}

經過這個代碼，每個基因在各樣本中的表達量將會被線性縮放爲0-1之間的數值。

將subtype1設置爲exp組，其餘兩組（subtype2和subtype3）設置爲con組。

#===========================================================================


#===========================================================================


# One-vs-rest comparison for Subtype1: Subtype1 samples form the 'Exp' group
# and every other sample is pooled into the 'Con' group.

# Restart from the untouched merged table so this section can be re-run.
dt <- dt_total

dt$subtype <- ifelse(dt$subtype == 'Subtype1', 'Exp', 'Con')

table(dt$subtype)

dt <- dt[order(dt$subtype), ]

dt[1:4, 1:4]

dt_Con <- subset(dt, dt$subtype == 'Con')

dt_Con[1:4, 1:4]

dt_Exp <- subset(dt, dt$subtype == 'Exp')

dt_Exp[1:4, 1:4]

# Label each row as <group><original row name>, drop the group column, then
# transpose so that rows are features and columns are samples.
rownames(dt_Con) <- paste0(dt_Con$subtype, rownames(dt_Con))
dt_Con$subtype <- NULL
dt_Con <- as.data.frame(t(dt_Con))

rownames(dt_Exp) <- paste0(dt_Exp$subtype, rownames(dt_Exp))
dt_Exp$subtype <- NULL
dt_Exp <- as.data.frame(t(dt_Exp))

# Row-wise two-sample t-test (t.test defaults) and log2 ratio of group means.
n_genes <- nrow(dt_Con)
Pvalue <- rep(NA_real_, n_genes)
log2_FC <- rep(NA_real_, n_genes)

# Convert once so the loop does not index a data frame row by row.
con_mat <- as.matrix(dt_Con)
exp_mat <- as.matrix(dt_Exp)

# seq_len() makes the loop a no-op when there are no rows.
for (i in seq_len(n_genes)) {
  con_values <- as.numeric(con_mat[i, ])
  exp_values <- as.numeric(exp_mat[i, ])

  # t.test() stops with an error on rows it cannot test (e.g. constant
  # values); record NA for that row instead of aborting the whole run.
  Pvalue[i] <- tryCatch(
    t.test(con_values, exp_values)$p.value,
    error = function(e) NA_real_
  )
  # NOTE(review): a group mean of 0 yields Inf/-Inf/NaN here.
  log2_FC[i] <- log2(mean(exp_values) / mean(con_values))
}

n_untested <- sum(is.na(Pvalue))
if (n_untested > 0) {
  warning(n_untested, " row(s) have no usable p-value; Pvalue is NA/NaN there",
          call. = FALSE)
}

library(dplyr)

library(tidyr)

library(tibble)

# Benjamini-Hochberg FDR correction of the p-values.
fdr <- p.adjust(Pvalue, "BH")

# Result table: log2 fold change, p-value, FDR and the feature name.
out <- as.data.frame(cbind(log2_FC, Pvalue, fdr))
out$gene <- rownames(dt_Con)
# Optional filter, left disabled as in the original:
# out <- out %>%
#   dplyr::filter(log2_FC > 0.5 & Pvalue < 0.05)

setwd('D:\\SCIwork\\F38KRT\\s5')

write.csv(out, file = 'out_S1.csv')

循環T檢驗後求取差異基因的差異倍數和P值。

同樣的邏輯，分別求取subtype2和subtype3的差異基因




#===========================================================================


#===========================================================================



# One-vs-rest comparison for Subtype2: Subtype2 samples form the 'Exp' group
# and every other sample is pooled into the 'Con' group.

# Restart from the untouched merged table.
dt <- dt_total

dt$subtype <- ifelse(dt$subtype == 'Subtype2', 'Exp', 'Con')

table(dt$subtype)

dt <- dt[order(dt$subtype), ]

dt[1:4, 1:4]

dt_Con <- subset(dt, dt$subtype == 'Con')

dt_Con[1:4, 1:4]

dt_Exp <- subset(dt, dt$subtype == 'Exp')

dt_Exp[1:4, 1:4]

# Label each row as <group><original row name>, drop the group column, then
# transpose so that rows are features and columns are samples.
rownames(dt_Con) <- paste0(dt_Con$subtype, rownames(dt_Con))
dt_Con$subtype <- NULL
dt_Con <- as.data.frame(t(dt_Con))

rownames(dt_Exp) <- paste0(dt_Exp$subtype, rownames(dt_Exp))
dt_Exp$subtype <- NULL
dt_Exp <- as.data.frame(t(dt_Exp))

# Row-wise two-sample t-test (t.test defaults) and log2 ratio of group means.
n_genes <- nrow(dt_Con)
Pvalue <- rep(NA_real_, n_genes)
log2_FC <- rep(NA_real_, n_genes)

# Convert once so the loop does not index a data frame row by row.
con_mat <- as.matrix(dt_Con)
exp_mat <- as.matrix(dt_Exp)

# seq_len() makes the loop a no-op when there are no rows.
for (i in seq_len(n_genes)) {
  con_values <- as.numeric(con_mat[i, ])
  exp_values <- as.numeric(exp_mat[i, ])

  # t.test() stops with an error on rows it cannot test (e.g. constant
  # values); record NA for that row instead of aborting the whole run.
  Pvalue[i] <- tryCatch(
    t.test(con_values, exp_values)$p.value,
    error = function(e) NA_real_
  )
  # NOTE(review): a group mean of 0 yields Inf/-Inf/NaN here.
  log2_FC[i] <- log2(mean(exp_values) / mean(con_values))
}

n_untested <- sum(is.na(Pvalue))
if (n_untested > 0) {
  warning(n_untested, " row(s) have no usable p-value; Pvalue is NA/NaN there",
          call. = FALSE)
}

# Benjamini-Hochberg FDR correction of the p-values.
fdr <- p.adjust(Pvalue, "BH")

# Result table: log2 fold change, p-value, FDR and the feature name.
out <- as.data.frame(cbind(log2_FC, Pvalue, fdr))
out$gene <- rownames(dt_Con)

setwd('D:\\SCIwork\\F38KRT\\s5')

write.csv(out, file = 'out_S2.csv')







#===========================================================================


#===========================================================================



# One-vs-rest comparison for Subtype3: Subtype3 samples form the 'Exp' group
# and every other sample is pooled into the 'Con' group.

# Restart from the untouched merged table.
dt <- dt_total

dt$subtype <- ifelse(dt$subtype == 'Subtype3', 'Exp', 'Con')

table(dt$subtype)

dt <- dt[order(dt$subtype), ]

dt[1:4, 1:4]

dt_Con <- subset(dt, dt$subtype == 'Con')

dt_Con[1:4, 1:4]

dt_Exp <- subset(dt, dt$subtype == 'Exp')

dt_Exp[1:4, 1:4]

# Label each row as <group><original row name>, drop the group column, then
# transpose so that rows are features and columns are samples.
rownames(dt_Con) <- paste0(dt_Con$subtype, rownames(dt_Con))
dt_Con$subtype <- NULL
dt_Con <- as.data.frame(t(dt_Con))

rownames(dt_Exp) <- paste0(dt_Exp$subtype, rownames(dt_Exp))
dt_Exp$subtype <- NULL
dt_Exp <- as.data.frame(t(dt_Exp))

# Row-wise two-sample t-test (t.test defaults) and log2 ratio of group means.
n_genes <- nrow(dt_Con)
Pvalue <- rep(NA_real_, n_genes)
log2_FC <- rep(NA_real_, n_genes)

# Convert once so the loop does not index a data frame row by row.
con_mat <- as.matrix(dt_Con)
exp_mat <- as.matrix(dt_Exp)

# seq_len() makes the loop a no-op when there are no rows.
for (i in seq_len(n_genes)) {
  con_values <- as.numeric(con_mat[i, ])
  exp_values <- as.numeric(exp_mat[i, ])

  # t.test() stops with an error on rows it cannot test (e.g. constant
  # values); record NA for that row instead of aborting the whole run.
  Pvalue[i] <- tryCatch(
    t.test(con_values, exp_values)$p.value,
    error = function(e) NA_real_
  )
  # NOTE(review): a group mean of 0 yields Inf/-Inf/NaN here.
  log2_FC[i] <- log2(mean(exp_values) / mean(con_values))
}

n_untested <- sum(is.na(Pvalue))
if (n_untested > 0) {
  warning(n_untested, " row(s) have no usable p-value; Pvalue is NA/NaN there",
          call. = FALSE)
}

# Benjamini-Hochberg FDR correction of the p-values.
fdr <- p.adjust(Pvalue, "BH")

# Result table: log2 fold change, p-value, FDR and the feature name.
out <- as.data.frame(cbind(log2_FC, Pvalue, fdr))
out$gene <- rownames(dt_Con)

setwd('D:\\SCIwork\\F38KRT\\s5')

write.csv(out, file = 'out_S3.csv')

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

多分組差異分析解決方案（1）循環T檢驗

主要方法：將其中某一組設置爲實驗組，其餘幾組統一設置爲對照組。

第一步讀取數據，合併表達矩陣和分組文件

將subtype1設置爲exp組，其餘兩組（subtype2，和subtype3）設置爲con組。

同樣的邏輯，分別求取subtype2和subtype3的差異基因

win11關閉自動檢測病毒刪文件

千兆寬帶實際網速能到達多少？

循環繪圖和多分組多基因（變量）差異展示

在線網站根據圖片提取小分子化學結構

MCP-count包計算腫瘤微環境中各類細胞豐度

利賓斯基規則篩選小分子

R語言之箱型圖修改中位數爲平均數

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結