散點圖一般用於展示兩個變量之間的關係(比如線性相關),例如兩個基因表達量之間的相關性。
# Pearson correlation test between the expression levels of the two genes.
# NOTE(review): `data` is defined outside this excerpt; it is assumed to be a
# data frame with numeric columns `gene1` and `gene2` - confirm.
# cor.test() uses method = "pearson" unless told otherwise.
cor.test(data$gene1, data$gene2)
#>  Pearson's product-moment correlation
#>
#> data:  data$gene1 and data$gene2
#> t = 2.4858, df = 395, p-value = 0.01334
#> 95 percent confidence interval:
#>  0.02600102 0.21984192
#>       cor
#> 0.1241053
實例:通過以下代碼計算兩個基因的相關性
- ①使用ggplot2繪製
# Scatter plot of gene2 against gene1 with a fitted linear regression line.
# Needs ggplot2 attached; stat_cor() is provided by the ggpubr package.
p1 <- ggplot(data = data, mapping = aes(x = gene1, y = gene2)) +
  geom_point(colour = "#426671", size = 2) +
  geom_smooth(method = lm, colour = "#764C29", fill = "#E7E1D7")

# Annotate the Pearson correlation and restrict the x axis to [0, 0.44].
# NOTE(review): xlim() sets scale limits, so points outside the range are
# dropped before the smoother and the correlation are computed - confirm this
# is intended (coord_cartesian() would only zoom).
p1 <- p1 +
  stat_cor(method = "pearson", label.x = 0.15, label.y = 30) +
  xlim(0, 0.44)
p1

# Axis labels and styling. theme_bw() is a complete theme: adding it replaces
# every theme setting made before it, so it has to come BEFORE the
# axis-title tweaks, otherwise the bold size-16 titles are silently reset.
p1 <- p1 +
  xlab("gene1") +
  ylab("gene2") +
  theme_bw() +
  theme(
    axis.title.x = element_text(
      size = 16, face = "bold", vjust = 0.5, hjust = 0.5
    ),
    axis.title.y = element_text(
      size = 16, face = "bold", vjust = 0.5, hjust = 0.5
    )
  )
p1
- ②使用ggscatter繪製
# The same figure drawn with ggpubr::ggscatter().
# The plot is stored in its own variable (p2) so that it is the object that
# gets printed and p1 from the ggplot2 version is left untouched.
p2 <- ggscatter(
  data,
  x = "gene1", y = "gene2",
  color = "#426671", size = 2,  # point colour and size
  add = "reg.line",             # add the regression line
  add.params = list(color = "#764C29", fill = "#E7E1D7"),  # style of the line
  conf.int = TRUE,              # add the confidence interval
  cor.coef = TRUE,              # add the correlation coefficient, see ?stat_cor
  # The label position must lie inside the x range set by xlim() below,
  # otherwise the label is dropped; a single label is enough, so no extra
  # stat_cor() layer is added.
  cor.coeff.args = list(
    method = "pearson", label.x = 0.15, label.y = 30, label.sep = "\n"
  )
) +
  xlim(0, 0.44) +
  xlab("gene1") +
  ylab("gene2") +
  # theme_bw() is a complete theme and resets earlier theme() settings, so it
  # is applied first and the axis-title tweaks afterwards.
  theme_bw() +
  theme(
    axis.title.x = element_text(
      size = 16, face = "bold", vjust = 0.5, hjust = 0.5
    ),
    axis.title.y = element_text(
      size = 16, face = "bold", vjust = 0.5, hjust = 0.5
    )
  )
p2
可以看出兩個基因的相關性並不高(r ≈ 0.12),儘管該相關在統計上是顯著的(p ≈ 0.013)。
一些使用ggplot2繪製散點圖的例子
# Toy data set for the examples: 20 observations split evenly between
# conditions "A" and "B"; xvar and yvar are 1..20 plus Gaussian noise (sd = 3).
set.seed(1234)  # fixed seed so the random noise is reproducible
dat <- data.frame(
  cond = rep(c("A", "B"), each = 10),
  xvar = 1:20 + rnorm(20, sd = 3),
  yvar = 1:20 + rnorm(20, sd = 3)
)
head(dat)  # show the first rows

library(ggplot2)
繪製最基本的線性迴歸圖
# Plain scatter plot drawn with hollow circles (shape 1).
ggplot(data = dat, mapping = aes(x = xvar, y = yvar)) +
  geom_point(shape = 1)

# Scatter plot plus a linear regression line; the 95% confidence region is
# included by default.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)

# Linear regression line without the shaded confidence region.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm, se = FALSE)

# Default smoother: a loess fit curve with its confidence region.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar)) +
  geom_point(shape = 1) +
  geom_smooth()
#> `geom_smooth()` using method = 'loess'
可以按分組自定義設置點的顏色和形狀
# Colour the points by condition.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar, color = cond)) +
  geom_point(shape = 1)

# Same plot with a slightly darker palette than normal and one linear
# regression line per condition, without the shaded confidence region.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar, color = cond)) +
  geom_point(shape = 1) +
  scale_colour_hue(l = 50) +
  geom_smooth(method = lm, se = FALSE)

# As above, but the regression lines are extended beyond the domain of the
# data (fullrange = TRUE).
ggplot(data = dat, mapping = aes(x = xvar, y = yvar, color = cond)) +
  geom_point(shape = 1) +
  scale_colour_hue(l = 50) +
  geom_smooth(method = lm, se = FALSE, fullrange = TRUE)

# Distinguish the conditions by point shape instead of colour.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar, shape = cond)) +
  geom_point()

# Same, with manually chosen shapes: a hollow circle (1) and a triangle (2).
ggplot(data = dat, mapping = aes(x = xvar, y = yvar, shape = cond)) +
  geom_point() +
  scale_shape_manual(values = c(1, 2))