ggplot2畫熱圖展示相關係數的簡單小例子

參考鏈接

http://www.sthda.com/english/wiki/ggplot2-quick-correlation-matrix-heatmap-r-software-and-data-visualization

新學到的內容

upper.tri()
lower.tri()
可以分別獲取矩陣上三角和下三角位置的邏輯索引(TRUE/FALSE矩陣),用它可以取出或修改對應位置的數據

通過cormat[upper.tri(cormat)] <- NA能夠將矩陣上三角數據賦值爲NA

對圖例的一些操作,包括:
調整右側圖例的上下位置
theme()函數中的legend.justification參數
比如簡單的柱形圖
首先是構造數據集

# Toy data set: the letters A-J paired with the integers 1 through 10.
df <- data.frame(
  A = LETTERS[seq_len(10)],
  B = seq_len(10)
)

畫圖


# Attach the plotting packages before the first ggplot() call. Loading
# cowplot alone is not guaranteed to attach ggplot2, so load it explicitly.
library(ggplot2)
library(cowplot)

# p1: legend on the default (right) side, justified with y = 1.
p1 <- ggplot(df, aes(x = A, y = B)) +
  geom_col(aes(fill = B)) +
  scale_fill_gradient2(
    low = "red", mid = "green", high = "blue",
    midpoint = 5
  ) +
  theme(legend.justification = c(0, 1))

# p2: same plot, legend justified with y = 0.
p2 <- ggplot(df, aes(x = A, y = B)) +
  geom_col(aes(fill = B)) +
  scale_fill_gradient2(
    low = "red", mid = "green", high = "blue",
    midpoint = 5
  ) +
  theme(legend.justification = c(0, 0))

# Stack both plots in one column so the two legend placements can be compared.
plot_grid(p1, p2, ncol = 1, nrow = 2, labels = c("p1", "p2"))

默認圖例位置在右側中間。legend.justification = c(0,0) 中,前一個0是x,後一個0是y。如果圖例在左右側,只需要設置y,範圍是0到1,1在頂部,0在底部;如果圖例在上下位置,對應只需要設置x,範圍同樣是0到1,0在左側,1在右側

# p3: legend moved above the plot, justified with x = 0.
p3 <- ggplot(df, aes(x = A, y = B)) +
  geom_col(aes(fill = B)) +
  scale_fill_gradient2(
    low = "red", mid = "green", high = "blue",
    midpoint = 5
  ) +
  theme(
    legend.position = "top",
    legend.justification = c(0, 0)
  )

# p4: legend above the plot, justified with x = 1.
p4 <- ggplot(df, aes(x = A, y = B)) +
  geom_col(aes(fill = B)) +
  scale_fill_gradient2(
    low = "red", mid = "green", high = "blue",
    midpoint = 5
  ) +
  theme(
    legend.position = "top",
    legend.justification = c(1, 0)
  )

# Stack both plots in one column so the two legend placements can be compared.
plot_grid(p3, p4, ncol = 1, nrow = 2, labels = c("p3", "p4"))

當圖例位於上下位置時,還可以設置圖例標題的位置,可選值是 "top"、"bottom"、"left"、"right",用到的代碼是
# Put the colour-bar title on top of the bar.
guides(fill = guide_colorbar(title.position = "top"))

# Legend above the plot, with the colour-bar title placed on top of the bar.
ggplot(df, aes(x = A, y = B)) +
  geom_col(aes(fill = B)) +
  scale_fill_gradient2(
    low = "red", mid = "green", high = "blue",
    midpoint = 5
  ) +
  theme(
    legend.position = "top",
    legend.justification = c(0, 0)
  ) +
  guides(fill = guide_colorbar(title.position = "top"))


現在圖例的標題是靠上居左,如果想居中的話可以繼續加參數title.hjust = 0.5

# Same plot as before, with the legend title additionally centred
# horizontally (title.hjust = 0.5).
ggplot(df, aes(x = A, y = B)) +
  geom_col(aes(fill = B)) +
  scale_fill_gradient2(
    low = "red", mid = "green", high = "blue",
    midpoint = 5
  ) +
  theme(
    legend.position = "top",
    legend.justification = c(0, 0)
  ) +
  guides(
    fill = guide_colorbar(
      title.position = "top",
      title.hjust = 0.5
    )
  )

改變顏色條的寬度和高度barwidth = 5,barheight = 5

# Resize the colour bar with barwidth / barheight and fix the fill scale to
# the range 0-10.
ggplot(df, aes(x = A, y = B)) +
  geom_col(aes(fill = B)) +
  scale_fill_gradient2(
    low = "red", mid = "green", high = "blue",
    midpoint = 5,
    # Spell the argument out in full: `limit` only worked through R's
    # partial argument matching.
    limits = c(0, 10)
  ) +
  theme(
    legend.position = "top",
    legend.justification = c(0, 0)
  ) +
  guides(
    fill = guide_colorbar(
      title.position = "top",
      title.hjust = 0.5,
      barwidth = 5,
      barheight = 5
    )
  )

調整圖例上顯示的刻度

# Customise the legend ticks: breaks at 0, 5 and 10, shown as "A", "B", "C".
ggplot(df, aes(x = A, y = B)) +
  geom_col(aes(fill = B)) +
  scale_fill_gradient2(
    low = "red", mid = "green", high = "blue",
    midpoint = 5,
    # Full argument names (`limits`, `labels`) instead of relying on R's
    # partial argument matching of `limit` / `label`.
    limits = c(0, 10),
    breaks = c(0, 5, 10),
    labels = c("A", "B", "C")
  ) +
  theme(
    legend.position = "top",
    legend.justification = c(0, 0)
  ) +
  guides(
    fill = guide_colorbar(
      title.position = "top",
      title.hjust = 0.5,
      barwidth = 5,
      barheight = 5,
      # TRUE rather than T: T is an ordinary variable and can be reassigned.
      ticks = TRUE,
      label = TRUE
    )
  )
下面進入正題,ggplot2熱圖可視化相關係數

代碼

library(reshape2)
library(ggplot2)
# Keep six mtcars variables (columns 1 and 3-7), selected by name.
mydata <- mtcars[, c("mpg", "disp", "hp", "drat", "wt", "qsec")]
head(mydata)
# Pairwise correlations (cor() defaults), rounded to two decimal places.
cormat <- round(cor(mydata), digits = 2)
# Return `cormat` with every entry strictly below the diagonal set to NA,
# leaving the diagonal and the upper triangle untouched.
get_upper_tri <- function(cormat) {
  below_diag <- lower.tri(cormat)
  replace(cormat, below_diag, NA)
}
# Keep only the upper triangle, then reshape the matrix to long format
# (Var1, Var2, value); na.rm = TRUE drops the NA cells.
upper_tri <- get_upper_tri(cormat)
melted_cormat <- melt(upper_tri, na.rm = TRUE)

# Heatmap of the correlations: blue for negative, white at 0, red for positive.
ggplot(data = melted_cormat, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    midpoint = 0,
    # Full argument name instead of the partially matched `limit`.
    limits = c(-1, 1),
    space = "Lab",
    name = "Pearson\nCorrelation"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)
  ) +
  coord_fixed()

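接下來原文說對相關係數做聚類,以發現可能的關係。但是這裏遇到個問題:爲什麼原始數據要做轉換呢?(1-cormat)/2這個代碼有什麼作用呢?答案是hclust()需要的是距離而不是相關係數,(1-cormat)/2把相關係數從[-1,1]線性映射到[0,1]:相關係數爲1時距離爲0,相關係數爲-1時距離爲1,這樣相關性越高的變量距離越近,會先被聚到一起。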

# Reorder a correlation matrix by hierarchical clustering.
#
# Correlations are turned into distances with (1 - r) / 2, which maps r = 1
# to distance 0 and r = -1 to distance 1, so variables with a high positive
# correlation are treated as close to each other.
#
# cormat: a square correlation matrix.
# Returns `cormat` with its rows and columns permuted into the order given by
# hclust(). The result is returned visibly.
reorder_cormat <- function(cormat) {
  # Use correlation between variables as distance
  dd <- as.dist((1 - cormat) / 2)
  hc <- hclust(dd)
  # Apply the same permutation to rows and columns. Ending on a bare
  # expression rather than an assignment keeps the return value visible.
  cormat[hc$order, hc$order]
}

# Reorder the variables by clustering, then keep the upper triangle only.
cormat <- reorder_cormat(cormat)
upper_tri <- get_upper_tri(cormat)

# Long format (Var1, Var2, value); na.rm = TRUE drops the NA cells.
melted_cormat <- melt(upper_tri, na.rm = TRUE)

# Heatmap of the reordered correlations, with each coefficient printed on
# its tile.
ggplot(melted_cormat, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  geom_text(aes(Var2, Var1, label = value), color = "black", size = 4) +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    midpoint = 0,
    # Full argument name instead of the partially matched `limit`.
    limits = c(-1, 1),
    space = "Lab",
    name = "Pearson\nCorrelation"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)
  ) +
  coord_fixed()

今天的內容就到這裏

歡迎大家關注我的公衆號
小明的數據分析筆記本

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章