數據關係型圖表
沈益
7/29/2019
4.1 帶趨勢線的散點圖
mydata<-read.csv("配套資源/第4章 數據關係型圖表/Scatter_Data.csv",stringsAsFactors=FALSE)
ggplot(data = mydata, aes(x,y)) +
geom_point(fill="black",colour="black",size=3,shape=21) +
#geom_smooth(method="lm",se=TRUE,formula=y ~ splines::bs(x, 5),colour="red")+ #(h)
geom_smooth(method = 'gam',formula=y ~s(x))+ #(g)
#geom_smooth(method = 'loess',span=0.4,se=TRUE,colour="#00A5FF",fill="#00A5FF",alpha=0.2)+ #(f)
scale_y_continuous(breaks = seq(0, 125, 25))+
theme(
text=element_text(size=15,color="black"),
plot.title=element_text(size=15,family="myfont",hjust=.5,color="black"),
legend.position="none"
)
4.2 殘差分析圖
採用黑色到紅色漸變顏色和氣泡麪積大小兩個視覺暗示對應殘差的絕對值大小,用於實際數據點的表示;而擬合數據點則用小空心圓圈表示,並放置在灰色的擬合曲線上。用直線連接實際數據點和擬合數據點。殘差的絕對值越大,顏色越紅、氣泡也越大,連接直線越長,這樣可以很清晰地觀察數據的擬合效果 線性迴歸
mydata <- read.csv("配套資源/第4章 數據關係型圖表/Residual_Analysis_Data.csv", stringsAsFactors = FALSE)
fit <- lm(y2 ~ x, data = mydata) # 對數據進行線性擬合
mydata$predicted <- predict(fit) # 擬合曲線的預測值
mydata$residuals <- residuals(fit) # 擬合曲線的殘差值
mydata$Abs_Residuals <- abs(mydata$residuals) # 擬合曲線的殘差值的絕對值
ggplot(mydata, aes(x, y2)) +
geom_point(aes(fill = Abs_Residuals, size = Abs_Residuals), shape = 21, color = "black") +
scale_fill_continuous(low = "black", high = "red") + # 修改填充顏色
geom_smooth(method = "lm", se = FALSE, color = "lightgrey") +
geom_point(aes(y = predicted), shape = 1) + # 繪製預測擬合點
geom_segment(aes(xend = x, yend = predicted), alpha = .2) + # 繪製線段
guides(fill = guide_legend((title = "Residual")),
size = guide_legend(title = "Residual")) +
ylim(c(0, 150)) +
xlab("X-Axis") +
ylab("Y-Axis") +
theme(text = element_text(size = 15, face = "plain", color = "black"),
axis.title = element_text(size = 10, face = "plain", color = "black"),
legend.position = "right",
legend.title = element_text(size = 13, face = "plain", color = "black"),
legend.background = element_rect(fill = alpha("black", 0.2)))
二次迴歸
mydata <- read.csv("配套資源/第4章 數據關係型圖表/Residual_Analysis_Data.csv", stringsAsFactors = FALSE)
fit <- lm(y5 ~ x+I(x^2), data = mydata)
mydata$predicted <- predict(fit)
mydata$residuals0 <- residuals(fit)
mydata$Residuals <- abs(mydata$residuals0)
ggplot(mydata, aes(x, y5)) +
geom_point(aes(fill=Residuals, size = Residuals), shape = 21, color = "black") +
geom_smooth(method = "lm", formula = y ~ x+I(x^2), se = FALSE, color = "lightgrey") +
geom_segment(aes(xend = x, yend = predicted), alpha=0.2) +
geom_point(aes(y = predicted), shape = 1) +
scale_fill_continuous(low = "black", high = "red") +
xlab("X-Axis") +
ylab("Y-Axis") +
guides(fill = guide_legend(title = "Residual"),
size = guide_legend(title = "Residual")) +
theme(text = element_text(size = 15, face = "plain", color = "black"),
axis.title = element_text(size = 10, face = "plain", color = "black"),
axis.text = element_text(size = 10, face = "plain", color = "black"),
legend.position = "right",
legend.title = element_text(size = 13, face = "plain", color = "black"),
legend.text = element_text(size = 10, face = "plain", color = "black"),
legend.background = element_rect(fill = alpha("black", 0.2)))
4.3 直方圖
塊狀直方圖
x <- rnorm(250, mean = 10, sd=1) # 隨機生成250個,平均值是10,標準差爲1的服從正態分佈的數
step <- 0.2
breaks <- seq(min(x) - step, max(x) + step, step) # 指定分隔好的區間個數
hg <- hist(x, breaks = breaks, plot = FALSE) # 統計數據頻數
bins <- length(hg$counts)
yvals <- numeric(0)
xvals <- numeric(0)
for(i in 1:bins){
yvals <- c(yvals, hg$counts[i]:0) # 每一列有幾個數就顯示幾個方塊
xvals <- c(xvals, rep(hg$mids[i], hg$counts[i] + 1))
}
dat <- data.frame(xvals, yvals)
dat <- dat[yvals > 0,]
# colormap <- colorRampPalette(rev(brewer.pal(11, "Spectral")))(32) # 創建梯度顏色
ggplot(dat, aes(x=xvals, y=yvals, fill=yvals)) +
geom_tile(colour="black") +
#scale_fill_gradientn(colours = colormap)
scale_fill_distiller(palette = "Spectral") +
ylim(0, max(yvals) * 1.3) +
theme(
text = element_text(size = 15, color = "black"),
plot.title = element_text(size = 15, family = "myfont", face = "bold.italic", hjust = .5, colour = "black"),
legend.background = element_rect(alpha("black", 0.1)),
legend.position = c(0.9, 0.75)
)
圓圈狀點圖
x <- rnorm(250, mean = 10, sd=1) # 隨機生成250個,平均值是10,標準差爲1的服從正態分佈的數
step <- 0.2
breaks <- seq(min(x) - step, max(x) + step, step) # 指定分隔好的區間個數
hg <- hist(x, breaks = breaks, plot = FALSE) # 統計數據頻數
bins <- length(hg$counts)
yvals <- numeric(0)
xvals <- numeric(0)
for(i in 1:bins){
yvals <- c(yvals, hg$counts[i]:0) # 每一列有幾個數就顯示幾個方塊
xvals <- c(xvals, rep(hg$mids[i], hg$counts[i] + 1))
}
dat <- data.frame(xvals, yvals)
dat <- dat[yvals > 0,]
colormap <- colorRampPalette(rev(brewer.pal(11, "Spectral")))(32) # 創建梯度顏色
ggplot(dat, aes(x=xvals, y=yvals, fill=yvals)) +
geom_point(color = "black", shape=21, size = 4) +
scale_fill_gradientn(colours = colormap) +
ylim(0, max(yvals)*1.3) +
theme(
text = element_text(size = 15, color = "black"),
plot.title = element_text(size = 15, family = "myfont", face = "bold.italic", hjust = .5, color = "black"),
legend.background = element_blank(),
legend.position = c(0.9, 0.75)
)
4.4 Q-Q圖
P-P圖(或 Q-Q 圖)可檢驗的分佈包括:beta distribution;t-distribution;chi-squrae;gamma distribution;normal distibution; uniform distribution; Pareto distribution; logistic distributon 一般來說,當比較兩組樣本時,Q-Q圖是一種比直方圖更加有效的方法。
df <-data.frame(x=rnorm(250 , mean=10 , sd=1))
ggplot(df, aes(sample = x))+ # qq 圖需要制定 sample
geom_qq(shape =1) +
geom_qq_line(fill = "#00AFBB",size=1)
P-P圖
library(CircStats)
pp.plot(x)
4.5 散點圖
散點圖通常用於顯示和比較數值,不僅可以顯示趨勢,還能顯示數據集羣的形狀,以及在數據雲團中各個點的關係。這類散點圖非常適合聚類分析。常用的聚類方法包括 K-means, FCM, KFCM, DBSCAN, MeanShift 等。
帶透明度設置的散點圖
mydata<-read.csv("配套資源/第4章 數據關係型圖表/HighDensity_Scatter_Data.csv",stringsAsFactors=FALSE)
ggplot(data = mydata, aes(x,y)) +
geom_point(colour="blue",alpha=0.2)+ # 設置透明度
labs(x = "Axis X",y="Axis Y")+
theme(
text=element_text(size=15,color="black"),
plot.title=element_text(size=15,family="myfont",face="bold.italic",hjust=.5,color="black"),
legend.position="none"
)
k-means 聚類的散點圖
mydata <- read.csv("配套資源/第4章 數據關係型圖表/HighDensity_Scatter_Data.csv")
kmeansResult <- kmeans(mydata, 2, nstart = 20) # 聚類的數量爲 2,隨機數據集選擇次數爲 200
mydata$cluster <- as.factor(kmeansResult$cluster)
ggplot(mydata, aes(x, y, color=cluster)) +
geom_point(alpha=0.2) +
scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
labs(x = "Axis X", y = "Axis Y") +
theme(
text = element_text(size = 15, face = "bold.italic", color = "black"),
plot.title = element_text(size = 15, face = "bold.italic", color = "black"),
legend.background = element_blank(),
legend.position = c(0.85, 0.15)
)
帶橢圓標定的聚類散點
mydata <- read.csv("配套資源/第4章 數據關係型圖表/HighDensity_Scatter_Data.csv")
mydata$cluster <- as.factor(kmeansResult$cluster)
ggplot(data = mydata, aes(x,y,color=cluster)) +
geom_point (alpha=0.2)+
# 繪製透明度爲0.2 的散點圖
stat_ellipse(aes(x=x,y=y,fill= cluster), geom="polygon", level=0.95, alpha=0.2) +
#繪製橢圓標定不同類別,如果省略該語句,則繪製圖3-1-7(c)
scale_color_manual(values=c("#00AFBB","#FC4E07")) +#使用不同顏色標定不同數據類別
scale_fill_manual(values=c("#00AFBB","#FC4E07"))+ #使用不同顏色標定不同橢類別
labs(x = "Axis X",y="Axis Y")+
theme(
text=element_text(size=15,color="black"),
plot.title=element_text(size=15,family="myfont",face="bold.italic",color="black"),
legend.background=element_blank(),
legend.position=c(0.85,0.15)
)
4.6 多數據系列散點圖
多數據系列散點圖只是在單數據系列上添加新的數據系列,使用不同的填充顏色或者形狀區分數據系列,R 中 ggplot2 包中的 geom_point()函數可以根據數據類別映射到不同的填充顏色與形狀,以及邊框顏色。
mydata <- read.csv("配套資源/第4章 數據關係型圖表/HighDensity_Scatter_Data.csv", stringsAsFactors = FALSE)
mydata <- mydata[round(runif(300, 0, 10000)), ]
kmeanResult <- kmeans(mydata, 2, nstart = 20)
mydata$cluster <- as.factor(kmeanResult$cluster)
ggplot(mydata, aes(x, y, fill=cluster, shape=cluster)) +
geom_point(size=4, colour="black", alpha=0.7) +
scale_shape_manual(values = c(21, 23)) +
scale_fill_manual(values = c("#00AFBB", "#FC4E07")) +
labs(x = "Axis X", y = "Axis Y") +
scale_y_continuous(limits = c(-5, 10)) +
scale_x_continuous(limits = c(-5, 10)) +
theme(
text = element_text(size = 15, color = "black"),
plot.title = element_text(size = 15, family = "myfont", face = "bold.italic", color = "black"),
legend.background = element_blank(),
legend.position = c(0.85, 0.15)
)
4.7 氣泡圖
ggplot2 包自帶的添加數據標籤的函數 geom_text()容易出現數據標籤重疊的現象,所以使用 ggrepel 包中的 geom_text_repel()函數 帶數據標籤的氣泡圖
ggplot(mtcars, aes(wt, mpg)) +
geom_point(aes(size=disp, fill=disp), shape=21, colour="black", alpha=0.8) +
scale_fill_gradient2(low="#377EB8",high="#E41A1C",midpoint = mean(mtcars$disp)) +
scale_size_area(max_size = 12) +
geom_text_repel(label=mtcars$disp) +
guides(size = guide_legend((title = "Value")),
fill = guide_legend((title = "Value"))) +
theme(
legend.text=element_text(size=10,face="plain",color="black"),
axis.title=element_text(size=10,face="plain",color="black"),
axis.text = element_text(size=10,face="plain",color="black"),
legend.position = "right"
)
方塊氣泡圖
ggplot(mtcars, aes(wt, mpg)) +
geom_point(aes(size=disp, fill=disp), shape=22, colour="black", alpha=0.8) +
scale_fill_gradient2(low = brewer.pal(7, "Set1")[2], high = brewer.pal(7, "Set1")[1],
midpoint = mean(mtcars$disp)) +
scale_size_area(max_size = 12) +
guides(fill = guide_legend(title = "Value"),
size = guide_legend(title = "Value")) +
theme(text = element_text(size = 15, color = "black"),
plot.title = element_text(size = 15, face = "bold.italic", color = "black"))
4.8 三維散點圖
R 中 scatterplot3d()函數,rgl 包中的 plot3d()函數,plot3D 中的 scatter3D()函數等都可以繪製三維散點圖。rgl 包中的 plot3d()函數繪製的三維圖表可以實現圖表的旋轉。
df <- read.csv("配套資源/第4章 數據關係型圖表/ThreeD_Scatter_Data.csv", stringsAsFactors = FALSE, header = T)
pmar <- par(mar = c(2.1, 3.1, 4.1, 6.2))
with(df, scatter3D(x = mph, y = Gas_Mileage, z = Power, #bgvar = mag,
pch = 21, cex = 1.4, col = "black", bg = "#F57446",
xlab = "0-60 mph(sec)",
ylab = "Gas Mileage (mpg)",
zlab = "Power (Kw)",
zlim = c(40, 80),
ticktype = "detailed", bty = "f", box = TRUE,
#panel.first = panelfirst,
theta = 60, phi = 20, d = 3,
colkey = FALSE,
list(length = 0.5, width = 0.5, cex.clab = 0.75)))
帶顏色的散點圖
colormap <- colorRampPalette(rev(brewer.pal(11,'RdYlGn')))(100)#
index <- ceiling(((prc <- 0.7 * df$Power/ diff(range(df$Power))) - min(prc) + 0.3)*100)
for (i in seq(1,length(index)) ){
prc[i]=colormap[index[i]]
}
pmar <- par(mar = c(2.1, 3.1, 3.1, 8.1))
with(df, scatter3D(x = mph, y = Gas_Mileage, z = Power, #bgvar = mag,
pch = 21, cex = 1.5,col="black",bg=prc,
xlab = "0-60 mph (sec)",
ylab = "Gas Mileage (mpg)",
zlab = "Power (kW)",
zlim=c(40,180),
ticktype = "detailed",bty = "f",box = TRUE,
#panel.first = panelfirst,
theta = 60, phi = 20, d=3,
colkey = FALSE)#list(length = 0.5, width = 0.5, cex.clab = 0.75))
)
colkey (col=colormap,clim=range(df$Power),clab = "Power", add=TRUE, length=0.5,side = 4)
index <- ceiling(((prc <- 0.7 * df$Weight/ diff(range(df$Weight))) - min(prc) + 0.3)*100)
for (i in seq(1,length(index)) ){
prc[i]=colormap[index[i]]
}
pmar <- par(mar = c(5.1, 4.1, 4.1, 6.1))
with(df, scatter3D(x = mph, y = Gas_Mileage, z = Power, #bgvar = mag,
pch = 21, cex = 1.5,col="black",bg=prc,
xlab = "0-60 mph (sec)",
ylab = "Gas Mileage (mpg)",
zlab = "Power (kW)",
zlim=c(40,180),
ticktype = "detailed",bty = "f",box = TRUE,
#panel.first = panelfirst,
theta = 60, phi = 20, d=3,
colkey = FALSE)#list(length = 0.5, width = 0.5, cex.clab = 0.75))
)
colkey (col=colormap,clim=range(df$Weight),clab = "Weight", add=TRUE, length=0.5,side = 4)
三維氣泡圖
with(df, scatter3D(x = mph, y = Gas_Mileage, z = Power, #bgvar = mag,
pch = 21, cex = rescale(df$Weight, c(.5, 5)),col="black",bg="#ED5E3C",
xlab = "0-60 mph (sec)",
ylab = "Gas Mileage (mpg)",
zlab = "Power (kW)",
zlim=c(40,180),
ticktype = "detailed",bty = "f",box = TRUE,
#panel.first = panelfirst,
theta = 60, phi = 20, d=3,
colkey = FALSE)#list(length = 0.5, width = 0.5, cex.clab = 0.75))
)
breaks<-round(seq(500,2000,length.out=4),3)
legend("right",title = "Weight",legend=breaks,pch=21,
pt.cex=rescale(breaks, c(.5, 5)),y.intersp=1.6,cex=1,
pt.bg = "#ED5E3C",bg="white",bty="n")
index <- ceiling(((prc <- 0.7 * df$Weight/ diff(range(df$Weight))) - min(prc) + 0.3)*100)
for (i in seq(1,length(index)) ){
prc[i]=colormap[index[i]]
}
pmar <- par(mar = c(5.1, 4.1, 4.1, 6.1))
with(df, scatter3D(x = mph, y = Gas_Mileage, z = Power, #bgvar = mag,
pch = 21, cex = rescale(df$Weight, c(.5, 5)),col="black",bg=prc,
xlab = "0-60 mph (sec)",
ylab = "Gas Mileage (mpg)",
zlab = "Power (kW)",
zlim=c(40,180),
ticktype = "detailed",bty = "f",box = TRUE,
theta = 60, phi = 20, d=3,
colkey = FALSE)
)
#colkey (col=colormap,clim=range(df$Weight),clab = "Weight", add=TRUE, length=0.5,side = 4)
breaks<-round(seq(500,2000,length.out=4),3)
legend_index <- ceiling(((legend_prc <- 0.7 *breaks/ diff(range(breaks))) - min(legend_prc) + 0.3)*100)
for (i in seq(1,length(legend_index)) ){
legend_prc[i]=colormap[legend_index[i]]
}
legend("right",title = "Weight",legend=breaks,pch=21,
pt.cex=rescale(breaks, c(.5, 5)),y.intersp=1.6,
pt.bg = legend_prc,bg="white",bty="n")
library(wesanderson)
pmar <- par(mar = c(5.1, 4.1, 4.1, 7.1))
colors0 <- wes_palette(n=3, name="Darjeeling1")
colors <- colors0[as.numeric(iris$Species)]
with(iris, scatter3D(x = Sepal.Length, y = Sepal.Width, z = Petal.Length, #bgvar = mag,
pch = 21, cex = 1.5,col="black",bg=colors,
xlab = "longitude", ylab = "latitude",
zlab = "depth, km",
ticktype = "detailed",bty = "f",box = TRUE,
#panel.first = panelfirst,
theta = 140, phi = 20, d=3,
colkey = FALSE)#list(length = 0.5, width = 0.5, cex.clab = 0.75))
)
legend("right",title = "Species",legend=c("setosa", "versicolor", "virginica"),pch=21,
cex=1,y.intersp=1,pt.bg = colors0,bg="white",bty="n")
4.9 曲面擬合圖
library(lattice)
library(gridExtra)
library(reshape2)
library(RColorBrewer)
colormap<-colorRampPalette(rev(brewer.pal(11,'RdYlGn')))(100)
#--------------------------------------------------多項式擬合------------------------------------------
mydata <- read.csv("配套資源/第4章 數據關係型圖表/Surface_Data.csv", sep= ",", header=T)
#多項式擬合z=f(x,y)=a+bx+cy+dxx+eyy
x <- mydata$x
y <- mydata$y
z <- mydata$z
x2<-x*x
y2<-y*y
poly_z <- lm(z ~ x + y +x2+y2)
print(poly_z)
#設定爲30X30的網格數據(x, y),並根據擬合方程求其數值
N<-30
xmar <- seq(min(x),max(x),(max(x)-min(x))/N)
ymar <- seq(min(y),max(y),(max(y)-min(y))/N)
Grid_xy<-expand.grid(list(x=xmar,y=ymar))
Grid_xy$x2<-Grid_xy$x*Grid_xy$x
Grid_xy$y2<-Grid_xy$y*Grid_xy$y
Grid_z <- predict.lm(poly_z, newdata=Grid_xy)
df<-data.frame(matrix(Grid_z, length(xmar), length(ymar)))
colnames(df)<-xmar
df$x<-ymar
melt_df<-melt(df,id.vars='x', variable.name ="y",value.name = "z")
melt_df$y<-as.numeric(melt_df$y)
#trellis.par.set("axis.line",list(col=NA,lty=1,lwd=1)) # Removes the border of the plot if you want
surface_plot1 <- wireframe(z ~ y*x, data=melt_df,
xlab = "0-60 mph (sec)",
ylab = "Gax Mileage (mpg)",
zlab="Power (KW)",
zlim=c(20,180),
drape = TRUE,
colorkey = TRUE,
scales = list(arrows=FALSE),
light.source = c(10,0,10),
col.regions = colorRampPalette(rev(brewer.pal(11,'RdYlGn')))(100),
screen = list(z = -60, x = -60)
)
surface_plot1
max_z<-max(melt_df$z)
min_z<-min(melt_df$z)
breaks_lines<-seq(min_z,max_z,by=(max_z-min_z)/10)
Contour1<-ggplot()+
geom_raster(data=melt_df,aes(y=y,x=x,fill=z),interpolate=TRUE)+#根據高度填充
geom_contour(data=melt_df,aes(y=y,x=x,z=z,colour= ..level..),breaks=breaks_lines,color="black")+#
geom_point(data=mydata,aes(y,x,fill=z),shape=21,size=3)+
scale_fill_gradientn(colours=colormap)+
labs(x="0-60 mph (sec)",y="Gax Mileage (mpg)",fill="Power (KW)")+
#xlim(min(x),max(x))+
#ylim(min(y),max(y))+
theme_light()+
theme(
axis.title=element_text(size=15,face="plain",color="black"),
axis.text = element_text(size=13,face="plain",color="black"),
legend.title=element_text(size=13,face="plain",color="black"),
legend.text = element_text(size=11,face="plain",color="black"),
legend.background = element_blank(),
legend.position =c(0.82,0.25)
)
Contour1
loess 迴歸式擬合
mydata <- read.csv("配套資源/第4章 數據關係型圖表/Surface_Data.csv", sep= ",", header=T)
x <- mydata$x
y <- mydata$y
#z <- mydata$z
xmar <- seq(min(x),max(x),(max(x)-min(x))/30)
ymar <- seq(min(y),max(y),(max(y)-min(y))/30)
elev.loess <- loess(z ~ x * y, mydata,span=0.95)
print(elev.loess)
# get fitted (interpolated) values
elev.interp <- predict(elev.loess, expand.grid(list(x=xmar,y=ymar)))
df<-data.frame(matrix(elev.interp, length(xmar),length(ymar)))
colnames(df)<-xmar
df$x<-ymar
melt_df<-melt(df,id.vars='x', variable.name ="y",value.name = "z")
melt_df$y<-as.numeric(melt_df$y)
#trellis.par.set("axis.line",list(col=NA,lty=1,lwd=1)) # Removes the border of the plot if you want
surface_plot2 <- wireframe(z ~ y*x, data=melt_df,
xlab = "0-60 mph (sec)",
ylab = "Gax Mileage (mpg)",
zlab="Power (KW)",
#main = "orgpractices",
zlim=c(20,180),
drape = TRUE,
colorkey = TRUE,
scales = list(arrows=FALSE),##,cex=.5, tick.number = 5, z = list(arrows=F), distance =c(1, 1, 1)),
light.source = c(10,0,10),
col.regions = colorRampPalette(rev(brewer.pal(11,'RdYlGn')))(100),
screen = list(z = -60, x = -60)
)
surface_plot2
max_z<-max(melt_df$z)
min_z<-min(melt_df$z)
breaks_lines<-seq(min_z,max_z,by=(max_z-min_z)/10)
Contour2<-ggplot()+
geom_raster(data=melt_df,aes(y=y,x=x,fill=z),interpolate=TRUE)+#根據高度填充
geom_contour(data=melt_df,aes(y=y,x=x,z=z,colour= ..level..),breaks=breaks_lines,color="black")+#
geom_point(data=mydata,aes(y,x,fill=z),shape=21,size=3)+
scale_fill_gradientn(colours=colormap)+
labs(x="0-60 mph (sec)",y="Gax Mileage (mpg)",fill="Power (KW)")+
#xlim(min(x),max(x))+
#ylim(min(y),max(y))+
theme_light()+
theme(
axis.title=element_text(size=15,face="plain",color="black"),
axis.text = element_text(size=13,face="plain",color="black"),
legend.title=element_text(size=13,face="plain",color="black"),
legend.text = element_text(size=11,face="plain",color="black"),
legend.background = element_blank(),
legend.position =c(0.85,0.2)
)
Contour2
圖表組合
grid.arrange(surface_plot1,surface_plot2, ncol=2, clip=TRUE)
grid.arrange(Contour1,Contour2, ncol=2, clip=TRUE)
多項式擬合
mydata <- read.csv("配套資源/第4章 數據關係型圖表/Surface_Data.csv", sep= ",", header=T)
x <- mydata$x
y <- mydata$y
#z <- mydata$z
elev.loess <- loess(z ~ x * y, mydata,span=0.95)
print(elev.loess)
xmar <- seq(min(x),max(x),(max(x)-min(x))/30)
ymar <- seq(min(y),max(y),(max(y)-min(y))/30)
# get fitted (interpolated) values
elev.interp <- predict(elev.loess, expand.grid(list(x=xmar,y=ymar)))
pred_z<-matrix(elev.interp, length(xmar),length(ymar))
# 顯示曲面網格,網格邊線顏色爲洋紅,顯示box框線
persp3D (xmar, ymar, pred_z,
theta = 150, phi = 40, d=3,
col = colormap,
scale = TRUE, border = "black",
bty = "f",box = TRUE,ticktype = "detailed",#nticks=5,
xlab = "0-60 mph (sec)",
ylab = "Gax Mileage (mpg)",
zlab="Power (KW)",
zlim=c(20,180))
fitpoints <- predict(elev.loess )
scatter3D(z = z, x = x, y = y, pch = 21, cex = 1,
theta = 150, phi = 40, d=3,ticktype = "detailed",
col = colormap,
surf = list(x = xmar, y = ymar, z = pred_z,border = "black",shade=0,ffit = fitpoints), # fit參數增加預測值與真實值之間的連線
bty = "f", col.panel = NA,
ylab = "0-60 mph (sec)",
xlab = "Gax Mileage (mpg)",
zlab="Power (KW)",
zlim=c(20,180))# col.panel = NA則panel透明
5.0 等高線圖
library(ggplot2)
library( directlabels)
library(RColorBrewer)
rf <- colorRampPalette(rev(brewer.pal(11,'Spectral')))
colormap <- colorRampPalette(rev(brewer.pal(11,'Spectral')))(32)
z<-as.matrix(read.table("配套資源/第4章 數據關係型圖表/等高線.txt",header=TRUE))
colnames(z)<-seq(1,ncol(z),by=1)
max_z<-max(z)
min_z<-min(z)
breaks_lines<-seq(min(z),max(z),by=(max_z-min_z)/10)
map<-melt(z)
colnames(map)<-c("Var1","Var2","value")
head(map)
Contour<-ggplot(map,aes(x=Var1,y=Var2,z=value))+
geom_tile(aes(fill=value))+#根據高度填充
scale_fill_gradientn(colours=colormap)+
geom_contour(aes(colour= ..level..),breaks=breaks_lines,color="black")+#
labs(x="X-Axis",y="Y-Axis",fill="Z-Value")+
theme(
axis.title=element_text(size=15,face="plain",color="black"),
axis.text = element_text(size=13,face="plain",color="black"),
legend.title=element_text(size=13,face="plain",color="black"),
legend.text = element_text(size=11,face="plain",color="black"),
legend.background = element_blank(),
legend.position =c(0.15,0.2)
)
Contour
direct.label(Contour, list("bottom.pieces", cex=0.8, #"far.from.others.borders",
fontface="plain", fontfamily="serif", colour='black'))
5.1 切面圖
library(plot3D)
library(RColorBrewer)
x <- y <- z <- seq(-4, 4, by = 0.2)
M <- mesh(x, y, z)
R <- with (M, sqrt(x^2 + y^2 + z^2))
p <- sin(2*R) /(R+1e-3)
colormap <- colorRampPalette(rev(brewer.pal(11,'Spectral')),alpha = TRUE)(32)
slice3D(x, y, z, colvar = p, facets = FALSE,
col = ramp.col(colormap,alpha = 0.9),
clab="p vlaue",
xs = 0, ys = c(-4, 0, 4), zs = NULL,
ticktype = "detailed",bty = "f",box = TRUE,
theta = -120, phi = 30, d=3,
colkey = list(length = 0.5, width = 1, cex.clab = 1))
5.2 三元相圖
三元相散點圖
library(ggtern)
library(grid)
library(RColorBrewer)
#---------------------------------------(a) 三元相散點圖-----------------------------------------------------
data(Fragments)
arrangement = list()
for(base in c('ilr')){
y = ggtern(Fragments,aes(Qm,Qp,M)) +
theme_showarrows()+
geom_point(size=3) +
ggtitle(sprintf("Basis: %s",base)) +
limit_tern(.5,1,.5)
arrangement[[length(arrangement) + 1]] = y
}
grid.arrange(grobs = arrangement,nrow=1)
三元相等高線圖
data(Fragments)
arrangement = list()
for(base in c('ilr')){
x = ggtern(Fragments,aes(Qm,Qp,M)) +
stat_density_tern(geom='polygon',
aes(fill=..level..),
base=base,
colour='grey50') +
theme_showarrows()+
geom_point() +
ggtitle(sprintf("Basis: %s",base)) +
scale_fill_gradientn(colours=c(brewer.pal(7,"Set1")[2],"white",brewer.pal(7,"Set1")[1]),na.value=NA)+
limit_tern(.5,1,.5)
arrangement[[length(arrangement) + 1]] = x
}
grid.arrange(grobs = arrangement,nrow=1)
5.3 散點曲線圖
library(ggplot2)
library(ggalt)
mydata<-read.csv("配套資源/第4章 數據關係型圖表/Line_Data.csv",header=T)
ggplot(mydata, aes(x, y) )+
geom_xspline(spline_shape=-0.5, size=0.25)+
geom_point(shape=21,size=4,color="black",fill="#F78179") +
xlab("X-Axis")+
ylab("Y-Axis")+
ylim(0, 50)+
theme_gray()+
theme(
text=element_text(size=15,face="plain",color="black"),
axis.title=element_text(size=10,face="plain",color="black"),
axis.text = element_text(size=10,face="plain",color="black")
)
散點曲線填充圖
library(splines)
newdata <- data.frame(spline(mydata$x,mydata$y,n=300,method="hyman" ))
ggplot(newdata, aes(x, y) )+
geom_line(size=0.5,color="black")+
geom_area(fill="#F78179",alpha=0.9)+ # 填充
geom_point(data=mydata,aes(x,y),shape=21,size=3,color="black",fill="white") +
xlab("X-Axis")+
ylab("Y-Axis")+
ylim(0, 50)+
theme_gray()+
theme(
text=element_text(size=15,face="plain",color="black"),
axis.title=element_text(size=12,face="plain",color="black"),
axis.text = element_text(size=12,face="plain",color="black")
)
5.4 瀑布圖
library(plot3D)
library(RColorBrewer)
mydata0<-read.csv("配套資源/第4章 數據關係型圖表/Facting_Data.csv",check.names =FALSE)
N<-ncol(mydata0)-1
mydata<-data.frame(x=numeric(),y=numeric(),variable=character())
for (i in 1:N){
newdata<-data.frame(spline(mydata0[,1],mydata0[,i+1],n=300,method= "natural"))
newdata$variable<-colnames(mydata0)[i+1]
mydata<-rbind(mydata,newdata)
}
mydata$variable<-as.numeric(mydata$variable)
group<-unique(mydata$variable)
M<-length(group)
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
colormap <- rev(gg_color_hue(M))#brewer.pal(M,'RdYlGn')
pmar <- par(mar = c(5.1, 4.1, 4.1, 6.1))
perspbox(z=as.vector(0),#add=TRUE,
xlim=c(20,70),ylim=c(360,750),zlim=c(0,15),
ticktype = "detailed",bty = "f",box = TRUE,colkey = FALSE,
theta = -110, phi = 20, d=3)
for (i in 1:M){
df0<-mydata[mydata$variable==group[i],]
Ndf<-nrow(df0)
df<-rbind(df0,c(df0$x[1],df0$y[Ndf],df0$variable[Ndf]))
with(df,polygon3D(x=variable,y=x, z=y, alpha=0.6,
col=colormap[i],lwd = 3,add=TRUE,colkey = FALSE))
with(df0,lines3D(x=variable,y=x, z=y,
lwd = 0.5,col="black",add=TRUE))
}
library(plot3D)
library(RColorBrewer)
mydata0<-read.csv("配套資源/第4章 數據關係型圖表/Facting_Data.csv",check.names =FALSE)
N<-ncol(mydata0)-1
mydata<-data.frame(x=numeric(),y=numeric(),variable=character())
for (i in 1:N){
newdata<-data.frame(spline(mydata0[,1],mydata0[,i+1],n=300,method= "natural"))
newdata$variable<-colnames(mydata0)[i+1]
mydata<-rbind(mydata,newdata)
}
mydata$variable<-as.numeric(mydata$variable)
group<-unique(mydata$variable)
M<-length(group)
#----------------------------------------------------------------------------------------------------------------
colormap <- colorRampPalette(rev(brewer.pal(11,'Spectral')),alpha = TRUE)(32)
#colormap <- colorRampPalette(rev(brewer.pal(7,'RdYlGn')),alpha = TRUE)(32)
pmar <- par(mar = c(5.1, 4.1, 4.1, 6.1))
perspbox(z=as.vector(0),#add=TRUE,
xlim=c(20,70),ylim=c(360,750),zlim=c(0,15),
ticktype = "detailed",bty = "f",box = TRUE,colkey = FALSE,
theta = -110, phi = 20, d=3)
for (i in 1:M){
df0<-mydata[mydata$variable==group[i],]
df<-cbind(df0,z0=rep(0,nrow(df0)))
df<-df[df$y>0.05,]
with(df,segments3D(x0=variable, y0=x,z0=z0,
x1=variable,y1=x, z1=y, colvar =y,
alpha=0.5,col=ramp.col(colormap,alpha = 0.9),lwd = 3,add=TRUE,colkey = FALSE))
with(df0,lines3D(x=variable,y=x, z=y,
lwd = 1.5,col="black",add=TRUE))
}
colkey (col=colormap,clim=range(mydata$y),clab = "Z Value", add=TRUE, length=0.5,side = 4)
行分面的帶填充的曲線圖
mydata0<-read.csv("配套資源/第4章 數據關係型圖表/Facting_Data.csv",stringsAsFactors=FALSE)
colnames(mydata0)<-c("X_Axis",seq(60,25,-5))
mydata<-melt(mydata0,id.vars = "X_Axis")
ggplot(mydata,aes(X_Axis,value,fill=variable))+
geom_area(color="black",size=0.25)+
facet_grid(variable~.)+
theme(
text=element_text(size=15,face="plain",color="black"),
axis.title=element_text(size=10,face="plain",color="black"),
axis.text = element_text(size=10,face="plain",color="black"),
legend.position="none")
library(RColorBrewer)
colormap <- colorRampPalette(rev(brewer.pal(11,'Spectral')))(32)
mydata0<-read.csv("配套資源/第4章 數據關係型圖表/Facting_Data.csv",stringsAsFactors=FALSE)
N<-ncol(mydata0)-1
colnames(mydata0)<-c("X_Axis",seq(60,25,-5))
mydata<-data.frame(x=numeric(),y=numeric(),variable=character())
for (i in 1:N){
newdata<-data.frame(spline(mydata0[,1],mydata0[,i+1],n=300,method= "natural"))
newdata$variable<-colnames(mydata0)[i+1]
mydata<-rbind(mydata,newdata)
}
mydata$variable<-factor(mydata$variable,levels=seq(60,25,-5))
ggplot(mydata,aes(x,y,group=variable))+
geom_bar(aes(fill=y),color=NA,size=0.25,stat="identity")+
geom_line(color="black",size=0.5)+
scale_fill_gradientn(colours=colormap)+
facet_grid(variable~.)+
theme(
text=element_text(size=15,face="plain",color="black"),
axis.title=element_text(size=10,face="plain",color="black"),
axis.text = element_text(size=10,face="plain",color="black"),
legend.position="right"
)
colormap <- colorRampPalette(rev(brewer.pal(11,'Spectral')))(32)
mydata0<-read.csv("配套資源/第4章 數據關係型圖表/Facting_Data.csv",check.names =FALSE)
N<-ncol(mydata0)-1
labels_Y<-colnames(mydata0)[1:N+1]
colnames(mydata0)<-c("x",seq(1,N,1))
mydata<-data.frame(x=numeric(),y=numeric(),variable=character()) #創建空的Data.Frame
for (i in 1:N){
newdata<-data.frame(spline(mydata0[,1],mydata0[,i+1],n=300,method= "natural"))
newdata$variable<-colnames(mydata0)[i+1]
mydata<-rbind(mydata,newdata)
}
Step<-5
mydata$offest<--as.numeric(mydata$variable)*Step
mydata$V1_density_offest<-mydata$y+mydata$offest
p<-ggplot()
for (i in 1:N){
p<-p+ geom_linerange(data=mydata[mydata$variable==i,],aes(x=x,ymin=offest,ymax=V1_density_offest,group=variable,color=y),size =1, alpha =1) +
geom_line(data=mydata[mydata$variable==i,],aes(x=x, y=V1_density_offest),color="black",size=0.5)
}
#ggplot() +
# geom_linerange(aes(x=x,ymin=offest,ymax=V1_density_offest,group=variable,color=y),mydata,size =1, alpha =1) +
p+scale_color_gradientn(colours=colormap)+
#geom_line(aes(x, V1_density_offest,group=variable),mydata,color="black")+
scale_y_continuous(breaks=seq(-Step*N,-Step,Step),labels=rev(labels_Y))+
xlab("Time")+
ylab("Class")+
theme(
panel.background=element_rect(fill="white",colour=NA),
panel.grid.major.x = element_line(colour = "grey80",size=.25),
panel.grid.major.y = element_line(colour = "grey60",size=.25),
axis.line = element_blank(),
text=element_text(size=13),
plot.title=element_text(size=15,hjust=.5),
legend.position="right"
)
ggplot() +
geom_ribbon(aes(x, ymin=offest,ymax=V1_density_offest, fill=variable),mydata, alpha=1,colour=NA)+
geom_line(aes(x, V1_density_offest, color=variable,group=variable),mydata, color="black")+
scale_y_continuous(breaks=seq(-40,-5,5),labels=rev(labels_Y))+
theme_classic()+
theme(
panel.background=element_rect(fill="white",colour=NA),
panel.grid.major.x = element_line(colour = "grey80",size=.25),
panel.grid.major.y = element_line(colour = "grey60",size=.25),
axis.line = element_blank(),
text=element_text(size=15),
plot.title=element_text(size=15,hjust=.5),#family="myfont",
legend.position="none"
)
5.5 熱力圖
data("mtcars")
mat <- round(cor(mtcars), 1)
mydata <- melt(mat)
colnames(mydata)<-c("Var1","Var2","value")
ggplot(mydata, aes(x = Var1, y = Var2, fill = value,label=value)) +
geom_tile(colour="black") +
geom_text(size=3,colour="white")+
coord_equal()+
scale_fill_gradientn(colours=c(brewer.pal(7,"Set1")[2],"white",brewer.pal(7,"Set1")[1]),na.value=NA)+
theme(panel.background=element_rect(fill="white",colour=NA),
panel.grid.major = element_line(colour = "grey60",size=.25,linetype ="dotted" ),
panel.grid.minor = element_line(colour = "grey60",size=.25,linetype ="dotted" ),
text=element_text(size=15),
plot.title=element_text(size=15,family="myfont",hjust=.5)
)
mydata$AbsValue<-abs(mydata$value)
ggplot(mydata, aes(x= Var1 , y=Var2)) +
geom_point(aes(size=AbsValue,fill = value), shape=21, colour="black") +
scale_fill_gradientn(colours=c(brewer.pal(7,"Set1")[2],"white",brewer.pal(7,"Set1")[1]),na.value=NA)+
scale_size_area(max_size=12, guide=FALSE) +
theme(
text=element_text(size=15,face="plain",color="black"),
axis.title=element_text(size=13,face="plain",color="black"),
axis.text = element_text(size=12,face="plain",color="black"),
legend.position="right"
)
library(corrplot)
library(matlab)
color<-colorRampPalette(c(brewer.pal(7,"Set1")[2],"white",brewer.pal(7,"Set1")[1]))(100)
corrplot(mat, method="ellipse",order ="alphabet",pch.col = "black",col=color)
corrplot.mixed(mat,order ="alphabet",pch.col = "black",bg = "grey80", lower.col = color, upper.col = color)
5.6 Venn 圖
library(VennDiagram)
library(RColorBrewer)
venn.diagram(list(B = 1:1800, A = 1571:2020,c=500:1100),fill = c(brewer.pal(7,"Set1")[1:3]),
alpha = c(0.5, 0.5,0.5), cex = 2,
cat.cex=3,cat.fontface = 4,lty =2, fontfamily =3,
resolution =300, filename = "trial.tiff")
## [1] 1
5.7 樹形圖
library(factoextra)
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
data(USArrests)
dd <- dist(scale(datasets::mtcars), method = "euclidean")
hc <- hclust(dd, method = "ward.D2")
fviz_dend(hc, k = 4, # 聚類的類別數目爲4
cex = 0.8, # 數據標籤的字體大小
k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
color_labels_by_k = FALSE, # 數據標籤也根據顏色設定
rect_border = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
rect = TRUE, # 使用不同顏色的矩形框標定類別
rect_fill = TRUE)
fviz_dend(hc, k = 4, cex = 0.8, horiz = TRUE,
k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),# k_colors = "jco",
color_labels_by_k = FALSE, # 數據標籤也根據顏色設定
rect_border = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
rect = TRUE,rect_fill = TRUE)
par(mar = rep(300,4))
fviz_dend(hc, cex = 0.8, k = 4,
color_labels_by_k = FALSE, # 數據標籤也根據顏色設定
k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
type = "circular",#phylogenic",
#labels_track_height = 0.1,
repel = TRUE,
rect_lty = 0.5)
par(mar = rep(300,4))
fviz_dend(hc, cex = 1, k = 4,
k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
type = "phylogenic",
color_labels_by_k = FALSE, # 數據標籤也根據顏色設定
labels_track_height = 0.1,
repel = TRUE,
rect_lty = 0.5)
library(dendextend)
library(circlize)
dd <- dist(scale(datasets::mtcars), method = "euclidean")
hc <- hclust(dd, method = "ward.D2")
dend <- as.dendrogram(hc)
# modify the dendrogram to have some colors in the branches and labels
dend <- dend %>%
color_branches(k=4,col= c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"))# %>%
#color_labels(k=4,col= c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"), color_labels_by_k = TRUE)
# plot the radial plot
par(mar = rep(0,4))
#circlize_dendrogram(dend, dend_track_height = 0.8)
circlize_dendrogram(dend, labels_track_height = 0.6, dend_track_height = 0.3,size=3)
圓堆積圖
library(ggraph)
library(igraph)
library(tidyverse)
library(viridis)
data(flare)
# Create a subset of the dataset (I remove 1 level)
edges<-flare$edges %>% filter(to %in% from) %>% droplevels()
vertices <- flare$vertices %>% filter(name %in% c(edges$from, edges$to)) %>% droplevels()
vertices$size<-runif(nrow(vertices))
# Rebuild the graph object
mygraph <- graph_from_data_frame( edges, vertices=vertices )
colormap <- colorRampPalette(rev(brewer.pal(9,'YlGnBu')))(32)
ggraph(mygraph, layout = 'circlepack', weight="size" ) +
geom_node_circle(aes(fill = depth)) +
geom_node_text( aes(label=shortName, filter=leaf, fill=depth, size=size)) +
theme_void() +
theme(legend.position="FALSE") +
scale_fill_gradientn(colors=colormap)
library(ggraph)
library(igraph)
library(dplyr)
df <- data.frame(group=c("root", "root", "a","a","b","b","b"),
subitem=c("a", "b", "x","y","z","u","v"),
size=c(0, 0, 6,2,3,2,5))
# create a dataframe with the vertices' attributes
vertices <- df %>%
distinct(subitem, size) %>%
add_row(subitem = "root", size = 0)
graph <- graph_from_data_frame(df, vertices = vertices)
ggraph(graph, layout = "circlepack", weight = 'size') +
geom_node_circle(aes(fill =depth)) +
# adding geom_text to see which circle is which node
geom_text(aes(x = x, y = y, label = paste(name, "size=", size))) +
coord_fixed()
和絃圖
library(circlize)
library(RColorBrewer)
set.seed(999)
mat<-matrix(sample(18, 18), 3, 6)
rownames(mat) <- paste0("S", 1:3)
colnames(mat) <- paste0("E", 1:6)
df<- data.frame(from = rep(rownames(mat), times = ncol(mat)),
to = rep(colnames(mat), each = nrow(mat)),
value = as.vector(mat),
stringsAsFactors = FALSE)
chordDiagram(df,grid.col = brewer.pal(9,"Set1")[1:9],link.border="grey")
circos.clear()
chordDiagram(mat,grid.col = brewer.pal(9,"Set1")[1:9],link.border="grey")
circos.clear()
桑吉圖
library(ggalluvial)
library(ggplot2)
data(vaccinations)
levels(vaccinations$response) <- rev(levels(vaccinations$response))
ggplot(vaccinations,
aes(x = survey, stratum = response, alluvium = subject,
weight = freq,
fill = response, label = response)) +
geom_flow(alpha = 0.7,color = "darkgray") +
geom_stratum(alpha = 1) +
geom_text(stat = "stratum", size = 3.5) +
theme_classic()+
#coord_flip() +
theme(legend.position = "none",
axis.text.x =element_text(color="black",size=12),
axis.title.x = element_blank(),
axis.text.y =element_blank(),
axis.line = element_blank(),
axis.ticks =element_blank() )# +
#ggtitle("vaccination survey responses at three points in time")
#-------------------------------------------------------------------------
data(Refugees, package = "alluvial")
country_regions <- c(
Afghanistan = "Middle East",
Burundi = "Central Africa",
`Congo DRC` = "Central Africa",
Iraq = "Middle East",
Myanmar = "Southeast Asia",
Palestine = "Middle East",
Somalia = "Horn of Africa",
Sudan = "Central Africa",
Syria = "Middle East",
Vietnam = "Southeast Asia"
)
Refugees$region <- country_regions[Refugees$country]
ggplot(data = Refugees,
aes(x = year, weight = refugees, alluvium = country)) +
geom_alluvium(aes(fill = country, colour = country),
alpha = .85, decreasing = FALSE) +
scale_x_continuous(breaks = seq(2003, 2013, 2)) +
#theme(axis.text.x = element_text(angle = -30, hjust = 0)) +
scale_fill_brewer(type = "qual", palette = "Paired") +
scale_color_brewer(type = "qual", palette = "Paired") #+