R | 绘制箱线图

箱线图是一种能同时反映数据统计量和整体分布的展示图。Boxplot 从下到上展示的依次是最小值、第一四分位数(箱子的下边线)、中位数(箱子中间的线)、第三四分位数(箱子上边线)和最大值;在 ggplot2 的默认设置下,须线只延伸到距箱体 1.5 倍四分位距以内的最远数据点,超出该范围的点会作为离群值单独绘出。
数据来源:CRA_jobs.txt(通过爬虫获得的猎聘网 CRA 职位薪酬数据)

library(ggplot2)
library(ggthemes)
library("RColorBrewer")
library(dplyr)
library(tidyr)
library(magrittr)
# Preview the RColorBrewer palettes: first every palette, then only the
# sequential (gradient) ones.
display.brewer.all()
display.brewer.all(type = "seq")

# Colour setup: take 12 colours from the "Set3" palette for the box fills.
cols <- brewer.pal(n = 12, name = "Set3")

### Load the data
# Read the tab-separated job listings, keeping text columns as character
# vectors instead of factors. Arguments are spelled out in full: `T` / `F` are
# ordinary variables that can be reassigned, and `stringsAsFactor` only worked
# through partial matching against `stringsAsFactors`.
# NOTE(review): the file presumably contains Chinese text; if city names come
# out garbled, the file's encoding may need to be passed via `fileEncoding` --
# confirm against the actual file.
CRA_jobs <- read.csv(
  "CRA_jobs.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

### Filter the data
# Cities to compare. The same vector supplies the factor levels below, so its
# order is the level order of `city`.
major_city <- c("上海", "北京", "深圳", "广州", "福州", "厦门")

# Keep only rows for those cities with salary no greater than 50, then convert
# `city` to a factor with the levels fixed in the order given above.
CRA_major_city <- CRA_jobs %>%
  filter(city %in% major_city, salary <= 50) %>%
  mutate(city = factor(city, levels = major_city))

### Visualisation
# Box plot of salary per city. stat_boxplot() with the "errorbar" geom adds
# horizontal caps to the whiskers; the boxes are filled with one colour per
# city taken from `cols`.
plot <- ggplot(data = CRA_major_city, aes(x = city, y = salary)) +
  stat_boxplot(geom = "errorbar", width = 0.3, position = position_dodge(1.05)) +
  geom_boxplot(aes(fill = factor(city))) +
  scale_x_discrete(name = "城市") +
  scale_y_continuous(name = "薪水") +
  scale_fill_manual(values = cols) +
  # Legend: titled "城市" with the title placed on top, keys laid out in a
  # 3 x 2 grid filled row by row.
  # The earlier `key.width` / `key.height` arguments were dropped: guide_legend()
  # has no parameters with those names (the real ones are `keywidth` /
  # `keyheight`), so they were silently ignored and never changed the key size.
  guides(fill = guide_legend(
    title = "城市", title.position = "top",
    nrow = 3, ncol = 2, byrow = TRUE
  )) +
  theme_bw() # apply a ready-made theme
# theme(title = element_text(family = "sans", face = "bold"), panel.grid.major = element_blank()) .... ### custom theme

# Write the figure to disk as a PNG.
ggsave("CRA_major_city.png", plot, width = 6.15, height = 3.15)

### Grouped visualisation: derive a `group` column from `city`
# Both methods work on the character labels of `city`. Indexing a lookup table
# with the factor itself would use the factor's integer codes, which gives the
# right answer only while the level order happens to equal the table's row
# order.
city_chr <- as.character(CRA_major_city$city)

## Method 1: vectorised conditional recoding
# One case_when() call over the whole column replaces a per-element
# lapply() + nested ifelse(). Cities outside every group become NA rather than
# the stray "0" label the numeric fallback used to produce.
CRA_major_city$group <- factor(case_when(
  city_chr %in% c("上海", "北京") ~ "G1",
  city_chr %in% c("深圳", "广州") ~ "G2",
  city_chr %in% c("福州", "厦门") ~ "G3",
  TRUE ~ NA_character_
))

## Method 2: lookup table matched by city name
# This assignment overwrites the column produced by method 1; the two methods
# are alternatives. match() returns NA for cities missing from the table.
groups <- data.frame(
  areas = c("上海", "北京", "深圳", "广州", "福州", "厦门"),
  group = c("G1", "G1", "G2", "G2", "G3", "G3")
)
CRA_major_city$group <- groups$group[match(city_chr, groups$areas)]

# Grouped box plot: one x position per group, with a separate box for each
# city inside the group, dodged apart and filled with one colour per city.
# NOTE(review): the x axis shows the group codes but is titled "城市" (city);
# confirm whether a group-oriented axis title was intended.
ggplot() +
  # stat_boxplot(geom = "errorbar", width = 0.3, position = position_dodge(1.05)) +
  geom_boxplot(
    data = CRA_major_city,
    aes(x = group, y = salary, fill = factor(city)),
    position = position_dodge(1.05)
  ) +
  scale_x_discrete(name = "城市") +
  scale_y_continuous(name = "薪水") +
  scale_fill_manual(values = cols) +
  # Legend: titled "城市" with the title placed on top, keys laid out in a
  # 3 x 2 grid filled row by row.
  # The earlier `key.width` / `key.height` arguments were dropped: guide_legend()
  # has no parameters with those names (the real ones are `keywidth` /
  # `keyheight`), so they were silently ignored and never changed the key size.
  guides(fill = guide_legend(
    title = "城市", title.position = "top",
    nrow = 3, ncol = 2, byrow = TRUE
  )) +
  theme_bw() # apply a ready-made theme

在这里插入图片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章