Visualizing Data via R (box-plot,histogram,scatter)

Visualizing Data via R (box-plot,histogram,violin,scatter)

Load Data

## Attach the AzureML client, create a workspace handle, and download the
## raw automobile price dataset into `auto.price`.
library(AzureML)
ws <- workspace()
auto.price <- download.datasets(ws, "Automobile price data (Raw)")

## Coerce some character columns to numeric
cols <- c("price", "bore", "stroke", "horsepower", "peak.rpm")

## Replace the unknown-value marker '?' with NA and convert the text to numbers.
## The conversion goes through as.character() on purpose: if a column arrives
## as a factor, ifelse() drops the factor attributes and as.numeric() would
## return the internal level codes instead of the printed values.
auto.price[, cols] <- lapply(auto.price[, cols], function(x) {
  if (is.numeric(x)) {
    return(x)  # already numeric, nothing to clean
  }
  x <- as.character(x)
  x[x %in% "?"] <- NA  # %in% never yields NA, so existing NAs are safe
  as.numeric(x)
})

## Remove rows with NAs (complete.cases() looks at every column of the frame,
## not only the ones coerced above).
auto.price <- auto.price[complete.cases(auto.price), ]

## Add a log transformed column for price
auto.price$lnprice <- log(auto.price$price)

## Consolidate the number of cylinders into three groups.
## NOTE(review): every value that is not three/four/five/six ends up in
## "eight-twelve", including any unexpected label -- confirm this is intended.
auto.price$num.cylinders <- ifelse(
  auto.price$num.of.cylinders %in% c("four", "three"),
  "three-four",
  ifelse(
    auto.price$num.of.cylinders %in% c("five", "six"),
    "five-six",
    "eight-twelve"
  )
)

head(auto.price)

數據處理結果
在這裏插入圖片描述
在這裏插入圖片描述

Create a Pair-Wise Scatter Plot

library(ggplot2)
library(repr)
## Set the repr plot width and height to 8 for the scatter-plot matrix.
options(repr.plot.width = 8, repr.plot.height = 8)

## Columns to compare against each other.
num.cols <- c(
  "wheel.base", "width", "height", "curb.weight", "engine.size",
  "bore", "compression.ratio", "city.mpg", "price", "lnprice"
)

## Draw the matrix of pair-wise scatter plots for the selected columns.
pairs(~ ., data = auto.price[, num.cols])

十個屬性成對散點圖
在這裏插入圖片描述
在這裏插入圖片描述

Create Histograms

## Function to plot conditioned histograms
options(repos = c(CRAN = "https://cran.rstudio.com"))
## Install gridExtra only when it is missing, so re-running the script does
## not download and reinstall the package every time.
if (!requireNamespace("gridExtra", quietly = TRUE)) {
  install.packages("gridExtra")
}
options(repr.plot.width = 6, repr.plot.height = 3)

## Build a histogram of one column of `auto.price`, with one facet per value
## of `drive.wheels`.
##
## Arguments:
##   x  Name of a column of `auto.price`, given as a single character string.
## Returns:
##   The ggplot object (printing it draws the plot).
auto.hist <- function(x) {
  library(ggplot2)
  library(gridExtra)  # not used below; kept so the package stays attached
  stopifnot(is.character(x), length(x) == 1, x %in% names(auto.price))
  ## Compute the bin width as 1/30 of the observed range
  rg <- range(auto.price[[x]], na.rm = TRUE)
  bw <- (rg[2] - rg[1]) / 30
  ## A constant (or all-NA) column gives a zero or non-finite width, which
  ## geom_histogram() cannot use; NULL lets it pick its default binning.
  if (!is.finite(bw) || bw <= 0) {
    bw <- NULL
  }
  ## Define the title
  title <- paste("Histogram of", x, "conditioned on type of drive wheels")
  ## Create the histogram; the bar height defaults to the bin count, so no
  ## explicit y mapping is needed.
  ggplot(auto.price, aes_string(x)) +
    geom_histogram(binwidth = bw) +
    facet_grid(. ~ drive.wheels) +
    ggtitle(title)
}

## Columns for which a conditioned histogram is drawn.
plot.cols2 <- c("length", "curb.weight", "engine.size", "city.mpg", "price")

## Apply auto.hist to each column name; the result is a list of plots.
lapply(plot.cols2, auto.hist)

按照drive.wheels不同取值對每一個屬性畫直方圖
在這裏插入圖片描述
在這裏插入圖片描述
在這裏插入圖片描述在這裏插入圖片描述在這裏插入圖片描述

Create Box Plots

## Build a box plot of one column of `auto.price`, grouped by `drive.wheels`.
##
## Arguments:
##   x  Name of the column to plot on the y axis, as a character string.
## Returns:
##   The ggplot object.
auto.box <- function(x) {
  plot_title <- paste("Box plot of", x, "by type of drive wheels")
  base_plot <- ggplot(auto.price, aes_string("drive.wheels", x))
  base_plot + geom_boxplot() + ggtitle(plot_title)
}

## Apply auto.box to every column name listed in plot.cols2.
lapply(X = plot.cols2, FUN = auto.box)

結果如下:使用 geom_boxplot() 按 drive.wheels 分組繪製箱線圖。
在這裏插入圖片描述

Create Scatter Plots

## Scatter plot using color to differentiate points
##
## Plots `price` against one column of `auto.price`, coloring each point by
## `num.cylinders`.
##
## Arguments:
##   x  Name of the column for the x axis, as a single character string.
## Returns:
##   The ggplot object.
scatter.auto <- function(x) {
  ## library() stops with a clear error when ggplot2 is missing; require()
  ## would only return FALSE and let the ggplot() call below fail instead.
  library(ggplot2)
  stopifnot(is.character(x), length(x) == 1)
  title <- paste("price vs.", x, "with color by num.cylinders")
  ggplot(auto.price, aes_string(x, "price")) +
    geom_point(aes(color = factor(num.cylinders))) +
    ggtitle(title)
}

## Columns to plot against price in the scatter plots.
plot.cols3 <- c("length", "curb.weight", "engine.size", "city.mpg")

## Apply scatter.auto to each column name; the result is a list of plots.
lapply(plot.cols3, scatter.auto)

結果如下:使用 geom_point() 繪製散點圖。aes(): Aesthetic mappings describe how variables in the data are mapped to visual properties (aesthetics) of geoms. factor() 將 num.cylinders 轉換爲類別因子,用來決定每個點的顏色。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章