# -*- encoding: utf-8 -*-
"""
@File : homework_6_電商數據可視化分析.py
@Time : 2019/9/24 10:31
@Author : chen
"""
# 可以打開下面的網頁,看各種圖形的源碼
# https://plot.ly/python/
import pandas as pd
import numpy as np
import plotly.graph_objects as go
# pip install colorlover
import colorlover as cl
# Hex colour palette; the bar charts further down pick entries by position.
# Note that '#F1948A' appears twice (positions 0 and 4).
colors = [
    '#F1948A',
    '#AED6F1',
    '#F9E79F',
    '#E5E8E8',
    '#F1948A',
    '#D0ECE7',
    '#F6DDCC',
    '#D2B4DE',
    '#117A65',
    '#FAE5D3',
    '#34495E',
    '#DC7633',
    '#D35400',
    '#0E6251',
    '#FCF3CF',
    '#E8F8F5',
    '#D4E6F1',
    '#FAD8D8',
    '#E59866',
]
# Load the Black Friday dataset and print a quick overview of it.
data = pd.read_csv('BlackFriday.csv')

preview = data.head(10)
print(preview)  # first 10 rows
print(data.shape)  # (rows, columns)

# Distinct values of the Occupation column, followed by how many there are.
occupations = data["Occupation"].unique()
print(occupations)
print(len(occupations))

print(data.describe())  # summary statistics

# Number of missing values in each column.
missing_per_column = data.isna().sum()
print(missing_per_column)
# Total purchase amount per (User_ID, Gender) pair: pivot on both keys, sum
# the Purchase column, then flatten the index back into ordinary columns.
gender_purchase = data.pivot_table(
    values='Purchase',
    index=["User_ID", "Gender"],
    aggfunc="sum",
).reset_index()
# A bare expression is silently discarded when this runs as a script, so the
# preview has to be printed explicitly to be visible.
print(gender_purchase.head(20))  # first 20 rows
print(gender_purchase.count())

# Number of rows (one per User_ID/Gender pair) for each gender, plus each
# gender's share of the total.
gender_count = gender_purchase.groupby(by="Gender").size().reset_index(name="人數")
gender_count["佔比"] = gender_count["人數"] / gender_count["人數"].sum()
print(gender_count)

# Donut chart (hole=0.5) of purchase amount by gender. The labels list repeats
# the gender once per row.
# NOTE(review): Plotly's pie trace is documented to sum the values of
# duplicated labels, so each slice should be that gender's total purchase
# amount -- confirm against the rendered chart.
trace = go.Pie(
    labels=gender_purchase.Gender.tolist(),
    values=gender_purchase.Purchase.tolist(),
    hole=0.5,
)
fig = go.Figure(data=[trace])
fig.show()
# Distribution of the per-user purchase totals, one box plot per gender.
is_female = gender_purchase["Gender"] == "F"
is_male = gender_purchase["Gender"] == "M"
female_totals = gender_purchase.loc[is_female, "Purchase"]
male_totals = gender_purchase.loc[is_male, "Purchase"]

# Passing the series as x= instead of y= would lay the boxes out horizontally.
box_traces = [
    go.Box(y=male_totals, name="男性購物", boxmean=True),
    go.Box(y=female_totals, name="女性購物", boxmean=True),
]
fig = go.Figure(data=box_traces)
fig.show()
# Ten best-selling products, ranked by how many Purchase values each
# Product_ID has.
purchase_counts = data.pivot_table(
    values=['Purchase'],
    index=['Product_ID'],
    aggfunc='count',
).reset_index()
ranked_products = purchase_counts.sort_values(by='Purchase', ascending=False)
top10_sellers = ranked_products.head(10)
print("top10_sellers:", top10_sellers)

# Rows of the full dataset whose product is one of those ten best sellers,
# i.e. who bought the hot products.
top_product_ids = top10_sellers["Product_ID"].tolist()
bought_top_product = data["Product_ID"].isin(top_product_ids)
top_sellers_buyers = data[bought_top_product]
print(top_sellers_buyers.head(10))
# Relationship between the best-selling products and buyer gender:
# count of Purchase values for every (Product_ID, Gender) pair.
top_sellers_gender = top_sellers_buyers.pivot_table(
    values="Purchase",
    index=["Product_ID", "Gender"],
    aggfunc="count",
).reset_index()
print(top_sellers_gender)

# Build one horizontal bar trace per gender value.
traces = []
for i, g in enumerate(top_sellers_gender.Gender.unique()):
    # Select this gender's rows once instead of re-filtering per argument.
    rows = top_sellers_gender[top_sellers_gender.Gender == g]
    trace = go.Bar(
        x=rows.Purchase,  # bar length: purchase count
        y=rows.Product_ID,  # one bar per product
        name=g,  # trace name (legend entry), not an axis title
        # Wrap around the palette so extra categories cannot raise IndexError.
        marker=dict(color=colors[i % len(colors)]),
        orientation="h",  # horizontal bars
    )
    traces.append(trace)
fig = go.Figure(data=traces)
fig.show()
# Relationship between the best-selling products and city category:
# count of Purchase values for every (Product_ID, City_Category) pair.
# (The original called the non-existent `prvot_table`, which raises
# AttributeError; the intended method is `pivot_table`.)
top_sellers_city = top_sellers_buyers.pivot_table(
    values="Purchase",
    index=['Product_ID', "City_Category"],
    aggfunc="count",
).reset_index()
print(top_sellers_city.head())

# Build one horizontal bar trace per city category.
traces = []
for i, c in enumerate(top_sellers_city.City_Category.unique()):
    # Select this category's rows once instead of re-filtering per argument.
    rows = top_sellers_city[top_sellers_city.City_Category == c]
    trace = go.Bar(
        x=rows.Purchase,  # bar length: purchase count
        y=rows.Product_ID,  # one bar per product
        name=c,  # trace name (legend entry), not an axis title
        # Wrap around the palette so extra categories cannot raise IndexError.
        marker=dict(color=colors[i % len(colors)]),
        orientation="h",  # horizontal bars
    )
    traces.append(trace)
go.Figure(data=traces).show()
# 電商數據可視化分析
# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.