信用卡客戶畫像（年齡，性別，收入，支出和信用卡種的關係）

import os
import pandas as pd

os.chdir(r'H:\2019-2-3新華書店筆記以及資料\資料\HW5')

card=pd.read_csv(r"card.csv",encoding="gbk") #讀取數據
disp=pd.read_csv(r"disp.csv",encoding="gbk")
clients=pd.read_csv(r"clients.csv",encoding="gbk")

card.head() #查看數據分佈

	card_id	disp_id	issued	type
0	1005	9285	1993-11-07	普通卡
1	104	588	1994-01-19	普通卡
2	747	4915	1994-02-05	普通卡
3	70	439	1994-02-08	普通卡
4	577	3687	1994-02-15	普通卡

disp.head()

	disp_id	client_id	account_id	type
0	1	1	1	所有者
1	2	2	2	所有者
2	3	3	2	用戶
3	4	4	3	所有者
4	5	5	3	用戶

clients.head()

	client_id	sex	birth_date	district_id
0	1	女	1970-12-13	18
1	2	男	1945-02-04	1
2	3	女	1940-10-09	1
3	4	男	1956-12-01	5
4	5	女	1960-07-03	5

import sqlite3 # sqlite3相當於輕量版，更多功能可使用SQLAlchemy

con = sqlite3.connect(':memory:') # 數據庫連接，使用sql連接查詢表

card.to_sql('card', con)

disp.to_sql('disp', con)

clients.to_sql('clients', con)

car_sql='''select a.*,c.sex,c.birth_date,c.district_id from card a left join disp b on a.disp_id=b.disp_id left join clients c on b.client_id=c.client_id where b.type="所有者"'''

card_t=pd.read_sql(car_sql, con)

card_t.head()

	index	card_id	disp_id	issued	type	sex	birth_date	district_id
0	0	1005	9285	1993-11-07	普通卡	女	1968-01-28	74
1	1	104	588	1994-01-19	普通卡	女	1960-10-20	61
2	2	747	4915	1994-02-05	普通卡	男	1963-07-19	40
3	3	70	439	1994-02-08	普通卡	女	1968-09-12	51
4	4	577	3687	1994-02-15	普通卡	男	1972-02-06	49

from pylab import mpl

mpl.rcParams['font.sans-serif'] = ['SimHei'] # 指定默認字體

mpl.rcParams['axes.unicode_minus'] = False # 解決保存圖像是負號'-'顯示爲方塊的問題

card_t['type'].value_counts().plot(kind='pie',autopct='%.1f%%') #查看數據分佈餅圖

<matplotlib.axes._subplots.AxesSubplot at 0x167ec390>

#1、髮卡趨勢
#總體髮卡趨勢
from  datetime  import  *  
card_t['issued_date']=pd.to_datetime(card_t['issued'])
card_t['issued_year']=card_t['issued_date'].map(lambda x:x.year)
card_t.card_id.groupby(card_t['issued_year']).count().plot(kind="bar") #年齡和髮卡的關係
<matplotlib.axes._subplots.AxesSubplot at 0x9354358>

#2、不同卡的分佈

card_t.type.value_counts().plot(kind="pie",autopct='%.1f%%') #卡種分佈

<matplotlib.axes._subplots.AxesSubplot at 0x93aa8d0>

pd.crosstab(card_t.issued_year,card_t.type).plot(kind = 'bar') #卡種和年齡的關係分佈
<matplotlib.axes._subplots.AxesSubplot at 0x940cb00>

t1=pd.crosstab(card_t.issued_year,card_t.type) #卡種和年齡佔比對比

t1["sum1"]=t1.sum(1)

t2=t1.div(t1.sum1,axis = 0)

t2.drop("sum1",1).plot(kind = 'bar',stacked= True)

<matplotlib.axes._subplots.AxesSubplot at 0xa8b5d30>

import matplotlib.pyplot as plt
labels=["青年卡","普通卡","金卡"]
y1=t1.loc[:,"青年卡"]#.astype('int')
y2=t1.loc[:,"普通卡"]#.astype('int')
y3=t1.loc[:,"金卡"]#.astype('int')
x=t1.index#.astype('int')
plt.stackplot(x,y1,y2,y3,labels = labels) # 圖表類型
plt.title('髮卡趨勢')
plt.ylabel('髮卡量')
plt.legend(loc = 'upper left')# 標籤位置
plt.show()

?plt.stackplot(x,y1,y2,y3,labels = labels) 
pd.crosstab(card_t.type,card_t.sex).plot(kind='bar')#性別和卡種數量對比
<matplotlib.axes._subplots.AxesSubplot at 0xa9960b8>

xb=pd.crosstab(card_t.type,card_t.sex) #性別和卡種百分比對比
xb.div(xb.sum(1),axis=0).plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0xaa24860>

from stack2dim import *
stack2dim(card_t,'type','sex') #調用之前寫的庫，查看卡種和性別關係（柱子粗細代表數量，縱座標代表百分比）

card_t['age']=(pd.to_datetime(card_t['issued'])-pd.to_datetime(card_t['birth_date']))
card_t['age1']=card_t['age'].map(lambda x:x.days/365)
import seaborn as sns
sns.boxplot(x='type',y='age1',data=card_t) #查看何須圖，年齡和卡種分佈
<matplotlib.axes._subplots.AxesSubplot at 0xc104940>

trans=pd.read_csv(r"trans.csv",encoding="gbk")
trans.to_sql('trans', con)
card_t.to_sql('card_t', con)
D:\anaconda\lib\site-packages\IPython\core\interactiveshell.py:2785: DtypeWarning: Columns (8) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
D:\anaconda\lib\site-packages\pandas\io\sql.py:450: UserWarning: the 'timedelta' type is not supported, and will be written as integer values (ns frequency) to the database.
  chunksize=chunksize, dtype=dtype)

trans.head()

	trans_id	account_id	date	type	operation	amount	balance	k_symbol	bank	account
0	695247	2378	1993-01-01	貸	信貸資金	$700	$700	NaN	NaN	NaN
1	171812	576	1993-01-01	貸	信貸資金	$900	$900	NaN	NaN	NaN
2	207264	704	1993-01-01	貸	信貸資金	$1,000	$1,000	NaN	NaN	NaN
3	1117247	3818	1993-01-01	貸	信貸資金	$600	$600	NaN	NaN	NaN
4	579373	1972	1993-01-02	貸	信貸資金	$400	$400	NaN	NaN	NaN

card_t.head()

	index	card_id	disp_id	issued	type	sex	birth_date	district_id	issued_date	issued_year	age	age1
0	0	1005	9285	1993-11-07	普通卡	女	1968-01-28	74	1993-11-07	1993	9415 days	25.794521
1	1	104	588	1994-01-19	普通卡	女	1960-10-20	61	1994-01-19	1994	12144 days	33.271233
2	2	747	4915	1994-02-05	普通卡	男	1963-07-19	40	1994-02-05	1994	11159 days	30.572603
3	3	70	439	1994-02-08	普通卡	女	1968-09-12	51	1994-02-08	1994	9280 days	25.424658
4	4	577	3687	1994-02-15	普通卡	男	1972-02-06	49	1994-02-15	1994	8045 days	22.041096

car_sql='''

select a.card_id,a.issued,a.type,c.type as t_type,c.amount,c.balance,c.date as t_date

  from card as a

  left join disp as b on a.disp_id=b.disp_id

  left join trans as c on b.account_id=c.account_id

  where b.type="所有者"

  order by a.card_id,c.date

'''  #使用sql連接表
card_t2=pd.read_sql(car_sql, con)
card_t2.head()

	card_id	issued	type	t_type	amount	balance	t_date
0	1	1998-10-16	金卡	貸	$900	$900	1996-11-24
1	1	1998-10-16	金卡	貸	$33975	$34875	1996-12-07
2	1	1998-10-16	金卡	借	$6,600	$28275	1996-12-24
3	1	1998-10-16	金卡	貸	$30	$28305	1996-12-31
4	1	1998-10-16	金卡	貸	$22650	$50955	1997-01-07

card_t2['issued']=pd.to_datetime(card_t2['issued'])
card_t2['t_date']=pd.to_datetime(card_t2['t_date'])
card_t2.head()

	card_id	issued	type	t_type	amount	balance	t_date
0	1	1998-10-16	金卡	貸	$900	$900	1996-11-24
1	1	1998-10-16	金卡	貸	$33975	$34875	1996-12-07
2	1	1998-10-16	金卡	借	$6,600	$28275	1996-12-24
3	1	1998-10-16	金卡	貸	$30	$28305	1996-12-31
4	1	1998-10-16	金卡	貸	$22650	$50955	1997-01-07

card_t2['balance2'] = card_t2['balance'].map(lambda x: int(''.join(x[1:].split(','))))
card_t2['amount2'] = card_t2['amount'].map(lambda x: int(''.join(x[1:].split(','))))
import datetime #辦卡前一年的平均餘額
card_t3 = card_t2[card_t2.issued>card_t2.t_date][card_t2.issued<card_t2.t_date+datetime.timedelta(days=365)]
card_t3.head()
D:\anaconda\lib\site-packages\ipykernel_launcher.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index.

	card_id	issued	type	t_type	amount	balance	t_date	balance2	amount2
47	1	1998-10-16	金卡	借	$13600	$57950	1997-10-20	57950	13600
48	1	1998-10-16	金卡	借	$15	$58197	1997-10-31	58197	15
49	1	1998-10-16	金卡	貸	$262	$58212	1997-10-31	58212	262
50	1	1998-10-16	金卡	借	$4,880	$53317	1997-11-06	53317	4880
51	1	1998-10-16	金卡	貸	$22650	$75967	1997-11-07	75967	22650

#card_t3["avg_balance"] = card_t3.groupby('card_id')['balance2'].mean() #生成的series  下面agg生成的dataframe
card_t4=card_t3.groupby(['type','card_id'])['balance2'].agg([('avg_balance','mean')])
card_t4.head()
#card_t4.to_sql('card_t4', con)

		avg_balance
type	card_id
普通卡	2	38898.610169
	4	54222.060976
	7	46888.702381
	9	55854.223881
	10	51387.500000

card_t3.groupby(['type','card_id'])['balance2'].agg([('avg_balance','mean'),'count']).head()

		avg_balance	count
type	card_id
普通卡	2	38898.610169	59
	4	54222.060976	82
	7	46888.702381	84
	9	55854.223881	67
	10	51387.500000	66

card_t5=card_t4.reset_index()  #爲透視表添加行的名，type類型不變
#card_t5=pd.read_sql('select * from card_t4', con)
sns.boxplot(x = 'type', y = 'avg_balance', data = card_t5)
<matplotlib.axes._subplots.AxesSubplot at 0x21afb358>

#%%6、不同類型卡的持卡人在辦卡前一年內的平均收入對比
type_dict = {'借':'out','貸':'income'}
card_t3['type1'] = card_t3.t_type.map(type_dict)
card_t6= card_t3.groupby(['type','card_id','type1'])[['amount2']].sum()
card_t6.head()
card_t6.to_sql('card_t6', con)
card_t7=card_t6.reset_index()
#card_t7=pd.read_sql('select * from card_t6', con)
card_t7.to_sql('card_t7', con)
card_t8=pd.read_sql('select * from card_t7 where type1="income"', con)
card_t6.head()

			amount2
type	card_id	type1
普通卡	2	income	193911
	2	out	196384
	4	income	474142
	4	out	357224
	7	income	299797

sns.boxplot(x = 'type', y = 'amount2', data = card_t8)  #收入和卡種的關係
<matplotlib.axes._subplots.AxesSubplot at 0x15fd6588>

card_t8.head()

	index	type	card_id	type1	amount2
0	0	普通卡	2	income	193911
1	2	普通卡	4	income	474142
2	4	普通卡	7	income	299797
3	6	普通卡	9	income	322548
4	8	普通卡	10	income	644560

card_t9=pd.read_sql('select * from card_t7 where type1="out"', con) #支出和卡種的關係
sns.boxplot(x = 'type', y = 'amount2', data = card_t9)
<matplotlib.axes._subplots.AxesSubplot at 0xa993a58>

信用卡客戶畫像（年齡，性別，收入，支出和信用卡種的關係）

使用c#強大的表達式樹實現對象的深克隆之解決循環引用的問題

痞子衡嵌入式：恩智浦i.MX RT1xxx系列MCU啓動那些事（12.A）- uSDHC eMMC啓動時間(RT1170)

本地SSL證書過期輸入命令在IIS自動生成

.NET週刊【5月第2期 2024-05-12】

mysql 8.0 使用navicate失敗 Client does not support authentication protocol requested by server

mysql 聯合唯一索引和null

Redirecting (meta refresh) scarapy 重定向

二手房房價預測（置信度）

xpath 解析後和原網頁結構不一致

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結

信用卡客戶畫像（年齡，性別，收入，支出 和信用卡種的關係）

信用卡客戶畫像（年齡，性別，收入，支出和信用卡種的關係）