Python PSI(Population Stability Index,群體穩定性指標)計算

def Cal_Psi(score, pre_score, length=10):
    """Return the Population Stability Index (PSI) between two score samples.

    ``score`` is the reference sample: it is split into ``length`` quantile
    bins, and ``pre_score`` is then distributed over the same bins.  The
    result is::

        sum((q_i - p_i) * ln(q_i / p_i))

    where ``p_i`` / ``q_i`` are the fractions of ``score`` / ``pre_score``
    observations that fall into bin ``i``.

    Args:
        score: 1-D array-like of numeric reference scores.
        pre_score: 1-D array-like of numeric scores to compare against
            ``score``.  It does not need to have the same size.
        length: Number of quantile bins requested (positive integer).
            Fewer bins are used when ties in ``score`` make several
            quantile edges coincide.

    Returns:
        The PSI as a ``float``; ``0.0`` means both samples have identical
        bin distributions.

    Raises:
        ValueError: If ``length`` is smaller than 1, or if either sample
            contains no non-NaN value.
    """
    import numpy as np

    if length < 1:
        raise ValueError("length must be a positive integer")

    # NaN values carry no bin information, so they are left out of both the
    # counts and the sample sizes used for normalisation.
    expected = np.asarray(score, dtype=float).ravel()
    actual = np.asarray(pre_score, dtype=float).ravel()
    expected = expected[~np.isnan(expected)]
    actual = actual[~np.isnan(actual)]
    if expected.size == 0 or actual.size == 0:
        raise ValueError("score and pre_score must each contain at least one value")

    # Only the interior quantile edges are kept: the first and last bins are
    # open-ended, so the minimum of ``score`` is never lost and ``pre_score``
    # values outside the reference range are still counted (in the extreme
    # bins) instead of being silently dropped.
    quantile_levels = np.linspace(0.0, 1.0, length + 1)[1:-1]
    if quantile_levels.size:
        # np.unique merges edges that coincide because of tied scores.
        interior_edges = np.unique(np.quantile(expected, quantile_levels))
    else:
        interior_edges = np.empty(0)
    n_bins = interior_edges.size + 1

    def _bin_fractions(values):
        # side="left" yields right-closed bins (lo, hi], matching pandas.cut.
        bin_index = np.searchsorted(interior_edges, values, side="left")
        counts = np.bincount(bin_index, minlength=n_bins)
        # Normalise by the number of observations, not the number of bins.
        fractions = counts / values.size
        # An empty bin would give log(0) or a division by zero; substitute a
        # tiny probability so its PSI term stays finite.
        return np.where(fractions == 0, 0.000001, fractions)

    expected_frac = _bin_fractions(expected)
    actual_frac = _bin_fractions(actual)

    psi_terms = (actual_frac - expected_frac) * np.log(actual_frac / expected_frac)
    return float(psi_terms.sum())

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章