股票数据分析(雷达图)--python

csv数据下载链接:https://pan.baidu.com/s/1KTS5WzfH4z9Y4U4rIG-3Ig
在这里插入图片描述

代码:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans # 引入机器学习


# Keep the minus sign rendering correctly on the axes.
plt.rcParams['axes.unicode_minus'] = False
# Use the SimHei font so that Chinese text in the figure is displayed.
plt.rcParams['font.sans-serif'] = ['SimHei']

class GuPiaoData():
    '''
    Step-by-step pipeline that prepares stock indicator data and clusters it.

    Every public method reads one file and applies a single stage. All stages
    except ``classifyData`` write their result to a fixed path under
    ``data/``; ``classifyData`` prints the clustering result and shows a
    radar chart instead.
    '''

    # Indicator columns as they are named in the raw data.
    SOURCE_COLUMNS = ('Turnover rate', 'Range of Rise and Fall',
                      'Price-earning ratio', 'Price-to-book ratio',
                      'Marketing rate', 'Realization rate')
    # Chinese display names, position-for-position with SOURCE_COLUMNS:
    # turnover rate, rise/fall range, price-earnings ratio,
    # price-to-book ratio, price-to-sales ratio, price-to-cash-flow ratio.
    FEATURE_NAMES = ('换手率', '涨跌率', '市盈率', '市净率', '市销率', '市现率')

    def detedData(self, filePath):
        '''
        Explore the raw data: dump the table and its summary statistics.

        :param filePath: path of the raw CSV file
        :return: None; writes ``data/Gupiaodata01.xls`` (the table as read)
                 and ``data/gupiaodata_describe.xls`` (transposed
                 ``describe(include='all')`` output)
        '''
        df = pd.read_csv(filePath)
        describe = df.describe(include='all')
        df.to_excel('data/Gupiaodata01.xls')
        describe.T.to_excel('data/gupiaodata_describe.xls')

    def cleanData(self, filePath):
        '''
        Drop every row that has a missing value in any indicator column.

        Only null values are filtered; no range checks are applied.

        :param filePath: path of the Excel file to clean
        :return: None; writes the remaining rows to
                 ``data/gupiaodata_clean.xls``
        '''
        df = pd.read_excel(filePath)
        df = df.dropna(subset=list(self.SOURCE_COLUMNS))
        df.to_excel('data/gupiaodata_clean.xls')

    def chooseData(self, filepath):
        '''
        Keep only the indicator columns of the cleaned data.

        :param filepath: path of the cleaned Excel file
        :return: None; writes the selection to ``data/Gupiaodata_coredata.xls``
        '''
        df = pd.read_excel(filepath)
        df = df[list(self.SOURCE_COLUMNS)]
        df.to_excel('data/Gupiaodata_coredata.xls')

    def transformData(self, filePath):
        '''
        Rename the indicator columns to their Chinese display names.

        :param filePath: path of the Excel file holding the indicator columns
        :return: None; writes the renamed columns to
                 ``data/Gupiaodata_coretransformdata.xls``
        '''
        df = pd.read_excel(filePath)
        df = df[list(self.SOURCE_COLUMNS)]
        # Same order in both tuples, so a positional relabel is sufficient.
        df.columns = list(self.FEATURE_NAMES)
        df.to_excel('data/Gupiaodata_coretransformdata.xls')

    def standarData(self, filepath):
        '''
        Standardize the indicators: (value - column mean) / column std.

        Only the ``FEATURE_NAMES`` columns are processed, so extra columns in
        the file (for example a saved index column) do not take part in the
        arithmetic.

        :param filepath: path of the Excel file with the renamed indicators
        :return: None; writes the standardized columns to
                 ``data/Gupiao_stdcoredata.xls``
        '''
        df = pd.read_excel(filepath)
        standardized = self._standardize(df[list(self.FEATURE_NAMES)])
        standardized.to_excel('data/Gupiao_stdcoredata.xls')

    def classifyData(self, filepath, k=5):
        '''
        Cluster the standardized indicators with K-means and plot the centres.

        The cluster centres and the per-row labels are printed, then a radar
        chart is shown with one spoke per indicator and one polygon per
        cluster.

        :param filepath: path of the Excel file with the ``FEATURE_NAMES``
                         columns
        :param k: number of clusters
        :return: None
        '''
        df = pd.read_excel(filepath)
        kmeans = KMeans(n_clusters=k)
        kmeans.fit(df[list(self.FEATURE_NAMES)])
        print(kmeans.cluster_centers_)
        print(kmeans.labels_)
        self._plotRadar(kmeans.cluster_centers_, list(self.FEATURE_NAMES))

    @staticmethod
    def _standardize(frame):
        '''Return ``frame`` centred per column and scaled by the population
        standard deviation (``ddof=0``).'''
        return (frame - frame.mean(axis=0)) / frame.std(axis=0, ddof=0)

    @staticmethod
    def _radarAngles(count):
        '''Return ``count`` evenly spaced spoke angles in radians, from 0.'''
        return np.linspace(0, 2 * np.pi, count, endpoint=False)

    @staticmethod
    def _closePolygon(values):
        '''Return ``values`` with its first element appended, so a line drawn
        through the points ends where it started.'''
        values = np.asarray(values)
        return np.concatenate((values, values[:1]))

    def _plotRadar(self, centers, axisLabels):
        '''Show a radar chart with one dashed polygon per row of ``centers``
        and one spoke per entry of ``axisLabels``.'''
        # The spoke count follows the number of indicators, not the number of
        # clusters, so the tick labels always match the tick positions.
        spokeAngles = self._radarAngles(len(axisLabels))
        closedAngles = self._closePolygon(spokeAngles)

        fig = plt.figure()
        ax = fig.add_subplot(111, polar=True)

        # No explicit colour: matplotlib's colour cycle keeps the polygons
        # distinguishable for any number of clusters. The index in the legend
        # is the row index of the centre, i.e. the value found in labels_.
        for idx, center in enumerate(centers):
            ax.plot(closedAngles, self._closePolygon(center), '--',
                    linewidth=1, label='类别{}'.format(idx))

        ax.set_thetagrids(spokeAngles * 180 / np.pi, axisLabels)

        ax.set_rlim(-2, 4)
        plt.legend(loc='best')
        plt.show()


if __name__ == '__main__':
    analyzer = GuPiaoData()
    # Earlier pipeline stages, kept for reference. Each one reads the file
    # written by the stage before it, so enable them from top to bottom when
    # the intermediate files need to be rebuilt.
    # analyzer.detedData('szgupiaodata.csv')
    # analyzer.cleanData('data/Gupiaodata01.xls')
    # analyzer.chooseData('data/gupiaodata_clean.xls')
    # analyzer.transformData('data/Gupiaodata_coredata.xls')
    # analyzer.standarData('data/Gupiaodata_coretransformdata.xls')
    analyzer.classifyData('data/Gupiao_stdcoredata.xls', k=6)
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章