利用py2neo建立金融知識圖譜(1)

數據來源

選擇tushare的公募基金管理人接口,獲取所需要的數據

import tushare as ts
import pandas as pd
import time
# Tushare API token; left blank here and must be filled in before running.
token = ''
# Hand the token to tushare, then build the "pro" API client.
ts.set_token(token)
pro = ts.pro_api()
# Query the fund_company endpoint; presumably returns a pandas DataFrame
# (the commented-out to_csv call below treats it as one).
df = pro.fund_company()
# Uncomment to save the result as jijin.csv, the file the graph-building
# script reads. "utf_8_sig" writes a BOM, index=None omits the row index.
# df.to_csv("jijin.csv",encoding="utf_8_sig",index=None)

數據展示:

name,shortname,province,city,address,phone,office,website,chairman,manager,reg_capital,setup_date,end_date,employees,main_business,org_code,credit_code
北京廣能投資基金管理有限公司,廣能基金,北京,北京市,北京市朝陽區北四環中路27號院5號樓2712-2715A,,北京市朝陽區北四環中路27號院5號樓2712-2715A,www.gnfund.cn,劉錫潛,楊運成,10000.0,20111031,,10.0,,584419680,

設計流程

首先建立節點

選擇name,province,manager,chairman幾個字段建立節點,將shortname、city等其他字段添加爲節點屬性以節省空間。

其次建立關係

name字段爲主鍵,關聯其他字段

代碼

# coding:utf-8
import os
import pandas as pd
import re
from py2neo import Graph, Node, Relationship
'''
MATCH (n)
OPTIONAL MATCH (n)-[r]-()
DELETE n,r
#刪庫demo
'''

def _get_or_create_node(graph, label, name, **properties):
    """Return the ``label`` node whose ``name`` matches, creating it if absent.

    Extra ``properties`` are only applied when a new node is created; an
    existing node is returned unchanged.
    """
    found = graph.find_one(label=label, property_key='name', property_value=name)
    # Compare against None explicitly so the check does not depend on how
    # the node object defines truthiness.
    if found is not None:
        return found
    node = Node(label, name=name, **properties)
    graph.create(node)
    return node


def creat_node(file, graph):
    """Load fund-company rows from a CSV file into the graph.

    For every row, four nodes are looked up by ``name`` and created when
    missing: the company ('名字', with ``shortname``), its location
    ('所在地', with ``city``), its manager ('總經理') and its chairman
    ('法人代表'). The company node is then linked to the other three.

    Args:
        file: Path of the CSV file. It must contain the columns ``name``,
            ``shortname``, ``province``, ``city``, ``manager`` and
            ``chairman``.
        graph: Graph object exposing ``find_one`` and ``create``.

    Returns:
        None. If ``file`` does not exist, a message is printed and nothing
        is written to the graph.
    """
    if not os.path.exists(file):
        print('{} 文件不存在'.format(file))
        # Stop here: reading a missing file would only raise further down.
        return

    df = pd.read_csv(file)
    # Missing cells become a placeholder string so every row still yields
    # a usable node name.
    df = df.fillna(value=str('不存在'))

    for name, shortname, province, city, manager, chairman in zip(
            df.name, df.shortname, df.province, df.city,
            df.manager, df.chairman):
        name_node = _get_or_create_node(graph, '名字', name, shortname=shortname)
        province_node = _get_or_create_node(graph, '所在地', province, city=city)
        manager_node = _get_or_create_node(graph, '總經理', manager)
        chairman_node = _get_or_create_node(graph, '法人代表', chairman)
        # Printed for every row, whether the nodes were new or already present.
        print('創建了新的結點:{}{}{}{}'.format(name_node, province_node, manager_node, chairman_node))

        # The company node is the hub; link it to each related node in turn.
        for rel_type, target_node in (('地址', province_node),
                                      ('經理人', manager_node),
                                      ('法人', chairman_node)):
            relationship = Relationship(name_node, rel_type, target_node)
            graph.create(relationship)
            print('新建關係: {}'.format(relationship))


if __name__ == '__main__':
    # Script entry point: connect to Neo4j over HTTP, then import the CSV.
    # Alternative connection relying on the library defaults:
    #     Graph(password="")
    neo4j_graph = Graph('http://:7474', username='neo4j', password='')
    creat_node('jijin.csv', neo4j_graph)

效果

在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章