使用python 將 SQL腳本中的相關表 提取出來:

使用python 將 SQL腳本中的相關表 提取出來:

#!/usr/bin/env python
# coding=utf-8
import datetime
import re

from requests.auth import HTTPBasicAuth
import pymysql
import pyhive
import sqlalchemy
import pandas
pymysql.install_as_MySQLdb()
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import *
import pandas as pd
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

"""
將 sql_text 這段SQL 腳本中所使用到表提取出來
"""

def extract_tables(sql_str):
    # pattern = r'(?:(?<=from|join)\s+)(?:\w+\.\w+|\w+)'
    pattern = r'(?:(?<=from|join)\s+)(?:\w+\.\w+\.\w+|\w+\.\w+)'   # 經過測試這個比較完美
    list_tables=list(set(re.findall(pattern, str(sql_str), re.I)))
    return list_tables


if __name__ == '__main__':
    pattern = r'(?:(?<=from|join)\s+)(?:\w+\.\w+|\w+)'
    myChannel_sit_Name=""
    myChannel_sit_pwd=""
    engine = create_engine('mysql+pymysql://username:password@localhost/database')
    sql = """select id,title,sql_text  from tableName """
    df = pd.read_sql(sql, myChannel_sit_engine)
    print(df.loc[:,["id","sql_text"]])
    dict_={}
    for row_index, row in  df.loc[:,["id","sql_text"]].iterrows():
        print("*"*10)
        print(row_index)
        sql_text=str(row[1]).replace("\\r\\n"," ").replace("\\t","")
        table_list=extract_tables(sql_text)
        dict_[row[0]]=table_list
    print(dict_)

    inser_tmp="  INSERT INTO test_table_001 (data_api_id, extract_tables) VALUES "
    for k,v in dict_.items():
        inser_tmp+="(\"{}\",\"{}\"), ".format(k,str(v).replace(" ",''))
    inser_str=inser_tmp[:len(inser_tmp) - 2] + ";"
    print(inser_str)
    conn=conn=myChannel_sit_engine.connect()
    conn.execute("DELETE  FROM   db_apple_bbi_sit.test_table_001 ;")
    conn.execute(inser_str)

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章