科大訊飛實時轉錄 demo 改寫(python3)

#-*- encoding:utf-8 -*-
# import subprocess
# import sys
# import hashlib
# from hashlib import sha1
# import hmac
# import base64
# from socket import *
# import json, time, threading
# from websocket import create_connection
# import websocket
# from urllib import quote
# import logging

# reload(sys)
# sys.setdefaultencoding("utf8")
# logging.basicConfig()

import subprocess
import sys
import hashlib
from hashlib import sha1
import hmac
import base64
from socket import *
import json, time, threading
from websocket import create_connection
import websocket
from urllib.parse import quote
import os
from tkinter import *

# Path of the audio file to transcribe; forward and back slashes both work.
audio_filepath_pre = ""
# Same path with the extension removed; ".pcm" and ".txt" are appended to it.
# os.path.splitext only strips a real extension of the final path component,
# so a path without an extension (or with a dot in a directory name) is not
# truncated the way slicing at the last "." would.
audiopath_midvalue = os.path.splitext(audio_filepath_pre)[0]
base_url = "wss://rtasr.xfyun.cn/v1/ws"
app_id = ""
api_key = ""
speaker_number = "2"  # per the original author: has no effect, not offered by the service
has_seperate = "true"  # per the original author: has no effect, not offered by the service
# file_path = "./test_1.pcm"

# JSON message sent after the last audio chunk to mark the end of the stream.
end_tag = "{\"end\": true}"

class Client():
    """Real-time transcription client for the iFlytek RTASR WebSocket service.

    Constructing a client converts the configured audio file to raw
    16 kHz mono 16-bit PCM with ffmpeg, opens an authenticated WebSocket
    connection and starts a background thread that receives results and
    writes one ``HH:MM:SS  text`` line per sentence to
    ``audiopath_midvalue + ".txt"``.

    Configuration is read from the module-level names ``audio_filepath_pre``,
    ``audiopath_midvalue``, ``base_url``, ``app_id``, ``api_key``,
    ``speaker_number``, ``has_seperate`` and ``end_tag``.
    """

    def __init__(self):
        """Convert the audio, connect to the service and start receiving.

        Raises:
            subprocess.CalledProcessError: if ffmpeg exits with an error.
            OSError: if ffmpeg cannot be launched or the transcript file
                cannot be opened.
        """
        self._convert_to_pcm()

        self.ws = create_connection(self._build_url())  # open the connection

        # The transcript file must exist before the receiver thread starts,
        # otherwise the thread could touch self.fileHandle before it is set.
        # UTF-8 is explicit so the transcript does not depend on the locale.
        try:
            self.fileHandle = open(audiopath_midvalue + ".txt", 'w', encoding='utf-8')
        except OSError:
            self.ws.close()
            raise

        self.trecv = threading.Thread(target=self.recv)
        self.trecv.start()

    @staticmethod
    def _convert_to_pcm():
        """Run ffmpeg to write the source audio as 16 kHz mono s16le PCM."""
        command = [
            "ffmpeg", "-y",
            "-i", audio_filepath_pre,
            "-acodec", "pcm_s16le",
            "-f", "s16le",
            "-ac", "1",
            "-ar", "16000",
            audiopath_midvalue + ".pcm",
        ]
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact. The call blocks until the conversion is
        # done because the PCM file cannot be sent before it exists.
        # NOTE (from the original author): blocking here freezes a UI when
        # called from its main thread; run the client in a worker thread.
        subprocess.run(command, check=True)

    @staticmethod
    def _build_url():
        """Return the connection URL including the signed auth parameters."""
        ts = str(int(time.time()))
        # signa = base64(HMAC-SHA1(api_key, md5_hex(app_id + ts)))
        base_string = hashlib.md5((app_id + ts).encode('utf-8')).hexdigest().encode('utf-8')
        digest = hmac.new(api_key.encode('utf-8'), base_string, hashlib.sha1).digest()
        signa = base64.b64encode(digest).decode('utf-8')
        return (base_url
                + "?appid=" + app_id
                + "&ts=" + ts
                + "&has_seperate=" + has_seperate
                + "&speaker_number=" + speaker_number
                + "&signa=" + quote(signa))

    @staticmethod
    def _format_timestamp(start_ms):
        """Format a millisecond offset (int or numeric string) as HH:MM:SS."""
        minutes, seconds = divmod(int(start_ms) // 1000, 60)
        hours, minutes = divmod(minutes, 60)
        return "%02d:%02d:%02d" % (hours, minutes, seconds)

    def _write_sentence(self, start_ms, words):
        """Print one finished sentence and append it to the transcript file."""
        line = self._format_timestamp(start_ms) + "  " + words
        print(line)
        self.fileHandle.write(line + '\n')

    def joint(self, data):
        """Join the words of one result payload into a sentence.

        Args:
            data: JSON text of a result payload containing ``seg_id`` and
                ``cn.st`` with ``bg`` and the nested ``rt``/``ws``/``cw``/``w``
                lists.

        Returns:
            A ``(seg_id, start_time, text)`` tuple, where ``start_time`` is
            the payload's ``bg`` value and ``text`` is every ``w`` entry
            concatenated in order.
        """
        data_dict = json.loads(data)
        seg_id = data_dict["seg_id"]
        sentence = data_dict["cn"]["st"]
        text = "".join(
            candidate["w"]
            for rt_item in sentence["rt"]
            for ws_item in rt_item["ws"]
            for candidate in ws_item["cw"]
        )
        return seg_id, sentence["bg"], text

    def send(self, file_path):
        """Stream the audio file to the service, then send the end tag.

        Args:
            file_path: path of the raw PCM file to upload.
        """
        with open(file_path, 'rb') as file_object:
            # 1280-byte chunks with a 40 ms pause between them.
            for chunk in iter(lambda: file_object.read(1280), b""):
                self.ws.send(chunk)
                time.sleep(0.04)

        self.ws.send(end_tag.encode('utf-8'))
        print ("send end tag success")

    def recv(self):
        """Receive results until the stream ends and write the transcript.

        Several results can arrive for the same sentence; they share a start
        time, so only the latest text per start time is kept and it is
        written once a result with a different start time arrives. The final
        sentence is written, and the transcript file closed, however the
        loop ends (connection closed, empty frame, error action or an
        unexpected exception).
        """
        pending = None  # (start time, text) of the sentence not yet written
        try:
            while self.ws.connected:
                result = str(self.ws.recv())
                if not result:
                    break
                result_dict = json.loads(result)
                action = result_dict["action"]

                if action == "started":
                    print ("handshake success, result: " + result)
                elif action == "result":
                    print ("rtasr result: " + result)
                    _seg_id, start_time, words = self.joint(result_dict["data"])
                    # Integer-divide by 100 to absorb millisecond jitter in
                    # the start time of updates to the same sentence.
                    if pending is not None and int(start_time) // 100 != int(pending[0]) // 100:
                        self._write_sentence(*pending)
                    pending = (start_time, words)
                elif action == "error":
                    print ("rtasr error: " + result)
                    self.ws.close()
                    return
        except websocket.WebSocketConnectionClosedException:
            # The connection being closed is one of the normal ways the
            # stream ends; the cleanup below handles it.
            pass
        finally:
            if pending is not None:
                self._write_sentence(*pending)  # the last sentence
            print ("receive result end")
            self.fileHandle.close()
            # os.remove(audiopath_midvalue + ".pcm")  # delete the PCM file

    def close(self):
        """Close the WebSocket connection."""
        self.ws.close()
        print ("connection closed")

if __name__ == '__main__':
    # Constructing the client converts the audio and opens the connection;
    # send() then streams the converted PCM file to the service.
    pcm_file = audiopath_midvalue + ".pcm"
    client = Client()
    client.send(pcm_file)
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章