Baidu AI Study Notes

Spring is fading, the oriole's song grows old; the red blossoms have all fallen and the green plums are still small. The painted hall is hushed in the misty rain; behind the half-closed screen a last wisp of incense curls.
Our secret vows have sunk into silence, the grief of parting stretches beyond reach; the mirror lies thick with dust, and I am too weary to look into it. Leaning on the tower, wordless, heart near breaking, the darkened sky runs on to meet the fragrant grass.

First, install the Python SDK:

pip3 install baidu-aip

Then register on the Baidu AI platform to obtain the APP_ID, API_KEY and SECRET_KEY:

https://ai.baidu.com/

1. Speech Synthesis

from aip import AipSpeech

APP_ID = '14446020'
API_KEY = 'GnaoLWrIiTKP10disiDHMiNZ'
SECRET_KEY = 'FYaMNBsH5NFsgWcRsyBfaHDV70MvvE6u'

# Instantiate the AipSpeech client
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# Call the speech-synthesis method
result = client.synthesis("歡迎來到王者榮耀",
                          options={
                              "per": 1,
                              "spd": 9,
                              "pit": 9,
                              "vol": 15,
                          })
# On success the result is the raw MP3 byte stream; a dict means an error occurred
if not isinstance(result, dict):
    with open('audio.mp3', 'wb') as f:
        f.write(result)
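
If the synthesis call fails, result comes back as a dict describing the error rather than the MP3 byte stream. A minimal sketch of surfacing that error (the err_no / err_msg keys follow Baidu's error-response format and are an assumption here, not taken from the code above):

# Sketch: report a failed synthesis call instead of silently skipping the write.
# The err_no / err_msg keys are assumed from Baidu's error responses.
if isinstance(result, dict):
    print("synthesis failed:", result.get("err_no"), result.get("err_msg"))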

2. Speech Recognition

from aip import AipSpeech
import os

APP_ID = '14446020'
API_KEY = 'GnaoLWrIiTKP10disiDHMiNZ'
SECRET_KEY = 'FYaMNBsH5NFsgWcRsyBfaHDV70MvvE6u'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

def get_file_content(filePath):
    cmd_str = f"ffmpeg -y -i {filePath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm"
    os.system(cmd_str)
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()

res = client.asr(speech=get_file_content("audio.mp3"), options={
    "dev_pid": 1536,
})

print(res["result"][0])

This step uses ffmpeg for the format conversion. Baidu Netdisk download:
https://pan.baidu.com/s/1MadxSh-A0Pzo1Su_wKdktQ extraction code: x5xi
The fixed conversion command (add ffmpeg's bin directory to the PATH, then run it in cmd):
ffmpeg -y -i filePath -acodec pcm_s16le -f s16le -ac 1 -ar 16000 filePath.pcm
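
The f-string passed to os.system above breaks on paths that contain spaces. A sketch of the same conversion using subprocess.run with the identical ffmpeg flags, passed as a list:

import subprocess

def to_pcm(file_path):
    """Convert an audio file to 16 kHz, mono, 16-bit little-endian PCM and return its bytes."""
    pcm_path = f"{file_path}.pcm"
    subprocess.run(
        ["ffmpeg", "-y", "-i", file_path,
         "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
         pcm_path],
        check=True,
    )
    with open(pcm_path, "rb") as fp:
        return fp.read()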

3. Short-Text Similarity

from aip import AipNlp

APP_ID = '14446020'
API_KEY = 'GnaoLWrIiTKP10disiDHMiNZ'
SECRET_KEY = 'FYaMNBsH5NFsgWcRsyBfaHDV70MvvE6u'
client = AipNlp(APP_ID,API_KEY,SECRET_KEY)

ret = client.simnet("你今年幾歲了?","多大年齡了?")
print(ret)

Sample output: {'log_id': 4545309161914786697, 'texts': {'text_2': '多大年齡了?', 'text_1': '你今年幾歲了?'}, 'score': 0.742316}
The score field is the similarity of the two short texts; in practice, a score above 0.72 means the two sentences express the same meaning.
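
A small helper that wraps this threshold rule, reusing the AipNlp client created above (is_similar and the default of 0.72 are just illustrative, taken from the note above):

def is_similar(text1, text2, threshold=0.72):
    # Returns True when simnet scores the pair above the threshold.
    ret = client.simnet(text1, text2)
    return ret.get("score", 0) >= threshold

print(is_similar("你今年幾歲了?", "多大年齡了?"))  # True for the example above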

4. Connecting to the Tuling (Tuling123) Chatbot API

import requests

def tuling_test(question):
    url = "http://openapi.tuling123.com/openapi/api/v2"
    data = {
        "reqType":0,
        "perception": {
            "inputText": {
                "text": question
            },
            "inputImage": {
            },
        },
        "userInfo": {
            "apiKey": "2f4e809b8b3049ce82a6b4787bad65bb",
            "userId": "wangjifei"
        }
    }

    return requests.post(url=url,json=data).json()

ret = tuling_test("心情不好")
print(ret.get("results")[0]["values"]["text"])
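
Indexing ret.get("results")[0] is only safe when the response actually contains a results list. A more defensive version of the same lookup (a sketch; the fallback reply string is my own placeholder, not part of the Tuling API):

def tuling_answer(question):
    # Wraps tuling_test and guards against responses without a "results" list.
    ret = tuling_test(question)
    results = ret.get("results") or []
    if results:
        return results[0]["values"]["text"]
    return "抱歉,我沒聽懂"  # assumed fallback reply

print(tuling_answer("心情不好"))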

5. A Simple Voice-Interaction Loop

  • Basic flow:
    the user records an audio clip --- pass it to the function --- convert the format --- speech recognition --- match an answer --- speech synthesis --- write the synthesized stream to a file --- play the file via os --- delete the file

from aip import AipSpeech
from aip import AipNlp
from uuid import uuid4
import os
import requests
import time

APP_ID = '14446007'
API_KEY = 'QrQWLLg5a8qld7Qty7avqCGC'
SECRET_KEY = 'O5mE31LSl17hm8NRYyf9PwlE5Byqm0nr'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
nlp_client = AipNlp(APP_ID, API_KEY, SECRET_KEY)


def tuling_test(question):
    """接入圖靈,爲問題匹配答案"""
    url = "http://openapi.tuling123.com/openapi/api/v2"
    data = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": question
            },
            "inputImage": {
            },
        },
        "userInfo": {
            "apiKey": "2f4e809b8b3049ce82a6b4787bad65bb",
            "userId": "wangjifei"
        }
    }
    ret = requests.post(url=url, json=data).json()
    return ret.get("results")[0]["values"]["text"]


def get_file_content(filePath):
    """音頻的格式轉換"""
    cmd_str = f"ffmpeg -y -i {filePath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm"
    os.system(cmd_str)
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()


def custom_reply(text):
    """根據問題得到相應的答案,可以通過短文本相似來自定義,也可以調用圖靈問題庫"""
    if nlp_client.simnet("你叫什麼名字", text).get("score") >= 0.72:
        return "我不能告訴你"

    return tuling_test(text)


def learn_say(file_name):
    """機器人學說話"""

    # Recognize the speech as text
    res = client.asr(speech=get_file_content(file_name), options={
        "dev_pid": 1536,
    })
    os.remove(f"{file_name}.pcm")
    text = res.get("result")[0]
    # Get an answer to the recognized question
    text1 = custom_reply(text)
    # Synthesize the answer as speech
    res_audio = client.synthesis(text1, options={
        "vol": 8,
        "pit": 8,
        "spd": 5,
        "per": 4
    })
    # Generate a file name with uuid4
    ret_file_name = f"{uuid4()}.mp3"
    # Write the synthesized audio stream to a file
    with open(ret_file_name, "wb") as f:
        f.write(res_audio)
    # Play the audio file (on Windows, running a file name opens it with the default player)
    os.system(ret_file_name)
    time.sleep(2)
    os.remove(ret_file_name)

if __name__ == '__main__':
    learn_say("audio.m4a")
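
The os.system(ret_file_name) call above relies on Windows opening a file name with its associated player. A hedged sketch of a cross-platform replacement that could be called inside learn_say instead (play_audio is a hypothetical helper; afplay ships with macOS, and ffplay comes with the same ffmpeg build already required above):

import os
import platform
import subprocess

def play_audio(path):
    """Play an audio file with whatever player the current OS provides."""
    system = platform.system()
    if system == "Windows":
        os.startfile(path)  # open with the associated default player
    elif system == "Darwin":
        subprocess.run(["afplay", path], check=False)
    else:
        subprocess.run(["ffplay", "-nodisp", "-autoexit", path], check=False)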

6. Web-Based Chatbot Dialogue

flask_ws.py

from flask import Flask, request, render_template
from uuid import uuid4
from geventwebsocket.websocket import WebSocket
from gevent.pywsgi import WSGIServer
from geventwebsocket.handler import WebSocketHandler
from learn_say import learn_say

app = Flask(__name__)  # type:Flask

@app.route("/ws")
def ws():
    user_socket = request.environ.get("wsgi.websocket")  # type:WebSocket

    while True:
        msg = user_socket.receive()
        q_file_name = f"{uuid4()}.wav"
        with open(q_file_name, "wb") as f:
            f.write(msg)

        ret_file_name = learn_say(q_file_name)

        user_socket.send(ret_file_name)

if __name__ == '__main__':
    http_serv = WSGIServer(("127.0.0.1", 8006), app, handler_class=WebSocketHandler)
    http_serv.serve_forever()
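
If a plain HTTP request hits /ws, wsgi.websocket is missing and user_socket is None, so receive() raises. A sketch of the same route with two guards added (it would replace, not sit alongside, the version above):

@app.route("/ws")
def ws():
    user_socket = request.environ.get("wsgi.websocket")  # type:WebSocket
    if user_socket is None:
        # A plain HTTP request reached /ws: no WebSocket upgrade happened.
        return "This endpoint only accepts WebSocket connections.", 400

    while True:
        msg = user_socket.receive()
        if msg is None:
            # The client closed the connection.
            break
        q_file_name = f"{uuid4()}.wav"
        with open(q_file_name, "wb") as f:
            f.write(msg)
        user_socket.send(learn_say(q_file_name))
    return ""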

flask_app.py

from flask import Flask, request, render_template, send_file

app = Flask(__name__)  # type:Flask

@app.route("/index")
def index():
    return render_template("index.html")

@app.route("/get_audio/<audio_name>")
def get_audio(audio_name):
    return send_file(audio_name)

if __name__ == '__main__':
    app.run("127.0.0.1", 8008, debug=True)
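
send_file(audio_name) will serve whatever path the client asks for, so it is safer to restrict it to the generated mp3 files in the working directory. A sketch of such a variant of the same route (again a replacement, not an addition):

import os
from flask import abort

@app.route("/get_audio/<audio_name>")
def get_audio(audio_name):
    # Only serve .mp3 files that actually exist in the working directory.
    safe_name = os.path.basename(audio_name)
    if not safe_name.endswith(".mp3") or not os.path.exists(safe_name):
        abort(404)
    return send_file(safe_name)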

learn_say.py

from aip import AipSpeech
from aip import AipNlp
from uuid import uuid4
import os
import requests
import time

APP_ID = '14446007'
API_KEY = 'QrQWLLg5a8qld7Qty7avqCGC'
SECRET_KEY = 'O5mE31LSl17hm8NRYyf9PwlE5Byqm0nr'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
nlp_client = AipNlp(APP_ID, API_KEY, SECRET_KEY)


def tuling_test(question):
    """接入圖靈,爲問題匹配答案"""
    url = "http://openapi.tuling123.com/openapi/api/v2"
    data = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": question
            },
            "inputImage": {
            },
        },
        "userInfo": {
            "apiKey": "2f4e809b8b3049ce82a6b4787bad65bb",
            "userId": "wangjifei"
        }
    }
    ret = requests.post(url=url, json=data).json()
    return ret.get("results")[0]["values"]["text"]


def get_file_content(filePath):
    """音頻的格式轉換"""
    cmd_str = f"ffmpeg -y -i {filePath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm"
    os.system(cmd_str)
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()



def custom_reply(text):
    """根據問題得到相應的答案,可以通過短文本相似來自定義,也可以調用圖靈問題庫"""
    if nlp_client.simnet("你叫什麼名字", text).get("score") >= 0.72:
        return "我不能告訴你"

    return tuling_test(text)


def learn_say(file_name):
    """機器人學說話"""

    # Recognize the speech as text
    res = client.asr(speech=get_file_content(file_name), options={
        "dev_pid": 1536,
    })
    os.remove(file_name)
    os.remove(f"{file_name}.pcm")
    text = res.get("result")[0]
    # Get an answer to the recognized question
    text1 = custom_reply(text)
    # Synthesize the answer as speech
    res_audio = client.synthesis(text1, options={
        "vol": 8,
        "pit": 8,
        "spd": 5,
        "per": 4
    })
    # Generate a file name with uuid4
    ret_file_name = f"{uuid4()}.mp3"
    # Write the synthesized audio stream to a file
    with open(ret_file_name, "wb") as f:
        f.write(res_audio)

    return ret_file_name

index.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>

</head>
<body>
<audio src="" autoplay controls id="player"></audio>

<button onclick="start_reco()">錄製消息</button>
<br>
<button onclick="stop_reco()">發送語音消息</button>

</body>
<script src="/static/Recorder.js"></script>
<script type="application/javascript">
    var serv = "http://127.0.0.1:8008";
    var ws_serv = "ws://127.0.0.1:8006/ws";

    var get_music = serv + "/get_audio/";
    var ws = new WebSocket(ws_serv);
    ws.onmessage = function (data) {
        document.getElementById("player").src = get_music + data.data
    };


    var reco = null;
    var audio_context = new AudioContext();
    navigator.getUserMedia = (navigator.getUserMedia ||
        navigator.webkitGetUserMedia ||
        navigator.mozGetUserMedia ||
        navigator.msGetUserMedia);

    navigator.getUserMedia({audio: true}, create_stream, function (err) {
        console.log(err)
    });

    function create_stream(user_media) {
        var stream_input = audio_context.createMediaStreamSource(user_media);
        reco = new Recorder(stream_input);
    }

    // Start recording
    function start_reco() {
        reco.record();
    }
    // Stop recording first, then export the audio
    function stop_reco() {
        reco.stop();
        get_audio();
        reco.clear();
    }
    // Export the audio and send it over the WebSocket
    function get_audio() {
        reco.exportWAV(function (wav_file) {
            // wav_file is a Blob object
            ws.send(wav_file);
        })
    }
</script>
</html>