春色將闌,鶯聲漸老,紅英落盡青梅小。畫堂人靜雨濛濛,屏山半掩餘香嫋。
密約沉沉,離情杳杳,菱花塵滿慵將照。倚樓無語欲銷魂,長空黯淡連芳草。
首先安裝python包
pip3 install baidu-aip
再註冊百度AI得到相關參數
一、語音合成
# Section 1: text-to-speech with the Baidu AI SDK (baidu-aip).
from aip import AipSpeech

# NOTE(review): hard-coded credentials — move to env vars/config in real code.
APP_ID = '14446020'
API_KEY = 'GnaoLWrIiTKP10disiDHMiNZ'
SECRET_KEY = 'FYaMNBsH5NFsgWcRsyBfaHDV70MvvE6u'

# Instantiate the AipSpeech client.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# Synthesize speech for the given text.
# per = voice persona, spd = speed, pit = pitch, vol = volume.
result = client.synthesis("歡迎來到王者榮耀",
                          options={
                              "per": 1,
                              "spd": 9,
                              "pit": 9,
                              "vol": 15,
                          })

# On success the SDK returns raw MP3 bytes; on failure it returns an error
# dict, so only write the file when the result is NOT a dict.
if not isinstance(result, dict):
    with open('auido.mp3', 'wb') as f:  # note: "auido" typo is reused by later sections
        f.write(result)
二、語音識別
# Section 2: speech recognition (ASR) with the Baidu AI SDK.
from aip import AipSpeech
import os

# NOTE(review): same hard-coded credentials as section 1 — externalize them.
APP_ID = '14446020'
API_KEY = 'GnaoLWrIiTKP10disiDHMiNZ'
SECRET_KEY = 'FYaMNBsH5NFsgWcRsyBfaHDV70MvvE6u'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
def get_file_content(filePath):
    """Convert an audio file to raw PCM via ffmpeg and return the bytes.

    Writes the converted stream next to the input as ``<filePath>.pcm``
    in the format Baidu ASR expects: 16 kHz, mono, signed 16-bit LE.

    :param filePath: path to the source audio file (mp3/m4a/wav/...).
    :return: raw PCM bytes of the converted audio.
    :raises subprocess.CalledProcessError: if ffmpeg fails.
    """
    import subprocess
    # An argument list instead of a shell-interpolated f-string: paths with
    # spaces or shell metacharacters can no longer break (or inject) a command,
    # and check=True surfaces ffmpeg failures instead of silently continuing.
    subprocess.run(
        ["ffmpeg", "-y", "-i", filePath,
         "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
         f"{filePath}.pcm"],
        check=True,
    )
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()
# Recognize the MP3 written by the synthesis example above.
# dev_pid 1536 selects the Mandarin (with simple English) model.
res = client.asr(speech=get_file_content("auido.mp3"), options={
    "dev_pid": 1536,
})
# NOTE(review): res["result"] only exists on success — check err_no first.
print(res["result"][0])
這裏用到一個格式轉換的軟件,百度地址:
https://pan.baidu.com/s/1MadxSh-A0Pzo1Su_wKdktQ 提取碼:x5xi
固定的格式轉換命令:(需要將bin文件添加環境變量,在cmd中執行)
ffmpeg -y -i filePath -acodec pcm_s16le -f s16le -ac 1 -ar 16000 filePath.pcm
三、短文本相似度
# Section 3: short-text similarity with Baidu NLP.
from aip import AipNlp

# NOTE(review): hard-coded credentials — externalize in real code.
APP_ID = '14446020'
API_KEY = 'GnaoLWrIiTKP10disiDHMiNZ'
SECRET_KEY = 'FYaMNBsH5NFsgWcRsyBfaHDV70MvvE6u'
client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
# simnet compares two short texts and returns a similarity score in "score".
ret = client.simnet("你今年幾歲了?", "多大年齡了?")
print(ret)
輸出示例:{'log_id': 4545309161914786697, 'texts': {'text_2': '多大年齡了?', 'text_1': '你今年幾歲了?'}, 'score': 0.742316}
其中 score 是兩個測試短文本的相似度;一般當 score 大於 0.72 時,可以認爲兩個短文本的意思是相似的。
四、代碼實現對接圖靈
import requests
def tuling_test(question):
    """Send *question* to the Tuling chatbot API and return the parsed JSON.

    :param question: the user's utterance (plain text).
    :return: the full JSON response dict; the answer text lives at
             ``results[0]["values"]["text"]``.
    """
    url = "http://openapi.tuling123.com/openapi/api/v2"
    data = {
        "reqType": 0,  # 0 = text input
        "perception": {
            "inputText": {
                "text": question
            },
            "inputImage": {
            },
        },
        "userInfo": {
            "apiKey": "2f4e809b8b3049ce82a6b4787bad65bb",
            "userId": "wangjifei"
        }
    }
    # A timeout keeps the call from hanging forever if the API is unreachable.
    return requests.post(url=url, json=data, timeout=10).json()
# Ask the bot something and print the first answer's text.
ret = tuling_test("心情不好")
# NOTE(review): ret.get("results") is None when the API returns an error.
print(ret.get("results")[0]["values"]["text"])
五、簡單實現人機交流
- 基本步驟:
用戶錄製音頻---傳入函數---格式轉化---語音識別---匹配答案---語音合成---語音文件流寫入文件---os執行文件---刪除文件
# Section 5: simple voice chat — record -> ASR -> answer -> TTS -> play.
from aip import AipSpeech
from aip import AipNlp
from uuid import uuid4
import os
import requests
import time

# NOTE(review): hard-coded Baidu credentials — externalize in deployments.
APP_ID = '14446007'
API_KEY = 'QrQWLLg5a8qld7Qty7avqCGC'
SECRET_KEY = 'O5mE31LSl17hm8NRYyf9PwlE5Byqm0nr'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)   # speech client (ASR/TTS)
nlp_client = AipNlp(APP_ID, API_KEY, SECRET_KEY)  # NLP client (similarity)
def tuling_test(question):
    """Ask the Tuling chatbot *question* and return the answer text.

    :param question: the user's utterance (plain text).
    :return: the first answer's text from the API response.
    """
    url = "http://openapi.tuling123.com/openapi/api/v2"
    data = {
        "reqType": 0,  # 0 = text input
        "perception": {
            "inputText": {
                "text": question
            },
            "inputImage": {
            },
        },
        "userInfo": {
            "apiKey": "2f4e809b8b3049ce82a6b4787bad65bb",
            "userId": "wangjifei"
        }
    }
    # A timeout keeps the call from hanging forever if the API is unreachable.
    ret = requests.post(url=url, json=data, timeout=10).json()
    # NOTE(review): "results" is absent on API errors — caller sees a crash.
    return ret.get("results")[0]["values"]["text"]
def get_file_content(filePath):
    """Convert an audio file to raw PCM via ffmpeg and return the bytes.

    Writes the converted stream next to the input as ``<filePath>.pcm``
    in the format Baidu ASR expects: 16 kHz, mono, signed 16-bit LE.

    :param filePath: path to the source audio file.
    :return: raw PCM bytes of the converted audio.
    :raises subprocess.CalledProcessError: if ffmpeg fails.
    """
    import subprocess
    # Argument-list form avoids shell interpolation of the path (spaces,
    # metacharacters, injection); check=True surfaces ffmpeg failures.
    subprocess.run(
        ["ffmpeg", "-y", "-i", filePath,
         "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
         f"{filePath}.pcm"],
        check=True,
    )
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()
def custom_reply(text):
    """Return a reply for *text*.

    Uses short-text similarity to intercept questions close to a protected
    one with a canned answer; everything else is forwarded to the Tuling bot.

    :param text: the recognized user utterance.
    :return: the reply text.
    """
    sim = nlp_client.simnet("你叫什麼名字", text)
    # simnet returns no "score" key on error; default to 0 ("not similar")
    # instead of crashing on a None >= float comparison.
    if sim.get("score", 0) >= 0.72:
        return "我不能告訴你"
    return tuling_test(text)
def learn_say(file_name):
    """Full round trip: recognize *file_name*, answer it, speak the answer.

    Steps: convert + ASR the audio, look up a reply, synthesize the reply
    to MP3, play it via the OS, then delete the temporary file.
    """
    # Speech -> text.  dev_pid 1536 = Mandarin model.
    res = client.asr(speech=get_file_content(file_name), options={
        "dev_pid": 1536,
    })
    # Drop the intermediate PCM produced by get_file_content.
    os.remove(f"{file_name}.pcm")
    # NOTE(review): assumes recognition succeeded — "result" is absent on error.
    text = res.get("result")[0]
    # Text -> reply text (canned answer or Tuling bot).
    text1 = custom_reply(text)
    # Reply text -> speech (MP3 bytes on success, error dict on failure).
    res_audio = client.synthesis(text1, options={
        "vol": 8,
        "pit": 8,
        "spd": 5,
        "per": 4
    })
    # Random file name so concurrent runs do not clash.
    ret_file_name = f"{uuid4()}.mp3"
    # Write the synthesized stream to disk.
    with open(ret_file_name, "wb") as f:
        f.write(res_audio)
    # "Execute" the MP3: relies on the OS file association to launch a
    # player (Windows-style); on other systems this likely does nothing.
    ret = os.system(ret_file_name)
    time.sleep(2)
    os.remove(ret_file_name)
if __name__ == '__main__':
    # Demo entry point: answer the bundled sample recording.
    learn_say("auido.m4a")
六、網頁版智能機器人對話
flask_ws.py
# flask_ws.py — WebSocket endpoint: receives recorded audio blobs and
# replies with the file name of the synthesized answer MP3.
from flask import Flask, request, render_template
from uuid import uuid4
from geventwebsocket.websocket import WebSocket
from gevent.pywsgi import WSGIServer
from geventwebsocket.handler import WebSocketHandler
from learn_say import learn_say

app = Flask(__name__)  # type:Flask

@app.route("/ws")
def ws():
    # gevent-websocket places the upgraded socket into the WSGI environ.
    user_socket = request.environ.get("wsgi.websocket")  # type:WebSocket
    while True:
        # Blocks until the browser sends a recorded WAV blob.
        msg = user_socket.receive()
        q_file_name = f"{uuid4()}.wav"
        with open(q_file_name, "wb") as f:
            f.write(msg)
        # learn_say returns the file name of the synthesized answer MP3.
        ret_file_name = learn_say(q_file_name)
        user_socket.send(ret_file_name)

if __name__ == '__main__':
    # WebSocketHandler upgrades HTTP requests to WebSocket connections.
    http_serv = WSGIServer(("127.0.0.1", 8006), app, handler_class=WebSocketHandler)
    http_serv.serve_forever()
flask_app.py
# flask_app.py — plain HTTP app serving the page and synthesized audio files.
from flask import Flask, request, render_template, send_file

app = Flask(__name__)  # type:Flask

@app.route("/index")
def index():
    # Serve the recorder page (templates/index.html).
    return render_template("index.html")
@app.route("/get_audio/<audio_name>")
def get_audio(audio_name):
    """Serve a synthesized audio file by name.

    The name comes straight from the URL (untrusted), so reject anything
    that is not a bare file name — otherwise ``send_file`` would serve
    arbitrary paths (e.g. ``..%5C..%5Csecrets`` on Windows).

    :param audio_name: bare file name of an MP3 in the working directory.
    :return: the file response, or a 400 error for suspicious names.
    """
    import os
    if os.path.basename(audio_name) != audio_name or audio_name.startswith("."):
        return "invalid file name", 400
    return send_file(audio_name)
if __name__ == '__main__':
    # Dev server; debug=True enables the reloader — not for production.
    app.run("127.0.0.1", 8008, debug=True)
learn_say.py
# learn_say.py — ASR/NLP/TTS helpers shared by the web demo.
from aip import AipSpeech
from aip import AipNlp
from uuid import uuid4
import os
import requests
import time

# NOTE(review): hard-coded Baidu credentials — externalize in deployments.
APP_ID = '14446007'
API_KEY = 'QrQWLLg5a8qld7Qty7avqCGC'
SECRET_KEY = 'O5mE31LSl17hm8NRYyf9PwlE5Byqm0nr'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)   # speech client (ASR/TTS)
nlp_client = AipNlp(APP_ID, API_KEY, SECRET_KEY)  # NLP client (similarity)
def tuling_test(question):
    """Ask the Tuling chatbot *question* and return the answer text.

    :param question: the user's utterance (plain text).
    :return: the first answer's text from the API response.
    """
    url = "http://openapi.tuling123.com/openapi/api/v2"
    data = {
        "reqType": 0,  # 0 = text input
        "perception": {
            "inputText": {
                "text": question
            },
            "inputImage": {
            },
        },
        "userInfo": {
            "apiKey": "2f4e809b8b3049ce82a6b4787bad65bb",
            "userId": "wangjifei"
        }
    }
    # A timeout keeps the call from hanging forever if the API is unreachable.
    ret = requests.post(url=url, json=data, timeout=10).json()
    # NOTE(review): "results" is absent on API errors — caller sees a crash.
    return ret.get("results")[0]["values"]["text"]
def get_file_content(filePath):
    """Convert an audio file to raw PCM via ffmpeg and return the bytes.

    Writes the converted stream next to the input as ``<filePath>.pcm``
    in the format Baidu ASR expects: 16 kHz, mono, signed 16-bit LE.

    :param filePath: path to the source audio file.
    :return: raw PCM bytes of the converted audio.
    :raises subprocess.CalledProcessError: if ffmpeg fails.
    """
    import subprocess
    # Argument-list form avoids shell interpolation of the path (spaces,
    # metacharacters, injection); check=True surfaces ffmpeg failures.
    subprocess.run(
        ["ffmpeg", "-y", "-i", filePath,
         "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
         f"{filePath}.pcm"],
        check=True,
    )
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()
def custom_reply(text):
    """Return a reply for *text*.

    Uses short-text similarity to intercept questions close to a protected
    one with a canned answer; everything else is forwarded to the Tuling bot.

    :param text: the recognized user utterance.
    :return: the reply text.
    """
    sim = nlp_client.simnet("你叫什麼名字", text)
    # simnet returns no "score" key on error; default to 0 ("not similar")
    # instead of crashing on a None >= float comparison.
    if sim.get("score", 0) >= 0.72:
        return "我不能告訴你"
    return tuling_test(text)
def learn_say(file_name):
    """Recognize *file_name*, compute a reply, synthesize it to MP3.

    Web variant of the demo: instead of playing the audio, returns the
    generated MP3 file name so the browser can fetch it over HTTP.
    """
    # Speech -> text.  dev_pid 1536 = Mandarin model.
    res = client.asr(speech=get_file_content(file_name), options={
        "dev_pid": 1536,
    })
    # Clean up the uploaded audio and the intermediate PCM.
    os.remove(file_name)
    os.remove(f"{file_name}.pcm")
    # NOTE(review): assumes recognition succeeded — "result" is absent on error.
    text = res.get("result")[0]
    # Text -> reply text (canned answer or Tuling bot).
    text1 = custom_reply(text)
    # Reply text -> speech (MP3 bytes on success, error dict on failure).
    res_audio = client.synthesis(text1, options={
        "vol": 8,
        "pit": 8,
        "spd": 5,
        "per": 4
    })
    # Random name so concurrent sessions do not overwrite each other.
    ret_file_name = f"{uuid4()}.mp3"
    # Write the synthesized stream to disk for flask_app to serve.
    with open(ret_file_name, "wb") as f:
        f.write(res_audio)
    return ret_file_name
index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<!-- Player for the synthesized answer; buttons record/send the question. -->
<audio src="" autoplay controls id="player"></audio>
<button onclick="start_reco()">錄製消息</button>
<br>
<button onclick="stop_reco()">發送語音消息</button>
</body>
<script src="/static/Recorder.js"></script>
<script type="application/javascript">
    var serv = "http://127.0.0.1:8008";      // HTTP app serving audio files
    var ws_serv = "ws://127.0.0.1:8006/ws";  // WebSocket app doing the chat
    var get_music = serv + "/get_audio/";
    var ws = new WebSocket(ws_serv);
    // The server replies with the MP3 file name; point the player at it.
    ws.onmessage = function (data) {
        document.getElementById("player").src = get_music + data.data
    };
    var reco = null;
    var audio_context = new AudioContext();
    // NOTE(review): navigator.getUserMedia is deprecated; modern browsers
    // need navigator.mediaDevices.getUserMedia — confirm target browsers.
    navigator.getUserMedia = (navigator.getUserMedia ||
        navigator.webkitGetUserMedia ||
        navigator.mozGetUserMedia ||
        navigator.msGetUserMedia);
    navigator.getUserMedia({audio: true}, create_stream, function (err) {
        console.log(err)
    });
    // Wrap the microphone stream in a Recorder.js instance.
    function create_stream(user_media) {
        var stream_input = audio_context.createMediaStreamSource(user_media);
        reco = new Recorder(stream_input);
    }
    // Start recording a message.
    function start_reco() {
        reco.record();
    }
    // Stop recording first, then grab and send the audio.
    function stop_reco() {
        reco.stop();
        get_audio();
        reco.clear();
    }
    // Export the recording as WAV and push it over the WebSocket.
    function get_audio() {
        reco.exportWAV(function (wav_file) {
            // wav_file is a Blob object
            ws.send(wav_file);
        })
    }
</script>
</html>