樹莓派學習之旅(4)— 綜合心知天氣、語音喚醒和百度API做一個語音天氣助手

樹莓派學習之旅(4)— 綜合心知天氣、語音喚醒和百度API做一個語音天氣助手

一、修改代碼

在之前博客(樹莓派學習之旅(3)— 使用 snowboy 做語音喚醒功能)的基礎上,對代碼進行修改。
因爲在打開聲卡的過程中需要消耗比較多的時間,所以我們在打開聲卡後,打印一行提示符,
在這裏插入圖片描述
因爲我們之後要使用聲卡播放聲音,所以將聲卡的output設爲True,
在這裏插入圖片描述
然後我們將回調函數參數修改,不再使用他提供的回調函數
在這裏插入圖片描述
接下來我們就來完成這個回調函數,首先,我們需要將聲卡關閉,因爲語音喚醒的聲卡是使用回調函數的,我們不能讀寫,所以重新開啓一次聲卡

def detected_callback():
    detector.terminate()        # close the sound card snowboy held open in callback mode
    # Reopen the sound card in blocking mode so we can both record and play.
    stream = detector.audio.open(format = paInt16, channels = 1,rate = 16000, input = True,output=True, frames_per_buffer = 2048)

然後播放 “你好,主人” 的語音,播放完後開始錄音,這個在之前的博客說過:樹莓派學習之旅(2)— 使用USB聲卡進行錄音

    print("你好,主人!")
    play_audio(stream,"wav/nihaozhuren.wav")    # play the "hello, master" greeting
    record(stream)                              # record the spoken command to 01.wav

錄音好後通過百度API進行語音識別:Python開發之路(2)— 使用百度API實現語音識別

    # Speech-to-text via the Baidu ASR API; restr is the best candidate string.
    restr = baidu_api.baisu_asr(token)
    print(restr)

通過語音識別得到的結果,然後判斷該做出什麼反應,並使用百度語音合成,然後播放:Python開發之路(3)— 使用百度API實現語音合成

    # Decide how to respond based on the recognized text.
    if "天氣" in restr:
        weather = baidu_api.get_weather()
        spk_str = "今天天氣:%s。氣溫:%s攝氏度。"%(weather["text"],weather["temperature"])
        print(spk_str)
        baidu_api.baidu_tts(spk_str,token)      # synthesize the answer into wav/result.wav
        play_audio(stream,"wav/result.wav")
    elif( ("開燈" in restr) or ("把燈打開" in restr) ):
        spk_str = "主人。燈已打開"
        print(spk_str)
        baidu_api.baidu_tts(spk_str,token)
        play_audio(stream,"wav/result.wav")
    stream.close()

二、代碼

1、demo.py的代碼

import snowboydecoder
import sys,wave,os,time
import signal
from tqdm import tqdm
from pyaudio import PyAudio,paInt16
import baidu_api

interrupted = False     # set True by the SIGINT handler; polled by the detector loop


def signal_handler(sig, frame):
    """SIGINT handler: request a clean shutdown of the detection loop.

    Parameters renamed from the original (``signal``, ``frame``) so the
    handler no longer shadows the imported ``signal`` module.
    """
    global interrupted
    interrupted = True


def interrupt_callback():
    """Return True once Ctrl+C has been pressed (polled by detector.start)."""
    # Reading a module-level name needs no 'global' declaration.
    return interrupted

# The snowboy hotword model file must be supplied on the command line.
if len(sys.argv) == 1:
    print("Error: need to specify model name")
    print("Usage: python demo.py your.model")
    sys.exit(-1)

model = sys.argv[1]

# capture SIGINT signal, e.g., Ctrl+C
signal.signal(signal.SIGINT, signal_handler)

# Fetch a Baidu access token once at startup; reused by every ASR/TTS call.
token = baidu_api.get_token()

# Build the hotword detector; sensitivity trades false accepts vs. misses.
detector = snowboydecoder.HotwordDetector(model, sensitivity=0.5)
print('Press Ctrl+C to exit')

def play_audio(stream, filename):
    """Play a WAV file through an already-open PyAudio stream.

    :param stream: a PyAudio stream opened with output=True; assumed to match
                   the format of the project's wav files (mono 16 kHz, 16 bit)
                   — TODO confirm against the shipped assets.
    :param str filename: path of the WAV file to play.
    """
    stream.start_stream()
    # 'with' guarantees the wave file is closed even if stream.write raises.
    with wave.open(filename, 'rb') as wf:
        while True:
            data = wf.readframes(2048)
            if not data:            # readframes returns b"" at end of file
                break
            stream.write(data)
    stream.stop_stream()
    time.sleep(0.01)                # brief pause to let the last buffer drain

def record(stream):
    """Record ~4 seconds of audio from *stream* into "01.wav".

    The file is written with the parameters the Baidu ASR API expects:
    1 channel, 16-bit samples, 16000 Hz.

    :param stream: a PyAudio stream opened with input=True at 16 kHz mono.
    """
    stream.start_stream()
    record_buf = []
    # 32 chunks * 2048 frames / 16000 Hz ≈ 4.1 s (the original comment's
    # "5 seconds" was wrong).
    for _ in tqdm(range(8 * 4)):
        record_buf.append(stream.read(2048))    # pull one buffer off the card

    # 'with' closes the file even if a write fails.
    with wave.open("01.wav", 'wb') as wf:
        wf.setnchannels(1)          # mono (the original comment said 2 — wrong)
        wf.setsampwidth(2)          # 2 bytes per sample, i.e. 16 bit
        wf.setframerate(16000)      # 16 kHz sample rate
        # b"".join is the idiomatic (and equivalent) form of "".encode().join
        wf.writeframes(b"".join(record_buf))
    stream.stop_stream()
    time.sleep(0.01)

def detected_callback():
    """Hotword callback: greet the user, record a command, run ASR, respond.

    snowboy owns the sound card through a stream callback, so we close its
    stream first and reopen the card in blocking mode for our own playback
    and recording.  HotwordDetector.start() reopens the detection stream
    after this function returns.
    """
    detector.terminate()        # close the detector's callback-driven stream
    # Reopen the sound card in blocking mode (input for recording, output
    # for playback).
    stream = detector.audio.open(format = paInt16, channels = 1,rate = 16000, input = True,output=True, frames_per_buffer = 2048)
    try:
        print("你好,主人!")
        play_audio(stream,"wav/nihaozhuren.wav")    # play the greeting
        record(stream)                              # record the spoken command
        restr = baidu_api.baisu_asr(token)          # speech -> text
        print(restr)
        if "天氣" in restr:
            weather = baidu_api.get_weather()
            spk_str = "今天天氣:%s。氣溫:%s攝氏度。"%(weather["text"],weather["temperature"])
            print(spk_str)
            baidu_api.baidu_tts(spk_str,token)      # answer -> wav/result.wav
            play_audio(stream,"wav/result.wav")
        elif( ("開燈" in restr) or ("把燈打開" in restr) ):
            spk_str = "主人。燈已打開"
            print(spk_str)
            baidu_api.baidu_tts(spk_str,token)
            play_audio(stream,"wav/result.wav")
    finally:
        # Always release the card, even if ASR/TTS raised; otherwise the
        # detector could not reopen it for the next hotword.
        stream.close()
 

# main loop
# Blocks here polling the microphone: detected_callback runs on each hotword,
# and interrupt_callback (set True by Ctrl+C) ends the loop.
detector.start(detected_callback=detected_callback,
               interrupt_check=interrupt_callback,
               sleep_time=0.03)

# Release the sound card on exit.
detector.terminate()


2、baidu_api.py的代碼

#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Author: William

# encoding:utf-8

import json
from urllib import request,parse

def get_token():
    """Fetch a Baidu OAuth access token for the ASR/TTS APIs.

    :return: the access token string, or None if the request failed.
    """
    # NOTE(review): credentials are hard-coded; move them to a config file or
    # environment variables before sharing this code.
    API_Key = "DF2wS4DQ53TlS8ATxasy0ZXv"            # API Key from the Baidu console
    Secret_Key = "GvADiMXnwATEhaiKuOXg3t37KnKClGWr" # Secret Key from the Baidu console
    # Build the token-request URL.
    Url = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id="+API_Key+"&client_secret="+Secret_Key
    try:
        # 'with' closes the HTTP response (the original leaked the connection).
        with request.urlopen(Url) as resp:
            result = json.loads(resp.read().decode('utf-8'))
        print("access_token:",result['access_token'])
        return result['access_token']
    except request.HTTPError as err:
        # Only HTTPError carries .code; the original caught URLError and
        # crashed with AttributeError on plain connection failures.
        print('token http response http code : ' + str(err.code))
    except request.URLError as err:
        print('token request failed : ' + str(err.reason))
    return None

def baisu_asr(token):
    """Send the recorded file "01.wav" to Baidu ASR and return the transcript.

    :param token: Baidu access token from get_token().
    :return: the best recognition candidate (a string).
    :raises KeyError: if the response has no 'result' field (API error).
    """
    # 1. Read the speech recorded by demo.record().
    with open("01.wav", 'rb') as speech_file:
        speech_data = speech_file.read()
    length = len(speech_data)
    if length == 0:
        print('file 01.wav length read 0 bytes')

    # 2. Query-string parameters.
    params = {'cuid': "12345678python", # unique user id, <= 60 characters
              'token': token,           # the access token
              'dev_pid': 1537 }         # 1537 = Mandarin
    params_query = parse.urlencode(params)
    # Full request URL.
    Url = 'http://vop.baidu.com/server_api' + "?" + params_query

    # 3. Headers describe the format/sample rate of the attached audio.
    headers = {
        'Content-Type': 'audio/wav; rate=16000',
        'Content-Length': length
    }

    # 4. POST the raw audio bytes as the request body.  'with' closes the
    #    HTTP response (the original leaked the connection).
    req = request.Request(Url, speech_data, headers)
    with request.urlopen(req) as res_f:
        result = json.loads(res_f.read().decode('utf-8'))
    print(result)
    return result['result'][0]

def baidu_tts(TEXT,token):
    """Synthesize *TEXT* with Baidu TTS and save it to "wav/result.wav".

    On failure nothing is written; the error payload is printed instead.

    :param TEXT: UTF-8 text to synthesize (< 2048 characters).
    :param token: Baidu access token from get_token().
    """
    # The API wants the text urlencoded twice in total: once here, and once
    # more when urlencode() encodes the whole parameter dict below.
    tex = parse.quote_plus(TEXT)
    params = {'tok': token,     # access token from the open platform
              'tex': tex,       # the (pre-quoted) text to synthesize
              'per': 4,         # voice: 0/1/3/4 = different standard voices
              'spd': 5,         # speed 0-15, 5 = default
              'pit': 5,         # pitch 0-15, 5 = default
              'vol': 5,         # volume 0-15, 5 = default
              'aue': 6,         # 6 = wav (same payload as pcm-16k); 3 = mp3
              'cuid': "7749py", # unique user id
              'lan': 'zh', 'ctp': 1}  # fixed parameters
    # Encode the parameters into the POST body.
    data = parse.urlencode(params)
    req = request.Request("http://tsn.baidu.com/text2audio", data.encode('utf-8'))
    # 'with' closes the HTTP response (the original leaked the connection).
    with request.urlopen(req) as f:
        result_str = f.read()
        # Lower-case the response headers for a case-insensitive lookup.
        headers = dict((name.lower(), value) for name, value in f.headers.items())
    # Success is signalled by an audio/wav Content-Type; errors come back as JSON.
    # .get() avoids a KeyError when the header is missing entirely.
    if "audio/wav" in headers.get('content-type', '') :
        print("tts success")
        with open("wav/result.wav", 'wb') as of:
            of.write(result_str)
    else:
        # Surface the failure instead of silently doing nothing.
        print("tts failed:", result_str)


def get_weather():
    """Query Seniverse (心知天氣) for the current weather in Ganzhou.

    :return: the "now" dict of the first result, with keys such as
             "text" (conditions) and "temperature".
    """
    # NOTE(review): private key is hard-coded; keep it out of source control.
    KEY = "SqEQJuFtxQBkZNNGC"            # private key from the Seniverse site
    params = {  'key': KEY,
                'location': "贛州",
                'language': "zh-Hans",
                'unit': "c"
             }
    # Encode the query parameters.
    params_query = parse.urlencode(params)
    # Full request URL.
    Url = "https://api.seniverse.com/v3/weather/now.json" + "?" + params_query
    # 'with' closes the HTTP response (the original leaked the connection).
    with request.urlopen(Url) as res_f:
        result = json.loads(res_f.read().decode('utf-8'))
    # Log the first result for debugging.
    print(result["results"][0])
    return result["results"][0]["now"]

3、snowboydecoder.py修改後的代碼

#!/usr/bin/env python

import collections
import pyaudio
import snowboydetect
import time
import wave
import os
import logging
from ctypes import *
from contextlib import contextmanager

# Module-level logger for the snowboy decoder.
logging.basicConfig()
logger = logging.getLogger("snowboy")
logger.setLevel(logging.INFO)
# Resources (model resources and notification sounds) are resolved relative
# to this file's directory.
TOP_DIR = os.path.dirname(os.path.abspath(__file__))

RESOURCE_FILE = os.path.join(TOP_DIR, "resources/common.res")
DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav")
DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav")

def py_error_handler(filename, line, function, err, fmt):
    # Deliberately swallow ALSA's C-level error messages (they are noisy).
    pass

# C signature of ALSA's error handler: (file, line, function, err, fmt).
ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)

# Keep a reference to the ctypes callback so it is not garbage-collected
# while ALSA still holds a pointer to it.
c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)

@contextmanager
def no_alsa_error():
    """Suppress ALSA's stderr spam while the managed body runs.

    If libasound cannot be loaded (e.g. non-Linux systems), the body simply
    runs without suppression.
    """
    try:
        asound = cdll.LoadLibrary('libasound.so')
    except OSError:
        # No ALSA available: run the body unmodified.
        yield
        return
    asound.snd_lib_error_set_handler(c_error_handler)
    try:
        yield
    finally:
        # The original's bare 'except:' plus a second 'yield' broke exception
        # propagation out of the with-body and never restored the handler on
        # error; try/finally fixes both.
        asound.snd_lib_error_set_handler(None)

class RingBuffer(object):
    """Fixed-capacity buffer of audio bytes from PortAudio.

    When more than *size* bytes are appended, the oldest bytes are dropped.
    """

    def __init__(self, size=4096):
        # A bounded deque provides the ring (drop-oldest) semantics for free.
        self._buf = collections.deque(maxlen=size)

    def extend(self, data):
        """Append *data* (a bytes-like object) to the end of the buffer."""
        self._buf.extend(data)

    def get(self):
        """Return all buffered bytes and reset the buffer to empty."""
        snapshot = bytes(bytearray(self._buf))
        self._buf.clear()
        return snapshot


def play_audio_file(fname=DETECT_DONG):
    """Play a wave file once through a temporary PyAudio stream.

    By default it plays the bundled "dong" notification sound.

    :param str fname: wave file name
    :return: None
    """
    wav = wave.open(fname, 'rb')
    frames = wav.readframes(wav.getnframes())
    with no_alsa_error():
        pa = pyaudio.PyAudio()
    # Open an output-only stream matching the file's own format.
    out = pa.open(
        format=pa.get_format_from_width(wav.getsampwidth()),
        channels=wav.getnchannels(),
        rate=wav.getframerate(), input=False, output=True)
    out.start_stream()
    out.write(frames)
    time.sleep(0.2)
    out.stop_stream()
    out.close()
    pa.terminate()


class HotwordDetector(object):
    """
    Snowboy decoder to detect whether a keyword specified by `decoder_model`
    exists in a microphone input stream.

    This copy has been modified for the demo: after a detection callback runs
    (which may close and reuse the sound card itself), start() closes and
    reopens the detection stream before resuming.

    :param decoder_model: decoder model file path, a string or a list of strings
    :param resource: resource file path.
    :param sensitivity: decoder sensitivity, a float of a list of floats.
                              The bigger the value, the more senstive the
                              decoder. If an empty list is provided, then the
                              default sensitivity in the model will be used.
    :param audio_gain: multiply input volume by this factor.
    :param apply_frontend: applies the frontend processing algorithm if True.
    """

    def __init__(self, decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1,
                 apply_frontend=False):

        # Accept either a single model/sensitivity or lists of them.
        tm = type(decoder_model)
        ts = type(sensitivity)
        if tm is not list:
            decoder_model = [decoder_model]
        if ts is not list:
            sensitivity = [sensitivity]
        model_str = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode())
        self.detector.SetAudioGain(audio_gain)
        self.detector.ApplyFrontend(apply_frontend)
        self.num_hotwords = self.detector.NumHotwords()

        # A single sensitivity value is broadcast across all hotwords.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if len(sensitivity) != 0:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
        sensitivity_str = ",".join([str(t) for t in sensitivity])
        if len(sensitivity) != 0:
            self.detector.SetSensitivity(sensitivity_str.encode())

        # Ring buffer fed by the stream callback, sized channels * rate * 5.
        # NOTE(review): with 16-bit samples (2 bytes each) this holds roughly
        # 2.5 s of byte data, not 5 s — confirm the intended capacity.
        self.ring_buffer = RingBuffer(
            self.detector.NumChannels() * self.detector.SampleRate() * 5)
        # 1. Create the PyAudio object; it is also reused by demo.py's
        #    detected_callback to reopen the sound card in blocking mode.
        with no_alsa_error():
            self.audio = pyaudio.PyAudio()

    def start(self, detected_callback=play_audio_file,
              interrupt_check=lambda: False,
              sleep_time=0.03,
              audio_recorder_callback=None,
              silent_count_threshold=15,
              recording_timeout=100):
        """
        Start the voice detector. For every `sleep_time` second it checks the
        audio buffer for triggering keywords. If detected, then call
        corresponding function in `detected_callback`, which can be a single
        function (single model) or a list of callback functions (multiple
        models). Every loop it also calls `interrupt_check` -- if it returns
        True, then breaks from the loop and return.

        :param detected_callback: a function or list of functions. The number of
                                  items must match the number of models in
                                  `decoder_model`.
        :param interrupt_check: a function that returns True if the main loop
                                needs to stop.
        :param float sleep_time: how much time in second every loop waits.
        :param audio_recorder_callback: if specified, this will be called after
                                        a keyword has been spoken and after the
                                        phrase immediately after the keyword has
                                        been recorded. The function will be
                                        passed the name of the file where the
                                        phrase was recorded.
        :param silent_count_threshold: indicates how long silence must be heard
                                       to mark the end of a phrase that is
                                       being recorded.
        :param recording_timeout: limits the maximum length of a recording.
        :return: None
        """
        self._running = True

        # Stream callback: stash incoming audio in the ring buffer and feed
        # silence back to the output side.
        def audio_callback(in_data, frame_count, time_info, status):
            self.ring_buffer.extend(in_data)
            play_data = chr(0) * len(in_data)
            return play_data, pyaudio.paContinue

        # 2. Open the sound card in callback mode (input for detection,
        #    output enabled so the same card can later be used for playback).
        def open_stream():
            self.stream_in = self.audio.open(
                input=True, output=True,
                format=self.audio.get_format_from_width(
                    self.detector.BitsPerSample() / 8),
                channels=self.detector.NumChannels(),
                rate=self.detector.SampleRate(),
                frames_per_buffer=2048,
                stream_callback=audio_callback)
            # 3. Tell the user the (slow) sound-card open has finished.
            print("I am Listening......")

        open_stream()

        # print("**************channels:",self.detector.NumChannels())
        # print("**************format:",self.audio.get_format_from_width(self.detector.BitsPerSample() / 8))
        # print("**************rate:",self.detector.SampleRate())
        if interrupt_check():
            logger.debug("detect voice return")
            return

        # Normalize detected_callback to one callback per hotword.
        tc = type(detected_callback)
        if tc is not list:
            detected_callback = [detected_callback]
        if len(detected_callback) == 1 and self.num_hotwords > 1:
            detected_callback *= self.num_hotwords

        assert self.num_hotwords == len(detected_callback), \
            "Error: hotwords in your models (%d) do not match the number of " \
            "callbacks (%d)" % (self.num_hotwords, len(detected_callback))

        logger.debug("detecting...")

        state = "PASSIVE"
        while self._running is True:
            if interrupt_check():
                logger.debug("detect voice break")
                break
            data = self.ring_buffer.get()
            if len(data) == 0:
                time.sleep(sleep_time)
                continue

            # RunDetection returns: -1 error, -2 silence, 0 voice,
            # >0 index of the detected hotword.
            status = self.detector.RunDetection(data)
            if status == -1:
                logger.warning("Error initializing streams or reading audio data")

            #small state machine to handle recording of phrase after keyword
            if state == "PASSIVE":
                if status > 0: #key word found
                    self.recordedData = []
                    self.recordedData.append(data)
                    silentCount = 0
                    recordingCount = 0
                    message = "Keyword " + str(status) + " detected at time: "
                    message += time.strftime("%Y-%m-%d %H:%M:%S",
                                         time.localtime(time.time()))
                    logger.info(message)
                    callback = detected_callback[status-1]
                    if callback is not None:
                        callback()
                        # The callback may have taken over the sound card
                        # (demo.py's detected_callback calls terminate());
                        # close and reopen the detection stream afterwards.
                        # NOTE(review): if the callback already closed
                        # stream_in, this is a double close — confirm PyAudio
                        # tolerates it.
                        self.stream_in.close()
                        open_stream()

                    if audio_recorder_callback is not None:
                        state = "ACTIVE"
                    continue

            elif state == "ACTIVE":
                stopRecording = False
                if recordingCount > recording_timeout:
                    stopRecording = True
                elif status == -2: #silence found
                    if silentCount > silent_count_threshold:
                        stopRecording = True
                    else:
                        silentCount = silentCount + 1
                elif status == 0: #voice found
                    silentCount = 0

                if stopRecording == True:
                    fname = self.saveMessage()
                    audio_recorder_callback(fname)
                    state = "PASSIVE"
                    continue

                recordingCount = recordingCount + 1
                self.recordedData.append(data)

        logger.debug("finished.")

    def saveMessage(self):
        """
        Save the message stored in self.recordedData to a timestamped file.

        :return: the name of the written wav file.
        """
        filename = 'output' + str(int(time.time())) + '.wav'
        data = b''.join(self.recordedData)

        #use wave to save data
        wf = wave.open(filename, 'wb')
        wf.setnchannels(1)
        wf.setsampwidth(self.audio.get_sample_size(
            self.audio.get_format_from_width(
                self.detector.BitsPerSample() / 8)))
        wf.setframerate(self.detector.SampleRate())
        wf.writeframes(data)
        wf.close()
        logger.debug("finished saving: " + filename)
        return filename

    def terminate(self):
        """
        Terminate audio stream. Users can call start() again to detect.

        Modified for the demo: only the stream is closed, the PyAudio object
        stays alive so detected_callback can reopen the card, and _running is
        left untouched so the detection loop resumes after the callback.
        :return: None
        """
        self.stream_in.stop_stream()
        self.stream_in.close()
        # self.audio.terminate()
        # self._running = False

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章