百度語音識別PC端接入unity

(此處爲http上傳音頻識別模式)

一,找到百度AI平臺,在管理臺點擊「SDK下載」,選擇 C# SDK。

 

下載完以後會有對應不同 .NET 版本的 dll 包,找到與自己 Unity 工程對應的 .NET 版本,將該文件夾拖入 Unity 工程中。

然後找到你的應用管理,點擊管理,在這裏要用到一個 ApiKey 和一個 SecretKey,將這兩個值複製下來,識別時會用到;至於 AppID,暫時不需要使用。

二,實現錄音-上傳識別

  1,錄音,可以百度到很多教程,基於Microphone類實現。不作闡釋。該類可以設置一直錄製和指定時間錄製,這裏採取第二種,指定錄製時間,結束後停止,上傳識別,如果需要一直識別,可以在拿到結果後或者上傳後再次開始錄製,根據需求這裏是拿到結果後纔開始新的錄製。另外一種原理類似,只是節點不同。

2,拿到音頻數據後,即錄音完成後,要使用UnityWebRequest來進行上傳。在此之前,要先訪問百度平臺獲取一次token,做識別認證(官方有說明)。重點:HTTP方式的token獲取地址要到官網教程的REST API文檔中查找(本文代碼中使用的是 https://openapi.baidu.com/oauth/2.0/token),一定要使用該文檔給出的token地址,不然很容易出現認證失敗。

在該文檔中還可以找到音頻上傳地址,位置在token地址下方,往下翻找即可(本文代碼中使用的是 vop.baidu.com/server_api)。

注意:一定要確保你的sdk,apikey,secretkey是同一應用賬號下的,就是說你要使用自己的,相對應的,否則是無法上傳識別成功的。

三,上代碼:

這裏是我實現了一個識別類,進行了簡單封裝,相關點都有註釋,可做參考:

using Baidu.Aip.Speech;
using LitJson;
using Newtonsoft.Json.Linq;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using UnityEngine;
using UnityEngine.Networking;

namespace BaiDuVoice
{
    /// <summary>
    /// Plain data holder for the JSON document returned by the token endpoint.
    /// Field names mirror the JSON keys so the JSON mapper can fill them by name;
    /// do not rename them.
    /// </summary>
    public class AccessToken
    {
        /// <summary>The token value that is appended to recognition requests.</summary>
        public string access_token;
        /// <summary>Value of the "refresh_token" key.</summary>
        public string refresh_token;
        /// <summary>Value of the "expires_in" key (presumably a lifetime in seconds; confirm against the API docs).</summary>
        public int expires_in;
        /// <summary>Value of the "scope" key.</summary>
        public string scope;
        /// <summary>Value of the "session_key" key.</summary>
        public string session_key;
        /// <summary>Value of the "session_secret" key.</summary>
        public string session_secret;
    }
    /// <summary>
    /// Recognition result. The field names are the JSON keys used when mapping the
    /// server response, so they must not be renamed.
    /// </summary>
    public class Recognizeresult
    {
        /// <summary>Value of the "err_no" key.</summary>
        public int err_no;
        /// <summary>Value of the "err_msg" key; also used by this file to carry local error text.</summary>
        public string err_msg;
        /// <summary>Text recognised from the speech.</summary>
        public List<string> result;
        /// <summary>Value of the "corpus_no" key.</summary>
        public string corpus_no;
        /// <summary>Value of the "sn" key.</summary>
        public string sn;
    }
    /// <summary>
    /// Recognition language models. The numeric values are the model ids
    /// (the file sends "1536" as dev_pid when uploading audio).
    /// </summary>
    public enum Language
    {
        /// <summary>No model selected.</summary>
        None = 1535,

        /// <summary>Chinese without punctuation; can recognise simple English.</summary>
        Chinese = 1536,

        /// <summary>Pure Chinese, with punctuation.</summary>
        StandardChinese = 1537,

        /// <summary>Pure English, without punctuation.</summary>
        English = 1737,
    }
    /// <summary>
    /// Account credentials and timing options handed to the recogniser's Init method.
    /// </summary>
    public class SpeechSetting
    {
        /// <summary>Application id of the account.</summary>
        public string AppID;

        /// <summary>API key; sent as "client_id" when requesting a token.</summary>
        public string ApiKey;

        /// <summary>Secret key; sent as "client_secret" when requesting a token.</summary>
        public string SecretKey;

        /// <summary>
        /// Time interval; the recogniser uses it as the recording length in seconds.
        /// </summary>
        public int SpaceTime;
    }

    /// <summary>Callback signature used to deliver a recognition result.</summary>
    /// <param name="msg">The result (or error information) being reported.</param>
    public delegate void OnRecongnition(Recognizeresult msg);
    /// <summary>
    /// Records a fixed-length clip from the first microphone, uploads it to the speech
    /// recognition HTTP endpoint and reports the outcome through the OnRecongnition callback.
    /// </summary>
    /// <remarks>
    /// Usage: subscribe to OnRecongnition, call Init once (this requests the access token),
    /// then call StartRecongnition for every utterance. Failures detected locally are
    /// reported through the same callback with err_no set to -1 and the text in err_msg.
    /// </remarks>
    public class BaiDuSpeech : MonoBehaviour
    {
        /// <summary>err_no value used for failures detected on the client side.</summary>
        private const int LocalErrorNo = -1;

        /// <summary>True once Init has found a microphone.</summary>
        public static bool Available;

        /// <summary>
        /// Recognition model sent as dev_pid. Defaults to the value the upload previously
        /// hard-coded (1536); Language.None falls back to that default as well.
        /// </summary>
        public Language RecognizeLanguage = Language.Chinese;

        private SpeechSetting setting;                                             // account configuration
        private string GetTokenUrl = "https://openapi.baidu.com/oauth/2.0/token";  // token endpoint
        private string token = "";                                                 // access token, empty until fetched
        // HTTPS so the token in the query string is not sent in clear text.
        private string CongnitionAddress = "https://vop.baidu.com/server_api";     // recognition endpoint

        // Recording
        private int rate = 16000;       // sample rate in Hz
        private string deviceName;      // microphone device name
        private AudioClip clip;         // clip currently being recorded
        private int recordTime = 3;     // recording length in seconds

        /// <summary>
        /// Invoked with every recognition result and with every error.
        /// </summary>
        public OnRecongnition OnRecongnition;

        private Recognizeresult Reg_result;     // result waiting to be delivered
        private bool isRecongnting = false;     // true while a record/upload cycle is running
        private Coroutine cor;
        private WaitForSeconds waittime;

        /// <summary>
        /// Stores the settings, selects the first microphone and requests an access token.
        /// </summary>
        /// <param name="setting">Credentials and recording length; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="setting"/> is null.</exception>
        public void Init(SpeechSetting setting)
        {
            if (setting == null)
            {
                throw new ArgumentNullException("setting");
            }

            this.setting = setting;
            if (setting.SpaceTime > 0)
            {
                recordTime = setting.SpaceTime;
            }
            else
            {
                // A non-positive recording length cannot be recorded; keep the current length.
                Debug.LogWarning("SpaceTime無效,使用默認錄製時長:" + recordTime);
            }
            waittime = new WaitForSeconds(recordTime);

            if (Microphone.devices.Length > 0)
            {
                deviceName = Microphone.devices[0];
                Available = true;
                Debug.Log("獲取token");
                GetToken();
            }
            else
            {
                Available = false;
                Debug.LogError("未發現可用mic設備");
            }
        }

        /// <summary>
        /// Builds the credential form and starts the token request coroutine.
        /// </summary>
        private void GetToken()
        {
            if (string.IsNullOrEmpty(setting.ApiKey) || string.IsNullOrEmpty(setting.SecretKey))
            {
                ReportError("授權失敗: ApiKey或SecretKey未設置");
                return;
            }

            WWWForm form = new WWWForm();
            form.AddField("grant_type", "client_credentials");
            form.AddField("client_id", setting.ApiKey);
            form.AddField("client_secret", setting.SecretKey);
            StartCoroutine(HttpPostRequest(GetTokenUrl, form));
        }

        /// <summary>
        /// Posts the credentials to <paramref name="urls"/> and stores the returned token.
        /// </summary>
        /// <param name="urls">Token endpoint address.</param>
        /// <param name="form">Form carrying the same credentials in the request body.</param>
        private IEnumerator HttpPostRequest(string urls, WWWForm form)
        {
            string url = string.Format(
                "{0}?grant_type={1}&client_id={2}&client_secret={3}",
                urls,
                "client_credentials",
                UnityWebRequest.EscapeURL(setting.ApiKey),
                UnityWebRequest.EscapeURL(setting.SecretKey));
            // Log only the endpoint: the full URL contains the secret key.
            Debug.Log(urls);

            using (UnityWebRequest Request = UnityWebRequest.Post(url, form))
            {
                yield return Request.SendWebRequest();

                if (Request.isNetworkError)
                {
                    Debug.Log("授權失敗" + Request.error);
                    ReportError("授權失敗" + Request.error);
                }
                else if (Request.responseCode == 200)
                {
                    OnGetToken(Request.downloadHandler.text);
                }
                else
                {
                    Debug.Log("狀態碼錯誤:" + Request.responseCode + "\n 授權數據:" + Request.downloadHandler.text);
                    ReportError("狀態碼錯誤:" + Request.responseCode);
                }
            }
        }

        /// <summary>
        /// Parses the token endpoint's JSON reply and keeps the access token.
        /// </summary>
        /// <param name="res">Raw JSON text returned by the token endpoint.</param>
        private void OnGetToken(string res)
        {
            AccessToken accessToken = null;
            try
            {
                accessToken = JsonMapper.ToObject<AccessToken>(res);
            }
            catch (Exception e)
            {
                Debug.LogWarning("token解析失敗:" + e.Message);
            }

            if (accessToken == null || string.IsNullOrEmpty(accessToken.access_token))
            {
                ReportError("授權失敗: 無法解析token");
                return;
            }

            token = accessToken.access_token;
            Debug.Log("成功獲取token");
        }

        /// <summary>
        /// Starts one record-and-recognise cycle. Ignored while a cycle is already running.
        /// </summary>
        public void StartRecongnition()
        {
            if (isRecongnting)
            {
                return;
            }

            // Check preconditions BEFORE marking the component busy, so a failed
            // attempt does not block every later call.
            if (setting == null)
            {
                ReportError("尚未初始化,請先調用Init");
                return;
            }
            if (!Available)
            {
                ReportError("mic不可用,請檢查mic");
                return;
            }

            isRecongnting = true;
            if (cor != null)
            {
                StopCoroutine(cor);
            }
            cor = StartCoroutine(Recongnition());
        }

        /// <summary>
        /// Aborts the running cycle (if any) and discards any pending result.
        /// </summary>
        public void StopRecongnition()
        {
            isRecongnting = false;
            if (cor != null)
            {
                StopCoroutine(cor);
                cor = null;
            }
            if (deviceName != null && Microphone.IsRecording(deviceName))
            {
                Microphone.End(deviceName);
            }
            clip = null;
            Reg_result = null;
        }

        /// <summary>
        /// Delivers the pending result to the listener, if there is one.
        /// </summary>
        private void SendMsg()
        {
            // Work on a local copy: the listener may call StopRecongnition, which
            // resets the field while the callback is still running.
            Recognizeresult pending = Reg_result;
            if (pending == null)
            {
                return;
            }
            OnRecongnition listener = OnRecongnition;
            if (listener != null)
            {
                listener(pending);
            }
        }

        /// <summary>
        /// Builds a result object describing a failure detected on the client side.
        /// </summary>
        private static Recognizeresult CreateError(string message)
        {
            Recognizeresult failure = new Recognizeresult();
            failure.err_no = LocalErrorNo;
            failure.err_msg = message;
            return failure;
        }

        /// <summary>
        /// Reports a client-side failure to the listener.
        /// </summary>
        private void ReportError(string message)
        {
            Reg_result = CreateError(message);
            SendMsg();
        }

        /// <summary>
        /// Ends the current cycle and delivers its outcome. The busy state is cleared
        /// before the callback so the listener may start the next cycle from inside it.
        /// </summary>
        private void Finish(Recognizeresult outcome)
        {
            Reg_result = outcome;
            clip = null;
            isRecongnting = false;
            cor = null;
            SendMsg();
        }

        /// <summary>
        /// Converts a clip to 16-bit little-endian mono PCM bytes.
        /// </summary>
        private static byte[] ToMonoPcm16(AudioClip source)
        {
            int channels = Mathf.Max(1, source.channels);
            int frames = source.samples;
            if (frames <= 0)
            {
                return new byte[0];
            }

            // Size the buffer from the clip itself rather than from the requested settings.
            float[] interleaved = new float[frames * channels];
            source.GetData(interleaved, 0);

            byte[] pcm = new byte[frames * 2];
            for (int frame = 0; frame < frames; frame++)
            {
                // Average the channels of this frame down to a single mono sample.
                float sum = 0f;
                int offset = frame * channels;
                for (int c = 0; c < channels; c++)
                {
                    sum += interleaved[offset + c];
                }
                // Clamp so out-of-range samples cannot wrap around in the short cast.
                float mono = Mathf.Clamp(sum / channels, -1f, 1f);
                short value = (short)(mono * short.MaxValue);
                pcm[frame * 2] = (byte)(value & 0xFF);
                pcm[frame * 2 + 1] = (byte)((value >> 8) & 0xFF);
            }
            return pcm;
        }

        /// <summary>
        /// Turns a completed upload request into a result object.
        /// </summary>
        private static Recognizeresult ReadRecognition(UnityWebRequest request)
        {
            if (request.isNetworkError)
            {
                Debug.Log("Net error:" + request.error);
                return CreateError("Net error:" + request.error);
            }
            if (request.responseCode != 200)
            {
                Debug.Log("狀態碼錯誤:" + request.responseCode);
                return CreateError("狀態碼錯誤:" + request.responseCode);
            }

            string body = request.downloadHandler.text;
            Recognizeresult parsed = null;
            try
            {
                parsed = JsonMapper.ToObject<Recognizeresult>(body);
            }
            catch (Exception e)
            {
                Debug.LogWarning("識別結果解析失敗:" + e.Message);
            }

            if (parsed == null)
            {
                return CreateError("識別失敗: " + body);
            }
            // Decide by the error code, not by searching the raw text for "error":
            // recognised speech may itself contain that word.
            if (parsed.err_no != 0)
            {
                Debug.Log("識別失敗:" + body);
                parsed.err_msg = "識別失敗: " + body;
                return parsed;
            }

            Debug.Log("成功獲取數據:" + body);
            return parsed;
        }

        /// <summary>
        /// Records for the configured time, uploads the audio and reports the result.
        /// </summary>
        private IEnumerator Recongnition()
        {
            Debug.Log("開始識別");
            clip = Microphone.Start(deviceName, false, recordTime, rate);
            if (clip == null)
            {
                Finish(CreateError("錄音啟動失敗,請檢查mic"));
                yield break;
            }

            yield return waittime;
            Microphone.End(deviceName);

            // The token is requested asynchronously by Init; check it only now so that
            // recording can overlap with the token request.
            if (string.IsNullOrEmpty(token))
            {
                Finish(CreateError("尚未獲取到token,無法識別"));
                yield break;
            }

            byte[] data = ToMonoPcm16(clip);
            if (data.Length == 0)
            {
                Finish(CreateError("未錄到音頻數據"));
                yield break;
            }

            int devPid = RecognizeLanguage == Language.None ? (int)Language.Chinese : (int)RecognizeLanguage;
            string url = string.Format(
                "{0}?cuid={1}&token={2}&dev_pid={3}",
                CongnitionAddress,
                UnityWebRequest.EscapeURL(SystemInfo.deviceUniqueIdentifier),
                UnityWebRequest.EscapeURL(token),
                devPid);

            Recognizeresult outcome;
            using (UnityWebRequest request = new UnityWebRequest(url, UnityWebRequest.kHttpVerbPOST))
            {
                // Send the PCM bytes as the raw request body; a multipart form body
                // would not match the audio/pcm content type declared below.
                request.uploadHandler = new UploadHandlerRaw(data);
                request.downloadHandler = new DownloadHandlerBuffer();
                request.SetRequestHeader("Content-Type", "audio/pcm;rate=" + rate);
                yield return request.SendWebRequest();
                outcome = ReadRecognition(request);
            }

            Finish(outcome);
        }
    }
}


調用測試:

  // Baidu speech recogniser component, created in Start.
  private BaiDuSpeech baidu;

  /// <summary>
  /// Creates the recogniser, subscribes to its results and initialises it.
  /// </summary>
  private void Start()
  {
      // Replace the placeholders with the values of your own application.
      SpeechSetting config = new SpeechSetting();
      config.AppID = "**********";
      config.ApiKey = "*******";
      config.SecretKey = "******";
      config.SpaceTime = 3;

      baidu = gameObject.AddComponent<BaiDuSpeech>();
      // The handler has to be registered before Init is called.
      baidu.OnRecongnition += OnGetResult;
      baidu.Init(config);
  }
 /// <summary>
 /// Logs the outcome of one recognition attempt.
 /// </summary>
 /// <param name="msg">Result object delivered by the recogniser; may describe an error.</param>
 private void OnGetResult(Recognizeresult msg)
 {
     if (msg == null)
     {
         Debug.Log("識別失敗");
         return;
     }

     Debug.Log("msg.corpus_no " + msg.corpus_no + "\nmsg.err_msg" + msg.err_msg + " \nmsg.sn " + msg.sn);

     // err_msg is null when no message was set, and result may be missing or
     // empty, so check both before reading them.
     bool reportedSuccess = !string.IsNullOrEmpty(msg.err_msg) && msg.err_msg.Contains("success");
     bool hasText = msg.result != null && msg.result.Count > 0;

     if (reportedSuccess && hasText)
     {
         // result holds the candidate transcriptions; the first one is normally used.
         Debug.Log("識別成功"+msg.result[0]);
     }
     else
     {
         Debug.Log("識別失敗");
     }
 }

 親測可用,項目使用中,如有疑問歡迎提出!

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章