百度语音识别PC端接入unity

(此处为http上传音频识别模式)

一、找到百度AI开放平台,在管理控制台点击“SDK下载”,选择 C# SDK。

 

下载完以后会有对应不同 .NET 版本的 dll 包,找到与自己 Unity 工程所用 .NET 版本对应的文件夹,将该文件夹拖入 Unity 工程。

然后找到你的应用管理,点击“管理”,在这里要用到一个 API Key 和一个 Secret Key,将这两个值复制下来,识别时会用到;至于 AppID,暂时不需要使用。

二,实现录音-上传识别

  1,录音,可以百度到很多教程,基于Microphone类实现。不作阐释。该类可以设置一直录制和指定时间录制,这里采取第二种,指定录制时间,结束后停止,上传识别,如果需要一直识别,可以在拿到结果后或者上传后再次开始录制,根据需求这里是拿到结果后才开始新的录制。另外一种原理类似,只是节点不同。

2,拿到音频数据后,即录音完成后,要使用 UnityWebRequest 来进行上传。在此之前,要先访问百度平台获取一次 token,用于识别认证(官方有说明)。重点:HTTP 方式的 token 获取地址要以官网教程中的 REST API 文档为准,必须使用该文档里给出的 token 地址,否则很容易出现认证失败。

在该文档中找到上传地址,在token地址下方,翻找,如下:

注意:一定要确保你的sdk,apikey,secretkey是同一应用账号下的,就是说你要使用自己的,相对应的,否则是无法上传识别成功的。

三,上代码:

这里我实现了一个识别类,并进行了简单封装,相关要点都有注释,可作参考:

using Baidu.Aip.Speech;
using LitJson;
using Newtonsoft.Json.Linq;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using UnityEngine;
using UnityEngine.Networking;

namespace BaiDuVoice
{
    // Deserialization target for the JSON body returned by the token endpoint
    // (filled by JsonMapper.ToObject<AccessToken> in BaiDuSpeech.OnGetToken).
    // Field names must match the JSON keys, so do not rename them.
    public class AccessToken
    {
        // The only field BaiDuSpeech reads; it is appended to the recognition URL as "token".
        public string access_token;

        // NOTE(review): presumably the token lifetime in seconds — confirm against the Baidu OAuth docs.
        public int expires_in;

        public string session_key;

        public string scope;

        public string refresh_token;

        public string session_secret;
    }
    // Recognition result. Used as the JSON deserialization target for the recognition
    // response, so do not change the field names.
    public class Recognizeresult
    {
        public string corpus_no;

        // Status text; BaiDuSpeech also writes its own failure descriptions here
        // before invoking the result callback.
        public string err_msg;

        public int err_no;
        /// <summary>
        /// The text recognized from the speech
        /// </summary>
        public List<string> result;

        public string sn;
    }
    // Recognition language/model selector.
    // NOTE(review): the numeric values look like Baidu "dev_pid" model ids (1536 is the
    // dev_pid value the recognition request is built with) — confirm against the REST API docs.
    public enum Language
    {
        None=1535,
        Chinese=1536,// no punctuation, can recognize simple English
        StandardChinese =1537,// Chinese only, with punctuation
        English=1737,// English only, no punctuation
    }
    /// <summary>
    /// Account credentials and recording settings passed to BaiDuSpeech.Init.
    /// </summary>
    public class SpeechSetting
    {
        // Not read by BaiDuSpeech in this file; kept alongside the other credentials.
        public string AppID;
        // Sent to the token endpoint as "client_id".
        public string ApiKey;
        // Sent to the token endpoint as "client_secret".
        public string SecretKey;
        /// <summary>
        /// Time interval: Init copies this into the recording length (in seconds) used for
        /// each Microphone.Start call and for the wait before the clip is uploaded.
        /// </summary>
        public int  SpaceTime;
    }

    /// <summary>
    /// Signature of the handler BaiDuSpeech invokes with each recognition result
    /// (successful or failed).
    /// </summary>
    public delegate void OnRecongnition( Recognizeresult msg);// recognition result callback
    /// <summary>
    /// Records a fixed-length clip from the first microphone, uploads it to the Baidu
    /// speech-recognition REST endpoint and reports the outcome through the
    /// OnRecongnition callback field.
    /// </summary>
    /// <remarks>
    /// Typical use: add the component, subscribe to OnRecongnition, call Init (which
    /// fetches the access token asynchronously), then call StartRecongnition for each
    /// utterance. Every failure (token, microphone, network, server) is delivered to the
    /// callback as a Recognizeresult whose err_msg describes the problem.
    /// </remarks>
    public class BaiDuSpeech : MonoBehaviour
    {
        /// <summary>True once Init has found at least one microphone device.</summary>
        public static bool Available;

        /// <summary>
        /// Model sent as dev_pid with each recognition request. Defaults to
        /// Language.Chinese (1536), the value that used to be hard-coded.
        /// </summary>
        public Language RecognitionLanguage = Language.Chinese;

        /// <summary>
        /// Recognition result callback. Subscribe before calling Init so token failures
        /// are reported as well.
        /// </summary>
        public OnRecongnition OnRecongnition;

        private const int DefaultRecordSeconds = 3;

        private SpeechSetting setting;// account / timing configuration
        private string GetTokenUrl = "https://openapi.baidu.com/oauth/2.0/token";// token endpoint
        private string token = "";// access token obtained from GetTokenUrl
        // NOTE(review): plain http sends the token in clear text; consider the https
        // variant of this endpoint after confirming it in the Baidu REST API docs.
        private string CongnitionAddress = "http://vop.baidu.com/server_api";// recognition endpoint

        // Recording
        private int rate = 16000;// sample rate in Hz
        private string deviceName;// microphone device name
        private AudioClip clip;// clip currently being recorded
        private int recordTime = DefaultRecordSeconds;// recording length in seconds

        private Recognizeresult Reg_result;// result that will be (or was last) sent to the callback
        private bool isRecongnting = false;// true while a record/upload cycle is running
        private Coroutine cor;
        private WaitForSeconds waittime;

        /// <summary>
        /// Stores the configuration, selects the first microphone and starts fetching the
        /// access token.
        /// </summary>
        /// <param name="setting">Credentials and recording length; must not be null.</param>
        /// <exception cref="ArgumentNullException">setting is null.</exception>
        public void Init(SpeechSetting setting)
        {
            if (setting == null)
            {
                throw new ArgumentNullException("setting");
            }

            this.setting = setting;
            // Microphone.Start needs a positive length; fall back to the default otherwise.
            recordTime = setting.SpaceTime > 0 ? setting.SpaceTime : DefaultRecordSeconds;
            waittime = new WaitForSeconds(recordTime);

            if (Microphone.devices.Length > 0)
            {
                deviceName = Microphone.devices[0];
                Available = true;
                Debug.Log("获取token");
                GetToken();
            }
            else
            {
                Available = false;
                Debug.LogError("未发现可用mic设备");
            }
        }

        /// <summary>Builds the credential form and starts the token request coroutine.</summary>
        private void GetToken()
        {
            WWWForm form = new WWWForm();
            form.AddField("grant_type", "client_credentials");
            form.AddField("client_id", setting.ApiKey ?? "");
            form.AddField("client_secret", setting.SecretKey ?? "");
            StartCoroutine(HttpPostRequest(GetTokenUrl, form));
        }

        /// <summary>
        /// Posts the client credentials to the token endpoint and stores the returned token.
        /// </summary>
        /// <param name="urls">Token endpoint address.</param>
        /// <param name="form">Form carrying the same credentials in the request body.</param>
        IEnumerator HttpPostRequest(string urls, WWWForm form)
        {
            string url = string.Format("{0}?grant_type={1}&client_id={2}&client_secret={3}",
                urls,
                "client_credentials",
                UnityWebRequest.EscapeURL(setting.ApiKey ?? ""),
                UnityWebRequest.EscapeURL(setting.SecretKey ?? ""));
            // Log only the endpoint: the full URL contains the secret key.
            Debug.Log(urls);

            using (UnityWebRequest Request = UnityWebRequest.Post(url, form))
            {
                yield return Request.SendWebRequest();

                // isNetworkError is kept for compatibility with the Unity version this file targets.
                if (Request.isNetworkError)
                {
                    Debug.Log("授权失败" + Request.error);
                    ReportFailure("授权失败" + Request.error);
                }
                else if (Request.responseCode == 200)
                {
                    OnGetToken(Request.downloadHandler.text);
                }
                else
                {
                    Debug.Log("状态码错误:" + Request.responseCode + "\n 授权数据:" + Request.downloadHandler.text);
                    ReportFailure("状态码错误:" + Request.responseCode);
                }
            }
        }

        /// <summary>
        /// Parses the JSON returned by the token endpoint and stores the access token.
        /// A body that cannot be parsed or carries no token is reported as a failure.
        /// </summary>
        private void OnGetToken(string res)
        {
            AccessToken accessToken = null;
            try
            {
                accessToken = JsonMapper.ToObject<AccessToken>(res);
            }
            catch (Exception ex)
            {
                Debug.LogError("token解析失败: " + ex.Message);
            }

            if (accessToken == null || string.IsNullOrEmpty(accessToken.access_token))
            {
                ReportFailure("授权失败: " + res);
                return;
            }

            token = accessToken.access_token;
            // The token value itself is a credential, so it is not written to the log.
            Debug.Log("token获取成功");
        }

        /// <summary>
        /// Starts one record-and-recognize cycle. Ignored while a cycle is already running.
        /// If no microphone is available the callback is invoked immediately with an error.
        /// </summary>
        public void StartRecongnition()
        {
            if (isRecongnting)
            {
                return;
            }

            // Set before any callback runs so a handler that calls StartRecongnition
            // again cannot recurse into this method.
            isRecongnting = true;

            if (!Available)
            {
                ReportFailure("mic不可用,请检查mic");
                // Release the flag so a later call (e.g. after Init succeeds) is not blocked forever.
                isRecongnting = false;
                return;
            }

            if (cor != null)
            {
                StopCoroutine(cor);
            }
            cor = StartCoroutine(Recongnition());
        }

        /// <summary>
        /// Cancels the running cycle, if any, and stops the microphone if it is still recording.
        /// </summary>
        public void StopRecongnition()
        {
            if (cor != null)
            {
                StopCoroutine(cor);
                cor = null;
            }
            // Only touch the device when this component may have started it.
            if (isRecongnting && Microphone.IsRecording(deviceName))
            {
                Microphone.End(deviceName);
            }
            isRecongnting = false;
            clip = null;
            Reg_result = null;
        }

        /// <summary>Sends the current result to the callback, then clears its error text.</summary>
        private void SendMsg()
        {
            // Work on a local copy: the handler may call StopRecongnition, which nulls the field.
            Recognizeresult current = Reg_result;
            if (current == null)
            {
                return;
            }
            if (OnRecongnition != null)
            {
                OnRecongnition(current);
            }
            // Clear the message so it is not reported twice.
            current.err_msg = "";
        }

        /// <summary>Delivers a fresh result object that carries only the given error text.</summary>
        private void ReportFailure(string message)
        {
            Reg_result = new Recognizeresult { err_msg = message };
            SendMsg();
        }

        /// <summary>
        /// Ends the current cycle and delivers its outcome. The busy flag is released
        /// before the callback runs so the handler can start the next recording.
        /// </summary>
        private void Finish(Recognizeresult outcome)
        {
            Reg_result = outcome;
            cor = null;
            isRecongnting = false;
            SendMsg();
        }

        /// <summary>Converts float samples in [-1, 1] to 16-bit PCM bytes in platform byte order.</summary>
        private static byte[] ToPcm16(float[] samples)
        {
            short[] pcm = new short[samples.Length];
            for (int i = 0; i < samples.Length; i++)
            {
                // Clamp first: a sample slightly outside [-1, 1] would wrap around in the cast.
                pcm[i] = (short)(Mathf.Clamp(samples[i], -1f, 1f) * short.MaxValue);
            }
            byte[] bytes = new byte[pcm.Length * 2];
            Buffer.BlockCopy(pcm, 0, bytes, 0, bytes.Length);
            return bytes;
        }

        /// <summary>
        /// Records for recordTime seconds, uploads the raw PCM audio and reports the
        /// parsed response (or a failure description) through the callback.
        /// </summary>
        IEnumerator Recongnition()
        {
            Debug.Log("开始识别");
            clip = Microphone.Start(deviceName, false, recordTime, rate);
            if (clip == null)
            {
                // Wait one frame so the callback never runs inside StartRecongnition itself.
                yield return null;
                Finish(new Recognizeresult { err_msg = "录音启动失败,请检查mic" });
                yield break;
            }

            yield return waittime;
            // End the current recording.
            Microphone.End(deviceName);

            if (string.IsNullOrEmpty(token))
            {
                // Without a token the server can only reject the request; skip the upload.
                Finish(new Recognizeresult { err_msg = "尚未获取到token,无法识别" });
                yield break;
            }

            // Copy the clip into an array sized from the clip itself.
            float[] samples = new float[clip.samples * clip.channels];
            clip.GetData(samples, 0);
            byte[] data = ToPcm16(samples);

            // None is not a model id; fall back to the value the request originally used.
            int devPid = RecognitionLanguage == Language.None ? (int)Language.Chinese : (int)RecognitionLanguage;
            string url = string.Format("{0}?cuid={1}&token={2}&dev_pid={3}",
                CongnitionAddress,
                UnityWebRequest.EscapeURL(SystemInfo.deviceUniqueIdentifier),
                UnityWebRequest.EscapeURL(token),
                devPid);

            Recognizeresult outcome;
            // The audio/pcm content type means the body must be the bare audio bytes,
            // so use a raw upload handler rather than a multipart form.
            using (UnityWebRequest request = new UnityWebRequest(url, UnityWebRequest.kHttpVerbPOST))
            {
                request.uploadHandler = new UploadHandlerRaw(data);
                request.downloadHandler = new DownloadHandlerBuffer();
                request.SetRequestHeader("Content-Type", "audio/pcm;rate=" + rate);
                yield return request.SendWebRequest();

                if (request.isNetworkError)
                {
                    print("Net error:" + request.error);
                    outcome = new Recognizeresult { err_msg = "Net error:" + request.error };
                }
                else if (request.responseCode == 200)
                {
                    outcome = ParseRecognition(request.downloadHandler.text);
                }
                else
                {
                    Debug.Log("状态码错误:" + request.responseCode);
                    outcome = new Recognizeresult { err_msg = "状态码错误:" + request.responseCode };
                }
            }

            Finish(outcome);
        }

        /// <summary>
        /// Turns the recognition response body into a result. Success is decided by
        /// err_no rather than by searching the text for "error", which would misfire
        /// whenever the recognized speech itself contains that word.
        /// </summary>
        private static Recognizeresult ParseRecognition(string body)
        {
            Recognizeresult parsed = null;
            try
            {
                parsed = JsonMapper.ToObject<Recognizeresult>(body);
            }
            catch (Exception ex)
            {
                Debug.Log("识别结果解析失败: " + ex.Message);
            }

            if (parsed != null && parsed.err_no == 0)
            {
                Debug.Log("成功获取数据:" + body);
                return parsed;
            }

            Debug.Log("识别失败:" + body);
            // Keep err_no / sn from the server when available, with the original message format.
            Recognizeresult failed = parsed ?? new Recognizeresult();
            failed.err_msg = "识别失败: " + body;
            return failed;
        }
    }
}


调用测试:

  private BaiDuSpeech baidu;//百度识别
  /// <summary>
  /// Adds the recognizer component, hooks up the result handler and initializes it
  /// with the account settings.
  /// </summary>
  private void Start()
  {
      baidu = gameObject.AddComponent<BaiDuSpeech>();

      // The handler must be registered before Init is called.
      baidu.OnRecongnition += OnGetResult;

      var setting = new SpeechSetting();
      setting.AppID = "**********";// replace with your own values
      setting.ApiKey = "*******";
      setting.SecretKey = "******";
      setting.SpaceTime = 3;

      baidu.Init(setting);
  }
 /// <summary>
 /// Handles a recognition result: logs the first candidate on success, otherwise logs
 /// a failure. Tolerates a null message, a null err_msg and an empty result list.
 /// </summary>
 /// <param name="msg">Result delivered by BaiDuSpeech; may describe a failure.</param>
 private void OnGetResult(Recognizeresult msg)
 {
     if (msg == null)
     {
         Debug.Log("识别失败");
         return;
     }

     Debug.Log("msg.corpus_no " + msg.corpus_no + "\nmsg.err_msg" + msg.err_msg + " \nmsg.sn " + msg.sn);

     // err_msg is null on a freshly created result, and result is absent on failures.
     bool reportedSuccess = msg.err_msg != null && msg.err_msg.Contains("success");
     if (reportedSuccess && msg.result != null && msg.result.Count > 0)
     {
         // result holds the candidate transcriptions; the first one is normally used.
         Debug.Log("识别成功" + msg.result[0]);
     }
     else
     {
         Debug.Log("识别失败");
     }
 }

 亲测可用,项目使用中,如有疑问欢迎提出!

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章