想要使用html5網頁實現語音識別,大概流程:用戶點擊錄音按鈕,進行語音錄入,錄入後上傳語音,後臺接收到請求後,會調用語音識別的方法,最後返回識別結果,前端接收到結果後再進行後續處理。
參考:https://blog.csdn.net/qq_33609401/article/details/78172080,這裏面寫得很詳細,HZRecorder.js可以去這裏複製。
所以技術上主要分爲兩個部分:語音獲取、語音識別;
一、語音獲取
在 html 上增加錄音按鈕,使用 HZRecorder.js 調用設備的錄音功能。(注意:手機端瀏覽器必須在 https 環境下(即已配置 SSL 證書)才能調用錄音;有人說打包成 App 就可以繞過,但實測沒有 SSL 證書時,即使打包,調用也一樣會失敗。)
前端:
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="viewport" content="width=device-width,height=device-height, initial-scale=1.0, maximum-scale=1.0, user-scalable=0" />
<title></title>
</head>
<body>
<div>
<audio controls autoplay></audio>
<input onclick="startRecording()" type="button" value="錄音" />
<input onclick="stopRecording()" type="button" value="停止" />
<input onclick="playRecording()" type="button" value="播放" />
<input onclick="uploadAudio()" type="button" value="提交" />
</div>
<script type="text/javascript" src="HZRecorder.js"></script>
<script>
var recorder;                                    // set once the user grants mic access
var audio = document.querySelector('audio');     // playback target for the recording

// Ask for microphone access and start capturing.
function startRecording() {
    HZRecorder.get(function (rec) {
        recorder = rec;
        recorder.start();
    });
}
// FIX: guard every handler below — the original threw
// "recorder is undefined" when 停止/播放/提交 was clicked before 錄音.
function stopRecording() {
    if (recorder) recorder.stop();
}
function playRecording() {
    if (recorder) recorder.play(audio);
}
// Upload the WAV blob to the servlet; the callback maps XHR events to states.
function uploadAudio() {
    if (!recorder) return;
    recorder.upload("http://localhost:8080/****/UploadVideoServlet", function (state, e) {
        switch (state) {
            case 'uploading':
                break;
            case 'ok':
                alert("上傳成功");
                break;
            case 'error':
                alert("上傳失敗");
                break;
            case 'cancel':
                alert("上傳被取消");
                break;
        }
    });
}
</script>
</body>
</html>
後臺:在UploadVideoServlet中處理上傳和調用語音識別代碼。
@WebServlet("/UploadVideoServlet")
public class UploadVideoServlet extends HttpServlet {
	private static final long serialVersionUID = 1L;

	public UploadVideoServlet() {
		super();
	}

	/**
	 * GET simply delegates to POST so both verbs behave the same.
	 * FIX: the original also wrote "Served at: ..." before delegating,
	 * which polluted every response body.
	 */
	protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		this.doPost(request, response);
	}

	/**
	 * Receives the multipart upload produced by HZRecorder.js (field "audioData"),
	 * feeds the audio bytes to the speech-recognition client, and writes a status
	 * message back to the caller.
	 */
	protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		String message;
		try {
			DiskFileItemFactory factory = new DiskFileItemFactory();
			ServletFileUpload upload = new ServletFileUpload(factory);
			List<FileItem> list = upload.parseRequest(request);
			for (FileItem item : list) {
				// Skip ordinary form fields; only file parts carry audio bytes.
				if (item.isFormField()) {
					continue;
				}
				// Invoke the speech-recognition client
				// Sample.main(item.get()); // Baidu
				WebIAT.main(item.get()); // iFLYTEK
			}
			message = "文件上傳成功!";
		} catch (Exception e) {
			message = "文件上傳失敗!";
			e.printStackTrace();
		}
		// FIX: the original computed "message" but never sent it, so the
		// front-end received an empty body and could not act on the result.
		response.setContentType("text/plain; charset=UTF-8");
		response.getWriter().write(message);
	}
}
二、語音識別:使用科大訊飛或者百度提供的api實現。
(不論科大訊飛或者百度:(1) 都需要先去申請 APPID 、API_KEY,具體步驟去官網看看就知道了,(2)都提供了幾種方式,我沒有依次嘗試)
1、科大訊飛示例:
參考 https://doc.xfyun.cn/rest_api/%E8%AF%AD%E9%9F%B3%E5%90%AC%E5%86%99.html
public class WebIAT {
	// Dictation (IAT) WebAPI endpoint
	private static final String WEBIAT_URL = "http://api.xfyun.cn/v1/service/v1/iat";
	// App ID (must be a "webapi" type application with the dictation service enabled;
	// see http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=36481 for how to create one)
	private static final String APPID = "*******";
	// API key (console -> my application -> voice dictation -> apikey of the service)
	private static final String API_KEY = "*************************";
	// Audio encoding ("raw" = uncompressed PCM/WAV)
	private static final String AUE = "raw";
	// Engine type (16 kHz mandarin)
	private static final String ENGINE_TYPE = "sms16k";
	// Trailing-silence endpoint in ms (valid range 0-10000)
	private static final String VAD_EOS = "10000";

	/**
	 * Sends the given audio bytes to the dictation WebAPI and returns the raw
	 * JSON response. FIX: the original returned void, so callers (the upload
	 * servlet) had no way to use the recognized text.
	 *
	 * @param bytes raw audio in the format declared by {@link #AUE}
	 * @return the JSON response body from the API
	 * @throws IOException on network failure
	 */
	public static String main(byte[] bytes) throws IOException {
		Map<String, String> header = buildHttpHeader();
		// The API expects the audio Base64-encoded and form-urlencoded.
		String audioBase64 = new String(Base64.encodeBase64(bytes), "UTF-8");
		String result = HttpUtil.doPost1(WEBIAT_URL, header, "audio=" + URLEncoder.encode(audioBase64, "UTF-8"));
		System.out.println("聽寫 WebAPI 接口調用結果:" + result);
		return result;
	}

	// If the returned code is an error code, see https://www.xfyun.cn/document/error-code
	/**
	 * Builds the authentication headers: X-CheckSum = md5(apiKey + curTime + base64(param)).
	 */
	private static Map<String, String> buildHttpHeader() throws UnsupportedEncodingException {
		String curTime = System.currentTimeMillis() / 1000L + "";
		String param = "{\"aue\":\""+AUE+"\""+",\"engine_type\":\"" + ENGINE_TYPE + "\""+",\"vad_eos\":\"" + VAD_EOS + "\"}";
		// FIX: specify UTF-8 explicitly instead of relying on the platform default
		// charset when converting the Base64 bytes back to a String.
		String paramBase64 = new String(Base64.encodeBase64(param.getBytes("UTF-8")), "UTF-8");
		String checkSum = DigestUtils.md5Hex(API_KEY + curTime + paramBase64);
		Map<String, String> header = new HashMap<String, String>();
		header.put("Content-Type", "application/x-www-form-urlencoded; charset=utf-8");
		header.put("X-Param", paramBase64);
		header.put("X-CurTime", curTime);
		header.put("X-CheckSum", checkSum);
		header.put("X-Appid", APPID);
		return header;
	}
}
2、百度示例:
參考 https://ai.baidu.com/docs#/ASR-Online-Java-SDK/top
public class Sample {
private static final String serverURL = "http://vop.baidu.com/server_api";
private static String token = "";
//put your own params here
private static final String apiKey = "****************"; //改爲自己的apiKey
private static final String secretKey = "*******************"; //改爲自己的secretKey
private static final String cuid = "*******"; //唯一標識,可以寫機器碼
public static void main(byte[] bytes) throws Exception {
getToken();
method2(bytes);
}
private static void getToken() throws Exception {
String getTokenURL = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials" +"&client_id=" + apiKey + "&client_secret=" + secretKey;
HttpURLConnection conn = (HttpURLConnection) new URL(getTokenURL).openConnection();
token = new JSONObject(printResponse(conn)).getString("access_token");
}
private static void method2(byte[] bytes) throws Exception {
HttpURLConnection conn = (HttpURLConnection) new URL(serverURL+ "?cuid=" + cuid + "&token=" + token).openConnection();
// add request header
conn.setRequestMethod("POST");
conn.setRequestProperty("Content-Type", "audio/wav; rate=16000");
conn.setDoInput(true);
conn.setDoOutput(true);
// send request
DataOutputStream wr = new DataOutputStream(conn.getOutputStream());
wr.write(bytes);
wr.flush();
wr.close();
printResponse(conn);
}
private static String printResponse(HttpURLConnection conn) throws Exception {
if (conn.getResponseCode() != 200) {
// request error
return "";
}
InputStream is = conn.getInputStream();
BufferedReader rd = new BufferedReader(new InputStreamReader(is));
String line;
StringBuffer response = new StringBuffer();
while ((line = rd.readLine()) != null) {
response.append(line);
response.append('\r');
}
rd.close();
System.out.println(new JSONObject(response.toString()).toString(4));
return response.toString();
}
轉換結果示例:識別結果以 JSON 字符串形式返回,兩種實現都會把它打印到後臺控制檯(見代碼中的 System.out.println)。
拿到結果後,前端就可以進行後續處理了。
附: HZRecorder.js
(function (window) {
    // Shims for prefixed browser APIs.
    window.URL = window.URL || window.webkitURL;
    // FIX: older WebKit browsers only expose webkitAudioContext; without this
    // shim `new AudioContext()` below throws on those browsers.
    window.AudioContext = window.AudioContext || window.webkitAudioContext;
    navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia || navigator.msGetUserMedia;

    /**
     * Records microphone audio from a getUserMedia stream and encodes it as
     * a mono PCM WAV blob.
     *
     * @param stream MediaStream from getUserMedia
     * @param config optional { sampleBits: 8|16, sampleRate: number }
     */
    var HZRecorder = function (stream, config) {
        config = config || {};
        config.sampleBits = config.sampleBits || 16;      // output sample size: 8 or 16 bits
        config.sampleRate = config.sampleRate || (16000); // output sample rate

        var context = new AudioContext();
        var audioInput = context.createMediaStreamSource(stream);
        var recorder = context.createScriptProcessor(4096, 1, 1);

        var audioData = {
            size: 0                                   // total captured sample count
            , buffer: []                              // captured Float32Array chunks
            , inputSampleRate: context.sampleRate     // device capture rate
            , inputSampleBits: 16                     // capture sample size
            , outputSampleRate: config.sampleRate     // target rate after decimation
            , oututSampleBits: config.sampleBits      // target sample size (original field name kept — internal only)
            , input: function (data) {
                this.buffer.push(new Float32Array(data));
                this.size += data.length;
            }
            // Merge all chunks, then downsample by simple decimation
            // (keep every Nth sample, N = inputRate / outputRate).
            , compress: function () {
                var data = new Float32Array(this.size);
                var offset = 0;
                for (var i = 0; i < this.buffer.length; i++) {
                    data.set(this.buffer[i], offset);
                    offset += this.buffer[i].length;
                }
                var compression = parseInt(this.inputSampleRate / this.outputSampleRate);
                // FIX: the length must be an integer — a fractional value makes
                // `new Float32Array(length)` throw a RangeError.
                var length = Math.floor(data.length / compression);
                var result = new Float32Array(length);
                var index = 0, j = 0;
                while (index < length) {
                    result[index] = data[j];
                    j += compression;
                    index++;
                }
                return result;
            }
            // Wrap the compressed samples in a 44-byte RIFF/WAVE header.
            , encodeWAV: function () {
                var sampleRate = Math.min(this.inputSampleRate, this.outputSampleRate);
                var sampleBits = Math.min(this.inputSampleBits, this.oututSampleBits);
                var bytes = this.compress();
                var dataLength = bytes.length * (sampleBits / 8);
                var buffer = new ArrayBuffer(44 + dataLength);
                var data = new DataView(buffer);
                var channelCount = 1; // mono
                var offset = 0;
                var writeString = function (str) {
                    for (var i = 0; i < str.length; i++) {
                        data.setUint8(offset + i, str.charCodeAt(i));
                    }
                }
                // RIFF chunk descriptor
                writeString('RIFF'); offset += 4;
                // total file size minus the first 8 bytes
                data.setUint32(offset, 36 + dataLength, true); offset += 4;
                writeString('WAVE'); offset += 4;
                // "fmt " sub-chunk
                writeString('fmt '); offset += 4;
                // fmt chunk size, always 16 for PCM
                data.setUint32(offset, 16, true); offset += 4;
                // audio format: 1 = linear PCM
                data.setUint16(offset, 1, true); offset += 2;
                // channel count
                data.setUint16(offset, channelCount, true); offset += 2;
                // sample rate
                data.setUint32(offset, sampleRate, true); offset += 4;
                // byte rate = channels * sampleRate * bytesPerSample
                data.setUint32(offset, channelCount * sampleRate * (sampleBits / 8), true); offset += 4;
                // block align = channels * bytesPerSample
                data.setUint16(offset, channelCount * (sampleBits / 8), true); offset += 2;
                // bits per sample
                data.setUint16(offset, sampleBits, true); offset += 2;
                // "data" sub-chunk
                writeString('data'); offset += 4;
                // sample data size = total size - 44
                data.setUint32(offset, dataLength, true); offset += 4;
                // Write samples, clamping each float to [-1, 1] first.
                if (sampleBits === 8) {
                    for (var i = 0; i < bytes.length; i++, offset++) {
                        var s = Math.max(-1, Math.min(1, bytes[i]));
                        var val = s < 0 ? s * 0x8000 : s * 0x7FFF;
                        val = parseInt(255 / (65535 / (val + 32768)));
                        // FIX: 8-bit WAV samples are unsigned (0-255); the original
                        // used setInt8 with a bogus third argument (setInt8 has no
                        // endianness parameter).
                        data.setUint8(offset, val);
                    }
                } else {
                    for (var i = 0; i < bytes.length; i++, offset += 2) {
                        var s = Math.max(-1, Math.min(1, bytes[i]));
                        data.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
                    }
                }
                return new Blob([data], { type: 'audio/wav' });
            }
        };

        // Start recording: route mic input through the script processor.
        this.start = function () {
            audioInput.connect(recorder);
            recorder.connect(context.destination);
        }
        // Stop recording.
        this.stop = function () {
            recorder.disconnect();
        }
        // Stop and return the recording as a WAV blob.
        this.getBlob = function () {
            this.stop();
            return audioData.encodeWAV();
        }
        // Play the recording back through the given <audio> element.
        this.play = function (audio) {
            audio.src = window.URL.createObjectURL(this.getBlob());
        }
        // Upload the recording as multipart form data (field name "audioData");
        // callback receives ('uploading'|'ok'|'error'|'cancel', event).
        this.upload = function (url, callback) {
            var fd = new FormData();
            fd.append("audioData", this.getBlob());
            var xhr = new XMLHttpRequest();
            if (callback) {
                xhr.upload.addEventListener("progress", function (e) {
                    callback('uploading', e);
                }, false);
                xhr.addEventListener("load", function (e) {
                    callback('ok', e);
                }, false);
                xhr.addEventListener("error", function (e) {
                    callback('error', e);
                }, false);
                xhr.addEventListener("abort", function (e) {
                    callback('cancel', e);
                }, false);
            }
            xhr.open("POST", url);
            xhr.send(fd);
        }
        // Accumulate each processed buffer while recording.
        recorder.onaudioprocess = function (e) {
            audioData.input(e.inputBuffer.getChannelData(0));
        }
    };

    // Alert the user, then abort by throwing.
    HZRecorder.throwError = function (message) {
        alert(message);
        throw new function () { this.toString = function () { return message; } }
    }
    // Whether this browser exposes getUserMedia at all.
    HZRecorder.canRecording = (navigator.getUserMedia != null);
    // Ask for microphone access and hand a ready HZRecorder to the callback.
    HZRecorder.get = function (callback, config) {
        if (callback) {
            if (navigator.getUserMedia) {
                navigator.getUserMedia(
                    { audio: true } // audio only
                    , function (stream) {
                        var rec = new HZRecorder(stream, config);
                        callback(rec);
                    }
                    , function (error) {
                        console.log(error);
                        switch (error.code || error.name) {
                            case 'PERMISSION_DENIED':
                            case 'PermissionDeniedError':
                                HZRecorder.throwError('用戶拒絕提供信息。');
                                break;
                            case 'NOT_SUPPORTED_ERROR':
                            case 'NotSupportedError':
                                HZRecorder.throwError('瀏覽器不支持硬件設備。');
                                break;
                            case 'MANDATORY_UNSATISFIED_ERROR':
                            case 'MandatoryUnsatisfiedError':
                                HZRecorder.throwError('無法發現指定的硬件設備。');
                                break;
                            default:
                                HZRecorder.throwError('無法打開麥克風。異常信息:' + (error.code || error.name));
                                break;
                        }
                    });
            } else {
                // FIX: was HZRecorder.throwErr (undefined) — in unsupported
                // browsers this crashed with a TypeError instead of alerting.
                HZRecorder.throwError('當前瀏覽器不支持錄音功能。'); return;
            }
        }
    }

    window.HZRecorder = HZRecorder;
})(window);