想要使用html5網頁實現語音識別,大概流程:用戶點擊錄音按鈕,進行語音錄入,錄入後上傳語音,後臺接收到請求後,會調用語音識別的方法,最後返回識別結果,前端接收到結果後再進行後續處理。
參考:https://blog.csdn.net/qq_33609401/article/details/78172080,這裏面寫得很詳細,HZRecorder.js可以去這裏複製。
所以技術上主要分爲兩個部分:語音獲取、語音識別;
一、語音獲取
在 html 上增加錄音按鈕,使用 HZRecorder.js 調用設備的錄音功能。(注意:手機端瀏覽器必須在 https 環境下(即已配置 SSL 證書)才能調用錄音;有人說打包成 App 就可以繞過,但實測沒有 SSL 證書時,即使打包,調用也一樣會失敗。)
前端:
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="viewport" content="width=device-width,height=device-height, initial-scale=1.0, maximum-scale=1.0, user-scalable=0" />
<title></title>
</head>
<body>
<div>
<audio controls autoplay></audio>
<input onclick="startRecording()" type="button" value="錄音" />
<input onclick="stopRecording()" type="button" value="停止" />
<input onclick="playRecording()" type="button" value="播放" />
<input onclick="uploadAudio()" type="button" value="提交" />
</div>
<script type="text/javascript" src="HZRecorder.js"></script>
<script>
var recorder;                                    // set once the user grants mic access
var audio = document.querySelector('audio');     // playback target for the recording

// Ask for microphone access and start capturing.
function startRecording() {
    HZRecorder.get(function (rec) {
        recorder = rec;
        recorder.start();
    });
}
// FIX: guard every handler below — the original threw
// "recorder is undefined" when 停止/播放/提交 was clicked before 錄音.
function stopRecording() {
    if (recorder) recorder.stop();
}
function playRecording() {
    if (recorder) recorder.play(audio);
}
// Upload the WAV blob to the servlet; the callback maps XHR events to states.
function uploadAudio() {
    if (!recorder) return;
    recorder.upload("http://localhost:8080/****/UploadVideoServlet", function (state, e) {
        switch (state) {
            case 'uploading':
                break;
            case 'ok':
                alert("上傳成功");
                break;
            case 'error':
                alert("上傳失敗");
                break;
            case 'cancel':
                alert("上傳被取消");
                break;
        }
    });
}
</script>
</body>
</html>
後臺:在UploadVideoServlet中處理上傳和調用語音識別代碼。
@WebServlet("/UploadVideoServlet")
public class UploadVideoServlet extends HttpServlet {
	private static final long serialVersionUID = 1L;

	public UploadVideoServlet() {
		super();
	}

	/**
	 * GET simply delegates to POST so both verbs behave the same.
	 * FIX: the original also wrote "Served at: ..." before delegating,
	 * which polluted every response body.
	 */
	protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		this.doPost(request, response);
	}

	/**
	 * Receives the multipart upload produced by HZRecorder.js (field "audioData"),
	 * feeds the audio bytes to the speech-recognition client, and writes a status
	 * message back to the caller.
	 */
	protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		String message;
		try {
			DiskFileItemFactory factory = new DiskFileItemFactory();
			ServletFileUpload upload = new ServletFileUpload(factory);
			List<FileItem> list = upload.parseRequest(request);
			for (FileItem item : list) {
				// Skip ordinary form fields; only file parts carry audio bytes.
				if (item.isFormField()) {
					continue;
				}
				// Invoke the speech-recognition client
				// Sample.main(item.get()); // Baidu
				WebIAT.main(item.get()); // iFLYTEK
			}
			message = "文件上傳成功!";
		} catch (Exception e) {
			message = "文件上傳失敗!";
			e.printStackTrace();
		}
		// FIX: the original computed "message" but never sent it, so the
		// front-end received an empty body and could not act on the result.
		response.setContentType("text/plain; charset=UTF-8");
		response.getWriter().write(message);
	}
}
二、語音識別:使用科大訊飛或者百度提供的api實現。
(不論科大訊飛或者百度:(1) 都需要先去申請 APPID 、API_KEY,具體步驟去官網看看就知道了,(2)都提供了幾種方式,我沒有依次嘗試)
1、科大訊飛示例:
參考 https://doc.xfyun.cn/rest_api/%E8%AF%AD%E9%9F%B3%E5%90%AC%E5%86%99.html
public class WebIAT {
	// Dictation (IAT) WebAPI endpoint
	private static final String WEBIAT_URL = "http://api.xfyun.cn/v1/service/v1/iat";
	// App ID (must be a "webapi" type application with the dictation service enabled;
	// see http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=36481 for how to create one)
	private static final String APPID = "*******";
	// API key (console -> my application -> voice dictation -> apikey of the service)
	private static final String API_KEY = "*************************";
	// Audio encoding ("raw" = uncompressed PCM/WAV)
	private static final String AUE = "raw";
	// Engine type (16 kHz mandarin)
	private static final String ENGINE_TYPE = "sms16k";
	// Trailing-silence endpoint in ms (valid range 0-10000)
	private static final String VAD_EOS = "10000";

	/**
	 * Sends the given audio bytes to the dictation WebAPI and returns the raw
	 * JSON response. FIX: the original returned void, so callers (the upload
	 * servlet) had no way to use the recognized text.
	 *
	 * @param bytes raw audio in the format declared by {@link #AUE}
	 * @return the JSON response body from the API
	 * @throws IOException on network failure
	 */
	public static String main(byte[] bytes) throws IOException {
		Map<String, String> header = buildHttpHeader();
		// The API expects the audio Base64-encoded and form-urlencoded.
		String audioBase64 = new String(Base64.encodeBase64(bytes), "UTF-8");
		String result = HttpUtil.doPost1(WEBIAT_URL, header, "audio=" + URLEncoder.encode(audioBase64, "UTF-8"));
		System.out.println("聽寫 WebAPI 接口調用結果:" + result);
		return result;
	}

	// If the returned code is an error code, see https://www.xfyun.cn/document/error-code
	/**
	 * Builds the authentication headers: X-CheckSum = md5(apiKey + curTime + base64(param)).
	 */
	private static Map<String, String> buildHttpHeader() throws UnsupportedEncodingException {
		String curTime = System.currentTimeMillis() / 1000L + "";
		String param = "{\"aue\":\""+AUE+"\""+",\"engine_type\":\"" + ENGINE_TYPE + "\""+",\"vad_eos\":\"" + VAD_EOS + "\"}";
		// FIX: specify UTF-8 explicitly instead of relying on the platform default
		// charset when converting the Base64 bytes back to a String.
		String paramBase64 = new String(Base64.encodeBase64(param.getBytes("UTF-8")), "UTF-8");
		String checkSum = DigestUtils.md5Hex(API_KEY + curTime + paramBase64);
		Map<String, String> header = new HashMap<String, String>();
		header.put("Content-Type", "application/x-www-form-urlencoded; charset=utf-8");
		header.put("X-Param", paramBase64);
		header.put("X-CurTime", curTime);
		header.put("X-CheckSum", checkSum);
		header.put("X-Appid", APPID);
		return header;
	}
}
2、百度示例:
參考 https://ai.baidu.com/docs#/ASR-Online-Java-SDK/top
public class Sample {
private static final String serverURL = "http://vop.baidu.com/server_api";
private static String token = "";
//put your own params here
private static final String apiKey = "****************"; //改爲自己的apiKey
private static final String secretKey = "*******************"; //改爲自己的secretKey
private static final String cuid = "*******"; //唯一標識,可以寫機器碼
public static void main(byte[] bytes) throws Exception {
getToken();
method2(bytes);
}
private static void getToken() throws Exception {
String getTokenURL = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials" +"&client_id=" + apiKey + "&client_secret=" + secretKey;
HttpURLConnection conn = (HttpURLConnection) new URL(getTokenURL).openConnection();
token = new JSONObject(printResponse(conn)).getString("access_token");
}
private static void method2(byte[] bytes) throws Exception {
HttpURLConnection conn = (HttpURLConnection) new URL(serverURL+ "?cuid=" + cuid + "&token=" + token).openConnection();
// add request header
conn.setRequestMethod("POST");
conn.setRequestProperty("Content-Type", "audio/wav; rate=16000");
conn.setDoInput(true);
conn.setDoOutput(true);
// send request
DataOutputStream wr = new DataOutputStream(conn.getOutputStream());
wr.write(bytes);
wr.flush();
wr.close();
printResponse(conn);
}
private static String printResponse(HttpURLConnection conn) throws Exception {
if (conn.getResponseCode() != 200) {
// request error
return "";
}
InputStream is = conn.getInputStream();
BufferedReader rd = new BufferedReader(new InputStreamReader(is));
String line;
StringBuffer response = new StringBuffer();
while ((line = rd.readLine()) != null) {
response.append(line);
response.append('\r');
}
rd.close();
System.out.println(new JSONObject(response.toString()).toString(4));
return response.toString();
}
轉換結果示例:識別結果以 JSON 字符串形式返回,兩種實現都會把它打印到後臺控制檯(見代碼中的 System.out.println)。
拿到結果後,前端就可以進行後續處理了。
附: HZRecorder.js
(function (window) {
    // Shims for prefixed browser APIs.
    window.URL = window.URL || window.webkitURL;
    // FIX: older WebKit browsers only expose webkitAudioContext; without this
    // shim `new AudioContext()` below throws on those browsers.
    window.AudioContext = window.AudioContext || window.webkitAudioContext;
    navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia || navigator.msGetUserMedia;

    /**
     * Records microphone audio from a getUserMedia stream and encodes it as
     * a mono PCM WAV blob.
     *
     * @param stream MediaStream from getUserMedia
     * @param config optional { sampleBits: 8|16, sampleRate: number }
     */
    var HZRecorder = function (stream, config) {
        config = config || {};
        config.sampleBits = config.sampleBits || 16;      // output sample size: 8 or 16 bits
        config.sampleRate = config.sampleRate || (16000); // output sample rate

        var context = new AudioContext();
        var audioInput = context.createMediaStreamSource(stream);
        var recorder = context.createScriptProcessor(4096, 1, 1);

        var audioData = {
            size: 0                                   // total captured sample count
            , buffer: []                              // captured Float32Array chunks
            , inputSampleRate: context.sampleRate     // device capture rate
            , inputSampleBits: 16                     // capture sample size
            , outputSampleRate: config.sampleRate     // target rate after decimation
            , oututSampleBits: config.sampleBits      // target sample size (original field name kept — internal only)
            , input: function (data) {
                this.buffer.push(new Float32Array(data));
                this.size += data.length;
            }
            // Merge all chunks, then downsample by simple decimation
            // (keep every Nth sample, N = inputRate / outputRate).
            , compress: function () {
                var data = new Float32Array(this.size);
                var offset = 0;
                for (var i = 0; i < this.buffer.length; i++) {
                    data.set(this.buffer[i], offset);
                    offset += this.buffer[i].length;
                }
                var compression = parseInt(this.inputSampleRate / this.outputSampleRate);
                // FIX: the length must be an integer — a fractional value makes
                // `new Float32Array(length)` throw a RangeError.
                var length = Math.floor(data.length / compression);
                var result = new Float32Array(length);
                var index = 0, j = 0;
                while (index < length) {
                    result[index] = data[j];
                    j += compression;
                    index++;
                }
                return result;
            }
            // Wrap the compressed samples in a 44-byte RIFF/WAVE header.
            , encodeWAV: function () {
                var sampleRate = Math.min(this.inputSampleRate, this.outputSampleRate);
                var sampleBits = Math.min(this.inputSampleBits, this.oututSampleBits);
                var bytes = this.compress();
                var dataLength = bytes.length * (sampleBits / 8);
                var buffer = new ArrayBuffer(44 + dataLength);
                var data = new DataView(buffer);
                var channelCount = 1; // mono
                var offset = 0;
                var writeString = function (str) {
                    for (var i = 0; i < str.length; i++) {
                        data.setUint8(offset + i, str.charCodeAt(i));
                    }
                }
                // RIFF chunk descriptor
                writeString('RIFF'); offset += 4;
                // total file size minus the first 8 bytes
                data.setUint32(offset, 36 + dataLength, true); offset += 4;
                writeString('WAVE'); offset += 4;
                // "fmt " sub-chunk
                writeString('fmt '); offset += 4;
                // fmt chunk size, always 16 for PCM
                data.setUint32(offset, 16, true); offset += 4;
                // audio format: 1 = linear PCM
                data.setUint16(offset, 1, true); offset += 2;
                // channel count
                data.setUint16(offset, channelCount, true); offset += 2;
                // sample rate
                data.setUint32(offset, sampleRate, true); offset += 4;
                // byte rate = channels * sampleRate * bytesPerSample
                data.setUint32(offset, channelCount * sampleRate * (sampleBits / 8), true); offset += 4;
                // block align = channels * bytesPerSample
                data.setUint16(offset, channelCount * (sampleBits / 8), true); offset += 2;
                // bits per sample
                data.setUint16(offset, sampleBits, true); offset += 2;
                // "data" sub-chunk
                writeString('data'); offset += 4;
                // sample data size = total size - 44
                data.setUint32(offset, dataLength, true); offset += 4;
                // Write samples, clamping each float to [-1, 1] first.
                if (sampleBits === 8) {
                    for (var i = 0; i < bytes.length; i++, offset++) {
                        var s = Math.max(-1, Math.min(1, bytes[i]));
                        var val = s < 0 ? s * 0x8000 : s * 0x7FFF;
                        val = parseInt(255 / (65535 / (val + 32768)));
                        // FIX: 8-bit WAV samples are unsigned (0-255); the original
                        // used setInt8 with a bogus third argument (setInt8 has no
                        // endianness parameter).
                        data.setUint8(offset, val);
                    }
                } else {
                    for (var i = 0; i < bytes.length; i++, offset += 2) {
                        var s = Math.max(-1, Math.min(1, bytes[i]));
                        data.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
                    }
                }
                return new Blob([data], { type: 'audio/wav' });
            }
        };

        // Start recording: route mic input through the script processor.
        this.start = function () {
            audioInput.connect(recorder);
            recorder.connect(context.destination);
        }
        // Stop recording.
        this.stop = function () {
            recorder.disconnect();
        }
        // Stop and return the recording as a WAV blob.
        this.getBlob = function () {
            this.stop();
            return audioData.encodeWAV();
        }
        // Play the recording back through the given <audio> element.
        this.play = function (audio) {
            audio.src = window.URL.createObjectURL(this.getBlob());
        }
        // Upload the recording as multipart form data (field name "audioData");
        // callback receives ('uploading'|'ok'|'error'|'cancel', event).
        this.upload = function (url, callback) {
            var fd = new FormData();
            fd.append("audioData", this.getBlob());
            var xhr = new XMLHttpRequest();
            if (callback) {
                xhr.upload.addEventListener("progress", function (e) {
                    callback('uploading', e);
                }, false);
                xhr.addEventListener("load", function (e) {
                    callback('ok', e);
                }, false);
                xhr.addEventListener("error", function (e) {
                    callback('error', e);
                }, false);
                xhr.addEventListener("abort", function (e) {
                    callback('cancel', e);
                }, false);
            }
            xhr.open("POST", url);
            xhr.send(fd);
        }
        // Accumulate each processed buffer while recording.
        recorder.onaudioprocess = function (e) {
            audioData.input(e.inputBuffer.getChannelData(0));
        }
    };

    // Alert the user, then abort by throwing.
    HZRecorder.throwError = function (message) {
        alert(message);
        throw new function () { this.toString = function () { return message; } }
    }
    // Whether this browser exposes getUserMedia at all.
    HZRecorder.canRecording = (navigator.getUserMedia != null);
    // Ask for microphone access and hand a ready HZRecorder to the callback.
    HZRecorder.get = function (callback, config) {
        if (callback) {
            if (navigator.getUserMedia) {
                navigator.getUserMedia(
                    { audio: true } // audio only
                    , function (stream) {
                        var rec = new HZRecorder(stream, config);
                        callback(rec);
                    }
                    , function (error) {
                        console.log(error);
                        switch (error.code || error.name) {
                            case 'PERMISSION_DENIED':
                            case 'PermissionDeniedError':
                                HZRecorder.throwError('用戶拒絕提供信息。');
                                break;
                            case 'NOT_SUPPORTED_ERROR':
                            case 'NotSupportedError':
                                HZRecorder.throwError('瀏覽器不支持硬件設備。');
                                break;
                            case 'MANDATORY_UNSATISFIED_ERROR':
                            case 'MandatoryUnsatisfiedError':
                                HZRecorder.throwError('無法發現指定的硬件設備。');
                                break;
                            default:
                                HZRecorder.throwError('無法打開麥克風。異常信息:' + (error.code || error.name));
                                break;
                        }
                    });
            } else {
                // FIX: was HZRecorder.throwErr (undefined) — in unsupported
                // browsers this crashed with a TypeError instead of alerting.
                HZRecorder.throwError('當前瀏覽器不支持錄音功能。'); return;
            }
        }
    }

    window.HZRecorder = HZRecorder;
})(window);