Python調用微軟語音識別

Python調用微軟語音識別

參加某個項目時用到了微軟的語音識別,根據官方給的C#版寫的,沒有什麼註釋

調用語音識別的封裝類

MSspeechAPI_class.py

# -*- coding:utf-8 -*-
import certifi
import pycurl
import requests
import os
import json
import uuid
from StringIO import StringIO


def byteify(input_data):
    # convert json to list
    if isinstance(input_data, dict):
        return {byteify(key): byteify(value) for key, value in input_data.iteritems()}
    elif isinstance(input_data, list):
        return [byteify(element) for element in input_data]
    elif isinstance(input_data, unicode):
        return input_data.encode('utf-8')
    else:
        return input_data


def read_in_chunks(file_object, chunk_size=1024):
    # post chunk encoding data
    while True:
        data = file_object.read(chunk_size)
        if not data:
            break
        yield data


class Authentication:
    # get token
    def __init__(self, client_id, client_secret):
        self.AccessUrl = "https://oxford-speech.cloudapp.net/token/issueToken"
        self._clientId = client_id
        # provided by MS
        self._clientSecret = client_secret
        # provided by MS
        self.request_data = 'grant_type=client_credentials&client_id=' + self._clientId+'&client_secret='
        self.request_data += self._clientSecret + '&scope=https://speech.platform.bing.com'
        # opt must be a string object not a unicode object
        data_l = len(self.request_data)
        http_header = [
                       "Content-Type:application/x-www-form-urlencoded"
                       ]

        storage = StringIO()
        # the way to get response and print it
        c = pycurl.Curl()
        c.setopt(pycurl.CAINFO, certifi.old_where())
        c.setopt(pycurl.URL, self.AccessUrl)
        c.setopt(c.HTTPHEADER, http_header)
        c.setopt(c.POST, 1)
        c.setopt(c.POSTFIELDSIZE, data_l)
        c.setopt(c.CONNECTTIMEOUT, 30)
        c.setopt(c.TIMEOUT, 30)
        c.setopt(c.POSTFIELDS, self.request_data)
        # --------------------------------------------
        c.setopt(c.WRITEFUNCTION, storage.write)
        # --------------------------------------------
        c.perform()
        c.close()
        body = storage.getvalue()
        storage.close()
        self._token = byteify(json.loads(body.decode()))

    def getAccessToken(self):
        return self._token["access_token"]


class MsSpeechRequest:
    def __init__(self, audiofile, audioSamplerate=16000, clientid='', clientsecret='', locale='zh-CN', deviceOS='Rasbian'):
        if audiofile == None:
            print 'audio input wrong'
            return
        try:
            self._auth = Authentication(clientid, clientsecret)
        except Exception as e:
            print 'failed get access token.details:%s',e.__str__()
        self._RequestUri = "https://speech.platform.bing.com/recognize"
        self._RequestUri += "?scenarios=smd"
        self._RequestUri += "&appid="
        # input appid
        self._RequestUri += "&locale="+locale
        self._RequestUri += "&device.os="+deviceOS
        self._RequestUri += "&version=3.0"
        self._RequestUri += "&format=json"
        self._RequestUri += "&instanceid="
        # input instance id
        self._RequestUri += "&requestid="+str(uuid.uuid4())
        self._audioFile = audiofile
        self._audioSamplerate = audioSamplerate.__str__()
        self._token = self._auth.getAccessToken()
        # print self._token
        self._response = ''

    def post_request(self):
        headers = {}
        headers['Accept'] = 'application/json;text/xml'
        headers['Content-Type'] = 'audio/wav; codec=\"audio/pcm\"; samplerate='+self._audioSamplerate
        headers['Authorization'] = 'Bearer '+'%s' % self._token
        try:
            with open(self._audioFile,'rb') as f:
                r=requests.post(self._RequestUri, data=read_in_chunks(f), headers=headers, stream=True)
                print r
                self._response = byteify(r.text)
                print self._response
        except Exception as e:
            print 'failed get request response. Details:%s',e.__str__()

    def returnResult(self):
        self.post_request()
        return self._response

具體調用

# -*- coding:utf-8 -*-
from MSspeechAPI_class import MsSpeechRequest
def main():
    requset = MsSpeechRequest(audiofile=('output.wav'))
    response = requset.returnResult()
    print response

main()
發佈了32 篇原創文章 · 獲贊 17 · 訪問量 3萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章