Python學習筆記-WXPY語音信息識別

安裝依賴工具包:

pip install pyaudio

pip install pydub

pip install ffmpeg

pip install baidu_aip

語音信息識別代碼記錄

# -*- coding:utf-8 -*-

import os
import sys
import json
import urllib
import requests
from wxpy import *
from PIL import Image
from io import BytesIO
from aip import AipSpeech
from bs4 import BeautifulSoup
from pydub import AudioSegment

# Force the interpreter-wide default string encoding to UTF-8 when it is not
# already UTF-8; the branch is skipped otherwise.
# NOTE(review): reload(sys) is called before sys.setdefaultencoding --
# presumably because the setter is not reachable on the already-imported
# module in Python 2. Keep the two statements in this order; confirm against
# the Python 2 `site` module documentation.
default_encoding = 'utf-8'
if sys.getdefaultencoding() != default_encoding:
    reload(sys)
    sys.setdefaultencoding(default_encoding)

# Credentials handed to the AipSpeech constructor below.
# NOTE(review): these look like placeholder values; real keys should not be
# committed to source control -- confirm how they are supplied in practice.
BAIDU_SR_APP_ID = 'XXXXXXXX'
BAIDU_SR_API_KEY = 'ABCDu1EFg2hiJK0lmNO3P45q'
BAIDU_SR_SECRET_KEY = 'aBc12Def03g45hIjkLMNO6p7Qrs8txyz'

# Module-level speech client; the message handler calls client.asr() on it.
client = AipSpeech(BAIDU_SR_APP_ID, BAIDU_SR_API_KEY, BAIDU_SR_SECRET_KEY)


def handle_dt(keyword, max_size=10):
    """Search doutula.com for ``keyword`` and download matching JPG images.

    Images are saved into the ``picture`` directory next to this script
    (created if it does not exist yet).

    :param keyword: search keyword; an empty or ``None`` keyword performs no
        request and yields an empty list.
    :param max_size: maximum number of images to download. ``None`` means
        no limit; zero or a negative value yields an empty list.
    :return: list of downloaded file names (relative to the ``picture``
        directory), in page order.
    :raises requests.RequestException: if the search request fails or
        times out.
    """
    # Nothing to search for / nothing requested: skip the network round trip.
    if not keyword or (max_size is not None and max_size <= 0):
        return []

    # A timeout keeps the message handler from hanging on a stalled server.
    resp = requests.get('https://www.doutula.com/search',
                        params={'keyword': keyword}, timeout=10)
    b_soup = BeautifulSoup(resp.content, 'lxml')
    img_tags = b_soup.select(
        '#search-result-page > div > div > div > div > div.search-result.list-group-item > div > div > a > img')

    parent_dir = os.path.join(os.path.dirname(__file__), 'picture')
    if not os.path.isdir(parent_dir):
        os.makedirs(parent_dir)

    picture_name_list = []
    for img_tag in img_tags:
        link = img_tag.get('data-original')
        if link is None:
            continue
        # Strip before testing the suffix so trailing whitespace in the
        # attribute does not hide an otherwise valid link.
        link = link.strip()
        if not link.endswith('jpg'):
            continue
        # The file name is whatever follows the last '/' in the URL.
        picture_name = link.rsplit('/', 1)[-1]
        urllib.urlretrieve(link, os.path.join(parent_dir, picture_name))
        picture_name_list.append(picture_name)
        if max_size is not None and len(picture_name_list) >= max_size:
            break
    return picture_name_list


# Convert an MP3 file to a WAV file
def mp3_to_wav(mp3_file_name):
    """Convert an MP3 in the ``recording`` directory to a WAV file beside it.

    :param mp3_file_name: name of the MP3 file inside the ``recording``
        directory located next to this script.
    :return: name of the WAV file that was written (same base name with a
        ``.wav`` extension), relative to the ``recording`` directory.
    """
    parent_dir = os.path.join(os.path.dirname(__file__), 'recording')

    # Only point pydub at the bundled Windows ffmpeg build when it is actually
    # present; otherwise leave pydub's own converter setting untouched.
    ffmpeg_path = 'F:\\develop\\ffmpeg\\ffmpeg-win64-static\\bin\\ffmpeg.exe'
    if os.path.isfile(ffmpeg_path):
        AudioSegment.converter = ffmpeg_path

    with open(os.path.join(parent_dir, mp3_file_name), 'rb') as fp:
        audio = AudioSegment.from_mp3(fp)

    # splitext copes with names that have no extension, where rindex('.')
    # would raise ValueError.
    wav_file_name = os.path.splitext(mp3_file_name)[0] + '.wav'
    audio.export(os.path.join(parent_dir, wav_file_name), format='wav')
    return wav_file_name


# Character ramp for ASCII-art rendering; handle_image_char indexes into this
# list by gray level. The backslash is spelled "\\" because "\|" is not a valid
# escape sequence (newer Python versions warn about it); the resulting
# characters are unchanged.
ascii_char = list("$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\\|()1{}[]?-_+~<>i!lI;:,\"^`'. ")


def handle_image(image_file, txt_file=None, width=80, height=80):
    """Render an image as ASCII art.

    The image is resized to ``width`` x ``height`` pixels and every pixel is
    mapped to one character via ``handle_image_char``; each pixel row becomes
    one text line terminated by a newline.

    :param image_file: path (or file object) accepted by ``Image.open``.
    :param txt_file: optional path; when given, the text is also written
        to this file.
    :param width: number of characters per line.
    :param height: number of lines.
    :return: the ASCII-art text.
    """
    im = Image.open(image_file)
    im = im.resize((width, height), Image.NEAREST)
    # Normalise to RGBA so getpixel() always returns an (r, g, b, a) tuple.
    # Grayscale/palette images would otherwise return an int (or a 2-tuple)
    # and break the argument unpacking below.
    im = im.convert('RGBA')

    lines = []
    for y in range(height):
        row = ''.join(handle_image_char(*im.getpixel((x, y)))
                      for x in range(width))
        lines.append(row + '\n')
    txt = ''.join(lines)

    if txt_file is not None:
        # The file is opened in binary mode, so write bytes explicitly; every
        # character of the ramp is ASCII.
        with open(txt_file, 'wb') as fw:
            fw.write(txt.encode('ascii'))
    return txt


def handle_image_char(r, g, b, alpha=256):
    """Pick the ASCII-art character for a single pixel.

    :param r: red channel value.
    :param g: green channel value.
    :param b: blue channel value.
    :param alpha: alpha channel value; ``0`` (fully transparent) always maps
        to a space.
    :return: one character taken from ``ascii_char``; low gray levels select
        entries near the start of the ramp, high gray levels entries near
        the end.
    """
    # Fully transparent pixels are rendered as blanks.
    if alpha == 0:
        return ' '

    # Weighted sum of the channels, truncated to an integer gray level.
    luminance = int(0.2126 * r + 0.7152 * g + 0.0722 * b)
    # Width of the gray-level bucket covered by each ramp character.
    bucket_width = (256.0 + 1) / len(ascii_char)
    return ascii_char[int(luminance / bucket_width)]


if __name__ == '__main__':
    # Log in to WeChat. cache_path=True is forwarded to wxpy's Bot
    # (presumably so the login session is cached between runs -- see wxpy docs).
    bot = Bot(cache_path=True)
    # Maximum number of history messages to keep
    bot.messages.max_history = 1000

    friends = bot.friends()

    # Takes the first search hit; the [0] fails if no friend matches this name.
    t_friend_1 = friends.search(u'朋友1')[0]


    def _ensure_dir(name):
        """Return the sub-directory ``name`` next to this script, creating it if missing."""
        path = os.path.join(os.path.dirname(__file__), name)
        if not os.path.isdir(path):
            os.makedirs(path)
        return path


    @bot.register(chats=[t_friend_1])
    def listen_and_reply_friends(msg):
        """Handle a message from the registered friend.

        * Picture: save it and reply with its ASCII-art rendering.
        * Video: save it.
        * Recording: convert the MP3 to WAV, send it to Baidu speech
          recognition and print the recognised text.
        * Anything else: reply with a fixed acknowledgement.

        Afterwards, if the message carries text, search a matching meme
        image for that text and reply with it.
        """
        print('received msg:[{}] {}'.format(msg.type, msg.text))
        if msg.type == 'Picture':
            parent_dir = _ensure_dir('picture')
            print('picture dir {} name {}'.format(parent_dir, msg.file_name))
            image_file = os.path.join(parent_dir, msg.file_name)
            msg.get_file(save_path=image_file)
            msg.reply(handle_image(image_file))
        elif msg.type == 'Video':
            parent_dir = _ensure_dir('video')
            print('video dir {} name {}'.format(parent_dir, msg.file_name))
            msg.get_file(save_path=os.path.join(parent_dir, msg.file_name))
        elif msg.type == 'Recording':
            parent_dir = _ensure_dir('recording')
            mp3_file_name = msg.file_name
            print('recording dir {} mp3 name {}'.format(parent_dir, mp3_file_name))
            # Receive the MP3 data and convert it to a WAV file
            bio = BytesIO(msg.get_file())
            # Only override pydub's converter when the hard-coded ffmpeg build
            # exists on this machine; otherwise keep pydub's own setting.
            ffmpeg_path = 'F:\\develop\\ffmpeg\\ffmpeg-win64-static\\bin\\ffmpeg.exe'
            if os.path.isfile(ffmpeg_path):
                AudioSegment.converter = ffmpeg_path
            audio = AudioSegment.from_mp3(bio)
            wav_file_name = os.path.splitext(mp3_file_name)[0] + '.wav'
            print('recording dir {} wav name {}'.format(parent_dir, wav_file_name))
            wav_path = os.path.join(parent_dir, wav_file_name)
            audio.export(wav_path, format='wav')
            # Read back the converted WAV file
            with open(wav_path, 'rb') as fp:
                voices = fp.read()
            # Recognise the speech with the Baidu speech-recognition API
            # NOTE(review): 16000 is passed as the sample rate; the exported
            # WAV is not resampled here -- confirm the recording matches.
            try:
                # dev_pid: 1536 Mandarin (simple English supported), 1537 Mandarin (Chinese only),
                # 1737 English, 1637 Cantonese, 1837 Sichuanese, 1936 far-field Mandarin
                result = client.asr(voices, 'wav', 16000, {'dev_pid': 1537})
                result_text = result["result"][0]
                print("receive speech msg: " + result_text)
            except (KeyError, IndexError) as err:
                # The response had no usable "result" entry; report the caught
                # exception itself rather than an attribute of the class.
                print("Speech Recognition Error")
                print(err)
        else:
            msg.reply('thank you ! i have received it')

        # Only search for a meme when there is text to use as the keyword.
        if msg.text:
            parent_dir = _ensure_dir('picture')
            picture_name_list = handle_dt(msg.text, max_size=1)
            for picture_name in picture_name_list:
                msg.reply_image(os.path.join(parent_dir, picture_name))


    embed()

異常記錄
Couldn't find ffmpeg or avconv - defaulting to ffmpeg......
1、通過https://ffmpeg.zeranoe.com/builds/網址下載FFmpeg(ffmpeg-win64-static.zip)
2、把%FFMPEG_HOME%/bin加入到系統環境變量中
3、修改PYTHON_HOME\Lib\site-packages\pydub\utils.py文件,指定位置添加指定內容
if os.name == "nt" and not program.endswith(".exe"):
    program += ".exe"
envdir_list = [os.curdir] + os.environ["PATH"].split(os.pathsep)
envdir_list.append('F:\\ffmpeg\\ffmpeg-win64-static\\bin')
4、代碼使用的時候如下操作
AudioSegment.converter = 'F:\\ffmpeg\\ffmpeg-win64-static\\bin\\ffmpeg.exe'
audio = AudioSegment.from_mp3(bio)

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章