安裝依賴工具包:
pip install pyaudio
pip install pydub
pip install ffmpeg
(注意:pydub需要的是FFmpeg可執行文件,pip安裝後仍需另行下載並配置,詳見文末「異常記錄」)
pip install baidu_aip
語音信息識別代碼記錄
# -*- coding:utf-8 -*-
import os
import sys
import json
import urllib
import requests
from wxpy import *
from PIL import Image
from io import BytesIO
from aip import AipSpeech
from bs4 import BeautifulSoup
from pydub import AudioSegment
# Make UTF-8 the interpreter-wide default string encoding. The reload/set
# pair only runs when the current default differs from the wanted one, so on
# an interpreter that already defaults to UTF-8 nothing is touched.
default_encoding = 'utf-8'
if default_encoding != sys.getdefaultencoding():
    reload(sys)
    sys.setdefaultencoding(default_encoding)
# Baidu speech-recognition credentials. Each value can be supplied through an
# environment variable of the same name so that real keys do not have to be
# written into this file; the literals are used only as fallbacks.
BAIDU_SR_APP_ID = os.environ.get('BAIDU_SR_APP_ID', 'XXXXXXXX')
BAIDU_SR_API_KEY = os.environ.get('BAIDU_SR_API_KEY', 'ABCDu1EFg2hiJK0lmNO3P45q')
BAIDU_SR_SECRET_KEY = os.environ.get('BAIDU_SR_SECRET_KEY', 'aBc12Def03g45hIjkLMNO6p7Qrs8txyz')
# Module-level speech client; the recording handler calls client.asr() on it.
client = AipSpeech(BAIDU_SR_APP_ID, BAIDU_SR_API_KEY, BAIDU_SR_SECRET_KEY)
def handle_dt(keyword, max_size=10):
    """Search doutula.com for ``keyword`` and download the matching JPG images.

    Images are saved into the ``picture`` directory next to this file; the
    directory is created when it does not exist yet.

    :param keyword: search term, sent as the ``keyword`` query parameter.
    :param max_size: stop once this many images have been downloaded. A value
        below 1 never equals the download count, so every JPG result on the
        page is downloaded.
    :return: list of downloaded file names (base names only, no directory),
        in the order they appear on the result page.
    """
    # ``urlretrieve`` lives in ``urllib`` on Python 2 and in ``urllib.request``
    # on Python 3; resolve it here so the function works on either.
    try:
        from urllib import urlretrieve
    except ImportError:
        from urllib.request import urlretrieve

    # The timeout keeps the caller from blocking forever on a stalled connection.
    resp = requests.get('https://www.doutula.com/search', params={'keyword': keyword}, timeout=10)
    b_soup = BeautifulSoup(resp.content, 'lxml')
    img_tags = b_soup.select(
        '#search-result-page > div > div > div > div > div.search-result.list-group-item > div > div > a > img')

    # The target directory is the same for every image, so resolve it once and
    # make sure it exists before the first download.
    parent_dir = os.path.join(os.path.dirname(__file__), 'picture')
    if not os.path.isdir(parent_dir):
        os.makedirs(parent_dir)

    picture_name_list = []
    for img_tag in img_tags:
        link = img_tag.get('data-original')
        if link is None:
            continue
        # Strip before testing the suffix so surrounding whitespace in the
        # attribute does not hide a JPG link.
        link = str(link).strip()
        if not link.endswith('jpg'):
            continue
        # The file name is whatever follows the last '/' of the URL.
        picture_name = link.rsplit('/', 1)[-1]
        urlretrieve(link, os.path.join(parent_dir, picture_name))
        picture_name_list.append(picture_name)
        if len(picture_name_list) == max_size:
            break
    return picture_name_list
# Convert an MP3 file to a WAV file
def mp3_to_wav(mp3_file_name):
    """Convert an MP3 in the ``recording`` directory to a WAV file beside it.

    :param mp3_file_name: base name of the MP3 file inside the ``recording``
        directory next to this file.
    :return: full path of the WAV file that was written (same base name with
        the extension replaced by ``.wav``).
    """
    parent_dir = os.path.join(os.path.dirname(__file__), 'recording')

    # Only point pydub at this machine-specific ffmpeg binary when it really
    # exists; otherwise leave pydub's own converter setting untouched.
    ffmpeg_path = 'F:\\develop\\ffmpeg\\ffmpeg-win64-static\\bin\\ffmpeg.exe'
    if os.path.isfile(ffmpeg_path):
        AudioSegment.converter = ffmpeg_path

    with open(os.path.join(parent_dir, mp3_file_name), 'rb') as fp:
        audio = AudioSegment.from_mp3(BytesIO(fp.read()))

    # splitext also copes with a name that has no extension, where the former
    # rindex('.') lookup raised ValueError.
    wav_file_name = os.path.splitext(mp3_file_name)[0] + '.wav'
    wav_path = os.path.join(parent_dir, wav_file_name)
    audio.export(wav_path, format='wav')
    return wav_path
# Character ramp indexed by handle_image_char: gray level 0 selects the first
# character ('$') and gray level 255 selects the last one (' ').
# The backslash is spelled ``\\`` because ``\|`` is not a valid escape
# sequence; the resulting list is unchanged.
ascii_char = list("$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\\|()1{}[]?-_+~<>i!lI;:,\"^`'. ")
def handle_image(image_file, txt_file=None, width=80, height=80):
    """Render an image as ASCII art.

    The image is resized to ``width`` x ``height`` and every pixel is mapped
    to one character by ``handle_image_char``; each pixel row becomes one
    line terminated by ``'\\n'``.

    :param image_file: image to open with ``Image.open``.
    :param txt_file: optional path; when given, the text is also written there.
    :param width: number of characters per line.
    :param height: number of lines.
    :return: the ASCII-art text.
    """
    im = Image.open(image_file)
    im = im.resize((width, height), Image.NEAREST)
    # Normalize to RGBA so every pixel is an (r, g, b, a) tuple. Without this,
    # images whose pixels are not 3- or 4-tuples (e.g. grayscale or palette
    # images, where getpixel yields a single number) cannot be unpacked into
    # handle_image_char.
    im = im.convert('RGBA')

    lines = []
    for y in range(height):
        row = ''.join(handle_image_char(*im.getpixel((x, y))) for x in range(width))
        lines.append(row + '\n')
    txt = ''.join(lines)

    if txt_file is not None:
        # The file is opened in binary mode, so hand it bytes; every character
        # produced above is ASCII.
        with open(txt_file, 'wb') as fw:
            fw.write(txt.encode('ascii'))
    return txt
def handle_image_char(r, g, b, alpha=256):
    """Return the ramp character that represents one pixel.

    :param r: red component.
    :param g: green component.
    :param b: blue component.
    :param alpha: alpha component; a value of 0 yields a blank.
    :return: a single character taken from ``ascii_char`` (or ``' '`` when
        ``alpha`` is 0).
    """
    # Pixels with zero alpha are rendered as empty space.
    if alpha == 0:
        return ' '
    # Weighted sum of the colour components, truncated to an integer gray level.
    luminance = int(0.2126 * r + 0.7152 * g + 0.0722 * b)
    # Width of the gray-level band covered by each ramp character.
    band = (256.0 + 1) / len(ascii_char)
    return ascii_char[int(luminance / band)]
if __name__ == '__main__':
    bot = Bot(cache_path=True)
    # Maximum number of history messages to keep
    bot.messages.max_history = 1000
    friends = bot.friends()
    # Takes the first search hit; raises IndexError when no friend matches.
    t_friend_1 = friends.search(u'朋友1')[0]

    @bot.register(chats=[t_friend_1])
    def listen_and_reply_friends(msg):
        """Handle one incoming message from the registered friend.

        * ``Picture``: save the image under ``picture`` and reply with its
          ASCII-art rendering.
        * ``Video``: save the file under ``video``.
        * ``Recording``: convert the MP3 data to WAV under ``recording`` and
          send it to the Baidu speech-recognition client; the recognized text
          is printed.
        * anything else: acknowledge, then look up one image for the message
          text with ``handle_dt`` and send it back.
        """
        print('received msg:[{}] {}'.format(msg.type, msg.text))
        if msg.type == 'Picture':
            parent_dir = os.path.join(os.path.dirname(__file__), 'picture')
            print('picture dir {} name {}'.format(parent_dir, msg.file_name))
            image_file = os.path.join(parent_dir, msg.file_name)
            msg.get_file(save_path=image_file)
            msg.reply(handle_image(image_file))
        elif msg.type == 'Video':
            parent_dir = os.path.join(os.path.dirname(__file__), 'video')
            print('video dir {} name {}'.format(parent_dir, msg.file_name))
            msg.get_file(save_path=os.path.join(parent_dir, msg.file_name))
        elif msg.type == 'Recording':
            parent_dir = os.path.join(os.path.dirname(__file__), 'recording')
            mp3_file_name = msg.file_name
            print('recording dir {} mp3 name {}'.format(parent_dir, mp3_file_name))
            # Receive the MP3 data and convert it to a WAV file
            bio = BytesIO(msg.get_file())
            # Only point pydub at this machine-specific ffmpeg binary when it
            # really exists; otherwise leave pydub's converter setting alone.
            ffmpeg_path = 'F:\\develop\\ffmpeg\\ffmpeg-win64-static\\bin\\ffmpeg.exe'
            if os.path.isfile(ffmpeg_path):
                AudioSegment.converter = ffmpeg_path
            audio = AudioSegment.from_mp3(bio)
            # asr() below is told the audio is 16000 Hz, so resample to that
            # rate instead of assuming the MP3 already has it.
            # NOTE(review): mono is assumed to be what the recognition API
            # expects - confirm against the Baidu documentation.
            audio = audio.set_frame_rate(16000).set_channels(1)
            wav_file_name = os.path.splitext(mp3_file_name)[0] + '.wav'
            print('recording dir {} wav name {}'.format(parent_dir, wav_file_name))
            wav_path = os.path.join(parent_dir, wav_file_name)
            audio.export(wav_path, format='wav')
            # Read back the converted WAV file
            with open(wav_path, 'rb') as fp:
                voices = fp.read()
            # Recognize the speech with the Baidu speech-recognition API
            result = None
            try:
                # dev_pid values: 1536 Mandarin (with simple English), 1537 Mandarin (Chinese only),
                # 1737 English, 1637 Cantonese, 1837 Sichuanese, 1936 Mandarin far-field
                result = client.asr(voices, 'wav', 16000, {'dev_pid': 1537})
                result_text = result["result"][0]
                print("receive speech msg: " + result_text)
            except (KeyError, IndexError) as err:
                # No usable "result" entry in the response: report the caught
                # exception and the raw response rather than the KeyError
                # class attribute.
                print("Speech Recognition Error")
                print('{!r}, response: {}'.format(err, result))
        else:
            msg.reply('thank you ! i have received it')
            parent_dir = os.path.join(os.path.dirname(__file__), 'picture')
            picture_name_list = handle_dt(msg.text, max_size=1)
            for picture_name in picture_name_list:
                msg.reply_image(os.path.join(parent_dir, picture_name))

    embed()
異常記錄
Couldn't find ffmpeg or avconv - defaulting to ffmpeg......
1、通過https://ffmpeg.zeranoe.com/builds/網址下載FFmpeg(ffmpeg-win64-static.zip)
2、把%FFMPEG_HOME%/bin加入到系統環境變量中
3、修改PYTHON_HOME\Lib\site-packages\pydub\utils.py文件,在which()函數中envdir_list賦值語句之後添加最後一行(envdir_list.append(...)),把FFmpeg的bin目錄加入搜索路徑:
    if os.name == "nt" and not program.endswith(".exe"):
        program += ".exe"
    envdir_list = [os.curdir] + os.environ["PATH"].split(os.pathsep)
    envdir_list.append('F:\\ffmpeg\\ffmpeg-win64-static\\bin')
4、代碼使用的時候如下操作
AudioSegment.converter = 'F:\\ffmpeg\\ffmpeg-win64-static\\bin\\ffmpeg.exe'
audio = AudioSegment.from_mp3(bio)