10.性別識別
代碼:見day05 gndr.py
age score phone
25 80 110
28 90 120
...
$$\begin{pmatrix} 25 & 80 & 110 \\ 28 & 90 & 120 \\ \vdots & \vdots & \vdots \end{pmatrix}$$
[{'age': 25, 'score': 80, 'phone': 110},
{'age': 28, 'score': 90, 'phone': 120}]
11.情感分析
代碼:sent.py
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import nltk.corpus as nc
import nltk.classify as cf
import nltk.classify.util as cu
def _bag_of_words(words):
    """Return a presence-only feature dict mapping every word to True."""
    return {word: True for word in words}


def _load_reviews(category, label):
    """Build one (features, label) sample per file of a corpus category.

    Args:
        category: Category name passed to ``movie_reviews.fileids``.
        label: Class label attached to every sample of that category.

    Returns:
        A list of ``(feature_dict, label)`` tuples.
    """
    return [
        (_bag_of_words(nc.movie_reviews.words(fileid)), label)
        for fileid in nc.movie_reviews.fileids(category)]


# Labelled samples for the positive and the negative reviews.
pdata = _load_reviews('pos', 'POSITIVE')
ndata = _load_reviews('neg', 'NEGATIVE')
# The first 80% of each class trains the model; the remainder tests it.
pnumb, nnumb = int(0.8 * len(pdata)), int(0.8 * len(ndata))
train_data = pdata[:pnumb] + ndata[:nnumb]
test_data = pdata[pnumb:] + ndata[nnumb:]
model = cf.NaiveBayesClassifier.train(train_data)
ac = cu.accuracy(model, test_data)
print(ac)
# Show the first field of each of the ten most informative features.
tops = model.most_informative_features()
for top in tops[:10]:
    print(top[0])
reviews = [
    'It is an amazing movie.',
    'This is a dull movie. I would never recommend it to anyone.',
    'The cinematography is pretty great in this movie.',
    'The direction was terrible and the story was all over the place.']
sents, probs = [], []
for review in reviews:
    # NOTE(review): str.split() keeps case and attached punctuation
    # ('It', 'movie.'); such tokens presumably do not match the corpus
    # vocabulary -- confirm whether a real tokenizer should be used.
    pcls = model.prob_classify(_bag_of_words(review.split()))
    sent = pcls.max()
    sents.append(sent)
    probs.append(pcls.prob(sent))
for review, sent, prob in zip(reviews, sents, probs):
    print(review, '->', sent, '%.2f' % prob)
12.主題抽取
代碼:topic.py
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import warnings
warnings.filterwarnings(
'ignore', category=UserWarning)
import nltk.tokenize as tk
import nltk.corpus as nc
import nltk.stem.snowball as sb
import gensim.models.ldamodel as gm
import gensim.corpora as gc
# Read the corpus, one document per line.  Only the line terminator is
# removed: slicing with [:-1] would also drop the last real character of
# a final line that has no trailing newline.
with open('../../data/topic.txt', 'r') as f:
    doc = [line.rstrip('\n') for line in f]
tokenizer = tk.RegexpTokenizer(r'\w+')
# A set gives constant-time membership tests in the filter below.
stopwords = set(nc.stopwords.words('english'))
stemmer = sb.SnowballStemmer('english')
# Per line: lower-case, tokenize, drop stop words, stem what is left.
lines_tokens = [
    [stemmer.stem(token)
     for token in tokenizer.tokenize(line.lower())
     if token not in stopwords]
    for line in doc]
dic = gc.Dictionary(lines_tokens)
# Bag-of-words representation of every line, built from the dictionary.
bow = [dic.doc2bow(line_tokens) for line_tokens in lines_tokens]
n_topics = 2
model = gm.LdaModel(bow, num_topics=n_topics,
                    id2word=dic, passes=25)
topics = model.print_topics(num_topics=n_topics,
                            num_words=4)
print(topics)
十六、語音識別
聲音->文本
1.聲音的本質是震動,震動的本質是位移關於時間的函數
Signal: s = f(t)
波形文件(.wav)中記錄了不同採樣時刻的位移
2.通過傅里葉變換,可以將時間域的聲音函數分解爲一系列不同頻率的正弦函數的疊加,通過頻率譜線的特殊分佈,建立音頻內容和文本的對應關係,以此作爲模型訓練的基礎。
代碼:audio.py
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import numpy as np
import numpy.fft as nf
import scipy.io.wavfile as wf
import matplotlib.pyplot as mp
def _setup_axes(position, title, xlabel, ylabel):
    """Select a subplot and apply the shared title/label/grid styling."""
    mp.subplot(position)
    mp.title(title, fontsize=16)
    mp.xlabel(xlabel, fontsize=12)
    mp.ylabel(ylabel, fontsize=12)
    mp.tick_params(labelsize=10)
    mp.grid(linestyle=':')


# Load the waveform and report its sampling rate, shape and sample type.
rate, samples = wf.read('../../data/freq.wav')
print(rate)
print(samples.shape, samples.dtype)
# Scale the samples by 2 ** 15 (presumably 16-bit PCM input -- confirm
# against the dtype printed above).
samples = samples / 2 ** 15
instants = np.arange(len(samples)) / rate
# Frequency of every FFT bin and the magnitude of the spectrum.
bins = nf.fftfreq(samples.size, 1 / rate)
magnitudes = np.abs(nf.fft(samples))
# Only the non-negative frequencies are plotted.
non_negative = bins >= 0

mp.figure('Audio', facecolor='lightgray')
_setup_axes(121, 'Time Domain', 'Time', 'Signal')
mp.plot(instants, samples, c='dodgerblue', label='Signal')
mp.legend()
_setup_axes(122, 'Frequency Domain', 'Frequency', 'Power')
mp.plot(bins[non_negative], magnitudes[non_negative],
        c='orangered', label='Power')
mp.legend()
mp.tight_layout()
mp.show()
3.梅爾頻率倒譜係數(MFCC):以與聲音內容密切相關的13個特殊頻率所對應的能量分佈,作爲語音的特徵。
代碼:mfcc.py
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import numpy as np
import numpy.fft as nf
import scipy.io.wavfile as wf
import python_speech_features as sf
import matplotlib.pyplot as mp
# Compute the MFCC matrix of one training utterance and print its shape.
wav_path = '../../data/speeches/training/orange/orange01.wav'
rate, samples = wf.read(wav_path)
features = sf.mfcc(samples, rate)
print(features.shape)
# Draw the transposed matrix in the figure named 'MFCC'.
mp.matshow(features.T, cmap='gist_rainbow', fignum='MFCC')
mp.title('MFCC', fontsize=16)
mp.xlabel('Sample', fontsize=12)
mp.ylabel('Feature', fontsize=12)
# Hide the top ticks/labels and show the tick labels at the bottom.
tick_style = dict(which='both', top=False, labeltop=False,
                  labelbottom=True, labelsize=10)
mp.tick_params(**tick_style)
mp.show()
4.語音識別
代碼:spch.py
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import warnings
import numpy as np
import scipy.io.wavfile as wf
import python_speech_features as sf
import hmmlearn.hmm as hl
# Suppress DeprecationWarning messages for the rest of the script.
warnings.filterwarnings(
'ignore', category=DeprecationWarning)
# Tell NumPy to ignore all floating-point error conditions.
np.seterr(all='ignore')
def search_speeches(directory, speeches):
    """Recursively collect ``.wav`` files, grouped by containing directory.

    Args:
        directory: Root directory to scan.
        speeches: Dict updated in place.  Each key is the name of a
            directory (used as the label) and each value is the list of
            paths of the ``.wav`` files found directly inside it.

    Raises:
        IOError: If ``directory`` is not an existing directory.
    """
    directory = os.path.normpath(directory)
    if not os.path.isdir(directory):
        raise IOError(
            "The directory '" + directory +
            "' doesn't exist!")
    # The label depends only on the directory, so compute it once
    # instead of once per entry.
    label = os.path.basename(directory)
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # collected lists (and anything trained from them) reproducible.
    for entry in sorted(os.listdir(directory)):
        path = os.path.join(directory, entry)
        if os.path.isdir(path):
            search_speeches(path, speeches)
        elif os.path.isfile(path) and path.endswith('.wav'):
            speeches.setdefault(label, []).append(path)
def _load_mfccs(speeches):
    """Compute one stacked MFCC array per label.

    Args:
        speeches: Dict mapping a label to a list of wav file paths, as
            filled in by ``search_speeches``.

    Returns:
        A ``(xs, ys)`` pair of parallel lists: ``xs[i]`` holds the MFCC
        rows of every file of label ``ys[i]``, concatenated along axis 0.
    """
    xs, ys = [], []
    for label, filenames in speeches.items():
        per_file = []
        for filename in filenames:
            sample_rate, sigs = wf.read(filename)
            per_file.append(sf.mfcc(sigs, sample_rate))
        xs.append(np.concatenate(per_file, axis=0))
        ys.append(label)
    return xs, ys


train_speeches = {}
search_speeches('../../data/speeches/training',
                train_speeches)
train_x, train_y = _load_mfccs(train_speeches)
# Train one model per label.
models = {}
for mfccs, label in zip(train_x, train_y):
    # Hidden Markov model with Gaussian (normal) emissions.
    model = hl.GaussianHMM(
        n_components=4, covariance_type='diag',
        n_iter=1000)
    models[label] = model.fit(mfccs)
test_speeches = {}
search_speeches('../../data/speeches/testing',
                test_speeches)
# Evaluate on the testing set.  The loop used to iterate train_speeches,
# so the files collected in test_speeches were never scored.
test_x, test_y = _load_mfccs(test_speeches)
pred_test_y = []
for mfccs in test_x:
    # Predict the label whose model gives the highest score.
    best_score, best_label = None, None
    for label, model in models.items():
        score = model.score(mfccs)  # similarity score
        if best_score is None or best_score < score:
            best_score, best_label = score, label
    pred_test_y.append(best_label)
print(test_y)
print(pred_test_y)
5.聲音合成
代碼:music.py
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import json
import numpy as np
import scipy.io.wavfile as wf
# Melody as (note name, duration in seconds) pairs.
tones = [
    ('G5', 1.5),
    ('A5', 0.5),
    ('G5', 1.5),
    ('E5', 0.5),
    ('D5', 0.5),
    ('E5', 0.25),
    ('D5', 0.25),
    ('C5', 0.5),
    ('A4', 0.5),
    ('C5', 0.75)]
sample_rate = 44100


def synthesize(tones, freqs, sample_rate):
    """Render a sequence of sine tones as 16-bit samples.

    Args:
        tones: Iterable of ``(note, duration)`` pairs; ``duration`` is in
            seconds.
        freqs: Mapping from note name to frequency in Hz.
        sample_rate: Number of samples per second.

    Returns:
        A 1-D ``numpy.int16`` array holding the tones back to back.
    """
    segments = []
    for tone, duration in tones:
        # np.linspace needs an integer sample count; duration * rate is
        # a float (e.g. 1.5 * 44100).
        n_samples = int(round(duration * sample_rate))
        times = np.linspace(0, duration, n_samples)
        segments.append(np.sin(2 * np.pi * freqs[tone] * times))
    if not segments:
        return np.zeros(0, dtype=np.int16)
    # Concatenate once: starting from np.empty(1) and appending in a loop
    # both prepended an uninitialised sample and copied quadratically.
    wave = np.concatenate(segments)
    # Scale to the largest positive int16 value; 2 ** 15 would overflow
    # to a negative number at the +1.0 peaks.
    return (wave * (2 ** 15 - 1)).astype(np.int16)


if __name__ == '__main__':
    with open('../../data/12.json', 'r') as f:
        freqs = json.load(f)
    music = synthesize(tones, freqs, sample_rate)
    wf.write('../../data/music.wav', sample_rate, music)
十七、圖像識別
1.OpenCV,機器視覺
代碼:basic.py
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import numpy as np
import cv2 as cv
source = cv.imread('../../data/forest.jpg')
cv.imshow('Original', source)
# Isolate each colour plane: index 0 is blue, 1 is green, 2 is red.
planes = []
for index, title in enumerate(('Blue', 'Green', 'Red')):
    plane = np.zeros_like(source)
    plane[..., index] = source[..., index]
    cv.imshow(title, plane)
    planes.append(plane)
blue, green, red = planes
# Crop the region between one quarter and three quarters of each side.
height, width = source.shape[:2]
left, top = int(width / 4), int(height / 4)
right, bottom = int(width * 3 / 4), int(height * 3 / 4)
cropped = source[top:bottom, left:right]
cv.imshow('Cropped', cropped)
# Shrink to a quarter of the size, then enlarge that result four times.
quarter_size = (int(width / 4), int(height / 4))
scaled1 = cv.resize(source, quarter_size,
                    interpolation=cv.INTER_LINEAR)
cv.imshow('Scaled1', scaled1)
scaled2 = cv.resize(scaled1, None, fx=4, fy=4,
                    interpolation=cv.INTER_LINEAR)
cv.imshow('Scaled2', scaled2)
cv.waitKey()
# Save every derived image after the key press.
outputs = [
    ('../../data/blue.jpg', blue),
    ('../../data/green.jpg', green),
    ('../../data/red.jpg', red),
    ('../../data/cropped.jpg', cropped),
    ('../../data/scaled1.jpg', scaled1),
    ('../../data/scaled2.jpg', scaled2)]
for path, image in outputs:
    cv.imwrite(path, image)
2.邊緣檢測
亮度梯度
代碼:edge.py
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import cv2 as cv
# Load the picture as a single-channel grayscale image.
gray = cv.imread('../../data/chair.jpg', cv.IMREAD_GRAYSCALE)
cv.imshow('Original', gray)
# Sobel results for three (dx, dy) derivative orders, all with ksize=5.
sobel_orders = (
    ('H-Sobel', 1, 0),
    ('V-Sobel', 0, 1),
    ('Sobel', 1, 1))
for title, dx, dy in sobel_orders:
    cv.imshow(title, cv.Sobel(gray, cv.CV_64F, dx, dy, ksize=5))
cv.imshow('Laplacian', cv.Laplacian(gray, cv.CV_64F))
# Canny is called with thresholds 50 and 240.
cv.imshow('Canny', cv.Canny(gray, 50, 240))
cv.waitKey()
3.亮度提升
直方圖均衡化
代碼:eq.py
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import cv2 as cv
source = cv.imread('../../data/sunrise.jpg')
cv.imshow('Original', source)
# Histogram equalization of the grayscale version.
gray = cv.cvtColor(source, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)
cv.imshow('Equalized Gray', cv.equalizeHist(gray))
# For the colour version, equalize only channel 0 of the YUV image and
# convert the result back to BGR.
yuv = cv.cvtColor(source, cv.COLOR_BGR2YUV)
y_channel = yuv[..., 0]
yuv[..., 0] = cv.equalizeHist(y_channel)
cv.imshow('Equalized Color', cv.cvtColor(yuv, cv.COLOR_YUV2BGR))
cv.waitKey()
4.角點檢測
平直棱線的交匯點
代碼:corner.py
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import cv2 as cv
source = cv.imread('../../data/box.png')
cv.imshow('Original', source)
gray = cv.cvtColor(source, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)
# Harris corner response, dilated before thresholding.
response = cv.dilate(cv.cornerHarris(gray, 7, 5, 0.04), None)
# Paint every pixel whose response exceeds 1% of the maximum with
# [0, 0, 255] on a copy of the input.
threshold = response.max() * 0.01
marked = source.copy()
marked[response > threshold] = [0, 0, 255]
cv.imshow('Corner', marked)
cv.waitKey()
5.特徵點檢測
STAR/SIFT
代碼:star.py , sift.py
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import cv2 as cv
source = cv.imread('../../data/table.jpg')
cv.imshow('Original', source)
gray = cv.cvtColor(source, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)
# Detect keypoints on the grayscale image with the STAR detector.
detector = cv.xfeatures2d.StarDetector_create()
points = detector.detect(gray)
# Draw the keypoints onto a copy so the input image stays untouched.
canvas = source.copy()
rich = cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS
cv.drawKeypoints(source, points, canvas, flags=rich)
cv.imshow('Mixture', canvas)
cv.waitKey()
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import cv2 as cv
original = cv.imread('../../data/table.jpg')
cv.imshow('Original', original)
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)
# Prefer the SIFT factory on the main cv2 module when it exists, so the
# script also runs on builds without the contrib xfeatures2d module;
# otherwise fall back to the xfeatures2d factory used originally.
sift_create = getattr(cv, 'SIFT_create', None)
if sift_create is None:
    sift_create = cv.xfeatures2d.SIFT_create
sift = sift_create()
keypoints = sift.detect(gray)
# Draw the keypoints onto a copy so the input image stays untouched.
mixture = original.copy()
cv.drawKeypoints(
    original, keypoints, mixture,
    flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
cv.imshow('Mixture', mixture)
cv.waitKey()