Andrew Ng Deep Learning, class1, week1, assignment1

1. Code

#-*- coding:utf-8 -*-
'''
version3
coding_time: 2018/10/15 20:02

@author: Steve Chen
'''

import numpy as np
import matplotlib.pyplot as plt
import time
from lr_utils import load_dataset

def getdata():
	train_x_orig, train_y, test_x_orig, test_y, classes = load_dataset()

	print('-' * 10 + 'dimensions of original data' + '-' * 10)
	print('train_x_orig: ' + str(train_x_orig.shape))
	print('train_y: ' + str(train_y.shape))
	print('test_x_orig: ' + str(test_x_orig.shape))
	print('test_y: ' + str(test_y.shape))
	'''
	In version2 I forgot to divide by 255, which made the values overflow
	and produced these errors:
	write_by _my_self_version2.py:50: RuntimeWarning: overflow encountered in exp
	s = 1 / (1 + np.exp(-z))
	write_by _my_self_version2.py:63: RuntimeWarning: divide by zero encountered in log
	cost = (-1 / m) * np.sum(train_y * np.log(A) + (1 - train_y) * np.log(1 - A))
	write_by _my_self_version2.py:63: RuntimeWarning: invalid value encountered in multiply
	cost = (-1 / m) * np.sum(train_y * np.log(A) + (1 - train_y) * np.log(1 - A))
	cost: nan
	cost: nan
	'''
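	# Why the scaling matters (a short sketch, not part of the assignment):
	# with raw pixel values in [0, 255], the logits z = np.dot(w.T, x) + b
	# grow large, np.exp(-z) overflows, sigmoid saturates to exactly 0 or 1,
	# and np.log(0) inside the cost turns it into nan. Dividing by 255 keeps
	# every feature in [0, 1], where the arithmetic stays well-behaved.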

	train_x = (train_x_orig.reshape(train_x_orig.shape[0], -1).T) / 255
	test_x = (test_x_orig.reshape(test_x_orig.shape[0], -1).T) / 255

	print('-' * 10 + 'dimensions of processed data' + '-' * 10)
	print('train_x: ' + str(train_x.shape))
	print('train_y: ' + str(train_y.shape))
	print('test_x: ' + str(test_x.shape))
	print('test_y: ' + str(test_y.shape)+'\n')	


	return train_x, train_y, test_x, test_y, classes


def sigmoid(z):
	s = 1 / (1 + np.exp(-z))

	return s
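# A numerically stable variant (optional sketch, not used below): the form
# above is fine for positive z, but np.exp(-z) overflows for large negative
# z; branching on the sign keeps the exponent non-positive in both cases.
def sigmoid_stable(z):
	out = np.empty_like(z, dtype=float)
	pos = z >= 0
	out[pos] = 1 / (1 + np.exp(-z[pos]))
	exp_z = np.exp(z[~pos])
	out[~pos] = exp_z / (1 + exp_z)
	return out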

def initialize(dim):
	w_init = np.zeros((dim,1))
	b_init = 0

	return w_init, b_init



def propagate_for_one_time(w, b, train_x, train_y, learning_rate):
	m = train_y.shape[1] 
	A = sigmoid(np.dot(w.T, train_x) + b) #don't forget '+b'
	assert(A.shape == train_y.shape)
	diff_matrix = A - train_y
	cost = np.mean(-(train_y * np.log(A) + (1 - train_y) * np.log(1 - A)))

	#dw = np.array((diff_matrix * train_x).sum(axis = 1) / m) #this form takes almost eight times as long
	#dw = dw.reshape((train_x.shape[0], 1)) #and the expression above also needs this reshape to turn the result back into a column vector
	dw = np.dot(train_x, diff_matrix.T) / m
	db = np.sum(diff_matrix) / m

	w = w - learning_rate * dw
	b = b - learning_rate * db

	assert(w.shape == (train_x.shape[0], 1))
	assert(isinstance(b, (int, float, np.floating))) #'b.dtype == float or int' was always truthy; check the scalar type properly

	return w, b, cost
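# Sanity-check sketch for the two dw forms (assumes the variables from the
# function above): both yield the same (num_px, 1) gradient, but np.dot
# dispatches to optimized BLAS, while the broadcast form first materializes
# a full (num_px, m) temporary — hence the roughly eightfold slowdown:
#   dw_slow = (diff_matrix * train_x).sum(axis = 1).reshape(-1, 1) / m
#   dw_fast = np.dot(train_x, diff_matrix.T) / m
#   assert np.allclose(dw_slow, dw_fast)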


def optimize(w, b, train_x, train_y, num_iterations, learning_rate, print_cost = False):
	costs = []
	for i in range(num_iterations):
		w, b, cost = propagate_for_one_time(w, b, train_x, train_y, learning_rate)

		if i % 100 == 0:
			costs.append(cost)
			if print_cost:
				print('cost: ' + str(cost))

	return w, b, costs



#def predict(w, b, test_x, test_y, print_cost = False): #a prediction function has to be general-purpose, so Y and cost must not appear among its parameters
def predict(w, b, test_x):

	y_predict_orig = sigmoid(np.dot(w.T, test_x) + b)
	#assert(y_predict_orig.shape == test_y.shape)

	y_predict = np.array([[0 if x < 0.5 else 1 for x in y_predict_orig.squeeze()]]) #iterating over the raw (1, m) array (e.g. [[1, 2]]) yields the whole row at once,
																					#not individual values; squeeze() flattens it to [1, 2] so the comprehension works
	assert(y_predict.shape ==(1, test_x.shape[1]))

	return y_predict
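# An equivalent fully vectorized threshold (sketch) that sidesteps the
# squeeze-and-comprehension round trip: the comparison broadcasts over the
# whole (1, m) array and preserves its shape, so no reshaping is needed.
#   y_predict = (y_predict_orig >= 0.5).astype(int)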


def model(w_init, b_init, train_x, train_y, test_x, test_y, num_iterations, learning_rate, print_cost = False):
	tick = time.time()

	w, b, costs_train = optimize(w_init, b_init, train_x, train_y, num_iterations, learning_rate, print_cost)
	y_predict_test = predict(w, b, test_x)
	y_predict_train = predict(w, b, train_x)

	#num_correct = np.sum(x_predict & test_y) #wrong: this needs XNOR rather than AND (AND misses the correct cases where both the label and the prediction are 0)
	num_correct_test = test_y.shape[1] - np.sum(abs(test_y - y_predict_test))
	correct_ratio_test = num_correct_test / test_y.shape[1] 

	num_correct_train = train_y.shape[1] - np.sum(abs(train_y - y_predict_train))
	correct_ratio_train = num_correct_train / train_y.shape[1] 

	print('correct_ratio_test: ' + str(correct_ratio_test * 100) + '%')
	print('correct_ratio_train: ' + str(correct_ratio_train * 100) + '%')
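	# Equivalent one-liner (sketch): element-wise equality is the XNOR noted
	# above, so np.mean(test_y == y_predict_test) gives correct_ratio_test
	# directly, without the subtract-and-count bookkeeping.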

	d = {
			'w': w,
			'b': b,
			'learning_rate': learning_rate,
			'y_predict_test': y_predict_test,
			'y_predict_train':y_predict_train,
			'correct_ratio_test': correct_ratio_test,
			'correct_ratio_train': correct_ratio_train,
			'costs': np.array([costs_train])
		}

	tock = time.time()
	print('time: ' + str((tock - tick) * 1000) + ' ms')

	return d

if __name__ == '__main__':
	print('version3 (final version)')
	
	train_x, train_y, test_x, test_y, classes = getdata()
	w_init, b_init = initialize(train_x.shape[0])
	#d = model(w_init, b_init, train_x, train_y, test_x, test_y, num_iterations = 2000, learning_rate = 0.005, print_cost = True)
	#print('correct_ratio_test: ' + str(d['correct_ratio_test'] * 100) + '%')
	#print('correct_ratio_train: ' + str(d['correct_ratio_train'] * 100) + '%')

	learning_rates = [0.01, 0.001, 0.0001]
	models = {}
	for lr in learning_rates:

		print('-' * 25)
		print('learning rate: ' + str(lr))
		d = model(w_init, b_init, train_x, train_y, test_x, test_y, 2000, lr, print_cost = False)
		models[str(lr)] = d
		plt.plot(d['costs'].squeeze(), label = str(lr))

	plt.legend()


	plt.xlabel('iterations (per hundreds)')
	plt.ylabel('cost')
	plt.show()

2. Understanding logistic regression, the loss function, and the cost function

① Logistic regression
The logistic (sigmoid) function squashes the linear score z = wᵀx + b into a probability: σ(z) = 1 / (1 + e^(−z)).
② Sigmoid function and loss function
The loss on a single example measures how far the activation a = σ(z) is from the label y: L(a, y) = −[y·log(a) + (1 − y)·log(1 − a)].
③ Cost function
The cost averages the loss over all m training examples: J(w, b) = (1/m) Σᵢ L(a⁽ⁱ⁾, y⁽ⁱ⁾).
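
The gradient steps in propagate_for_one_time follow from this cost. A short derivation (standard logistic-regression calculus; the stacked matrices X, A, Y below are my notation, not the post's):

\frac{\partial L}{\partial z} = a - y,
\qquad
\frac{\partial J}{\partial w} = \frac{1}{m}\,X\,(A - Y)^{T},
\qquad
\frac{\partial J}{\partial b} = \frac{1}{m}\sum_{i=1}^{m}\bigl(a^{(i)} - y^{(i)}\bigr)

which is exactly dw = np.dot(train_x, diff_matrix.T) / m and db = np.sum(diff_matrix) / m in the code.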
