部分代碼
第二週 Linear Regression
computeCost
% Mean squared error cost for linear regression: J = (1/(2m)) * sum((h - y).^2).
err = X * theta - y;          % residuals h(x) - y, m x 1
J = (err' * err) / (2 * m);   % quadratic form equals the sum of squared residuals
gradientDescent
% One gradient-descent step for linear regression, vectorized so it works for
% any number of features (the original hard-coded exactly two theta entries).
% The update is simultaneous: theta is read only on the right-hand side.
theta = theta - (alpha / m) * (X' * (X * theta - y));
% Record the cost after this step so convergence can be inspected later.
J_history(iter) = computeCost(X, y, theta);
featureNormalize
% Normalize every feature column to zero mean and unit standard deviation.
% Fix: the original used the '.-' operator, which was removed in Octave 7+
% and is invalid in MATLAB; plain '-' is correct here since mu(i) and
% sigma(i) are scalars.
mu = mean(X);      % 1 x n row vector of per-feature means
sigma = std(X);    % 1 x n row vector of per-feature standard deviations
for i = 1:size(mu, 2)
  X_norm(:, i) = (X(:, i) - mu(i)) ./ sigma(i);
end
computeCostMulti
% Cost for multivariate linear regression: J = (1/(2m)) * ||X*theta - y||^2.
residuals = X * theta - y;
J = (residuals' * residuals) / (2 * m);
gradientDescentMulti
% One gradient-descent step, fully vectorized. Replaces the two-pass
% temp-array update (which also grew 'temp' dynamically on every call)
% with a single simultaneous update valid for any number of features.
theta = theta - (alpha / m) * (X' * (X * theta - y));
Normal Equation
theta = pinv(X'*X)*X'*y;
Ex2
1.1 Visualizing the data
% Find Indices of Positive and Negative Examples
pos = find(y==1); neg = find(y == 0); % index vectors of the examples labeled 1 and 0
% Plot positive examples as black '+' markers
plot(X(pos, 1), X(pos, 2), 'k+','LineWidth', 2, ...
'MarkerSize', 7);
% Plot negative examples as yellow-filled circles
plot(X(neg, 1), X(neg, 2), 'ko', 'MarkerFaceColor', 'y', ...
'MarkerSize', 7);
SIGMOID function
g = 1./(ones(size(z))+e.^(-z));
1.2.2 Cost function and gradient
% Unregularized logistic-regression cost and gradient.
% Fix: the '.-' operator (removed in Octave 7+, invalid in MATLAB) is
% replaced by the portable '-'.
H = sigmoid(X * theta);                                   % hypothesis, m x 1
J = (1/m) * sum(-y .* log(H) - (1 - y) .* log(1 - H));    % cross-entropy cost
grad = (1/m) * (X' * (H - y));                            % gradient, same shape as theta
Evaluating logistic regression
% Predict binary labels by thresholding the sigmoid output at 0.5.
% Fix: the find()-and-index version silently relied on p being
% pre-initialized to zeros; an explicit vectorized comparison sets every
% entry of p and is simpler.
p = double(sigmoid(X * theta) >= 0.5);
2 Regularized logistic regression
% Regularized logistic-regression cost and gradient. The bias parameter
% theta(1) is excluded from the penalty, per convention.
% Portability fixes: '.-' (removed in Octave 7+) replaced with '-', and the
% Octave-only '(expr)(range)' inline indexing replaced with a plain
% in-place update of grad(2:end).
H = sigmoid(X * theta);
theta_reg = theta(2:end);                 % all parameters except the bias
J = (1/m) * sum(-y .* log(H) - (1 - y) .* log(1 - H)) ...
    + (lambda / (2*m)) * sum(theta_reg .^ 2);
grad = (1/m) * (X' * (H - y));
grad(2:end) = grad(2:end) + (lambda / m) * theta_reg;
2.5 Optional (ungraded) exercises
Notice the changes in the decision boundary as you vary lambda. With a small lambda, you should find that the classifier gets almost every training example correct, but draws a very complicated boundary, thus overfitting the data.
作業:Multi-class Classification and Neural Networks
1.3.3 Vectorizing regularized logistic regression
% Vectorized regularized logistic-regression cost and gradient.
% Fix: the Octave-only '(expr)(range)' inline indexing on the gradient is
% invalid in MATLAB; update grad(2:end) in place instead.
H = sigmoid(X * theta);
J = sum(-y .* log(H) - (1 - y) .* log(1 - H)) / m ...
    + (lambda / (2*m)) * sum(theta(2:end) .^ 2);
grad = X' * (H - y) / m;
grad(2:end) = grad(2:end) + (lambda / m) * theta(2:end);
1.4 One-vs-all Classification
% Train one binary classifier per class (one-vs-all). Row k of all_theta
% holds the parameters of the classifier that treats class k as positive.
for k = 1:num_labels
  labels_k = (y == k);                                 % binary labels for class k
  options = optimset('GradObj', 'on', 'MaxIter', 50);
  theta_k = fmincg(@(t) lrCostFunction(t, X, labels_k, lambda), ...
                   zeros(n + 1, 1), options);
  all_theta(k, :) = theta_k';
end
1.4.1 One-vs-all Prediction
% One-vs-all prediction: each example gets the class whose classifier
% outputs the highest probability. Fixes: the unused max value is discarded
% with '~', and the index output is assigned straight to p instead of a
% variable named 'i' (which shadows the built-in imaginary unit).
[~, p] = max(sigmoid(X * all_theta'), [], 2);
2 Neural Networks
% Feed-forward prediction through a 3-layer neural network.
X = [ones(m, 1) X];                       % add bias column to the input layer
a2 = sigmoid(Theta1 * X');                % hidden-layer activations
a3 = sigmoid(Theta2 * [ones(1, m); a2]);  % output activations, num_labels x m
% Predicted class = index of the strongest output unit for each example.
% Fixes: '~' drops the unused max value, and the index no longer shadows
% the built-in 'i'.
[~, p] = max(a3', [], 2);
Programming Exercise 4: Neural Networks Learning
% --- Forward pass over all examples (vectorized) ---
a1_all = [ones(m, 1) X];                 % a1: inputs with bias column, m x (n+1)
z2_all = a1_all * Theta1';               % z2 for every example
a2_all = sigmoid(z2_all);                % a2: hidden activations
z3_all = [ones(m, 1) a2_all] * Theta2';  % z3 for every example
H = sigmoid(z3_all);                     % a3: network outputs, m x num_labels
% One-hot encode the labels: row i has a 1 in column y(i).
yy = zeros(m, num_labels);
for i = 1:m
  yy(i, y(i)) = 1;
end
% Regularized cross-entropy cost; the bias columns of Theta1/Theta2 are
% excluded from the penalty.
J = 1/m * sum(sum(-yy .* log(H) - (1 - yy) .* log(1 - H))) ...
    + lambda/2/m * (sum(sum(Theta1(:, 2:end).^2)) + sum(sum(Theta2(:, 2:end).^2)));
% --- Backpropagation, one example at a time ---
% Portability fix: the original used yy'(:, row) (transpose-then-index in a
% single expression), which is Octave-only; index the row first instead.
for row = 1:m
  a1 = [1 X(row, :)]';                            % input activation with bias
  z2 = Theta1 * a1;
  a2 = [1; sigmoid(z2)];                          % hidden activation with bias
  z3 = Theta2 * a2;
  a3 = sigmoid(z3);                               % output activation
  delta3 = a3 - yy(row, :)';                      % output-layer error
  delta2 = (Theta2' * delta3) .* sigmoidGradient([1; z2]);
  delta2 = delta2(2:end);                         % drop the bias-unit error
  Theta1_grad = Theta1_grad + delta2 * a1';
  Theta2_grad = Theta2_grad + delta3 * a2';
end
% Average the accumulated Theta1 gradient over all m examples.
Theta1_grad = Theta1_grad ./ m;
% Add L2 regularization to all but the bias column of Theta1.
Theta1_grad(:, 2:end) = Theta1_grad(:, 2:end) + (lambda/m) * Theta1(:, 2:end);
% Average the accumulated Theta2 gradient over all m examples.
Theta2_grad = Theta2_grad ./ m;
% NOTE(review): the matching regularization line for Theta2_grad(:, 2:end)
% is not visible in this excerpt — presumably it follows below; confirm.