My biggest takeaway: Python really is incredibly convenient, and I finally understand why people doing machine learning picked Python rather than Java or some other language. This homework uses the subgradient method to study two different objective functions. The algorithm itself is simple, but Java lacks many of Python's handy features, so writing it was a bit of a slog.
While debugging I hit a pit that beginners fall into easily: in Java, 1/2 evaluates to 0, not 0.5! Since 1 and 2 are both ints, the division is done in integer arithmetic and the result is truncated, so you have to write 1.0/2 or 1/2.0 to get the correct value.
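A minimal demo of the pitfall (plain Java, nothing project-specific assumed):

double wrong = 1 / 2;    // int / int truncates first, so wrong == 0.0
double right = 1.0 / 2;  // one double operand promotes the division, right == 0.5
System.out.println(wrong + " " + right);  // prints 0.0 0.5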
The Function abstract class
import Jama.Matrix;
public abstract class Function {
    // Returns a subgradient of the function at x.
    abstract Matrix subGrad(Matrix x);
    // Evaluates the function at x.
    abstract double value(Matrix x);
    // Dimension of the variable x.
    abstract int getDim();

    // Element-wise sign: +1 for positive entries, -1 for negative, 0 stays 0.
    static Matrix sign(Matrix vec) {
        double[][] arr = vec.getArrayCopy();
        for (int i = 0; i < arr.length; i++) {
            for (int j = 0; j < arr[i].length; j++) {
                if (arr[i][j] > 0) {
                    arr[i][j] = 1;
                } else if (arr[i][j] < 0) {
                    arr[i][j] = -1;
                }
            }
        }
        return new Matrix(arr);
    }
}
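Why sign() gives a valid subgradient: for f(t) = |t| the subdifferential is {1} for t > 0, {-1} for t < 0, and the whole interval [-1, 1] at t = 0. Returning 0 at t = 0 is simply one admissible choice, and it is the one sign() makes.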
The first function
import Jama.Matrix;
public class Func1 extends Function {
    Matrix A;
    Matrix b;
    int dim = 100;

    public int getDim() {
        return dim;
    }

    public Func1() {
        // Random problem data: entries drawn uniformly from [0, 1).
        A = Matrix.random(500, 100);
        b = Matrix.random(500, 1);
    }

    // f(x) = ||Ax - b||_1
    // norm1() of a column vector is the sum of absolute entries.
    public double value(Matrix x) {
        return A.times(x).minus(b).norm1();
    }

    // Subgradient: A^T * sign(Ax - b)
    public Matrix subGrad(Matrix x) {
        Matrix mat = Function.sign(A.times(x).minus(b));
        return A.transpose().times(mat);
    }
}
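A quick sanity check of Func1 (a minimal sketch, assuming the classes above plus Jama are on the classpath; Matrix(m, n) constructs a zero matrix):

Function f = new Func1();
Matrix x0 = new Matrix(100, 1);                       // x = 0
System.out.println(f.value(x0));                      // equals ||b||_1, since Ax - b = -b
System.out.println(f.subGrad(x0).getRowDimension());  // 100, matches getDim()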
The second function
import Jama.Matrix;
public class Func2 extends Function {
    Matrix A;
    Matrix b;
    double c;
    int dim = 1024;

    public int getDim() {
        return dim;
    }

    public Func2() {
        // Build a sparse ground-truth vector w with every 10th entry set to 1,
        // then take b = A*w so the problem has a known sparse solution.
        double[][] m = new double[1024][1];
        A = Matrix.random(512, 1024);
        for (int i = 0; i < 1024; i += 10) {
            m[i][0] = 1;
        }
        Matrix w = new Matrix(m);
        b = A.times(w);
        c = 1e-3;
    }

    // f(x) = (1/2)*||Ax - b||_2^2 + c*||x||_1
    // norm2() of a column vector is its Euclidean norm.
    // Note the 1.0/2: a plain 1/2 would truncate to 0 (the pitfall above).
    public double value(Matrix x) {
        Matrix mat = A.times(x).minus(b);
        return (1.0 / 2) * mat.norm2() * mat.norm2() + c * x.norm1();
    }

    // Subgradient: A^T(Ax - b) + c*sign(x)
    public Matrix subGrad(Matrix x) {
        return A.transpose().times(A.times(x).minus(b)).plus(Function.sign(x).times(c));
    }
}
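How subGrad follows from value: the smooth part (1/2)*||Ax - b||_2^2 has gradient A^T(Ax - b), and a subgradient of c*||x||_1 is c*sign(x) (applying the |t| rule above entrywise); their sum is exactly what subGrad returns.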
The algorithm
import Jama.Matrix;
public class Subgradient {
    int times = 0;
    final int MAX = 5000;
    final double ERR = 1e-6;
    Matrix x;

    private Subgradient(Function f, double a) {
        // Start from the zero vector.
        Matrix x1 = new Matrix(f.getDim(), 1);
        do {
            x = x1;
            Matrix g = f.subGrad(x);
            // Normalized subgradient step: x1 = x - a * g / ||g||
            x1 = x.minus(g.times(a / g.norm2()));
            times++;
            // Stop when the relative change in f falls below ERR, or at MAX iterations.
        } while (Math.abs(1 - f.value(x1) / f.value(x)) > ERR && times < MAX);
        System.out.print(Math.abs(1 - f.value(x1) / f.value(x)) + " ");
    }

    private static void loop(Function f, float[] rate) {
        for (float i : rate) {
            long startTime = System.currentTimeMillis();
            Subgradient s = new Subgradient(f, i);
            long endTime = System.currentTimeMillis();
            System.out.println(s.times);
            System.out.println("Runtime: " + (endTime - startTime) + "ms");
        }
    }

    public static void main(String[] args) {
        float[] rate1 = {0.1f, 0.01f, 0.001f};
        float[] rate2 = {0.1f, 0.01f};
        Function f1 = new Func1();
        Function f2 = new Func2();
        loop(f1, rate1);
        loop(f2, rate2);
    }
}
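In symbols, the update performed by the constructor is x_{k+1} = x_k - a * g_k / ||g_k||_2, a subgradient step of fixed length a along the normalized subgradient g_k, and it stops once the relative change |1 - f(x_{k+1}) / f(x_k)| drops below ERR or the iteration count reaches MAX.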
The Python version
# -*- coding:utf-8 -*-
'''
Created on 2019.5.26
@author: Administrator
'''
import numpy as np
import time
from numpy.linalg import norm
import matplotlib.pyplot as plt
from abc import ABCMeta, abstractmethod

MAX_N_O_I = 1500  # maximum number of iterations
E = 1e-8          # relative-change tolerance


class Function(metaclass=ABCMeta):
    @abstractmethod
    def getDim(self):
        pass

    @abstractmethod
    def __call__(self, *args, **kwargs):
        pass

    @abstractmethod
    def subGrad(self, *args, **kwargs):
        pass


class Func1(Function):
    def __init__(self):
        self.coef_A = np.random.randn(500, 100)
        self.coef_b = np.random.randn(500, 1)
        self.dim = 100

    def getDim(self):
        return self.dim

    # Subgradient of ||Ax - b||_1: A^T sign(Ax - b)
    def subGrad(self, x: np.ndarray) -> np.ndarray:
        vec = self.coef_A.dot(x) - self.coef_b
        vec = np.sign(vec)
        return self.coef_A.T.dot(vec)

    def __call__(self, x: np.ndarray) -> float:
        return norm(self.coef_A.dot(x) - self.coef_b, ord=1)


class Func2(Function):
    def __init__(self):
        # Here b is the sparse ground-truth vector itself, so the
        # objective is written in terms of A(x - b) rather than Ax - b.
        self.a = np.random.randn(512, 1024)
        self.b = np.zeros((1024, 1))
        for i in range(1, 1024, 10):
            self.b[i] = 1
        self.c = 1e-3
        self.dim = 1024

    def getDim(self):
        return self.dim

    # Subgradient: A^T A(x - b) + c * sign(x)
    def subGrad(self, x: np.ndarray) -> np.ndarray:
        sign = np.sign(x)
        return self.a.T.dot(self.a).dot(x - self.b) + self.c * sign

    def __call__(self, x: np.ndarray) -> float:
        return (1 / 2) * norm(self.a.dot(x - self.b)) ** 2 + self.c * norm(x, ord=1)


def loop(fun):
    # Decorator: run the wrapped generator once per step size,
    # time each run, and plot the per-iteration objective values.
    def func(*args):
        global a  # step size shared with the wrapped generator
        for a in [0.1, 0.01, 0.001]:
            start = time.perf_counter()
            y = list(fun(*args))
            end = time.perf_counter()
            print(f'Runtime: {end - start}s')
            plt.plot(y, label=a)
        plt.legend(loc='best')
        plt.show()
        plt.close()
    return func


@loop
def minimize(f: Function):
    x = np.zeros((f.getDim(), 1))
    times = 0
    while times < MAX_N_O_I:
        g = f.subGrad(x)
        x1 = x - a * g / norm(g)  # normalized subgradient step
        yield f(x)
        times += 1
        if abs(1 - f(x1) / f(x)) < E:
            break
        x = x1


if __name__ == '__main__':
    minimize(Func1())
    minimize(Func2())
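One small difference worth noticing: the Python version sweeps the same three step sizes [0.1, 0.01, 0.001] for both problems, whereas the Java main uses {0.1, 0.01, 0.001} for Func1 but only {0.1, 0.01} for Func2.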