網上找了一圈,發現求向量間相似度的代碼基本都要求輸入兩個字符串(String),而不能直接對兩個向量求相似度。
於是我和我的好基友花了一下午,寫出了一個求餘弦相似度的 Java 程序,原理就是向量運算:兩個向量的點積除以它們模長的乘積。話不多說,代碼奉上。
package com.ansj.vec;
import java.io.*;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import com.ansj.vec.domain.WordEntry;
import java.io.IOException;
import java.util.Arrays;
import com.ansj.vec.Word2VEC;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
/**
 * Demo program: averages the word vectors of a short phrase and prints the
 * cosine similarity between that average and the vector of a target word.
 *
 * <p>All vector helpers in this class are pure: they never modify the arrays
 * passed to them, so arrays handed out by the model are not altered here.
 */
public class test {

    /**
     * Loads the word-vector model, averages the vectors of the phrase words
     * and prints the cosine similarity of that average to the target word.
     *
     * @param args unused
     * @throws IOException declared for the model-loading call
     */
    public static void main(String[] args) throws IOException {
        Word2VEC w1 = new Word2VEC();
        w1.loadJavaModel("C:/Users/Administrator/Desktop/Data/paoVector");

        float[] target = w1.getWordVector("逃跑");
        String[] words = {"我", "喜歡", "和", "運動"};

        // Accumulate the phrase vectors. sum() tolerates null operands, so a
        // word without a vector is simply skipped.
        // NOTE(review): presumably getWordVector returns null for words that
        // are not in the model -- confirm against Word2VEC.
        float[] total = null;
        for (String word : words) {
            total = sum(total, w1.getWordVector(word));
        }

        if (target == null || total == null) {
            System.err.println("No vector available for the target word or for any phrase word.");
            return;
        }

        // Size the mean from the actual vector length instead of a hard-coded
        // dimension, so any model dimensionality works.
        float[] mean = new float[total.length];
        for (int i = 0; i < total.length; i++) {
            mean[i] = total[i] / words.length;
        }

        System.out.print(sim(mean, target));
    }

    /**
     * Returns the element-wise sum of two vectors as a new array.
     *
     * @param center first vector, may be {@code null}
     * @param fs     second vector, may be {@code null}
     * @return {@code null} if both inputs are {@code null}; a copy of the
     *         non-null input if exactly one is {@code null}; otherwise a new
     *         array holding {@code center[i] + fs[i]}
     * @throws IllegalArgumentException if both are non-null with different lengths
     */
    private static float[] sum(float[] center, float[] fs) {
        if (center == null && fs == null) {
            return null;
        }
        if (fs == null) {
            return center.clone();
        }
        if (center == null) {
            return fs.clone();
        }
        requireSameLength(center, fs);
        // Write into a fresh array: the inputs may be shared with the caller
        // (or with the model) and must not be changed.
        float[] result = new float[center.length];
        for (int i = 0; i < center.length; i++) {
            result[i] = center[i] + fs[i];
        }
        return result;
    }

    /**
     * Computes the dot product of two vectors without modifying either one.
     *
     * @param center first vector, must not be {@code null}
     * @param fs     second vector, must not be {@code null}
     * @return the sum over {@code i} of {@code center[i] * fs[i]}, accumulated in double precision
     * @throws IllegalArgumentException if either vector is {@code null} or the lengths differ
     */
    private static double cheng(float[] center, float[] fs) {
        requireSameLength(center, fs);
        double dot = 0;
        for (int i = 0; i < center.length; i++) {
            dot += (double) center[i] * fs[i];
        }
        return dot;
    }

    /**
     * Computes the Euclidean (L2) norm of a vector.
     *
     * @param center the vector, must not be {@code null}
     * @return the square root of the sum of squared elements
     * @throws IllegalArgumentException if {@code center} is {@code null}
     */
    private static double qumo(float[] center) {
        if (center == null) {
            throw new IllegalArgumentException("vector must not be null");
        }
        double sumOfSquares = 0;
        for (int i = 0; i < center.length; i++) {
            sumOfSquares += (double) center[i] * center[i];
        }
        return Math.sqrt(sumOfSquares);
    }

    /**
     * Computes the cosine similarity of two vectors:
     * {@code dot(center, fs) / (|center| * |fs|)}.
     *
     * @param center first vector, must not be {@code null}
     * @param fs     second vector, must not be {@code null}
     * @return the cosine similarity, or {@code 0.0} when either vector has
     *         zero norm (the quotient would otherwise be NaN)
     * @throws IllegalArgumentException if either vector is {@code null} or the lengths differ
     */
    private static double sim(float[] center, float[] fs) {
        double dot = cheng(center, fs);
        double denominator = qumo(center) * qumo(fs);
        if (denominator == 0) {
            return 0.0;
        }
        return dot / denominator;
    }

    /** Rejects null vectors and vectors of different lengths. */
    private static void requireSameLength(float[] left, float[] right) {
        if (left == null || right == null) {
            throw new IllegalArgumentException("vectors must not be null");
        }
        if (left.length != right.length) {
            throw new IllegalArgumentException(
                    "vector lengths differ: " + left.length + " vs " + right.length);
        }
    }
}