最近需要做兩個字符串的相似度比較,涉及到了這個算法,於是寫一篇博客記錄一下。
算法簡介
Edit Distance(編輯距離)算法,又稱 Levenshtein Distance 算法,以下簡稱 LD。LD 用來衡量兩個字符串的相似性:它表示把一個字符串變成另一個字符串所需的最少單字符操作(插入、刪除、替換)次數,次數越少,兩個字符串越相似。
距離的概念
算法的思路
// Compare the i-th character of string_1 with the j-th character of string_2:
// temp becomes 0 when the two characters are equal and 1 when they differ.
if(string_1[i]==string_2[j]){
temp = 0;
}else{
temp = 1;
}
接下來就是關鍵了:算法實現
// Computes a similarity score for two strings from their Levenshtein
// (edit) distance.
//
// Parameters:
//   string_1, string_2 - the strings to compare.
//
// Returns:
//   1 - distance / max(length(string_1), length(string_2)).
//   Identical strings (including two empty strings) yield 1.0f; strings that
//   share nothing yield 0.0f. If the working buffers cannot be allocated, a
//   message box is shown and 0.0f is returned.
float CMy0121124829Dlg::Similarity(CString string_1, CString string_2)
{
	const int m = string_1.GetLength();
	const int n = string_2.GetLength();
	const int longest = (m > n) ? m : n;

	// Two empty strings are identical. Handling this up front also avoids
	// the 0 / 0 division (NaN) in the final normalisation.
	if (longest == 0)
	{
		return 1.0f;
	}

	// Each cell of the distance table depends only on the row above it and on
	// the cell to its left, so two rows of n + 1 entries are sufficient.
	int* previous = NULL;
	int* current = NULL;
	try
	{
		previous = new int[n + 1];
		current = new int[n + 1];
	}
	catch (const std::bad_alloc&)
	{
		// Release whatever was obtained before the failure and stop here:
		// the buffers must not be touched after a failed allocation.
		delete[] previous;
		delete[] current;
		MessageBox(_T("內存不足,請重啓程序後再試!"));
		return 0.0f;
	}

	// Row 0: turning the empty prefix of string_1 into the first j characters
	// of string_2 takes j insertions.
	for (int j = 0; j <= n; j++)
	{
		previous[j] = j;
	}

	for (int i = 1; i <= m; i++)
	{
		// Column 0: turning the first i characters of string_1 into the empty
		// string takes i deletions.
		current[0] = i;

		for (int j = 1; j <= n; j++)
		{
			// Substitution is free when the characters already match.
			const int cost = (string_1[i - 1] == string_2[j - 1]) ? 0 : 1;

			current[j] = Min_in_three(previous[j - 1] + cost,  // substitute / match
			                          current[j - 1] + 1,      // insert
			                          previous[j] + 1);        // delete
		}

		// The row just filled becomes the "previous" row of the next pass.
		int* const finished = current;
		current = previous;
		previous = finished;
	}

	// After the final swap (or with m == 0, straight from initialisation)
	// the last completed row is `previous`.
	const int dif = previous[n];

	// Arrays obtained with new[] must be released with delete[].
	delete[] previous;
	delete[] current;

	return 1 - static_cast<float>(dif) / longest;
}