有两个不同的字符串,通过使用一套操作方法可以把两个字符串变成一样的。
1) "a" 和 "b" ==> 把a变成b,或把b变成a 变化了一次
2) "abc" 和 "ade" ==> 把bc变成de,或把de变成bc 变化了两次
3) "abcd" 和 "abcde" ==> 删除e 变化了一次
每操作一次,两个字符串的距离就加 1。
1) "a" 和 "b" 的距离为 1
2) "abc" 和 "ade" 的距离为 2
3) "abcd" 和 "abcde" 的距离为 1
相似度为距离的倒数。
思考方法:
1 使用递归的思考方式:
字符串A和B,假设A中的一个字符和B中的一个字符比较后不一样。
可以使用下面几种操作将它们变为一样的:
- 修改 A的字符
- 修改 B的字符
- 删除 A的字符
- 删除 B的字符
- 增加 A中的一个字符
- 增加 B中的一个字符
因为只需要计算字符串间的距离,所以并不需要记录具体采用了哪一种操作:无论采用哪种操作,A和B的距离都增加 1。
t1 ==> 递归时,考虑A串不动,操作B串
t2 ==> 递归时,考虑B串不动,操作A串
t3 ==> 递归时,考虑同时操作A串和B串
/**
 * Recursive edit distance between strA[nABegin..nAEnd] and
 * strB[nBBegin..nBEnd]; both index ranges are inclusive, so an empty range is
 * expressed as begin > end.
 *
 * @param strA     first string
 * @param nABegin  index of the first character of A still to be compared
 * @param nAEnd    index of the last character of A
 * @param strB     second string
 * @param nBBegin  index of the first character of B still to be compared
 * @param nBEnd    index of the last character of B
 * @return number of single-character operations needed to make the two
 *         ranges equal
 */
int calc_distance(char* strA, int nABegin, int nAEnd, char* strB, int nBBegin, int nBEnd)
{
    // Termination: once one range is exhausted, every character left in the
    // other range costs exactly one operation.
    if (nABegin > nAEnd)
    {
        return (nBBegin > nBEnd) ? 0 : nBEnd - nBBegin + 1;
    }
    if (nBBegin > nBEnd)
    {
        return nAEnd - nABegin + 1;
    }

    // Equal leading characters cost nothing; move both ranges forward.
    if (strA[nABegin] == strB[nBBegin])
    {
        return calc_distance(strA, nABegin+1, nAEnd, strB, nBBegin+1, nBEnd);
    }

    // Leading characters differ: try the three ways of spending one operation.
    int t1 = calc_distance(strA, nABegin, nAEnd, strB, nBBegin+1, nBEnd);    // A fixed, advance B
    int t2 = calc_distance(strA, nABegin+1, nAEnd, strB, nBBegin, nBEnd);    // B fixed, advance A
    int t3 = calc_distance(strA, nABegin+1, nAEnd, strB, nBBegin+1, nBEnd);  // advance both
    return minval(t1, t2, t3) + 1;
}
2 使用动态规划的思考方式:
用二维数组 record[x][y] 记录字符串A的前 x 个字符与字符串B的前 y 个字符之间的距离,它由下面三个相邻的值推出:
record[x][y-1] ==> 用来记录,考虑A串不动,操作B串后的操作次数
record[x-1][y] ==> 用来记录,考虑B串不动,操作A串后的操作次数
record[x-1][y-1] ==> 用来记录,考虑同时操作A串和B串的操作次数
如果A的第 x 个字符与B的第 y 个字符相同,则 record[x][y] 直接等于 record[x-1][y-1];否则取上面三个值中最小的值加 1 赋值给 record[x][y]。这样把整张表计算完之后,record[lenA][lenB]就是最后的结果。
/**
 * Edit distance between the first lenA characters of strA and the first lenB
 * characters of strB, computed bottom-up with a (lenA+1) x (lenB+1) table.
 *
 * record[r][c] holds the distance between the first r characters of strA and
 * the first c characters of strB; the answer is record[lenA][lenB].
 *
 * @param strA  first string
 * @param lenA  number of characters of strA to consider
 * @param strB  second string
 * @param lenB  number of characters of strB to consider
 * @return the value stored in record[lenA][lenB]
 */
int calc_distance2(char* strA, int lenA, char* strB, int lenB)
{
    const int rowCount = lenA + 1;
    const int colCount = lenB + 1;

    // Build the table row by row; the trailing () zero-fills each row.
    int** record = new int*[rowCount];
    for (int r = 0; r < rowCount; ++r)
    {
        record[r] = new int[colCount]();
    }

    // Borders: against an empty prefix the distance is the prefix length.
    for (int r = 1; r < rowCount; ++r)
    {
        record[r][0] = r;
    }
    for (int c = 1; c < colCount; ++c)
    {
        record[0][c] = c;
    }
    record[0][0] = 0;

    // Interior cells, row-major, each derived from its three neighbours.
    for (int r = 1; r < rowCount; ++r)
    {
        for (int c = 1; c < colCount; ++c)
        {
            const bool sameChar = (strA[r - 1] == strB[c - 1]);
            record[r][c] = sameChar
                ? record[r - 1][c - 1]
                : minval(record[r - 1][c - 1], record[r][c - 1], record[r - 1][c]) + 1;
        }
    }

    const int result = record[lenA][lenB];

    // Release every row, then the row-pointer array itself.
    for (int r = 0; r < rowCount; ++r)
    {
        delete[] record[r];
    }
    delete[] record;

    return result;
}
#include <cstring>
#include <iostream>
using namespace std;
/**
 * Returns the smallest of three integers.
 *
 * @param a  first candidate
 * @param b  second candidate
 * @param c  third candidate
 * @return the minimum of a, b and c
 */
int minval(int a, int b, int c)
{
    /* Smaller of the first two, then compare that against the third. */
    int smallest = (a < b) ? a : b;
    return (c < smallest) ? c : smallest;
}
/**
 * Recursive edit distance between strA[nABegin..nAEnd] and
 * strB[nBBegin..nBEnd] (index ranges inclusive; begin > end means empty).
 *
 * Prints one "compare: x:y" line to standard output for every pair of
 * characters it compares.
 *
 * @param strA     first string
 * @param nABegin  index of the first character of A still to be compared
 * @param nAEnd    index of the last character of A
 * @param strB     second string
 * @param nBBegin  index of the first character of B still to be compared
 * @param nBEnd    index of the last character of B
 * @return number of single-character operations needed to make the two
 *         ranges equal
 */
int calc_distance1(char* strA, int nABegin, int nAEnd, char* strB, int nBBegin, int nBEnd)
{
    const bool aExhausted = nABegin > nAEnd;
    const bool bExhausted = nBBegin > nBEnd;

    // One side is used up: the distance is whatever remains on the other side.
    if (aExhausted)
    {
        return bExhausted ? 0 : (nBEnd - nBBegin + 1);
    }
    if (bExhausted)
    {
        return nAEnd - nABegin + 1;
    }

    cout << "compare: " << strA[nABegin] << ":" << strB[nBBegin] << endl;

    if (strA[nABegin] != strB[nBBegin])
    {
        // Mismatch: explore the three continuations in the same order as
        // before (advance B, advance A, advance both) and pay one operation.
        const int advanceB = calc_distance1(strA, nABegin, nAEnd, strB, nBBegin + 1, nBEnd);
        const int advanceA = calc_distance1(strA, nABegin + 1, nAEnd, strB, nBBegin, nBEnd);
        const int advanceBoth = calc_distance1(strA, nABegin + 1, nAEnd, strB, nBBegin + 1, nBEnd);
        return minval(advanceB, advanceA, advanceBoth) + 1;
    }

    // Match: no cost, continue with the rest of both ranges.
    return calc_distance1(strA, nABegin + 1, nAEnd, strB, nBBegin + 1, nBEnd);
}
/**
 * Edit distance between the first lenA characters of strA and the first lenB
 * characters of strB, computed bottom-up with a (lenA+1) x (lenB+1) table.
 *
 * record[i][j] holds the distance between the first i characters of strA and
 * the first j characters of strB; the answer is record[lenA][lenB].
 *
 * A null string or a negative length is treated as the empty string, so the
 * function never indexes outside the table.
 *
 * @param strA  first string (only read, never modified)
 * @param lenA  number of characters of strA to consider
 * @param strB  second string (only read, never modified)
 * @param lenB  number of characters of strB to consider
 * @return number of single-character operations needed to make the two
 *         prefixes equal
 */
int calc_distance2(const char* strA, int lenA, const char* strB, int lenB)
{
    // Normalise degenerate input up front: a negative length would otherwise
    // make record[lenA][lenB] an out-of-bounds access.
    if (!strA || lenA < 0)
    {
        lenA = 0;
    }
    if (!strB || lenB < 0)
    {
        lenB = 0;
    }

    int** record = new int*[lenA+1];
    for (int i = 0; i <= lenA; i++)
    {
        // No zero-fill needed: every cell is written below before it is read.
        record[i] = new int[lenB+1];
    }

    // Borders: against an empty prefix the distance is the prefix length.
    for (int i = 0; i <= lenA; i++)
    {
        record[i][0] = i;
    }
    for (int j = 0; j <= lenB; j++)
    {
        record[0][j] = j;
    }

    for (int i = 1; i <= lenA; i++)
    {
        for (int j = 1; j <= lenB; j++)
        {
            if (strA[i-1] == strB[j-1])
            {
                // Same character: nothing to pay on top of the shorter prefixes.
                record[i][j] = record[i-1][j-1];
            }
            else
            {
                // Cheapest of the three neighbouring sub-problems plus one operation.
                record[i][j] = minval(record[i-1][j-1], record[i][j-1], record[i-1][j]) + 1;
            }
        }
    }

    int ret = record[lenA][lenB];

    for (int i = 0; i <= lenA; i++)
    {
        delete[] record[i];
    }
    delete[] record;

    return ret;
}
/**
 * Demo driver: computes the edit distance of one hard-coded pair of strings
 * with calc_distance2 and prints the result.
 *
 * @return 0
 */
int main()
{
    // Other pairs worth trying: ("abc", "abcd"), ("abce", "abcd"),
    // ("abcwyz", "abcmno"), ("xxa1b2c3d4", "abcd").
    // Arrays rather than pointers to string literals, so the data may legally
    // be passed through the non-const char* parameters.
    char testA[] = "a1b2c3d4";
    char testB[] = "abcd";

    int lenA = static_cast<int>(strlen(testA));
    int lenB = static_cast<int>(strlen(testB));

    // The recursive variant takes inclusive index ranges instead of lengths:
    // calc_distance1(testA, 0, lenA-1, testB, 0, lenB-1).
    int dist = calc_distance2(testA, lenA, testB, lenB);
    cout << "A=" << testA << " B=" << testB << " distance=" << dist << endl;

    // The value read here is never used; presumably this only keeps the
    // console window open until the user types something.
    cin >> dist;
    return 0;
}