時間:2014.09.05
一、題目
給定一個query和一個text,均由小寫字母組成。要求在text中找出以同樣的順序連續出現在query中的最長連續字母序列的長度。例如, query爲“acbac”,text爲“acaccbabb”,那麼text中的“cba”爲最長的連續出現在query中的字母序列,因此,返回結果應該爲其長度3。
二、分析
1.先是RK(Rabin-Karp)算法,有關RK算法的原理留待後文再詳細說明
// Rabin-Karp substring search.
//
// Compares a rolling polynomial hash of each length-|P| window of T against
// the hash of P, and confirms every hash hit with a character comparison so
// that hash collisions can never produce a false positive.
//
// @param T  text to search in
// @param P  pattern to search for
// @return   true if P occurs as a contiguous substring of T. An empty P
//           always matches; a P longer than T never matches.
bool RabinKarpMatch(const std::string& T, const std::string& P)
{
    static const int d = 128;      // radix of the polynomial hash
    static const int q = 6999997;  // modulus applied to every hash value

    const int n = static_cast<int>(T.length());
    const int m = static_cast<int>(P.length());

    // A pattern longer than the text cannot occur in it. Returning here also
    // keeps the preprocessing loop below from indexing past the end of T.
    if (m > n)
        return false;

    // h = d^(m-1) mod q: the weight of the leading character of a window.
    long long h = 1;
    for (int i = 1; i < m; ++i)
        h = (h * d) % q;

    // Hash of the pattern (p) and of the first window of the text (t).
    // Characters go through unsigned char so bytes >= 0x80 stay non-negative.
    long long p = 0;
    long long t = 0;
    for (int i = 0; i < m; ++i)
    {
        p = (p * d + static_cast<unsigned char>(P[i])) % q;
        t = (t * d + static_cast<unsigned char>(T[i])) % q;
    }

    for (int s = 0; s + m <= n; ++s)  // s is the start of the current window
    {
        // Equal hashes only suggest a match; verify the characters.
        if (t == p && T.compare(s, m, P) == 0)
            return true;

        // Slide the window one character to the right, unless this was the
        // last window (there is no T[s + m] to bring in then).
        if (s + m < n)
        {
            const long long lead = (static_cast<unsigned char>(T[s]) * h) % q;
            // "+ q" keeps the intermediate value non-negative before the modulo.
            t = (d * (t - lead + q) + static_cast<unsigned char>(T[s + m])) % q;
        }
    }
    return false;
}
2.然後是頂層實現
// Length of the longest contiguous piece of `query` that also occurs
// contiguously in `text`, found by probing substrings of `query` with
// RabinKarpMatch.
//
// @param text   string that is searched
// @param query  string whose substrings are looked up in `text`
// @return       length of the longest common substring; 0 if either string
//               is empty or they share no character
size_t GetLargestCommomSubLen(const std::string& text, const std::string& query)
{
    const size_t query_len = query.length();
    const size_t text_len = text.length();
    if (text_len == 0 || query_len == 0)
        return 0;

    size_t max_len = 0;
    for (size_t start = 0; start < query_len; ++start)
    {
        // A candidate can be neither longer than what is left of the query
        // nor longer than the text itself, so the pattern handed to
        // RabinKarpMatch never exceeds the text length.
        const size_t remaining = query_len - start;
        const size_t limit = remaining < text_len ? remaining : text_len;

        // Only lengths above the current best can improve the answer. If the
        // substring of some length is absent from the text, every longer
        // substring with the same start is absent too, so stop at the first
        // miss.
        for (size_t len = max_len + 1; len <= limit; ++len)
        {
            if (!RabinKarpMatch(text, query.substr(start, len)))
                break;
            max_len = len;
        }
    }
    return max_len;
}
三、動態規劃
// Longest common substring length, computed with a full dynamic-programming
// table.
//
// L[i][j] is the length of the longest common run that ends exactly at
// text[i] and query[j]: 0 when the two characters differ, otherwise one more
// than L[i-1][j-1] (or 1 on the first row / first column).
//
// @param text   first string
// @param query  second string
// @return       length of the longest contiguous run shared by both strings;
//               0 if either string is empty
int GetLongestCommSubstrLen(const std::string& text, const std::string& query)
{
    const int text_len = static_cast<int>(text.length());
    const int query_len = static_cast<int>(query.length());
    if (text_len == 0 || query_len == 0)
        return 0;

    // Every cell starts at 0, which is already the right value for a mismatch.
    std::vector<std::vector<int>> L(text_len, std::vector<int>(query_len, 0));

    int longest = 0;
    for (int i = 0; i < text_len; ++i)
    {
        for (int j = 0; j < query_len; ++j)
        {
            if (text[i] != query[j])
                continue;

            // On the first row or column there is no diagonal neighbour, so
            // a match starts a new run of length 1.
            L[i][j] = (i > 0 && j > 0) ? L[i - 1][j - 1] + 1 : 1;
            if (longest < L[i][j])
                longest = L[i][j];
        }
    }
    return longest;
}
// The full-table approach trades space for time, but its memory use can be
// reduced further and is not as bad as it first looks. Just as computing a
// Fibonacci number only needs the two preceding terms (storing anything more
// is wasted space), the recurrence L[i][j] = L[i-1][j-1] + 1 shows that a row
// of L depends only on the row before it, which has already been computed.
// Two rows of storage are therefore enough: once a new row has been computed
// it becomes the "previous" row for the next iteration through a swap.
//
// Longest common substring length using two rolling rows.
//
// @param text   first string
// @param query  second string
// @return       length of the longest contiguous run shared by both strings;
//               0 if either string is empty
int GetLongestCommSubstrLen(const std::string& text, const std::string& query)
{
    const int text_len = static_cast<int>(text.length());
    const int query_len = static_cast<int>(query.length());
    if (text_len == 0 || query_len == 0)
        return 0;

    // Rows are shifted by one: cell j + 1 belongs to query[j]. Cell 0 is never
    // written and stays 0, so the first query character needs no special case.
    std::vector<int> prev(query_len + 1, 0);  // row for text[i - 1]
    std::vector<int> cur(query_len + 1, 0);   // row for text[i]

    int longest = 0;
    for (int i = 0; i < text_len; ++i)
    {
        for (int j = 0; j < query_len; ++j)
        {
            // Every cell is written on every row, including the explicit 0 on
            // a mismatch, so no value from an older row survives the swap.
            cur[j + 1] = (text[i] == query[j]) ? prev[j] + 1 : 0;
            if (longest < cur[j + 1])
                longest = cur[j + 1];
        }
        cur.swap(prev);  // the row just computed becomes the previous row
    }
    return longest;
}