曾經有幾次想寫後綴數組,結果被論文中縮成豆腐塊的程序嚇到了.......
現在看懂了之後, 被這個程序寫的醜陋嚇到了。
醜雖醜,但是寫的還是很精巧的, 照着打吧。
後綴數組其實就是把一個長度爲n的字符串(當然也可以是其他的,只要能比大小)的所有後綴排序後存在數組裏。
打裸是O(n*n*log(n))的。
其實如果以每個點爲開頭,分別求出向後延伸1, 2, 4 .... 的排列順序,後者是可以由前者推出的,配合基數排序,就可以把複雜度降低到O (n log (n));
當然, 代碼很精巧,也很醜。
把排序後各後綴的起始位置放在sa數組中, 用sa 推出height (height[i] = suffix(sa[i]) 與 suffix(sa[i-1]) 的最長公共前綴), 有這麼一個定理 height[ rank[i ]] >= height[ rank[i -1]]-1 , 按照原串位置 i 的順序來求, 用變量存上一個位置的height, 由於這個變量頂多減少n次, 類似kmp的複雜度分析,這是O(n) 的。
用height就可以求很多東西了。
poj 1743 求不可重疊最長重複子串。
求出height後,二分答案,把height數組 分成若干份, 每份內的height都不小於二分的數,觀察每份內,是否存在解即可。
nlogn
當然,這道題規定,如果一段數同時減去一個數後, 與另一段數相同,也算作相同,這裏就要用差分思想解決了。
# include <cstdlib>
# include <cstdio>
# include <cmath>
# include <cstring>
using namespace std;
// maxn: capacity of every work array below; oo: large value check() uses as +/- infinity.
const int maxn = 100000, oo = 1073741819;
// pret: counting-sort buckets; s: input sequence (1-based, rewritten in place by read());
// height: filled by work_height(); sa: suffix order built by work_suffix();
// stmp, lis: scratch arrays of work_suffix(); rank: inverse of sa; rk: current rank per position.
int pret[maxn], s[maxn], height[maxn], sa[maxn], stmp[maxn], lis[maxn], rank[maxn], rk[maxn];
// p: number of distinct ranks (written by work_suffix()); n: current sequence length;
// l, r, mid: binary-search state of main(); i: loop index used by check().
int p, n, l, r, mid, i;
// Reads one test case into s[] and turns it into a difference sequence.
//
// Effects on file-level globals:
//   - s and sa are zero-filled first, so nothing from the previous case survives;
//   - with `count` being the number read from input, s[i] becomes
//     s[i+1] - s[i] + 89 for 1 <= i < count, and s[count] is set to 0;
//   - n ends up as count - 1 (the number of differences), so a count of 0
//     leaves n == -1, which main() uses as its stop marker.
//
// A count that cannot be read (EOF / malformed input) or that is negative is
// treated as 0: otherwise a stale n would be re-processed forever, and a
// negative n would make `s[n] = 0` write before the start of the array.
void read()
{
    int i;
    memset(s, 0, sizeof(s));
    memset(sa, 0, sizeof(sa));
    if (scanf("%d", &n) != 1 || n < 0)
        n = 0;
    for (i = 1; i <= n; i++)
        scanf("%d", &s[i]);
    // Replace each value by the step to its successor.
    for (i = 1; i < n; i++)
        s[i] = s[i+1] - s[i];
    s[n] = 0;
    n--;
    // NOTE(review): the +89 shift presumably keeps every difference positive and
    // distinct from the 0 terminator; that relies on the input range - confirm.
    for (i = 1; i <= n; i++)
        s[i] += 89;
}
void work_suffix(int n, int m)
{
int i, j;
memset(pret, 0, sizeof(pret));
for (i = 1; i <= n; i++) pret[rk[i]=s[i]]++;
for (i = 1; i <= m; i++) pret[i]+= pret[i-1];
for (i = n; i >= 1; i--) sa[pret[rk[i]]--] = i;
for (j = 1, p = 0; p < n; j <<=1, m = p)
{
for (p = 0, i = n-j+1; i <=n; i++) lis[++p] = i;
for (i = 1; i <=n ; i++) if (sa[i]>j) lis[++p] = sa[i] - j;
memset(pret, 0, sizeof(pret));
for (i = 1; i <=n ; i++) stmp[i] = rk[lis[i]];
for (i = 1; i <=n ; i++) pret[stmp[i]]++;
for (i = 1; i <=m ; i++) pret[i]+= pret[i-1];
for (i = n; i >=1 ; i--) sa[pret[stmp[i]]--] = lis[i];
for (i = 1; i <=n ; i++) lis[i] = rk[i];
for (p = 1, rk[sa[1]] = 1, i = 2; i <= n; i++)
rk[sa[i]] = (lis[sa[i]] == lis[sa[i-1]] && lis[sa[i]+j] == lis[sa[i-1]+j])? p:++p;
}
}
void work_height()
{
int i, j, p = 0;
for (i = 1; i <= n; i++) rank[sa[i]] = i;
for (i = 1; i < n; height[rank[i++]] = p)
for (p?p--:0,j = sa[rank[i]-1]; j+p <=n+1 && i+p <=n+1 && s[j+p]==s[i+p]; p++);
}
bool check(int mid)
{
int ll,rr;
for (i = 1; i <= n; i++)
{
if (height[i] < mid) ll = oo, rr = -oo;
ll = ll > sa[i] ? sa[i]:ll;
rr = rr < sa[i] ? sa[i]:rr;
if (rr- ll >= mid) return true;
}
return false;
}
// Driver for POJ 1743: processes test cases until read() leaves n == -1.
// For each case it appends a 0 terminator, builds sa/height, binary-searches the
// largest value accepted by check(), adds one, and prints that value when it is
// at least 5, otherwise 0.
int main()
{
    freopen("1743.in", "r", stdin);
    freopen("1743.out", "w", stdout);
    while (true)
    {
        read();
        if (n == -1)
            break;

        // Terminator symbol after the last difference.
        ++n;
        s[n] = 0;

        work_suffix(n, 1000);
        work_height();

        // Largest mid for which check(mid) holds (l == 0 if none does).
        l = 0;
        r = n;
        while (l < r)
        {
            mid = (l + r + 1) >> 1;
            if (check(mid))
                l = mid;
            else
                r = mid - 1;
        }

        ++l;
        if (l >= 5)
            printf("%d\n", l);
        else
            printf("%d\n", 0);
    }
    return 0;
}
poj 3261 求至少重複k次的最長重複子串, 可重疊。
有木有發現和前面一道題是一樣的二分檢驗?
# include <cstdlib>
# include <cstdio>
# include <cstring>
using namespace std;
// oo: large value check() uses as +/- infinity; maxn: capacity of the per-position arrays.
const int oo = 1073741819, maxn = 30000;
// rank: inverse of sa; a: input sequence (1-based, each value shifted by +1 in read());
// pret: counting-sort buckets, indexed by symbol value or by rank; rk: current rank per position.
int rank[maxn], a[maxn], pret[1000000+5], rk[maxn];
// sa: suffix order; lis, stmp: scratch arrays of work_suffix(); height: filled by work_height().
int sa[maxn], lis[maxn], stmp[maxn], height[maxn];
// n: sequence length (including the trailing 0 added by read()); k: required number of occurrences.
// i: not referenced by the functions of this program (each declares its own index).
int i , n, k;
// Reads n and k, then n values into a[1..n], storing each value plus one.
// Finally appends a 0 as a[n+1] and counts it in n.
void read()
{
    scanf("%d%d", &n, &k);
    for (int pos = 1; pos <= n; ++pos)
    {
        scanf("%d", &a[pos]);
        ++a[pos];
    }
    n += 1;
    a[n] = 0;
}
void work_suffix(int n, int m)
{
int i, j, p;
for (i=1; i<=m; i++) pret[i] = 0;
for (i=1; i<=n; i++) pret[rk[i] = a[i]]++;
for (i=1; i<=m; i++) pret[i]+= pret[i-1];
for (i=n; i>=1; i--) sa[pret[a[i]]--] = i;
for (j = 1, p = 0; p <n; j <<=1, m = p)
{
for (p = 0, i=n-j+1; i<=n; i++) lis[++p] = i;
for (i=1; i<=n; i++) if (sa[i] > j) lis[++p] = sa[i]-j;
for (i=1; i<=m; i++) pret[i] = 0;
for (i=1; i<=n; i++) stmp[i]=rk[lis[i]];
for (i=1; i<=n; i++) pret[stmp[i]]++;
for (i=1; i<=m; i++) pret[i]+=pret[i-1];
for (i=n; i>=1; i--) sa[pret[stmp[i]]--]=lis[i];
for (i=1; i<=n; i++) lis[i]=rk[i];
for (rk[sa[1]]=1, i=2, p=1; i<=n; i++)
rk[sa[i]] = (lis[sa[i]] == lis[sa[i-1]] && lis[sa[i]+j] == lis[sa[i-1]+j]) ? p:++p;
}
}
bool check(int mid)
{
int i, ll, rr;
for (i=1; i<=n; i++)
{
if (height[i] < mid ) ll = oo, rr = -oo;
if (i < ll) ll = i;
if (i > rr) rr = i;
if (rr - ll >= k-1) return true;
}
return false;
}
// Fills rank[] (the inverse of sa[]) and height[], where height[rank[i]] is the
// length of the common prefix of the suffix starting at i and the suffix that
// precedes it in sa order.
//
// Positions are visited in text order and the previous length minus one is
// reused as the starting point.
//
// The suffix with rank 1 has no predecessor. It is given height 0 explicitly;
// the old loop looked up sa[0] for it (an entry work_suffix never writes) and
// stored whatever that comparison produced in height[1].
//
// NOTE(review): the inner loop has no bounds check; it stops because the
// 0 appended by read() is presumably smaller than every other symbol - confirm.
void work_height()
{
    int i, j, p = 0;
    for (i = 1; i <= n; i++)
        rank[sa[i]] = i;
    for (i = 1; i <= n; i++)
    {
        if (rank[i] == 1)
        {
            p = 0;
            height[1] = 0;
            continue;
        }
        if (p > 0)
            p--;
        j = sa[rank[i] - 1];
        while (a[j + p] == a[i + p])
            p++;
        height[rank[i]] = p;
    }
}
// Driver for POJ 3261: reads the sequence, builds sa/height and prints the
// largest mid accepted by check() (0 if none is).
int main()
{
    freopen("3261.in","r", stdin);
    freopen("3261.out", "w", stdout);
    read();
    // work_suffix() clears and sums pret[1..m], so m has to be a valid index.
    // pret has 1000000+5 elements (indices 0 .. 1000000+4); the former
    // argument 1000000+5 touched one element past the end.
    work_suffix(n, 1000000+4);
    work_height();
    int mid, l = 0, r = n;
    while (l < r)
    {
        mid = (l + r + 1) >> 1;
        if (check(mid))
            l = mid;
        else
            r = mid - 1;
    }
    printf("%d", l);
    return 0;
}
poj 2406
給定字符串l,它是由字串s重複k次得到的,求k的最大值。
同樣的求height, 枚舉s的長度,首先strlen(l)% strlen(s) == 0, 其次, suffix(1)與suffix(1+strlen(s)) 的最長公共前綴 = n - strlen(s), 畫個圖就很明瞭了。
當然,這道題不是出給後綴數組的,是出給kmp的,數據出到了1000000, 倍增是過不了的, 只能用醜陋的DC3
貼個kmp的算了:
# include <cstdlib>
# include <cstdio>
# include <cmath>
# include <cstring>
// Capacity of the text buffer and of the next[] table.
const int maxn = 1000000+5;
// Current input string, stored from index 1.
char s[maxn];
// n: length of the current string; i, j: loop state of main();
// next[x]: value j recorded by main() after processing position x.
int n,i,j, next[maxn];
// Driver for POJ 2406: for every input string prints how many times its
// shortest period repeats to form the whole string (1 if the string is not an
// exact repetition). Stops at a string starting with '.' or at end of input.
int main()
{
    //freopen("2406.in", "r", stdin);
    //freopen("2406.out", "w", stdout);
    for (;;)
    {
        // Stop on EOF / read failure. Previously the result was ignored, which
        // left n == 0 and made the modulo below divide by zero.
        // The buffer is no longer zero-filled before each read: scanf
        // terminates the string itself, so the 1 MB memset was pure overhead.
        if (scanf("%s\n", s+1) != 1) return 0;
        n = static_cast<int>(strlen(s+1));
        if (s[1] == '.') return 0;

        // Prefix function: next[i] is the length of the longest proper prefix
        // of s[1..i] that is also its suffix. next[1] is never written and
        // stays 0.
        for (i = 2, j = 0; i <= n; next[i++] = j)
        {
            for (; j > 0 && s[j+1] != s[i]; j = next[j]);
            if (s[i] == s[j+1]) j++;
        }

        // n - next[n] is the shortest period candidate; it only counts when it
        // divides n.
        const int period = n - next[n];
        printf("%d\n", n % period == 0 ? n / period : 1);
    }
    return 0;
}
poj 3294
求:對於n個字符串,求在至少 n div 2+1 個字符串中出現的最長子串。
把n個字符串用未出現且不同的字符連接, 同樣的二分答案,分組,組內統計即可。
# include <cstdlib>
# include <cstdio>
# include <cstring>
using namespace std;
// Capacity of the arrays indexed by position in the concatenated text.
const int maxn = 100000+5000;
// Buffer for one input word, stored from index 1.
char s[10000+5];
// height, sa, rank: suffix-array data; pret, rk, stmp, lis: work arrays of work_suffix();
// a: concatenated text as integers; id[x]: 1-based index of the word position x came from (0 for separators).
int height[maxn], pret[maxn], sa[maxn], rk[maxn], rank[maxn], stmp[maxn], lis[maxn], a[maxn],id[maxn];
// ans[0]: number of results found by check(); ans[1..]: their start positions in a.
int ans[1000+5];
// have[w]: word w already counted in the current group (check()).
bool have[1000+5];
// lim: written by main(), not read by the functions of this program;
// n: number of words; i, j: shared loop indices; key: next separator value;
// len: length of the word just read; tot: length of the concatenated text.
int lim, n, i, j, key, len, tot;
// Reads one test case: the word count n followed by n words, and builds the
// concatenated integer text a[1..tot].
//   - letters are stored as s[j]-'a'+1 with id[] set to the 1-based word index;
//   - every word is followed by a separator taken from key (30, 31, ...) with
//     id 0, so no two separators are equal;
//   - the last separator is then overwritten by 0.
//
// If the count or a word cannot be read (EOF / malformed input), or the count
// is negative, n is set to 0 so that main() stops. Previously a failed read
// left the old n in place and the driver looped forever.
void read()
{
    if (scanf("%d", &n) != 1 || n < 0)
        n = 0;
    key = 30, tot = 0;
    for (i = 1; i <= n; i++)
    {
        if (scanf("%s", s+1) != 1)
        {
            n = 0;
            return;
        }
        len = strlen(s+1);
        for (j = 1; j <= len; j++)
            a[++tot] = s[j] - 'a' + 1, id[tot] = i;
        a[++tot] = key++;
        id[tot] = 0;
    }
    a[tot] = 0;
}
void work_suffix(int n, int m)
{
int i, j, p;
for (i = 1; i<=m; i++) pret[i] = 0;
for (i = 1; i<=n; i++) pret[rk[i]=a[i]]++;
for (i = 1; i<=m; i++) pret[i]+= pret[i-1];
for (i = n; i>=1; i--) sa[pret[rk[i]]--] = i;
for (j = 1, p = 0; p < n; j <<=1, m =p)
{
for (p = 0, i = n-j+1; i<=n; i++) lis[++p] = i;
for (i = 1; i <=n; i++) if (sa[i]>j) lis[++p]=sa[i]-j;
for (i = 1; i <=m; i++) pret[i] = 0;
for (i = 1; i <=n; i++) stmp[i]= rk[lis[i]];
for (i = 1; i <=n; i++) pret[stmp[i]]++;
for (i = 1; i <=m; i++) pret[i] += pret[i-1];
for (i = n; i >=1; i--) sa[pret[stmp[i]]--] = lis[i];
for (i = 1; i <=n; i++) lis[i] = rk[i];
for (i = 2, p = 1, rk[sa[1]] = 1; i <=n; i++)
rk[sa[i]] = (lis[sa[i]] == lis[sa[i-1]] && lis[sa[i]+j] ==lis[sa[i-1]+j]) ? p:++p;
}
}
// Fills rank[] (the inverse of sa[]) and height[] for the text a[1..n], where
// height[rank[i]] is the length of the common prefix of the suffix starting at
// i and the suffix that precedes it in sa order.
//
// Positions are visited in text order and the previous length minus one is
// reused as the starting point.
//
// The suffix with rank 1 has no predecessor and is given height 0 explicitly.
// The old loop looked up sa[0] for it and then compared a[0..] with a[n+1..];
// that range is not rewritten by read(), so it could still hold data from an
// earlier, longer test case.
//
// NOTE(review): the inner loop has no bounds check; it presumably stops
// because read() makes every separator and the final 0 unique - confirm.
void work_height(int n)
{
    int i, j, p = 0;
    for (i = 1; i <= n; i++)
        rank[sa[i]] = i;
    for (i = 1; i <= n; i++)
    {
        if (rank[i] == 1)
        {
            p = 0;
            height[1] = 0;
            continue;
        }
        if (p > 0)
            p--;
        j = sa[rank[i] - 1];
        while (a[i + p] == a[j + p])
            p++;
        height[rank[i]] = p;
    }
}
// Decides whether some substring of length mid occurs in at least n/2 + 1
// different words, and records one start position per such substring in
// ans[1..ans[0]] (in sa order).
//
// sa[] is scanned in order and split into groups wherever height[] drops below
// mid. Within a group, have[] marks the words already seen and `all` counts
// them; a position is recorded at the moment the count first reaches n/2 + 1,
// so every group contributes at most one entry. Separator positions (id 0) are
// ignored.
//
// mid == 0 is accepted immediately and leaves ans[] untouched.
//
// The group state is now reset before the scan as well. Previously it was only
// set inside the `height[i] < mid` branch, so the result depended on the very
// first entry triggering that branch.
bool check(int mid)
{
    int all = 0, i;
    if (mid == 0) return true;
    memset(ans, 0, sizeof(ans));
    memset(have, false, sizeof(have));
    const int need = n / 2 + 1;
    for (i = 1; i <= tot; i++)
    {
        if (height[i] < mid)
        {
            memset(have, false, sizeof(have));
            all = 0;
        }
        const int owner = id[sa[i]];
        if (owner != 0)
        {
            const int old = all;
            if (!have[owner])
            {
                all++;
                have[owner] = true;
            }
            if (all == need && old < need)
                ans[++ans[0]] = sa[i];
        }
    }
    return ans[0] != 0;
}
// Driver for POJ 3294: processes test cases until read() yields n == 0.
// A case with a single word prints "?" and nothing else. Otherwise the largest
// length accepted by check() is found by binary search, check() is run once
// more at that length to fill ans[], and every recorded substring is printed
// on its own line ("?" if the length is 0), followed by an empty line.
int main()
{
    while (true)
    {
        read();
        if (n == 0)
            break;
        if (n == 1)
        {
            printf("?\n");
            continue;
        }

        work_suffix(tot, key+1);
        work_height(tot);

        lim = n / 2 + 1;
        int l = 0, r = tot, mid;
        while (l < r)
        {
            mid = (l + r + 1) >> 1;
            if (check(mid))
                l = mid;
            else
                r = mid - 1;
        }

        // The last probe of the search need not have been at l, so refresh ans[].
        check(l);
        if (l == 0)
        {
            printf("?\n");
        }
        else
        {
            for (i = 1; i <= ans[0]; i++)
            {
                // Turn the stored integers back into letters.
                for (j = 1; j <= l; j++)
                    printf("%c", a[ans[i]+j-1] + 'a' -1);
                printf("\n");
            }
        }
        printf("\n");
    }
    return 0;
}
poj 2774 求兩個字符串的最長公共子串: 把兩個串用未出現的字符連接, 答案就是sa中相鄰且分屬不同串的兩個後綴的height的最大值。
# include <cstdlib>
# include <cstdio>
# include <cmath>
# include <cstring>
using namespace std;
// Capacity of the arrays indexed by position in the concatenated text.
const int maxn = 300000;
// height, sa, rank: suffix-array data; pret, stmp, rk, lis: work arrays of work_suffix();
// a: concatenated text as integers; id[x]: 1 or 2 for positions taken from the first / second
// input string, left at 0 for the separator and the terminator.
int height[maxn],id[maxn],pret[maxn],sa[maxn],stmp[maxn],rk[maxn],a[maxn],lis[maxn],rank[maxn];
// tot: length of the concatenated text; i: loop index of main(); ans: best height found so far.
// n: not referenced by the functions of this program (they take n as a parameter).
int tot, i , n , ans;
// Buffer for one input string, stored from index 1.
char s[maxn / 2];
void read()
{
int i, len;
scanf("%s\n", s+1); len = strlen(s+1);
for (i = 1; i <= len; i++) a[++tot] = s[i]-'a' +1, id[tot] = 1;
scanf("%s\n", s+1); len = strlen(s+1);
for (a[++tot] = 30, i = 1; i <= len; i++) a[++tot] = s[i]-'a' +1, id[tot] = 2;
a[++tot] = 0;
}
void work_suffix(int n, int m)
{
int i , j, p;
for (i = 1; i <= m; i++) pret[i] = 0;
for (i = 1; i <= n; i++) pret[rk[i] = a[i]] ++;
for (i = 1; i <= m; i++) pret[i] += pret[i-1];
for (i = n; i >= 1; i--) sa[pret[rk[i]]--] = i;
for (p = 0, j = 1; p < n; j <<=1, m = p)
{
for (p = 0, i = n-j+1; i <= n; i++) lis[++p] = i;
for (i = 1; i <=n; i++) if (sa[i]> j) lis[++p] = sa[i] - j;
for (i = 1; i <=m; i++) pret[i] = 0;
for (i = 1; i <=n; i++) stmp[i] = rk[lis[i]];
for (i = 1; i <=n; i++) pret[stmp[i]]++;
for (i = 1; i <=m; i++) pret[i] += pret[i-1];
for (i = n; i >=1; i--) sa[pret[stmp[i]]--] = lis[i];
for (i = 1; i <=n; i++) lis[i] = rk[i];
for (i = 2, p = 1, rk[sa[1]] = 1; i<=n; i++)
rk[sa[i]] = (lis[sa[i]] == lis[sa[i-1]] && lis[sa[i]+j] == lis[sa[i-1]+j])? p:++p;
}
}
void work_height(int n)
{
int i ,j, p= 0;
for (i = 1; i <=n; i++) rank[sa[i]] = i;
for (i = 1; i <n; height[rank[i++]] = p)
for (p?p--:0, j = sa[rank[i]-1]; a[i+p] == a[j+p]; )
p++;
}
// Driver for POJ 2774: builds sa/height over the concatenated text and prints
// the largest height[] entry whose two neighbouring suffixes carry different
// id values.
int main()
{
    freopen("2774.in", "r", stdin);
    freopen("2774.out", "w", stdout);
    read();
    work_suffix(tot, 50);
    work_height(tot);
    for (i = 2; i <= tot; i++)
    {
        if (id[sa[i]] == id[sa[i-1]])
            continue;
        if (ans < height[i])
            ans = height[i];
    }
    printf("%d", ans);
    return 0;
}