題意:給一個長度爲n的字符串,問它有多少個不同的子串至少出現兩次,且其中有兩次出現的位置互不重疊。
思路:用後綴數組求解。構造後綴數組的時間複雜度爲O(n log n),統計答案的時間複雜度爲O(n^2)。枚舉子串長度len,利用height數組把排名連續且滿足height[j] >= len的後綴劃分爲一段,求出每一段中sa的最小值和最大值(即最靠左和最靠右的起始位置),如果兩者之差大於等於len,則ans++。
代碼如下:
#include<algorithm>
#include<cmath>
#include<cstdio>
#include<cstring>
#include<iostream>
// Capacity of every fixed-size global array declared in this file.
#define N 10005
using namespace std;
// Input word as read by main.
char s[N];
// Integer form of the input that da and calheight operate on.
int r[N];
// wa, wb, wv, WS: scratch buffers used by da. sa: the suffix array da fills in.
int wa[N],wb[N],wv[N],WS[N],sa[N];
// sa[i] is the start index of the suffix that comes i-th in sorted order.
// rank[i] is the position of suffix i in that sorted order.
// Returns 1 when positions a and b of r carry the same pair of keys
// (r[pos], r[pos+l]), and 0 otherwise. The second key is only read when
// the first keys already match.
int cmp(int *r,int a,int b,int l)
{
    if (r[a] != r[b])
        return 0;
    return (r[a + l] == r[b + l]) ? 1 : 0;
}
// Builds the suffix array of the global sequence r[0..n-1] into the global
// sa[] by prefix doubling, with a counting sort in every round.
//   n - number of leading entries of r[] to sort (main passes the word length
//       plus one so that the 0 it stores in r[n] takes part in the sort).
//   m - the values r[0..n-1] are used as indices into WS[0..m-1], so they
//       must lie in [0, m).
// Uses the global scratch arrays wa, wb, wv and WS.
void da(int n,int m)
{
int i,j,p,*x = wa,*y = wb,*t;
// Initial pass: x[] takes a copy of r[], and a counting sort on those values
// fills sa[]. Walking i downwards keeps equal keys in increasing index order.
for(i = 0; i < m; i++) WS[i] = 0;
for(i = 0; i < n; i++) WS[ x[i]=r[i] ]++;
for(i = 1; i < m; i++) WS[i] += WS[i-1];
for(i = n-1; i >= 0; i--) sa[--WS[ x[i] ]] = i;
// Each round doubles j. The loop stops once p, the number of distinct keys
// handed out in x[], reaches n; m is narrowed to p for the next counting sort.
for(j = 1, p = 1; p < n; j *= 2,m = p)
{
// y[] lists positions ordered by their second key (the key j places further
// on): the last j positions, which have no such key, go first; then every
// sa[i] >= j contributes position sa[i]-j, in current sa order.
for(p = 0, i = n-j; i < n; i++) y[p++] = i;
for(i = 0; i < n; i++) if(sa[i] >= j) y[p++] = sa[i]-j;
// wv[i] is the first key of y[i]; a stable counting sort on it rewrites sa[].
for(i = 0; i < n; i++) wv[i] = x[ y[i] ];
for(i = 0; i < m; i++) WS[i] = 0;
for(i = 0; i < n; i++) WS[ wv[i] ]++;
for(i = 1; i < m; i++) WS[i] += WS[i-1];
for(i = n-1; i >= 0; i--) sa[ --WS[ wv[i] ] ] = y[i];
// Swap x and y so that y holds the previous keys, then hand out new keys in
// x: neighbours in sa share a key exactly when cmp reports both halves equal.
// NOTE(review): cmp reads y[sa[i]+j]; presumably this stays in range because
// the last element is a unique smallest sentinel - confirm with callers.
for(t = x,x = y,y = t,p = 1,x[ sa[0] ] = 0,i = 1; i < n; i++)
x[ sa[i] ] = cmp(y,sa[i - 1],sa[i],j) ? p-1:p++;
}
}
int rank[N],height[N];
void calheight(int n)
{
int i,j,k = 0;
for(i = 1; i <= n; i++) rank[ sa[i] ] = i;
for(i = 0; i < n; height[ rank[i++] ] = k)
for(k ? k--:0,j = sa[ rank[i]-1 ]; r[i+k] == r[j+k]; k++);
}
// Reads whitespace-separated words from standard input until end of input or
// a word starting with '#'. For each word, prints the number of distinct
// substrings that occur at least twice at non-overlapping positions.
int main()
{
    for (;;)
    {
        // Bound the extraction so an over-long token cannot overflow s[] or
        // the arrays derived from it. A width of N-1 stores at most N-2
        // characters, which keeps the height[n+1] write below inside
        // height[N]. Any excess characters are left for the next extraction.
        // The width resets after every extraction, so it is set each time.
        cin.width(N - 1);
        if (!(cin >> s) || s[0] == '#')
            break;

        const int n = static_cast<int>(strlen(s));
        // Map 'a'..'z' to 1..26 and terminate with 0.
        // NOTE(review): assumes the word is lowercase a-z only; any other
        // character gives a value outside [0, 27) and da would index WS out
        // of range - confirm against the problem statement.
        for (int i = 0; i < n; i++)
            r[i] = s[i] - 'a' + 1;
        r[n] = 0;
        da(n+1,27);
        calheight(n);
        // Clear the slot one past the last height so the scan below always
        // closes its final group (it may hold a value from an earlier word).
        height[n+1] = 0;

        long long ans = 0;
        // Two non-overlapping occurrences of length len need 2*len <= n.
        for (int len = 1; len <= n / 2; len++)
        {
            // lo / hi: smallest and largest start index in the current run of
            // adjacent suffixes sharing a common prefix of at least len.
            int lo = N, hi = -N;
            for (int j = 1; j <= n + 1; j++)
            {
                if (height[j] >= len)
                {
                    lo = std::min(lo, std::min(sa[j-1], sa[j]));
                    hi = std::max(hi, std::max(sa[j-1], sa[j]));
                }
                else
                {
                    // Run ended: it contributes one substring when its two
                    // extreme occurrences are at least len apart.
                    if (lo != N && hi - lo >= len)
                        ans++;
                    lo = N;
                    hi = -N;
                }
            }
        }
        cout << ans << '\n';
    }
    return 0;
}