【倍增後綴數組】 poj1743 &2406& 3261 & 3294 & 2774


        曾經有幾次想寫後綴數組,結果被論文中縮成豆腐塊的程序嚇到了.......

        現在看懂了之後, 被這個程序寫的醜陋嚇到了。

        醜雖醜,但是寫的還是很精巧的, 照着打吧。


        後綴數組其實就是把一個長度爲n的字符串(當然也可以是其他的,只要能比大小)的所有後綴排序後存在數組裏。

        打裸是O(n*n*log(n))的。

        其實如果以每個點爲開頭,分別求出向後延伸 1, 2, 4, ... 個字符時的排列順序,後者是可以由前者推出的,配合基數排序,就可以把複雜度降低到 O(n log n)。

         當然, 代碼很精巧,也很醜。


         把排序後各後綴的起始位置放在sa數組中, 再用sa推出height (height[i] = suffix(sa[i]) 與 suffix(sa[i-1]) 的最長公共前綴)。 有這麼一個定理: height[rank[i]] >= height[rank[i-1]] - 1。 按照原串位置 i = 1..n 的順序依次求 height[rank[i]], 用變量存上一個位置求得的height, 由於這個變量頂多減少n次, 類似kmp的複雜度分析,這是O(n)的。


         用height就可以求很多東西了。


        poj 1743  求不可重疊最長重複字串。

        求出height後,二分答案,把height數組分成若干份,每份內的height都不小於二分的數,再觀察每份內是否存在解即可:同一份內的後綴兩兩之間的公共前綴都不短於二分的數,所以只要看這一份裏sa的最大值與最小值相差是否足夠大(保證兩段不重疊)。

         nlogn

         當然,這道題規定,如果一段數同時減去一個數後, 與另一段數相同,也算作相同,這裏就要用差分思想解決了。

       

        

# include <cstdlib>
# include <cstdio>
# include <cmath>
# include <cstring>

using namespace std;

// Capacity shared by every work array, and a large value used as "infinity".
const int maxn = 100000;
const int oo = 1073741819;

// Work arrays, all indexed from 1.
int pret[maxn];    // counting-sort buckets
int s[maxn];       // input sequence after preprocessing in read()
int height[maxn];  // common-prefix lengths between neighbours in sa
int sa[maxn];      // suffix start positions in sorted order
int stmp[maxn];    // sort keys for the current doubling round
int lis[maxn];     // scratch: second-key order, then previous ranks
int rank[maxn];    // inverse of sa
int rk[maxn];      // ranks maintained while doubling

// Scalars shared between the functions below.
int p;
int n;
int l;
int r;
int mid;
int i;

void read()
{
	int i;
	memset(s, 0, sizeof(s));
	memset(sa, 0, sizeof(sa));
	scanf("%d", &n);
    for (i = 1; i<= n; i++)
       scanf("%d", &s[i]);
    for (i = 1; i< n; i++)
       s[i] = s[i+1]-s[i];
    s[n] = 0; n--;
    for (i = 1; i<= n; i++)
       s[i] += 89;
    
}

// Builds the suffix array of s[1..n] by prefix doubling, using a counting
// sort in every round.
//   n : number of symbols in s (1-based)
//   m : upper bound on the symbol values; buckets pret[0..m] are summed
// Results: sa[1..n] lists suffix start positions in sorted order and rk[] holds
// the ranks from the last round. The global p ends up as the number of
// distinct ranks.
void work_suffix(int n, int m)
{
	int pos, len;

	// Round 0: counting sort on single symbols.
	memset(pret, 0, sizeof(pret));
	for (pos = 1; pos <= n; pos++)
	{
		rk[pos] = s[pos];
		pret[rk[pos]]++;
	}
	for (pos = 1; pos <= m; pos++)
		pret[pos] += pret[pos - 1];
	for (pos = n; pos >= 1; pos--)
	{
		sa[pret[rk[pos]]] = pos;
		pret[rk[pos]]--;
	}

	p = 0;
	for (len = 1; p < n; len <<= 1, m = p)
	{
		// Order positions by their second key (the rank len symbols ahead).
		// Positions whose second half runs off the end come first.
		p = 0;
		for (pos = n - len + 1; pos <= n; pos++)
			lis[++p] = pos;
		for (pos = 1; pos <= n; pos++)
		{
			if (sa[pos] > len)
				lis[++p] = sa[pos] - len;
		}

		// Stable counting sort on the first key.
		memset(pret, 0, sizeof(pret));
		for (pos = 1; pos <= n; pos++)
			stmp[pos] = rk[lis[pos]];
		for (pos = 1; pos <= n; pos++)
			pret[stmp[pos]]++;
		for (pos = 1; pos <= m; pos++)
			pret[pos] += pret[pos - 1];
		for (pos = n; pos >= 1; pos--)
		{
			sa[pret[stmp[pos]]] = lis[pos];
			pret[stmp[pos]]--;
		}

		// Re-rank: neighbours in sa share a rank only if both keys match.
		for (pos = 1; pos <= n; pos++)
			lis[pos] = rk[pos];
		p = 1;
		rk[sa[1]] = 1;
		for (pos = 2; pos <= n; pos++)
		{
			const int cur = sa[pos];
			const int prev = sa[pos - 1];
			// The second comparison is only evaluated when the first keys match.
			const bool same = lis[cur] == lis[prev] && lis[cur + len] == lis[prev + len];
			if (!same)
				++p;
			rk[cur] = p;
		}
	}
}

void work_height()
{
	int i, j, p = 0;
	for (i = 1; i <= n; i++) rank[sa[i]] = i;
	for (i = 1; i < n; height[rank[i++]] = p)
       for (p?p--:0,j = sa[rank[i]-1]; j+p <=n+1 && i+p <=n+1 && s[j+p]==s[i+p]; p++);
}

bool check(int mid)
{
    int ll,rr;
	for (i = 1; i <= n; i++)
	{
		if (height[i] < mid) ll = oo, rr = -oo;
		ll = ll > sa[i] ? sa[i]:ll;
		rr = rr < sa[i] ? sa[i]:rr;
		if (rr- ll >= mid) return true; 
	}
	return false;
}

// Driver: processes test cases until read() reports n == -1. For each case a
// zero symbol is appended, the suffix array and height array are built, and
// the largest mid accepted by check() is found by binary search. The printed
// value is that length plus one, or 0 when it is below 5.
int main()
{
	freopen("1743.in", "r", stdin);
	freopen("1743.out", "w", stdout);
	while (true)
	{
		read();
		if (n == -1)
			break;

		// Append the terminating zero symbol.
		n++;
		s[n] = 0;

		work_suffix(n, 1000);
		work_height();

		l = 0;
		r = n;
		while (l < r)
		{
			mid = (l + r + 1) >> 1;
			if (check(mid))
				l = mid;
			else
				r = mid - 1;
		}
		l++;

		const int answer = (l >= 5) ? l : 0;
		printf("%d\n", answer);
	}
	return 0;
}



         poj 3261 求重複k次的最長重複字串, 可重疊。

         有木有發現和前面一道題是一樣的二分檢驗?

      

# include <cstdlib>
# include <cstdio>
# include <cstring>

using namespace std;

// A large value used as "infinity", and the capacity of the per-position arrays.
const int oo = 1073741819;
const int maxn = 30000;

// Work arrays, all indexed from 1.
int rank[maxn];          // inverse of sa
int a[maxn];             // input symbols
int pret[1000000+5];     // counting-sort buckets, indexed by symbol / rank
int rk[maxn];            // ranks maintained while doubling
int sa[maxn];            // suffix start positions in sorted order
int lis[maxn];           // scratch: second-key order, then previous ranks
int stmp[maxn];          // sort keys for the current doubling round
int height[maxn];        // common-prefix lengths between neighbours in sa

// Scalars shared between the functions below.
int i;
int n;
int k;
// Reads n and k, then n values into a[1..n]. Every value is increased by one
// and a zero symbol is appended, so n ends up one larger than the count read.
void read()
{
  scanf("%d%d", &n, &k);

  int idx = 1;
  while (idx <= n)
  {
    scanf("%d", &a[idx]);
    a[idx] += 1;
    idx++;
  }

  n += 1;
  a[n] = 0;
}

 
// Builds the suffix array of a[1..n] by prefix doubling, using a counting
// sort in every round.
//   n : number of symbols in a (1-based)
//   m : upper bound on the symbol values; pret[1..m] is cleared and summed,
//       so m has to be a valid index of pret
// Results: sa[1..n] lists suffix start positions in sorted order and rk[] holds
// the ranks from the last round.
void work_suffix(int n, int m)
{
  int pos, len, classes;

  // Round 0: counting sort on single symbols.
  for (pos = 1; pos <= m; pos++)
    pret[pos] = 0;
  for (pos = 1; pos <= n; pos++)
  {
    rk[pos] = a[pos];
    pret[rk[pos]]++;
  }
  for (pos = 1; pos <= m; pos++)
    pret[pos] += pret[pos - 1];
  for (pos = n; pos >= 1; pos--)
  {
    sa[pret[a[pos]]] = pos;
    pret[a[pos]]--;
  }

  classes = 0;
  for (len = 1; classes < n; len <<= 1, m = classes)
  {
    // Order positions by their second key (the rank len symbols ahead).
    // Positions whose second half runs off the end come first.
    classes = 0;
    for (pos = n - len + 1; pos <= n; pos++)
      lis[++classes] = pos;
    for (pos = 1; pos <= n; pos++)
    {
      if (sa[pos] > len)
        lis[++classes] = sa[pos] - len;
    }

    // Stable counting sort on the first key.
    for (pos = 1; pos <= m; pos++)
      pret[pos] = 0;
    for (pos = 1; pos <= n; pos++)
      stmp[pos] = rk[lis[pos]];
    for (pos = 1; pos <= n; pos++)
      pret[stmp[pos]]++;
    for (pos = 1; pos <= m; pos++)
      pret[pos] += pret[pos - 1];
    for (pos = n; pos >= 1; pos--)
    {
      sa[pret[stmp[pos]]] = lis[pos];
      pret[stmp[pos]]--;
    }

    // Re-rank: neighbours in sa share a rank only if both keys match.
    for (pos = 1; pos <= n; pos++)
      lis[pos] = rk[pos];
    rk[sa[1]] = 1;
    classes = 1;
    for (pos = 2; pos <= n; pos++)
    {
      const int cur = sa[pos];
      const int prev = sa[pos - 1];
      // The second comparison is only evaluated when the first keys match.
      const bool same = lis[cur] == lis[prev] && lis[cur + len] == lis[prev + len];
      if (!same)
        ++classes;
      rk[cur] = classes;
    }
  }
}

bool check(int mid)
{
  int i, ll, rr;
  for (i=1; i<=n; i++)
  {
    if (height[i] < mid ) ll = oo, rr = -oo;
    if (i < ll) ll = i;
    if (i > rr) rr = i;
    if (rr - ll >= k-1) return true;
  }
  return false;
}

void work_height()
{
  int i,j,p = 0;
  for (i=1; i<=n; i++) rank[sa[i]] = i;
  for (i=1, p=0; i<=n; height[rank[i++]] = p) 
    for (p?p--:0, j=sa[rank[i]-1];a[j+p]==a[i+p]; p++); 
}

// Driver: reads the input, builds the suffix array and height array, then
// binary-searches the largest length accepted by check() and prints it.
int main()
{
  freopen("3261.in","r", stdin);
  freopen("3261.out", "w", stdout);
  read();

  // work_suffix() clears and sums pret[1..m], so m must be a valid index.
  // pret has 1000000+5 elements (last index 1000000+4); passing 1000000+5, as
  // before, made work_suffix write one element past the end of the array.
  work_suffix(n, 1000000+4);
  work_height();

  int mid, l = 0, r = n;
  while (l < r)
  {
    mid = (l + r + 1) >> 1;
    if (check(mid))
      l = mid;
    else
      r = mid - 1;
  }
  printf("%d", l);
  return 0;
}

         poj 2406

         給定字符串l,它是由字串s重複k次得到的,求k的最大值。

     

          同樣的求height, 枚舉s的長度,首先strlen(l)%  strlen(s) == 0, 其次, suffix(1)與suffix(1+strlen(s)) 的最長公共前綴 = n - strlen(s), 畫個圖就很明瞭了。

        

          當然,這道題不是出給後綴數組的,是出給kmp的,數據規模出到了1000000,倍增是過不了的,要用後綴數組的話只能用醜陋的DC3

          貼個kmp的算了:

# include <cstdlib>
# include <cstdio>
# include <cmath>
# include <cstring>

// Buffer capacity for the input string and its failure table.
const int maxn = 1000000+5;

char s[maxn];     // input string, stored from index 1
int next[maxn];   // failure table filled in main()

// Scalars used by main().
int n;
int i;
int j;

// Reads strings until a line consisting of a single "." and, for each one,
// prints how many times its shortest period repeats to form the whole string
// (1 when the string is not a whole number of periods).
//
// next[i] is the length of the longest proper prefix of s[1..i] that is also a
// suffix of it; n - next[n] is then the shortest period candidate.
int main()
{
  //freopen("2406.in", "r", stdin);
  //freopen("2406.out", "w", stdout);
  for (;;)
    {
      // The field width keeps the read inside s: s+1 has maxn-1 = 1000004
      // bytes, one of which is needed for the terminating '\0'.
      // Stop on end of input as well; otherwise n would be 0 below and the
      // modulo would divide by zero.
      if (scanf("%1000003s\n", s+1) != 1) return 0;

      // Only the exact string "." ends the input. Testing just the first
      // character would also stop on ordinary strings that begin with '.'.
      if (strcmp(s+1, ".") == 0) return 0;

      // scanf always writes the terminator, so s does not need clearing.
      n = strlen(s+1);

      // next[1] is never written and stays 0 (static storage).
      for (i = 2, j = 0; i <= n; i++)
	{
	  while (j > 0 && s[j+1] != s[i]) j = next[j];
	  if (s[i] == s[j+1]) j++;
	  next[i] = j;
	}
      printf("%d\n", n % (n - next[n]) == 0 ? n / (n - next[n]): 1);
    }
  return 0;
}




poj 3294

求:對於n個字符串,求在至少 n div 2 + 1 個字符串中出現過的最長子串。

 把n個字符串用未出現且不同的字符連接, 同樣的二分答案,分組,組內統計即可。

# include <cstdlib>
# include <cstdio>
# include <cstring>

using namespace std;

// Capacity of the per-position arrays (total length of the joined text).
const int maxn = 100000+5000;

char s[10000+5];   // buffer for one input string, stored from index 1

// Work arrays, all indexed from 1.
int height[maxn], pret[maxn], sa[maxn], rk[maxn], rank[maxn], stmp[maxn], lis[maxn], a[maxn],id[maxn];

// ans[0] is the number of results, ans[1..ans[0]] their start positions.
// check() appends one entry per qualifying run of sa[] without any bound test,
// and the number of runs is limited only by the number of suffixes, so the
// array is sized like the suffix arrays (it used to hold just 1005 entries).
int ans[maxn];

bool have[1000+5];   // have[x]: string x already seen in the current run
int lim, n, i, j, key, len, tot;

// Reads n strings and joins them into a[1..tot].
// Letters are stored as 1..26 with id[] holding the 1-based index of the string
// they came from. After every string a separator symbol is written, taken from
// a counter that starts at 30 and increases each time; separators get id 0.
// The separator after the last string is then overwritten with 0.
void read()
{
  scanf("%d", &n);
  key = 30;
  tot = 0;

  for (i = 1; i <= n; i++)
  {
    scanf("%s", s+1);
    len = strlen(s+1);

    for (j = 1; j <= len; j++)
    {
      ++tot;
      a[tot] = s[j] - 'a' + 1;
      id[tot] = i;
    }

    ++tot;
    a[tot] = key;
    key++;
    id[tot] = 0;
  }

  a[tot] = 0;
}

// Builds the suffix array of a[1..n] by prefix doubling, using a counting
// sort in every round.
//   n : number of symbols in a (1-based)
//   m : upper bound on the symbol values; pret[1..m] is cleared and summed
// Results: sa[1..n] lists suffix start positions in sorted order and rk[] holds
// the ranks from the last round.
void work_suffix(int n, int m)
{
  int pos, len, classes;

  // Round 0: counting sort on single symbols.
  for (pos = 1; pos <= m; pos++)
    pret[pos] = 0;
  for (pos = 1; pos <= n; pos++)
  {
    rk[pos] = a[pos];
    pret[rk[pos]]++;
  }
  for (pos = 1; pos <= m; pos++)
    pret[pos] += pret[pos - 1];
  for (pos = n; pos >= 1; pos--)
  {
    sa[pret[rk[pos]]] = pos;
    pret[rk[pos]]--;
  }

  classes = 0;
  for (len = 1; classes < n; len <<= 1, m = classes)
  {
    // Order positions by their second key (the rank len symbols ahead).
    // Positions whose second half runs off the end come first.
    classes = 0;
    for (pos = n - len + 1; pos <= n; pos++)
      lis[++classes] = pos;
    for (pos = 1; pos <= n; pos++)
    {
      if (sa[pos] > len)
        lis[++classes] = sa[pos] - len;
    }

    // Stable counting sort on the first key.
    for (pos = 1; pos <= m; pos++)
      pret[pos] = 0;
    for (pos = 1; pos <= n; pos++)
      stmp[pos] = rk[lis[pos]];
    for (pos = 1; pos <= n; pos++)
      pret[stmp[pos]]++;
    for (pos = 1; pos <= m; pos++)
      pret[pos] += pret[pos - 1];
    for (pos = n; pos >= 1; pos--)
    {
      sa[pret[stmp[pos]]] = lis[pos];
      pret[stmp[pos]]--;
    }

    // Re-rank: neighbours in sa share a rank only if both keys match.
    for (pos = 1; pos <= n; pos++)
      lis[pos] = rk[pos];
    classes = 1;
    rk[sa[1]] = 1;
    for (pos = 2; pos <= n; pos++)
    {
      const int cur = sa[pos];
      const int prev = sa[pos - 1];
      // The second comparison is only evaluated when the first keys match.
      const bool same = lis[cur] == lis[prev] && lis[cur + len] == lis[prev + len];
      if (!same)
        ++classes;
      rk[cur] = classes;
    }
  }
}

void work_height(int n)
{
  int i ,j, p;
  for (i = 1; i<=n; i++) rank[sa[i]] = i;
  for (i = 1, p =0; i <=n; height[rank[i++]] = p)
    for (p?p--:0, j = sa[rank[i]-1]; a[i+p]== a[j+p]; p++);
}

bool check(int mid)
{
  int all, i;
  if (mid == 0) return true;
  memset(ans, 0, sizeof(ans));
  for (i = 1; i <=tot; i++)
   {
    if (height[i] < mid)
    {
        memset(have, false, sizeof(have));
        all = 0;
    }
    if (id[sa[i]] != 0) 
    {
	  int old  = all;
      if (have[id[sa[i]]] == false) all++, have[id[sa[i]]] = true;
      if (all == n / 2 +1 && old < n / 2+1) ans[++ans[0]] = sa[i];
    }
  }
  if (ans[0] != 0 ) return true;
  return false; 
}

// Driver: processes test cases until read() yields n == 0.
// A case with a single string prints "?" and nothing else. Otherwise the
// largest length accepted by check() is found by binary search; check() is then
// run once more for that length so ans[] holds the matching start positions,
// and each result is printed on its own line ("?" when the length is 0),
// followed by an empty line.
int main()
{
  while (true)
  {
    read();
    if (n == 0)
      break;
    if (n == 1)
    {
      printf("?\n");
      continue;
    }

    work_suffix(tot, key+1);
    work_height(tot);
    lim = n / 2 + 1;

    int l = 0, r = tot, mid;
    while (l < r)
    {
      mid = (l + r + 1) >> 1;
      if (check(mid))
        l = mid;
      else
        r = mid - 1;
    }

    // Refill ans[] for the final length.
    check(l);

    if (l == 0)
    {
      printf("?\n");
    }
    else
    {
      for (i = 1; i <= ans[0]; i++)
      {
        for (j = 1; j <= l; j++)
          printf("%c", a[ans[i]+j-1] + 'a' - 1);
        printf("\n");
      }
    }
    printf("\n");
  }
  return 0;
}

poj 2774
給定兩個字符串, 求他們的最長公共子串;

把兩個字符串以未出現字符連接後,求height,
可以肯定的是,答案就出現在height中:在sa上不相鄰的兩個後綴,它們的最長公共前綴是兩者之間所有height的最小值,不會比相鄰的一對更優。
那麼只要掃一遍所有的height, 如果這個height 滿足前後兩個後綴開頭出現在不同字符串上,則可以用來更新答案。

# include <cstdlib>
# include <cstdio>
# include <cmath>
# include <cstring>

using namespace std;

// Capacity of the per-position arrays.
const int maxn = 300000;

// Work arrays, all indexed from 1.
int height[maxn];  // common-prefix lengths between neighbours in sa
int id[maxn];      // which input string a position belongs to
int pret[maxn];    // counting-sort buckets
int sa[maxn];      // suffix start positions in sorted order
int stmp[maxn];    // sort keys for the current doubling round
int rk[maxn];      // ranks maintained while doubling
int a[maxn];       // joined text
int lis[maxn];     // scratch: second-key order, then previous ranks
int rank[maxn];    // inverse of sa

// Scalars shared between the functions below.
int tot;
int i;
int n;
int ans;

char s[maxn / 2];  // buffer for one input string, stored from index 1

void read()
{
	int i, len;
	scanf("%s\n", s+1); len = strlen(s+1);
	for (i = 1; i <= len; i++) a[++tot] = s[i]-'a' +1, id[tot] = 1;
	scanf("%s\n", s+1); len = strlen(s+1);
	for (a[++tot] = 30, i = 1; i <= len; i++) a[++tot] = s[i]-'a' +1, id[tot] = 2;
	a[++tot] = 0;
}

// Builds the suffix array of a[1..n] by prefix doubling, using a counting
// sort in every round.
//   n : number of symbols in a (1-based)
//   m : upper bound on the symbol values; pret[1..m] is cleared and summed
// Results: sa[1..n] lists suffix start positions in sorted order and rk[] holds
// the ranks from the last round.
void work_suffix(int n, int m)
{
	int pos, len, classes;

	// Round 0: counting sort on single symbols.
	for (pos = 1; pos <= m; pos++)
		pret[pos] = 0;
	for (pos = 1; pos <= n; pos++)
	{
		rk[pos] = a[pos];
		pret[rk[pos]]++;
	}
	for (pos = 1; pos <= m; pos++)
		pret[pos] += pret[pos - 1];
	for (pos = n; pos >= 1; pos--)
	{
		sa[pret[rk[pos]]] = pos;
		pret[rk[pos]]--;
	}

	classes = 0;
	for (len = 1; classes < n; len <<= 1, m = classes)
	{
		// Order positions by their second key (the rank len symbols ahead).
		// Positions whose second half runs off the end come first.
		classes = 0;
		for (pos = n - len + 1; pos <= n; pos++)
			lis[++classes] = pos;
		for (pos = 1; pos <= n; pos++)
		{
			if (sa[pos] > len)
				lis[++classes] = sa[pos] - len;
		}

		// Stable counting sort on the first key.
		for (pos = 1; pos <= m; pos++)
			pret[pos] = 0;
		for (pos = 1; pos <= n; pos++)
			stmp[pos] = rk[lis[pos]];
		for (pos = 1; pos <= n; pos++)
			pret[stmp[pos]]++;
		for (pos = 1; pos <= m; pos++)
			pret[pos] += pret[pos - 1];
		for (pos = n; pos >= 1; pos--)
		{
			sa[pret[stmp[pos]]] = lis[pos];
			pret[stmp[pos]]--;
		}

		// Re-rank: neighbours in sa share a rank only if both keys match.
		for (pos = 1; pos <= n; pos++)
			lis[pos] = rk[pos];
		classes = 1;
		rk[sa[1]] = 1;
		for (pos = 2; pos <= n; pos++)
		{
			const int cur = sa[pos];
			const int prev = sa[pos - 1];
			// The second comparison is only evaluated when the first keys match.
			const bool same = lis[cur] == lis[prev] && lis[cur + len] == lis[prev + len];
			if (!same)
				++classes;
			rk[cur] = classes;
		}
	}
}

void work_height(int n)
{
	int i ,j, p= 0;
	for (i = 1; i <=n; i++) rank[sa[i]] = i;
	for (i = 1; i <n; height[rank[i++]] = p)
	  for (p?p--:0, j = sa[rank[i]-1]; a[i+p] == a[j+p]; )
	     p++;
}

// Driver: reads both strings, builds the suffix array and height array of the
// joined text, and prints the largest height value found between two
// neighbouring sa entries whose id[] values differ.
int main()
{
	freopen("2774.in", "r", stdin); 
	freopen("2774.out", "w", stdout);

	read();
	work_suffix(tot, 50);
	work_height(tot);

	for (i = 2; i <= tot; i++)
	{
		const bool differentOwner = id[sa[i]] != id[sa[i-1]];
		if (differentOwner && ans < height[i])
			ans = height[i];
	}

	printf("%d", ans);
	return 0;
}



         









發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章