It is well known that a DNA sequence is a sequence that contains only the letters A, C, T and G, and it is very useful to analyze segments of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is to count how many different DNA sequences of a species do not contain any of those segments.
Suppose that a DNA sequence of a species is a sequence consisting of A, C, T and G, and that the length of the sequence is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one genetic disease segment; the length of each segment is at most 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
题意:给你m个字符串(疾病片段),问长度为n、且不包含这m个字符串中任何一个作为子串的DNA序列共有多少种,结果对100000取模。
思路:这道题的话,看题之后,不难发现需要用AC自动机来做,但是不得其解。看了一些dalao的博客之后,感觉这道题真的很神奇,是fail数组的另一种用法。我们让所有病毒串构造一个AC自动机,这个AC自动机可以看作一张有向图,图上的每个顶点就是Trie树上的结点,每个结点都可以看作是某个病毒串的前缀,Trie树的根则是空字符串。然后根据fail数组的构造,如果某个结点fail指向的结点被标记了,那么这个结点也是不允许走的,这样,一个符合条件的Trie树就建立出来了,剩下的就是矩阵部分:把允许走的结点之间的转移写成邻接矩阵,题目就简化成从结点0出发走n步到达其他结点的所有允许情况,也就是邻接矩阵n次幂的第0行各元素之和,用矩阵快速幂即可求出。
AC代码:
#include <stdio.h>
#include <string>
#include <string.h>
#include <algorithm>
#include <iostream>
#include <math.h>
#include <vector>
#include <map>
#include <queue>
#include <stack>
// File-scope configuration and shared state for the automaton and the matrix code.
typedef long long ll;
const int maxx=110;        // capacity: rows of Trie, entries of fail/cntword, and the matrix dimension
const int maxn=4;          // child slots per trie node (second dimension of Trie)
const int inf=0x3f3f3f3f;  // not referenced by any code visible in this file
const int mod=100000;      // modulus applied in mul() and to the final sum in main()
using namespace std;
int Trie[maxx][maxn];      // Trie[p][x]: node reached from node p through child slot x; 0 = no child until build() fills the gaps
int fail[maxx];            // failure link of each node, computed by build()
int cntword[maxx];         // 1 if the node is forbidden: set at pattern ends by insert(), propagated along fail links by build()
char s[20];                // buffer main() reads each pattern into
char c['Z'+1];             // lookup table filled by init(): 'A'->0, 'T'->1, 'C'->2, 'G'->3
ll n,m;                    // read in main(): m = number of patterns to read, n = exponent passed to matrixpow()
int k;                     // highest trie node id allocated so far (root is 0); reset by init(), incremented by insert()
// Resets the automaton state (node counter, transition table, failure links,
// forbidden flags) and fills the nucleotide-to-slot lookup table c.
void init()
{
    // Letters listed in slot order: A->0, T->1, C->2, G->3.
    const char order[4]= {'A','T','C','G'};
    for(int slot=0; slot<4; ++slot)
        c[static_cast<int>(order[slot])]=static_cast<char>(slot);

    memset(cntword,0,sizeof(cntword));
    memset(Trie,0,sizeof(Trie));
    memset(fail,0,sizeof(fail));
    k=0;   // only the root (node 0) exists
}
void insert(char *s)
{
int p=0;
int len=strlen(s);
for(int i=0; i<len; i++)
{
int x=c[s[i]];
if(!Trie[p][x])
Trie[p][x]=++k;
p=Trie[p][x];
}
cntword[p]=1;
}
void build()
{
queue<int>q;
for(int i=0; i<4; i++)
{
if(Trie[0][i])
{
fail[Trie[0][i]]=0;
q.push(Trie[0][i]);
}
}
while(!q.empty())
{
int now=q.front();
q.pop();
if(cntword[fail[now]])//如果当前结点的指向是不允许的,那么这个点也是不允许的
cntword[now]=1;
for(int i=0; i<4; i++)
{
if(Trie[now][i])
{
fail[Trie[now][i]]=Trie[fail[now]][i];
q.push(Trie[now][i]);
}
else
Trie[now][i]=Trie[fail[now]][i];
}
}
}
struct matrix
{
ll mat[maxx][maxx];
matrix()
{
memset(mat,0,sizeof(mat));
}
} ans,cnt;
// Multiplies the leading (k+1) x (k+1) sub-matrices of x and y and returns the
// product with every entry reduced modulo mod.
//
// The operands are taken by const reference: a matrix is maxx*maxx ll values
// (roughly 95 KB), so passing by value would copy both operands on every call.
// The arguments may alias each other (e.g. mul(a, a)) because the product is
// accumulated in a separate local matrix.
matrix mul(const matrix &x,const matrix &y)
{
    matrix prod;
    for(int i=0; i<=k; i++)
        for(int t=0; t<=k; t++)
        {
            const ll lhs=x.mat[i][t];
            if(lhs==0)
                continue;   // a zero factor contributes nothing to row i
            // Row-wise accumulation keeps both prod and y accesses sequential.
            for(int j=0; j<=k; j++)
                prod.mat[i][j]=(prod.mat[i][j]+lhs*y.mat[t][j])%mod;
        }
    return prod;
}
// Raises x to the y-th power by binary exponentiation, using mul() for every
// product (so entries are reduced modulo mod).
//
// x: base matrix; taken by value because it is squared in place while the
//    exponent is consumed, so the caller's matrix is left untouched.
// y: exponent; y <= 0 yields the (k+1) x (k+1) identity.
// Returns x^y restricted to the leading (k+1) x (k+1) sub-matrix.
matrix matrixpow(matrix x,ll y)
{
    matrix result;
    for(int i=0; i<=k; i++)
        result.mat[i][i]=1;   // start from the identity

    while(y>0)
    {
        if(y&1)
            result=mul(result,x);
        y>>=1;
        if(y>0)
            x=mul(x,x);   // square only while more exponent bits remain
    }
    return result;
}
// Builds the transition-count matrix of the automaton: entry [from][to] is the
// number of child slots that lead from node `from` to node `to`, counting only
// transitions whose source and target are both non-forbidden.
matrix build_mat()
{
    matrix trans;
    for(int from=0; from<=k; ++from)
    {
        if(cntword[from]==0)
        {
            for(int slot=0; slot<4; ++slot)
            {
                const int to=Trie[from][slot];
                if(cntword[to]==0)
                    trans.mat[from][to]+=1;
            }
        }
    }
    return trans;
}
// Reads m and n, then m patterns; builds the automaton and its transition
// matrix, raises the matrix to the n-th power and prints the sum of row 0
// modulo mod.
int main()
{
    init();
    if(scanf("%lld%lld",&m,&n)!=2)
        return 1;   // header missing or malformed: nothing meaningful to compute

    for(int i=1; i<=m; i++)
    {
        // Field width 19 leaves room for the terminating NUL in s[20].
        if(scanf("%19s",s)!=1)
            break;   // input ended early: continue with the patterns read so far
        insert(s);
    }

    build();
    ans=build_mat();
    cnt=matrixpow(ans,n);

    // Row 0 holds the counts of walks that start at the root.
    ll x=0;
    for(int i=0; i<=k; i++)
        x=(x+cnt.mat[0][i])%mod;
    printf("%lld\n",x);
    return 0;
}