KMP
KMP雖然短小精悍,用處卻很大,能應用於很多方面,其中fail(next)數組尤其威力無窮。
// Builds the KMP failure table of the NUL-terminated string s.
//
// On return, fail[i] (0 <= i < strlen(s)) is the index of the last character
// of the longest proper border of s[0..i], or -1 when that prefix has no
// border. fail[0] is always written (as -1), so `fail` must have room for
// max(1, strlen(s)) ints.
void kmp(char *s, int *fail) {
    // Take the length once, as a signed int: `strlen(s) - 1` is unsigned and
    // wraps around for an empty string, which would send the loop out of bounds.
    const int len = static_cast<int>(strlen(s));
    int j = fail[0] = -1;
    for (int i = 1; i < len; ++i) {
        // Fall back along shorter borders until one can be extended by s[i].
        while (j != -1 && s[i] != s[j + 1]) j = fail[j];
        if (s[i] == s[j + 1]) ++j;
        // If nothing could be extended, j is -1 here, i.e. "no border".
        fail[i] = j;
    }
}
// Prints, one per line, the 1-based start position of every occurrence
// (overlapping ones included) of pattern t inside text s.
//
// s, t : NUL-terminated strings.
// fail : failure table of t in the -1-based convention, where fail[i] is the
//        index of the last character of the longest proper border of t[0..i],
//        or -1 if there is none.
// An empty pattern produces no output.
void match(char *s, char *t, int *fail) {
    // Both lengths are computed once instead of on every loop iteration.
    const int n = static_cast<int>(strlen(s));
    const int len = static_cast<int>(strlen(t));
    if (len == 0) return;  // otherwise j == len - 1 would hold before any character matched

    int j = -1;  // index in t of the last matched character
    for (int i = 0; i < n; ++i) {
        // Right after a full match j + 1 == len, so t[j + 1] is the terminating
        // NUL; it never equals s[i], which forces the fallback to fail[len - 1].
        while (j != -1 && s[i] != t[j + 1]) j = fail[j];
        if (s[i] == t[j + 1]) ++j;
        if (j == len - 1) printf("%d\n", i - len + 2);
    }
}
AC自動機
KMP的侷限性在於只能匹配單個模式串,而AC自動機能同時匹配多個模式串。同KMP一樣,AC自動機上的fail
指針構成一棵樹,每個結點的fail指向該結點對應串在Trie中存在的最長真後綴(相當於KMP中最長border的推廣)。
// Trie / automaton storage: up to 20001 nodes over a 26-letter alphabet.
int ch[20001][26];  // ch[u][c]: node reached from u on letter c; 0 means "no child" while inserting
int tag[20001];     // tag[u]: id of the pattern that ends at node u, 0 if none
int pt;             // index of the most recently allocated node (the root is node 0)
// Resets node x to a blank state: no pattern tag and no outgoing edges.
void newnode(int x) {
    tag[x] = 0;
    memset(ch[x], 0, sizeof ch[x]);
}
void ins(char *s, int id) { // id表示模板串編號
int u = 0;
rep0(i, strlen(s)) {
int idx = s[i]-'a';
if (!ch[u][idx]) newnode(ch[u][idx] = ++pt);
u = ch[u][idx];
}
tag[u] = id;
}
// fail[u]: failure link of automaton node u, filled in by build_ac(); the root (node 0) keeps 0.
int fail[20001];
void build_ac() {
static int q[20001], l, r;
l = r = 0; memset(fail, 0, sizeof fail);
rep0(i, 26)
if (ch[0][i]) fail[q[r++] = ch[0][i]] = 0;
while (l < r) {
int u = q[l++];
rep0(i, 26)
ch[u][i] ? fail[q[r++] = ch[u][i]] = ch[fail[u]][i] : ch[u][i] = ch[fail[u]][i];
// 這裏面將空結點順道匹配了,這個是**很重要**的trick
}
}
void find(char *s) {
int u = 0;
rep0(i, strlen(s)) {
int idx = s[i]-'a';
u = ch[u][idx];
for (int v = u; v; v = fail[v]) if (tag[v]) ans[tag[v]]++;
// 這個是不優秀的寫法,實際上應該在fail樹上打標記,最後dfs求解
}
}
SA後綴數組
SA在處理後綴問題上頗有用處,可以進一步來處理最長公共前綴的問題。我的另一篇博客有詳細的代碼註解。
void build_sa(char *s, int *sa) {
static int t[maxn], t2[maxn], c[maxn], *x = t, *y = t2, n, m;
n = strlen(s), m = 'z'+1;
rep0(i, m) c[i] = 0;
rep0(i, n) c[x[i] = s[i]]++;
rep(i, 1, m) c[i] += c[i-1];
rep0(i, n) sa[--c[x[i]]] = i;
for (int k = 1; k < n; k <<= 1) {
int p = 0;
rep(i, n-k, n-1) y[p++] = i;
rep0(i, n) if (sa[i] >= k) y[p++] = sa[i]-k;
rep0(i, m) c[i] = 0;
rep0(i, n) c[x[y[i]]]++;
rep(i, 1, m) c[i] += c[i-1];
per0(i, n) sa[--c[x[y[i]]]] = y[i];
std::swap(x, y);
p = 1; x[sa[0]] = 0;
rep(i, 1, n-1)
x[sa[i]] = y[sa[i]] == y[sa[i-1]] && y[sa[i]+k] == y[sa[i-1]+k] ? p-1 : p++;
if ((m = p) == n) return;
}
}