這應該是最後一次修改了吧。最後還是把「完全包含」的情況放進了 AC 內部處理:在構建 fail 指針的時候,
比如 abcd 與 bc,當構建到 abcd 的 c 時,檢測到 bc 的 c 爲終止態,就將 bc 的 c 的終止態信息添加到 abcd 的 c 上面去,這樣就解決了這個問題。
還有修復了之前的查找問題,如果查找不到parent的fail->next的信息就直接指向parent的fail,這種是不對的,應該直接指向root
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
修改了一次,添加了可擴展的接口,使接口可以傳遞自定義參數進行控制,編寫回調接口.同時放棄strlen的內部調用,使函數可以完全的面向二進制流
但是ac好像本身就有一個缺陷,就是如果添加的字串中,如果存在完全包含關係,必須得外部特例寫出來或者內部實現時寫出來
比如兩個串 abcd bc,在cabcde中搜索,是隻能搜出abcd而不能搜出bc來的,不知道是我寫法上應該在添加字符串時,將bc添加到abcd中去識別 還是在外部額外去處理,還有待研究,個人覺得這種特例外部實現比內部實現要效果好些
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
一言不合就貼代碼
基於單字節流的二進制多模匹配算法
效果截圖
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
extern "C" {
/* Number of child slots per trie node: one per possible byte value. */
const int kind = 256;
const int chongfu=20; // capacity of node::result, i.e. the most pattern ids one node can record
/*
 Match callback invoked by query(): `pos` is the index of the text byte at
 which the match was detected, `id` is the id registered for the pattern, and
 the last argument is the caller-supplied `data` pointer passed through
 unchanged. Returning 1 stops the search; any other value continues it.
*/
typedef int(*ACcallack)(int pos,int id,void*);
/*
 Trie / automaton node of the Aho-Corasick matcher.
 Nodes are created zero-filled by bznode(), so an unused child slot is a null
 pointer and a node that terminates no pattern has count == 0.
*/
struct node{
unsigned char id; /* byte value of the edge leading to this node (set by insertchar) */
node *fail; /* failure link, assigned by build() */
node *next[kind]; /* children, indexed by byte value */
int count; /* number of valid entries in result */
int result[chongfu]; /* ids of the patterns reported when this node is reached */
};
/*
 Allocate one zero-filled trie node.
 Returns the new node, or a null pointer when the allocation fails.
*/
struct node* bznode()
{
    /* calloc returns storage that is already zeroed, or NULL on failure */
    return (struct node*)calloc(1,sizeof(struct node));
}
/*
 Queue item used by the breadth-first traversal in build().
 Each item names one trie node (`curnode`), the node it hangs off (`parent`)
 and the byte value of the edge between them (`id`); `next` links the items
 into a singly linked FIFO.
*/
struct FBSnode{
    unsigned char id;
    struct node* parent;
    struct node* curnode;
    struct FBSnode* next;
    /* Initialise every member, including curnode, so a constructed item never
       carries an indeterminate pointer. */
    FBSnode(){parent=0;curnode=0;next=0;id=0;}
};
/*
 Allocate one zero-filled breadth-first queue item.
 Returns the new item, or a null pointer when the allocation fails.
*/
struct FBSnode* bzfbsnode()
{
    /* calloc returns storage that is already zeroed, or NULL on failure */
    return (struct FBSnode*)calloc(1,sizeof(struct FBSnode));
}
/*
添加字符
*/
void insertchar(unsigned char ch,int is_end,int id,node **leafhead)
{
node *leaf=*leafhead;
if(leaf==0)
{
*leafhead= bznode();
leaf=*leafhead;
}
if(leaf->next[ch]==0)
{
leaf->next[ch]=bznode();
leaf->next[ch]->id=ch;
if(is_end!=0)
{
leaf->next[ch]->count=1;
leaf->next[ch]->result[0]=id;
}
}
else
{
if(is_end!=0)
{
leaf->next[ch]->result[ leaf->next[ch]->count++]=id;
}
}
}
/*
添加字符串
*/
void insert(unsigned char *str,int len,int id,node **root)
{
node **p=root;
int i=0,index;
for(i=0;i<len;i++)
{
index=str[i];
insertchar(*(unsigned char*)&str[i],i==len-1?1:0,id,p);
p=&((*p)->next[index]);
}
}
/*
 Append every pattern id stored on *leafcopy to the result list of *leaf.
 Copying stops as soon as the destination already holds `chongfu` ids; the
 remaining ids are dropped. Ids are appended as-is, without looking for
 duplicates.
*/
void addresid(node**leaf,node **leafcopy)
{
    node *dst=*leaf;
    node *src=*leafcopy;
    int k;

    for(k=0;k<src->count;k++)
    {
        if(dst->count>=chongfu)
        {
            //log(out of memory);
            return;
        }
        dst->result[dst->count]=src->result[k];
        dst->count++;
    }
}
/*
 Compute the failure links of the trie rooted at `root`.

 Nodes are visited breadth-first. The root and its direct children fail to the
 root. For every deeper node the failure chain of its parent is followed until
 a node with a child for the same byte is found (or the root is reached); that
 child becomes the fail target and its pattern ids are merged into the node
 via addresid(), so patterns that are suffixes of a longer path are reported
 too.

 A null `root` is ignored, and a trie of any depth (including root only) is
 handled. If a queue item cannot be allocated the traversal stops early and
 nodes not yet visited keep their previous fail pointer. Every queue item is
 released before returning.

 Note: ids are merged by appending, so running build() again on the same trie
 appends the inherited ids a second time.
*/
void build(node *root)
{
    FBSnode *head;
    FBSnode *tail;
    FBSnode *cur;
    FBSnode *item;
    node *here;
    node *f;
    int j;
    int ok=1;

    if(root==0)
        return;
    head=bzfbsnode();
    if(head==0)
        return;
    head->curnode=root;
    head->parent=root;
    tail=head;
    root->fail=root;

    for(cur=head;ok&&cur!=0;cur=cur->next)
    {
        here=cur->curnode;
        if(here!=root)
        {
            if(cur->parent==root)
            {
                /* depth-1 nodes always fall back to the root */
                here->fail=root;
            }
            else
            {
                /* Walk the parent's failure chain until some node can
                   consume this byte; shallower nodes are already linked
                   because of the breadth-first order. */
                f=cur->parent->fail;
                while(f!=root&&f->next[cur->id]==0)
                    f=f->fail;
                if(f->next[cur->id]!=0)
                {
                    here->fail=f->next[cur->id];
                    /* inherit the ids of every pattern ending at the fail target */
                    addresid(&here,&here->fail);
                }
                else
                {
                    here->fail=root;
                }
            }
        }

        /* enqueue the children of the node just processed */
        for(j=0;j<kind;j++)
        {
            if(here->next[j]==0)
                continue;
            item=bzfbsnode();
            if(item==0)
            {
                ok=0;
                break;
            }
            item->curnode=here->next[j];
            item->parent=here;
            item->id=(unsigned char)j;
            tail->next=item;
            tail=item;
        }
    }

    /* release the whole queue, last item included */
    while(head!=0)
    {
        cur=head->next;
        free(head);
        head=cur;
    }
}
/*
查詢字符串
str 目標字符串
root 構建完畢的ac樹
function ac查找時找到的回調函數 (int 返回值爲0 繼續查找 爲1 直接完成查找 ,第一個參數int 位置,第二個參數 id ,第三個參數 待擴展
data 傳遞給function的參數 擴展做控制使用
*/
void query(unsigned char* str,int len ,node *root,ACcallack function,void*data)
{
int i=0,j=0;
node *p=root;
for(i=0;i<len;i++)
{
if(p->next[str[i]]==0)
{
if(p->fail->next[str[i]]!=0)
p=p->fail->next[str[i]];
else
//p=p->fail;
p=root;
}
else
{
p=p->next[str[i]];
}
for(j=0;j<p->count;j++)
{
if(function(i,p->result[j],data)==1)
{
return ;
}
}
}
}
}
/* NOTE(review): this section is compiled only when the build defines
   __CPlusPlus__; that is not the standard __cplusplus macro — confirm that
   the opt-in is intentional. */
#ifdef __CPlusPlus__
namespace ACspace{
/*
 Thin object wrapper around the free functions insert(), build() and query().
 The object only stores the trie root; it does not release the trie.
*/
class ACbin{
public:
    ACbin(){root=0;}
    /* Scan `len` bytes of `str`; see query() for the callback contract.
       Does nothing while no pattern has been inserted. */
    void binquery(unsigned char* str,int len ,ACcallack function,void*data)
    {
        if(root!=0)
            query(str,len,root,function,data);
    }
    /* Add one pattern of `len` bytes with the given id. Delegates to the
       free insert() so both entry points share one implementation. */
    void insert(unsigned char *str,int len,int id)
    {
        ::insert(str,len,id,&root);
    }
    /* Build the failure links. Does nothing while no pattern has been
       inserted, instead of handing a null root to build(). */
    void acbuild()
    {
        if(root!=0)
            build(root);
    }
    struct node* root;
};
}
using namespace ACspace;
#endif
/* Demo text scanned by main(); myACcallack() also reads it to print the text
   up to each match position. */
char *str="cabcdefghijklmn";
/*
 Demo match callback: prints the match position, the pattern id and the
 prefix of the global demo text `str` that ends at the match position.
 The prefix is printed straight from `str` with a precision-limited %s, so no
 fixed-size scratch buffer can overflow for large positions.
 Always returns 0 so the search continues.
*/
int myACcallack(int pos,int id,void*)
{
    /* number of characters of the text to show; nothing for a negative pos */
    int shown=(pos<0)?0:pos+1;
    printf("%d\t%d\t%.*s\n",pos,id,shown,str);
    return 0;
}
/*
 Convenience wrapper: add a NUL-terminated pattern to the trie.

 str   pattern text; its length is taken with strlen
 id    id reported when the pattern matches (forwarded to insert(), not
       replaced by a constant)
 root  address of the trie root pointer
*/
void myinsert(char *str,int id,node**root)
{
    insert((unsigned char*)str,(int)strlen(str),id,root);
}
/*
 Demo driver: registers six sample patterns with ids 1..6, builds the
 automaton, scans the global demo text and prints every match through
 myACcallack(). When __CPlusPlus__ is defined the same scan is repeated
 through the ACbin wrapper with the first four patterns. Waits for a key
 press before exiting.
*/
int main()
{
    /* sample patterns; the pattern at index k is registered with id k+1 */
    char patterns[][8]={"ab","bc","eabce","abcd","mjamec","mjamed"};
    const int total=(int)(sizeof patterns/sizeof patterns[0]);
    node *root=0;
    int k;

    for(k=0;k<total;k++)
        myinsert(patterns[k],k+1,&root);
    build(root);
    query((unsigned char*)str,strlen(str),root,myACcallack,0);
#ifdef __CPlusPlus__
    ACbin myac;
    for(k=0;k<4;k++)
        myac.insert((unsigned char*)patterns[k],strlen(patterns[k]),k+1);
    myac.acbuild();
    myac.binquery((unsigned char*)str,strlen(str),myACcallack,0);
#endif
    getchar();
    return 0;
}