參考資料:http://bbs.9ria.com/thread-226068-1-1.html
/**
 * One node of the sensitive-word trie.
 *
 * Each node holds a single character (`value`), a link to its parent and a
 * table of child nodes keyed by character. Concatenating the values on the
 * path from the root down to a node yields the word prefix that node stands for.
 */
class TreeNode {
    /** Child nodes, keyed by the character they hold. */
    private data: Dictionary;
    /**
     * Whether this node is the last character of a registered word.
     * A leaf is always a word end; an inner node may or may not be one.
     */
    public isEnd: boolean = false;
    /** Node this one was attached to by addChild(); stays unset on a root node. */
    public parent: TreeNode;
    /**
     * Character held by this node. Defaults to the empty string so that a
     * root node contributes nothing to getFullWord().
     */
    public value: string = "";

    public constructor() {
        this.data = new Dictionary();
    } //end of Function

    /**
     * Looks up the direct child stored under `name`.
     * @param name key of the child (the character it holds)
     * @return the child node, or undefined when there is none
     */
    public getChild(name: string): TreeNode {
        return this.data.GetName(name);
    } //end of Function

    /**
     * Creates a new node holding `char` and attaches it below this node.
     * A child already stored under the same key is overwritten.
     * @param char character the new node holds
     * @return the newly created child
     */
    public addChild(char: string): TreeNode {
        var node: TreeNode = new TreeNode();
        node.value = char;
        node.parent = this;
        this.data.SetName(char, node);
        return node;
    } //end of Function

    /**
     * Rebuilds the text spelled by the path from the root down to this node.
     * @return the values of all ancestors followed by this node's own value
     */
    public getFullWord(): string {
        var rt: string = this.value;
        var node: TreeNode = this.parent;
        // Walk upwards, prepending each ancestor's character.
        while (node) {
            rt = node.value + rt;
            node = node.parent;
        } //end while
        return rt;
    } //end of Function

    /**
     * Whether this node is a leaf, i.e. has no children.
     */
    public get isLeaf(): boolean {
        return Object.keys(this.data.dic).length === 0;
    }
}
/**
 * Minimal string-keyed lookup table from a key to a TreeNode.
 *
 * The backing store is an object without a prototype, so a lookup only ever
 * sees keys that were stored through SetName(). Keys such as "length" or
 * "constructor" therefore behave like any other key.
 */
class Dictionary {
    /** Backing store; enumerate its keys with for..in or Object.keys(). */
    public dic: { [key: string]: TreeNode };

    public constructor() {
        this.dic = Object.create(null);
    }

    /**
     * Returns the node stored under `name`.
     * @param name key to look up
     * @return the stored node, or undefined when the key was never set
     */
    public GetName(name: string): TreeNode {
        return this.dic[name];
    }

    /**
     * Stores `src` under `name`, replacing any previous entry for that key.
     * @param name key to store under
     * @param src node to store
     */
    public SetName(name: string, src: TreeNode): void {
        this.dic[name] = src;
    }
}
/**
 * Trie-based sensitive-word filter.
 *
 * Usage: call regSensitiveWords() once with the word list, then use
 * replaceSensitiveWord() to mask matches or containsBadWords() to test for
 * them. A shared instance is available through GetInstance().
 *
 * Matching is done per UTF-16 code unit (charAt), scanning left to right and
 * taking the longest registered word that starts at each position.
 */
class SensitiveWordFilter {
    /** Lazily created shared instance returned by GetInstance(). */
    private static instance: SensitiveWordFilter;
    /** Root of the word trie; unset until regSensitiveWords() has been called. */
    public treeRoot: TreeNode;

    public constructor() {
    }

    /**
     * Returns the shared filter instance, creating it on first use.
     */
    public static GetInstance(): SensitiveWordFilter {
        if (!this.instance) {
            this.instance = new SensitiveWordFilter();
        }
        return this.instance;
    }

    /**
     * Builds the word trie from `words`, discarding any previously built trie.
     * This preprocessing is more expensive than a lookup and is meant to run
     * once at start-up.
     * @param words the sensitive words to register; empty entries are ignored
     */
    public regSensitiveWords(words: Array<string>): void {
        var root: TreeNode = new TreeNode();
        root.value = "";
        for (var i: number = 0; i < words.length; i++) {
            var word: string = words[i];
            if (!word) {
                continue; // nothing to index for an empty entry
            }
            var branch: TreeNode = root;
            for (var c: number = 0; c < word.length; c++) {
                var char: string = word.charAt(c);
                // Reuse the existing branch for this character or grow a new one.
                branch = branch.getChild(char) || branch.addChild(char);
            } //end for
            branch.isEnd = true;
        } //end for
        this.treeRoot = root;
    } //end of Function

    /**
     * Builds the mask for a matched word.
     * @param len number of characters to mask
     * @return a string of `len` asterisks
     */
    private getReplaceWord(len: number): string {
        return new Array(len + 1).join("*");
    }

    /**
     * Finds the longest registered word that starts exactly at `start`.
     * @param text text being scanned
     * @param start index in `text` at which the word has to begin
     * @return length of the longest match, or 0 when no word starts there
     */
    private longestMatchAt(text: string, start: number): number {
        var node: TreeNode = this.treeRoot;
        var matched: number = 0;
        for (var i: number = start; i < text.length; i++) {
            node = node.getChild(text.charAt(i));
            if (!node) {
                break; // left the trie: no longer word can start at `start`
            }
            if (node.isEnd) {
                matched = i - start + 1;
            }
        }
        return matched;
    }

    /**
     * Returns a copy of `dirtyWords` in which every sensitive word is replaced
     * by asterisks, one per character of the word.
     * @param dirtyWords text to clean
     * @return the masked text; the input is returned unchanged when it is
     *         empty or no words have been registered yet
     */
    public replaceSensitiveWord(dirtyWords: string): string {
        if (!this.treeRoot || !dirtyWords) {
            return dirtyWords;
        }
        var cleaned: string = "";
        var c: number = 0;
        while (c < dirtyWords.length) {
            var len: number = this.longestMatchAt(dirtyWords, c);
            if (len > 0) {
                // Mask the match in place and resume right behind it.
                cleaned += this.getReplaceWord(len);
                c += len;
            }
            else {
                cleaned += dirtyWords.charAt(c);
                c++;
            }
        }
        return cleaned;
    }

    /**
     * Tells whether `dirtyWords` contains at least one sensitive word.
     * @param dirtyWords text to inspect
     * @return true when a registered word occurs anywhere in the text; false
     *         otherwise, including when no words have been registered yet
     */
    public containsBadWords(dirtyWords: string): boolean {
        if (!this.treeRoot || !dirtyWords) {
            return false;
        }
        for (var c: number = 0; c < dirtyWords.length; c++) {
            if (this.longestMatchAt(dirtyWords, c) > 0) {
                return true;
            }
        }
        return false;
    }
}
我只是把原作搬運過來,改成 Egret 能用的版本而已。
主要的想法和代碼都來自原作者(見開頭的參考資料)。
demo
// Demo: register three sample words, then log the text before and after filtering.
var array: Array<string> = ["敏感", "詞", "和諧"];
var filter: SensitiveWordFilter = SensitiveWordFilter.GetInstance();
filter.regSensitiveWords(array);
var str: string = "這些都是被和諧的敏感詞啊哈哈哈";
console.log(str);
str = filter.replaceSensitiveWord(str);
console.log(str);
以下是兩次 console.log(str) 的輸出(「before:」「after:」是為了說明而加的標註,程式本身不會輸出):
before:這些都是被和諧的敏感詞啊哈哈哈
after:這些都是被**的***啊哈哈哈
當然,實際使用的敏感詞庫應該是一個相當長的數組,我這裏只是測試一下而已。