egret中實現敏感詞檢測

參考資料:http://bbs.9ria.com/thread-226068-1-1.html

/**
 * A single node of the sensitive-word trie.
 *
 * Every node remembers the string it was added under (`value`), its parent,
 * and its children keyed by that same string.
 */
class TreeNode {
    /**
     * Children keyed by the string passed to addChild.
     *
     * A null-prototype object is used instead of an Array/plain object so that
     * keys such as "length", "push" or "constructor" are ordinary entries and
     * inherited members are never mistaken for child nodes.
     */
    private children: { [key: string]: TreeNode } = Object.create(null);

    /**
     * Whether this node is the last character of a sensitive word.
     * A leaf of the word trie is always a word end; an inner node may or may
     * not be. This class never changes the flag itself; the code that builds
     * the trie sets it.
     */
    public isEnd: boolean = false;

    /** Parent node; stays undefined for a node that was not created by addChild. */
    public parent?: TreeNode;

    /**
     * The string this node was added under. Defaults to "" so that a bare
     * root node does not contribute the text "undefined" to getFullWord().
     */
    public value: string = "";

    public constructor() {
    }

    /**
     * Looks up a direct child.
     *
     * @param name key the child was added under
     * @returns the child node, or undefined when there is none
     */
    public getChild(name: string): TreeNode {
        return this.children[name];
    }

    /**
     * Creates a new child under the given key, replacing any child that was
     * already stored under that key.
     *
     * @param char key (and value) of the new child
     * @returns the newly created child node
     */
    public addChild(char: string): TreeNode {
        const child = new TreeNode();
        child.value = char;
        child.parent = this;
        this.children[char] = child;
        return child;
    }

    /**
     * Concatenates the values on the path from the topmost ancestor down to
     * this node.
     *
     * @returns the word spelled by that path
     */
    public getFullWord(): string {
        let word = this.value;
        for (let node = this.parent; node; node = node.parent) {
            word = node.value + word;
        }
        return word;
    }

    /**
     * Whether this node has no children.
     */
    public get isLeaf(): boolean {
        return Object.keys(this.children).length === 0;
    }
}

/**
 * Minimal string-keyed lookup table of trie nodes.
 */
class Dictionary {
    /**
     * Backing store. It is a null-prototype object rather than an Array so
     * that every string is a usable key: with an Array, SetName("length", ...)
     * throws and GetName("push") returns an inherited method. Own keys can
     * still be enumerated with for..in.
     */
    public dic: { [key: string]: TreeNode };

    public constructor() {
        this.dic = Object.create(null);
    }

    /**
     * @param name key to look up
     * @returns the node stored under the key, or undefined when there is none
     */
    public GetName(name: string): TreeNode {
        return this.dic[name];
    }

    /**
     * Stores a node under the key, replacing any previous entry.
     *
     * @param name key to store under
     * @param src node to store
     */
    public SetName(name: string, src: TreeNode): void {
        this.dic[name] = src;
    }
}


/**
 * Sensitive-word filter backed by a character trie.
 *
 * Register the word list once with regSensitiveWords(); afterwards
 * replaceSensitiveWord() masks every registered word with '*' and
 * containsBadWords() reports whether any registered word occurs.
 * Matching is leftmost-longest: at each position the longest registered word
 * starting there wins, and scanning resumes right after it.
 */
class SensitiveWordFilter {
    private static instance: SensitiveWordFilter;

    /** Root of the word trie; unset until regSensitiveWords() has been called. */
    public treeRoot: TreeNode;

    public constructor() {
    }

    /**
     * @returns the shared filter instance, created on first use
     */
    public static GetInstance(): SensitiveWordFilter {
        if (!this.instance) {
            this.instance = new SensitiveWordFilter();
        }
        return this.instance;
    }

    /**
     * Builds the word trie, discarding any previously registered words.
     * This preprocessing costs more than a lookup but is meant to run once
     * at program start.
     *
     * @param words sensitive words; empty or missing entries are skipped
     */
    public regSensitiveWords(words: Array<string>): void {
        const root = new TreeNode();
        root.value = "";
        const list = words || [];
        for (let i = 0; i < list.length; i++) {
            const word = list[i];
            if (!word) {
                continue;
            }
            let branch = root;
            for (let c = 0; c < word.length; c++) {
                const char = word.charAt(c);
                // Reuse the existing branch when this prefix is already known.
                branch = branch.getChild(char) || branch.addChild(char);
            }
            branch.isEnd = true;
        }
        this.treeRoot = root;
    }

    /**
     * @param len number of mask characters wanted
     * @returns a string of `len` asterisks ("" when len is not positive)
     */
    private getReplaceWord(len: number): string {
        let mask = "";
        for (let i = 0; i < len; i++) {
            mask += "*";
        }
        return mask;
    }

    /**
     * Walks the trie from `start` and reports the longest registered word
     * that begins exactly there.
     *
     * @param text text being scanned
     * @param start index of the first character to match
     * @returns length of the longest match, or 0 when no word starts at `start`
     */
    private longestMatchAt(text: string, start: number): number {
        let node: TreeNode = this.treeRoot;
        let longest = 0;
        for (let i = start; i < text.length; i++) {
            node = node.getChild(text.charAt(i));
            if (!node) {
                break;
            }
            if (node.isEnd) {
                // Keep going: a longer word may share this prefix.
                longest = i - start + 1;
            }
        }
        return longest;
    }

    /**
     * Replaces every registered sensitive word in the text with asterisks,
     * one '*' per character of the word.
     *
     * @param dirtyWords text to clean
     * @returns the masked text; the input is returned unchanged when it is
     *          empty or when no words have been registered yet
     */
    public replaceSensitiveWord(dirtyWords: string): string {
        if (!dirtyWords || !this.treeRoot) {
            return dirtyWords;
        }
        let cleaned = "";
        let pos = 0;
        while (pos < dirtyWords.length) {
            const matched = this.longestMatchAt(dirtyWords, pos);
            if (matched > 0) {
                // Mask by position, so only the characters actually matched
                // here are replaced.
                cleaned += this.getReplaceWord(matched);
                pos += matched;
            } else {
                cleaned += dirtyWords.charAt(pos);
                pos++;
            }
        }
        return cleaned;
    }

    /**
     * Tells whether the text contains at least one registered sensitive word.
     *
     * @param dirtyWords text to inspect
     * @returns true when a registered word occurs anywhere in the text; false
     *          otherwise, including when no words have been registered yet
     */
    public containsBadWords(dirtyWords: string): boolean {
        if (!dirtyWords || !this.treeRoot) {
            return false;
        }
        for (let pos = 0; pos < dirtyWords.length; pos++) {
            if (this.longestMatchAt(dirtyWords, pos) > 0) {
                return true;
            }
        }
        return false;
    }
}


以上代碼是我從參考資料搬運過來,並修改成能在 egret 中使用的版本。
主要想法和代碼都來自原作者。


demo

// Register a tiny, illustrative word list on the shared filter.
const sensitiveWords: Array<string> = ["敏感", "詞", "和諧"];
const wordFilter = SensitiveWordFilter.GetInstance();
wordFilter.regSensitiveWords(sensitiveWords);

// Print the sentence, mask it, then print it again.
let str: string = "這些都是被和諧的敏感詞啊哈哈哈";
console.log(str);
str = wordFilter.replaceSensitiveWord(str);
console.log(str);

以下是輸出(before:/after: 是為了方便對照而加的標註,上面兩次 console.log(str) 實際打印的只有冒號後面的內容):
before:這些都是被和諧的敏感詞啊哈哈哈
after:這些都是被**的***啊哈哈哈

當然,實際使用時敏感詞庫應該是一個相當長的數組,我這裏只是簡單測試一下而已。
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章