C#過濾敏感詞DFA算法

今天遊戲正好需要過濾敏感詞,將出現的敏感詞替換成*。在網上找了許久,找到一篇可用的Java版本的DFA算法,最後費了一番功夫將其思路用C#實現,裏面的註釋甚至都沒改動。這裏直接上代碼,不借助任何第三方工具,複製粘貼就能用。

當然想看原博客的點擊這裏


using UnityEngine;
using System.Collections.Generic;
/// <summary>
/// Demo component that masks sensitive words in a string using a DFA
/// (character trie) built from a word list stored in a <see cref="TextAsset"/>.
/// Press the "x" key at runtime to filter a sample sentence and print the result.
/// </summary>
/// <remarks>
/// The trie is stored as nested <c>Dictionary&lt;object, object&gt;</c> nodes.
/// Each node maps a single-character string to its child node, plus the
/// reserved key <c>"isEnd"</c> whose value is <c>"1"</c> when a word ends at
/// that node and <c>"0"</c> otherwise. Character keys are always one char
/// long, so they can never collide with the <c>"isEnd"</c> key.
/// </remarks>
public class Test : MonoBehaviour
{
    /// <summary>Reserved node key marking whether a word ends at the node.</summary>
    private const string EndKey = "isEnd";

    /// <summary>Value of <see cref="EndKey"/> when a word ends at the node.</summary>
    private const string EndTrue = "1";

    /// <summary>Value of <see cref="EndKey"/> when no word ends at the node.</summary>
    private const string EndFalse = "0";

    /// <summary>Minimum-match rule: stop at the shortest sensitive word found.</summary>
    private const int MinMatchType = 1;

    /// <summary>Word list asset; sensitive words are separated by line breaks.</summary>
    public TextAsset txt;

    private void Start()
    {

    }

    private void Update()
    {
        if (Input.GetKeyDown("x"))
        {
            string str = "後來的啊微信哈哈嘀嘀嘀微信qqwx呼呼呵呵";
            HashSet<string> set = ReadTxtByLine(txt);
            Dictionary<object, object> map = AddBadWordToHashMap(set);
            string set1 = GetBadWord(str, 2, map);
            print(set1);
        }
    }

    /// <summary>
    /// Reads the sensitive words from <paramref name="file"/>, one per line.
    /// </summary>
    /// <param name="file">Text asset whose lines are sensitive words.</param>
    /// <returns>
    /// The set of non-empty, trimmed words; an empty set when
    /// <paramref name="file"/> is not assigned.
    /// </returns>
    private HashSet<string> ReadTxtByLine(TextAsset file)
    {
        HashSet<string> words = new HashSet<string>();
        if (file == null || file.text == null)
        {
            return words;
        }

        // Split on both '\n' and '\r' so Windows ("\r\n") and Unix line
        // endings work; this produces empty entries which are skipped below.
        string[] lines = file.text.Split('\n', '\r');
        foreach (string line in lines)
        {
            string word = line.Trim();
            if (word.Length > 0)
            {
                words.Add(word);
            }
        }
        return words;
    }

    /// <summary>
    /// Builds the DFA (trie) from the given sensitive words.
    /// </summary>
    /// <param name="keyWordSet">Sensitive words; null or empty entries are ignored.</param>
    /// <returns>The root node of the trie (empty when there are no words).</returns>
    private Dictionary<object, object> AddBadWordToHashMap(HashSet<string> keyWordSet)
    {
        // Pre-size the root to reduce rehashing while inserting.
        int capacity = keyWordSet == null ? 0 : keyWordSet.Count;
        Dictionary<object, object> wordMap = new Dictionary<object, object>(capacity);
        if (keyWordSet == null)
        {
            return wordMap;
        }

        foreach (string word in keyWordSet)
        {
            if (string.IsNullOrEmpty(word))
            {
                continue;
            }

            Dictionary<object, object> nowMap = wordMap;
            for (int i = 0; i < word.Length; i++)
            {
                string keyChar = word[i].ToString();
                object child;
                if (nowMap.TryGetValue(keyChar, out child))
                {
                    // The character already exists: descend into its node so
                    // words sharing a prefix share the same path.
                    nowMap = (Dictionary<object, object>)child;
                }
                else
                {
                    // New character: create a child node, initially not a word end.
                    Dictionary<object, object> newWordMap = new Dictionary<object, object>();
                    newWordMap[EndKey] = EndFalse;
                    nowMap.Add(keyChar, newWordMap);
                    nowMap = newWordMap;
                }

                if (i == word.Length - 1)
                {
                    // Last character of the word: mark this node as a word end.
                    nowMap[EndKey] = EndTrue;
                }
            }
        }
        return wordMap;
    }

    /// <summary>
    /// Replaces every sensitive word found in <paramref name="txt"/> with '*'
    /// characters (one per masked character).
    /// </summary>
    /// <param name="txt">Text to filter.</param>
    /// <param name="matchType">
    /// 1 for the minimum-match rule (shortest word); any other value for the
    /// maximum-match rule (longest word).
    /// </param>
    /// <param name="wordMap">Trie root produced by <c>AddBadWordToHashMap</c>.</param>
    /// <returns>
    /// The filtered text; <paramref name="txt"/> unchanged when it is null or
    /// empty or when <paramref name="wordMap"/> is null.
    /// </returns>
    public string GetBadWord(string txt, int matchType, Dictionary<object, object> wordMap)
    {
        if (string.IsNullOrEmpty(txt) || wordMap == null)
        {
            return txt;
        }

        char[] c = txt.ToCharArray();
        for (int i = 0; i < c.Length; i++)
        {
            int length = CheckBadWord(txt, i, matchType, wordMap);
            if (length > 0)
            {
                for (int j = 0; j < length; j++)
                {
                    c[i + j] = '*';
                }
                // Skip past the masked word; minus 1 because the for loop increments i.
                i = i + length - 1;
            }
        }
        return new string(c);
    }

    /// <summary>
    /// Checks whether a sensitive word starts at <paramref name="beginIndex"/>.
    /// </summary>
    /// <param name="txt">Text being scanned.</param>
    /// <param name="beginIndex">Index in <paramref name="txt"/> where matching starts.</param>
    /// <param name="matchType">
    /// 1 for the minimum-match rule; any other value for the maximum-match rule.
    /// </param>
    /// <param name="wordMap">Trie root.</param>
    /// <returns>
    /// Length of the matched sensitive word (shortest for minimum match,
    /// longest for maximum match), or 0 when no word starts at that index.
    /// </returns>
    private int CheckBadWord(string txt, int beginIndex, int matchType, Dictionary<object, object> wordMap)
    {
        // Length of the last complete word seen; stays 0 when only a prefix
        // of a word matched.
        int matchedLength = 0;
        // Number of characters walked down the trie so far.
        int walked = 0;
        Dictionary<object, object> nowMap = wordMap;

        for (int i = beginIndex; i < txt.Length; i++)
        {
            object child;
            if (!nowMap.TryGetValue(txt[i].ToString(), out child))
            {
                // No transition for this character: stop scanning.
                break;
            }

            nowMap = (Dictionary<object, object>)child;
            walked++;

            object endFlag;
            if (nowMap.TryGetValue(EndKey, out endFlag) && EndTrue.Equals(endFlag))
            {
                // A complete word ends here; remember its length rather than
                // the walk length, so trailing partial matches are not masked.
                matchedLength = walked;
                if (matchType == MinMatchType)
                {
                    // Minimum rule: return the shortest word immediately.
                    break;
                }
            }
        }

        return matchedLength;
    }

}

下面是敏感詞庫,其實就是一個.txt文件,每行一個敏感詞,大家自己創建就行。我這內容也不多,上個截圖就好。

好了今天就這麼多

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章