過濾腳本:wordfilter.lua
-- Sensitive-word filter module.
local wordfilter = {
-- Root of the byte-keyed trie that holds every registered word.
_tree = {}
}
-- Local aliases for the string-library functions used throughout this file.
local strSub = string.sub
local strLen = string.len
-- String appended once per matched byte when a word is masked.
local _maskWord = '*'
--- Insert one word into the trie rooted at `root`, one byte per level.
--
-- Node layout:
--   * `true`  - a registered word ends here and nothing longer passes through
--   * table   - children keyed by the next byte; `isTail = true` marks that a
--               registered word also ends at this node
-- Child keys are always one-byte strings, so they never collide with "isTail".
--
-- NOTE(review): declared without `local`, so this is a global function.
--
-- @param root table  trie root (mutated in place)
-- @param word string word to register; non-strings and "" are ignored
function _word2Tree(root, word)
    -- The type test has to come first: asking for the length of nil or a
    -- table raises an error instead of letting us skip the bad entry.
    if type(word) ~= 'string' or #word == 0 then
        return
    end

    local sub = string.sub
    local node = root
    local len = #word
    for i = 1, len do
        local ch = sub(word, i, i)
        local child = node[ch]
        if i == len then
            -- Last byte: flag an existing inner node, otherwise store a leaf.
            if type(child) == 'table' then
                child.isTail = true
            else
                node[ch] = true
            end
        else
            if type(child) ~= 'table' then
                -- A leaf (`true`) is promoted to an inner node that remembers
                -- the shorter word; a missing child becomes an empty node.
                child = (child == true) and { isTail = true } or {}
                node[ch] = child
            end
            node = child
        end
    end
end
--- Find the longest registered word that is a prefix of `word`.
--
-- `word` is the text that starts at byte `idx` of the string being scanned,
-- so the result is expressed as a position in that outer string.
--
-- The trie is walked with a loop over byte offsets; no suffix of `word` is
-- copied while descending.
--
-- NOTE(review): declared without `local`, so this is a global function.
--
-- @param parent table  trie node to start matching from
-- @param word   string text to match against the trie
-- @param idx    number position of the first byte of `word` in the outer string
-- @return number|false position of the last byte of the longest match,
--         or false when no registered word starts here
function _check(parent, word, idx)
    local sub = string.sub
    local node = parent
    local matchEnd = false
    for offset = 1, string.len(word) do
        local child = node[sub(word, offset, offset)]
        if child == true then
            -- Leaf: nothing longer exists on this path, so this is the longest.
            return idx + offset - 1
        elseif type(child) ~= 'table' then
            -- Dead end: fall back to the last complete word seen, if any.
            break
        end
        if child.isTail then
            matchEnd = idx + offset - 1
        end
        node = child
    end
    return matchEnd
end
--- Register a single sensitive word with this filter.
-- @param word string word to insert into the filter's trie
function wordfilter:addWord(word)
    local tree = self._tree
    _word2Tree(tree, word)
end
--- Register every value of a word table (for example a config list).
-- An argument that is not a table is ignored.
-- @param words table collection whose values are the words to add
function wordfilter:addWords(words)
    if type(words) ~= 'table' then
        return
    end
    for _, entry in pairs(words) do
        _word2Tree(self._tree, entry)
    end
end
--- Replace every registered word found in `s` with mask characters.
--
-- Scanning is left to right; at each position the longest registered word
-- wins and scanning resumes right after it. One `_maskWord` is emitted per
-- matched byte, so a multi-byte (e.g. UTF-8) word produces several masks.
--
-- The result is assembled from pieces with table.concat instead of
-- rebuilding the whole string on every match.
--
-- @param s string text to filter
-- @return string|nil the masked text; nil when `s` is not a string
function wordfilter:maskString(s)
    if type(s) ~= 'string' then return end

    local tree = self._tree
    local len = strLen(s)
    local parts, count = {}, 0
    local copyFrom = 1 -- first byte of the pending, not yet copied, clean run
    local i = 1
    while i <= len do
        -- Only positions whose first byte exists in the trie can start a
        -- match; testing that first avoids copying a suffix for every byte.
        -- _check yields the last byte position of the longest match or false.
        local idx = tree[strSub(s, i, i)] and _check(tree, strSub(s, i), i)
        if idx then
            if copyFrom < i then
                count = count + 1
                parts[count] = strSub(s, copyFrom, i - 1)
            end
            count = count + 1
            parts[count] = string.rep(_maskWord, idx - i + 1)
            i = idx + 1
            copyFrom = i
        else
            i = i + 1
        end
    end

    if count == 0 then
        -- Nothing matched: hand back the input untouched.
        return s
    end
    if copyFrom <= len then
        count = count + 1
        parts[count] = strSub(s, copyFrom)
    end
    return table.concat(parts)
end
--- Load the word list from the SensitiveCfg module into the filter.
-- NOTE(review): declared without `local`, so this is a global function.
function _Init()
    -- Disabled in the original source:
    -- wordfilter = require 'Lua/LuaCommon/wordfilter'
    wordfilter:addWords(require("SensitiveCfg"))
end
-- Fill the trie as soon as the module body runs.
_Init();
-- Export the filter table as the module value.
return wordfilter
敏感詞配置表:SensitiveCfg.lua
這裏只配置了如下幾個,其他的需要自己添加。
-- Sensitive-word list; each entry is one word for the filter to mask.
-- The entries are variants of the same word written with different separators.
-- NOTE(review): two entries below look identical as displayed; confirm whether
-- one of them was meant to contain a different (possibly invisible) separator.
local SensitiveCfg =
{
"外 掛",
"外掛",
"外/掛",
"外\\掛",
"外_掛",
"外掛",
"外-掛",
"外—掛",
}
-- Export the list as the module value.
return SensitiveCfg
測試腳本:test.lua
-- Smoke test and timing run for the word filter.
local filter = require("wordfilter")

-- Optional: register extra words one at a time.
--filter:addWord("xxx")
--filter:addWord("tmd")
--local dirtywords =
--{
-- "abc",
-- "abcde",
--}
-- Optional: register extra words from a table.
--filter:addWords(dirtywords)

local iterations = 10000
local input = "abcyyabcdefghixxxzzzxxxyyytmd,外掛!"
local masked = ''

-- Time repeated calls on the same input (os.clock reports CPU time).
local started = os.clock()
for _ = 1, iterations do
    masked = filter:maskString(input)
end
local elapsed = os.clock() - started

local report = string.format(
    "%d times call maskString(\"%s\" -> \"%s\"), time used: (%f)\n",
    iterations, input, masked, elapsed)
print(report)
說明
這是我看到的一個別人寫的 Lua 版本敏感詞過濾,覺得挺不錯,親測可用。
轉載地址:https://blog.csdn.net/fanzhang1990/article/details/62227958