全局敏感詞處理
基于ToolGood.Words類庫，配合敏感字的文本文件，寫的API接口。
一共二種方式
1.ToolGood.Words
類庫配合敏感庫
簡單用法
[Fact]
public void IssuesTest_17()
{
    // Arrange: three keywords supplied as one '|'-separated list.
    const string keywordList = "中國|zg人|abc";
    var searcher = new IllegalWordsSearch();
    searcher.SetKeywords(keywordList.Split('|'));

    // Act: mask every keyword occurrence with '*'.
    string masked = searcher.Replace("我是中美國人厲害中國完美abcddb好的", '*');

    // Assert: "中國" and "abc" are masked character for character,
    // while "中美國人" is left untouched.
    Assert.Equal("我是中美國人厲害**完美***ddb好的", masked);
}
配合敏感庫文本文件，寫的工具類
二個文件放到wwwroot/_Illegal目錄下，通過ReplaceStopWords方法調用即可。
- https://github.com/toolgood/ToolGood.Words/blob/master/csharp/ToolGood.Words.Test/_Illegal/IllegalKeywords.txt
- https://github.com/toolgood/ToolGood.Words/blob/master/csharp/ToolGood.Words.Test/_Illegal/IllegalUrls.txt
/// <summary>
/// Builds and caches a single <see cref="IllegalWordsSearch"/> from the local
/// word-list files, persisting it to disk so that later start-ups can load the
/// saved copy instead of rebuilding it.
/// </summary>
public class ToolGoodUtils
{
    // Only IllegalKeywords.txt and IllegalUrls.txt have to be supplied; IllegalInfo.txt
    // and IllegalBit.iws are generated by this class.
    // Word lists: https://github.com/toolgood/ToolGood.Words/tree/master/csharp/ToolGood.Words.Test/_Illegal
    // Because the repository is pushed to GitHub and mirrored to Gitee, the word lists are
    // deliberately not committed; download them and copy them into place yourself.
    private const string KeywordsPath = "wwwroot/_Illegal/IllegalKeywords.txt";
    private const string UrlsPath = "wwwroot/_Illegal/IllegalUrls.txt";
    private const string InfoPath = "wwwroot/_Illegal/IllegalInfo.txt";
    private const string BitPath = "wwwroot/_Illegal/IllegalBit.iws";

    // Format of the last-write timestamps stored in IllegalInfo.txt.
    private const string TimestampFormat = "yyyy-MM-dd HH:mm:ss";

    private static IllegalWordsSearch _search;

    /// <summary>
    /// Returns the shared search instance, creating it on first use. The saved
    /// cache is reused when it is still current; otherwise (missing, unreadable
    /// or outdated cache) it is rebuilt from the word-list files.
    /// </summary>
    /// <returns>The shared <see cref="IllegalWordsSearch"/> instance.</returns>
    public static IllegalWordsSearch GetIllegalWordsSearch()
    {
        if (_search == null)
        {
            _search = IsCacheUpToDate() ? LoadCachedSearch() : CreateIllegalWordsSearch();
        }

        return _search;
    }

    /// <summary>
    /// Tells whether the files written by <see cref="CreateIllegalWordsSearch"/>
    /// exist and still match the current word-list files.
    /// </summary>
    private static bool IsCacheUpToDate()
    {
        string infoFile = Path.GetFullPath(InfoPath);

        // Both generated files are required; if either one was deleted, rebuild.
        if (!File.Exists(infoFile) || !File.Exists(Path.GetFullPath(BitPath)))
        {
            return false;
        }

        // Expected content: "<keywords timestamp>|<urls timestamp>".
        // Anything else is treated as a stale cache rather than crashing on texts[1].
        string[] stamps = File.ReadAllText(infoFile).Split('|');
        if (stamps.Length < 2)
        {
            return false;
        }

        return stamps[0] == GetTimestamp(KeywordsPath)
            && stamps[1] == GetTimestamp(UrlsPath);
    }

    /// <summary>Loads the search instance previously saved to <c>BitPath</c>.</summary>
    private static IllegalWordsSearch LoadCachedSearch()
    {
        var cached = new IllegalWordsSearch();
        cached.Load(Path.GetFullPath(BitPath));
        return cached;
    }

    /// <summary>
    /// Builds a search instance from both word-list files, saves it to
    /// <c>BitPath</c> and records the word lists' timestamps in <c>InfoPath</c>.
    /// </summary>
    private static IllegalWordsSearch CreateIllegalWordsSearch()
    {
        var words = new List<string>();
        AddKeywords(words, KeywordsPath);
        AddKeywords(words, UrlsPath);

        var search = new IllegalWordsSearch();
        search.SetKeywords(words);
        search.Save(Path.GetFullPath(BitPath));

        // Written after the save so the info file never describes a cache that is missing.
        string info = GetTimestamp(KeywordsPath) + "|" + GetTimestamp(UrlsPath);
        File.WriteAllText(Path.GetFullPath(InfoPath), info);
        return search;
    }

    /// <summary>
    /// Appends every non-blank, trimmed line of the UTF-8 file at
    /// <paramref name="relativePath"/> to <paramref name="target"/>.
    /// </summary>
    private static void AddKeywords(List<string> target, string relativePath)
    {
        foreach (string line in File.ReadAllLines(Path.GetFullPath(relativePath), Encoding.UTF8))
        {
            string keyword = line.Trim();

            // Blank lines (for example a trailing newline) carry no keyword.
            if (keyword.Length > 0)
            {
                target.Add(keyword);
            }
        }
    }

    /// <summary>Formats the last-write time of the given file for comparison and storage.</summary>
    private static string GetTimestamp(string relativePath)
    {
        return new FileInfo(Path.GetFullPath(relativePath)).LastWriteTime.ToString(TimestampFormat);
    }
}
2. 循環使用Replace
方案：通過維護敏感庫，循環replace
大佬分享給我的，稍微改成了從文件中獲取敏感字。
/// <summary>
/// Sensitive-word filter that masks every known word with '*' by running
/// <see cref="string.Replace(string, string)"/> once per word.
/// The word lists are loaded from the two files below when the class is first used.
/// </summary>
public static class StopWords
{
    // Set of distinct sensitive words; only the keys are used, the bool value is a placeholder.
    static readonly ConcurrentDictionary<string, bool> FunNlpDataSensitive = new ConcurrentDictionary<string, bool>();

    // Cache of mask strings keyed by length (3 -> "***"), so masks are not rebuilt per word.
    static readonly ConcurrentDictionary<int, string> ReplaceNewValue = new ConcurrentDictionary<int, string>();

    private const string KeywordsPath = "wwwroot/_Illegal/IllegalKeywords.txt";
    private const string UrlsPath = "wwwroot/_Illegal/IllegalUrls.txt";

    static StopWords()
    {
        LoadDataFromFile();
    }

    /// <summary>
    /// Reads both word-list files (UTF-8) and adds their words to the filter.
    /// The paths are resolved against the current working directory.
    /// </summary>
    public static void LoadDataFromFile()
    {
        string words1 = File.ReadAllText(Path.GetFullPath(KeywordsPath), Encoding.UTF8);
        string words2 = File.ReadAllText(Path.GetFullPath(UrlsPath), Encoding.UTF8);
        LoadDataFromText(words1);
        LoadDataFromText(words2);
    }

    /// <summary>
    /// Adds the words contained in <paramref name="text"/> to the filter, one word per line.
    /// Blank lines and words that are already known are skipped; the number of
    /// newly added words is written to the console.
    /// </summary>
    /// <param name="text">Newline-separated words; null is treated as empty text.</param>
    public static void LoadDataFromText(string text)
    {
        string source = text ?? string.Empty;
        int oldcount = FunNlpDataSensitive.Count;

        foreach (string line in source.Split('\n'))
        {
            // Trim() also strips the '\r' left behind by Windows line endings.
            string keyword = line.Trim();
            if (keyword.Length == 0)
            {
                continue;
            }

            FunNlpDataSensitive.TryAdd(keyword, true);

            // Pre-build the mask for this length so replacing only has to look it up.
            GetMask(keyword.Length);
        }

        Console.WriteLine($"敏感詞加載完畢,增加數量:{FunNlpDataSensitive.Count - oldcount}");
    }

    /// <summary>
    /// Replaces every occurrence of every loaded sensitive word with a run of '*'
    /// of the same length.
    /// </summary>
    /// <param name="that">The text to filter.</param>
    /// <returns>
    /// The filtered text; a null or empty <paramref name="that"/> is returned unchanged.
    /// </returns>
    public static string ReplaceStopWords(this string that)
    {
        // Nothing to mask, and a null receiver must not crash the extension method.
        if (string.IsNullOrEmpty(that))
        {
            return that;
        }

        foreach (string word in FunNlpDataSensitive.Keys)
        {
            that = that.Replace(word, GetMask(word.Length));
        }

        return that;
    }

    /// <summary>Returns the cached mask of <paramref name="length"/> asterisks, creating it on first request.</summary>
    private static string GetMask(int length)
    {
        return ReplaceNewValue.GetOrAdd(length, len => new string('*', len));
    }
}
FreeSql全局處理敏感詞
使用FreeSql這個ORM時，全局處理string類型的值，進行敏感詞處理。代碼在StartUp.cs的構造函數中。
// Sensitive-word handling: filter string values as FreeSql audits them.
IllegalWordsSearch illegalWords = ToolGoodUtils.GetIllegalWordsSearch();
Fsql.Aop.AuditValue += (sender, e) =>
{
    // Only non-null values of columns whose CsType is string are filtered.
    if (e.Column.CsType != typeof(string) || e.Value == null)
    {
        return;
    }

    string original = (string)e.Value;
    string filtered = illegalWords.Replace(original);
    // Alternative (second approach): string filtered = original.ReplaceStopWords();

    // Write the value back only when something was actually replaced.
    if (filtered != original)
    {
        e.Value = filtered;
    }
};