一,在Spring Boot项目的resources目录里创建sensitive-words.txt(敏感词文本文件)
每个词独自一行
例如:
赌博
吸毒
开票
二,在util包下创建工具类SensitiveFilter
package com.nowcoder.community.util;@Component
public class SensitiveFilter{private static final Logger logger = LoggerFactory.getLogger(SensitiveFilter.class);// 替换符private static final String REPLACMENT = "***";// 根节点private TrieNode rootNode = new TrieNode();@PostConstructpublic void init(){try(InputStream is = this.getClass().getClassLoader().getResourceAsStream("sebsitive-words.txt");BufferedReader reader = new BufferedReader(new InputStreamReader(is));){String keyword;while((keyword = reader.readLine()) != null){// 添加到前缀树this.addkeyword(keyword);}} catch(IOException e){logger.error("加载敏感词文件失败:"+ e.getMessage())}}// 将一个敏感词添加到前缀树里private void addKeyword(String keyword) { TrieNode tempNode = rootNode;for(int i = 0; i < keyword.length(); i++ ){char c = keyword.charAT(i);TrieNode subNode = tempNode.getSubNode(c);if(subNode == null){// 初始化子节点subNode = new TrieNode();tempNode.addSubNode(c,subNode);}// 指向子节点,进入下一轮循环tempNode = subNode;// 设置结束标识if(i == keyword.length() - 1){tempNode.setKeywordEnd(true);}}}/*** 过滤敏感词** @param text 待过滤文本* @return 过滤后的文本*/public String filter(String text){if(StringUtils.isBlank(text)){return null;}// 指针1TrieNode tempNode = rootNode;// 指针2int begin = 0;// 指针3int position = 0;// 结果StringBuilder sb = new StringBuilder();while (position < text.length()){char c = text.charAt(position);// 跳过符号if(isSymbol(c)){// 若指针1处于根节点,将此符号计入结果,让指针2向下走一步if(tempNode == rootNode){sb.appenf(c);begin++;}// 无论符号在开头或中间,指针3都向下走一步position++;continue;}// 检查下级节点tempNode = tempNode.getSubNode(c);if(tempNode == null){// 以begin开头的字符串不是敏感词sb.apped(text.charAt(begin));// 进入下一个位置position = ++begin;// 重新指向根节点tempNode = rootNode;} else if(tempNode.isKeywordEnd()){// 发现敏感词,将begin~position字符串替换掉sb.append(REPLACEMENT);// 进入下一个位置begin = ++position;}else {// 检查下一个字符position++;}}// 将最后一批字符计入结果sb.append(text.substring(begin));return sb.toString();}// 判断是否为符号private boolean isSymbol(Character c){// 0x2E80 ~ 0x9FFF 是东亚文字范围return !Charutils.isAsciiAlphanumeric(c) && (c < 0x2E80 || c > 0x9FFF);}// 前缀树private class TrieNode{// 
关键词结束标识privte boolean isKeywordEnd = false;// 子节点(key是下级字符,value是下级节点)private Map<Character,TrieNode> subNodes = new HashMap<>();// get和set方法public boolean isKeywordEnd(){return isKeywordEnd;}public void setKeywordEnd(boolean keywordEnd){isKeywordEnd = keywordEnd;}// 添加子节点public void addSubNode(Character c,TrieNode node){subNodes.put(c,node);}// 获取子节点public TrieNode getSubNode(Character c){return subNodes.get(c);}}
}
三,创建Sensitive类,注入SensitiveFilter并调用其filter方法
public class Sensitive {@Autowiredprivate SensitiveFilter sensitiveFilter;@Overridepublic void SensitiverFilter(String name){filter = sensitiveFilter.filter(name);System.out.println(filter)}
}