java DFA算法模型

标签：index java int text length 算法 DFA final match
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * Sensitive-word filter backed by a character trie (the "DFA" model).
 *
 * <p>Keywords are inserted character by character into a trie; scanning a
 * text walks the trie from each start position. Matching works on UTF-16
 * {@code char} units, so one mask string is emitted per {@code char} of a
 * matched keyword.
 *
 * <p>The trie is built once in the constructor and is only read afterwards.
 */
public class Sensitive {

    /** Matching rule: stop at the shortest keyword found at a position. */
    public static final int MINIMUM = 1;
    /** Matching rule: keep going and use the longest keyword found at a position. */
    public static final int MAXIMUM = 2;

    /** Mask used when the caller does not supply one. */
    private static final String DEFAULT_MASK = "*";

    /** One trie node: outgoing edges keyed by character plus a terminal flag. */
    private static final class Node {
        private final Map<Character, Node> children = new HashMap<>(2);
        /** True when the path from the root to this node spells a complete keyword. */
        private boolean end;
    }

    /** Root of the keyword trie; it never represents a keyword itself. */
    private final Node root = new Node();

    /**
     * Builds the filter from a set of keywords.
     *
     * @param keywords the sensitive words; {@code null} or empty entries are ignored
     * @throws NullPointerException if {@code keywords} itself is {@code null}
     */
    public Sensitive(Set<String> keywords) {
        if (keywords == null) {
            throw new NullPointerException("keywords must not be null");
        }
        this.initialize(keywords);
    }

    /** Inserts every keyword into the trie, marking the node of its last character as terminal. */
    private void initialize(final Set<String> keywords) {
        for (String keyword : keywords) {
            if (keyword == null || keyword.isEmpty()) {
                continue; // nothing to insert; an empty keyword must never mark the root as terminal
            }
            Node node = this.root;
            for (int i = 0; i < keyword.length(); i++) {
                final Character key = keyword.charAt(i);
                Node next = node.children.get(key);
                if (next == null) { // no edge for this character yet: create it
                    next = new Node();
                    node.children.put(key, next);
                }
                node = next;
            }
            node.end = true;
        }
    }

    /**
     * Checks whether a keyword starts at the given position of the text.
     *
     * @param text     the text to inspect
     * @param starting index at which the match must begin
     * @param match    {@link #MINIMUM} to stop at the first (shortest) keyword;
     *                 any other value keeps walking to find the longest one
     * @return length of the matched keyword, or 0 if no keyword starts at {@code starting}
     */
    private int checking(final String text, final int starting, final int match) {
        int matched = 0; // length up to the most recent terminal node, not the number of characters walked
        Node node = this.root;
        for (int index = starting; index < text.length(); index++) {
            node = node.children.get(text.charAt(index));
            if (node == null) {
                break; // the trie has no continuation for this character
            }
            if (node.end) {
                matched = index - starting + 1;
                if (Sensitive.MINIMUM == match) {
                    break; // shortest match requested: the first terminal node is enough
                }
            }
        }
        return matched;
    }

    /**
     * Tells whether the text contains at least one keyword.
     *
     * @param text  the text to inspect; {@code null} or empty yields {@code false}
     * @param match matching rule ({@link #MINIMUM} or {@link #MAXIMUM})
     * @return {@code true} if any keyword occurs in {@code text}
     */
    public final boolean contains(final String text, final int match) {
        if (text == null) {
            return false;
        }
        for (int index = 0, length = text.length(); index < length; index++) {
            if (this.checking(text, index, match) > 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Masks every keyword occurrence with {@code "*"}, one per matched character.
     *
     * @param text  the text to filter
     * @param match matching rule ({@link #MINIMUM} or {@link #MAXIMUM})
     * @return the filtered text
     */
    public final String replace(String text, int match) {
        return replace(text, match, DEFAULT_MASK);
    }

    /**
     * Masks every keyword occurrence, emitting {@code replace} once per matched character.
     *
     * <p>The text is scanned left to right in a single pass and only the matched
     * positions are masked. Keywords and the mask are treated literally (no regular
     * expression interpretation), and the result does not depend on any iteration order.
     *
     * @param text    the text to filter; {@code null} or empty is returned unchanged
     * @param match   matching rule ({@link #MINIMUM} or {@link #MAXIMUM})
     * @param replace the mask emitted for each matched character; {@code null} means {@code "*"}
     * @return the filtered text
     */
    public final String replace(final String text, final int match, final String replace) {
        if (text == null || text.isEmpty()) {
            return text;
        }
        final String mask = (replace == null) ? DEFAULT_MASK : replace;
        final StringBuilder result = new StringBuilder(text.length());
        int index = 0;
        while (index < text.length()) {
            final int matching = this.checking(text, index, match);
            if (matching > 0) {
                result.append(this.newing(mask, matching));
                index += matching; // resume right after the masked keyword
            } else {
                result.append(text.charAt(index));
                index++;
            }
        }
        return result.toString();
    }

    /**
     * Builds the mask for one matched keyword.
     *
     * @param replace the mask unit
     * @param length  how many times to repeat it
     * @return {@code replace} repeated {@code length} times
     */
    private String newing(final String replace, final int length) {
        final StringBuilder builder = new StringBuilder(replace.length() * length);
        for (int index = 0; index < length; index++) {
            builder.append(replace);
        }
        return builder.toString();
    }

}
Java 的 DFA 敏感词过滤算法！供大家参考！
标签：index,java,int,text,length,算法,DFA,final,match
From： https://www.cnblogs.com/ruber/p/16802857.html
相关文章

赞助商

阅读排行