老赵
发布于 2025-09-26 / 2 阅读
0
0

使用表达式进行字符匹配

因业务要求,需要一种匹配字符的表达式,支持嵌套括号、否定、或、与、连接、通配等几种运算符。

设计思路

ElementToken:表示一个元素,元素分为表达式或运算符。

OperatorType:表示一个运算符,由符号与优先级表示。

Expression:表示一个表达式,表达式支持匹配操作和转换为文本的能力。表达式分为两类,一类是带运算符的,另一类是文本表达式。

TextExpression:表示一个文本表达式,支持文本匹配,分析表达式中的变量。

OperatorExpression:表示一个运算符表达式,定义了运算符、运算符需要的表达式数量。

NegationOperatorExpression:否定表达式,对表达式的匹配结果取非。

OrOperatorExpression:或运算符,对运算符两边的表达式取或

AndOperatorExpression:与运算符,对运算符两边的表达式取且

WildcardOperatorExpression:通配符表达式,要求左边的表达式匹配的位置要小于右边表达式匹配的位置。

ConcatenationOperatorExpression:连接表达式,要求左边的表达式匹配的位置紧挨着右边表达式匹配的位置。

MatchResult:匹配结果,匹配过程中,有可能会产生多组匹配结果。

MatchResultGroup:描述每一组的匹配结果,包括是否匹配成功,匹配位置信息等。

ParserException:解析表达式时抛出的异常。

KeywordParser:解析器,将一个字符串解析为表达式树,并且调用表达式树可直接执行匹配动作

代码解析

ElementToken


public abstract class ElementToken {

}

OperatorType


// 优先级:'[]' > '*'' > '!' > '|' = '&'
class OperatorType extends ElementToken {
    // 运算符
    private char operator;
    // 优先级
    private int proitory;

    private OperatorType(char operator, int proitory) {
        this.operator = operator;
        this.proitory = proitory;
    }

    public int getProitory() {
        return proitory;
    }

    public char getOperator() {
        return operator;
    }


    public static OperatorType or = new OperatorType('|', 5);
    public static OperatorType and = new OperatorType('&', 5);
    public static OperatorType negation = new OperatorType('!', 8);
    public static OperatorType wildcard = new OperatorType('*', 7);
    public static OperatorType concatenation = new OperatorType('+', 7);
    public static OperatorType left = new OperatorType('[', 9);
    public static OperatorType right = new OperatorType(']', 9);


    public static OperatorType getInstance(char operator) throws ParserException {
        return switch (operator) {
            case '!' -> negation;
            case '|' -> or;
            case '&' -> and;
            case '*' -> wildcard;
            case ']' -> right;
            case '[' -> left;
            case '+' -> concatenation;
            default -> throw new ParserException("未知的运算符 " + operator);
        };
    }

    public static boolean isOperator(char c) {
        return c == '!' || c == '|' || c == '&' || c == '*' || c == ']' || c == '[' || c == '+';
    }
}

Expression


public abstract class Expression extends ElementToken {

    public abstract MatchResult match(String message, Map<String, Object> variables);

    public abstract String toText();
}

TextExpression

class TextExpression extends Expression {
    @Getter
    private final String text;
    private final List<String> variableNames;

    public TextExpression(String text) {
        this.text = text;
        this.variableNames = extractVariableNames(text);
    }

    private List<String> extractVariableNames(String text) {
        List<String> variableNames = new ArrayList<>();
        if (text == null || text.isEmpty()) {
            return variableNames;
        }

        int length = text.length();
        int pos = 0;

        while (pos < length) {
            // 查找开头的 '{'
            int start = text.indexOf('{', pos);
            if (start == -1) break; // 没有更多变量了

            // 查找对应的 '}'
            int end = text.indexOf('}', start + 1);
            if (end == -1) break; // 没有闭合的 '}'

            // 提取变量名(去掉花括号)
            String varName = text.substring(start + 1, end);
            variableNames.add(varName);

            // 更新位置,继续查找
            pos = end + 1;
        }

        return variableNames;
    }


    @Override
    public MatchResult match(String message, Map<String, Object> variables) {
        String textValue = this.text;
        // 有表达式变量,则先替换变量的值
        for (String variableName : variableNames) {
            if (variables.containsKey(variableName)) {
                textValue = textValue.replace("{" + variableName + "}", variables.get(variableName).toString());
            }

        }

        MatchResult result = new MatchResult();
        int startIndex = 0;

        while (true) {
            int index = message.indexOf(textValue, startIndex);
            if (index == -1) {
                if (result.getMatchGroups().isEmpty()) {
                    result.addMatchGroup(new MatchResultGroup(false, 0, 0));
                }
                break;
            }
            result.addMatchGroup(new MatchResultGroup(true, index, index + textValue.length()));
            startIndex = index + textValue.length(); // 继续查找下一个匹配
        }

        return result;
    }

    @Override
    public String toText() {
        return text;
    }

}

OperatorExpression

abstract class OperatorExpression extends Expression {

    private final OperatorType operatorType;

    public abstract int getExpressionCount();

    public OperatorExpression(OperatorType operatorType) {
        this.operatorType = operatorType;
    }

    public OperatorType getOperatorType() {
        return operatorType;
    }
}

class NegationOperatorExpression extends OperatorExpression {

    private final Expression expression;

    public NegationOperatorExpression(Expression expression) {
        super(OperatorType.negation);
        this.expression = expression;
    }

    @Override
    public MatchResult match(String message, Map<String, Object> variables) {
        MatchResult result = new MatchResult();
        var expressionResult = expression.match(message, variables);

        // 如果原表达式匹配,非运算不匹配(返回空列表)
        for (MatchResultGroup matchGroup : expressionResult.getMatchGroups()) {
            result.addMatchGroup(new MatchResultGroup(!matchGroup.result(), matchGroup.startIndex(), matchGroup.endIndex()));
        }

        return result;
    }

    @Override
    public String toText() {
        return "[" + expression.toText() + "]";
    }

    @Override
    public int getExpressionCount() {
        return 1;
    }
}

NegationOperatorExpression

class NegationOperatorExpression extends OperatorExpression {

    private final Expression expression;

    public NegationOperatorExpression(Expression expression) {
        super(OperatorType.negation);
        this.expression = expression;
    }

    @Override
    public MatchResult match(String message, Map<String, Object> variables) {
        MatchResult result = new MatchResult();
        var expressionResult = expression.match(message, variables);

        // 如果原表达式匹配,非运算不匹配(返回空列表)
        for (MatchResultGroup matchGroup : expressionResult.getMatchGroups()) {
            result.addMatchGroup(new MatchResultGroup(!matchGroup.result(), matchGroup.startIndex(), matchGroup.endIndex()));
        }

        return result;
    }

    @Override
    public String toText() {
        return "(" + expression.toText() + ")";
    }

    @Override
    public int getExpressionCount() {
        return 1;
    }
}

OrOperatorExpression

class OrOperatorExpression extends OperatorExpression {
    private final Expression left;
    private final Expression right;

    public OrOperatorExpression(Expression left, Expression right) {
        super(OperatorType.or);
        this.left = left;
        this.right = right;
    }

    @Override
    public MatchResult match(String message, Map<String, Object> variables) {
        MatchResult result = new MatchResult();
        var leftResults = left.match(message, variables);
        var rightResults = right.match(message, variables);

        for (MatchResultGroup leftResult : leftResults.getMatchGroups()) {
            for (MatchResultGroup rightResult : rightResults.getMatchGroups()) {
                result.addMatchGroup(new MatchResultGroup(leftResult.result() || rightResult.result(),
                        Math.min(leftResult.startIndex(), rightResult.startIndex()),
                        Math.max(leftResult.endIndex(), rightResult.endIndex())));
            }
        }

        return result;
    }

    @Override
    public String toText() {
        return "[" + left.toText() + "|" + right.toText() + "]";
    }

    @Override
    public int getExpressionCount() {
        return 2;
    }
}

AndOperatorExpression

class AndOperatorExpression extends OperatorExpression {
    private final Expression left;
    private final Expression right;

    public AndOperatorExpression(Expression left, Expression right) {
        super(OperatorType.and);
        this.left = left;
        this.right = right;
    }

    @Override
    public MatchResult match(String message, Map<String, Object> variables) {
        MatchResult result = new MatchResult();
        var leftResults = left.match(message, variables);
        var rightResults = right.match(message, variables);

        for (MatchResultGroup leftResult : leftResults.getMatchGroups()) {
            for (MatchResultGroup rightResult : rightResults.getMatchGroups()) {
                result.addMatchGroup(new MatchResultGroup(leftResult.result() && rightResult.result(),
                        Math.min(leftResult.startIndex(), rightResult.startIndex()),
                        Math.max(leftResult.endIndex(), rightResult.endIndex())));
            }
        }

        return result;
    }

    @Override
    public String toText() {
        return "[" + left.toText() + "&" + right.toText() + "]";
    }

    @Override
    public int getExpressionCount() {
        return 2;
    }
}

WildcardOperatorExpression

class WildcardOperatorExpression extends OperatorExpression {
    private final Expression left;
    private final Expression right;

    public WildcardOperatorExpression(Expression left, Expression right) {
        super(OperatorType.wildcard);
        this.left = left;
        this.right = right;
    }

    @Override
    public MatchResult match(String message, Map<String, Object> variables) {
        MatchResult result = new MatchResult();
        var leftResults = left.match(message, variables);


        for (MatchResultGroup leftResult : leftResults.getMatchGroups()) {
            String subMessage = message.substring(leftResult.endIndex());
            var rightResults = right.match(subMessage, variables);
            for (MatchResultGroup rightResult : rightResults.getMatchGroups()) {
                result.addMatchGroup(new MatchResultGroup(leftResult.result() && rightResult.result(), leftResult.startIndex(), leftResult.endIndex() + rightResult.endIndex()));
            }
        }
        return result;
    }

    @Override
    public String toText() {
        return "[" + left.toText() + "*" + right.toText() + "]";
    }

    @Override
    public int getExpressionCount() {
        return 2;
    }
}

ConcatenationOperatorExpression

class ConcatenationOperatorExpression extends OperatorExpression {
    private final Expression left;
    private final Expression right;

    public ConcatenationOperatorExpression(Expression left, Expression right) {
        super(OperatorType.concatenation);
        this.left = left;
        this.right = right;
    }

    @Override
    public MatchResult match(String message, Map<String, Object> variables) {
        MatchResult result = new MatchResult();
        var leftResults = left.match(message, variables);

        for (MatchResultGroup leftResult : leftResults.getMatchGroups()) {
            String subMessage = message.substring(leftResult.endIndex());
            var rightResults = right.match(subMessage, variables);
            for (MatchResultGroup rightResult : rightResults.getMatchGroups()) {
                result.addMatchGroup(new MatchResultGroup(leftResult.result() && rightResult.result() && rightResult.startIndex() == 0, leftResult.startIndex(), leftResult.endIndex() + rightResult.endIndex()));
            }
        }
        return result;
    }

    @Override
    public String toText() {
        return "[" + left.toText() + "+" + right.toText() + "]";
    }

    @Override
    public int getExpressionCount() {
        return 2;
    }
}

MatchResult

public class MatchResult {
    private final List<MatchResultGroup> matchGroups;

    public MatchResult() {
        this.matchGroups = new ArrayList<>();
    }

    public MatchResult(List<MatchResultGroup> matchGroups) {
        this.matchGroups = new ArrayList<>(matchGroups);
    }

    public void addMatchGroup(MatchResultGroup group) {
        matchGroups.add(group);
    }

    public void addMatchGroups(List<MatchResultGroup> groups) {
        matchGroups.addAll(groups);
    }

    public List<MatchResultGroup> getMatchGroups() {
        return new ArrayList<>(matchGroups);
    }

    public boolean hasMatches() {
        return matchGroups.stream().anyMatch(MatchResultGroup::result);
    }

    public int getMatchCount() {
        return matchGroups.size();
    }

    public MatchResultGroup getFirstMatch() {
        return matchGroups.isEmpty() ? null : matchGroups.get(0);
    }

    public MatchResultGroup getLastMatch() {
        return matchGroups.isEmpty() ? null : matchGroups.get(matchGroups.size() - 1);
    }
}

MatchResultGroup

public record MatchResultGroup(
        boolean result,
        int startIndex,
        int endIndex) {
}

ParserException

public class ParserException extends Exception{
    public ParserException(String message){
        super(message);
    }
}

KeywordParser

分析表达式最复杂的过程但是解析表达式字符串的过程,这里使用了中缀表达式转后缀表达式的算法。

算法大体思路:

  1. 创建一个运算符栈、一个后缀表达式列表。

  2. 循环整个表达式字符串

    1. 当遇到的字符不是运算符时,将字符内容写入临时空间textBuf

    2. 当遇到的是运算符时。

      1. 将临时空间textBuf的内容写入后缀表达式列表。

      2. 如果当前运算符是右括号,弹出运算符栈的运算符,直到遇到左括号时停止。将非括号的运算符添加到后缀表达式列表中

      3. 若运算符栈为空或栈顶为左括号或当前为左括号,则直接入栈

      4. 当前不是右括号,弹出运算符栈顶所有优先级>=当前运算符的运算符,将弹出的运算符添加到后缀表达式,当前运算符再入运算符栈

  3. 循环结束,将文本缓冲区的文本添加到输出列表

  4. 将运算符栈中的所有运算符弹出并压入输出列表

然后使用后缀表达式转表达式树

算法大体思路:

  1. 创建一个表达式栈

  2. 循环后缀表达式列表

    1. 如果是文本表达式,直接入表达式栈

    2. 如果是运算符:

      1. 根据运算符需要的表达式数从表达式栈中出栈

      2. 根据运算符创建对应的运算符表达式

      3. 将运算符表达式重新入栈到表达式栈

  3. 循环结束,如果表达式栈的数量大于1,则说明表达式字符串存在问题,抛出异常。

  4. 将最后的表达式栈里的表达式出栈,得到最终的表达式树。

public class KeywordParser {


    /**
     * 输入一个表达式字符串,将字符串解析成表达式树的方法
     * 运算符优先级:'[]' > '*'' > '!' > '|' = '&'
     */
    public static Expression parse(String expression) throws ParserException {
        if (expression == null || expression.trim().isEmpty()) {
            throw new ParserException("表达式不能为空");
        }

        // 第一步:中缀转后缀
        List<ElementToken> postfix = infixToPostfix(expression);

        // 第二步:后缀表达式构建表达式树
        return buildExpressionTree(postfix);
    }


    public static void main(String[] args) throws ParserException {
        String expression = "我*{姓名}";
        List<ElementToken> postfix = infixToPostfix(expression);
        Expression expressionTree = buildExpressionTree(postfix);

        MatchResult match = expressionTree.match("我是张涛今年10", Map.of("姓名", "张涛", "年龄", "1"));
        System.out.println(match.hasMatches());
    }

    private static List<ElementToken> infixToPostfix(String expression) throws ParserException {

        StringBuilder expressionBuilder = processDefaultOperator(expression);

        // 中缀表达式转后缀表达式
        List<ElementToken> output = new ArrayList<>();
        Stack<OperatorType> operatorStack = new Stack<>();
        StringBuilder textBuf = new StringBuilder();

        for (int i = 0; i < expressionBuilder.length(); i++) {
            char c = expressionBuilder.charAt(i);

            if (!OperatorType.isOperator(c)) {
                //不是运算符,将文本添加到文本缓冲区
                textBuf.append(c);
            } else {
                //是运算符
                OperatorType cu = OperatorType.getInstance(c);
                //1. 将文本缓冲区的文本添加到输出列表
                if (!textBuf.isEmpty()) {
                    output.add(new TextExpression(textBuf.toString()));
                    textBuf.setLength(0);
                }
                //3. 如果当前为右括号,弹出所有运算符直到遇到左括号
                if (cu == OperatorType.right) {
                    while (!operatorStack.isEmpty() && operatorStack.peek() != OperatorType.left) {
                        output.add(operatorStack.pop());
                    }
                    if (!operatorStack.isEmpty() && operatorStack.peek() == OperatorType.left) {
                        operatorStack.pop(); // 弹出左括号
                    }
                    continue;
                }

                //2. 若栈为空或栈顶为左括号或当前为左括号,则直接入栈
                if (operatorStack.isEmpty() || operatorStack.peek() == OperatorType.left || cu == OperatorType.left) {
                    operatorStack.push(cu);

                    continue;
                }


                //3. 当前不是右括号,弹出栈顶所有优先级>=当前运算符的运算符,再入栈
                while (!operatorStack.isEmpty() && operatorStack.peek().getProitory() >= cu.getProitory() && operatorStack.peek() != OperatorType.left) {
                    output.add(operatorStack.pop());
                }
                operatorStack.push(cu);
            }
        }
        //4. 将文本缓冲区的文本添加到输出列表
        if (!textBuf.isEmpty()) {
            output.add(new TextExpression(textBuf.toString()));
        }
        //5. 将运算符栈中的所有运算符弹出并压入输出列表
        while (!operatorStack.isEmpty()) {
            output.add(operatorStack.pop());
        }
        return output;
    }

    private static @NotNull StringBuilder processDefaultOperator(String expression) {
        StringBuilder expressionBuilder = new StringBuilder();

        // 处理默认运算符,如 a[b|c]=a*[b|c],a!b=a*!b,[a|b]c=[a|b]*c,[a|b][c|d]=[a|b]*[c|d]
        for (int i = 0; i < expression.length(); i++) {
            char c = expression.charAt(i);

            if (i > 0) {
                var lastToken = expression.charAt(i - 1);
                if (!OperatorType.isOperator(lastToken) && (c == OperatorType.negation.getOperator() || c == OperatorType.left.getOperator())) {
                    // 前一个字符不是运算符,且当前字符是!或[,则在前面添加*
                    expressionBuilder.append(OperatorType.wildcard.getOperator());
                }
                if (lastToken == OperatorType.right.getOperator() && (!OperatorType.isOperator(c) || c == OperatorType.negation.getOperator())) {
                    // ]后面跟着文本或!,添加*
                    expressionBuilder.append(OperatorType.wildcard.getOperator());
                }
                if (lastToken == OperatorType.right.getOperator() && c == OperatorType.left.getOperator()) {
                    // ]后面跟着[,添加*
                    expressionBuilder.append(OperatorType.wildcard.getOperator());
                }
            }
            expressionBuilder.append(c);
        }
        return expressionBuilder;
    }

    /**
     * 从后缀表达式构建表达式树
     */
    private static Expression buildExpressionTree(List<ElementToken> postfix) throws ParserException {

        Stack<Expression> expressionStack = new Stack<>();

        for (ElementToken token : postfix) {
            if (token instanceof Expression) {
                // 文本表达式直接入栈
                expressionStack.push((Expression) token);
            } else if (token instanceof OperatorType) {
                OperatorType operator = (OperatorType) token;

                switch (operator.getOperator()) {
                    case '!' -> {
                        // 非运算符:需要一个操作数
                        if (expressionStack.isEmpty()) {
                            throw new ParserException("非运算符:需要一个操作数");
                        }
                        Expression operand = expressionStack.pop();
                        expressionStack.push(new NegationOperatorExpression(operand));
                    }
                    case '|' -> {
                        // 或运算符:需要两个操作数
                        if (expressionStack.size() < 2) {
                            throw new ParserException("或运算符:需要两个操作数");
                        }
                        Expression right = expressionStack.pop();
                        Expression left = expressionStack.pop();
                        expressionStack.push(new OrOperatorExpression(left, right));
                    }
                    case '&' -> {
                        // 与运算符:需要两个操作数
                        if (expressionStack.size() < 2) {
                            throw new ParserException("与运算符:需要两个操作数");
                        }
                        Expression right = expressionStack.pop();
                        Expression left = expressionStack.pop();
                        expressionStack.push(new AndOperatorExpression(left, right));
                    }
                    case '*' -> {
                        // 通配符运算符:需要两个操作数
                        if (expressionStack.size() < 2) {
                            throw new ParserException("通配符:需要两个操作数");
                        }
                        Expression right = expressionStack.pop();
                        Expression left = expressionStack.pop();
                        expressionStack.push(new WildcardOperatorExpression(left, right));
                    }
                    case '+' -> {
                        // 连接运算符:需要两个操作数
                        if (expressionStack.size() < 2) {
                            throw new ParserException("连接运算符:需要两个操作数");
                        }
                        Expression right = expressionStack.pop();
                        Expression left = expressionStack.pop();
                        expressionStack.push(new ConcatenationOperatorExpression(left, right));
                    }
                    default -> throw new ParserException("未知的运算符: " + operator.getOperator());
                }
            }
        }

        if (expressionStack.size() != 1) {
            throw new ParserException("发现多余的运算符");
        }

        return expressionStack.pop();
    }


}


评论