From 9d4c0b31f172782def72ccc2fb2dc217d3135e6f Mon Sep 17 00:00:00 2001 From: "djm@openbsd.org" Date: Sun, 31 May 2026 04:19:16 +0000 Subject: [PATCH] upstream: Replace the old recursive match_pattern() with an implementation that uses a NFA for matching. This avoids the exponential worst- case behaviour for the old implementation. ok markus@ OpenBSD-Commit-ID: fc6b75a52f4c0acb52b7900658c8d25ff873cbae --- match.c | 131 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 85 insertions(+), 46 deletions(-) diff --git a/match.c b/match.c index 3ef536931..b8f350df1 100644 --- a/match.c +++ b/match.c @@ -1,4 +1,4 @@ -/* $OpenBSD: match.c,v 1.45 2024/09/06 02:30:44 djm Exp $ */ +/* $OpenBSD: match.c,v 1.46 2026/05/31 04:19:16 djm Exp $ */ /* * Author: Tatu Ylonen * Copyright (c) 1995 Tatu Ylonen , Espoo, Finland @@ -13,6 +13,7 @@ */ /* * Copyright (c) 2000 Markus Friedl. All rights reserved. + * Copyright (c) 2026 Damien Miller. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -49,67 +50,105 @@ #include "match.h" #include "misc.h" +/* + * Computes the epsilon closure of an NFA set. + * In our wildcard grammar, epsilon transitions only exist for '*' wildcards, + * allowing us to transition from state i to i+1 without consuming input. + * + * This function modifies 'states' in place. + */ +static void +epsilon_closure(char *states, const char *pattern, size_t M) +{ + size_t i; + + /* only need a forward pass as there are no back jumps in our grammar */ + for (i = 0; i < M; i++) { + if (!states[i] || pattern[i] != '*') + continue; + /* + * State i is active, and pattern[i] is '*', so we can + * epsilon-transition to i+1. + */ + states[i + 1] = 1; + } +} + /* * Returns true if the given string matches the pattern (which may contain ? - * and * as wildcards), and zero if it does not match. + * and * as wildcards), and zero if it does not match. Uses an NFA internally. */ int match_pattern(const char *s, const char *pattern) { - for (;;) { - /* If at end of pattern, accept if also at end of string. */ - if (!*pattern) - return !*s; + size_t M; + size_t i; + char *states, *next_states, *tmp; + int active, matched = 0; - if (*pattern == '*') { - /* Skip this and any consecutive asterisks. */ - while (*pattern == '*') - pattern++; + /* trivial case: empty pattern vs empty input */ + if ((M = strlen(pattern)) == 0) + return *s == '\0'; - /* If at end of pattern, accept immediately. */ - if (!*pattern) - return 1; + /* A state for each pattern character, plus one final accepting state */ + states = xcalloc(M + 1, sizeof(*states)); + next_states = xcalloc(M + 1, sizeof(*next_states)); - /* If next character in pattern is known, optimize. */ - if (*pattern != '?' && *pattern != '*') { + /* Initial state: state 0 is active */ + states[0] = 1; + /* Other states might be reachable now if the pattern starts with '*' */ + epsilon_closure(states, pattern, M); + + for (; *s; s++) { + memset(next_states, 0, M + 1); + + /* Calculate the reachable next states given the input char */ + for (i = 0; i < M; i++) { + if (!states[i]) + continue; + if (pattern[i] == '*') { /* - * Look instances of the next character in - * pattern, and try to match starting from - * those. + * '*' matches any character, so we can + * stay in state i */ - for (; *s; s++) - if (*s == *pattern && - match_pattern(s + 1, pattern + 1)) - return 1; - /* Failed. */ - return 0; + next_states[i] = 1; + } else if (pattern[i] == '?' || pattern[i] == *s) { + /* + * '?' matches any character, or we have + * a literal match. + */ + next_states[i + 1] = 1; } - /* - * Move ahead one character at a time and try to - * match at each position. - */ - for (; *s; s++) - if (match_pattern(s, pattern)) - return 1; - /* Failed. */ - return 0; } - /* - * There must be at least one more character in the string. - * If we are at the end, fail. - */ - if (!*s) - return 0; - /* Check if the next character of the string is acceptable. */ - if (*pattern != '?' && *pattern != *s) - return 0; + /* Expand the reachable next states with epsilon transitions */ + epsilon_closure(next_states, pattern, M); - /* Move to the next character, both in string and in pattern. */ - s++; - pattern++; + /* Swap states and next_states */ + tmp = states; + states = next_states; + next_states = tmp; + + /* Check if we have any active pattern states left */ + active = 0; + for (i = 0; i <= M; i++) { + if (states[i]) { + active = 1; + break; + } + } + if (!active) + goto out; /* No active states, fail early */ } - /* NOTREACHED */ + /* + * We matched only if we ended up in the final, accepting state + * after consuming all the input. + */ + matched = states[M]; + out: + free(states); + free(next_states); + return matched; } /*