upstream: Replace the old recursive match_pattern() with an

implementation that uses an NFA for matching. This avoids the exponential
worst-case behaviour of the old implementation.

ok markus@

OpenBSD-Commit-ID: fc6b75a52f4c0acb52b7900658c8d25ff873cbae
This commit is contained in:
djm@openbsd.org
2026-05-31 04:19:16 +00:00
committed by Damien Miller
parent 7ab700f170
commit 9d4c0b31f1

131
match.c
View File

@@ -1,4 +1,4 @@
/* $OpenBSD: match.c,v 1.45 2024/09/06 02:30:44 djm Exp $ */
/* $OpenBSD: match.c,v 1.46 2026/05/31 04:19:16 djm Exp $ */
/*
* Author: Tatu Ylonen <ylo@cs.hut.fi>
* Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
@@ -13,6 +13,7 @@
*/
/*
* Copyright (c) 2000 Markus Friedl. All rights reserved.
* Copyright (c) 2026 Damien Miller. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -49,67 +50,105 @@
#include "match.h"
#include "misc.h"
/*
 * Expand an NFA state set with every state reachable through epsilon
 * (no-input) transitions.
 *
 * The only epsilon transitions in the wildcard grammar come from '*':
 * an active state i whose pattern character is '*' may also advance to
 * state i+1 without consuming any input.
 *
 * 'states' is updated in place; it must hold at least M+1 entries, since
 * state M may be written. 'pattern' must provide at least M characters.
 */
static void
epsilon_closure(char *states, const char *pattern, size_t M)
{
	size_t pos = 0;

	/*
	 * Transitions only ever go forward, so one left-to-right sweep is
	 * enough: a state activated at pos+1 is itself examined on the next
	 * iteration, which lets runs of consecutive '*' propagate.
	 */
	while (pos < M) {
		if (states[pos] && pattern[pos] == '*')
			states[pos + 1] = 1;
		pos++;
	}
}
/*
* Returns true if the given string matches the pattern (which may contain ?
* and * as wildcards), and zero if it does not match.
* and * as wildcards), and zero if it does not match. Uses an NFA internally.
*/
int
match_pattern(const char *s, const char *pattern)
{
for (;;) {
/* If at end of pattern, accept if also at end of string. */
if (!*pattern)
return !*s;
size_t M;
size_t i;
char *states, *next_states, *tmp;
int active, matched = 0;
if (*pattern == '*') {
/* Skip this and any consecutive asterisks. */
while (*pattern == '*')
pattern++;
/* trivial case: empty pattern vs empty input */
if ((M = strlen(pattern)) == 0)
return *s == '\0';
/* If at end of pattern, accept immediately. */
if (!*pattern)
return 1;
/* A state for each pattern character, plus one final accepting state */
states = xcalloc(M + 1, sizeof(*states));
next_states = xcalloc(M + 1, sizeof(*next_states));
/* If next character in pattern is known, optimize. */
if (*pattern != '?' && *pattern != '*') {
/* Initial state: state 0 is active */
states[0] = 1;
/* Other states might be reachable now if the pattern starts with '*' */
epsilon_closure(states, pattern, M);
for (; *s; s++) {
memset(next_states, 0, M + 1);
/* Calculate the reachable next states given the input char */
for (i = 0; i < M; i++) {
if (!states[i])
continue;
if (pattern[i] == '*') {
/*
* Look instances of the next character in
* pattern, and try to match starting from
* those.
* '*' matches any character, so we can
* stay in state i
*/
for (; *s; s++)
if (*s == *pattern &&
match_pattern(s + 1, pattern + 1))
return 1;
/* Failed. */
return 0;
next_states[i] = 1;
} else if (pattern[i] == '?' || pattern[i] == *s) {
/*
* '?' matches any character, or we have
* a literal match.
*/
next_states[i + 1] = 1;
}
/*
* Move ahead one character at a time and try to
* match at each position.
*/
for (; *s; s++)
if (match_pattern(s, pattern))
return 1;
/* Failed. */
return 0;
}
/*
* There must be at least one more character in the string.
* If we are at the end, fail.
*/
if (!*s)
return 0;
/* Check if the next character of the string is acceptable. */
if (*pattern != '?' && *pattern != *s)
return 0;
/* Expand the reachable next states with epsilon transitions */
epsilon_closure(next_states, pattern, M);
/* Move to the next character, both in string and in pattern. */
s++;
pattern++;
/* Swap states and next_states */
tmp = states;
states = next_states;
next_states = tmp;
/* Check if we have any active pattern states left */
active = 0;
for (i = 0; i <= M; i++) {
if (states[i]) {
active = 1;
break;
}
}
if (!active)
goto out; /* No active states, fail early */
}
/* NOTREACHED */
/*
* We matched only if we ended up in the final, accepting state
* after consuming all the input.
*/
matched = states[M];
out:
free(states);
free(next_states);
return matched;
}
/*