mirror of
https://github.com/openssh/openssh-portable.git
synced 2026-06-30 19:57:57 +00:00
upstream: Replace the old recursive match_pattern() with an
implementation that uses an NFA for matching. This avoids the exponential worst-case behaviour of the old implementation. ok markus@ OpenBSD-Commit-ID: fc6b75a52f4c0acb52b7900658c8d25ff873cbae
This commit is contained in:
committed by
Damien Miller
parent
7ab700f170
commit
9d4c0b31f1
131
match.c
131
match.c
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: match.c,v 1.45 2024/09/06 02:30:44 djm Exp $ */
|
||||
/* $OpenBSD: match.c,v 1.46 2026/05/31 04:19:16 djm Exp $ */
|
||||
/*
|
||||
* Author: Tatu Ylonen <ylo@cs.hut.fi>
|
||||
* Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
|
||||
@@ -13,6 +13,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2000 Markus Friedl. All rights reserved.
|
||||
* Copyright (c) 2026 Damien Miller. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
@@ -49,67 +50,105 @@
|
||||
#include "match.h"
|
||||
#include "misc.h"
|
||||
|
||||
/*
 * Computes the epsilon closure of an NFA state set.
 *
 * In our wildcard grammar, epsilon transitions only exist for '*' wildcards:
 * a '*' may match zero characters, so an active state i whose pattern
 * character is '*' also activates state i+1 without consuming input.
 *
 * 'states' holds one flag per pattern position plus a final accepting state,
 * so it must have room for at least M + 1 entries (index M may be written
 * when pattern[M - 1] is '*'). 'pattern' must provide at least M characters.
 *
 * This function modifies 'states' in place.
 */
static void
epsilon_closure(char *states, const char *pattern, size_t M)
{
	size_t i;

	/*
	 * Only a forward pass is needed as there are no back jumps in our
	 * grammar. Because we walk left to right, a state activated here is
	 * itself examined on the next iteration, so runs of consecutive '*'
	 * are fully expanded in this single pass.
	 */
	for (i = 0; i < M; i++) {
		if (!states[i] || pattern[i] != '*')
			continue;
		/*
		 * State i is active, and pattern[i] is '*', so we can
		 * epsilon-transition to i+1.
		 */
		states[i + 1] = 1;
	}
}
|
||||
|
||||
/*
 * Returns true if the given string matches the pattern (which may contain ?
 * and * as wildcards), and zero if it does not match. Uses an NFA internally.
 *
 * The NFA has one state per pattern character plus a final accepting state
 * (index M). State i being active means "the first i pattern characters have
 * been matched against the input consumed so far". Each input character is
 * processed with a single pass over the M pattern states, so there is no
 * recursion or backtracking.
 */
int
match_pattern(const char *s, const char *pattern)
{
	size_t M;
	size_t i;
	char *states, *next_states, *tmp;
	int active, matched = 0;

	/* trivial case: empty pattern vs empty input */
	if ((M = strlen(pattern)) == 0)
		return *s == '\0';

	/*
	 * A state for each pattern character, plus one final accepting state.
	 * NOTE(review): no NULL checks here; xcalloc is presumably fatal on
	 * allocation failure - confirm against xmalloc.
	 */
	states = xcalloc(M + 1, sizeof(*states));
	next_states = xcalloc(M + 1, sizeof(*next_states));

	/* Initial state: state 0 is active */
	states[0] = 1;
	/* Other states might be reachable now if the pattern starts with '*' */
	epsilon_closure(states, pattern, M);

	for (; *s; s++) {
		memset(next_states, 0, M + 1);

		/* Calculate the reachable next states given the input char */
		for (i = 0; i < M; i++) {
			if (!states[i])
				continue;
			if (pattern[i] == '*') {
				/*
				 * '*' matches any character, so we can
				 * stay in state i
				 */
				next_states[i] = 1;
			} else if (pattern[i] == '?' || pattern[i] == *s) {
				/*
				 * '?' matches any character, or we have
				 * a literal match.
				 */
				next_states[i + 1] = 1;
			}
		}
		/* Expand the reachable next states with epsilon transitions */
		epsilon_closure(next_states, pattern, M);

		/* Swap states and next_states */
		tmp = states;
		states = next_states;
		next_states = tmp;

		/* Check if we have any active pattern states left */
		active = 0;
		for (i = 0; i <= M; i++) {
			if (states[i]) {
				active = 1;
				break;
			}
		}
		if (!active)
			goto out; /* No active states, fail early */
	}
	/*
	 * We matched only if we ended up in the final, accepting state
	 * after consuming all the input.
	 */
	matched = states[M];
 out:
	free(states);
	free(next_states);
	return matched;
}
|
||||
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user