summaryrefslogtreecommitdiff
path: root/glib/glib/gpattern.c
diff options
context:
space:
mode:
Diffstat (limited to 'glib/glib/gpattern.c')
-rw-r--r--glib/glib/gpattern.c442
1 files changed, 442 insertions, 0 deletions
diff --git a/glib/glib/gpattern.c b/glib/glib/gpattern.c
new file mode 100644
index 0000000..bc0cc71
--- /dev/null
+++ b/glib/glib/gpattern.c
@@ -0,0 +1,442 @@
+/* GLIB - Library of useful routines for C programming
+ * Copyright (C) 1995-1997, 1999 Peter Mattis, Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include "gpattern.h"
+
+#include "gmacros.h"
+#include "gmessages.h"
+#include "gmem.h"
+#include "gunicode.h"
+#include "gutils.h"
+
+/**
+ * SECTION:patterns
+ * @title: Glob-style pattern matching
+ * @short_description: matches strings against patterns containing '*'
+ * (wildcard) and '?' (joker)
+ *
+ * The <function>g_pattern_match*</function> functions match a string
+ * against a pattern containing '*' and '?' wildcards with similar
+ * semantics as the standard glob() function: '*' matches an arbitrary,
+ * possibly empty, string, '?' matches an arbitrary character.
+ *
+ * Note that in contrast to glob(), the '/' character
+ * <emphasis>can</emphasis> be matched by the wildcards, there are no
+ * '[...]' character ranges and '*' and '?' can
+ * <emphasis>not</emphasis> be escaped to include them literally in a
+ * pattern.
+ *
+ * When multiple strings must be matched against the same pattern, it
+ * is better to compile the pattern to a #GPatternSpec using
+ * g_pattern_spec_new() and use g_pattern_match_string() instead of
+ * g_pattern_match_simple(). This avoids the overhead of repeated
+ * pattern compilation.
+ **/
+
+/**
+ * GPatternSpec:
+ *
+ * A <structname>GPatternSpec</structname> is the 'compiled' form of a
+ * pattern. This structure is opaque and its fields cannot be accessed
+ * directly.
+ **/
+
+/* keep enum and structure of gpattern.c and patterntest.c in sync */
+typedef enum
+{
+ G_MATCH_ALL, /* "*A?A*" */
+ G_MATCH_ALL_TAIL, /* "*A?AA" */
+ G_MATCH_HEAD, /* "AAAA*" */
+ G_MATCH_TAIL, /* "*AAAA" */
+ G_MATCH_EXACT, /* "AAAAA" */
+ G_MATCH_LAST
+} GMatchType;
+
+struct _GPatternSpec
+{
+ GMatchType match_type;
+ guint pattern_length;
+ guint min_length;
+ guint max_length;
+ gchar *pattern;
+};
+
+
+/* --- functions --- */
+static inline gboolean
+g_pattern_ph_match (const gchar *match_pattern,
+ const gchar *match_string,
+ gboolean *wildcard_reached_p)
+{
+ register const gchar *pattern, *string;
+ register gchar ch;
+
+ pattern = match_pattern;
+ string = match_string;
+
+ ch = *pattern;
+ pattern++;
+ while (ch)
+ {
+ switch (ch)
+ {
+ case '?':
+ if (!*string)
+ return FALSE;
+ string = g_utf8_next_char (string);
+ break;
+
+ case '*':
+ *wildcard_reached_p = TRUE;
+ do
+ {
+ ch = *pattern;
+ pattern++;
+ if (ch == '?')
+ {
+ if (!*string)
+ return FALSE;
+ string = g_utf8_next_char (string);
+ }
+ }
+ while (ch == '*' || ch == '?');
+ if (!ch)
+ return TRUE;
+ do
+ {
+ gboolean next_wildcard_reached = FALSE;
+ while (ch != *string)
+ {
+ if (!*string)
+ return FALSE;
+ string = g_utf8_next_char (string);
+ }
+ string++;
+ if (g_pattern_ph_match (pattern, string, &next_wildcard_reached))
+ return TRUE;
+ if (next_wildcard_reached)
+ /* the forthcoming pattern substring up to the next wildcard has
+ * been matched, but a mismatch occoured for the rest of the
+ * pattern, following the next wildcard.
+ * there's no need to advance the current match position any
+ * further if the rest pattern will not match.
+ */
+ return FALSE;
+ }
+ while (*string);
+ break;
+
+ default:
+ if (ch == *string)
+ string++;
+ else
+ return FALSE;
+ break;
+ }
+
+ ch = *pattern;
+ pattern++;
+ }
+
+ return *string == 0;
+}
+
+/**
+ * g_pattern_match:
+ * @pspec: a #GPatternSpec
+ * @string_length: the length of @string (in bytes, i.e. strlen(),
+ * <emphasis>not</emphasis> g_utf8_strlen())
+ * @string: the UTF-8 encoded string to match
+ * @string_reversed: (allow-none): the reverse of @string or %NULL
+ * @Returns: %TRUE if @string matches @pspec
+ *
+ * Matches a string against a compiled pattern. Passing the correct
+ * length of the string given is mandatory. The reversed string can be
+ * omitted by passing %NULL, this is more efficient if the reversed
+ * version of the string to be matched is not at hand, as
+ * g_pattern_match() will only construct it if the compiled pattern
+ * requires reverse matches.
+ *
+ * Note that, if the user code will (possibly) match a string against a
+ * multitude of patterns containing wildcards, chances are high that
+ * some patterns will require a reversed string. In this case, it's
+ * more efficient to provide the reversed string to avoid multiple
+ * constructions thereof in the various calls to g_pattern_match().
+ *
+ * Note also that the reverse of a UTF-8 encoded string can in general
+ * <emphasis>not</emphasis> be obtained by g_strreverse(). This works
+ * only if the string doesn't contain any multibyte characters. GLib
+ * offers the g_utf8_strreverse() function to reverse UTF-8 encoded
+ * strings.
+ **/
+gboolean
+g_pattern_match (GPatternSpec *pspec,
+ guint string_length,
+ const gchar *string,
+ const gchar *string_reversed)
+{
+ g_return_val_if_fail (pspec != NULL, FALSE);
+ g_return_val_if_fail (string != NULL, FALSE);
+
+ if (string_length < pspec->min_length ||
+ string_length > pspec->max_length)
+ return FALSE;
+
+ switch (pspec->match_type)
+ {
+ gboolean dummy;
+ case G_MATCH_ALL:
+ return g_pattern_ph_match (pspec->pattern, string, &dummy);
+ case G_MATCH_ALL_TAIL:
+ if (string_reversed)
+ return g_pattern_ph_match (pspec->pattern, string_reversed, &dummy);
+ else
+ {
+ gboolean result;
+ gchar *tmp;
+ tmp = g_utf8_strreverse (string, string_length);
+ result = g_pattern_ph_match (pspec->pattern, tmp, &dummy);
+ g_free (tmp);
+ return result;
+ }
+ case G_MATCH_HEAD:
+ if (pspec->pattern_length == string_length)
+ return strcmp (pspec->pattern, string) == 0;
+ else if (pspec->pattern_length)
+ return strncmp (pspec->pattern, string, pspec->pattern_length) == 0;
+ else
+ return TRUE;
+ case G_MATCH_TAIL:
+ if (pspec->pattern_length)
+ return strcmp (pspec->pattern, string + (string_length - pspec->pattern_length)) == 0;
+ else
+ return TRUE;
+ case G_MATCH_EXACT:
+ if (pspec->pattern_length != string_length)
+ return FALSE;
+ else
+ return strcmp (pspec->pattern, string) == 0;
+ default:
+ g_return_val_if_fail (pspec->match_type < G_MATCH_LAST, FALSE);
+ return FALSE;
+ }
+}
+
+/**
+ * g_pattern_spec_new:
+ * @pattern: a zero-terminated UTF-8 encoded string
+ * @Returns: a newly-allocated #GPatternSpec
+ *
+ * Compiles a pattern to a #GPatternSpec.
+ **/
+GPatternSpec*
+g_pattern_spec_new (const gchar *pattern)
+{
+ GPatternSpec *pspec;
+ gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE;
+ gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1;
+ gboolean follows_wildcard = FALSE;
+ guint pending_jokers = 0;
+ const gchar *s;
+ gchar *d;
+ guint i;
+
+ g_return_val_if_fail (pattern != NULL, NULL);
+
+ /* canonicalize pattern and collect necessary stats */
+ pspec = g_new (GPatternSpec, 1);
+ pspec->pattern_length = strlen (pattern);
+ pspec->min_length = 0;
+ pspec->max_length = 0;
+ pspec->pattern = g_new (gchar, pspec->pattern_length + 1);
+ d = pspec->pattern;
+ for (i = 0, s = pattern; *s != 0; s++)
+ {
+ switch (*s)
+ {
+ case '*':
+ if (follows_wildcard) /* compress multiple wildcards */
+ {
+ pspec->pattern_length--;
+ continue;
+ }
+ follows_wildcard = TRUE;
+ if (hw_pos < 0)
+ hw_pos = i;
+ tw_pos = i;
+ break;
+ case '?':
+ pending_jokers++;
+ pspec->min_length++;
+ pspec->max_length += 4; /* maximum UTF-8 character length */
+ continue;
+ default:
+ for (; pending_jokers; pending_jokers--, i++) {
+ *d++ = '?';
+ if (hj_pos < 0)
+ hj_pos = i;
+ tj_pos = i;
+ }
+ follows_wildcard = FALSE;
+ pspec->min_length++;
+ pspec->max_length++;
+ break;
+ }
+ *d++ = *s;
+ i++;
+ }
+ for (; pending_jokers; pending_jokers--) {
+ *d++ = '?';
+ if (hj_pos < 0)
+ hj_pos = i;
+ tj_pos = i;
+ }
+ *d++ = 0;
+ seen_joker = hj_pos >= 0;
+ seen_wildcard = hw_pos >= 0;
+ more_wildcards = seen_wildcard && hw_pos != tw_pos;
+ if (seen_wildcard)
+ pspec->max_length = G_MAXUINT;
+
+ /* special case sole head/tail wildcard or exact matches */
+ if (!seen_joker && !more_wildcards)
+ {
+ if (pspec->pattern[0] == '*')
+ {
+ pspec->match_type = G_MATCH_TAIL;
+ memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length);
+ pspec->pattern[pspec->pattern_length] = 0;
+ return pspec;
+ }
+ if (pspec->pattern_length > 0 &&
+ pspec->pattern[pspec->pattern_length - 1] == '*')
+ {
+ pspec->match_type = G_MATCH_HEAD;
+ pspec->pattern[--pspec->pattern_length] = 0;
+ return pspec;
+ }
+ if (!seen_wildcard)
+ {
+ pspec->match_type = G_MATCH_EXACT;
+ return pspec;
+ }
+ }
+
+ /* now just need to distinguish between head or tail match start */
+ tw_pos = pspec->pattern_length - 1 - tw_pos; /* last pos to tail distance */
+ tj_pos = pspec->pattern_length - 1 - tj_pos; /* last pos to tail distance */
+ if (seen_wildcard)
+ pspec->match_type = tw_pos > hw_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
+ else /* seen_joker */
+ pspec->match_type = tj_pos > hj_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
+ if (pspec->match_type == G_MATCH_ALL_TAIL) {
+ gchar *tmp = pspec->pattern;
+ pspec->pattern = g_utf8_strreverse (pspec->pattern, pspec->pattern_length);
+ g_free (tmp);
+ }
+ return pspec;
+}
+
+/**
+ * g_pattern_spec_free:
+ * @pspec: a #GPatternSpec
+ *
+ * Frees the memory allocated for the #GPatternSpec.
+ **/
+void
+g_pattern_spec_free (GPatternSpec *pspec)
+{
+ g_return_if_fail (pspec != NULL);
+
+ g_free (pspec->pattern);
+ g_free (pspec);
+}
+
+/**
+ * g_pattern_spec_equal:
+ * @pspec1: a #GPatternSpec
+ * @pspec2: another #GPatternSpec
+ * @Returns: Whether the compiled patterns are equal
+ *
+ * Compares two compiled pattern specs and returns whether they will
+ * match the same set of strings.
+ **/
+gboolean
+g_pattern_spec_equal (GPatternSpec *pspec1,
+ GPatternSpec *pspec2)
+{
+ g_return_val_if_fail (pspec1 != NULL, FALSE);
+ g_return_val_if_fail (pspec2 != NULL, FALSE);
+
+ return (pspec1->pattern_length == pspec2->pattern_length &&
+ pspec1->match_type == pspec2->match_type &&
+ strcmp (pspec1->pattern, pspec2->pattern) == 0);
+}
+
+/**
+ * g_pattern_match_string:
+ * @pspec: a #GPatternSpec
+ * @string: the UTF-8 encoded string to match
+ * @Returns: %TRUE if @string matches @pspec
+ *
+ * Matches a string against a compiled pattern. If the string is to be
+ * matched against more than one pattern, consider using
+ * g_pattern_match() instead while supplying the reversed string.
+ **/
+gboolean
+g_pattern_match_string (GPatternSpec *pspec,
+ const gchar *string)
+{
+ g_return_val_if_fail (pspec != NULL, FALSE);
+ g_return_val_if_fail (string != NULL, FALSE);
+
+ return g_pattern_match (pspec, strlen (string), string, NULL);
+}
+
+/**
+ * g_pattern_match_simple:
+ * @pattern: the UTF-8 encoded pattern
+ * @string: the UTF-8 encoded string to match
+ * @Returns: %TRUE if @string matches @pspec
+ *
+ * Matches a string against a pattern given as a string. If this
+ * function is to be called in a loop, it's more efficient to compile
+ * the pattern once with g_pattern_spec_new() and call
+ * g_pattern_match_string() repeatedly.
+ **/
+gboolean
+g_pattern_match_simple (const gchar *pattern,
+ const gchar *string)
+{
+ GPatternSpec *pspec;
+ gboolean ergo;
+
+ g_return_val_if_fail (pattern != NULL, FALSE);
+ g_return_val_if_fail (string != NULL, FALSE);
+
+ pspec = g_pattern_spec_new (pattern);
+ ergo = g_pattern_match (pspec, strlen (string), string, NULL);
+ g_pattern_spec_free (pspec);
+
+ return ergo;
+}