diff options
author | Kim Kibum <kb0929.kim@samsung.com> | 2012-05-21 17:42:36 +0900 |
---|---|---|
committer | Kim Kibum <kb0929.kim@samsung.com> | 2012-05-21 17:42:36 +0900 |
commit | 2ae147a745432eb1f8d2c72b4dc22351e3534b5c (patch) | |
tree | cfbde0d1f4e0d05d6b92eb8e71485f6d36a2145a /ccl.c | |
parent | c22a3ed09b982b93feb23655eba7b55fa27885cb (diff) | |
download | flex-2ae147a745432eb1f8d2c72b4dc22351e3534b5c.tar.gz flex-2ae147a745432eb1f8d2c72b4dc22351e3534b5c.tar.bz2 flex-2ae147a745432eb1f8d2c72b4dc22351e3534b5c.zip |
Upload Tizen:Base source
Diffstat (limited to 'ccl.c')
-rw-r--r-- | ccl.c | 310 |
1 files changed, 310 insertions, 0 deletions
@@ -0,0 +1,310 @@ +/* ccl - routines for character classes */ + +/* Copyright (c) 1990 The Regents of the University of California. */ +/* All rights reserved. */ + +/* This code is derived from software contributed to Berkeley by */ +/* Vern Paxson. */ + +/* The United States Government has rights in this work pursuant */ +/* to contract no. DE-AC03-76SF00098 between the United States */ + /* Department of Energy and the University of California. */ + +/* This file is part of flex. */ + +/* Redistribution and use in source and binary forms, with or without */ +/* modification, are permitted provided that the following conditions */ +/* are met: */ + +/* 1. Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* 2. Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution. */ + +/* Neither the name of the University nor the names of its contributors */ +/* may be used to endorse or promote products derived from this software */ +/* without specific prior written permission. */ + +/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ +/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ +/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ +/* PURPOSE. */ + +#include "flexdef.h" + +/* return true if the chr is in the ccl. Takes negation into account. */ +static bool +ccl_contains (const int cclp, const int ch) +{ + int ind, len, i; + + len = ccllen[cclp]; + ind = cclmap[cclp]; + + for (i = 0; i < len; ++i) + if (ccltbl[ind + i] == ch) + return !cclng[cclp]; + + return cclng[cclp]; +} + + +/* ccladd - add a single character to a ccl */ + +void ccladd (cclp, ch) + int cclp; + int ch; +{ + int ind, len, newpos, i; + + check_char (ch); + + len = ccllen[cclp]; + ind = cclmap[cclp]; + + /* check to see if the character is already in the ccl */ + + for (i = 0; i < len; ++i) + if (ccltbl[ind + i] == ch) + return; + + /* mark newlines */ + if (ch == nlch) + ccl_has_nl[cclp] = true; + + newpos = ind + len; + + if (newpos >= current_max_ccl_tbl_size) { + current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; + + ++num_reallocs; + + ccltbl = reallocate_Character_array (ccltbl, + current_max_ccl_tbl_size); + } + + ccllen[cclp] = len + 1; + ccltbl[newpos] = ch; +} + +/* dump_cclp - same thing as list_character_set, but for cclps. */ + +static void dump_cclp (FILE* file, int cclp) +{ + register int i; + + putc ('[', file); + + for (i = 0; i < csize; ++i) { + if (ccl_contains(cclp, i)){ + register int start_char = i; + + putc (' ', file); + + fputs (readable_form (i), file); + + while (++i < csize && ccl_contains(cclp,i)) ; + + if (i - 1 > start_char) + /* this was a run */ + fprintf (file, "-%s", + readable_form (i - 1)); + + putc (' ', file); + } + } + + putc (']', file); +} + + + +/* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */ +int +ccl_set_diff (int a, int b) +{ + int d, ch; + + /* create new class */ + d = cclinit(); + + /* In order to handle negation, we spin through all possible chars, + * addding each char in a that is not in b. + * (This could be O(n^2), but n is small and bounded.) + */ + for ( ch = 0; ch < csize; ++ch ) + if (ccl_contains (a, ch) && !ccl_contains(b, ch)) + ccladd (d, ch); + + /* debug */ + if (0){ + fprintf(stderr, "ccl_set_diff ("); + fprintf(stderr, "\n "); + dump_cclp (stderr, a); + fprintf(stderr, "\n "); + dump_cclp (stderr, b); + fprintf(stderr, "\n "); + dump_cclp (stderr, d); + fprintf(stderr, "\n)\n"); + } + return d; +} + +/* ccl_set_union - create a new ccl as the set union of the two given ccls. */ +int +ccl_set_union (int a, int b) +{ + int d, i; + + /* create new class */ + d = cclinit(); + + /* Add all of a */ + for (i = 0; i < ccllen[a]; ++i) + ccladd (d, ccltbl[cclmap[a] + i]); + + /* Add all of b */ + for (i = 0; i < ccllen[b]; ++i) + ccladd (d, ccltbl[cclmap[b] + i]); + + /* debug */ + if (0){ + fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d); + fprintf(stderr, "\n "); + dump_cclp (stderr, a); + fprintf(stderr, "\n "); + dump_cclp (stderr, b); + fprintf(stderr, "\n "); + dump_cclp (stderr, d); + fprintf(stderr, "\n)\n"); + } + return d; +} + + +/* cclinit - return an empty ccl */ + +int cclinit () +{ + if (++lastccl >= current_maxccls) { + current_maxccls += MAX_CCLS_INCREMENT; + + ++num_reallocs; + + cclmap = + reallocate_integer_array (cclmap, current_maxccls); + ccllen = + reallocate_integer_array (ccllen, current_maxccls); + cclng = reallocate_integer_array (cclng, current_maxccls); + ccl_has_nl = + reallocate_bool_array (ccl_has_nl, + current_maxccls); + } + + if (lastccl == 1) + /* we're making the first ccl */ + cclmap[lastccl] = 0; + + else + /* The new pointer is just past the end of the last ccl. + * Since the cclmap points to the \first/ character of a + * ccl, adding the length of the ccl to the cclmap pointer + * will produce a cursor to the first free space. + */ + cclmap[lastccl] = + cclmap[lastccl - 1] + ccllen[lastccl - 1]; + + ccllen[lastccl] = 0; + cclng[lastccl] = 0; /* ccl's start out life un-negated */ + ccl_has_nl[lastccl] = false; + + return lastccl; +} + + +/* cclnegate - negate the given ccl */ + +void cclnegate (cclp) + int cclp; +{ + cclng[cclp] = 1; + ccl_has_nl[cclp] = !ccl_has_nl[cclp]; +} + + +/* list_character_set - list the members of a set of characters in CCL form + * + * Writes to the given file a character-class representation of those + * characters present in the given CCL. A character is present if it + * has a non-zero value in the cset array. + */ + +void list_character_set (file, cset) + FILE *file; + int cset[]; +{ + register int i; + + putc ('[', file); + + for (i = 0; i < csize; ++i) { + if (cset[i]) { + register int start_char = i; + + putc (' ', file); + + fputs (readable_form (i), file); + + while (++i < csize && cset[i]) ; + + if (i - 1 > start_char) + /* this was a run */ + fprintf (file, "-%s", + readable_form (i - 1)); + + putc (' ', file); + } + } + + putc (']', file); +} + +/** Determines if the range [c1-c2] is unambiguous in a case-insensitive + * scanner. Specifically, if a lowercase or uppercase character, x, is in the + * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also + * be in the range. If not, then this range is ambiguous, and the function + * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that + * [a-z] will be labeled ambiguous because it does not include [A-Z]. + * + * @param c1 the lower end of the range + * @param c2 the upper end of the range + * @return true if [c1-c2] is not ambiguous for a caseless scanner. + */ +bool range_covers_case (int c1, int c2) +{ + int i, o; + + for (i = c1; i <= c2; i++) { + if (has_case (i)) { + o = reverse_case (i); + if (o < c1 || c2 < o) + return false; + } + } + return true; +} + +/** Reverse the case of a character, if possible. + * @return c if case-reversal does not apply. + */ +int reverse_case (int c) +{ + return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c); +} + +/** Return true if c is uppercase or lowercase. */ +bool has_case (int c) +{ + return (isupper (c) || islower (c)) ? true : false; +} |