From a95df6d374a9be6fa8cee82be6ff1fb51a3c8593 Mon Sep 17 00:00:00 2001
From: Anas Nashif
Date: Tue, 6 Nov 2012 22:56:34 -0800
Subject: Imported Upstream version 1.6

---
 regex.c | 403 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 403 insertions(+)
 create mode 100644 regex.c

(limited to 'regex.c')

diff --git a/regex.c b/regex.c
new file mode 100644
index 0000000..d66b20a
--- /dev/null
+++ b/regex.c
@@ -0,0 +1,403 @@
+/* regex.c: regular expression interface routines for the ed line editor. */
+/*  GNU ed - The GNU line editor.
+    Copyright (C) 1993, 1994 Andrew Moore, Talke Studio
+    Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
+    Free Software Foundation, Inc.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* NOTE(review): the header names of the seven system includes below were
+   lost when this patch was extracted (the angle-bracketed text is gone).
+   This file calls malloc/free, memcpy, clearerr, the regcomp/regexec/
+   regerror/regfree family and reads errno, so presumably the standard
+   headers declaring those are among the seven -- confirm against the
+   upstream source before applying this patch. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ed.h"
+
+/* File-scope state shared by the routines below.  All of it is static,
+   so it persists between commands. */
+
+static regex_t * global_pat = 0;  /* pattern used by replace_matching_text; installed by new_compiled_pattern */
+static bool patlock = false;	/* if set, pattern not freed by get_compiled_pattern */
+
+static char * stbuf = 0;	/* substitution template buffer */
+static int stbufsz = 0;		/* substitution template buffer size */
+static int stlen = 0;		/* substitution template length */
+
+static char * rbuf = 0;		/* replace_matching_text buffer */
+static int rbufsz = 0;		/* replace_matching_text buffer size */
+
+
+/* return true if new_compiled_pattern has installed a pattern in global_pat */
+bool prev_pattern( void ) { return global_pat != 0; }
+
+
+/* translate characters in a string: every byte equal to 'from' within the
+   first 'len' bytes of 'p' is overwritten with 'to'.  The scan is driven
+   by 'len', not by a terminator, so the text may contain NUL bytes. */
+static void translit_text( char * p, int len, const char from, const char to )
+  {
+  while( --len >= 0 )
+    {
+    if( *p == from ) *p = to;
+    ++p;
+    }
+  }
+
+
+/* overwrite newlines with ASCII NULs */
+static void newline_to_nul( char * const s, const int len )
+  { translit_text( s, len, '\n', '\0' ); }
+
+/* overwrite ASCII NULs with newlines */
+static void nul_to_newline( char * const s, const int len )
+  { translit_text( s, len, '\0', '\n' ); }
+
+
+/* expand a POSIX character class.
+   'p' points just past the opening '[' of a bracket expression (see the
+   caller, extract_pattern).  A leading '^' and a ']' in first position
+   are skipped as ordinary members.  Nested elements opened by '[' plus
+   one of '.', ':' or '=' are skipped up to the same character followed
+   by ']'.
+   Returns a pointer to the closing ']' of the bracket expression, or 0
+   if a newline is reached before the expression is closed. */
+static const char * parse_char_class( const char * p )
+  {
+  char c, d;
+
+  if( *p == '^' ) ++p;
+  if( *p == ']' ) ++p;
+  for( ; *p != ']' && *p != '\n'; ++p )
+    if( *p == '[' && ( ( d = p[1] ) == '.' || d == ':' || d == '=' ) )
+      for( ++p, c = *++p; *p != ']' || c != d; ++p )  /* c trails *p by one byte */
+        if( ( c = *p ) == '\n' )
+          return 0;
+  return ( ( *p == ']' ) ? p : 0 );
+  }
+
+
+/* copy a pattern string from the command buffer; return pointer to the copy.
+   The pattern runs from *ibufpp up to the first 'delimiter' or newline
+   that is neither inside a bracket expression nor preceded by a
+   backslash.  On success *ibufpp is advanced to that terminating
+   character and the NUL-terminated copy is returned; in binary mode the
+   NULs inside the copy are turned into newlines.
+   The copy lives in a static buffer that is reused by the next call.
+   Returns 0 (after set_error_msg for the first two cases) on unbalanced
+   brackets, on a backslash directly followed by the newline, or if
+   resize_buffer fails. */
+static char * extract_pattern( const char ** const ibufpp, const char delimiter )
+  {
+  static char * buf = 0;
+  static int bufsz = 0;
+  const char * nd = *ibufpp;
+  int len;
+
+  while( *nd != delimiter && *nd != '\n' )
+    {
+    if( *nd == '[' )
+      {
+      nd = parse_char_class( ++nd );
+      if( !nd ) { set_error_msg( "Unbalanced brackets ([])" ); return 0; }
+      }
+    else if( *nd == '\\' && *++nd == '\n' )	/* otherwise the escaped byte is skipped below */
+      { set_error_msg( "Trailing backslash (\\)" ); return 0; }
+    ++nd;
+    }
+  len = nd - *ibufpp;
+  if( !resize_buffer( &buf, &bufsz, len + 1 ) ) return 0;
+  memcpy( buf, *ibufpp, len );
+  buf[len] = 0;
+  *ibufpp = nd;
+  if( isbinary() ) nul_to_newline( buf, len );
+  return buf;
+  }
+
+
+/* return pointer to compiled pattern from command buffer.
+   **ibufpp is the pattern delimiter on entry.  A space delimiter is
+   rejected.  If the delimiter is a newline, or the pattern is empty
+   (the delimiter is directly followed by a newline or by the delimiter
+   again), the previously compiled pattern kept in the static 'exp' is
+   returned; if there is none, "No previous pattern" is set and 0 is
+   returned.
+   Otherwise the pattern text is extracted and compiled with regcomp
+   (cflags 0).  The storage of the previous pattern is reused unless it
+   is reserved by patlock, in which case a new regex_t is allocated and
+   the old one is left alone.  patlock is cleared once storage for the
+   new pattern is in hand.
+   On a regcomp error the regerror text becomes the error message, the
+   storage is freed and 'exp' is reset, so no previous pattern remains.
+   Returns 0 on any error. */
+static regex_t * get_compiled_pattern( const char ** const ibufpp )
+  {
+  static regex_t * exp = 0;
+  const char * exps;
+  const char delimiter = **ibufpp;
+  int n;
+
+  if( delimiter == ' ' )
+    { set_error_msg( "Invalid pattern delimiter" ); return 0; }
+  if( delimiter == '\n' || *++*ibufpp == '\n' || **ibufpp == delimiter )  /* steps past the delimiter unless it is '\n' */
+    {
+    if( !exp ) set_error_msg( "No previous pattern" );
+    return exp;
+    }
+  exps = extract_pattern( ibufpp, delimiter );
+  if( !exps ) return 0;
+  /* buffer alloc'd && not reserved */
+  if( exp && !patlock ) regfree( exp );
+  else
+    {
+    exp = (regex_t *) malloc( sizeof (regex_t) );
+    if( !exp )
+      {
+      show_strerror( 0, errno );
+      set_error_msg( "Memory exhausted" );
+      return 0;
+      }
+    }
+  patlock = false;
+  n = regcomp( exp, exps, 0 );
+  if( n )
+    {
+    char buf[80];
+    regerror( n, exp, buf, sizeof buf );
+    set_error_msg( buf );
+    free( exp );
+    exp = 0;
+    }
+  return exp;
+  }
+
+
+/* add line matching a pattern to the global-active list.
+   The pattern is read from *ibufpp (a space or newline delimiter is
+   rejected) and a closing delimiter, if present, is skipped.  The
+   active list is cleared, then every line from first_addr to
+   second_addr is tested: a line is passed to set_active_node when
+   "the line matches" equals 'match', so match == false selects the
+   non-matching lines.
+   Returns false on a bad delimiter, a pattern error, or when
+   get_sbuf_line or set_active_node fails; true otherwise. */
+bool build_active_list( const char ** const ibufpp, const int first_addr,
+                        const int second_addr, const bool match )
+  {
+  const regex_t * pat;
+  const line_t * lp;
+  int addr;
+  const char delimiter = **ibufpp;
+
+  if( delimiter == ' ' || delimiter == '\n' )
+    { set_error_msg( "Invalid pattern delimiter" ); return false; }
+  pat = get_compiled_pattern( ibufpp );
+  if( !pat ) return false;
+  if( **ibufpp == delimiter ) ++*ibufpp;
+  clear_active_list();
+  lp = search_line_node( first_addr );
+  for( addr = first_addr; addr <= second_addr; ++addr, lp = lp->q_forw )
+    {
+    char * const s = get_sbuf_line( lp );
+    if( !s ) return false;
+    if( isbinary() ) nul_to_newline( s, lp->len );
+    if( !regexec( pat, s, 0, 0, 0 ) == match && !set_active_node( lp ) )  /* !regexec(...) is 1 on a match */
+      return false;
+    }
+  return true;
+  }
+
+
+/* return pointer to copy of substitution template in the command buffer.
+   **ibufpp is the delimiter on entry and is skipped.  If the template
+   is exactly '%' followed by the delimiter, the previous template in
+   stbuf is returned unchanged (error "No previous substitution" if
+   there is none) and stlen keeps its old value.
+   Otherwise bytes are copied into stbuf up to the next delimiter.  A
+   backslash is copied together with the byte that follows it, so an
+   escaped delimiter does not end the template.  A newline followed by
+   NUL ends the template; that newline is not copied and *ibufpp is
+   left pointing at it.
+   When a backslash-newline pair is copied and 'isglobal' is false, the
+   template continues on the next line, which is read with get_tty_line
+   and replaces *ibufpp.
+   On success stbuf is NUL-terminated, stlen holds its length and
+   *ibufpp points at the closing delimiter or final newline.
+   Returns 0 if resize_buffer fails or get_tty_line returns null. */
+static char * extract_subst_template( const char ** const ibufpp,
+                                      const bool isglobal )
+  {
+  int i = 0, n = 0;
+  char c;
+  const char delimiter = **ibufpp;
+
+  ++*ibufpp;
+  if( **ibufpp == '%' && (*ibufpp)[1] == delimiter )
+    {
+    ++*ibufpp;
+    if( !stbuf ) set_error_msg( "No previous substitution" );
+    return stbuf;
+    }
+  while( **ibufpp != delimiter )
+    {
+    if( !resize_buffer( &stbuf, &stbufsz, i + 2 ) ) return 0;	/* room for a backslash pair */
+    c = stbuf[i++] = *(*ibufpp)++;
+    if( c == '\n' && **ibufpp == 0 ) { --i, --*ibufpp; break; }
+    if( c == '\\' && ( stbuf[i++] = *(*ibufpp)++ ) == '\n' && !isglobal )
+      {
+      /* retry while the line read is empty or lacks its final newline */
+      while( ( *ibufpp = get_tty_line( &n ) ) &&
+             ( n == 0 || ( n > 0 && (*ibufpp)[n-1] != '\n' ) ) )
+        clearerr( stdin );
+      if( !*ibufpp ) return 0;
+      }
+    }
+  if( !resize_buffer( &stbuf, &stbufsz, i + 1 ) ) return 0;
+  stbuf[stlen = i] = 0;
+  return stbuf;
+  }
+
+
+/* extract substitution tail from the command buffer.
+   Parses what follows the pattern of a substitute command: the
+   template and an optional suffix.  *gflagsp and *snump are both
+   zeroed first.
+   - If **ibufpp is a newline there is no template: stlen is set to 0
+     and *gflagsp to GPR.
+   - If the template ends at a newline, *gflagsp is set to GPR.
+   - Otherwise the closing delimiter is skipped; a digit 1-9 starts a
+     match number parsed by parse_int into *snump (its result is
+     returned), and a 'g' sets *gflagsp to GSG.
+   GPR and GSG are flag constants defined outside this file
+   (presumably "print" and "global" -- confirm in ed.h).
+   Returns false if the template cannot be extracted. */
+bool extract_subst_tail( const char ** const ibufpp, int * const gflagsp,
+                         int * const snump, const bool isglobal )
+  {
+  const char delimiter = **ibufpp;
+
+  *gflagsp = *snump = 0;
+  if( delimiter == '\n' ) { stlen = 0; *gflagsp = GPR; return true; }
+  if( !extract_subst_template( ibufpp, isglobal ) ) return false;
+  if( **ibufpp == '\n' ) { *gflagsp = GPR; return true; }
+  if( **ibufpp == delimiter ) ++*ibufpp;
+  if( **ibufpp >= '1' && **ibufpp <= '9' )
+    return parse_int( snump, *ibufpp, ibufpp );
+  if( **ibufpp == 'g' ) { ++*ibufpp; *gflagsp = GSG; }
+  return true;
+  }
+
+
+/* return the address of the next line matching a pattern in a given
+   direction. wrap around begin/end of editor buffer if necessary.
+   The pattern is read from *ibufpp.  Starting from the current address
+   the address is stepped with inc_addr ('forward' true) or dec_addr
+   and each line is tested; address 0 is stepped over without being
+   tested.  The search stops after the current line itself has been
+   tested, which happens last.
+   Returns the matching address, or -1 on a pattern error, when
+   get_sbuf_line fails, or when no line matches ("No match" is set). */
+int next_matching_node_addr( const char ** const ibufpp, const bool forward )
+  {
+  const regex_t * const pat = get_compiled_pattern( ibufpp );
+  int addr = current_addr();
+
+  if( !pat ) return -1;
+  do {
+    addr = ( forward ? inc_addr( addr ) : dec_addr( addr ) );
+    if( addr )
+      {
+      const line_t * const lp = search_line_node( addr );
+      char * const s = get_sbuf_line( lp );
+      if( !s ) return -1;
+      if( isbinary() ) nul_to_newline( s, lp->len );
+      if( !regexec( pat, s, 0, 0, 0 ) ) return addr;
+      }
+    }
+  while( addr != current_addr() );
+  set_error_msg( "No match" );
+  return -1;
+  }
+
+/* compile the pattern at *ibufpp and install it as global_pat, the
+   pattern used by replace_matching_text.  Runs with interrupts
+   disabled.  If the pattern obtained differs from global_pat, the old
+   global_pat is released and replaced, and patlock is set so that
+   get_compiled_pattern allocates fresh storage next time instead of
+   reusing this one.  Returns true if a pattern was obtained. */
+bool new_compiled_pattern( const char ** const ibufpp )
+  {
+  regex_t * tpat;
+
+  disable_interrupts();
+  tpat = get_compiled_pattern( ibufpp );
+  if( tpat && tpat != global_pat )
+    {
+    if( global_pat ) { regfree( global_pat ); free( global_pat ); }
+    global_pat = tpat;
+    patlock = true;		/* reserve pattern */
+    }
+  enable_interrupts();
+  return ( tpat ? true : false );
+  }
+
+
+/* modify text according to a substitution template; return offset to
+   end of modified text.
+   Appends the expansion of the template (the first stlen bytes of
+   stbuf) to rbuf starting at 'offset':
+   - '&' is replaced by the whole match, bytes rm[0].rm_so up to
+     rm[0].rm_eo of 'boln';
+   - a backslash followed by a digit n in 1-9 with n <= re_nsub is
+     replaced by the text of subexpression n;
+   - after any other backslash, the byte following it is copied as is
+     and the backslash is dropped;
+   - every other byte is copied unchanged.
+   rbuf is grown as needed and NUL-terminated at the returned offset.
+   Returns -1 if resize_buffer fails. */
+static int apply_subst_template( const char * const boln,
+                                 const regmatch_t * const rm, int offset,
+                                 const int re_nsub )
+  {
+  const char * sub = stbuf;
+
+  for( ; sub - stbuf < stlen; ++sub )
+    {
+    int n;
+    if( *sub == '&' )
+      {
+      int j = rm[0].rm_so; int k = rm[0].rm_eo;
+      if( !resize_buffer( &rbuf, &rbufsz, offset + k - j ) ) return -1;
+      while( j < k ) rbuf[offset++] = boln[j++];
+      }
+    else if( *sub == '\\' && *++sub >= '1' && *sub <= '9' &&	/* sub now points past the backslash */
+             ( n = *sub - '0' ) <= re_nsub )
+      {
+      int j = rm[n].rm_so; int k = rm[n].rm_eo;
+      if( !resize_buffer( &rbuf, &rbufsz, offset + k - j ) ) return -1;
+      while( j < k ) rbuf[offset++] = boln[j++];
+      }
+    else
+      {
+      if( !resize_buffer( &rbuf, &rbufsz, offset + 1 ) ) return -1;
+      rbuf[offset++] = *sub;
+      }
+    }
+  if( !resize_buffer( &rbuf, &rbufsz, offset + 1 ) ) return -1;
+  rbuf[offset] = 0;
+  return offset;
+  }
+
+
+/* replace text matched by a pattern according to a substitution
+   template; return size of the modified text.
+   The text of line 'lp' is matched against global_pat and the result
+   is assembled in rbuf.  If 'snum' is non-zero only the snum-th match
+   is replaced and earlier matches are copied unchanged; otherwise the
+   first match is replaced, and with GSG set in 'gflags' so is every
+   following match.  The unmatched remainder of the line is appended,
+   followed by a newline and a NUL.
+   In binary mode NULs in the line are turned into newlines for
+   matching and turned back before each piece is copied out.
+   Returns the number of bytes in rbuf including the final newline,
+   0 if nothing was replaced, or -1 on error (get_sbuf_line or
+   resize_buffer failure, or "Infinite substitution loop" when a
+   global substitution stops on an empty-ended match with text left). */
+static int replace_matching_text( const line_t * const lp, const int gflags,
+                                  const int snum )
+  {
+  enum { se_max = 30 };	/* max subexpressions in a regular expression */
+  regmatch_t rm[se_max];
+  char * txt = get_sbuf_line( lp );
+  const char * eot;
+  int i = 0, offset = 0;
+  bool changed = false;
+
+  if( !txt ) return -1;
+  if( isbinary() ) nul_to_newline( txt, lp->len );
+  eot = txt + lp->len;
+  if( !regexec( global_pat, txt, se_max, rm, 0 ) )
+    {
+    int matchno = 0;
+    do {
+      if( !snum || snum == ++matchno )
+        {
+        /* copy the text before the match, then the expanded template */
+        changed = true; i = rm[0].rm_so;
+        if( !resize_buffer( &rbuf, &rbufsz, offset + i ) ) return -1;
+        if( isbinary() ) newline_to_nul( txt, rm[0].rm_eo );
+        memcpy( rbuf + offset, txt, i ); offset += i;
+        offset = apply_subst_template( txt, rm, offset, global_pat->re_nsub );
+        if( offset < 0 ) return -1;
+        }
+      else
+        {
+        /* not the requested match: copy it through unchanged */
+        i = rm[0].rm_eo;
+        if( !resize_buffer( &rbuf, &rbufsz, offset + i ) ) return -1;
+        if( isbinary() ) newline_to_nul( txt, i );
+        memcpy( rbuf + offset, txt, i ); offset += i;
+        }
+      txt += rm[0].rm_eo;
+      }
+    /* go on while text remains and either nothing was replaced yet or a
+       global substitution made progress; later matches are not at the
+       beginning of the line, hence REG_NOTBOL */
+    while( *txt && ( !changed || ( ( gflags & GSG ) && rm[0].rm_eo ) ) &&
+           !regexec( global_pat, txt, se_max, rm, REG_NOTBOL ) );
+    i = eot - txt;		/* length of the unmatched remainder */
+    if( !resize_buffer( &rbuf, &rbufsz, offset + i + 2 ) ) return -1;
+    if( i > 0 && !rm[0].rm_eo && ( gflags & GSG ) )
+      { set_error_msg( "Infinite substitution loop" ); return -1; }
+    if( isbinary() ) newline_to_nul( txt, i );
+    memcpy( rbuf + offset, txt, i );
+    memcpy( rbuf + offset + i, "\n", 2 );	/* newline plus terminating NUL */
+    }
+  return ( changed ? offset + i + 1 : 0 );
+  }
+
+
+/* for each line in a range, change text matching a pattern according to
+   a substitution template; return false if error.
+   Each line from first_addr to second_addr is made the current line
+   and passed to replace_matching_text.  When that reports a change,
+   the line is deleted and the text in rbuf is stored in its place with
+   put_sbuf_line, which is called repeatedly until all of rbuf has been
+   consumed (so the replacement may produce more than one line).  One
+   UADD undo atom is pushed for the first stored line and its tail is
+   moved forward for each further line.  This replacement step runs
+   with interrupts disabled.
+   If no line changed, "No match" is set and false is returned unless
+   GLB is set in 'gflags'. */
+bool search_and_replace( const int first_addr, const int second_addr,
+                         const int gflags, const int snum, const bool isglobal )
+  {
+  int lc;
+  bool match_found = false;
+
+  set_current_addr( first_addr - 1 );
+  for( lc = 0; lc <= second_addr - first_addr; ++lc )
+    {
+    const line_t * const lp = search_line_node( inc_current_addr() );
+    const int size = replace_matching_text( lp, gflags, snum );
+    if( size < 0 ) return false;
+    if( size )
+      {
+      const char * txt = rbuf;
+      const char * const eot = rbuf + size;
+      undo_t * up = 0;
+      disable_interrupts();
+      if( !delete_lines( current_addr(), current_addr(), isglobal ) )
+        { enable_interrupts(); return false; }
+      do {
+        txt = put_sbuf_line( txt, size, current_addr() );
+        if( !txt ) { enable_interrupts(); return false; }
+        if( up ) up->tail = search_line_node( current_addr() );
+        else
+          {
+          up = push_undo_atom( UADD, current_addr(), current_addr() );
+          if( !up ) { enable_interrupts(); return false; }
+          }
+        }
+      while( txt != eot );
+      enable_interrupts();
+      match_found = true;
+      }
+    }
+  if( !match_found && !( gflags & GLB ) )
+    { set_error_msg( "No match" ); return false; }
+  return true;
+  }
-- 
cgit v1.2.3