summaryrefslogtreecommitdiff
path: root/regex.c
diff options
context:
space:
mode:
authorAnas Nashif <anas.nashif@intel.com>2012-11-06 22:56:34 -0800
committerAnas Nashif <anas.nashif@intel.com>2012-11-06 22:56:34 -0800
commita95df6d374a9be6fa8cee82be6ff1fb51a3c8593 (patch)
treed9e3755a2062db09de25982bc0d33b3eab272275 /regex.c
downloaded-a95df6d374a9be6fa8cee82be6ff1fb51a3c8593.tar.gz
ed-a95df6d374a9be6fa8cee82be6ff1fb51a3c8593.tar.bz2
ed-a95df6d374a9be6fa8cee82be6ff1fb51a3c8593.zip
Imported Upstream version 1.6upstream/1.6
Diffstat (limited to 'regex.c')
-rw-r--r--regex.c403
1 files changed, 403 insertions, 0 deletions
diff --git a/regex.c b/regex.c
new file mode 100644
index 0000000..d66b20a
--- /dev/null
+++ b/regex.c
@@ -0,0 +1,403 @@
+/* regex.c: regular expression interface routines for the ed line editor. */
+/* GNU ed - The GNU line editor.
+ Copyright (C) 1993, 1994 Andrew Moore, Talke Studio
+ Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
+ Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stddef.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ed.h"
+
+
+static regex_t * global_pat = 0;
+static bool patlock = false; /* if set, pattern not freed by get_compiled_pattern */
+
+static char * stbuf = 0; /* substitution template buffer */
+static int stbufsz = 0; /* substitution template buffer size */
+static int stlen = 0; /* substitution template length */
+
+static char * rbuf = 0; /* replace_matching_text buffer */
+static int rbufsz = 0; /* replace_matching_text buffer size */
+
+
+bool prev_pattern( void ) { return global_pat != 0; }
+
+
+/* translate characters in a string */
+static void translit_text( char * p, int len, const char from, const char to )
+ {
+ while( --len >= 0 )
+ {
+ if( *p == from ) *p = to;
+ ++p;
+ }
+ }
+
+
+/* overwrite newlines with ASCII NULs */
+static void newline_to_nul( char * const s, const int len )
+ { translit_text( s, len, '\n', '\0' ); }
+
+/* overwrite ASCII NULs with newlines */
+static void nul_to_newline( char * const s, const int len )
+ { translit_text( s, len, '\0', '\n' ); }
+
+
+/* expand a POSIX character class */
+static const char * parse_char_class( const char * p )
+ {
+ char c, d;
+
+ if( *p == '^' ) ++p;
+ if( *p == ']' ) ++p;
+ for( ; *p != ']' && *p != '\n'; ++p )
+ if( *p == '[' && ( ( d = p[1] ) == '.' || d == ':' || d == '=' ) )
+ for( ++p, c = *++p; *p != ']' || c != d; ++p )
+ if( ( c = *p ) == '\n' )
+ return 0;
+ return ( ( *p == ']' ) ? p : 0 );
+ }
+
+
+/* copy a pattern string from the command buffer; return pointer to the copy */
+static char * extract_pattern( const char ** const ibufpp, const char delimiter )
+ {
+ static char * buf = 0;
+ static int bufsz = 0;
+ const char * nd = *ibufpp;
+ int len;
+
+ while( *nd != delimiter && *nd != '\n' )
+ {
+ if( *nd == '[' )
+ {
+ nd = parse_char_class( ++nd );
+ if( !nd ) { set_error_msg( "Unbalanced brackets ([])" ); return 0; }
+ }
+ else if( *nd == '\\' && *++nd == '\n' )
+ { set_error_msg( "Trailing backslash (\\)" ); return 0; }
+ ++nd;
+ }
+ len = nd - *ibufpp;
+ if( !resize_buffer( &buf, &bufsz, len + 1 ) ) return 0;
+ memcpy( buf, *ibufpp, len );
+ buf[len] = 0;
+ *ibufpp = nd;
+ if( isbinary() ) nul_to_newline( buf, len );
+ return buf;
+ }
+
+
+/* return pointer to compiled pattern from command buffer */
+static regex_t * get_compiled_pattern( const char ** const ibufpp )
+ {
+ static regex_t * exp = 0;
+ const char * exps;
+ const char delimiter = **ibufpp;
+ int n;
+
+ if( delimiter == ' ' )
+ { set_error_msg( "Invalid pattern delimiter" ); return 0; }
+ if( delimiter == '\n' || *++*ibufpp == '\n' || **ibufpp == delimiter )
+ {
+ if( !exp ) set_error_msg( "No previous pattern" );
+ return exp;
+ }
+ exps = extract_pattern( ibufpp, delimiter );
+ if( !exps ) return 0;
+ /* buffer alloc'd && not reserved */
+ if( exp && !patlock ) regfree( exp );
+ else
+ {
+ exp = (regex_t *) malloc( sizeof (regex_t) );
+ if( !exp )
+ {
+ show_strerror( 0, errno );
+ set_error_msg( "Memory exhausted" );
+ return 0;
+ }
+ }
+ patlock = false;
+ n = regcomp( exp, exps, 0 );
+ if( n )
+ {
+ char buf[80];
+ regerror( n, exp, buf, sizeof buf );
+ set_error_msg( buf );
+ free( exp );
+ exp = 0;
+ }
+ return exp;
+ }
+
+
+/* add line matching a pattern to the global-active list */
+bool build_active_list( const char ** const ibufpp, const int first_addr,
+ const int second_addr, const bool match )
+ {
+ const regex_t * pat;
+ const line_t * lp;
+ int addr;
+ const char delimiter = **ibufpp;
+
+ if( delimiter == ' ' || delimiter == '\n' )
+ { set_error_msg( "Invalid pattern delimiter" ); return false; }
+ pat = get_compiled_pattern( ibufpp );
+ if( !pat ) return false;
+ if( **ibufpp == delimiter ) ++*ibufpp;
+ clear_active_list();
+ lp = search_line_node( first_addr );
+ for( addr = first_addr; addr <= second_addr; ++addr, lp = lp->q_forw )
+ {
+ char * const s = get_sbuf_line( lp );
+ if( !s ) return false;
+ if( isbinary() ) nul_to_newline( s, lp->len );
+ if( !regexec( pat, s, 0, 0, 0 ) == match && !set_active_node( lp ) )
+ return false;
+ }
+ return true;
+ }
+
+
+/* return pointer to copy of substitution template in the command buffer */
+static char * extract_subst_template( const char ** const ibufpp,
+ const bool isglobal )
+ {
+ int i = 0, n = 0;
+ char c;
+ const char delimiter = **ibufpp;
+
+ ++*ibufpp;
+ if( **ibufpp == '%' && (*ibufpp)[1] == delimiter )
+ {
+ ++*ibufpp;
+ if( !stbuf ) set_error_msg( "No previous substitution" );
+ return stbuf;
+ }
+ while( **ibufpp != delimiter )
+ {
+ if( !resize_buffer( &stbuf, &stbufsz, i + 2 ) ) return 0;
+ c = stbuf[i++] = *(*ibufpp)++;
+ if( c == '\n' && **ibufpp == 0 ) { --i, --*ibufpp; break; }
+ if( c == '\\' && ( stbuf[i++] = *(*ibufpp)++ ) == '\n' && !isglobal )
+ {
+ while( ( *ibufpp = get_tty_line( &n ) ) &&
+ ( n == 0 || ( n > 0 && (*ibufpp)[n-1] != '\n' ) ) )
+ clearerr( stdin );
+ if( !*ibufpp ) return 0;
+ }
+ }
+ if( !resize_buffer( &stbuf, &stbufsz, i + 1 ) ) return 0;
+ stbuf[stlen = i] = 0;
+ return stbuf;
+ }
+
+
+/* extract substitution tail from the command buffer */
+bool extract_subst_tail( const char ** const ibufpp, int * const gflagsp,
+ int * const snump, const bool isglobal )
+ {
+ const char delimiter = **ibufpp;
+
+ *gflagsp = *snump = 0;
+ if( delimiter == '\n' ) { stlen = 0; *gflagsp = GPR; return true; }
+ if( !extract_subst_template( ibufpp, isglobal ) ) return false;
+ if( **ibufpp == '\n' ) { *gflagsp = GPR; return true; }
+ if( **ibufpp == delimiter ) ++*ibufpp;
+ if( **ibufpp >= '1' && **ibufpp <= '9' )
+ return parse_int( snump, *ibufpp, ibufpp );
+ if( **ibufpp == 'g' ) { ++*ibufpp; *gflagsp = GSG; }
+ return true;
+ }
+
+
+/* return the address of the next line matching a pattern in a given
+ direction. wrap around begin/end of editor buffer if necessary */
+int next_matching_node_addr( const char ** const ibufpp, const bool forward )
+ {
+ const regex_t * const pat = get_compiled_pattern( ibufpp );
+ int addr = current_addr();
+
+ if( !pat ) return -1;
+ do {
+ addr = ( forward ? inc_addr( addr ) : dec_addr( addr ) );
+ if( addr )
+ {
+ const line_t * const lp = search_line_node( addr );
+ char * const s = get_sbuf_line( lp );
+ if( !s ) return -1;
+ if( isbinary() ) nul_to_newline( s, lp->len );
+ if( !regexec( pat, s, 0, 0, 0 ) ) return addr;
+ }
+ }
+ while( addr != current_addr() );
+ set_error_msg( "No match" );
+ return -1;
+ }
+
+
+bool new_compiled_pattern( const char ** const ibufpp )
+ {
+ regex_t * tpat;
+
+ disable_interrupts();
+ tpat = get_compiled_pattern( ibufpp );
+ if( tpat && tpat != global_pat )
+ {
+ if( global_pat ) { regfree( global_pat ); free( global_pat ); }
+ global_pat = tpat;
+ patlock = true; /* reserve pattern */
+ }
+ enable_interrupts();
+ return ( tpat ? true : false );
+ }
+
+
+/* modify text according to a substitution template; return offset to
+ end of modified text */
+static int apply_subst_template( const char * const boln,
+ const regmatch_t * const rm, int offset,
+ const int re_nsub )
+ {
+ const char * sub = stbuf;
+
+ for( ; sub - stbuf < stlen; ++sub )
+ {
+ int n;
+ if( *sub == '&' )
+ {
+ int j = rm[0].rm_so; int k = rm[0].rm_eo;
+ if( !resize_buffer( &rbuf, &rbufsz, offset + k - j ) ) return -1;
+ while( j < k ) rbuf[offset++] = boln[j++];
+ }
+ else if( *sub == '\\' && *++sub >= '1' && *sub <= '9' &&
+ ( n = *sub - '0' ) <= re_nsub )
+ {
+ int j = rm[n].rm_so; int k = rm[n].rm_eo;
+ if( !resize_buffer( &rbuf, &rbufsz, offset + k - j ) ) return -1;
+ while( j < k ) rbuf[offset++] = boln[j++];
+ }
+ else
+ {
+ if( !resize_buffer( &rbuf, &rbufsz, offset + 1 ) ) return -1;
+ rbuf[offset++] = *sub;
+ }
+ }
+ if( !resize_buffer( &rbuf, &rbufsz, offset + 1 ) ) return -1;
+ rbuf[offset] = 0;
+ return offset;
+ }
+
+
+/* replace text matched by a pattern according to a substitution
+ template; return size of the modified text */
+static int replace_matching_text( const line_t * const lp, const int gflags,
+ const int snum )
+ {
+ enum { se_max = 30 }; /* max subexpressions in a regular expression */
+ regmatch_t rm[se_max];
+ char * txt = get_sbuf_line( lp );
+ const char * eot;
+ int i = 0, offset = 0;
+ bool changed = false;
+
+ if( !txt ) return -1;
+ if( isbinary() ) nul_to_newline( txt, lp->len );
+ eot = txt + lp->len;
+ if( !regexec( global_pat, txt, se_max, rm, 0 ) )
+ {
+ int matchno = 0;
+ do {
+ if( !snum || snum == ++matchno )
+ {
+ changed = true; i = rm[0].rm_so;
+ if( !resize_buffer( &rbuf, &rbufsz, offset + i ) ) return -1;
+ if( isbinary() ) newline_to_nul( txt, rm[0].rm_eo );
+ memcpy( rbuf + offset, txt, i ); offset += i;
+ offset = apply_subst_template( txt, rm, offset, global_pat->re_nsub );
+ if( offset < 0 ) return -1;
+ }
+ else
+ {
+ i = rm[0].rm_eo;
+ if( !resize_buffer( &rbuf, &rbufsz, offset + i ) ) return -1;
+ if( isbinary() ) newline_to_nul( txt, i );
+ memcpy( rbuf + offset, txt, i ); offset += i;
+ }
+ txt += rm[0].rm_eo;
+ }
+ while( *txt && ( !changed || ( ( gflags & GSG ) && rm[0].rm_eo ) ) &&
+ !regexec( global_pat, txt, se_max, rm, REG_NOTBOL ) );
+ i = eot - txt;
+ if( !resize_buffer( &rbuf, &rbufsz, offset + i + 2 ) ) return -1;
+ if( i > 0 && !rm[0].rm_eo && ( gflags & GSG ) )
+ { set_error_msg( "Infinite substitution loop" ); return -1; }
+ if( isbinary() ) newline_to_nul( txt, i );
+ memcpy( rbuf + offset, txt, i );
+ memcpy( rbuf + offset + i, "\n", 2 );
+ }
+ return ( changed ? offset + i + 1 : 0 );
+ }
+
+
+/* for each line in a range, change text matching a pattern according to
+ a substitution template; return false if error */
+bool search_and_replace( const int first_addr, const int second_addr,
+ const int gflags, const int snum, const bool isglobal )
+ {
+ int lc;
+ bool match_found = false;
+
+ set_current_addr( first_addr - 1 );
+ for( lc = 0; lc <= second_addr - first_addr; ++lc )
+ {
+ const line_t * const lp = search_line_node( inc_current_addr() );
+ const int size = replace_matching_text( lp, gflags, snum );
+ if( size < 0 ) return false;
+ if( size )
+ {
+ const char * txt = rbuf;
+ const char * const eot = rbuf + size;
+ undo_t * up = 0;
+ disable_interrupts();
+ if( !delete_lines( current_addr(), current_addr(), isglobal ) )
+ { enable_interrupts(); return false; }
+ do {
+ txt = put_sbuf_line( txt, size, current_addr() );
+ if( !txt ) { enable_interrupts(); return false; }
+ if( up ) up->tail = search_line_node( current_addr() );
+ else
+ {
+ up = push_undo_atom( UADD, current_addr(), current_addr() );
+ if( !up ) { enable_interrupts(); return false; }
+ }
+ }
+ while( txt != eot );
+ enable_interrupts();
+ match_found = true;
+ }
+ }
+ if( !match_found && !( gflags & GLB ) )
+ { set_error_msg( "No match" ); return false; }
+ return true;
+ }