/* HTTP URI handling Copyright (C) 1999-2000, Joe Orton This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA Id: uri.c,v 1.7 2000/05/10 16:47:06 joe Exp */ #include #ifdef HAVE_STRING_H #include #endif #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #include #include #include "http_utils.h" /* for 'min' */ #include "string_utils.h" /* for CONCAT3 */ #include "uri.h" char *uri_parent( const char *uri ) { const char *pnt; char *ret; pnt = uri+strlen(uri)-1; while( *(--pnt) != '/' && pnt >= uri ) /* noop */; if( pnt < uri ) { /* not a valid absPath */ return NULL; } /* uri * V * |---| * /foo/bar/ */ ret = xmalloc( (pnt - uri) + 2 ); memcpy( ret, uri, (pnt - uri) + 1 ); ret[1+(pnt-uri)] = '\0'; pnt++; return ret; } int uri_has_trailing_slash( const char *uri ) { return (uri[strlen(uri)-1] == '/'); } const char *uri_abspath( const char *uri ) { const char *ret; /* Look for the scheme: */ ret = strstr( uri, "://" ); if( ret == NULL ) { /* No scheme */ ret = uri; } else { /* Look for the abs_path */ ret = strchr( ret+3, '/' ); if( ret == NULL ) { /* Uh-oh */ ret = uri; } } return ret; } /* TODO: not a proper URI parser */ int uri_parse( const char *uri, struct uri *parsed, const struct uri *defaults ) { const char *pnt, *slash, *colon; parsed->port = -1; parsed->host = NULL; parsed->path = NULL; parsed->scheme = NULL; pnt = strstr( uri, "://" ); if( pnt ) { parsed->scheme = xstrndup( uri, pnt - uri ); pnt += 3; /* start of hostport segment */ slash = strchr( pnt, '/' ); colon = strchr( pnt, ':' ); if( slash == NULL ) { parsed->path = xstrdup( "/" ); if( colon == NULL ) { if( defaults ) parsed->port = defaults->port; parsed->host = xstrdup( pnt ); } else { parsed->port = atoi(colon+1); parsed->host = xstrndup( pnt, colon - pnt ); } } else { if( colon == NULL || colon > slash ) { /* No port segment */ if( defaults ) parsed->port = defaults->port; parsed->host = xstrndup( pnt, slash - pnt ); } else { /* Port segment */ parsed->port = atoi( colon + 1 ); parsed->host = xstrndup( pnt, colon - pnt ); } parsed->path = xstrdup( slash ); } } else { if( defaults && defaults->scheme != NULL ) { parsed->scheme = xstrdup( defaults->scheme ); } if( defaults && defaults->host != NULL ) { parsed->host = xstrdup( defaults->host ); } if( defaults ) parsed->port = defaults->port; parsed->path = xstrdup(uri); } return 0; } void uri_free( struct uri *uri ) { HTTP_FREE( uri->host ); HTTP_FREE( uri->path ); HTTP_FREE( uri->scheme ); } /* Returns an absoluteURI */ char *uri_absolute( const char *uri, const char *scheme, const char *hostport ) { char *ret; /* Is it absolute already? */ if( strncmp( uri, scheme, strlen( scheme ) ) == 0 ) { /* Yes it is */ ret = xstrdup( uri ); } else { /* Oh no it isn't */ CONCAT3( ret, scheme, hostport, uri ); } return ret; } /* Un-escapes a URI. Returns xmalloc-allocated URI */ char *uri_unescape( const char *uri ) { const char *pnt; char *ret, *retpos, buf[5] = { "0x00\0" }; retpos = ret = xmalloc( strlen( uri ) + 1 ); for( pnt = uri; *pnt != '\0'; pnt++ ) { if( *pnt == '%' ) { if( !isxdigit((unsigned char) pnt[1]) || !isxdigit((unsigned char) pnt[2]) ) { /* Invalid URI */ return NULL; } buf[2] = *++pnt; buf[3] = *++pnt; /* bit faster than memcpy */ *retpos++ = strtol( buf, NULL, 16 ); } else { *retpos++ = *pnt; } } *retpos = '\0'; return ret; } /* RFC2396 spake: * "Data must be escaped if it does not have a representation * using an unreserved character". * ...where... * unreserved = alphanum | mark * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" * * We need also to skip reserved characters * reserved = ";" | "/" | "?" | ":" | "@" | "&" | * "=" | "+" | "$" | "," */ /* Lookup table: * 1 marks an RESERVED character. 2 marks a UNRESERVED character. * 0 marks everything else. */ #define RE 1 #define UN 2 static const short uri_chars[128] = { /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 32 */ 0, UN, 0, 0, RE, 0, RE, UN, UN, UN, UN, RE, RE, UN, UN, RE, /* 48 */ UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, RE, RE, 0, RE, 0, RE, /* 64 */ RE, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, /* 80 */ UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, 0, 0, 0, 0, UN, /* 96 */ 0, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, /* 112 */ UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, UN, 0, 0, 0, UN, 0 }; #undef RE #undef UN /* Escapes the abspath segment of a URI. * Returns xmalloc-allocated string. */ char *uri_abspath_escape( const char *abs_path ) { const char *pnt; char *ret, *retpos; /* Rather than mess about growing the buffer, allocate as much as * the URI could possibly need, i.e. every character gets %XX * escaped. Hence 3 times input size. */ retpos = ret = xmalloc( strlen( abs_path ) * 3 + 1 ); for( pnt = abs_path; *pnt != '\0'; pnt++ ) { /* Escape it: * - if it isn't 7-bit * - if it is a reserved character (but ignore '/') * - otherwise, if it is not an unreserved character * (note, there are many characters that are neither reserved * nor unreserved) */ if( *pnt<0 || (uri_chars[(int) *pnt] < 2 && *pnt!='/' )) { /* Escape it - % */ /* FIXME: Could overflow buffer with uri_abspath_escape("%") */ sprintf( retpos, "%%%02x", (unsigned char) *pnt ); retpos += 3; } else { /* It's cool */ *retpos++ = *pnt; } } *retpos = '\0'; return ret; } /* TODO: implement properly */ int uri_compare( const char *a, const char *b ) { int ret = strcasecmp( a, b ); if( ret ) { /* TODO: joe: I'm not 100% sure this is logically sound. * It feels right, though */ int traila = uri_has_trailing_slash( a ), trailb = uri_has_trailing_slash( b ), lena = strlen(a), lenb = strlen(b); if( traila != trailb && abs(lena - lenb) == 1) { /* They are the same length, apart from one has a trailing * slash and the other doesn't. */ if( strncasecmp( a, b, min(lena, lenb) ) == 0 ) ret = 0; } } return ret; } /* Hrm, well, this is kind of not very generic over URI schemes, but wth */ int uri_childof( const char *parent, const char *child ) { char *root = xstrdup(child); int ret; if( strlen(parent) >= strlen(child) ) { ret = 0; } else { /* root is the first of child, equal to length of parent */ root[strlen(parent)] = '\0'; ret = (uri_compare( parent, root ) == 0 ); } free( root ); return ret; } #ifdef URITEST int main( int argc, char *argv[] ) { char *tmp; if( argc<2 || argc>3 ) { printf( "doh. usage:\nuritest uria [urib]\n" "e.g. uritest \"/this/is/a silly/but/hey\"\n" ); exit(-1); } if( argv[2] ) { printf( "uri_compare: %s with %s: %s\n", argv[1], argv[2], uri_compare(argv[1], argv[2])==0?"true":"false" ); } else { printf( "Input URI: %s\n", argv[1] ); tmp = uri_abspath_escape( argv[1] ); printf( "Encoded: %s\n", tmp ); printf( "Decoded: %s\n", uri_unescape( tmp ) ); } return 0; } #endif /* URITEST */