diff options
author | Petr Machata <pmachata@redhat.com> | 2013-11-11 02:24:42 +0100 |
---|---|---|
committer | Chanho Park <chanho61.park@samsung.com> | 2014-08-22 20:38:23 +0900 |
commit | e6c25f6799825812e2b87990333c649ba796f600 (patch) | |
tree | 8d68002e600f53c912433310ced3b13fb2883b95 | |
parent | f292cf4e52c73fcc3b63ba2b032d17822f63e6e8 (diff) | |
download | ltrace-e6c25f6799825812e2b87990333c649ba796f600.tar.gz ltrace-e6c25f6799825812e2b87990333c649ba796f600.tar.bz2 ltrace-e6c25f6799825812e2b87990333c649ba796f600.zip |
Support wide character strings
- "string" lens and "format" pack were extended such that using an
integer type as the underlying array element type denotes a wide-character string.
- Several prototypes from wchar.h were added to libc.so.conf.
- ltrace.conf.5 was updated to document wide-character string types.
-rw-r--r-- | TODO | 7 | ||||
-rw-r--r-- | etc/libc.so.conf | 32 | ||||
-rw-r--r-- | lens_default.c | 115 | ||||
-rw-r--r-- | libltrace.c | 8 | ||||
-rw-r--r-- | ltrace.conf.5 | 19 | ||||
-rw-r--r-- | printf.c | 61 |
6 files changed, 185 insertions, 57 deletions
@@ -127,6 +127,13 @@ | void func(int*, int*, +long*, long*); | | void func(in int*, in int*, out long*, out long*); | + This is useful in particular for: + + | ulong mbsrtowcs(+string(array(uint, zero(arg3))), string*, ulong, addr); | + + Where we would like to render arg2 on the way in, and arg1 on the + way out. + But sometimes we may want to see a different type on the way in and on the way out. E.g. in asprintf, what's interesting on the way in is the address, but on the way out we want to see buffer contents. diff --git a/etc/libc.so.conf b/etc/libc.so.conf index efa1b95..fa95887 100644 --- a/etc/libc.so.conf +++ b/etc/libc.so.conf @@ -43,6 +43,7 @@ string tgoto(string, int, int); # POSIX always uses pointer to the structure, so it's fine. typedef DIR = struct(int); +typedef FILE = addr; # XXX We can't represent the following portably without having either # uulong, or directly uint64_t.' @@ -400,7 +401,36 @@ addr getutent(); void setutent(); # wchar.h -int fwide(addr, int); +typedef wchar_t = string(uint); +typedef wint_t = string(int); +typedef wstring_t = string(array(uint, zero)*); + +int fwide(FILE*, int); +wint_t btowc(int); +wint_t fgetwc(FILE*); +wstring_t fgetws(+string(array(uint, zero(arg2))*), int, FILE*); +wint_t fputwc(wchar_t, FILE*); +int fputws(wstring_t, FILE*); +int fwprintf(FILE*, format(wstring_t)); +; int fwscanf(FILE *restrict, const wchar_t *restrict, ...); +wint_t getwc(FILE *); +wint_t getwchar(); +int iswalnum(wint_t); +int iswalpha(wint_t); +int iswcntrl(wint_t); +; int iswctype(wint_t, wctype_t); +int iswdigit(wint_t); +int iswgraph(wint_t); +int iswlower(wint_t); +int iswprint(wint_t); +int iswpunct(wint_t); +int iswspace(wint_t); +int iswupper(wint_t); +int iswxdigit(wint_t); +ulong mbrlen(string, ulong, addr); +ulong mbrtowc(+wchar_t*, string[arg3], ulong, addr); +int mbsinit(addr); +ulong mbsrtowcs(+string(array(uint, zero(arg3))), string*, ulong, addr); # sys/wait.h int wait(addr); diff --git a/lens_default.c b/lens_default.c 
index f0bd616..1e57587 100644 --- a/lens_default.c +++ b/lens_default.c @@ -123,13 +123,8 @@ acc_fprintf(int *countp, FILE *stream, const char *format, ...) } static int -format_char(FILE *stream, struct value *value, struct value_dict *arguments) +print_char(FILE *stream, int c) { - long lc; - if (value_extract_word(value, &lc, arguments) < 0) - return -1; - int c = (int)lc; - const char *fmt; switch (c) { case -1: @@ -173,13 +168,23 @@ format_char(FILE *stream, struct value *value, struct value_dict *arguments) } static int -format_naked_char(FILE *stream, struct value *value, - struct value_dict *arguments) +format_char(FILE *stream, struct value *value, struct value_dict *arguments) +{ + long lc; + if (value_extract_word(value, &lc, arguments) < 0) + return -1; + return print_char(stream, (int) lc); +} + +static int +format_naked(FILE *stream, struct value *value, + struct value_dict *arguments, + int (*what)(FILE *, struct value *, struct value_dict *)) { int written = 0; if (acc_fprintf(&written, stream, "'") < 0 || account_output(&written, - format_char(stream, value, arguments)) < 0 + what(stream, value, arguments)) < 0 || acc_fprintf(&written, stream, "'") < 0) return -1; @@ -339,7 +344,7 @@ done: * OPEN, CLOSE, DELIM are opening and closing parenthesis and element * delimiter. 
*/ -int +static int format_array(FILE *stream, struct value *value, struct value_dict *arguments, struct expr_node *length, size_t maxlen, int before, const char *open, const char *close, const char *delim) @@ -407,7 +412,8 @@ toplevel_format_lens(struct lens *lens, FILE *stream, case ARGTYPE_CHAR: if (int_fmt == INT_FMT_default) - return format_naked_char(stream, value, arguments); + return format_naked(stream, value, arguments, + &format_char); return format_integer(stream, value, int_fmt, arguments); case ARGTYPE_FLOAT: @@ -542,6 +548,47 @@ struct lens bool_lens = { .format_cb = bool_lens_format_cb, }; +static int +redispatch_as_array(struct lens *lens, FILE *stream, + struct value *value, struct value_dict *arguments, + int (*cb)(struct lens *, FILE *, + struct value *, struct value_dict *)) +{ + struct arg_type_info info[2]; + type_init_array(&info[1], value->type->u.ptr_info.info, 0, + expr_node_zero(), 0); + type_init_pointer(&info[0], &info[1], 0); + info->lens = lens; + info->own_lens = 0; + struct value tmp; + if (value_clone(&tmp, value) < 0) + return -1; + value_set_type(&tmp, info, 0); + int ret = cb(lens, stream, &tmp, arguments); + type_destroy(&info[0]); + type_destroy(&info[1]); + value_destroy(&tmp); + return ret; +} + +static int +format_wchar(FILE *stream, struct value *value, struct value_dict *arguments) +{ + long l; + if (value_extract_word(value, &l, arguments) < 0) + return -1; + wchar_t wc = (wchar_t) l; + char buf[MB_CUR_MAX + 1]; + + int c = wctomb(buf, wc); + if (c < 0) + return -1; + if (c == 1) + return print_char(stream, buf[0]); + + buf[c] = 0; + return fprintf(stream, "%s", buf) >= 0 ? 1 : -1; +} static int string_lens_format_cb(struct lens *lens, FILE *stream, @@ -554,39 +601,39 @@ string_lens_format_cb(struct lens *lens, FILE *stream, * I suspect people are so used to the char * C idiom, * that string(char *) might actually turn up. So * let's just support it. 
*/ - if (value->type->u.ptr_info.info->type == ARGTYPE_CHAR) { - struct arg_type_info info[2]; - type_init_array(&info[1], - value->type->u.ptr_info.info, 0, - expr_node_zero(), 0); - type_init_pointer(&info[0], &info[1], 0); - info->lens = lens; - info->own_lens = 0; - struct value tmp; - if (value_clone(&tmp, value) < 0) - return -1; - value_set_type(&tmp, info, 0); - int ret = string_lens_format_cb(lens, stream, &tmp, - arguments); - type_destroy(&info[0]); - type_destroy(&info[1]); - value_destroy(&tmp); - return ret; - } - - /* fall-through */ + switch ((int) value->type->u.ptr_info.info->type) + case ARGTYPE_CHAR: + case ARGTYPE_SHORT: + case ARGTYPE_USHORT: + case ARGTYPE_INT: + case ARGTYPE_UINT: + case ARGTYPE_LONG: + case ARGTYPE_ULONG: + return redispatch_as_array(lens, stream, value, + arguments, + &string_lens_format_cb); + + /* Otherwise dispatch to whatever the default for the + * pointee is--most likely this will again be us. */ + /* Fall through. */ case ARGTYPE_VOID: case ARGTYPE_FLOAT: case ARGTYPE_DOUBLE: case ARGTYPE_STRUCT: + return toplevel_format_lens(lens, stream, value, + arguments, INT_FMT_default); + case ARGTYPE_SHORT: case ARGTYPE_INT: case ARGTYPE_LONG: case ARGTYPE_USHORT: case ARGTYPE_UINT: case ARGTYPE_ULONG: - return toplevel_format_lens(lens, stream, value, - arguments, INT_FMT_default); + if (value->parent != NULL && value->type->lens == NULL) + return format_wchar(stream, value, arguments); + else + return format_naked(stream, value, arguments, + &format_wchar); case ARGTYPE_CHAR: return format_char(stream, value, arguments); diff --git a/libltrace.c b/libltrace.c index d43a9b2..a8dd61e 100644 --- a/libltrace.c +++ b/libltrace.c @@ -21,10 +21,11 @@ #include "config.h" -#include <limits.h> #include <sys/param.h> #include <sys/wait.h> #include <errno.h> +#include <limits.h> +#include <locale.h> #include <signal.h> #include <stdio.h> #include <stdlib.h> @@ -99,7 +100,10 @@ normal_exit(void) } void -ltrace_init(int argc, char 
**argv) { +ltrace_init(int argc, char **argv) +{ + setlocale(LC_ALL, ""); + struct opt_p_t *opt_p_tmp; atexit(normal_exit); diff --git a/ltrace.conf.5 b/ltrace.conf.5 index 957fe8b..bdf0ceb 100644 --- a/ltrace.conf.5 +++ b/ltrace.conf.5 @@ -1,5 +1,5 @@ .\" -*-nroff-*- -.\" Copyright (c) 2012 Petr Machata, Red Hat Inc. +.\" Copyright (c) 2012, 2013 Petr Machata, Red Hat Inc. .\" Copyright (c) 1997-2005 Juan Cespedes <cespedes@debian.org> .\" .\" This program is free software; you can redistribute it and/or @@ -171,13 +171,16 @@ pointer to 256-bit bit vector. .RS The first form of the argument is canonical, the latter two are syntactic sugar. In the canonical form, the function argument is -formatted as string. The \fITYPE\fR shall be either a \fBchar*\fR, or -\fBarray(char,\fIEXPR\fB)\fR, or \fBarray(char,\fIEXPR\fB)*\fR. If an -array is given, the length will typically be a \fBzero\fR expression -(but doesn't have to be). Using argument that is plain array -(i.e. not a pointer to array) makes sense e.g. in C structs, in cases -like \fBstruct(string(array(char, \fR6\fB)))\fR, which describes the C -type \fBstruct {char \fRs\fB[\fR6\fB];}\fR. +formatted as string. The \fITYPE\fR can have either of the following +forms: \fIX\fB*\fR, or \fBarray(\fIX\fB,\fIEXPR\fB)\fR, or +\fBarray(\fIX\fB,\fIEXPR\fB)*\fR. \fIX\fR is either \fBchar\fR for +normal strings, or an integer type for wide-character strings. + +If an array is given, the length will typically be a \fBzero\fR +expression (but doesn't have to be). Using argument that is plain +array (i.e. not a pointer to array) makes sense e.g. in C structs, in +cases like \fBstruct(string(array(char, \fR6\fB)))\fR, which describes +the C type \fBstruct {char \fRs\fB[\fR6\fB];}\fR. Because simple C-like strings are pretty common, there are two shorthand forms. 
The first shorthand form (with brackets) means the @@ -22,7 +22,9 @@ */ #include <assert.h> +#include <stdint.h> #include <stdlib.h> +#include <string.h> #include "printf.h" #include "type.h" @@ -39,6 +41,7 @@ struct param_enum { char *format; char const *ptr; char const *end; + size_t width; }; static struct param_enum * @@ -47,12 +50,30 @@ param_printf_init(struct value *cb_args, size_t nargs, { assert(nargs == 1); - /* We expect a char array pointer. */ + struct process *proc = cb_args[0].inferior; + assert(proc != NULL); + + /* We expect a pointer to array. */ if (cb_args->type->type != ARGTYPE_POINTER - || cb_args->type->u.ptr_info.info->type != ARGTYPE_ARRAY - || (cb_args->type->u.ptr_info.info->u.array_info.elt_type->type - != ARGTYPE_CHAR)) + || cb_args->type->u.ptr_info.info->type != ARGTYPE_ARRAY) + return NULL; + + /* The element type should be either character (for narrow + * strings) or an integral type (for wide strings). */ + struct arg_type_info *et + = cb_args->type->u.ptr_info.info->u.array_info.elt_type; + switch (et->type) { + case ARGTYPE_CHAR: + case ARGTYPE_SHORT: + case ARGTYPE_USHORT: + case ARGTYPE_INT: + case ARGTYPE_UINT: + case ARGTYPE_LONG: + case ARGTYPE_ULONG: + break; + default: return NULL; + } struct param_enum *self = malloc(sizeof(*self)); if (self == NULL) { @@ -60,10 +81,12 @@ param_printf_init(struct value *cb_args, size_t nargs, free(self); return NULL; } + self->width = type_sizeof(proc, et); + if (self->width == (size_t) -1) + goto fail; if (value_init_deref(&self->array, cb_args) < 0) goto fail; - assert(self->array.type->type == ARGTYPE_ARRAY); self->format = (char *)value_get_data(&self->array, arguments); @@ -189,14 +212,29 @@ param_printf_next(struct param_enum *self, struct arg_type_info *infop, size_t len_buf_len = 0; struct lens *lens = NULL; - for (; self->ptr < self->end; ++self->ptr) { + for (; self->ptr < self->end; self->ptr += self->width) { + union { + uint8_t u8; + uint16_t u16; + uint32_t u32; + uint64_t 
u64; + char buf[0]; + } u; + memcpy(u.buf, self->ptr, self->width); + switch (self->width) { + case 1: u.u64 = u.u8; break; + case 2: u.u64 = u.u16; break; + case 4: u.u64 = u.u32; break; + } + uint64_t c = u.u64; + if (!self->percent) { - if (*self->ptr == '%') + if (c == '%') self->percent = 1; continue; } - switch (*self->ptr) { + switch (c) { case '#': case ' ': case '-': case '+': case 'I': case '\'': /* These are only important for formatting, @@ -214,7 +252,7 @@ param_printf_next(struct param_enum *self, struct arg_type_info *infop, = malloc(sizeof(*self->future_length)); if (self->future_length != NULL) { - ++self->ptr; + self->ptr += self->width; format_type = ARGTYPE_INT; break; } @@ -227,7 +265,7 @@ param_printf_next(struct param_enum *self, struct arg_type_info *infop, * this to attach the appropriate string * length expression. */ if (len_buf_len < sizeof(len_buf) - 1) - len_buf[len_buf_len++] = *self->ptr; + len_buf[len_buf_len++] = c; continue; case 'h': @@ -299,8 +337,7 @@ param_printf_next(struct param_enum *self, struct arg_type_info *infop, lng++; case 's': format_type = ARGTYPE_ARRAY; - /* XXX "ls" means wchar_t string. */ - elt_type = ARGTYPE_CHAR; + elt_type = lng == 0 ? ARGTYPE_CHAR : ARGTYPE_INT; self->percent = 0; lens = &string_lens; break; |