From 55ebcc965ffd6e627782e7ec5ef1f72d44a8b1fb Mon Sep 17 00:00:00 2001 From: Michael Schroeder Date: Wed, 17 Jul 2013 13:49:13 +0200 Subject: support replchar parameter in (currently unused) solv_replacebadutf8 --- src/util.c | 48 +++++++++++++++++++++++++++++++++++++++++------- src/util.h | 2 +- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/src/util.c b/src/util.c index 4a24096..d8ae7ca 100644 --- a/src/util.c +++ b/src/util.c @@ -322,12 +322,37 @@ solv_latin1toutf8(const char *buf) } char * -solv_replacebadutf8(const char *buf) +solv_replacebadutf8(const char *buf, int replchar) { size_t l, nl; const char *p; char *r = 0, *rp = 0; + int repllen, replin; + if (replchar < 0 || replchar > 0x10ffff) + replchar = 0xfffd; + if (!replchar) + repllen = replin = 0; + else if (replchar < 0x80) + { + repllen = 1; + replin = (replchar & 0x40) | 0x80; + } + else if (replchar < 0x800) + { + repllen = 2; + replin = 0x40; + } + else if (replchar < 0x10000) + { + repllen = 3; + replin = 0x60; + } + else + { + repllen = 4; + replin = 0x70; + } for (;;) { for (p = buf, nl = 0; *p; ) @@ -342,14 +367,23 @@ solv_replacebadutf8(const char *buf) p += l; if (!*p) break; - /* found a bad char, replace with 0xfffd */ - if (rp) + /* found a bad char, replace with replchar */ + if (rp && replchar) { - *rp++ = 0xef; - *rp++ = 0xbf; - *rp++ = 0xbd; + switch (repllen) + { + case 4: + *rp++ = (replchar >> 18 & 0x3f) | 0x80; + case 3: + *rp++ = (replchar >> 12 & 0x3f) | 0x80; + case 2: + *rp++ = (replchar >> 6 & 0x3f) | 0x80; + default: + *rp++ = (replchar & 0x3f) | 0x80; + } + rp[-repllen] ^= replin; } - nl += 3; + nl += repllen; p++; while ((*(const unsigned char *)p & 0xc0) == 0x80) p++; diff --git a/src/util.h b/src/util.h index 0c15d95..2f2f096 100644 --- a/src/util.h +++ b/src/util.h @@ -40,7 +40,7 @@ extern int solv_hex2bin(const char **strp, unsigned char *buf, int bufl); extern char *solv_bin2hex(const unsigned char *buf, int l, char *str); extern size_t solv_validutf8(const char *buf); extern char *solv_latin1toutf8(const char *buf); -extern char *solv_replacebadutf8(const char *buf); +extern char *solv_replacebadutf8(const char *buf, int replchar); static inline void *solv_extend(void *buf, size_t len, size_t nmemb, size_t size, size_t block) -- cgit v1.2.3