summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2008-06-01 16:07:48 -0700
committerH. Peter Anvin <hpa@zytor.com>2008-06-01 16:07:48 -0700
commit7f2f8b35e6b8ea67fe620f3363b5eaa06f55a222 (patch)
treec555df26a518a0f34d7fdf7c18759fd25866afa1
parent311d27d328984a4b87aeed2d84de291c8960bb0f (diff)
downloadnasm-7f2f8b35e6b8ea67fe620f3363b5eaa06f55a222.tar.gz
nasm-7f2f8b35e6b8ea67fe620f3363b5eaa06f55a222.tar.bz2
nasm-7f2f8b35e6b8ea67fe620f3363b5eaa06f55a222.zip
qstring: add nasm_unquote() supporting `...`
Add a nasm_unquote() function supporting the intended `...` syntax.
-rw-r--r--nasmlib.c205
1 files changed, 205 insertions, 0 deletions
diff --git a/nasmlib.c b/nasmlib.c
index 8e56bf4..8cd41cf 100644
--- a/nasmlib.c
+++ b/nasmlib.c
@@ -950,6 +950,211 @@ void nasm_quote(char **str)
*str = p;
}
/*
 * Append the UTF-8 encoding of v at q and return the pointer just past
 * the last byte written.
 *
 * The original 1..6 byte UTF-8 scheme is used, so every non-negative
 * 31-bit value is encodable.  A negative v writes nothing and returns
 * q unchanged.
 */
static char *emit_utf8(char *q, int32_t v)
{
    int shift;          /* bit position of the payload going out next */
    int lead;           /* marker bits for the first byte */

    if (v < 0)
        return q;       /* Impossible - do nothing */

    if (v <= 0x7f) {
        /* Plain ASCII: a single byte with no marker bits */
        *q++ = v;
        return q;
    }

    /* Pick the lead-byte marker and the number of 6-bit groups */
    if (v <= 0x000007ff) {
        lead = 0xc0;
        shift = 6;
    } else if (v <= 0x0000ffff) {
        lead = 0xe0;
        shift = 12;
    } else if (v <= 0x001fffff) {
        lead = 0xf0;
        shift = 18;
    } else if (v <= 0x03ffffff) {
        lead = 0xf8;
        shift = 24;
    } else {
        lead = 0xfc;
        shift = 30;
    }

    /* Lead byte carries the topmost payload bits... */
    *q++ = lead | (v >> shift);

    /* ...followed by one continuation byte per remaining 6-bit group */
    while (shift > 0) {
        shift -= 6;
        *q++ = 0x80 | ((v >> shift) & 63);
    }

    return q;
}
+
/*
 * Emit the value of a \u or \U escape as UTF-8.  The value is
 * accumulated as unsigned; anything that does not fit in the int32_t
 * taken by emit_utf8() is dropped, just as emit_utf8() drops negative
 * values.
 */
static char *unq_emit_ucs(char *q, uint32_t v)
{
    if (v <= 0x7fffffff)
        q = emit_utf8(q, (int32_t)v);
    return q;
}

/*
 * Do an *in-place* dequoting of the specified string, returning the
 * resulting length (the result may contain embedded nulls.)
 *
 * '...' and "..." strings simply have their quotes removed; `...`
 * strings additionally have C-style backslash escapes processed
 * (\a \b \e \f \n \r \t \v, \ooo octal, \xhh hex, \uhhhh and
 * \Uhhhhhhhh emitted as UTF-8; any other escaped character stands
 * for itself.)  Anything else is left untouched and its length
 * returned.
 *
 * In-place replacement is possible since the unquoted length is always
 * shorter than or equal to the quoted length.
 */
size_t nasm_unquote(char *str)
{
    size_t ln;
    char bq, eq;
    char *p, *q, *ep, *escp;
    char c;
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs,
    } state;
    int ndig = 0;
    uint32_t nval = 0;  /* unsigned: 8 hex digits must not overflow */

    bq = str[0];
    if (!bq)
        return 0;
    ln = strlen(str);
    eq = str[ln-1];

    /*
     * ln >= 2 guards against a string consisting of a lone quote
     * character, for which ln-2 would wrap around.
     */
    if ((bq == '\'' || bq == '\"') && bq == eq && ln >= 2) {
        /* '...' or "..." string */
        memmove(str, str+1, ln-2);
        str[ln-2] = '\0';
        return ln-2;
    }

    /*
     * NOTE(review): a backquote at *either* end selects this path, so
     * the first and last characters are stripped even if only one of
     * them is a backquote.  Kept as-is; confirm against callers.
     */
    if (bq == '`' || eq == '`') {
        /* `...` string */
        q = str;                /* write pointer, never passes p */
        p = str+1;              /* read pointer, skips opening quote */
        ep = str+ln-1;          /* closing quote: end of the payload */
        escp = str;             /* set for real on each backslash */
        state = st_start;

        while (p < ep) {
            c = *p++;
            switch (state) {
            case st_start:
                if (c == '\\')
                    state = st_backslash;
                else
                    *q++ = c;
                break;

            case st_backslash:
                state = st_start;
                escp = p-1;     /* the character following the '\' */
                switch (c) {
                case 'a':
                    *q++ = 7;
                    break;
                case 'b':
                    *q++ = 8;
                    break;
                case 'e':
                    *q++ = 27;
                    break;
                case 'f':
                    *q++ = 12;
                    break;
                case 'n':
                    *q++ = 10;
                    break;
                case 'r':
                    *q++ = 13;
                    break;
                case 't':
                    *q++ = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    ndig = 4;   /* digits still expected */
                    nval = 0;
                    break;
                case 'U':
                    state = st_ucs;
                    ndig = 8;   /* digits still expected */
                    nval = 0;
                    break;
                case 'v':
                    *q++ = 11;
                    break;      /* must not fall into the \x case */
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = 0;   /* digits seen so far */
                    nval = 0;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 1;   /* digits seen so far */
                    nval = c - '0';
                    break;
                default:
                    /* Unknown escape: the character stands for itself */
                    *q++ = c;
                    break;
                }
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (c - '0');
                    if (++ndig >= 3) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = nval;
                    state = st_start;
                }
                break;

            case st_hex:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (++ndig >= 2) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* \x without digits yields the 'x'/'X' itself */
                    if (ndig)
                        *q++ = nval;
                    else
                        *q++ = *escp;
                    state = st_start;
                }
                break;

            case st_ucs:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (!--ndig) {
                        q = unq_emit_ucs(q, nval);
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* p > escp+1 iff at least one digit was read */
                    if (p > escp+1)
                        q = unq_emit_ucs(q, nval);
                    else
                        *q++ = *escp;
                    state = st_start;
                }
                break;
            }
        }

        /*
         * The payload may end in the middle of a numeric escape;
         * flush it the same way a terminating non-digit would have.
         */
        switch (state) {
        case st_start:
        case st_backslash:      /* lone trailing backslash: dropped */
            break;
        case st_oct:
            *q++ = nval;
            break;
        case st_hex:
            if (ndig)
                *q++ = nval;
            else
                *q++ = *escp;
            break;
        case st_ucs:
            if (p > escp+1)
                q = unq_emit_ucs(q, nval);
            else
                *q++ = *escp;
            break;
        }

        *q = '\0';
        return q-str;
    }

    /* Otherwise, just return the input... */
    return ln;
}
+
char *nasm_strcat(char *one, char *two)
{
char *rslt;