1 files changed, 848 insertions, 0 deletions
diff --git a/boost/beast/http/detail/basic_parser.ipp b/boost/beast/http/detail/basic_parser.ipp
new file mode 100644
index 0000000000..934d0dc505
--- /dev/null
+++ b/boost/beast/http/detail/basic_parser.ipp
@@ -0,0 +1,848 @@
+//
+// Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Official repository: https://github.com/boostorg/beast
+//
+
+#ifndef BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_IPP
+#define BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_IPP
+
+#include <boost/beast/http/detail/basic_parser.hpp>
+#include <limits>
+
+namespace boost {
+namespace beast {
+namespace http {
+namespace detail {
+
+// Skip leading SP and HTAB characters.
+//
+// Returns a pointer to the first character in [it, end) that is
+// neither ' ' nor '\t', or `end` when every character is blank.
+char const*
+basic_parser_base::
+trim_front(char const* it, char const* end)
+{
+    for(; it != end; ++it)
+    {
+        bool const blank = (*it == ' ' || *it == '\t');
+        if(! blank)
+            return it;
+    }
+    return end;
+}
+
+// Strip trailing SP and HTAB characters.
+//
+// `it` is the one-past-the-end position of the text and `first` is
+// its beginning. Returns the adjusted one-past-the-end position, which
+// equals `first` when the text consists only of blanks.
+char const*
+basic_parser_base::
+trim_back(
+    char const* it, char const* first)
+{
+    for(; it != first; --it)
+    {
+        char const prev = *(it - 1);
+        if(prev != ' ' && prev != '\t')
+            break;
+    }
+    return it;
+}
+
+// Returns `true` if `c` is accepted as part of a request-target.
+//
+// Accepted octets are 0x21 through 0x7E and 0x80 through 0xFF.
+// Control characters (0x00-0x1F), SP (0x20) and DEL (0x7F) are
+// rejected.
+bool
+basic_parser_base::
+is_pathchar(char c)
+{
+    // TEXT = <any OCTET except CTLs, and excluding LWS>
+    auto const octet = static_cast<unsigned char>(c);
+    return octet > 0x20 && octet != 0x7f;
+}
+
+// Convert one hexadecimal digit to its numeric value.
+//
+// When `c` is one of 0-9, A-F or a-f, `d` receives the value 0-15
+// and the function returns `true`. For every other character `d` is
+// set to static_cast<unsigned char>(-1) and the function returns
+// `false`.
+bool
+basic_parser_base::
+unhex(unsigned char& d, char c)
+{
+    if(c >= '0' && c <= '9')
+        d = static_cast<unsigned char>(c - '0');
+    else if(c >= 'A' && c <= 'F')
+        d = static_cast<unsigned char>(c - 'A' + 10);
+    else if(c >= 'a' && c <= 'f')
+        d = static_cast<unsigned char>(c - 'a' + 10);
+    else
+        d = static_cast<unsigned char>(-1);
+    return d != static_cast<unsigned char>(-1);
+}
+
+//--------------------------------------------------------------------------
+
+// Range-based character search (stub).
+//
+// This implementation does not scan anything: `buf_end`, `ranges`
+// and `ranges_size` are ignored and the result is always
+// {buf, false}, i.e. "nothing found, position unchanged".
+std::pair<char const*, bool>
+basic_parser_base::
+find_fast(
+    char const* buf,
+    char const* buf_end,
+    char const* ranges,
+    size_t ranges_size)
+{
+    boost::ignore_unused(buf_end, ranges, ranges_size);
+    return std::pair<char const*, bool>(buf, false);
+}
+
+// VFALCO Can SIMD help this?
+//
+// Locate the end of the line beginning at `it`.
+//
+// Scans [it, last) for the first CR and then inspects the byte
+// after it:
+//   - CR followed by LF: returns a pointer just past the LF and
+//     clears `ec`.
+//   - No CR in the buffer, or CR is the last byte available:
+//     returns nullptr and clears `ec`.
+//   - CR followed by anything other than LF: returns nullptr and
+//     sets `ec` to error::bad_line_ending.
+// A lone LF is not recognized as a line ending.
+char const*
+basic_parser_base::
+find_eol(
+    char const* it, char const* last,
+        error_code& ec)
+{
+    // Every outcome except a malformed ending leaves `ec` cleared
+    ec = {};
+    while(it != last && *it != '\r')
+        ++it;
+    // Either no CR was found or nothing follows the CR yet
+    if(it == last || ++it == last)
+        return nullptr;
+    if(*it != '\n')
+    {
+        ec = error::bad_line_ending;
+        return nullptr;
+    }
+    // VFALCO Should we handle the legacy case
+    // for lines terminated with a single '\n'?
+    return it + 1;
+}
+
+// Parse the whole range [it, last) as an unsigned decimal number.
+//
+// Returns `true` and stores the value in `v` when the range is
+// non-empty, consists only of digits, and the value fits in 64 bits.
+// Otherwise returns `false` and leaves `v` untouched.
+bool
+basic_parser_base::
+parse_dec(char const* it, char const* last, std::uint64_t& v)
+{
+    if(it == last)
+        return false;
+    auto const limit =
+        (std::numeric_limits<std::uint64_t>::max)();
+    std::uint64_t acc = 0;
+    for(; it != last; ++it)
+    {
+        if(! is_digit(*it))
+            return false;
+        // Multiplying by 10 would overflow
+        if(acc > limit / 10)
+            return false;
+        acc *= 10;
+        std::uint64_t const digit = *it - '0';
+        // Adding the digit would overflow
+        if(limit - acc < digit)
+            return false;
+        acc += digit;
+    }
+    v = acc;
+    return true;
+}
+
+// Parse a run of hexadecimal digits starting at `it`.
+//
+// Returns `false` without moving `it` when the first character is
+// not a hex digit. Otherwise digits are consumed until a non-digit
+// is reached; on success `v` receives the value, `it` addresses the
+// first non-digit and the function returns `true`. If the value
+// would not fit in 64 bits the function returns `false`, leaving
+// `v` untouched and `it` at the digit that caused the overflow.
+//
+// No end pointer is taken: the loop stops only at a character that
+// unhex() rejects, so the caller must ensure one is present.
+bool
+basic_parser_base::
+parse_hex(char const*& it, std::uint64_t& v)
+{
+    unsigned char digit;
+    if(! unhex(digit, *it))
+        return false;
+    auto const limit =
+        (std::numeric_limits<std::uint64_t>::max)();
+    std::uint64_t acc = 0;
+    for(;;)
+    {
+        if(acc > limit / 16)
+            return false;
+        acc *= 16;
+        if(limit - acc < digit)
+            return false;
+        acc += digit;
+        ++it;
+        if(! unhex(digit, *it))
+            break;
+    }
+    v = acc;
+    return true;
+}
+
+// Search [p, last) for the sequence CR LF CR LF.
+//
+// Returns a pointer just past the sequence, or nullptr when it does
+// not occur in the bytes available.
+//
+// The 4-byte window is examined from its last byte backwards so
+// that windows which cannot contain the sequence are skipped in
+// steps of up to four bytes.
+char const*
+basic_parser_base::
+find_eom(char const* p, char const* last)
+{
+    while(p + 4 <= last)
+    {
+        if(p[3] == '\r')
+        {
+            // The window may be one byte short of alignment
+            ++p;
+            continue;
+        }
+        if(p[3] != '\n' || p[2] != '\r')
+        {
+            // No match can include p[3]; skip the whole window
+            p += 4;
+            continue;
+        }
+        if(p[1] != '\n' || p[0] != '\r')
+        {
+            // p[2..3] is CRLF and may begin the sequence
+            p += 2;
+            continue;
+        }
+        return p + 4;
+    }
+    return nullptr;
+}
+
+//--------------------------------------------------------------------------
+
+// Scan text starting at `p` up to and including its CRLF.
+//
+// The scan stops at the first control character, meaning a byte
+// that is_print() rejects and that is either below 0x20 (HTAB, 9,
+// excepted) or equal to 127. Outcomes:
+//   - The control character is CR and LF follows: `token_last` is
+//     set to the position of the CR and the return value points
+//     just past the LF. `ec` is not assigned.
+//   - The input ends during the scan: ec = error::need_more and
+//     the position reached is returned.
+//   - The input ends right after the CR: ec = error::need_more and
+//     `last` is returned.
+//   - CR is followed by something other than LF:
+//     ec = error::bad_line_ending and `last` is returned.
+//   - Any other control character: nullptr is returned and `ec` is
+//     not assigned.
+char const*
+basic_parser_base::
+parse_token_to_eol(
+    char const* p,
+    char const* last,
+    char const*& token_last,
+    error_code& ec)
+{
+    for(;; ++p)
+    {
+        if(p >= last)
+        {
+            ec = error::need_more;
+            return p;
+        }
+        if(BOOST_LIKELY(is_print(*p)))
+            continue;
+        bool const below_space =
+            static_cast<unsigned char>(*p) < '\040';
+        if((below_space && *p != 9) || *p == 127)
+            break;
+    }
+    // VFALCO A `\n` by itself is not accepted as a line
+    //        terminator here
+    if(BOOST_UNLIKELY(*p != '\r'))
+    {
+        // invalid character
+        return nullptr;
+    }
+    char const* const cr = p;
+    if(cr + 1 >= last)
+    {
+        ec = error::need_more;
+        return last;
+    }
+    if(cr[1] != '\n')
+    {
+        ec = error::bad_line_ending;
+        return last;
+    }
+    token_last = cr;
+    return cr + 2;
+}
+
+// Consume a CRLF at `it`.
+//
+// Returns `true` and advances `it` by two when it[0] is CR and
+// it[1] is LF; otherwise returns `false` and leaves `it` unchanged.
+// No bounds are checked here; it[1] is only read when it[0] is CR.
+bool
+basic_parser_base::
+parse_crlf(char const*& it)
+{
+    bool const matched = it[0] == '\r' && it[1] == '\n';
+    if(matched)
+        it += 2;
+    return matched;
+}
+
+// Parse a request method followed by a single SP.
+//
+// On success `result` refers to the method token and `it` is left
+// just past the SP. `ec` is assigned only on failure:
+//   error::need_more   the input ended before a non-token
+//                      character was seen
+//   error::bad_method  the token is empty or is not followed by SP
+void
+basic_parser_base::
+parse_method(
+    char const*& it, char const* last,
+    string_view& result, error_code& ec)
+{
+    // parse token SP
+    char const* const start = it;
+    for(;;)
+    {
+        if(it >= last)
+        {
+            ec = error::need_more;
+            return;
+        }
+        if(! detail::is_token_char(*it))
+            break;
+        ++it;
+    }
+    // `it` addresses the first non-token character; the token
+    // must be non-empty and terminated by SP
+    if(*it != ' ' || it == start)
+    {
+        ec = error::bad_method;
+        return;
+    }
+    result = make_string(start, it);
+    ++it;
+}
+
+// Parse a request-target followed by a single SP.
+//
+// On success `result` refers to the target and `it` is left just
+// past the SP. `ec` is assigned only on failure:
+//   error::need_more   the input ended before a character that
+//                      is_pathchar() rejects was seen
+//   error::bad_target  the target is empty or is not followed by SP
+void
+basic_parser_base::
+parse_target(
+    char const*& it, char const* last,
+    string_view& result, error_code& ec)
+{
+    // parse target SP
+    char const* const start = it;
+    for(;;)
+    {
+        if(it >= last)
+        {
+            ec = error::need_more;
+            return;
+        }
+        if(! is_pathchar(*it))
+            break;
+        ++it;
+    }
+    // `it` addresses the first character that is not a path
+    // character; the target must be non-empty and end with SP
+    if(*it != ' ' || it == start)
+    {
+        ec = error::bad_target;
+        return;
+    }
+    result = make_string(start, it);
+    ++it;
+}
+
+// Parse "HTTP/" DIGIT "." DIGIT.
+//
+// Requires eight bytes to be available, otherwise `ec` is set to
+// error::need_more and `it` is not moved. On success `result` is
+// 10 * major + minor and `it` has advanced by eight; `ec` is not
+// assigned. On a mismatch `ec` is set to error::bad_version; `it`
+// and `result` may already have been partially updated.
+void
+basic_parser_base::
+parse_version(
+    char const*& it, char const* last,
+    int& result, error_code& ec)
+{
+    if(it + 8 > last)
+    {
+        ec = error::need_more;
+        return;
+    }
+    // Literal prefix. `it` moves past every character examined,
+    // including one that does not match.
+    for(char const* lit = "HTTP/"; *lit != '\0'; ++lit)
+    {
+        if(*it++ != *lit)
+        {
+            ec = error::bad_version;
+            return;
+        }
+    }
+    // major version
+    if(! is_digit(*it))
+    {
+        ec = error::bad_version;
+        return;
+    }
+    result = 10 * (*it - '0');
+    ++it;
+    // separator
+    char const dot = *it;
+    ++it;
+    if(dot != '.')
+    {
+        ec = error::bad_version;
+        return;
+    }
+    // minor version
+    if(! is_digit(*it))
+    {
+        ec = error::bad_version;
+        return;
+    }
+    result += *it - '0';
+    ++it;
+}
+
+// Parse a three digit status code followed by a single SP.
+//
+// Requires four bytes to be available, otherwise `ec` is set to
+// error::need_more and `it` is not moved. On success `result` holds
+// the numeric status and `it` has advanced by four; `ec` is not
+// assigned. On a mismatch `ec` is set to error::bad_status; `it`
+// and `result` may already have been partially updated.
+void
+basic_parser_base::
+parse_status(
+    char const*& it, char const* last,
+    unsigned short& result, error_code& ec)
+{
+    // parse 3(digit) SP
+    if(it + 4 > last)
+    {
+        ec = error::need_more;
+        return;
+    }
+    int scale = 100;
+    for(int pos = 0; pos < 3; ++pos)
+    {
+        if(! is_digit(*it))
+        {
+            ec = error::bad_status;
+            return;
+        }
+        int const term = scale * (*it - '0');
+        ++it;
+        // The first digit overwrites `result`, the others add to it
+        if(pos == 0)
+            result = term;
+        else
+            result += term;
+        scale /= 10;
+    }
+    // The separator is consumed whether or not it is a SP
+    char const sep = *it;
+    ++it;
+    if(sep != ' ')
+        ec = error::bad_status;
+}
+
+// Parse the reason phrase of a status line, through its CRLF.
+//
+// The work is delegated to parse_token_to_eol(). On success
+// `result` refers to the text before the CR and `it` is left just
+// past the LF. If `ec` is set after the call the function returns
+// without touching `result` or `it`; if the helper returns nullptr
+// (invalid character) `ec` is set to error::bad_reason.
+void
+basic_parser_base::
+parse_reason(
+    char const*& it, char const* last,
+    string_view& result, error_code& ec)
+{
+    char const* const start = it;
+    char const* text_end = nullptr;
+    char const* const next =
+        parse_token_to_eol(it, last, text_end, ec);
+    if(ec)
+        return;
+    if(next == nullptr)
+    {
+        ec = error::bad_reason;
+        return;
+    }
+    result = make_string(start, text_end);
+    it = next;
+}
+
+// Parse one header field, including any obs-fold continuation lines.
+//
+// On entry `p` addresses the first character of the field name. On
+// success `p` is left just past the CRLF that ends the field, `name`
+// refers to the field name and `value` to the field value with
+// leading and trailing SP/HTAB removed. Without continuation lines
+// `value` refers directly into the input. With continuation lines
+// the non-empty pieces are joined with a single SP inside `buf` and
+// `value` refers to `buf`.
+//
+// One byte past the terminating CRLF must be available so that a
+// continuation line can be detected.
+//
+// `ec` is assigned only on failure:
+//   error::need_more     more input is required
+//   error::bad_field     empty name or invalid character in the name
+//   error::bad_value     invalid character in the value
+//   error::header_limit  the folded value does not fit in `buf`
+// The function tests `ec` after calling parse_token_to_eol(), which
+// assigns it only on failure, so `ec` is expected to be clear on
+// entry.
+void
+basic_parser_base::
+parse_field(
+    char const*& p,
+    char const* last,
+    string_view& name,
+    string_view& value,
+    static_string<max_obs_fold>& buf,
+    error_code& ec)
+{
+/*  header-field    = field-name ":" OWS field-value OWS
+
+    field-name      = token
+    field-value     = *( field-content / obs-fold )
+    field-content   = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+    field-vchar     = VCHAR / obs-text
+
+    obs-fold        = CRLF 1*( SP / HTAB )
+                    ; obsolete line folding
+                    ; see Section 3.2.4
+
+    token           = 1*<any CHAR except CTLs or separators>
+    CHAR            = <any US-ASCII character (octets 0 - 127)>
+    sep             = "(" | ")" | "<" | ">" | "@"
+                    | "," | ";" | ":" | "\" | <">
+                    | "/" | "[" | "]" | "?" | "="
+                    | "{" | "}" | SP | HT
+*/
+    // Lookup table indexed by octet: non-zero for token characters
+    static char const* is_token =
+        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+        "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
+        "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
+        "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
+        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+
+    // name
+    BOOST_ALIGNMENT(16) static const char ranges1[] =
+        "\x00 "  /* control chars and up to SP */
+        "\"\""   /* 0x22 */
+        "()"     /* 0x28,0x29 */
+        ",,"     /* 0x2c */
+        "//"     /* 0x2f */
+        ":@"     /* 0x3a-0x40 */
+        "[]"     /* 0x5b-0x5d */
+        "{\377"; /* 0x7b-0xff */
+    auto first = p;
+    bool found;
+    std::tie(p, found) = find_fast(
+        p, last, ranges1, sizeof(ranges1)-1);
+    if(! found && p >= last)
+    {
+        ec = error::need_more;
+        return;
+    }
+    // Advance over token characters until the ':' delimiter
+    for(;;)
+    {
+        if(*p == ':')
+            break;
+        if(! is_token[static_cast<
+            unsigned char>(*p)])
+        {
+            ec = error::bad_field;
+            return;
+        }
+        ++p;
+        if(p >= last)
+        {
+            ec = error::need_more;
+            return;
+        }
+    }
+    if(p == first)
+    {
+        // empty name
+        ec = error::bad_field;
+        return;
+    }
+    name = make_string(first, p);
+    ++p; // eat ':'
+    char const* token_last = nullptr;
+    // Fast path: parse lines until either the field ends (no
+    // folding, the value refers into the input) or a non-empty
+    // line is followed by a continuation line.
+    for(;;)
+    {
+        // eat leading ' ' and '\t'
+        for(;;++p)
+        {
+            if(p + 1 > last)
+            {
+                ec = error::need_more;
+                return;
+            }
+            if(! (*p == ' ' || *p == '\t'))
+                break;
+        }
+        // parse to CRLF
+        first = p;
+        p = parse_token_to_eol(p, last, token_last, ec);
+        if(ec)
+            return;
+        if(! p)
+        {
+            ec = error::bad_value;
+            return;
+        }
+        // Look 1 char past the CRLF to handle obs-fold.
+        if(p + 1 > last)
+        {
+            ec = error::need_more;
+            return;
+        }
+        token_last =
+            trim_back(token_last, first);
+        if(*p != ' ' && *p != '\t')
+        {
+            value = make_string(first, token_last);
+            return;
+        }
+        ++p;
+        // An empty line before the fold contributes nothing;
+        // keep looking for the first non-empty piece
+        if(token_last != first)
+            break;
+    }
+    // Slow path: the value is folded, assemble it in `buf`
+#ifndef BOOST_NO_EXCEPTIONS
+    try
+#endif
+    {
+        // The first piece alone can exceed the capacity of `buf`,
+        // so the initial fill must also be covered by the handler
+        // that reports error::header_limit.
+        buf.resize(0);
+        buf.append(first, token_last);
+        BOOST_ASSERT(! buf.empty());
+        for(;;)
+        {
+            // eat leading ' ' and '\t'
+            for(;;++p)
+            {
+                if(p + 1 > last)
+                {
+                    ec = error::need_more;
+                    return;
+                }
+                if(! (*p == ' ' || *p == '\t'))
+                    break;
+            }
+            // parse to CRLF
+            first = p;
+            p = parse_token_to_eol(p, last, token_last, ec);
+            if(ec)
+                return;
+            if(! p)
+            {
+                ec = error::bad_value;
+                return;
+            }
+            // Look 1 char past the CRLF to handle obs-fold.
+            if(p + 1 > last)
+            {
+                ec = error::need_more;
+                return;
+            }
+            token_last = trim_back(token_last, first);
+            // Empty continuation lines are skipped; others are
+            // joined to the previous piece with a single SP
+            if(first != token_last)
+            {
+                buf.push_back(' ');
+                buf.append(first, token_last);
+            }
+            if(*p != ' ' && *p != '\t')
+            {
+                value = {buf.data(), buf.size()};
+                return;
+            }
+            ++p;
+        }
+    }
+#ifndef BOOST_NO_EXCEPTIONS
+    catch(std::length_error const&)
+    {
+        ec = error::header_limit;
+        return;
+    }
+#endif
+}
+
+
+// Validate and skip over the chunk extensions that begin at `it`.
+//
+// Nothing is extracted; `it` is simply advanced. The function returns
+// without assigning `ec` when it reaches a character that cannot
+// start or continue an extension. `ec` is assigned only on failure:
+//   error::need_more            the input ended inside an extension
+//   error::bad_chunk_extension  the input violates the grammar below
+// The contents of a quoted-string are not validated beyond finding
+// the closing DQUOTE; a backslash causes the next character to be
+// skipped.
+void
+basic_parser_base::
+parse_chunk_extensions(
+    char const*& it,
+    char const* last,
+    error_code& ec)
+{
+/*
+    chunk-ext       = *( BWS  ";" BWS chunk-ext-name [ BWS  "=" BWS chunk-ext-val ] )
+    BWS             = *( SP / HTAB ) ; "Bad White Space"
+    chunk-ext-name  = token
+    chunk-ext-val   = token / quoted-string
+    token           = 1*tchar
+    quoted-string   = DQUOTE *( qdtext / quoted-pair ) DQUOTE
+    qdtext          = HTAB / SP / "!" / %x23-5B ; '#'-'[' / %x5D-7E ; ']'-'~' / obs-text
+    quoted-pair     = "\" ( HTAB / SP / VCHAR / obs-text )
+    obs-text        = %x80-FF
+
+    https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4667
+*/
+// Entry point for each extension: expects BWS ";" or the end of
+// the extension list
+loop:
+    if(it == last)
+    {
+        ec = error::need_more;
+        return;
+    }
+    // Anything other than SP, HTAB or ';' ends the extension list
+    if(*it != ' ' && *it != '\t' && *it != ';')
+        return;
+    // BWS
+    if(*it == ' ' || *it == '\t')
+    {
+        for(;;)
+        {
+            ++it;
+            if(it == last)
+            {
+                ec = error::need_more;
+                return;
+            }
+            if(*it != ' ' && *it != '\t')
+                break;
+        }
+    }
+    // ';'
+    if(*it != ';')
+    {
+        ec = error::bad_chunk_extension;
+        return;
+    }
+// Also reached directly when a name is followed by ';', i.e. an
+// extension without a value
+semi:
+    ++it; // skip ';'
+    // BWS
+    for(;;)
+    {
+        if(it == last)
+        {
+            ec = error::need_more;
+            return;
+        }
+        if(*it != ' ' && *it != '\t')
+            break;
+        ++it;
+    }
+    // chunk-ext-name
+    // At least one token character is required
+    if(! detail::is_token_char(*it))
+    {
+        ec = error::bad_chunk_extension;
+        return;
+    }
+    for(;;)
+    {
+        ++it;
+        if(it == last)
+        {
+            ec = error::need_more;
+            return;
+        }
+        if(! detail::is_token_char(*it))
+            break;
+    }
+    // BWS [ ";" / "=" ]
+    {
+        // Records whether any whitespace followed the name
+        bool bws;
+        if(*it == ' ' || *it == '\t')
+        {
+            for(;;)
+            {
+                ++it;
+                if(it == last)
+                {
+                    ec = error::need_more;
+                    return;
+                }
+                if(*it != ' ' && *it != '\t')
+                    break;
+            }
+            bws = true;
+        }
+        else
+        {
+            bws = false;
+        }
+        if(*it == ';')
+            goto semi;
+        if(*it != '=')
+        {
+            // The name ends the extension list. Whitespace that
+            // is followed by neither ';' nor '=' is an error.
+            if(bws)
+                ec = error::bad_chunk_extension;
+            return;
+        }
+        ++it; // skip '='
+    }
+    // BWS
+    for(;;)
+    {
+        if(it == last)
+        {
+            ec = error::need_more;
+            return;
+        }
+        if(*it != ' ' && *it != '\t')
+            break;
+        ++it;
+    }
+    // chunk-ext-val
+    if(*it != '"')
+    {
+        // token
+        if(! detail::is_token_char(*it))
+        {
+            ec = error::bad_chunk_extension;
+            return;
+        }
+        for(;;)
+        {
+            ++it;
+            if(it == last)
+            {
+                ec = error::need_more;
+                return;
+            }
+            if(! detail::is_token_char(*it))
+                break;
+        }
+    }
+    else
+    {
+        // quoted-string
+        for(;;)
+        {
+            ++it;
+            if(it == last)
+            {
+                ec = error::need_more;
+                return;
+            }
+            if(*it == '"')
+                break;
+            if(*it == '\\')
+            {
+                // quoted-pair: step onto the escaped character so
+                // the next iteration moves past it
+                ++it;
+                if(it == last)
+                {
+                    ec = error::need_more;
+                    return;
+                }
+            }
+        }
+        ++it; // skip the closing '"'
+    }
+    // Look for another extension
+    goto loop;
+}
+
+} // detail
+} // http
+} // beast
+} // boost
+
+#endif