summaryrefslogtreecommitdiff
path: root/boost/beast/http/detail/basic_parser.ipp
diff options
context:
space:
mode:
Diffstat (limited to 'boost/beast/http/detail/basic_parser.ipp')
-rw-r--r--boost/beast/http/detail/basic_parser.ipp848
1 files changed, 848 insertions, 0 deletions
diff --git a/boost/beast/http/detail/basic_parser.ipp b/boost/beast/http/detail/basic_parser.ipp
new file mode 100644
index 0000000000..934d0dc505
--- /dev/null
+++ b/boost/beast/http/detail/basic_parser.ipp
@@ -0,0 +1,848 @@
+//
+// Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Official repository: https://github.com/boostorg/beast
+//
+
+#ifndef BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_IPP
+#define BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_IPP
+
+#include <boost/beast/http/detail/basic_parser.hpp>
+#include <limits>
+
+namespace boost {
+namespace beast {
+namespace http {
+namespace detail {
+
+char const*
+basic_parser_base::
+trim_front(char const* it, char const* end)
+{
+ while(it != end)
+ {
+ if(*it != ' ' && *it != '\t')
+ break;
+ ++it;
+ }
+ return it;
+}
+
+char const*
+basic_parser_base::
+trim_back(
+ char const* it, char const* first)
+{
+ while(it != first)
+ {
+ auto const c = it[-1];
+ if(c != ' ' && c != '\t')
+ break;
+ --it;
+ }
+ return it;
+}
+
+bool
+basic_parser_base::
+is_pathchar(char c)
+{
+ // VFALCO This looks the same as the one below...
+
+ // TEXT = <any OCTET except CTLs, and excluding LWS>
+ static bool constexpr tab[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 32
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 48
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 96
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 112
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 128
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 144
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 160
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 176
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 192
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 208
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 224
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 240
+ };
+ return tab[static_cast<unsigned char>(c)];
+}
+
+bool
+basic_parser_base::
+unhex(unsigned char& d, char c)
+{
+ static signed char constexpr tab[256] = {
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 0
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 16
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 32
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, // 48
+ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 64
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 80
+ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 96
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 112
+
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 128
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 144
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 160
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 176
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 192
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 208
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 224
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 240
+ };
+ d = static_cast<unsigned char>(
+ tab[static_cast<unsigned char>(c)]);
+ return d != static_cast<unsigned char>(-1);
+}
+
+//--------------------------------------------------------------------------
+
+std::pair<char const*, bool>
+basic_parser_base::
+find_fast(
+ char const* buf,
+ char const* buf_end,
+ char const* ranges,
+ size_t ranges_size)
+{
+ bool found = false;
+ boost::ignore_unused(buf_end, ranges, ranges_size);
+ return {buf, found};
+}
+
+// VFALCO Can SIMD help this?
+char const*
+basic_parser_base::
+find_eol(
+ char const* it, char const* last,
+ error_code& ec)
+{
+ for(;;)
+ {
+ if(it == last)
+ {
+ ec = {};
+ return nullptr;
+ }
+ if(*it == '\r')
+ {
+ if(++it == last)
+ {
+ ec = {};
+ return nullptr;
+ }
+ if(*it != '\n')
+ {
+ ec = error::bad_line_ending;
+ return nullptr;
+ }
+ ec = {};
+ return ++it;
+ }
+ // VFALCO Should we handle the legacy case
+ // for lines terminated with a single '\n'?
+ ++it;
+ }
+}
+
+bool
+basic_parser_base::
+parse_dec(char const* it, char const* last, std::uint64_t& v)
+{
+ if(it == last)
+ return false;
+ std::uint64_t tmp = 0;
+ do
+ {
+ if((! is_digit(*it)) ||
+ tmp > (std::numeric_limits<std::uint64_t>::max)() / 10)
+ return false;
+ tmp *= 10;
+ std::uint64_t const d = *it - '0';
+ if((std::numeric_limits<std::uint64_t>::max)() - tmp < d)
+ return false;
+ tmp += d;
+ }
+ while(++it != last);
+ v = tmp;
+ return true;
+}
+
+bool
+basic_parser_base::
+parse_hex(char const*& it, std::uint64_t& v)
+{
+ unsigned char d;
+ if(! unhex(d, *it))
+ return false;
+ std::uint64_t tmp = 0;
+ do
+ {
+ if(tmp > (std::numeric_limits<std::uint64_t>::max)() / 16)
+ return false;
+ tmp *= 16;
+ if((std::numeric_limits<std::uint64_t>::max)() - tmp < d)
+ return false;
+ tmp += d;
+ }
+ while(unhex(d, *++it));
+ v = tmp;
+ return true;
+}
+
+char const*
+basic_parser_base::
+find_eom(char const* p, char const* last)
+{
+ for(;;)
+ {
+ if(p + 4 > last)
+ return nullptr;
+ if(p[3] != '\n')
+ {
+ if(p[3] == '\r')
+ ++p;
+ else
+ p += 4;
+ }
+ else if(p[2] != '\r')
+ {
+ p += 4;
+ }
+ else if(p[1] != '\n')
+ {
+ p += 2;
+ }
+ else if(p[0] != '\r')
+ {
+ p += 2;
+ }
+ else
+ {
+ return p + 4;
+ }
+ }
+}
+
+//--------------------------------------------------------------------------
+
+char const*
+basic_parser_base::
+parse_token_to_eol(
+ char const* p,
+ char const* last,
+ char const*& token_last,
+ error_code& ec)
+{
+ for(;; ++p)
+ {
+ if(p >= last)
+ {
+ ec = error::need_more;
+ return p;
+ }
+ if(BOOST_UNLIKELY(! is_print(*p)))
+ if((BOOST_LIKELY(static_cast<
+ unsigned char>(*p) < '\040') &&
+ BOOST_LIKELY(*p != 9)) ||
+ BOOST_UNLIKELY(*p == 127))
+ goto found_control;
+ }
+found_control:
+ if(BOOST_LIKELY(*p == '\r'))
+ {
+ if(++p >= last)
+ {
+ ec = error::need_more;
+ return last;
+ }
+ if(*p++ != '\n')
+ {
+ ec = error::bad_line_ending;
+ return last;
+ }
+ token_last = p - 2;
+ }
+#if 0
+ // VFALCO This allows `\n` by itself
+ // to terminate a line
+ else if(*p == '\n')
+ {
+ token_last = p;
+ ++p;
+ }
+#endif
+ else
+ {
+ // invalid character
+ return nullptr;
+ }
+ return p;
+}
+
+bool
+basic_parser_base::
+parse_crlf(char const*& it)
+{
+ if( it[0] != '\r' || it[1] != '\n')
+ return false;
+ it += 2;
+ return true;
+}
+
+void
+basic_parser_base::
+parse_method(
+ char const*& it, char const* last,
+ string_view& result, error_code& ec)
+{
+ // parse token SP
+ auto const first = it;
+ for(;; ++it)
+ {
+ if(it + 1 > last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(! detail::is_token_char(*it))
+ break;
+ }
+ if(it + 1 > last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(*it != ' ')
+ {
+ ec = error::bad_method;
+ return;
+ }
+ if(it == first)
+ {
+ // cannot be empty
+ ec = error::bad_method;
+ return;
+ }
+ result = make_string(first, it++);
+}
+
+void
+basic_parser_base::
+parse_target(
+ char const*& it, char const* last,
+ string_view& result, error_code& ec)
+{
+ // parse target SP
+ auto const first = it;
+ for(;; ++it)
+ {
+ if(it + 1 > last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(! is_pathchar(*it))
+ break;
+ }
+ if(it + 1 > last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(*it != ' ')
+ {
+ ec = error::bad_target;
+ return;
+ }
+ if(it == first)
+ {
+ // cannot be empty
+ ec = error::bad_target;
+ return;
+ }
+ result = make_string(first, it++);
+}
+
+void
+basic_parser_base::
+parse_version(
+ char const*& it, char const* last,
+ int& result, error_code& ec)
+{
+ if(it + 8 > last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(*it++ != 'H')
+ {
+ ec = error::bad_version;
+ return;
+ }
+ if(*it++ != 'T')
+ {
+ ec = error::bad_version;
+ return;
+ }
+ if(*it++ != 'T')
+ {
+ ec = error::bad_version;
+ return;
+ }
+ if(*it++ != 'P')
+ {
+ ec = error::bad_version;
+ return;
+ }
+ if(*it++ != '/')
+ {
+ ec = error::bad_version;
+ return;
+ }
+ if(! is_digit(*it))
+ {
+ ec = error::bad_version;
+ return;
+ }
+ result = 10 * (*it++ - '0');
+ if(*it++ != '.')
+ {
+ ec = error::bad_version;
+ return;
+ }
+ if(! is_digit(*it))
+ {
+ ec = error::bad_version;
+ return;
+ }
+ result += *it++ - '0';
+}
+
+void
+basic_parser_base::
+parse_status(
+ char const*& it, char const* last,
+ unsigned short& result, error_code& ec)
+{
+ // parse 3(digit) SP
+ if(it + 4 > last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(! is_digit(*it))
+ {
+ ec = error::bad_status;
+ return;
+ }
+ result = 100 * (*it++ - '0');
+ if(! is_digit(*it))
+ {
+ ec = error::bad_status;
+ return;
+ }
+ result += 10 * (*it++ - '0');
+ if(! is_digit(*it))
+ {
+ ec = error::bad_status;
+ return;
+ }
+ result += *it++ - '0';
+ if(*it++ != ' ')
+ {
+ ec = error::bad_status;
+ return;
+ }
+}
+
+void
+basic_parser_base::
+parse_reason(
+ char const*& it, char const* last,
+ string_view& result, error_code& ec)
+{
+ auto const first = it;
+ char const* token_last = nullptr;
+ auto p = parse_token_to_eol(
+ it, last, token_last, ec);
+ if(ec)
+ return;
+ if(! p)
+ {
+ ec = error::bad_reason;
+ return;
+ }
+ result = make_string(first, token_last);
+ it = p;
+}
+
+void
+basic_parser_base::
+parse_field(
+ char const*& p,
+ char const* last,
+ string_view& name,
+ string_view& value,
+ static_string<max_obs_fold>& buf,
+ error_code& ec)
+{
+/* header-field = field-name ":" OWS field-value OWS
+
+ field-name = token
+ field-value = *( field-content / obs-fold )
+ field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+ field-vchar = VCHAR / obs-text
+
+ obs-fold = CRLF 1*( SP / HTAB )
+ ; obsolete line folding
+ ; see Section 3.2.4
+
+ token = 1*<any CHAR except CTLs or separators>
+ CHAR = <any US-ASCII character (octets 0 - 127)>
+ sep = "(" | ")" | "<" | ">" | "@"
+ | "," | ";" | ":" | "\" | <">
+ | "/" | "[" | "]" | "?" | "="
+ | "{" | "}" | SP | HT
+*/
+ static char const* is_token =
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
+ "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
+ "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+
+ // name
+ BOOST_ALIGNMENT(16) static const char ranges1[] =
+ "\x00 " /* control chars and up to SP */
+ "\"\"" /* 0x22 */
+ "()" /* 0x28,0x29 */
+ ",," /* 0x2c */
+ "//" /* 0x2f */
+ ":@" /* 0x3a-0x40 */
+ "[]" /* 0x5b-0x5d */
+ "{\377"; /* 0x7b-0xff */
+ auto first = p;
+ bool found;
+ std::tie(p, found) = find_fast(
+ p, last, ranges1, sizeof(ranges1)-1);
+ if(! found && p >= last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ for(;;)
+ {
+ if(*p == ':')
+ break;
+ if(! is_token[static_cast<
+ unsigned char>(*p)])
+ {
+ ec = error::bad_field;
+ return;
+ }
+ ++p;
+ if(p >= last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ }
+ if(p == first)
+ {
+ // empty name
+ ec = error::bad_field;
+ return;
+ }
+ name = make_string(first, p);
+ ++p; // eat ':'
+ char const* token_last = nullptr;
+ for(;;)
+ {
+ // eat leading ' ' and '\t'
+ for(;;++p)
+ {
+ if(p + 1 > last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(! (*p == ' ' || *p == '\t'))
+ break;
+ }
+ // parse to CRLF
+ first = p;
+ p = parse_token_to_eol(p, last, token_last, ec);
+ if(ec)
+ return;
+ if(! p)
+ {
+ ec = error::bad_value;
+ return;
+ }
+ // Look 1 char past the CRLF to handle obs-fold.
+ if(p + 1 > last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ token_last =
+ trim_back(token_last, first);
+ if(*p != ' ' && *p != '\t')
+ {
+ value = make_string(first, token_last);
+ return;
+ }
+ ++p;
+ if(token_last != first)
+ break;
+ }
+ buf.resize(0);
+ buf.append(first, token_last);
+ BOOST_ASSERT(! buf.empty());
+#ifndef BOOST_NO_EXCEPTIONS
+ try
+#endif
+ {
+ for(;;)
+ {
+ // eat leading ' ' and '\t'
+ for(;;++p)
+ {
+ if(p + 1 > last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(! (*p == ' ' || *p == '\t'))
+ break;
+ }
+ // parse to CRLF
+ first = p;
+ p = parse_token_to_eol(p, last, token_last, ec);
+ if(ec)
+ return;
+ if(! p)
+ {
+ ec = error::bad_value;
+ return;
+ }
+ // Look 1 char past the CRLF to handle obs-fold.
+ if(p + 1 > last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ token_last = trim_back(token_last, first);
+ if(first != token_last)
+ {
+ buf.push_back(' ');
+ buf.append(first, token_last);
+ }
+ if(*p != ' ' && *p != '\t')
+ {
+ value = {buf.data(), buf.size()};
+ return;
+ }
+ ++p;
+ }
+ }
+#ifndef BOOST_NO_EXCEPTIONS
+ catch(std::length_error const&)
+ {
+ ec = error::header_limit;
+ return;
+ }
+#endif
+}
+
+
+void
+basic_parser_base::
+parse_chunk_extensions(
+ char const*& it,
+ char const* last,
+ error_code& ec)
+{
+/*
+ chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
+ BWS = *( SP / HTAB ) ; "Bad White Space"
+ chunk-ext-name = token
+ chunk-ext-val = token / quoted-string
+ token = 1*tchar
+ quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
+ qdtext = HTAB / SP / "!" / %x23-5B ; '#'-'[' / %x5D-7E ; ']'-'~' / obs-text
+ quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
+ obs-text = %x80-FF
+
+ https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4667
+*/
+loop:
+ if(it == last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(*it != ' ' && *it != '\t' && *it != ';')
+ return;
+ // BWS
+ if(*it == ' ' || *it == '\t')
+ {
+ for(;;)
+ {
+ ++it;
+ if(it == last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(*it != ' ' && *it != '\t')
+ break;
+ }
+ }
+ // ';'
+ if(*it != ';')
+ {
+ ec = error::bad_chunk_extension;
+ return;
+ }
+semi:
+ ++it; // skip ';'
+ // BWS
+ for(;;)
+ {
+ if(it == last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(*it != ' ' && *it != '\t')
+ break;
+ ++it;
+ }
+ // chunk-ext-name
+ if(! detail::is_token_char(*it))
+ {
+ ec = error::bad_chunk_extension;
+ return;
+ }
+ for(;;)
+ {
+ ++it;
+ if(it == last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(! detail::is_token_char(*it))
+ break;
+ }
+ // BWS [ ";" / "=" ]
+ {
+ bool bws;
+ if(*it == ' ' || *it == '\t')
+ {
+ for(;;)
+ {
+ ++it;
+ if(it == last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(*it != ' ' && *it != '\t')
+ break;
+ }
+ bws = true;
+ }
+ else
+ {
+ bws = false;
+ }
+ if(*it == ';')
+ goto semi;
+ if(*it != '=')
+ {
+ if(bws)
+ ec = error::bad_chunk_extension;
+ return;
+ }
+ ++it; // skip '='
+ }
+ // BWS
+ for(;;)
+ {
+ if(it == last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(*it != ' ' && *it != '\t')
+ break;
+ ++it;
+ }
+ // chunk-ext-val
+ if(*it != '"')
+ {
+ // token
+ if(! detail::is_token_char(*it))
+ {
+ ec = error::bad_chunk_extension;
+ return;
+ }
+ for(;;)
+ {
+ ++it;
+ if(it == last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(! detail::is_token_char(*it))
+ break;
+ }
+ }
+ else
+ {
+ // quoted-string
+ for(;;)
+ {
+ ++it;
+ if(it == last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ if(*it == '"')
+ break;
+ if(*it == '\\')
+ {
+ ++it;
+ if(it == last)
+ {
+ ec = error::need_more;
+ return;
+ }
+ }
+ }
+ ++it;
+ }
+ goto loop;
+}
+
+} // detail
+} // http
+} // beast
+} // boost
+
+#endif