summaryrefslogtreecommitdiff
path: root/boost/beast/http/detail/basic_parser.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'boost/beast/http/detail/basic_parser.hpp')
-rw-r--r--boost/beast/http/detail/basic_parser.hpp813
1 files changed, 56 insertions, 757 deletions
diff --git a/boost/beast/http/detail/basic_parser.hpp b/boost/beast/http/detail/basic_parser.hpp
index 741de6edf0..f4e9083ffa 100644
--- a/boost/beast/http/detail/basic_parser.hpp
+++ b/boost/beast/http/detail/basic_parser.hpp
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
+// Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -12,14 +12,11 @@
#include <boost/beast/core/static_string.hpp>
#include <boost/beast/core/string.hpp>
-#include <boost/beast/core/detail/cpu_info.hpp>
#include <boost/beast/http/error.hpp>
#include <boost/beast/http/detail/rfc7230.hpp>
#include <boost/config.hpp>
#include <boost/version.hpp>
-#include <algorithm>
#include <cstddef>
-#include <limits>
#include <utility>
namespace boost {
@@ -35,12 +32,6 @@ struct basic_parser_base
//
static std::size_t constexpr max_obs_fold = 4096;
- template<class T>
- struct is_unsigned_integer:
- std::integral_constant<bool,
- std::numeric_limits<T>::is_integer &&
- ! std::numeric_limits<T>::is_signed> {};
-
enum class state
{
nothing_yet = 0,
@@ -58,63 +49,6 @@ struct basic_parser_base
static
bool
- is_pathchar(char c)
- {
- // VFALCO This looks the same as the one below...
-
- // TEXT = <any OCTET except CTLs, and excluding LWS>
- static bool constexpr tab[256] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 32
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 48
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 96
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 112
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 128
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 144
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 160
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 176
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 192
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 208
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 224
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 240
- };
- return tab[static_cast<unsigned char>(c)];
- }
-
- static
- inline
- bool
- unhex(unsigned char& d, char c)
- {
- static signed char constexpr tab[256] = {
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 0
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 16
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 32
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, // 48
- -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 64
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 80
- -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 96
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 112
-
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 128
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 144
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 160
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 176
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 192
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 208
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 224
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 240
- };
- d = static_cast<unsigned char>(
- tab[static_cast<unsigned char>(c)]);
- return d != static_cast<unsigned char>(-1);
- }
-
- static
- bool
is_digit(char c)
{
return static_cast<unsigned char>(c-'0') < 10;
@@ -127,35 +61,16 @@ struct basic_parser_base
return static_cast<unsigned char>(c-32) < 95;
}
- template<class FwdIt>
+ BOOST_BEAST_DECL
static
- FwdIt
- trim_front(FwdIt it, FwdIt const& end)
- {
- while(it != end)
- {
- if(*it != ' ' && *it != '\t')
- break;
- ++it;
- }
- return it;
- }
+ char const*
+ trim_front(char const* it, char const* end);
- template<class RanIt>
+ BOOST_BEAST_DECL
static
- RanIt
+ char const*
trim_back(
- RanIt it, RanIt const& first)
- {
- while(it != first)
- {
- auto const c = it[-1];
- if(c != ' ' && c != '\t')
- break;
- --it;
- }
- return it;
- }
+ char const* it, char const* first);
static
string_view
@@ -167,736 +82,116 @@ struct basic_parser_base
//--------------------------------------------------------------------------
+ BOOST_BEAST_DECL
+ static
+ bool
+ is_pathchar(char c);
+
+ BOOST_BEAST_DECL
+ static
+ bool
+ unhex(unsigned char& d, char c);
+
+ BOOST_BEAST_DECL
static
std::pair<char const*, bool>
find_fast(
char const* buf,
char const* buf_end,
char const* ranges,
- size_t ranges_size)
- {
- bool found = false;
- boost::ignore_unused(buf_end, ranges, ranges_size);
- return {buf, found};
- }
+ size_t ranges_size);
- // VFALCO Can SIMD help this?
+ BOOST_BEAST_DECL
static
char const*
find_eol(
char const* it, char const* last,
- error_code& ec)
- {
- for(;;)
- {
- if(it == last)
- {
- ec.assign(0, ec.category());
- return nullptr;
- }
- if(*it == '\r')
- {
- if(++it == last)
- {
- ec.assign(0, ec.category());
- return nullptr;
- }
- if(*it != '\n')
- {
- ec = error::bad_line_ending;
- return nullptr;
- }
- ec.assign(0, ec.category());
- return ++it;
- }
- // VFALCO Should we handle the legacy case
- // for lines terminated with a single '\n'?
- ++it;
- }
- }
+ error_code& ec);
+ BOOST_BEAST_DECL
static
char const*
- find_eom(char const* p, char const* last)
- {
- for(;;)
- {
- if(p + 4 > last)
- return nullptr;
- if(p[3] != '\n')
- {
- if(p[3] == '\r')
- ++p;
- else
- p += 4;
- }
- else if(p[2] != '\r')
- {
- p += 4;
- }
- else if(p[1] != '\n')
- {
- p += 2;
- }
- else if(p[0] != '\r')
- {
- p += 2;
- }
- else
- {
- return p + 4;
- }
- }
- }
+ find_eom(char const* p, char const* last);
//--------------------------------------------------------------------------
+ BOOST_BEAST_DECL
static
char const*
parse_token_to_eol(
char const* p,
char const* last,
char const*& token_last,
- error_code& ec)
- {
- for(;; ++p)
- {
- if(p >= last)
- {
- ec = error::need_more;
- return p;
- }
- if(BOOST_UNLIKELY(! is_print(*p)))
- if((BOOST_LIKELY(static_cast<
- unsigned char>(*p) < '\040') &&
- BOOST_LIKELY(*p != '\011')) ||
- BOOST_UNLIKELY(*p == '\177'))
- goto found_control;
- }
- found_control:
- if(BOOST_LIKELY(*p == '\r'))
- {
- if(++p >= last)
- {
- ec = error::need_more;
- return last;
- }
- if(*p++ != '\n')
- {
- ec = error::bad_line_ending;
- return last;
- }
- token_last = p - 2;
- }
- #if 0
- // VFALCO This allows `\n` by itself
- // to terminate a line
- else if(*p == '\n')
- {
- token_last = p;
- ++p;
- }
- #endif
- else
- {
- // invalid character
- return nullptr;
- }
- return p;
- }
+ error_code& ec);
- template<class Iter, class T>
+ BOOST_BEAST_DECL
static
- typename std::enable_if<is_unsigned_integer<T>::value, bool>::type
- parse_dec(Iter it, Iter last, T& v)
- {
- if(it == last)
- return false;
- T tmp = 0;
- do
- {
- if((! is_digit(*it)) ||
- tmp > (std::numeric_limits<T>::max)() / 10)
- return false;
- tmp *= 10;
- T const d = *it - '0';
- if((std::numeric_limits<T>::max)() - tmp < d)
- return false;
- tmp += d;
- }
- while(++it != last);
- v = tmp;
- return true;
- }
+ bool
+ parse_dec(char const* it, char const* last, std::uint64_t& v);
- template<class Iter, class T>
+ BOOST_BEAST_DECL
static
- typename std::enable_if<is_unsigned_integer<T>::value, bool>::type
- parse_hex(Iter& it, T& v)
- {
- unsigned char d;
- if(! unhex(d, *it))
- return false;
- T tmp = 0;
- do
- {
- if(tmp > (std::numeric_limits<T>::max)() / 16)
- return false;
- tmp *= 16;
- if((std::numeric_limits<T>::max)() - tmp < d)
- return false;
- tmp += d;
- }
- while(unhex(d, *++it));
- v = tmp;
- return true;
- }
+ bool
+ parse_hex(char const*& it, std::uint64_t& v);
+ BOOST_BEAST_DECL
static
bool
- parse_crlf(char const*& it)
- {
- if( it[0] != '\r' || it[1] != '\n')
- return false;
- it += 2;
- return true;
- }
+ parse_crlf(char const*& it);
+ BOOST_BEAST_DECL
static
void
parse_method(
char const*& it, char const* last,
- string_view& result, error_code& ec)
- {
- // parse token SP
- auto const first = it;
- for(;; ++it)
- {
- if(it + 1 > last)
- {
- ec = error::need_more;
- return;
- }
- if(! detail::is_token_char(*it))
- break;
- }
- if(it + 1 > last)
- {
- ec = error::need_more;
- return;
- }
- if(*it != ' ')
- {
- ec = error::bad_method;
- return;
- }
- if(it == first)
- {
- // cannot be empty
- ec = error::bad_method;
- return;
- }
- result = make_string(first, it++);
- }
+ string_view& result, error_code& ec);
+ BOOST_BEAST_DECL
static
void
parse_target(
char const*& it, char const* last,
- string_view& result, error_code& ec)
- {
- // parse target SP
- auto const first = it;
- for(;; ++it)
- {
- if(it + 1 > last)
- {
- ec = error::need_more;
- return;
- }
- if(! is_pathchar(*it))
- break;
- }
- if(it + 1 > last)
- {
- ec = error::need_more;
- return;
- }
- if(*it != ' ')
- {
- ec = error::bad_target;
- return;
- }
- if(it == first)
- {
- // cannot be empty
- ec = error::bad_target;
- return;
- }
- result = make_string(first, it++);
- }
+ string_view& result, error_code& ec);
+ BOOST_BEAST_DECL
static
void
parse_version(
char const*& it, char const* last,
- int& result, error_code& ec)
- {
- if(it + 8 > last)
- {
- ec = error::need_more;
- return;
- }
- if(*it++ != 'H')
- {
- ec = error::bad_version;
- return;
- }
- if(*it++ != 'T')
- {
- ec = error::bad_version;
- return;
- }
- if(*it++ != 'T')
- {
- ec = error::bad_version;
- return;
- }
- if(*it++ != 'P')
- {
- ec = error::bad_version;
- return;
- }
- if(*it++ != '/')
- {
- ec = error::bad_version;
- return;
- }
- if(! is_digit(*it))
- {
- ec = error::bad_version;
- return;
- }
- result = 10 * (*it++ - '0');
- if(*it++ != '.')
- {
- ec = error::bad_version;
- return;
- }
- if(! is_digit(*it))
- {
- ec = error::bad_version;
- return;
- }
- result += *it++ - '0';
- }
+ int& result, error_code& ec);
+ BOOST_BEAST_DECL
static
void
parse_status(
char const*& it, char const* last,
- unsigned short& result, error_code& ec)
- {
- // parse 3(digit) SP
- if(it + 4 > last)
- {
- ec = error::need_more;
- return;
- }
- if(! is_digit(*it))
- {
- ec = error::bad_status;
- return;
- }
- result = 100 * (*it++ - '0');
- if(! is_digit(*it))
- {
- ec = error::bad_status;
- return;
- }
- result += 10 * (*it++ - '0');
- if(! is_digit(*it))
- {
- ec = error::bad_status;
- return;
- }
- result += *it++ - '0';
- if(*it++ != ' ')
- {
- ec = error::bad_status;
- return;
- }
- }
-
+ unsigned short& result, error_code& ec);
+
+ BOOST_BEAST_DECL
+ static
void
parse_reason(
char const*& it, char const* last,
- string_view& result, error_code& ec)
- {
- auto const first = it;
- char const* token_last = nullptr;
- auto p = parse_token_to_eol(
- it, last, token_last, ec);
- if(ec)
- return;
- if(! p)
- {
- ec = error::bad_reason;
- return;
- }
- result = make_string(first, token_last);
- it = p;
- }
+ string_view& result, error_code& ec);
- template<std::size_t N>
+ BOOST_BEAST_DECL
+ static
void
parse_field(
char const*& p,
char const* last,
string_view& name,
string_view& value,
- static_string<N>& buf,
- error_code& ec)
- {
- /* header-field = field-name ":" OWS field-value OWS
-
- field-name = token
- field-value = *( field-content / obs-fold )
- field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
- field-vchar = VCHAR / obs-text
-
- obs-fold = CRLF 1*( SP / HTAB )
- ; obsolete line folding
- ; see Section 3.2.4
-
- token = 1*<any CHAR except CTLs or separators>
- CHAR = <any US-ASCII character (octets 0 - 127)>
- sep = "(" | ")" | "<" | ">" | "@"
- | "," | ";" | ":" | "\" | <">
- | "/" | "[" | "]" | "?" | "="
- | "{" | "}" | SP | HT
- */
- static char const* is_token =
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
- "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
- "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
- "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
-
- // name
- BOOST_ALIGNMENT(16) static const char ranges1[] =
- "\x00 " /* control chars and up to SP */
- "\"\"" /* 0x22 */
- "()" /* 0x28,0x29 */
- ",," /* 0x2c */
- "//" /* 0x2f */
- ":@" /* 0x3a-0x40 */
- "[]" /* 0x5b-0x5d */
- "{\377"; /* 0x7b-0xff */
- auto first = p;
- bool found;
- std::tie(p, found) = find_fast(
- p, last, ranges1, sizeof(ranges1)-1);
- if(! found && p >= last)
- {
- ec = error::need_more;
- return;
- }
- for(;;)
- {
- if(*p == ':')
- break;
- if(! is_token[static_cast<
- unsigned char>(*p)])
- {
- ec = error::bad_field;
- return;
- }
- ++p;
- if(p >= last)
- {
- ec = error::need_more;
- return;
- }
- }
- if(p == first)
- {
- // empty name
- ec = error::bad_field;
- return;
- }
- name = make_string(first, p);
- ++p; // eat ':'
- char const* token_last = nullptr;
- for(;;)
- {
- // eat leading ' ' and '\t'
- for(;;++p)
- {
- if(p + 1 > last)
- {
- ec = error::need_more;
- return;
- }
- if(! (*p == ' ' || *p == '\t'))
- break;
- }
- // parse to CRLF
- first = p;
- p = parse_token_to_eol(p, last, token_last, ec);
- if(ec)
- return;
- if(! p)
- {
- ec = error::bad_value;
- return;
- }
- // Look 1 char past the CRLF to handle obs-fold.
- if(p + 1 > last)
- {
- ec = error::need_more;
- return;
- }
- token_last =
- trim_back(token_last, first);
- if(*p != ' ' && *p != '\t')
- {
- value = make_string(first, token_last);
- return;
- }
- ++p;
- if(token_last != first)
- break;
- }
- buf.resize(0);
- buf.append(first, token_last);
- BOOST_ASSERT(! buf.empty());
- try
- {
- for(;;)
- {
- // eat leading ' ' and '\t'
- for(;;++p)
- {
- if(p + 1 > last)
- {
- ec = error::need_more;
- return;
- }
- if(! (*p == ' ' || *p == '\t'))
- break;
- }
- // parse to CRLF
- first = p;
- p = parse_token_to_eol(p, last, token_last, ec);
- if(ec)
- return;
- if(! p)
- {
- ec = error::bad_value;
- return;
- }
- // Look 1 char past the CRLF to handle obs-fold.
- if(p + 1 > last)
- {
- ec = error::need_more;
- return;
- }
- token_last = trim_back(token_last, first);
- if(first != token_last)
- {
- buf.push_back(' ');
- buf.append(first, token_last);
- }
- if(*p != ' ' && *p != '\t')
- {
- value = {buf.data(), buf.size()};
- return;
- }
- ++p;
- }
- }
- catch(std::length_error const&)
- {
- ec = error::header_limit;
- return;
- }
- }
+ static_string<max_obs_fold>& buf,
+ error_code& ec);
+ BOOST_BEAST_DECL
+ static
void
parse_chunk_extensions(
char const*& it,
char const* last,
- error_code& ec)
- {
- /*
- chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
- BWS = *( SP / HTAB ) ; "Bad White Space"
- chunk-ext-name = token
- chunk-ext-val = token / quoted-string
- token = 1*tchar
- quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
- qdtext = HTAB / SP / "!" / %x23-5B ; '#'-'[' / %x5D-7E ; ']'-'~' / obs-text
- quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
- obs-text = %x80-FF
-
- https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4667
- */
- loop:
- if(it == last)
- {
- ec = error::need_more;
- return;
- }
- if(*it != ' ' && *it != '\t' && *it != ';')
- return;
- // BWS
- if(*it == ' ' || *it == '\t')
- {
- for(;;)
- {
- ++it;
- if(it == last)
- {
- ec = error::need_more;
- return;
- }
- if(*it != ' ' && *it != '\t')
- break;
- }
- }
- // ';'
- if(*it != ';')
- {
- ec = error::bad_chunk_extension;
- return;
- }
- semi:
- ++it; // skip ';'
- // BWS
- for(;;)
- {
- if(it == last)
- {
- ec = error::need_more;
- return;
- }
- if(*it != ' ' && *it != '\t')
- break;
- ++it;
- }
- // chunk-ext-name
- if(! detail::is_token_char(*it))
- {
- ec = error::bad_chunk_extension;
- return;
- }
- for(;;)
- {
- ++it;
- if(it == last)
- {
- ec = error::need_more;
- return;
- }
- if(! detail::is_token_char(*it))
- break;
- }
- // BWS [ ";" / "=" ]
- {
- bool bws;
- if(*it == ' ' || *it == '\t')
- {
- for(;;)
- {
- ++it;
- if(it == last)
- {
- ec = error::need_more;
- return;
- }
- if(*it != ' ' && *it != '\t')
- break;
- }
- bws = true;
- }
- else
- {
- bws = false;
- }
- if(*it == ';')
- goto semi;
- if(*it != '=')
- {
- if(bws)
- ec = error::bad_chunk_extension;
- return;
- }
- ++it; // skip '='
- }
- // BWS
- for(;;)
- {
- if(it == last)
- {
- ec = error::need_more;
- return;
- }
- if(*it != ' ' && *it != '\t')
- break;
- ++it;
- }
- // chunk-ext-val
- if(*it != '"')
- {
- // token
- if(! detail::is_token_char(*it))
- {
- ec = error::bad_chunk_extension;
- return;
- }
- for(;;)
- {
- ++it;
- if(it == last)
- {
- ec = error::need_more;
- return;
- }
- if(! detail::is_token_char(*it))
- break;
- }
- }
- else
- {
- // quoted-string
- for(;;)
- {
- ++it;
- if(it == last)
- {
- ec = error::need_more;
- return;
- }
- if(*it == '"')
- break;
- if(*it == '\\')
- {
- ++it;
- if(it == last)
- {
- ec = error::need_more;
- return;
- }
- }
- }
- ++it;
- }
- goto loop;
- }
+ error_code& ec);
};
} // detail
@@ -904,4 +199,8 @@ struct basic_parser_base
} // beast
} // boost
+#ifdef BOOST_BEAST_HEADER_ONLY
+#include <boost/beast/http/detail/basic_parser.ipp>
+#endif
+
#endif