diff options
Diffstat (limited to 'boost/beast/http/detail/basic_parser.hpp')
-rw-r--r-- | boost/beast/http/detail/basic_parser.hpp | 813 |
1 files changed, 56 insertions, 757 deletions
diff --git a/boost/beast/http/detail/basic_parser.hpp b/boost/beast/http/detail/basic_parser.hpp index 741de6edf0..f4e9083ffa 100644 --- a/boost/beast/http/detail/basic_parser.hpp +++ b/boost/beast/http/detail/basic_parser.hpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com) +// Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -12,14 +12,11 @@ #include <boost/beast/core/static_string.hpp> #include <boost/beast/core/string.hpp> -#include <boost/beast/core/detail/cpu_info.hpp> #include <boost/beast/http/error.hpp> #include <boost/beast/http/detail/rfc7230.hpp> #include <boost/config.hpp> #include <boost/version.hpp> -#include <algorithm> #include <cstddef> -#include <limits> #include <utility> namespace boost { @@ -35,12 +32,6 @@ struct basic_parser_base // static std::size_t constexpr max_obs_fold = 4096; - template<class T> - struct is_unsigned_integer: - std::integral_constant<bool, - std::numeric_limits<T>::is_integer && - ! std::numeric_limits<T>::is_signed> {}; - enum class state { nothing_yet = 0, @@ -58,63 +49,6 @@ struct basic_parser_base static bool - is_pathchar(char c) - { - // VFALCO This looks the same as the one below... - - // TEXT = <any OCTET except CTLs, and excluding LWS> - static bool constexpr tab[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16 - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 32 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 48 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 96 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 112 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 128 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 144 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 160 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 176 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 192 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 208 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 224 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 240 - }; - return tab[static_cast<unsigned char>(c)]; - } - - static - inline - bool - unhex(unsigned char& d, char c) - { - static signed char constexpr tab[256] = { - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 0 - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 16 - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 32 - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, // 48 - -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 64 - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 80 - -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 96 - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 112 - - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 128 - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 144 - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 160 - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 176 - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 192 - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 208 - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 224 - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 240 - }; - d = static_cast<unsigned char>( - tab[static_cast<unsigned char>(c)]); - return d != static_cast<unsigned char>(-1); - } - - static - bool is_digit(char c) { return static_cast<unsigned char>(c-'0') < 10; @@ -127,35 +61,16 @@ struct basic_parser_base return static_cast<unsigned char>(c-32) < 95; } - template<class FwdIt> + BOOST_BEAST_DECL static - FwdIt - trim_front(FwdIt it, FwdIt const& end) - { - while(it != end) - { - if(*it != ' ' && *it != '\t') - break; - ++it; - } - return it; - } + char const* + trim_front(char const* it, char const* end); - template<class RanIt> + BOOST_BEAST_DECL static - RanIt + char const* trim_back( - RanIt it, RanIt const& first) - { - while(it != first) - { - auto const c = it[-1]; - if(c != ' ' && c != '\t') - break; - --it; - } - return it; - } + char const* it, char const* first); static string_view @@ -167,736 +82,116 @@ struct basic_parser_base //-------------------------------------------------------------------------- + BOOST_BEAST_DECL + static + bool + is_pathchar(char c); + + BOOST_BEAST_DECL + static + bool + unhex(unsigned char& d, char c); + + BOOST_BEAST_DECL static std::pair<char const*, bool> find_fast( char const* buf, char const* buf_end, char const* ranges, - size_t ranges_size) - { - bool found = false; - boost::ignore_unused(buf_end, ranges, ranges_size); - return {buf, found}; - } + size_t ranges_size); - // VFALCO Can SIMD help this? + BOOST_BEAST_DECL static char const* find_eol( char const* it, char const* last, - error_code& ec) - { - for(;;) - { - if(it == last) - { - ec.assign(0, ec.category()); - return nullptr; - } - if(*it == '\r') - { - if(++it == last) - { - ec.assign(0, ec.category()); - return nullptr; - } - if(*it != '\n') - { - ec = error::bad_line_ending; - return nullptr; - } - ec.assign(0, ec.category()); - return ++it; - } - // VFALCO Should we handle the legacy case - // for lines terminated with a single '\n'? - ++it; - } - } + error_code& ec); + BOOST_BEAST_DECL static char const* - find_eom(char const* p, char const* last) - { - for(;;) - { - if(p + 4 > last) - return nullptr; - if(p[3] != '\n') - { - if(p[3] == '\r') - ++p; - else - p += 4; - } - else if(p[2] != '\r') - { - p += 4; - } - else if(p[1] != '\n') - { - p += 2; - } - else if(p[0] != '\r') - { - p += 2; - } - else - { - return p + 4; - } - } - } + find_eom(char const* p, char const* last); //-------------------------------------------------------------------------- + BOOST_BEAST_DECL static char const* parse_token_to_eol( char const* p, char const* last, char const*& token_last, - error_code& ec) - { - for(;; ++p) - { - if(p >= last) - { - ec = error::need_more; - return p; - } - if(BOOST_UNLIKELY(! is_print(*p))) - if((BOOST_LIKELY(static_cast< - unsigned char>(*p) < '\040') && - BOOST_LIKELY(*p != '\011')) || - BOOST_UNLIKELY(*p == '\177')) - goto found_control; - } - found_control: - if(BOOST_LIKELY(*p == '\r')) - { - if(++p >= last) - { - ec = error::need_more; - return last; - } - if(*p++ != '\n') - { - ec = error::bad_line_ending; - return last; - } - token_last = p - 2; - } - #if 0 - // VFALCO This allows `\n` by itself - // to terminate a line - else if(*p == '\n') - { - token_last = p; - ++p; - } - #endif - else - { - // invalid character - return nullptr; - } - return p; - } + error_code& ec); - template<class Iter, class T> + BOOST_BEAST_DECL static - typename std::enable_if<is_unsigned_integer<T>::value, bool>::type - parse_dec(Iter it, Iter last, T& v) - { - if(it == last) - return false; - T tmp = 0; - do - { - if((! is_digit(*it)) || - tmp > (std::numeric_limits<T>::max)() / 10) - return false; - tmp *= 10; - T const d = *it - '0'; - if((std::numeric_limits<T>::max)() - tmp < d) - return false; - tmp += d; - } - while(++it != last); - v = tmp; - return true; - } + bool + parse_dec(char const* it, char const* last, std::uint64_t& v); - template<class Iter, class T> + BOOST_BEAST_DECL static - typename std::enable_if<is_unsigned_integer<T>::value, bool>::type - parse_hex(Iter& it, T& v) - { - unsigned char d; - if(! unhex(d, *it)) - return false; - T tmp = 0; - do - { - if(tmp > (std::numeric_limits<T>::max)() / 16) - return false; - tmp *= 16; - if((std::numeric_limits<T>::max)() - tmp < d) - return false; - tmp += d; - } - while(unhex(d, *++it)); - v = tmp; - return true; - } + bool + parse_hex(char const*& it, std::uint64_t& v); + BOOST_BEAST_DECL static bool - parse_crlf(char const*& it) - { - if( it[0] != '\r' || it[1] != '\n') - return false; - it += 2; - return true; - } + parse_crlf(char const*& it); + BOOST_BEAST_DECL static void parse_method( char const*& it, char const* last, - string_view& result, error_code& ec) - { - // parse token SP - auto const first = it; - for(;; ++it) - { - if(it + 1 > last) - { - ec = error::need_more; - return; - } - if(! detail::is_token_char(*it)) - break; - } - if(it + 1 > last) - { - ec = error::need_more; - return; - } - if(*it != ' ') - { - ec = error::bad_method; - return; - } - if(it == first) - { - // cannot be empty - ec = error::bad_method; - return; - } - result = make_string(first, it++); - } + string_view& result, error_code& ec); + BOOST_BEAST_DECL static void parse_target( char const*& it, char const* last, - string_view& result, error_code& ec) - { - // parse target SP - auto const first = it; - for(;; ++it) - { - if(it + 1 > last) - { - ec = error::need_more; - return; - } - if(! is_pathchar(*it)) - break; - } - if(it + 1 > last) - { - ec = error::need_more; - return; - } - if(*it != ' ') - { - ec = error::bad_target; - return; - } - if(it == first) - { - // cannot be empty - ec = error::bad_target; - return; - } - result = make_string(first, it++); - } + string_view& result, error_code& ec); + BOOST_BEAST_DECL static void parse_version( char const*& it, char const* last, - int& result, error_code& ec) - { - if(it + 8 > last) - { - ec = error::need_more; - return; - } - if(*it++ != 'H') - { - ec = error::bad_version; - return; - } - if(*it++ != 'T') - { - ec = error::bad_version; - return; - } - if(*it++ != 'T') - { - ec = error::bad_version; - return; - } - if(*it++ != 'P') - { - ec = error::bad_version; - return; - } - if(*it++ != '/') - { - ec = error::bad_version; - return; - } - if(! is_digit(*it)) - { - ec = error::bad_version; - return; - } - result = 10 * (*it++ - '0'); - if(*it++ != '.') - { - ec = error::bad_version; - return; - } - if(! is_digit(*it)) - { - ec = error::bad_version; - return; - } - result += *it++ - '0'; - } + int& result, error_code& ec); + BOOST_BEAST_DECL static void parse_status( char const*& it, char const* last, - unsigned short& result, error_code& ec) - { - // parse 3(digit) SP - if(it + 4 > last) - { - ec = error::need_more; - return; - } - if(! is_digit(*it)) - { - ec = error::bad_status; - return; - } - result = 100 * (*it++ - '0'); - if(! is_digit(*it)) - { - ec = error::bad_status; - return; - } - result += 10 * (*it++ - '0'); - if(! is_digit(*it)) - { - ec = error::bad_status; - return; - } - result += *it++ - '0'; - if(*it++ != ' ') - { - ec = error::bad_status; - return; - } - } - + unsigned short& result, error_code& ec); + + BOOST_BEAST_DECL + static void parse_reason( char const*& it, char const* last, - string_view& result, error_code& ec) - { - auto const first = it; - char const* token_last = nullptr; - auto p = parse_token_to_eol( - it, last, token_last, ec); - if(ec) - return; - if(! p) - { - ec = error::bad_reason; - return; - } - result = make_string(first, token_last); - it = p; - } + string_view& result, error_code& ec); - template<std::size_t N> + BOOST_BEAST_DECL + static void parse_field( char const*& p, char const* last, string_view& name, string_view& value, - static_string<N>& buf, - error_code& ec) - { - /* header-field = field-name ":" OWS field-value OWS - - field-name = token - field-value = *( field-content / obs-fold ) - field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] - field-vchar = VCHAR / obs-text - - obs-fold = CRLF 1*( SP / HTAB ) - ; obsolete line folding - ; see Section 3.2.4 - - token = 1*<any CHAR except CTLs or separators> - CHAR = <any US-ASCII character (octets 0 - 127)> - sep = "(" | ")" | "<" | ">" | "@" - | "," | ";" | ":" | "\" | <"> - | "/" | "[" | "]" | "?" | "=" - | "{" | "}" | SP | HT - */ - static char const* is_token = - "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" - "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0" - "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1" - "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0" - "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" - "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" - "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" - "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; - - // name - BOOST_ALIGNMENT(16) static const char ranges1[] = - "\x00 " /* control chars and up to SP */ - "\"\"" /* 0x22 */ - "()" /* 0x28,0x29 */ - ",," /* 0x2c */ - "//" /* 0x2f */ - ":@" /* 0x3a-0x40 */ - "[]" /* 0x5b-0x5d */ - "{\377"; /* 0x7b-0xff */ - auto first = p; - bool found; - std::tie(p, found) = find_fast( - p, last, ranges1, sizeof(ranges1)-1); - if(! found && p >= last) - { - ec = error::need_more; - return; - } - for(;;) - { - if(*p == ':') - break; - if(! is_token[static_cast< - unsigned char>(*p)]) - { - ec = error::bad_field; - return; - } - ++p; - if(p >= last) - { - ec = error::need_more; - return; - } - } - if(p == first) - { - // empty name - ec = error::bad_field; - return; - } - name = make_string(first, p); - ++p; // eat ':' - char const* token_last = nullptr; - for(;;) - { - // eat leading ' ' and '\t' - for(;;++p) - { - if(p + 1 > last) - { - ec = error::need_more; - return; - } - if(! (*p == ' ' || *p == '\t')) - break; - } - // parse to CRLF - first = p; - p = parse_token_to_eol(p, last, token_last, ec); - if(ec) - return; - if(! p) - { - ec = error::bad_value; - return; - } - // Look 1 char past the CRLF to handle obs-fold. - if(p + 1 > last) - { - ec = error::need_more; - return; - } - token_last = - trim_back(token_last, first); - if(*p != ' ' && *p != '\t') - { - value = make_string(first, token_last); - return; - } - ++p; - if(token_last != first) - break; - } - buf.resize(0); - buf.append(first, token_last); - BOOST_ASSERT(! buf.empty()); - try - { - for(;;) - { - // eat leading ' ' and '\t' - for(;;++p) - { - if(p + 1 > last) - { - ec = error::need_more; - return; - } - if(! (*p == ' ' || *p == '\t')) - break; - } - // parse to CRLF - first = p; - p = parse_token_to_eol(p, last, token_last, ec); - if(ec) - return; - if(! p) - { - ec = error::bad_value; - return; - } - // Look 1 char past the CRLF to handle obs-fold. - if(p + 1 > last) - { - ec = error::need_more; - return; - } - token_last = trim_back(token_last, first); - if(first != token_last) - { - buf.push_back(' '); - buf.append(first, token_last); - } - if(*p != ' ' && *p != '\t') - { - value = {buf.data(), buf.size()}; - return; - } - ++p; - } - } - catch(std::length_error const&) - { - ec = error::header_limit; - return; - } - } + static_string<max_obs_fold>& buf, + error_code& ec); + BOOST_BEAST_DECL + static void parse_chunk_extensions( char const*& it, char const* last, - error_code& ec) - { - /* - chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] ) - BWS = *( SP / HTAB ) ; "Bad White Space" - chunk-ext-name = token - chunk-ext-val = token / quoted-string - token = 1*tchar - quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE - qdtext = HTAB / SP / "!" / %x23-5B ; '#'-'[' / %x5D-7E ; ']'-'~' / obs-text - quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) - obs-text = %x80-FF - - https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4667 - */ - loop: - if(it == last) - { - ec = error::need_more; - return; - } - if(*it != ' ' && *it != '\t' && *it != ';') - return; - // BWS - if(*it == ' ' || *it == '\t') - { - for(;;) - { - ++it; - if(it == last) - { - ec = error::need_more; - return; - } - if(*it != ' ' && *it != '\t') - break; - } - } - // ';' - if(*it != ';') - { - ec = error::bad_chunk_extension; - return; - } - semi: - ++it; // skip ';' - // BWS - for(;;) - { - if(it == last) - { - ec = error::need_more; - return; - } - if(*it != ' ' && *it != '\t') - break; - ++it; - } - // chunk-ext-name - if(! detail::is_token_char(*it)) - { - ec = error::bad_chunk_extension; - return; - } - for(;;) - { - ++it; - if(it == last) - { - ec = error::need_more; - return; - } - if(! detail::is_token_char(*it)) - break; - } - // BWS [ ";" / "=" ] - { - bool bws; - if(*it == ' ' || *it == '\t') - { - for(;;) - { - ++it; - if(it == last) - { - ec = error::need_more; - return; - } - if(*it != ' ' && *it != '\t') - break; - } - bws = true; - } - else - { - bws = false; - } - if(*it == ';') - goto semi; - if(*it != '=') - { - if(bws) - ec = error::bad_chunk_extension; - return; - } - ++it; // skip '=' - } - // BWS - for(;;) - { - if(it == last) - { - ec = error::need_more; - return; - } - if(*it != ' ' && *it != '\t') - break; - ++it; - } - // chunk-ext-val - if(*it != '"') - { - // token - if(! detail::is_token_char(*it)) - { - ec = error::bad_chunk_extension; - return; - } - for(;;) - { - ++it; - if(it == last) - { - ec = error::need_more; - return; - } - if(! detail::is_token_char(*it)) - break; - } - } - else - { - // quoted-string - for(;;) - { - ++it; - if(it == last) - { - ec = error::need_more; - return; - } - if(*it == '"') - break; - if(*it == '\\') - { - ++it; - if(it == last) - { - ec = error::need_more; - return; - } - } - } - ++it; - } - goto loop; - } + error_code& ec); }; } // detail @@ -904,4 +199,8 @@ struct basic_parser_base } // beast } // boost +#ifdef BOOST_BEAST_HEADER_ONLY +#include <boost/beast/http/detail/basic_parser.ipp> +#endif + #endif |