diff options
Diffstat (limited to 'boost/beast/http/detail/basic_parser.ipp')
-rw-r--r-- | boost/beast/http/detail/basic_parser.ipp | 848 |
1 files changed, 848 insertions, 0 deletions
diff --git a/boost/beast/http/detail/basic_parser.ipp b/boost/beast/http/detail/basic_parser.ipp new file mode 100644 index 0000000000..934d0dc505 --- /dev/null +++ b/boost/beast/http/detail/basic_parser.ipp @@ -0,0 +1,848 @@ +// +// Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/beast +// + +#ifndef BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_IPP +#define BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_IPP + +#include <boost/beast/http/detail/basic_parser.hpp> +#include <limits> + +namespace boost { +namespace beast { +namespace http { +namespace detail { + +char const* +basic_parser_base:: +trim_front(char const* it, char const* end) +{ + while(it != end) + { + if(*it != ' ' && *it != '\t') + break; + ++it; + } + return it; +} + +char const* +basic_parser_base:: +trim_back( + char const* it, char const* first) +{ + while(it != first) + { + auto const c = it[-1]; + if(c != ' ' && c != '\t') + break; + --it; + } + return it; +} + +bool +basic_parser_base:: +is_pathchar(char c) +{ + // VFALCO This looks the same as the one below... + + // TEXT = <any OCTET except CTLs, and excluding LWS> + static bool constexpr tab[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 32 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 48 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 96 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 112 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 128 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 144 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 160 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 176 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 192 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 208 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 224 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 240 + }; + return tab[static_cast<unsigned char>(c)]; +} + +bool +basic_parser_base:: +unhex(unsigned char& d, char c) +{ + static signed char constexpr tab[256] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 0 + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 16 + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 32 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, // 48 + -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 64 + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 80 + -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 96 + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 112 + + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 128 + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 144 + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 160 + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 176 + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 192 + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 208 + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 224 + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 240 + }; + d = static_cast<unsigned char>( + tab[static_cast<unsigned char>(c)]); + return d != static_cast<unsigned char>(-1); +} + +//-------------------------------------------------------------------------- + +std::pair<char const*, bool> +basic_parser_base:: +find_fast( + char const* buf, + char const* buf_end, + char const* ranges, + size_t ranges_size) +{ + bool found = false; + boost::ignore_unused(buf_end, ranges, ranges_size); + return {buf, found}; +} + +// VFALCO Can SIMD help this? +char const* +basic_parser_base:: +find_eol( + char const* it, char const* last, + error_code& ec) +{ + for(;;) + { + if(it == last) + { + ec = {}; + return nullptr; + } + if(*it == '\r') + { + if(++it == last) + { + ec = {}; + return nullptr; + } + if(*it != '\n') + { + ec = error::bad_line_ending; + return nullptr; + } + ec = {}; + return ++it; + } + // VFALCO Should we handle the legacy case + // for lines terminated with a single '\n'? + ++it; + } +} + +bool +basic_parser_base:: +parse_dec(char const* it, char const* last, std::uint64_t& v) +{ + if(it == last) + return false; + std::uint64_t tmp = 0; + do + { + if((! is_digit(*it)) || + tmp > (std::numeric_limits<std::uint64_t>::max)() / 10) + return false; + tmp *= 10; + std::uint64_t const d = *it - '0'; + if((std::numeric_limits<std::uint64_t>::max)() - tmp < d) + return false; + tmp += d; + } + while(++it != last); + v = tmp; + return true; +} + +bool +basic_parser_base:: +parse_hex(char const*& it, std::uint64_t& v) +{ + unsigned char d; + if(! unhex(d, *it)) + return false; + std::uint64_t tmp = 0; + do + { + if(tmp > (std::numeric_limits<std::uint64_t>::max)() / 16) + return false; + tmp *= 16; + if((std::numeric_limits<std::uint64_t>::max)() - tmp < d) + return false; + tmp += d; + } + while(unhex(d, *++it)); + v = tmp; + return true; +} + +char const* +basic_parser_base:: +find_eom(char const* p, char const* last) +{ + for(;;) + { + if(p + 4 > last) + return nullptr; + if(p[3] != '\n') + { + if(p[3] == '\r') + ++p; + else + p += 4; + } + else if(p[2] != '\r') + { + p += 4; + } + else if(p[1] != '\n') + { + p += 2; + } + else if(p[0] != '\r') + { + p += 2; + } + else + { + return p + 4; + } + } +} + +//-------------------------------------------------------------------------- + +char const* +basic_parser_base:: +parse_token_to_eol( + char const* p, + char const* last, + char const*& token_last, + error_code& ec) +{ + for(;; ++p) + { + if(p >= last) + { + ec = error::need_more; + return p; + } + if(BOOST_UNLIKELY(! is_print(*p))) + if((BOOST_LIKELY(static_cast< + unsigned char>(*p) < '\040') && + BOOST_LIKELY(*p != 9)) || + BOOST_UNLIKELY(*p == 127)) + goto found_control; + } +found_control: + if(BOOST_LIKELY(*p == '\r')) + { + if(++p >= last) + { + ec = error::need_more; + return last; + } + if(*p++ != '\n') + { + ec = error::bad_line_ending; + return last; + } + token_last = p - 2; + } +#if 0 + // VFALCO This allows `\n` by itself + // to terminate a line + else if(*p == '\n') + { + token_last = p; + ++p; + } +#endif + else + { + // invalid character + return nullptr; + } + return p; +} + +bool +basic_parser_base:: +parse_crlf(char const*& it) +{ + if( it[0] != '\r' || it[1] != '\n') + return false; + it += 2; + return true; +} + +void +basic_parser_base:: +parse_method( + char const*& it, char const* last, + string_view& result, error_code& ec) +{ + // parse token SP + auto const first = it; + for(;; ++it) + { + if(it + 1 > last) + { + ec = error::need_more; + return; + } + if(! detail::is_token_char(*it)) + break; + } + if(it + 1 > last) + { + ec = error::need_more; + return; + } + if(*it != ' ') + { + ec = error::bad_method; + return; + } + if(it == first) + { + // cannot be empty + ec = error::bad_method; + return; + } + result = make_string(first, it++); +} + +void +basic_parser_base:: +parse_target( + char const*& it, char const* last, + string_view& result, error_code& ec) +{ + // parse target SP + auto const first = it; + for(;; ++it) + { + if(it + 1 > last) + { + ec = error::need_more; + return; + } + if(! is_pathchar(*it)) + break; + } + if(it + 1 > last) + { + ec = error::need_more; + return; + } + if(*it != ' ') + { + ec = error::bad_target; + return; + } + if(it == first) + { + // cannot be empty + ec = error::bad_target; + return; + } + result = make_string(first, it++); +} + +void +basic_parser_base:: +parse_version( + char const*& it, char const* last, + int& result, error_code& ec) +{ + if(it + 8 > last) + { + ec = error::need_more; + return; + } + if(*it++ != 'H') + { + ec = error::bad_version; + return; + } + if(*it++ != 'T') + { + ec = error::bad_version; + return; + } + if(*it++ != 'T') + { + ec = error::bad_version; + return; + } + if(*it++ != 'P') + { + ec = error::bad_version; + return; + } + if(*it++ != '/') + { + ec = error::bad_version; + return; + } + if(! is_digit(*it)) + { + ec = error::bad_version; + return; + } + result = 10 * (*it++ - '0'); + if(*it++ != '.') + { + ec = error::bad_version; + return; + } + if(! is_digit(*it)) + { + ec = error::bad_version; + return; + } + result += *it++ - '0'; +} + +void +basic_parser_base:: +parse_status( + char const*& it, char const* last, + unsigned short& result, error_code& ec) +{ + // parse 3(digit) SP + if(it + 4 > last) + { + ec = error::need_more; + return; + } + if(! is_digit(*it)) + { + ec = error::bad_status; + return; + } + result = 100 * (*it++ - '0'); + if(! is_digit(*it)) + { + ec = error::bad_status; + return; + } + result += 10 * (*it++ - '0'); + if(! is_digit(*it)) + { + ec = error::bad_status; + return; + } + result += *it++ - '0'; + if(*it++ != ' ') + { + ec = error::bad_status; + return; + } +} + +void +basic_parser_base:: +parse_reason( + char const*& it, char const* last, + string_view& result, error_code& ec) +{ + auto const first = it; + char const* token_last = nullptr; + auto p = parse_token_to_eol( + it, last, token_last, ec); + if(ec) + return; + if(! p) + { + ec = error::bad_reason; + return; + } + result = make_string(first, token_last); + it = p; +} + +void +basic_parser_base:: +parse_field( + char const*& p, + char const* last, + string_view& name, + string_view& value, + static_string<max_obs_fold>& buf, + error_code& ec) +{ +/* header-field = field-name ":" OWS field-value OWS + + field-name = token + field-value = *( field-content / obs-fold ) + field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] + field-vchar = VCHAR / obs-text + + obs-fold = CRLF 1*( SP / HTAB ) + ; obsolete line folding + ; see Section 3.2.4 + + token = 1*<any CHAR except CTLs or separators> + CHAR = <any US-ASCII character (octets 0 - 127)> + sep = "(" | ")" | "<" | ">" | "@" + | "," | ";" | ":" | "\" | <"> + | "/" | "[" | "]" | "?" | "=" + | "{" | "}" | SP | HT +*/ + static char const* is_token = + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0" + "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1" + "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + + // name + BOOST_ALIGNMENT(16) static const char ranges1[] = + "\x00 " /* control chars and up to SP */ + "\"\"" /* 0x22 */ + "()" /* 0x28,0x29 */ + ",," /* 0x2c */ + "//" /* 0x2f */ + ":@" /* 0x3a-0x40 */ + "[]" /* 0x5b-0x5d */ + "{\377"; /* 0x7b-0xff */ + auto first = p; + bool found; + std::tie(p, found) = find_fast( + p, last, ranges1, sizeof(ranges1)-1); + if(! found && p >= last) + { + ec = error::need_more; + return; + } + for(;;) + { + if(*p == ':') + break; + if(! is_token[static_cast< + unsigned char>(*p)]) + { + ec = error::bad_field; + return; + } + ++p; + if(p >= last) + { + ec = error::need_more; + return; + } + } + if(p == first) + { + // empty name + ec = error::bad_field; + return; + } + name = make_string(first, p); + ++p; // eat ':' + char const* token_last = nullptr; + for(;;) + { + // eat leading ' ' and '\t' + for(;;++p) + { + if(p + 1 > last) + { + ec = error::need_more; + return; + } + if(! (*p == ' ' || *p == '\t')) + break; + } + // parse to CRLF + first = p; + p = parse_token_to_eol(p, last, token_last, ec); + if(ec) + return; + if(! p) + { + ec = error::bad_value; + return; + } + // Look 1 char past the CRLF to handle obs-fold. + if(p + 1 > last) + { + ec = error::need_more; + return; + } + token_last = + trim_back(token_last, first); + if(*p != ' ' && *p != '\t') + { + value = make_string(first, token_last); + return; + } + ++p; + if(token_last != first) + break; + } + buf.resize(0); + buf.append(first, token_last); + BOOST_ASSERT(! buf.empty()); +#ifndef BOOST_NO_EXCEPTIONS + try +#endif + { + for(;;) + { + // eat leading ' ' and '\t' + for(;;++p) + { + if(p + 1 > last) + { + ec = error::need_more; + return; + } + if(! (*p == ' ' || *p == '\t')) + break; + } + // parse to CRLF + first = p; + p = parse_token_to_eol(p, last, token_last, ec); + if(ec) + return; + if(! p) + { + ec = error::bad_value; + return; + } + // Look 1 char past the CRLF to handle obs-fold. + if(p + 1 > last) + { + ec = error::need_more; + return; + } + token_last = trim_back(token_last, first); + if(first != token_last) + { + buf.push_back(' '); + buf.append(first, token_last); + } + if(*p != ' ' && *p != '\t') + { + value = {buf.data(), buf.size()}; + return; + } + ++p; + } + } +#ifndef BOOST_NO_EXCEPTIONS + catch(std::length_error const&) + { + ec = error::header_limit; + return; + } +#endif +} + + +void +basic_parser_base:: +parse_chunk_extensions( + char const*& it, + char const* last, + error_code& ec) +{ +/* + chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] ) + BWS = *( SP / HTAB ) ; "Bad White Space" + chunk-ext-name = token + chunk-ext-val = token / quoted-string + token = 1*tchar + quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE + qdtext = HTAB / SP / "!" / %x23-5B ; '#'-'[' / %x5D-7E ; ']'-'~' / obs-text + quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) + obs-text = %x80-FF + + https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4667 +*/ +loop: + if(it == last) + { + ec = error::need_more; + return; + } + if(*it != ' ' && *it != '\t' && *it != ';') + return; + // BWS + if(*it == ' ' || *it == '\t') + { + for(;;) + { + ++it; + if(it == last) + { + ec = error::need_more; + return; + } + if(*it != ' ' && *it != '\t') + break; + } + } + // ';' + if(*it != ';') + { + ec = error::bad_chunk_extension; + return; + } +semi: + ++it; // skip ';' + // BWS + for(;;) + { + if(it == last) + { + ec = error::need_more; + return; + } + if(*it != ' ' && *it != '\t') + break; + ++it; + } + // chunk-ext-name + if(! detail::is_token_char(*it)) + { + ec = error::bad_chunk_extension; + return; + } + for(;;) + { + ++it; + if(it == last) + { + ec = error::need_more; + return; + } + if(! detail::is_token_char(*it)) + break; + } + // BWS [ ";" / "=" ] + { + bool bws; + if(*it == ' ' || *it == '\t') + { + for(;;) + { + ++it; + if(it == last) + { + ec = error::need_more; + return; + } + if(*it != ' ' && *it != '\t') + break; + } + bws = true; + } + else + { + bws = false; + } + if(*it == ';') + goto semi; + if(*it != '=') + { + if(bws) + ec = error::bad_chunk_extension; + return; + } + ++it; // skip '=' + } + // BWS + for(;;) + { + if(it == last) + { + ec = error::need_more; + return; + } + if(*it != ' ' && *it != '\t') + break; + ++it; + } + // chunk-ext-val + if(*it != '"') + { + // token + if(! detail::is_token_char(*it)) + { + ec = error::bad_chunk_extension; + return; + } + for(;;) + { + ++it; + if(it == last) + { + ec = error::need_more; + return; + } + if(! detail::is_token_char(*it)) + break; + } + } + else + { + // quoted-string + for(;;) + { + ++it; + if(it == last) + { + ec = error::need_more; + return; + } + if(*it == '"') + break; + if(*it == '\\') + { + ++it; + if(it == last) + { + ec = error::need_more; + return; + } + } + } + ++it; + } + goto loop; +} + +} // detail +} // http +} // beast +} // boost + +#endif |