diff options
Diffstat (limited to 'boost/beast/http/basic_parser.hpp')
-rw-r--r-- | boost/beast/http/basic_parser.hpp | 621 |
1 files changed, 621 insertions, 0 deletions
diff --git a/boost/beast/http/basic_parser.hpp b/boost/beast/http/basic_parser.hpp
new file mode 100644
index 0000000000..552adec7e4
--- /dev/null
+++ b/boost/beast/http/basic_parser.hpp
@@ -0,0 +1,621 @@
+//
+// Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Official repository: https://github.com/boostorg/beast
+//
+
+#ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP
+#define BOOST_BEAST_HTTP_BASIC_PARSER_HPP
+
+#include <boost/beast/core/detail/config.hpp>
+#include <boost/beast/core/error.hpp>
+#include <boost/beast/core/string.hpp>
+#include <boost/beast/http/field.hpp>
+#include <boost/beast/http/verb.hpp>
+#include <boost/beast/http/detail/basic_parser.hpp>
+#include <boost/asio/buffer.hpp>
+#include <boost/optional.hpp>
+#include <boost/assert.hpp>
+#include <limits>
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+namespace boost {
+namespace beast {
+namespace http {
+
+/** A parser for decoding HTTP/1 wire format messages.
+
+    This parser is designed to efficiently parse messages in the
+    HTTP/1 wire format. It allocates no memory when input is
+    presented as a single contiguous buffer, and uses minimal
+    state. It will handle chunked encoding and it understands
+    the semantics of the Connection, Content-Length, and Upgrade
+    fields.
+    The parser is optimized for the case where the input buffer
+    sequence consists of a single contiguous buffer. The
+    @ref flat_buffer class is provided, which guarantees
+    that the input sequence of the stream buffer will be represented
+    by exactly one contiguous buffer. To ensure the optimum performance
+    of the parser, use @ref flat_buffer with HTTP algorithms
+    such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some.
+    Alternatively, the caller may use custom techniques to ensure that
+    the structured portion of the HTTP message (header or chunk header)
+    is contained in a linear buffer.
+
+    The interface uses CRTP (Curiously Recurring Template Pattern).
+    To use this class directly, derive from @ref basic_parser. When
+    bytes are presented, the implementation will make a series of zero
+    or more calls to derived class member functions (termed "callbacks"
+    in this context) matching a specific signature.
+
+    Every callback must be provided by the derived class, or else
+    a compilation error will be generated. This exemplar shows
+    the signature and description of the callbacks required in
+    the derived class.
+    For each callback, the function will ensure that `!ec` is `true`
+    if there was no error or set to the appropriate error code if
+    there was one. If an error is set, the value is propagated to
+    the caller of the parser.
+
+    @par Derived Class Requirements
+    @code
+    template<bool isRequest>
+    class derived
+        : public basic_parser<isRequest, derived<isRequest>>
+    {
+    private:
+        // The friend declaration is needed,
+        // otherwise the callbacks must be made public.
+        friend class basic_parser<isRequest, derived>;
+
+        /// Called after receiving the request-line (isRequest == true).
+        void
+        on_request_impl(
+            verb method,                // The method verb, verb::unknown if no match
+            string_view method_str,     // The method as a string
+            string_view target,         // The request-target
+            int version,                // The HTTP-version
+            error_code& ec);            // The error returned to the caller, if any
+
+        /// Called after receiving the start-line (isRequest == false).
+        void
+        on_response_impl(
+            int code,                   // The status-code
+            string_view reason,         // The obsolete reason-phrase
+            int version,                // The HTTP-version
+            error_code& ec);            // The error returned to the caller, if any
+
+        /// Called after receiving a header field.
+        void
+        on_field_impl(
+            field f,                    // The known-field enumeration constant
+            string_view name,           // The field name string.
+            string_view value,          // The field value
+            error_code& ec);            // The error returned to the caller, if any
+
+        /// Called after the complete header is received.
+        void
+        on_header_impl(
+            error_code& ec);            // The error returned to the caller, if any
+
+        /// Called just before processing the body, if a body exists.
+        void
+        on_body_init_impl(
+            boost::optional<
+                std::uint64_t> const&
+                    content_length,     // Content length if known, else `boost::none`
+            error_code& ec);            // The error returned to the caller, if any
+
+        /// Called for each piece of the body, if a body exists.
+        //!
+        //! This is used when there is no chunked transfer coding.
+        //!
+        //! The function returns the number of bytes consumed from the
+        //! input buffer. Any input octets not consumed will be
+        //! presented on subsequent calls.
+        //!
+        std::size_t
+        on_body_impl(
+            string_view s,              // A portion of the body
+            error_code& ec);            // The error returned to the caller, if any
+
+        /// Called for each chunk header.
+        void
+        on_chunk_header_impl(
+            std::uint64_t size,         // The size of the upcoming chunk,
+                                        // or zero for the last chunk
+            string_view extension,      // The chunk extensions (may be empty)
+            error_code& ec);            // The error returned to the caller, if any
+
+        /// Called to deliver the chunk body.
+        //!
+        //! This is used when there is a chunked transfer coding. The
+        //! implementation will automatically remove the encoding before
+        //! calling this function.
+        //!
+        //! The function returns the number of bytes consumed from the
+        //! input buffer. Any input octets not consumed will be
+        //! presented on subsequent calls.
+        //!
+        std::size_t
+        on_chunk_body_impl(
+            std::uint64_t remain,       // The number of bytes remaining in the chunk,
+                                        // including what is being passed here,
+                                        // or zero for the last chunk
+            string_view body,           // The next piece of the chunk body
+            error_code& ec);            // The error returned to the caller, if any
+
+        /// Called when the complete message is parsed.
+        void
+        on_finish_impl(error_code& ec);
+
+    public:
+        derived() = default;
+    };
+    @endcode
+
+    @tparam isRequest A `bool` indicating whether the parser will be
+    presented with a request or a response message.
+
+    @tparam Derived The derived class type. This is part of the
+    Curiously Recurring Template Pattern interface.
+
+    @note If the parser encounters a field value with obs-fold
+    longer than 4 kilobytes in length, an error is generated.
+*/
+template<bool isRequest, class Derived>
+class basic_parser
+    : private detail::basic_parser_base
+{
+    template<bool OtherIsRequest, class OtherDerived>
+    friend class basic_parser;
+
+    // limit on the size of the stack flat buffer
+    static std::size_t constexpr max_stack_buffer = 8192;
+
+    // Message will be complete after reading header
+    static unsigned constexpr flagSkipBody              = 1<<  0;
+
+    // Consume input buffers across semantic boundaries
+    static unsigned constexpr flagEager                 = 1<<  1;
+
+    // The parser has read at least one byte
+    static unsigned constexpr flagGotSome               = 1<<  2;
+
+    // Message semantics indicate a body is expected.
+    // cleared if flagSkipBody set
+    //
+    static unsigned constexpr flagHasBody               = 1<<  3;
+
+    static unsigned constexpr flagHTTP11                = 1<<  4;
+    static unsigned constexpr flagNeedEOF               = 1<<  5; // reported by need_eof()
+    static unsigned constexpr flagExpectCRLF            = 1<<  6;
+    static unsigned constexpr flagConnectionClose       = 1<<  7;
+    static unsigned constexpr flagConnectionUpgrade     = 1<<  8; // reported by upgrade()
+    static unsigned constexpr flagConnectionKeepAlive   = 1<<  9;
+    static unsigned constexpr flagContentLength         = 1<< 10;
+    static unsigned constexpr flagChunked               = 1<< 11; // reported by chunked()
+    static unsigned constexpr flagUpgrade               = 1<< 12;
+    static unsigned constexpr flagFinalChunk            = 1<< 13;
+
+    static constexpr
+    std::uint64_t
+    default_body_limit(std::true_type)
+    {
+        // limit for requests
+        return 1 * 1024 * 1024; // 1MB
+    }
+
+    static constexpr
+    std::uint64_t
+    default_body_limit(std::false_type)
+    {
+        // limit for responses
+        return 8 * 1024 * 1024; // 8MB
+    }
+
+    std::uint64_t body_limit_ =
+        default_body_limit(is_request{});   // max payload body
+    std::uint64_t len_ = 0;                 // size of chunk or body
+    std::unique_ptr<char[]> buf_;           // temp storage
+    std::size_t buf_len_ = 0;               // size of buf_
+    std::size_t skip_ = 0;                  // resume search here
+    std::uint32_t header_limit_ = 8192;     // max header size
+    unsigned short status_ = 0;             // response status
+    state state_ = state::nothing_yet;      // initial state
+    unsigned f_ = 0;                        // flags
+
+protected:
+    /// Default constructor
+    basic_parser() = default;
+
+    /// Move constructor
+    basic_parser(basic_parser &&) = default;
+
+    /// Move assignment
+    basic_parser& operator=(basic_parser &&) = default;
+
+    /** Move constructor
+
+        @note
+
+        After the move, the only valid operation on the
+        moved-from object is destruction.
+    */
+    template<class OtherDerived>
+    basic_parser(basic_parser<isRequest, OtherDerived>&&);
+
+public:
+    /// `true` if this parser parses requests, `false` for responses.
+    using is_request =
+        std::integral_constant<bool, isRequest>;
+
+    /// Destructor
+    ~basic_parser() = default;
+
+    /// Copy constructor
+    basic_parser(basic_parser const&) = delete;
+
+    /// Copy assignment
+    basic_parser& operator=(basic_parser const&) = delete;
+
+    /** Returns a reference to this object as a @ref basic_parser.
+
+        This is used to pass a derived class where a base class is
+        expected, to choose a correct function overload when the
+        resolution would be ambiguous.
+    */
+    basic_parser&
+    base()
+    {
+        return *this;
+    }
+
+    /** Returns a constant reference to this object as a @ref basic_parser.
+
+        This is used to pass a derived class where a base class is
+        expected, to choose a correct function overload when the
+        resolution would be ambiguous.
+    */
+    basic_parser const&
+    base() const
+    {
+        return *this;
+    }
+
+    /// Returns `true` if the parser has received at least one byte of input.
+    bool
+    got_some() const
+    {
+        return state_ != state::nothing_yet;
+    }
+
+    /** Returns `true` if the message is complete.
+
+        The message is complete after the full header is produced
+        and one of the following is true:
+
+        @li The skip body option was set.
+
+        @li The semantics of the message indicate there is no body.
+
+        @li The semantics of the message indicate a body is expected,
+        and the entire body was parsed.
+    */
+    bool
+    is_done() const
+    {
+        return state_ == state::complete;
+    }
+
+    /** Returns `true` if the parser has produced the full header.
+    */
+    bool
+    is_header_done() const
+    {
+        return state_ > state::fields; // ordered comparison: depends on the enumerator order of `state`
+    }
+
+    /** Returns `true` if the message is an upgrade message.
+
+        @note The return value is undefined unless
+        @ref is_header_done would return `true`.
+    */
+    bool
+    upgrade() const
+    {
+        return (f_ & flagConnectionUpgrade) != 0;
+    }
+
+    /** Returns `true` if the last value for Transfer-Encoding is "chunked".
+
+        @note The return value is undefined unless
+        @ref is_header_done would return `true`.
+    */
+    bool
+    chunked() const
+    {
+        return (f_ & flagChunked) != 0;
+    }
+
+    /** Returns `true` if the message has keep-alive connection semantics.
+
+        This function always returns `false` if @ref need_eof would return
+        `false`.
+
+        @note The return value is undefined unless
+        @ref is_header_done would return `true`.
+    */
+    bool
+    keep_alive() const;
+
+    /** Returns the optional value of Content-Length if known.
+
+        @note The return value is undefined unless
+        @ref is_header_done would return `true`.
+    */
+    boost::optional<std::uint64_t>
+    content_length() const;
+
+    /** Returns `true` if the message semantics require an end of file.
+
+        Depending on the contents of the header, the parser may
+        require an end of file notification to know where the end
+        of the body lies. If this function returns `true` it will be
+        necessary to call @ref put_eof when there will never be additional
+        data from the input.
+    */
+    bool
+    need_eof() const
+    {
+        return (f_ & flagNeedEOF) != 0;
+    }
+
+    /** Set the limit on the payload body.
+
+        This function sets the maximum allowed size of the payload body,
+        before any encodings except chunked have been removed. Depending
+        on the message semantics, one of these cases will apply:
+
+        @li The Content-Length is specified and exceeds the limit. In
+        this case the result @ref error::body_limit is returned
+        immediately after the header is parsed.
+
+        @li The Content-Length is unspecified and the chunked encoding
+        is not specified as the last encoding. In this case the end of
+        message is determined by the end of file indicator on the
+        associated stream or input source. If a sufficient number of
+        body payload octets are presented to the parser to exceed the
+        configured limit, the parse fails with the result
+        @ref error::body_limit.
+
+        @li The Transfer-Encoding specifies the chunked encoding as the
+        last encoding. In this case, when the number of payload body
+        octets produced by removing the chunked encoding exceeds
+        the configured limit, the parse fails with the result
+        @ref error::body_limit.
+
+        Setting the limit after any body octets have been parsed
+        results in undefined behavior.
+
+        The default limit is 1MB for requests and 8MB for responses.
+
+        @param v The payload body limit to set
+    */
+    void
+    body_limit(std::uint64_t v)
+    {
+        body_limit_ = v;
+    }
+
+    /** Set a limit on the total size of the header.
+
+        This function sets the maximum allowed size of the header
+        including all field name, value, and delimiter characters
+        and also including the CRLF sequences in the serialized
+        input. If the end of the header is not found within the
+        limit of the header size, the error @ref error::header_limit
+        is returned by @ref put.
+
+        Setting the limit after any header octets have been parsed
+        results in undefined behavior.
+    */
+    void
+    header_limit(std::uint32_t v)
+    {
+        header_limit_ = v;
+    }
+
+    /// Returns `true` if the eager parse option is set.
+    bool
+    eager() const
+    {
+        return (f_ & flagEager) != 0;
+    }
+
+    /** Set the eager parse option.
+
+        Normally the parser returns after successfully parsing a structured
+        element (header, chunk header, or chunk body) even if there are octets
+        remaining in the input. This is necessary when attempting to parse the
+        header first, or when the caller wants to inspect information which may
+        be invalidated by subsequent parsing, such as a chunk extension. The
+        `eager` option controls whether the parser keeps going after parsing
+        a structured element if there are octets remaining in the buffer and no
+        error occurs. This option is automatically set or cleared during certain
+        stream operations to improve performance with no change in functionality.
+
+        The default setting is `false`.
+
+        @param v `true` to set the eager parse option or `false` to disable it.
+    */
+    void
+    eager(bool v)
+    {
+        if(v)
+            f_ |= flagEager;
+        else
+            f_ &= ~flagEager;
+    }
+
+    /// Returns `true` if the skip parse option is set.
+    bool
+    skip() const
+    {
+        return (f_ & flagSkipBody) != 0;
+    }
+
+    /** Set the skip parse option.
+
+        This option controls whether or not the parser expects to see an HTTP
+        body, regardless of the presence or absence of certain fields such as
+        Content-Length or a chunked Transfer-Encoding. Depending on the request,
+        some responses do not carry a body. For example, a 200 response to a
+        CONNECT request from a tunneling proxy, or a response to a HEAD request.
+        In these cases, callers may use this function to inform the parser that
+        no body is expected. The parser will consider the message complete
+        after the header has been received.
+
+        @param v `true` to set the skip body option or `false` to disable it.
+
+        @note This function must be called before any bytes are processed.
+    */
+    void
+    skip(bool v);
+
+    /** Write a buffer sequence to the parser.
+
+        This function attempts to incrementally parse the HTTP
+        message data stored in the caller provided buffers. Upon
+        success, a positive return value indicates that the parser
+        made forward progress, consuming that number of
+        bytes.
+
+        In some cases there may be an insufficient number of octets
+        in the input buffer in order to make forward progress. This
+        is indicated by the code @ref error::need_more. When
+        this happens, the caller should place additional bytes into
+        the buffer sequence and call @ref put again.
+
+        The error code @ref error::need_more is special. When this
+        error is returned, a subsequent call to @ref put may succeed
+        if the buffers have been updated. Otherwise, upon error
+        the parser may not be restarted.
+
+        @param buffers An object meeting the requirements of
+        @b ConstBufferSequence that represents the next chunk of
+        message data. If the length of this buffer sequence is
+        one, the implementation will not allocate additional memory.
+        The class @ref beast::flat_buffer is provided as one way to
+        meet this requirement.
+
+        @param ec Set to the error, if any occurred.
+
+        @return The number of octets consumed in the buffer
+        sequence. The caller should remove these octets even if the
+        error is set.
+    */
+    template<class ConstBufferSequence>
+    std::size_t
+    put(ConstBufferSequence const& buffers, error_code& ec);
+
+#if ! BOOST_BEAST_DOXYGEN
+    std::size_t
+    put(boost::asio::const_buffer const& buffer,
+        error_code& ec);
+#endif
+
+    /** Inform the parser that the end of stream was reached.
+
+        In certain cases, HTTP needs to know where the end of
+        the stream is. For example, sometimes servers send
+        responses without Content-Length and expect the client
+        to consume input (for the body) until EOF. Callbacks
+        and errors will still be processed as usual.
+
+        This is typically called when a read from the
+        underlying stream object sets the error code to
+        `boost::asio::error::eof`.
+
+        @note Only valid after parsing a complete header.
+
+        @param ec Set to the error, if any occurred.
+    */
+    void
+    put_eof(error_code& ec);
+
+private:
+    inline
+    Derived&
+    impl()
+    {
+        return *static_cast<Derived*>(this); // CRTP downcast to the derived parser
+    }
+
+    template<class ConstBufferSequence>
+    std::size_t
+    put_from_stack(std::size_t size,
+        ConstBufferSequence const& buffers,
+            error_code& ec);
+
+    void
+    maybe_need_more(
+        char const* p, std::size_t n,
+            error_code& ec);
+
+    void
+    parse_start_line(
+        char const*& p, char const* last,
+            error_code& ec, std::true_type);
+
+    void
+    parse_start_line(
+        char const*& p, char const* last,
+            error_code& ec, std::false_type);
+
+    void
+    parse_fields(
+        char const*& p, char const* last,
+            error_code& ec);
+
+    void
+    finish_header(
+        error_code& ec, std::true_type);
+
+    void
+    finish_header(
+        error_code& ec, std::false_type);
+
+    void
+    parse_body(char const*& p,
+        std::size_t n, error_code& ec);
+
+    void
+    parse_body_to_eof(char const*& p,
+        std::size_t n, error_code& ec);
+
+    void
+    parse_chunk_header(char const*& p,
+        std::size_t n, error_code& ec);
+
+    void
+    parse_chunk_body(char const*& p,
+        std::size_t n, error_code& ec);
+
+    void
+    do_field(field f,
+        string_view value, error_code& ec);
+};
+
+} // http
+} // beast
+} // boost
+
+#include <boost/beast/http/impl/basic_parser.ipp>
+
+#endif
|