summaryrefslogtreecommitdiff
path: root/boost/asio/detail/impl/epoll_reactor.ipp
diff options
context:
space:
mode:
Diffstat (limited to 'boost/asio/detail/impl/epoll_reactor.ipp')
-rw-r--r--boost/asio/detail/impl/epoll_reactor.ipp637
1 files changed, 637 insertions, 0 deletions
diff --git a/boost/asio/detail/impl/epoll_reactor.ipp b/boost/asio/detail/impl/epoll_reactor.ipp
new file mode 100644
index 0000000000..771edea67c
--- /dev/null
+++ b/boost/asio/detail/impl/epoll_reactor.ipp
@@ -0,0 +1,637 @@
+//
+// detail/impl/epoll_reactor.ipp
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//
+// Copyright (c) 2003-2012 Christopher M. Kohlhoff (chris at kohlhoff dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef BOOST_ASIO_DETAIL_IMPL_EPOLL_REACTOR_IPP
+#define BOOST_ASIO_DETAIL_IMPL_EPOLL_REACTOR_IPP
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1200)
+# pragma once
+#endif // defined(_MSC_VER) && (_MSC_VER >= 1200)
+
+#include <boost/asio/detail/config.hpp>
+
+#if defined(BOOST_ASIO_HAS_EPOLL)
+
+#include <cstddef>
+#include <sys/epoll.h>
+#include <boost/asio/detail/epoll_reactor.hpp>
+#include <boost/asio/detail/throw_error.hpp>
+#include <boost/asio/error.hpp>
+
+#if defined(BOOST_ASIO_HAS_TIMERFD)
+# include <sys/timerfd.h>
+#endif // defined(BOOST_ASIO_HAS_TIMERFD)
+
+#include <boost/asio/detail/push_options.hpp>
+
+namespace boost {
+namespace asio {
+namespace detail {
+
+epoll_reactor::epoll_reactor(boost::asio::io_service& io_service)
+ : boost::asio::detail::service_base<epoll_reactor>(io_service),
+ io_service_(use_service<io_service_impl>(io_service)),
+ mutex_(),
+ interrupter_(),
+ epoll_fd_(do_epoll_create()),
+ timer_fd_(do_timerfd_create()),
+ shutdown_(false)
+{
+ // Add the interrupter's descriptor to epoll.
+ epoll_event ev = { 0, { 0 } };
+ ev.events = EPOLLIN | EPOLLERR | EPOLLET;
+ ev.data.ptr = &interrupter_;
+ epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, interrupter_.read_descriptor(), &ev);
+ interrupter_.interrupt();
+
+ // Add the timer descriptor to epoll.
+ if (timer_fd_ != -1)
+ {
+ ev.events = EPOLLIN | EPOLLERR;
+ ev.data.ptr = &timer_fd_;
+ epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, timer_fd_, &ev);
+ }
+}
+
+epoll_reactor::~epoll_reactor()
+{
+ if (epoll_fd_ != -1)
+ close(epoll_fd_);
+ if (timer_fd_ != -1)
+ close(timer_fd_);
+}
+
+void epoll_reactor::shutdown_service()
+{
+ mutex::scoped_lock lock(mutex_);
+ shutdown_ = true;
+ lock.unlock();
+
+ op_queue<operation> ops;
+
+ while (descriptor_state* state = registered_descriptors_.first())
+ {
+ for (int i = 0; i < max_ops; ++i)
+ ops.push(state->op_queue_[i]);
+ state->shutdown_ = true;
+ registered_descriptors_.free(state);
+ }
+
+ timer_queues_.get_all_timers(ops);
+
+ io_service_.abandon_operations(ops);
+}
+
+void epoll_reactor::fork_service(boost::asio::io_service::fork_event fork_ev)
+{
+ if (fork_ev == boost::asio::io_service::fork_child)
+ {
+ if (epoll_fd_ != -1)
+ ::close(epoll_fd_);
+ epoll_fd_ = -1;
+ epoll_fd_ = do_epoll_create();
+
+ if (timer_fd_ != -1)
+ ::close(timer_fd_);
+ timer_fd_ = -1;
+ timer_fd_ = do_timerfd_create();
+
+ interrupter_.recreate();
+
+ // Add the interrupter's descriptor to epoll.
+ epoll_event ev = { 0, { 0 } };
+ ev.events = EPOLLIN | EPOLLERR | EPOLLET;
+ ev.data.ptr = &interrupter_;
+ epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, interrupter_.read_descriptor(), &ev);
+ interrupter_.interrupt();
+
+ // Add the timer descriptor to epoll.
+ if (timer_fd_ != -1)
+ {
+ ev.events = EPOLLIN | EPOLLERR;
+ ev.data.ptr = &timer_fd_;
+ epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, timer_fd_, &ev);
+ }
+
+ update_timeout();
+
+ // Re-register all descriptors with epoll.
+ mutex::scoped_lock descriptors_lock(registered_descriptors_mutex_);
+ for (descriptor_state* state = registered_descriptors_.first();
+ state != 0; state = state->next_)
+ {
+ ev.events = EPOLLIN | EPOLLERR | EPOLLHUP | EPOLLOUT | EPOLLPRI | EPOLLET;
+ ev.data.ptr = state;
+ int result = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, state->descriptor_, &ev);
+ if (result != 0)
+ {
+ boost::system::error_code ec(errno,
+ boost::asio::error::get_system_category());
+ boost::asio::detail::throw_error(ec, "epoll re-registration");
+ }
+ }
+ }
+}
+
+void epoll_reactor::init_task()
+{
+ io_service_.init_task();
+}
+
+int epoll_reactor::register_descriptor(socket_type descriptor,
+ epoll_reactor::per_descriptor_data& descriptor_data)
+{
+ descriptor_data = allocate_descriptor_state();
+
+ {
+ mutex::scoped_lock descriptor_lock(descriptor_data->mutex_);
+
+ descriptor_data->reactor_ = this;
+ descriptor_data->descriptor_ = descriptor;
+ descriptor_data->shutdown_ = false;
+ }
+
+ epoll_event ev = { 0, { 0 } };
+ ev.events = EPOLLIN | EPOLLERR | EPOLLHUP | EPOLLOUT | EPOLLPRI | EPOLLET;
+ ev.data.ptr = descriptor_data;
+ int result = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, descriptor, &ev);
+ if (result != 0)
+ return errno;
+
+ return 0;
+}
+
+int epoll_reactor::register_internal_descriptor(
+ int op_type, socket_type descriptor,
+ epoll_reactor::per_descriptor_data& descriptor_data, reactor_op* op)
+{
+ descriptor_data = allocate_descriptor_state();
+
+ {
+ mutex::scoped_lock descriptor_lock(descriptor_data->mutex_);
+
+ descriptor_data->reactor_ = this;
+ descriptor_data->descriptor_ = descriptor;
+ descriptor_data->shutdown_ = false;
+ descriptor_data->op_queue_[op_type].push(op);
+ }
+
+ epoll_event ev = { 0, { 0 } };
+ ev.events = EPOLLIN | EPOLLERR | EPOLLHUP | EPOLLOUT | EPOLLPRI | EPOLLET;
+ ev.data.ptr = descriptor_data;
+ int result = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, descriptor, &ev);
+ if (result != 0)
+ return errno;
+
+ return 0;
+}
+
+void epoll_reactor::move_descriptor(socket_type,
+ epoll_reactor::per_descriptor_data& target_descriptor_data,
+ epoll_reactor::per_descriptor_data& source_descriptor_data)
+{
+ target_descriptor_data = source_descriptor_data;
+ source_descriptor_data = 0;
+}
+
+void epoll_reactor::start_op(int op_type, socket_type descriptor,
+ epoll_reactor::per_descriptor_data& descriptor_data,
+ reactor_op* op, bool allow_speculative)
+{
+ if (!descriptor_data)
+ {
+ op->ec_ = boost::asio::error::bad_descriptor;
+ post_immediate_completion(op);
+ return;
+ }
+
+ mutex::scoped_lock descriptor_lock(descriptor_data->mutex_);
+
+ if (descriptor_data->shutdown_)
+ {
+ post_immediate_completion(op);
+ return;
+ }
+
+ if (descriptor_data->op_queue_[op_type].empty())
+ {
+ if (allow_speculative)
+ {
+ if (op_type != read_op || descriptor_data->op_queue_[except_op].empty())
+ {
+ if (op->perform())
+ {
+ descriptor_lock.unlock();
+ io_service_.post_immediate_completion(op);
+ return;
+ }
+ }
+ }
+ else
+ {
+ epoll_event ev = { 0, { 0 } };
+ ev.events = EPOLLIN | EPOLLERR | EPOLLHUP
+ | EPOLLOUT | EPOLLPRI | EPOLLET;
+ ev.data.ptr = descriptor_data;
+ epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, descriptor, &ev);
+ }
+ }
+
+ descriptor_data->op_queue_[op_type].push(op);
+ io_service_.work_started();
+}
+
+void epoll_reactor::cancel_ops(socket_type,
+ epoll_reactor::per_descriptor_data& descriptor_data)
+{
+ if (!descriptor_data)
+ return;
+
+ mutex::scoped_lock descriptor_lock(descriptor_data->mutex_);
+
+ op_queue<operation> ops;
+ for (int i = 0; i < max_ops; ++i)
+ {
+ while (reactor_op* op = descriptor_data->op_queue_[i].front())
+ {
+ op->ec_ = boost::asio::error::operation_aborted;
+ descriptor_data->op_queue_[i].pop();
+ ops.push(op);
+ }
+ }
+
+ descriptor_lock.unlock();
+
+ io_service_.post_deferred_completions(ops);
+}
+
+void epoll_reactor::deregister_descriptor(socket_type descriptor,
+ epoll_reactor::per_descriptor_data& descriptor_data, bool closing)
+{
+ if (!descriptor_data)
+ return;
+
+ mutex::scoped_lock descriptor_lock(descriptor_data->mutex_);
+
+ if (!descriptor_data->shutdown_)
+ {
+ if (closing)
+ {
+ // The descriptor will be automatically removed from the epoll set when
+ // it is closed.
+ }
+ else
+ {
+ epoll_event ev = { 0, { 0 } };
+ epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, descriptor, &ev);
+ }
+
+ op_queue<operation> ops;
+ for (int i = 0; i < max_ops; ++i)
+ {
+ while (reactor_op* op = descriptor_data->op_queue_[i].front())
+ {
+ op->ec_ = boost::asio::error::operation_aborted;
+ descriptor_data->op_queue_[i].pop();
+ ops.push(op);
+ }
+ }
+
+ descriptor_data->descriptor_ = -1;
+ descriptor_data->shutdown_ = true;
+
+ descriptor_lock.unlock();
+
+ free_descriptor_state(descriptor_data);
+ descriptor_data = 0;
+
+ io_service_.post_deferred_completions(ops);
+ }
+}
+
+void epoll_reactor::deregister_internal_descriptor(socket_type descriptor,
+ epoll_reactor::per_descriptor_data& descriptor_data)
+{
+ if (!descriptor_data)
+ return;
+
+ mutex::scoped_lock descriptor_lock(descriptor_data->mutex_);
+
+ if (!descriptor_data->shutdown_)
+ {
+ epoll_event ev = { 0, { 0 } };
+ epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, descriptor, &ev);
+
+ op_queue<operation> ops;
+ for (int i = 0; i < max_ops; ++i)
+ ops.push(descriptor_data->op_queue_[i]);
+
+ descriptor_data->descriptor_ = -1;
+ descriptor_data->shutdown_ = true;
+
+ descriptor_lock.unlock();
+
+ free_descriptor_state(descriptor_data);
+ descriptor_data = 0;
+ }
+}
+
+void epoll_reactor::run(bool block, op_queue<operation>& ops)
+{
+ // This code relies on the fact that the task_io_service queues the reactor
+ // task behind all descriptor operations generated by this function. This
+ // means, that by the time we reach this point, any previously returned
+ // descriptor operations have already been dequeued. Therefore it is now safe
+ // for us to reuse and return them for the task_io_service to queue again.
+
+ // Calculate a timeout only if timerfd is not used.
+ int timeout;
+ if (timer_fd_ != -1)
+ timeout = block ? -1 : 0;
+ else
+ {
+ mutex::scoped_lock lock(mutex_);
+ timeout = block ? get_timeout() : 0;
+ }
+
+ // Block on the epoll descriptor.
+ epoll_event events[128];
+ int num_events = epoll_wait(epoll_fd_, events, 128, timeout);
+
+#if defined(BOOST_ASIO_HAS_TIMERFD)
+ bool check_timers = (timer_fd_ == -1);
+#else // defined(BOOST_ASIO_HAS_TIMERFD)
+ bool check_timers = true;
+#endif // defined(BOOST_ASIO_HAS_TIMERFD)
+
+ // Dispatch the waiting events.
+ for (int i = 0; i < num_events; ++i)
+ {
+ void* ptr = events[i].data.ptr;
+ if (ptr == &interrupter_)
+ {
+ // No need to reset the interrupter since we're leaving the descriptor
+ // in a ready-to-read state and relying on edge-triggered notifications
+ // to make it so that we only get woken up when the descriptor's epoll
+ // registration is updated.
+
+#if defined(BOOST_ASIO_HAS_TIMERFD)
+ if (timer_fd_ == -1)
+ check_timers = true;
+#else // defined(BOOST_ASIO_HAS_TIMERFD)
+ check_timers = true;
+#endif // defined(BOOST_ASIO_HAS_TIMERFD)
+ }
+#if defined(BOOST_ASIO_HAS_TIMERFD)
+ else if (ptr == &timer_fd_)
+ {
+ check_timers = true;
+ }
+#endif // defined(BOOST_ASIO_HAS_TIMERFD)
+ else
+ {
+ // The descriptor operation doesn't count as work in and of itself, so we
+ // don't call work_started() here. This still allows the io_service to
+ // stop if the only remaining operations are descriptor operations.
+ descriptor_state* descriptor_data = static_cast<descriptor_state*>(ptr);
+ descriptor_data->set_ready_events(events[i].events);
+ ops.push(descriptor_data);
+ }
+ }
+
+ if (check_timers)
+ {
+ mutex::scoped_lock common_lock(mutex_);
+ timer_queues_.get_ready_timers(ops);
+
+#if defined(BOOST_ASIO_HAS_TIMERFD)
+ if (timer_fd_ != -1)
+ {
+ itimerspec new_timeout;
+ itimerspec old_timeout;
+ int flags = get_timeout(new_timeout);
+ timerfd_settime(timer_fd_, flags, &new_timeout, &old_timeout);
+ }
+#endif // defined(BOOST_ASIO_HAS_TIMERFD)
+ }
+}
+
+void epoll_reactor::interrupt()
+{
+ epoll_event ev = { 0, { 0 } };
+ ev.events = EPOLLIN | EPOLLERR | EPOLLET;
+ ev.data.ptr = &interrupter_;
+ epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, interrupter_.read_descriptor(), &ev);
+}
+
+int epoll_reactor::do_epoll_create()
+{
+#if defined(EPOLL_CLOEXEC)
+ int fd = epoll_create1(EPOLL_CLOEXEC);
+#else // defined(EPOLL_CLOEXEC)
+ int fd = -1;
+ errno = EINVAL;
+#endif // defined(EPOLL_CLOEXEC)
+
+ if (fd == -1 && errno == EINVAL)
+ {
+ fd = epoll_create(epoll_size);
+ if (fd != -1)
+ ::fcntl(fd, F_SETFD, FD_CLOEXEC);
+ }
+
+ if (fd == -1)
+ {
+ boost::system::error_code ec(errno,
+ boost::asio::error::get_system_category());
+ boost::asio::detail::throw_error(ec, "epoll");
+ }
+
+ return fd;
+}
+
+int epoll_reactor::do_timerfd_create()
+{
+#if defined(BOOST_ASIO_HAS_TIMERFD)
+# if defined(TFD_CLOEXEC)
+ int fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
+# else // defined(TFD_CLOEXEC)
+ int fd = -1;
+ errno = EINVAL;
+# endif // defined(TFD_CLOEXEC)
+
+ if (fd == -1 && errno == EINVAL)
+ {
+ fd = timerfd_create(CLOCK_MONOTONIC, 0);
+ if (fd != -1)
+ ::fcntl(fd, F_SETFD, FD_CLOEXEC);
+ }
+
+ return fd;
+#else // defined(BOOST_ASIO_HAS_TIMERFD)
+ return -1;
+#endif // defined(BOOST_ASIO_HAS_TIMERFD)
+}
+
+epoll_reactor::descriptor_state* epoll_reactor::allocate_descriptor_state()
+{
+ mutex::scoped_lock descriptors_lock(registered_descriptors_mutex_);
+ return registered_descriptors_.alloc();
+}
+
+void epoll_reactor::free_descriptor_state(epoll_reactor::descriptor_state* s)
+{
+ mutex::scoped_lock descriptors_lock(registered_descriptors_mutex_);
+ registered_descriptors_.free(s);
+}
+
+void epoll_reactor::do_add_timer_queue(timer_queue_base& queue)
+{
+ mutex::scoped_lock lock(mutex_);
+ timer_queues_.insert(&queue);
+}
+
+void epoll_reactor::do_remove_timer_queue(timer_queue_base& queue)
+{
+ mutex::scoped_lock lock(mutex_);
+ timer_queues_.erase(&queue);
+}
+
+void epoll_reactor::update_timeout()
+{
+#if defined(BOOST_ASIO_HAS_TIMERFD)
+ if (timer_fd_ != -1)
+ {
+ itimerspec new_timeout;
+ itimerspec old_timeout;
+ int flags = get_timeout(new_timeout);
+ timerfd_settime(timer_fd_, flags, &new_timeout, &old_timeout);
+ return;
+ }
+#endif // defined(BOOST_ASIO_HAS_TIMERFD)
+ interrupt();
+}
+
+int epoll_reactor::get_timeout()
+{
+ // By default we will wait no longer than 5 minutes. This will ensure that
+ // any changes to the system clock are detected after no longer than this.
+ return timer_queues_.wait_duration_msec(5 * 60 * 1000);
+}
+
+#if defined(BOOST_ASIO_HAS_TIMERFD)
+int epoll_reactor::get_timeout(itimerspec& ts)
+{
+ ts.it_interval.tv_sec = 0;
+ ts.it_interval.tv_nsec = 0;
+
+ long usec = timer_queues_.wait_duration_usec(5 * 60 * 1000 * 1000);
+ ts.it_value.tv_sec = usec / 1000000;
+ ts.it_value.tv_nsec = usec ? (usec % 1000000) * 1000 : 1;
+
+ return usec ? 0 : TFD_TIMER_ABSTIME;
+}
+#endif // defined(BOOST_ASIO_HAS_TIMERFD)
+
+struct epoll_reactor::perform_io_cleanup_on_block_exit
+{
+ explicit perform_io_cleanup_on_block_exit(epoll_reactor* r)
+ : reactor_(r), first_op_(0)
+ {
+ }
+
+ ~perform_io_cleanup_on_block_exit()
+ {
+ if (first_op_)
+ {
+ // Post the remaining completed operations for invocation.
+ if (!ops_.empty())
+ reactor_->io_service_.post_deferred_completions(ops_);
+
+ // A user-initiated operation has completed, but there's no need to
+ // explicitly call work_finished() here. Instead, we'll take advantage of
+ // the fact that the task_io_service will call work_finished() once we
+ // return.
+ }
+ else
+ {
+ // No user-initiated operations have completed, so we need to compensate
+ // for the work_finished() call that the task_io_service will make once
+ // this operation returns.
+ reactor_->io_service_.work_started();
+ }
+ }
+
+ epoll_reactor* reactor_;
+ op_queue<operation> ops_;
+ operation* first_op_;
+};
+
+epoll_reactor::descriptor_state::descriptor_state()
+ : operation(&epoll_reactor::descriptor_state::do_complete)
+{
+}
+
+operation* epoll_reactor::descriptor_state::perform_io(uint32_t events)
+{
+ perform_io_cleanup_on_block_exit io_cleanup(reactor_);
+ mutex::scoped_lock descriptor_lock(mutex_);
+
+ // Exception operations must be processed first to ensure that any
+ // out-of-band data is read before normal data.
+ static const int flag[max_ops] = { EPOLLIN, EPOLLOUT, EPOLLPRI };
+ for (int j = max_ops - 1; j >= 0; --j)
+ {
+ if (events & (flag[j] | EPOLLERR | EPOLLHUP))
+ {
+ while (reactor_op* op = op_queue_[j].front())
+ {
+ if (op->perform())
+ {
+ op_queue_[j].pop();
+ io_cleanup.ops_.push(op);
+ }
+ else
+ break;
+ }
+ }
+ }
+
+ // The first operation will be returned for completion now. The others will
+ // be posted for later by the io_cleanup object's destructor.
+ io_cleanup.first_op_ = io_cleanup.ops_.front();
+ io_cleanup.ops_.pop();
+ return io_cleanup.first_op_;
+}
+
+void epoll_reactor::descriptor_state::do_complete(
+ io_service_impl* owner, operation* base,
+ const boost::system::error_code& ec, std::size_t bytes_transferred)
+{
+ if (owner)
+ {
+ descriptor_state* descriptor_data = static_cast<descriptor_state*>(base);
+ uint32_t events = static_cast<uint32_t>(bytes_transferred);
+ if (operation* op = descriptor_data->perform_io(events))
+ {
+ op->complete(*owner, ec, 0);
+ }
+ }
+}
+
+} // namespace detail
+} // namespace asio
+} // namespace boost
+
+#include <boost/asio/detail/pop_options.hpp>
+
+#endif // defined(BOOST_ASIO_HAS_EPOLL)
+
+#endif // BOOST_ASIO_DETAIL_IMPL_EPOLL_REACTOR_IPP