summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Stenberg <daniel@haxx.se>2008-11-01 18:35:19 +0000
committerDaniel Stenberg <daniel@haxx.se>2008-11-01 18:35:19 +0000
commit1a1b4eb4f2fb6112534994c20ac20679c5c7339c (patch)
tree76fda13f7f58853b63f184e061cb75eec6922d44
parentaa8d8b4933ca1f5de8da3f92f614e5f3236d6f89 (diff)
downloadc-ares-1a1b4eb4f2fb6112534994c20ac20679c5c7339c.tar.gz
c-ares-1a1b4eb4f2fb6112534994c20ac20679c5c7339c.tar.bz2
c-ares-1a1b4eb4f2fb6112534994c20ac20679c5c7339c.zip
- Carlo Contavalli added support for the glibc "rotate" option, as documented
in man resolv.conf: "causes round robin selection of nameservers from among those listed. This has the effect of spreading the query load among all listed servers, rather than having all clients try the first listed server first every time." You can enable it with ARES_OPT_ROTATE.
-rw-r--r--CHANGES10
-rw-r--r--ares.h2
-rw-r--r--ares_init.c11
-rw-r--r--ares_private.h8
-rw-r--r--ares_process.c50
-rw-r--r--ares_send.c8
6 files changed, 62 insertions, 27 deletions
diff --git a/CHANGES b/CHANGES
index dd9bac6..c590079 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,15 @@
Changelog for the c-ares project
+* Nov 1 2008 (Daniel Stenberg)
+- Carlo Contavalli added support for the glibc "rotate" option, as documented
+ in man resolv.conf:
+
+ causes round robin selection of nameservers from among those listed. This
+ has the effect of spreading the query load among all listed servers, rather
+ than having all clients try the first listed server first every time.
+
+ You can enable it with ARES_OPT_ROTATE
+
* Oct 21 2008 (Yang Tse)
Charles Hardin added handling of EINPROGRESS for UDP connects.
diff --git a/ares.h b/ares.h
index 3609f86..1507f41 100644
--- a/ares.h
+++ b/ares.h
@@ -114,6 +114,7 @@ extern "C" {
#define ARES_OPT_SOCK_SNDBUF (1 << 11)
#define ARES_OPT_SOCK_RCVBUF (1 << 12)
#define ARES_OPT_TIMEOUTMS (1 << 13)
+#define ARES_OPT_ROTATE (1 << 14)
/* Nameinfo flag values */
#define ARES_NI_NOFQDN (1 << 0)
@@ -184,6 +185,7 @@ struct ares_options {
int timeout; /* in seconds or milliseconds, depending on options */
int tries;
int ndots;
+ int rotate;
unsigned short udp_port;
unsigned short tcp_port;
int socket_send_buffer_size;
diff --git a/ares_init.c b/ares_init.c
index 4d4ba1e..4a147fc 100644
--- a/ares_init.c
+++ b/ares_init.c
@@ -144,6 +144,7 @@ int ares_init_options(ares_channel *channelptr, struct ares_options *options,
channel->timeout = -1;
channel->tries = -1;
channel->ndots = -1;
+ channel->rotate = -1;
channel->udp_port = -1;
channel->tcp_port = -1;
channel->socket_send_buffer_size = -1;
@@ -159,6 +160,7 @@ int ares_init_options(ares_channel *channelptr, struct ares_options *options,
channel->sock_state_cb = NULL;
channel->sock_state_cb_data = NULL;
+ channel->last_server = 0;
channel->last_timeout_processed = (time_t)now.tv_sec;
/* Initialize our lists of queries */
@@ -352,6 +354,8 @@ static int init_by_options(ares_channel channel,
channel->tries = options->tries;
if ((optmask & ARES_OPT_NDOTS) && channel->ndots == -1)
channel->ndots = options->ndots;
+ if ((optmask & ARES_OPT_ROTATE) && channel->rotate == -1)
+ channel->rotate = options->rotate;
if ((optmask & ARES_OPT_UDP_PORT) && channel->udp_port == -1)
channel->udp_port = options->udp_port;
if ((optmask & ARES_OPT_TCP_PORT) && channel->tcp_port == -1)
@@ -932,6 +936,8 @@ static int init_by_defaults(ares_channel channel)
channel->tries = DEFAULT_TRIES;
if (channel->ndots == -1)
channel->ndots = 1;
+ if (channel->rotate == -1)
+ channel->rotate = 0;
if (channel->udp_port == -1)
channel->udp_port = htons(NAMESERVER_PORT);
if (channel->tcp_port == -1)
@@ -1302,6 +1308,9 @@ static int set_options(ares_channel channel, const char *str)
val = try_option(p, q, "retry:");
if (val && channel->tries == -1)
channel->tries = atoi(val);
+ val = try_option(p, q, "rotate");
+ if (val && channel->rotate == -1)
+ channel->rotate = 1;
p = q;
while (ISSPACE(*p))
p++;
@@ -1374,7 +1383,7 @@ static char *try_config(char *s, const char *opt)
static const char *try_option(const char *p, const char *q, const char *opt)
{
size_t len = strlen(opt);
- return ((size_t)(q - p) > len && !strncmp(p, opt, len)) ? &p[len] : NULL;
+ return ((size_t)(q - p) >= len && !strncmp(p, opt, len)) ? &p[len] : NULL;
}
#ifndef WIN32
diff --git a/ares_private.h b/ares_private.h
index 0b4edf9..976fa9f 100644
--- a/ares_private.h
+++ b/ares_private.h
@@ -195,8 +195,8 @@ struct query {
void *arg;
/* Query status */
- int try;
- int server;
+ int try; /* Number of times we tried this query already. */
+ int server; /* Server this query has last been sent to. */
struct query_server_info *server_info; /* per-server state */
int using_tcp;
int error_status;
@@ -242,6 +242,7 @@ struct ares_channeldata {
int timeout; /* in milliseconds */
int tries;
int ndots;
+ int rotate; /* if true, all servers specified are used */
int udp_port;
int tcp_port;
int socket_send_buffer_size;
@@ -268,6 +269,9 @@ struct ares_channeldata {
just to draw the line somewhere. */
time_t last_timeout_processed;
+ /* Last server we sent a query to. */
+ int last_server;
+
/* Circular, doubly-linked list of queries, bucketed various ways.... */
/* All active queries in a single list: */
struct list_node all_queries;
diff --git a/ares_process.c b/ares_process.c
index 675af48..b7f375e 100644
--- a/ares_process.c
+++ b/ares_process.c
@@ -670,30 +670,33 @@ static void skip_server(ares_channel channel, struct query *query,
static void next_server(ares_channel channel, struct query *query,
struct timeval *now)
{
- /* Advance to the next server or try. */
- query->server++;
- for (; query->try < channel->tries; query->try++)
+ /* We need to try each server channel->tries times. We have channel->nservers
+ * servers to try. In total, we need to do channel->nservers * channel->tries
+ * attempts. Use query->try to remember how many times we already attempted
+ * this query. Use modular arithmetic to find the next server to try. */
+ while (++(query->try) < (channel->nservers * channel->tries))
{
- for (; query->server < channel->nservers; query->server++)
+ struct server_state *server;
+
+ /* Move on to the next server. */
+ query->server = (query->server + 1) % channel->nservers;
+ server = &channel->servers[query->server];
+
+ /* We don't want to use this server if (1) we decided this
+ * connection is broken, and thus about to be closed, (2)
+ * we've decided to skip this server because of earlier
+ * errors we encountered, or (3) we already sent this query
+ * over this exact connection.
+ */
+ if (!server->is_broken &&
+ !query->server_info[query->server].skip_server &&
+ !(query->using_tcp &&
+ (query->server_info[query->server].tcp_connection_generation ==
+ server->tcp_connection_generation)))
{
- struct server_state *server = &channel->servers[query->server];
- /* We don't want to use this server if (1) we decided this
- * connection is broken, and thus about to be closed, (2)
- * we've decided to skip this server because of earlier
- * errors we encountered, or (3) we already sent this query
- * over this exact connection.
- */
- if (!server->is_broken &&
- !query->server_info[query->server].skip_server &&
- !(query->using_tcp &&
- (query->server_info[query->server].tcp_connection_generation ==
- server->tcp_connection_generation)))
- {
- ares__send_query(channel, query, now);
- return;
- }
+ ares__send_query(channel, query, now);
+ return;
}
- query->server = 0;
/* You might think that with TCP we only need one try. However,
* even when using TCP, servers can time-out our connection just
@@ -702,6 +705,8 @@ static void next_server(ares_channel channel, struct query *query,
* tickle a bug that drops our request.
*/
}
+
+ /* If we are here, all attempts to perform query failed. */
end_query(channel, query, query->error_status, NULL, 0);
}
@@ -775,8 +780,7 @@ void ares__send_query(ares_channel channel, struct query *query,
}
query->timeout = *now;
ares__timeadd(&query->timeout,
- (query->try == 0) ? channel->timeout
- : channel->timeout << query->try / channel->nservers);
+ channel->timeout << (query->try / channel->nservers));
/* Keep track of queries bucketed by timeout, so we can process
* timeout events quickly.
*/
diff --git a/ares_send.c b/ares_send.c
index a5811d3..be5478d 100644
--- a/ares_send.c
+++ b/ares_send.c
@@ -95,7 +95,13 @@ void ares_send(ares_channel channel, const unsigned char *qbuf, int qlen,
/* Initialize query status. */
query->try = 0;
- query->server = 0;
+
+ /* Choose the server to send the query to. If rotation is enabled, keep track
+ * of the next server we want to use. */
+ query->server = channel->last_server;
+ if (channel->rotate == 1)
+ channel->last_server = (channel->last_server + 1) % channel->nservers;
+
for (i = 0; i < channel->nservers; i++)
{
query->server_info[i].skip_server = 0;