summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorKuniyuki Iwashima <kuniyu@amazon.co.jp>2020-07-21 15:15:31 +0900
committerDavid S. Miller <davem@davemloft.net>2020-07-21 15:31:02 -0700
commitefc6b6f6c3113e8b203b9debfb72d81e0f3dcace (patch)
treec791fb65506d506e2dabcc05b6491f25652472e8 /net
parentf2b2c55e512879a05456eaf5de4d1ed2f7757509 (diff)
downloadlinux-rpi-efc6b6f6c3113e8b203b9debfb72d81e0f3dcace.tar.gz
linux-rpi-efc6b6f6c3113e8b203b9debfb72d81e0f3dcace.tar.bz2
linux-rpi-efc6b6f6c3113e8b203b9debfb72d81e0f3dcace.zip
udp: Improve load balancing for SO_REUSEPORT.
Currently, SO_REUSEPORT does not work well if connected sockets are in a UDP reuseport group. Then reuseport_has_conns() returns true and the result of reuseport_select_sock() is discarded. Also, unconnected sockets have the same score, hence only does the first unconnected socket in udp_hslot always receive all packets sent to unconnected sockets. So, the result of reuseport_select_sock() should be used for load balancing. The noteworthy point is that the unconnected sockets placed after connected sockets in sock_reuseport.socks will receive more packets than others because of the algorithm in reuseport_select_sock(). index | connected | reciprocal_scale | result --------------------------------------------- 0 | no | 20% | 40% 1 | no | 20% | 20% 2 | yes | 20% | 0% 3 | no | 20% | 40% 4 | yes | 20% | 0% If most of the sockets are connected, this can be a problem, but it still works better than now. Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets") CC: Willem de Bruijn <willemb@google.com> Reviewed-by: Benjamin Herrenschmidt <benh@amazon.com> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp> Acked-by: Willem de Bruijn <willemb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/udp.c15
-rw-r--r--net/ipv6/udp.c15
2 files changed, 18 insertions, 12 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1b7ebbcae497..99251d3c70d0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -416,7 +416,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
struct udp_hslot *hslot2,
struct sk_buff *skb)
{
- struct sock *sk, *result;
+ struct sock *sk, *result, *reuseport_result;
int score, badness;
u32 hash = 0;
@@ -426,17 +426,20 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
+ reuseport_result = NULL;
+
if (sk->sk_reuseport &&
sk->sk_state != TCP_ESTABLISHED) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result && !reuseport_has_conns(sk, false))
- return result;
+ reuseport_result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (reuseport_result && !reuseport_has_conns(sk, false))
+ return reuseport_result;
}
+
+ result = reuseport_result ? : sk;
badness = score;
- result = sk;
}
}
return result;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7d4151747340..9503c87ac0b3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -148,7 +148,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
int dif, int sdif, struct udp_hslot *hslot2,
struct sk_buff *skb)
{
- struct sock *sk, *result;
+ struct sock *sk, *result, *reuseport_result;
int score, badness;
u32 hash = 0;
@@ -158,17 +158,20 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
+ reuseport_result = NULL;
+
if (sk->sk_reuseport &&
sk->sk_state != TCP_ESTABLISHED) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result && !reuseport_has_conns(sk, false))
- return result;
+ reuseport_result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (reuseport_result && !reuseport_has_conns(sk, false))
+ return reuseport_result;
}
- result = sk;
+
+ result = reuseport_result ? : sk;
badness = score;
}
}