summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Hutchings <bhutchings@solarflare.com>2012-07-30 16:11:42 +0000
committerMarkus Lehtonen <markus.lehtonen@linux.intel.com>2013-04-05 09:13:00 +0300
commitd375395fe63775b3750d435ca6be46a2f4ad4cb3 (patch)
treeab721fddf3a1abf535d0f60bb921bffd7cbd1598
parentbbc5b279baf44c891e1ddbfad41aa94d2a715e53 (diff)
downloadkernel-mfld-blackbay-d375395fe63775b3750d435ca6be46a2f4ad4cb3.tar.gz
kernel-mfld-blackbay-d375395fe63775b3750d435ca6be46a2f4ad4cb3.tar.bz2
kernel-mfld-blackbay-d375395fe63775b3750d435ca6be46a2f4ad4cb3.zip
tcp: Apply device TSO segment limit earlier
[ Upstream commit 1485348d2424e1131ea42efc033cbd9366462b01 ] Cache the device gso_max_segs in sock::sk_gso_max_segs and use it to limit the size of TSO skbs. This avoids the need to fall back to software GSO for local TCP senders. Signed-off-by: Ben Hutchings <bhutchings@solarflare.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--include/net/sock.h2
-rw-r--r--net/core/sock.c1
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_cong.c3
-rw-r--r--net/ipv4/tcp_output.c21
5 files changed, 20 insertions, 11 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index c0b938cb4b1..b2deeab84c9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -194,6 +194,7 @@ struct sock_common {
* @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
* @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
* @sk_gso_max_size: Maximum GSO segment size to build
+ * @sk_gso_max_segs: Maximum number of GSO segments
* @sk_lingertime: %SO_LINGER l_linger setting
* @sk_backlog: always used with the per-socket spinlock held
* @sk_callback_lock: used with the callbacks in the end of this struct
@@ -310,6 +311,7 @@ struct sock {
int sk_route_nocaps;
int sk_gso_type;
unsigned int sk_gso_max_size;
+ u16 sk_gso_max_segs;
int sk_rcvlowat;
unsigned long sk_lingertime;
struct sk_buff_head sk_error_queue;
diff --git a/net/core/sock.c b/net/core/sock.c
index 0531f5c4694..0783e9ae6d4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1311,6 +1311,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
} else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
sk->sk_gso_max_size = dst->dev->gso_max_size;
+ sk->sk_gso_max_segs = dst->dev->gso_max_segs;
}
}
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 09ced58e6a5..4be25418e01 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -743,7 +743,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
old_size_goal + mss_now > xmit_size_goal)) {
xmit_size_goal = old_size_goal;
} else {
- tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
+ tp->xmit_size_goal_segs =
+ min_t(u16, xmit_size_goal / mss_now,
+ sk->sk_gso_max_segs);
xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
}
}
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 850c737e08e..6cebfd2df61 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -290,7 +290,8 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
left = tp->snd_cwnd - in_flight;
if (sk_can_gso(sk) &&
left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
- left * tp->mss_cache < sk->sk_gso_max_size)
+ left * tp->mss_cache < sk->sk_gso_max_size &&
+ left < sk->sk_gso_max_segs)
return 1;
return left <= tcp_max_burst(tp);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 882e0b0964d..b306c626991 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1312,21 +1312,21 @@ static void tcp_cwnd_validate(struct sock *sk)
* when we would be allowed to send the split-due-to-Nagle skb fully.
*/
static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
- unsigned int mss_now, unsigned int cwnd)
+ unsigned int mss_now, unsigned int max_segs)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 needed, window, cwnd_len;
+ u32 needed, window, max_len;
window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
- cwnd_len = mss_now * cwnd;
+ max_len = mss_now * max_segs;
- if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
- return cwnd_len;
+ if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
+ return max_len;
needed = min(skb->len, window);
- if (cwnd_len <= needed)
- return cwnd_len;
+ if (max_len <= needed)
+ return max_len;
return needed - needed % mss_now;
}
@@ -1553,7 +1553,8 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
limit = min(send_win, cong_win);
/* If a full-sized TSO skb can be sent, do it. */
- if (limit >= sk->sk_gso_max_size)
+ if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
+ sk->sk_gso_max_segs * tp->mss_cache))
goto send_now;
/* Middle in queue won't get any more data, full sendable already? */
@@ -1779,7 +1780,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
- cwnd_quota);
+ min_t(unsigned int,
+ cwnd_quota,
+ sk->sk_gso_max_segs));
if (skb->len > limit &&
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))