From 3820c3f3e41786322c0bb225b9c77b8deff869d1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 29 Jun 2006 20:11:25 -0700 Subject: [TCP]: Reset gso_segs if packet is dodgy I wasn't paranoid enough in verifying GSO information. A bogus gso_segs could upset drivers as much as a bogus header would. Let's reset it in the per-protocol gso_segment functions. I didn't verify gso_size because that can be verified by the source of the dodgy packets. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0336422c88a0..0bb0ac96d675 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2166,13 +2166,19 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) if (!pskb_may_pull(skb, thlen)) goto out; - segs = NULL; - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) - goto out; - oldlen = (u16)~skb->len; __skb_pull(skb, thlen); + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int mss = skb_shinfo(skb)->gso_size; + + skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss; + + segs = NULL; + goto out; + } + segs = skb_segment(skb, features); if (IS_ERR(segs)) goto out; -- cgit v1.2.3 From 29315e8770c20cbfe607ad962d87867115a44555 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 29 Jun 2006 20:12:30 -0700 Subject: [TG3]: Add tg3_netif_stop() in vlan functions Add tg3_netif_stop() when changing the vlgrp (vlan group) pointer. It is necessary to quiesce the device before changing that pointer. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 35f931638750..2447fa3b471b 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -8738,6 +8738,9 @@ static void tg3_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) { struct tg3 *tp = netdev_priv(dev); + if (netif_running(dev)) + tg3_netif_stop(tp); + tg3_full_lock(tp, 0); tp->vlgrp = grp; @@ -8746,16 +8749,25 @@ static void tg3_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) __tg3_set_rx_mode(dev); tg3_full_unlock(tp); + + if (netif_running(dev)) + tg3_netif_start(tp); } static void tg3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) { struct tg3 *tp = netdev_priv(dev); + if (netif_running(dev)) + tg3_netif_stop(tp); + tg3_full_lock(tp, 0); if (tp->vlgrp) tp->vlgrp->vlan_devices[vid] = NULL; tg3_full_unlock(tp); + + if (netif_running(dev)) + tg3_netif_start(tp); } #endif -- cgit v1.2.3 From f92905deb9bc89834dac247ca1a0d905ebcf629b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 29 Jun 2006 20:14:29 -0700 Subject: [TG3]: Add rx BD workaround Add workaround to limit the burst size of rx BDs being DMA'ed to the chip. This works around hardware errata on a number of 5750, 5752, and 5755 chips. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 30 ++++++++++++++++++++++++++++-- drivers/net/tg3.h | 1 + 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 2447fa3b471b..c32655ca3c46 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -3195,7 +3195,7 @@ static int tg3_vlan_rx(struct tg3 *tp, struct sk_buff *skb, u16 vlan_tag) */ static int tg3_rx(struct tg3 *tp, int budget) { - u32 work_mask; + u32 work_mask, rx_std_posted = 0; u32 sw_idx = tp->rx_rcb_ptr; u16 hw_idx; int received; @@ -3222,6 +3222,7 @@ static int tg3_rx(struct tg3 *tp, int budget) mapping); skb = tp->rx_std_buffers[desc_idx].skb; post_ptr = &tp->rx_std_ptr; + rx_std_posted++; } else if (opaque_key == RXD_OPAQUE_RING_JUMBO) { dma_addr = pci_unmap_addr(&tp->rx_jumbo_buffers[desc_idx], mapping); @@ -3309,6 +3310,15 @@ static int tg3_rx(struct tg3 *tp, int budget) next_pkt: (*post_ptr)++; + + if (unlikely(rx_std_posted >= tp->rx_std_max_post)) { + u32 idx = *post_ptr % TG3_RX_RING_SIZE; + + tw32_rx_mbox(MAILBOX_RCV_STD_PROD_IDX + + TG3_64BIT_REG_LOW, idx); + work_mask &= ~RXD_OPAQUE_RING_STD; + rx_std_posted = 0; + } next_pkt_nopost: sw_idx++; sw_idx %= TG3_RX_RCB_RING_SIZE(tp); @@ -5981,7 +5991,13 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) } /* Setup replenish threshold. */ - tw32(RCVBDI_STD_THRESH, tp->rx_pending / 8); + val = tp->rx_pending / 8; + if (val == 0) + val = 1; + else if (val > tp->rx_std_max_post) + val = tp->rx_std_max_post; + + tw32(RCVBDI_STD_THRESH, val); /* Initialize TG3_BDINFO's at: * RCVDBDI_STD_BD: standard eth size rx ring @@ -10545,6 +10561,16 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) (tp->tg3_flags & TG3_FLAG_PCIX_MODE) != 0) tp->rx_offset = 0; + tp->rx_std_max_post = TG3_RX_RING_SIZE; + + /* Increment the rx prod index on the rx std ring by at most + * 8 for these chips to workaround hw errata. + */ + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5752 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755) + tp->rx_std_max_post = 8; + /* By default, disable wake-on-lan. User can change this * using ETHTOOL_SWOL. */ diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 8209da5dd15f..e9177f8521df 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2137,6 +2137,7 @@ struct tg3 { struct tg3_rx_buffer_desc *rx_std; struct ring_info *rx_std_buffers; dma_addr_t rx_std_mapping; + u32 rx_std_max_post; struct tg3_rx_buffer_desc *rx_jumbo; struct ring_info *rx_jumbo_buffers; -- cgit v1.2.3 From 1661394e78b3b2cc868cd0e89c1066974302aaca Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 29 Jun 2006 20:15:13 -0700 Subject: [TG3]: Turn on hw fix for ASF problems Clear a bit to enable a hardware fix for some ASF related problem. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 8 ++++++-- drivers/net/tg3.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index c32655ca3c46..2c36e70e37e5 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -6157,8 +6157,12 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) #endif /* Receive/send statistics. */ - if ((rdmac_mode & RDMAC_MODE_FIFO_SIZE_128) && - (tp->tg3_flags2 & TG3_FLG2_TSO_CAPABLE)) { + if (tp->tg3_flags2 & TG3_FLG2_5750_PLUS) { + val = tr32(RCVLPC_STATS_ENABLE); + val &= ~RCVLPC_STATSENAB_DACK_FIX; + tw32(RCVLPC_STATS_ENABLE, val); + } else if ((rdmac_mode & RDMAC_MODE_FIFO_SIZE_128) && + (tp->tg3_flags2 & TG3_FLG2_TSO_CAPABLE)) { val = tr32(RCVLPC_STATS_ENABLE); val &= ~RCVLPC_STATSENAB_LNGBRST_RFIX; tw32(RCVLPC_STATS_ENABLE, val); diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index e9177f8521df..97a860433b50 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -760,6 +760,7 @@ #define RCVLPC_STATSCTRL_ENABLE 0x00000001 #define RCVLPC_STATSCTRL_FASTUPD 0x00000002 #define RCVLPC_STATS_ENABLE 0x00002018 +#define RCVLPC_STATSENAB_DACK_FIX 0x00040000 #define RCVLPC_STATSENAB_LNGBRST_RFIX 0x00400000 #define RCVLPC_STATS_INCMASK 0x0000201c /* 0x2020 --> 0x2100 unused */ -- cgit v1.2.3 From 52c0fd834ea0e7c6ef8616ce0a1f85bac4233ed7 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 29 Jun 2006 20:15:54 -0700 Subject: [TG3]: Add TSO workaround using GSO Use GSO to workaround a rare TSO bug on some chips. This hardware bug may be triggered when the TSO header size is greater than 80 bytes. When this condition is detected in a TSO packet, the driver will use GSO to segment the packet to workaround the hardware bug. Thanks to Juergen Kreileder for reporting the problem and collecting traces to help debug the problem. And thanks to Herbert Xu for providing the GSO mechanism that happens to be the perfect workaround for this problem. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++---- drivers/net/tg3.h | 3 ++- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 2c36e70e37e5..63d4b2c797e8 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -3880,6 +3880,40 @@ out_unlock: return NETDEV_TX_OK; } +#if TG3_TSO_SUPPORT != 0 +static int tg3_start_xmit_dma_bug(struct sk_buff *, struct net_device *); + +/* Use GSO to workaround a rare TSO bug that may be triggered when the + * TSO header is greater than 80 bytes. + */ +static int tg3_tso_bug(struct tg3 *tp, struct sk_buff *skb) +{ + struct sk_buff *segs, *nskb; + + /* Estimate the number of fragments in the worst case */ + if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->gso_segs * 3))) { + netif_stop_queue(tp->dev); + return NETDEV_TX_BUSY; + } + + segs = skb_gso_segment(skb, tp->dev->features & ~NETIF_F_TSO); + if (unlikely(IS_ERR(segs))) + goto tg3_tso_bug_end; + + do { + nskb = segs; + segs = segs->next; + nskb->next = NULL; + tg3_start_xmit_dma_bug(nskb, tp->dev); + } while (segs); + +tg3_tso_bug_end: + dev_kfree_skb(skb); + + return NETDEV_TX_OK; +} +#endif + /* hard_start_xmit for devices that have the 4G bug and/or 40-bit bug and * support TG3_FLG2_HW_TSO_1 or firmware TSO only. */ @@ -3916,7 +3950,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) mss = 0; if (skb->len > (tp->dev->mtu + ETH_HLEN) && (mss = skb_shinfo(skb)->gso_size) != 0) { - int tcp_opt_len, ip_tcp_len; + int tcp_opt_len, ip_tcp_len, hdr_len; if (skb_header_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { @@ -3927,11 +3961,16 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) tcp_opt_len = ((skb->h.th->doff - 5) * 4); ip_tcp_len = (skb->nh.iph->ihl * 4) + sizeof(struct tcphdr); + hdr_len = ip_tcp_len + tcp_opt_len; + if (unlikely((ETH_HLEN + hdr_len) > 80) && + (tp->tg3_flags2 & TG3_FLG2_HW_TSO_1_BUG)) + return (tg3_tso_bug(tp, skb)); + base_flags |= (TXD_FLAG_CPU_PRE_DMA | TXD_FLAG_CPU_POST_DMA); skb->nh.iph->check = 0; - skb->nh.iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len); + skb->nh.iph->tot_len = htons(mss + hdr_len); if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) { skb->h.th->check = 0; base_flags &= ~TXD_FLAG_TCPUDP_CSUM; @@ -10192,8 +10231,14 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787) { tp->tg3_flags2 |= TG3_FLG2_HW_TSO_2; tp->tg3_flags2 |= TG3_FLG2_1SHOT_MSI; - } else - tp->tg3_flags2 |= TG3_FLG2_HW_TSO_1; + } else { + tp->tg3_flags2 |= TG3_FLG2_HW_TSO_1 | + TG3_FLG2_HW_TSO_1_BUG; + if (GET_ASIC_REV(tp->pci_chip_rev_id) == + ASIC_REV_5750 && + tp->pci_chip_rev_id >= CHIPREV_ID_5750_C2) + tp->tg3_flags2 &= ~TG3_FLG2_HW_TSO_1_BUG; + } } if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 && diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 97a860433b50..ba2c98711c88 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -125,6 +125,7 @@ #define CHIPREV_ID_5750_A0 0x4000 #define CHIPREV_ID_5750_A1 0x4001 #define CHIPREV_ID_5750_A3 0x4003 +#define CHIPREV_ID_5750_C2 0x4202 #define CHIPREV_ID_5752_A0_HW 0x5000 #define CHIPREV_ID_5752_A0 0x6000 #define CHIPREV_ID_5752_A1 0x6001 @@ -2193,7 +2194,7 @@ struct tg3 { #define TG3_FLAG_INIT_COMPLETE 0x80000000 u32 tg3_flags2; #define TG3_FLG2_RESTART_TIMER 0x00000001 -/* 0x00000002 available */ +#define TG3_FLG2_HW_TSO_1_BUG 0x00000002 #define TG3_FLG2_NO_ETH_WIRE_SPEED 0x00000004 #define TG3_FLG2_IS_5788 0x00000008 #define TG3_FLG2_MAX_RXPEND_64 0x00000010 -- cgit v1.2.3 From 2c6059bca8cf5e7f722d909f2e5edda0491ac604 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 29 Jun 2006 20:16:28 -0700 Subject: [TG3]: Update version and reldate Update version to 3.61. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 63d4b2c797e8..47c96fc01265 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -69,8 +69,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.60" -#define DRV_MODULE_RELDATE "June 17, 2006" +#define DRV_MODULE_VERSION "3.61" +#define DRV_MODULE_RELDATE "June 29, 2006" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 -- cgit v1.2.3 From dd7271feba61d5dc0fab1cb5365db9926d35ea3a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 29 Jun 2006 21:40:23 -0700 Subject: [NETFILTER]: SCTP conntrack: fix crash triggered by packet without chunks When a packet without any chunks is received, the newconntrack variable in sctp_packet contains an out of bounds value that is used to look up an pointer from the array of timeouts, which is then dereferenced, resulting in a crash. Make sure at least a single chunk is present. Problem noticed by George A. Theall Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 0416073c5600..2d3612cd5f18 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -254,7 +254,7 @@ static int do_basic_checks(struct ip_conntrack *conntrack, } DEBUGP("Basic checks passed\n"); - return 0; + return count == 0; } static int new_state(enum ip_conntrack_dir dir, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 0839b701b930..9bd8a7877fd5 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -261,7 +261,7 @@ static int do_basic_checks(struct nf_conn *conntrack, } DEBUGP("Basic checks passed\n"); - return 0; + return count == 0; } static int new_state(enum ip_conntrack_dir dir, -- cgit v1.2.3 From 2889139a6acd2945f6143eb85f7dc2a22a352e1a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 30 Jun 2006 13:35:46 -0700 Subject: [IPV6]: Remove redundant length check on input We don't need to check skb->len when we're just about to call pskb_may_pull since that checks it for us. Signed-off-by: Herbert Xu Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index aceee252503d..df8f051c0fce 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -84,14 +84,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt */ IP6CB(skb)->iif = skb->dst ? ((struct rt6_info *)skb->dst)->rt6i_idev->dev->ifindex : dev->ifindex; - if (skb->len < sizeof(struct ipv6hdr)) + if (unlikely(!pskb_may_pull(skb, sizeof(*hdr)))) goto err; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); - goto drop; - } - hdr = skb->nh.ipv6h; if (hdr->version != 6) -- cgit v1.2.3 From adcfc7d0b4d7bc3c7edac6fdde9f3ae510bd6054 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 30 Jun 2006 13:36:15 -0700 Subject: [IPV6]: Added GSO support for TCPv6 This patch adds GSO support for IPv6 and TCPv6. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/protocol.h | 6 +++++ net/ipv4/tcp.c | 1 + net/ipv6/exthdrs.c | 4 ++-- net/ipv6/ipv6_sockglue.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/tcp_ipv6.c | 1 + 5 files changed, 72 insertions(+), 2 deletions(-) diff --git a/include/net/protocol.h b/include/net/protocol.h index 40b6b9c9973f..a225d6371cb1 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -50,11 +50,17 @@ struct inet6_protocol struct inet6_skb_parm *opt, int type, int code, int offset, __u32 info); + + struct sk_buff *(*gso_segment)(struct sk_buff *skb, + int features); + unsigned int flags; /* INET6_PROTO_xxx */ }; #define INET6_PROTO_NOPOLICY 0x1 #define INET6_PROTO_FINAL 0x2 +/* This should be set for any extension header which is compatible with GSO. */ +#define INET6_PROTO_GSO_EXTHDR 0x4 #endif /* This is used to register socket interfaces for IP protocols. */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0bb0ac96d675..b7cf26e0e5c2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2215,6 +2215,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) out: return segs; } +EXPORT_SYMBOL(tcp_tso_segment); extern void __skb_cb_too_small_for_tcp(int, int); extern struct tcp_congestion_ops tcp_reno; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a18d4256372c..9d0ee7f0eeb5 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -179,7 +179,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) static struct inet6_protocol destopt_protocol = { .handler = ipv6_destopt_rcv, - .flags = INET6_PROTO_NOPOLICY, + .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, }; void __init ipv6_destopt_init(void) @@ -340,7 +340,7 @@ looped_back: static struct inet6_protocol rthdr_protocol = { .handler = ipv6_rthdr_rcv, - .flags = INET6_PROTO_NOPOLICY, + .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, }; void __init ipv6_rthdr_init(void) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4c20eeb3d568..25f8bf8ac49b 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -58,9 +58,71 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly; +static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct ipv6hdr *ipv6h; + struct inet6_protocol *ops; + int proto; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = skb->nh.ipv6h; + proto = ipv6h->nexthdr; + __skb_pull(skb, sizeof(*ipv6h)); + + rcu_read_lock(); + for (;;) { + struct ipv6_opt_hdr *opth; + int len; + + if (proto != NEXTHDR_HOP) { + ops = rcu_dereference(inet6_protos[proto]); + + if (unlikely(!ops)) + goto unlock; + + if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) + break; + } + + if (unlikely(!pskb_may_pull(skb, 8))) + goto unlock; + + opth = (void *)skb->data; + len = opth->hdrlen * 8 + 8; + + if (unlikely(!pskb_may_pull(skb, len))) + goto unlock; + + proto = opth->nexthdr; + __skb_pull(skb, len); + } + + skb->h.raw = skb->data; + if (likely(ops->gso_segment)) + segs = ops->gso_segment(skb, features); + +unlock: + rcu_read_unlock(); + + if (unlikely(IS_ERR(segs))) + goto out; + + for (skb = segs; skb; skb = skb->next) { + ipv6h = skb->nh.ipv6h; + ipv6h->payload_len = htons(skb->len - skb->mac_len); + } + +out: + return segs; +} + static struct packet_type ipv6_packet_type = { .type = __constant_htons(ETH_P_IPV6), .func = ipv6_rcv, + .gso_segment = ipv6_gso_segment, }; struct ip6_ra_chain *ip6_ra_chain; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b36d5b2e7c30..bf7f8c26c488 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1606,6 +1606,7 @@ struct proto tcpv6_prot = { static struct inet6_protocol tcpv6_protocol = { .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, + .gso_segment = tcp_tso_segment, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -- cgit v1.2.3 From bcd76111178ebccedd46a9b3eaff65c78e5a70af Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 30 Jun 2006 13:36:35 -0700 Subject: [NET]: Generalise TSO-specific bits from skb_setup_caps This patch generalises the TSO-specific bits from sk_setup_caps by adding the sk_gso_type member to struct sock. This makes sk_setup_caps generic so that it can be used by TCPv6 or UFO. The only catch is that whoever uses this must provide a GSO implementation for their protocol which I think is a fair deal :) For now UFO continues to live without a GSO implementation which is OK since it doesn't use the sock caps field at the moment. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 +++++++++--- include/net/sock.h | 13 ++++++++++--- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_ipv4.c | 2 ++ net/ipv4/tcp_output.c | 9 +++------ 6 files changed, 26 insertions(+), 14 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aa2d3c12c4d8..6db03ab7cec8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -313,6 +313,7 @@ struct net_device /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 +#define NETIF_F_GSO_MASK 0xffff0000 #define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT) #define NETIF_F_UFO (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT) #define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT) @@ -991,13 +992,18 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern void linkwatch_run_queue(void); -static inline int skb_gso_ok(struct sk_buff *skb, int features) +static inline int net_gso_ok(int features, int gso_type) { - int feature = skb_shinfo(skb)->gso_size ? - skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT : 0; + int feature = gso_type << NETIF_F_GSO_SHIFT; return (features & feature) == feature; } +static inline int skb_gso_ok(struct sk_buff *skb, int features) +{ + return net_gso_ok(features, skb_shinfo(skb)->gso_size ? + skb_shinfo(skb)->gso_type : 0); +} + static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { return !skb_gso_ok(skb, dev->features); diff --git a/include/net/sock.h b/include/net/sock.h index 7136bae48c2f..7b3d6b856946 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -140,6 +140,7 @@ struct sock_common { * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, %SO_OOBINLINE settings * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) + * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_lingertime: %SO_LINGER l_linger setting * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct @@ -211,6 +212,7 @@ struct sock { gfp_t sk_allocation; int sk_sndbuf; int sk_route_caps; + int sk_gso_type; int sk_rcvlowat; unsigned long sk_flags; unsigned long sk_lingertime; @@ -1025,15 +1027,20 @@ extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); extern struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); +static inline int sk_can_gso(const struct sock *sk) +{ + return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type); +} + static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { __sk_dst_set(sk, dst); sk->sk_route_caps = dst->dev->features; if (sk->sk_route_caps & NETIF_F_GSO) - sk->sk_route_caps |= NETIF_F_TSO; - if (sk->sk_route_caps & NETIF_F_TSO) { + sk->sk_route_caps |= NETIF_F_GSO_MASK; + if (sk_can_gso(sk)) { if (dst->header_len) - sk->sk_route_caps &= ~NETIF_F_TSO; + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; else sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; } diff --git a/include/net/tcp.h b/include/net/tcp.h index 624921e76332..3cd803b0d7a5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -751,7 +751,7 @@ static inline int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) if (in_flight >= tp->snd_cwnd) return 1; - if (!(sk->sk_route_caps & NETIF_F_TSO)) + if (!sk_can_gso(sk)) return 0; left = tp->snd_cwnd - in_flight; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b7cf26e0e5c2..59e30bab0606 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -643,7 +643,7 @@ static inline int select_size(struct sock *sk, struct tcp_sock *tp) int tmp = tp->mss_cache; if (sk->sk_route_caps & NETIF_F_SG) { - if (sk->sk_route_caps & NETIF_F_TSO) + if (sk_can_gso(sk)) tmp = 0; else { int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4c6ef47eb1c3..38e001076a5f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -242,6 +242,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto failure; /* OK, now commit destination to socket. */ + sk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(sk, &rt->u.dst); if (!tp->write_seq) @@ -884,6 +885,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (!newsk) goto exit; + newsk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(newsk, dst); newtp = tcp_sk(newsk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5a7cb4a9c867..5c08ea20a18d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -510,8 +510,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || - !(sk->sk_route_caps & NETIF_F_TSO)) { + if (skb->len <= mss_now || !sk_can_gso(sk)) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -525,7 +524,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned factor /= mss_now; skb_shinfo(skb)->gso_segs = factor; skb_shinfo(skb)->gso_size = mss_now; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_type = sk->sk_gso_type; } } @@ -824,9 +823,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now = tp->mss_cache; - if (large_allowed && - (sk->sk_route_caps & NETIF_F_TSO) && - !tp->urg_mode) + if (large_allowed && sk_can_gso(sk) && !tp->urg_mode) doing_tso = 1; if (dst) { -- cgit v1.2.3 From f83ef8c0b58dac17211a4c0b6df0e2b1bd6637b1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 30 Jun 2006 13:37:03 -0700 Subject: [IPV6]: Added GSO support for TCPv6 This patch adds GSO support for IPv6 and TCPv6. This is based on a patch by Ananda Raju . His original description is: This patch enables TSO over IPv6. Currently Linux network stacks restricts TSO over IPv6 by clearing of the NETIF_F_TSO bit from "dev->features". This patch will remove this restriction. This patch will introduce a new flag NETIF_F_TSO6 which will be used to check whether device supports TSO over IPv6. If device support TSO over IPv6 then we don't clear of NETIF_F_TSO and which will make the TCP layer to create TSO packets. Any device supporting TSO over IPv6 will set NETIF_F_TSO6 flag in "dev->features" along with NETIF_F_TSO. In case when user disables TSO using ethtool, NETIF_F_TSO will get cleared from "dev->features". So even if we have NETIF_F_TSO6 we don't get TSO packets created by TCP layer. SKB_GSO_TCPV4 renamed to SKB_GSO_TCP to make it generic GSO packet. SKB_GSO_UDPV4 renamed to SKB_GSO_UDP as UFO is not a IPv4 feature. UFO is supported over IPv6 also The following table shows there is significant improvement in throughput with normal frames and CPU usage for both normal and jumbo. -------------------------------------------------- | | 1500 | 9600 | | ------------------|-------------------| | | thru CPU | thru CPU | -------------------------------------------------- | TSO OFF | 2.00 5.5% id | 5.66 20.0% id | -------------------------------------------------- | TSO ON | 2.63 78.0 id | 5.67 39.0% id | -------------------------------------------------- Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/s2io.c | 15 +++++++++------ include/linux/netdevice.h | 5 +++-- include/linux/skbuff.h | 6 ++++-- include/net/ip6_route.h | 2 +- include/net/tcp_ecn.h | 4 +--- net/ipv4/ip_output.c | 4 ++-- net/ipv6/af_inet6.c | 2 -- net/ipv6/inet6_connection_sock.c | 2 -- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/tcp_ipv6.c | 6 ++---- 10 files changed, 24 insertions(+), 26 deletions(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 3defe5d4f7d3..74764694c64f 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -3960,7 +3960,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Control_2 = 0; #ifdef NETIF_F_TSO mss = skb_shinfo(skb)->gso_size; - if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4) { + if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { txdp->Control_1 |= TXD_TCP_LSO_EN; txdp->Control_1 |= TXD_TCP_LSO_MSS(mss); } @@ -3980,7 +3980,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) } frg_len = skb->len - skb->data_len; - if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) { + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDP) { int ufo_size; ufo_size = skb_shinfo(skb)->gso_size; @@ -4009,7 +4009,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Host_Control = (unsigned long) skb; txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len); - if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDP) txdp->Control_1 |= TXD_UFO_EN; frg_cnt = skb_shinfo(skb)->nr_frags; @@ -4024,12 +4024,12 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) (sp->pdev, frag->page, frag->page_offset, frag->size, PCI_DMA_TODEVICE); txdp->Control_1 = TXD_BUFFER0_SIZE(frag->size); - if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDP) txdp->Control_1 |= TXD_UFO_EN; } txdp->Control_1 |= TXD_GATHER_CODE_LAST; - if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDP) frg_cnt++; /* as Txd0 was used for inband header */ tx_fifo = mac_control->tx_FIFO_start[queue]; @@ -4043,7 +4043,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) if (mss) val64 |= TX_FIFO_SPECIAL_FUNC; #endif - if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDP) val64 |= TX_FIFO_SPECIAL_FUNC; writeq(val64, &tx_fifo->List_Control); @@ -7020,6 +7020,9 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) dev->features |= NETIF_F_HIGHDMA; #ifdef NETIF_F_TSO dev->features |= NETIF_F_TSO; +#endif +#ifdef NETIF_F_TSO6 + dev->features |= NETIF_F_TSO6; #endif if (sp->device_type & XFRAME_II_DEVICE) { dev->features |= NETIF_F_UFO; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6db03ab7cec8..85f99f60deea 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -315,9 +315,10 @@ struct net_device #define NETIF_F_GSO_SHIFT 16 #define NETIF_F_GSO_MASK 0xffff0000 #define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT) -#define NETIF_F_UFO (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT) +#define NETIF_F_UFO (SKB_GSO_UDP << NETIF_F_GSO_SHIFT) #define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT) -#define NETIF_F_TSO_ECN (SKB_GSO_TCPV4_ECN << NETIF_F_GSO_SHIFT) +#define NETIF_F_TSO_ECN (SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT) +#define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT) #define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) #define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 59918be91d0a..57d7d4965f9a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -171,13 +171,15 @@ enum { enum { SKB_GSO_TCPV4 = 1 << 0, - SKB_GSO_UDPV4 = 1 << 1, + SKB_GSO_UDP = 1 << 1, /* This indicates the skb is from an untrusted source. */ SKB_GSO_DODGY = 1 << 2, /* This indicates the tcp segment has CWR set. */ - SKB_GSO_TCPV4_ECN = 1 << 3, + SKB_GSO_TCP_ECN = 1 << 3, + + SKB_GSO_TCPV6 = 1 << 4, }; /** diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a398ae5e30f9..ab29dafb1a6a 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -146,7 +146,7 @@ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, struct rt6_info *rt = (struct rt6_info *) dst; write_lock(&sk->sk_dst_lock); - __sk_dst_set(sk, dst); + sk_setup_caps(sk, dst); np->daddr_cache = daddr; np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; write_unlock(&sk->sk_dst_lock); diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 7bb366f70934..4629d77173f2 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -55,9 +55,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp, if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) { tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; skb->h.th->cwr = 1; - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) - skb_shinfo(skb)->gso_type |= - SKB_GSO_TCPV4_ECN; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; } } else { /* ACK or retransmitted segment: clear ECT|CE */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7624fd1d8f9f..243d2a763363 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -744,7 +744,7 @@ static inline int ip_ufo_append_data(struct sock *sk, if (!err) { /* specify the length of each IP datagram fragment*/ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; __skb_queue_tail(&sk->sk_write_queue, skb); return 0; @@ -1089,7 +1089,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, if ((sk->sk_protocol == IPPROTO_UDP) && (rt->u.dst.dev->features & NETIF_F_UFO)) { skb_shinfo(skb)->gso_size = mtu - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e19457fe4f6e..0f26073117a3 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -660,8 +660,6 @@ int inet6_sk_rebuild_header(struct sock *sk) } ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); } return 0; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index eb2865d5ae28..5624f373164d 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -187,8 +187,6 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) } ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); } skb->dst = dst_clone(dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index abb94de33768..11007c75ae02 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -230,7 +230,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, skb->priority = sk->sk_priority; mtu = dst_mtu(dst); - if ((skb->len <= mtu) || ipfragok) { + if ((skb->len <= mtu) || ipfragok || skb_shinfo(skb)->gso_size) { IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); @@ -835,7 +835,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, /* specify the length of each IP datagram fragment*/ skb_shinfo(skb)->gso_size = mtu - fragheaderlen - sizeof(struct frag_hdr); - skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; ipv6_select_ident(skb, &fhdr); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bf7f8c26c488..7ea5bea49aa9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -270,9 +270,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; + sk->sk_gso_type = SKB_GSO_TCPV6; ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -930,9 +929,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * comment in that function for the gory details. -acme */ + sk->sk_gso_type = SKB_GSO_TCPV6; ip6_dst_store(newsk, dst, NULL); - newsk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; -- cgit v1.2.3