diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/inetpeer.c | 81 | ||||
-rw-r--r-- | net/ipv4/route.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 23 |
3 files changed, 97 insertions, 19 deletions
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index bf4a9c4808e..d4d61b694fa 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -17,6 +17,7 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/net.h> +#include <linux/workqueue.h> #include <net/ip.h> #include <net/inetpeer.h> #include <net/secure_seq.h> @@ -66,6 +67,11 @@ static struct kmem_cache *peer_cachep __read_mostly; +static LIST_HEAD(gc_list); +static const int gc_delay = 60 * HZ; +static struct delayed_work gc_work; +static DEFINE_SPINLOCK(gc_lock); + #define node_height(x) x->avl_height #define peer_avl_empty ((struct inet_peer *)&peer_fake_node) @@ -102,6 +108,50 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ +static void inetpeer_gc_worker(struct work_struct *work) +{ + struct inet_peer *p, *n; + LIST_HEAD(list); + + spin_lock_bh(&gc_lock); + list_replace_init(&gc_list, &list); + spin_unlock_bh(&gc_lock); + + if (list_empty(&list)) + return; + + list_for_each_entry_safe(p, n, &list, gc_list) { + + if(need_resched()) + cond_resched(); + + if (p->avl_left != peer_avl_empty) { + list_add_tail(&p->avl_left->gc_list, &list); + p->avl_left = peer_avl_empty; + } + + if (p->avl_right != peer_avl_empty) { + list_add_tail(&p->avl_right->gc_list, &list); + p->avl_right = peer_avl_empty; + } + + n = list_entry(p->gc_list.next, struct inet_peer, gc_list); + + if (!atomic_read(&p->refcnt)) { + list_del(&p->gc_list); + kmem_cache_free(peer_cachep, p); + } + } + + if (list_empty(&list)) + return; + + spin_lock_bh(&gc_lock); + list_splice(&list, &gc_list); + spin_unlock_bh(&gc_lock); + + schedule_delayed_work(&gc_work, gc_delay); +} /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) @@ -126,6 +176,7 @@ void __init inet_initpeers(void) 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker); } static int addr_compare(const struct inetpeer_addr *a, @@ -447,9 +498,8 @@ relookup: p->rate_last = 0; p->pmtu_expires = 0; p->pmtu_orig = 0; - p->redirect_genid = 0; memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); - + INIT_LIST_HEAD(&p->gc_list); /* Link the node. */ link_to_pool(p, base); @@ -509,3 +559,30 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) return rc; } EXPORT_SYMBOL(inet_peer_xrlim_allow); + +void inetpeer_invalidate_tree(int family) +{ + struct inet_peer *old, *new, *prev; + struct inet_peer_base *base = family_to_base(family); + + write_seqlock_bh(&base->lock); + + old = base->root; + if (old == peer_avl_empty_rcu) + goto out; + + new = peer_avl_empty_rcu; + + prev = cmpxchg(&base->root, old, new); + if (prev == old) { + base->total = 0; + spin_lock(&gc_lock); + list_add_tail(&prev->gc_list, &gc_list); + spin_unlock(&gc_lock); + schedule_delayed_work(&gc_work, gc_delay); + } + +out: + write_sequnlock_bh(&base->lock); +} +EXPORT_SYMBOL(inetpeer_invalidate_tree); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index bcacf54e541..01977479617 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -132,7 +132,6 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int rt_chain_length_max __read_mostly = 20; -static int redirect_genid; static struct delayed_work expires_work; static unsigned long expires_ljiffies; @@ -937,7 +936,7 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); - redirect_genid++; + inetpeer_invalidate_tree(AF_INET); } /* @@ -1485,10 +1484,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, peer = rt->peer; if (peer) { - if (peer->redirect_learned.a4 != new_gw || - peer->redirect_genid != redirect_genid) { + if (peer->redirect_learned.a4 != new_gw) { peer->redirect_learned.a4 = new_gw; - peer->redirect_genid = redirect_genid; atomic_inc(&__rt_peer_genid); } check_peer_redir(&rt->dst, peer); @@ -1793,8 +1790,6 @@ static void ipv4_validate_peer(struct rtable *rt) if (peer) { check_peer_pmtu(&rt->dst, peer); - if (peer->redirect_genid != redirect_genid) - peer->redirect_learned.a4 = 0; if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) check_peer_redir(&rt->dst, peer); @@ -1958,8 +1953,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, dst_init_metrics(&rt->dst, peer->metrics, false); check_peer_pmtu(&rt->dst, peer); - if (peer->redirect_genid != redirect_genid) - peer->redirect_learned.a4 = 0; + if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) { rt->rt_gateway = peer->redirect_learned.a4; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 53c8ce4046b..b5e315f1364 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1403,8 +1403,16 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, BUG_ON(!pcount); - /* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */ - if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint)) + /* Adjust counters and hints for the newly sacked sequence + * range but discard the return value since prev is already + * marked. We must tag the range first because the seq + * advancement below implicitly advances + * tcp_highest_sack_seq() when skb is highest_sack. + */ + tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, + start_seq, end_seq, dup_sack, pcount); + + if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; TCP_SKB_CB(prev)->end_seq += shifted; @@ -1430,12 +1438,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, skb_shinfo(skb)->gso_type = 0; } - /* Adjust counters and hints for the newly sacked sequence range but - * discard the return value since prev is already marked. - */ - tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, - start_seq, end_seq, dup_sack, pcount); - /* Difference in this won't matter, both ACKed by the same cumul. ACK */ TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); @@ -1583,6 +1585,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, } } + /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */ + if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) + goto fallback; + if (!skb_shift(prev, skb, len)) goto fallback; if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) @@ -2567,6 +2573,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) if (cnt > packets) { if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || + (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || (oldcnt >= packets)) break; |