diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 6 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 1 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_hashtables.c | 24 | ||||
-rw-r--r-- | net/ipv4/inet_timewait_sock.c | 61 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 2 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 27 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 59 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 23 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 10 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 18 | ||||
-rw-r--r-- | net/ipv4/tcp_probe.c | 19 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 29 | ||||
-rw-r--r-- | net/ipv4/udp.c | 7 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 14 |
19 files changed, 194 insertions, 119 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 70491d9035eb..0c94a1ac2946 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -166,7 +166,7 @@ config IP_PNP_DHCP If unsure, say Y. Note that if you want to use DHCP, a DHCP server must be operating on your network. Read - <file:Documentation/filesystems/nfsroot.txt> for details. + <file:Documentation/filesystems/nfs/nfsroot.txt> for details. config IP_PNP_BOOTP bool "IP: BOOTP support" @@ -181,7 +181,7 @@ config IP_PNP_BOOTP does BOOTP itself, providing all necessary information on the kernel command line, you can say N here. If unsure, say Y. Note that if you want to use BOOTP, a BOOTP server must be operating on your network. - Read <file:Documentation/filesystems/nfsroot.txt> for details. + Read <file:Documentation/filesystems/nfs/nfsroot.txt> for details. config IP_PNP_RARP bool "IP: RARP support" @@ -194,7 +194,7 @@ config IP_PNP_RARP older protocol which is being obsoleted by BOOTP and DHCP), say Y here. Note that if you want to use RARP, a RARP server must be operating on your network. Read - <file:Documentation/filesystems/nfsroot.txt> for details. + <file:Documentation/filesystems/nfs/nfsroot.txt> for details. # not yet ready.. # bool ' IP: ARP support' CONFIG_IP_PNP_ARP diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5cdbc102a418..040c4f05b653 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1397,6 +1397,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, "accept_source_route"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), + DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3323168ee52d..82dbf711d6d0 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -252,6 +252,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); + if (mark && !IN_DEV_SRC_VMARK(in_dev)) + fl.mark = 0; } rcu_read_unlock(); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index bdb78dd180ce..1aaa8110d84b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -368,7 +368,7 @@ static int inet_diag_bc_run(const void *bc, int len, yes = entry->sport >= op[1].no; break; case INET_DIAG_BC_S_LE: - yes = entry->dport <= op[1].no; + yes = entry->sport <= op[1].no; break; case INET_DIAG_BC_D_GE: yes = entry->dport >= op[1].no; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 21e5e32d8c60..2b79377b468d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -351,12 +351,13 @@ static inline u32 inet_sk_port_offset(const struct sock *sk) inet->inet_dport); } -void __inet_hash_nolisten(struct sock *sk) +int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; spinlock_t *lock; struct inet_ehash_bucket *head; + int twrefcnt = 0; WARN_ON(!sk_unhashed(sk)); @@ -367,8 +368,13 @@ void __inet_hash_nolisten(struct sock *sk) spin_lock(lock); __sk_nulls_add_node_rcu(sk, list); + if (tw) { + WARN_ON(sk->sk_hash != tw->tw_hash); + twrefcnt = inet_twsk_unhash(tw); + } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + return twrefcnt; } EXPORT_SYMBOL_GPL(__inet_hash_nolisten); @@ -378,7 +384,7 @@ static void __inet_hash(struct sock *sk) struct inet_listen_hashbucket *ilb; if (sk->sk_state != TCP_LISTEN) { - __inet_hash_nolisten(sk); + __inet_hash_nolisten(sk, NULL); return; } @@ -427,7 +433,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u32 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **), - void (*hash)(struct sock *sk)) + int (*hash)(struct sock *sk, struct inet_timewait_sock *twp)) { struct inet_hashinfo *hinfo = death_row->hashinfo; const unsigned short snum = inet_sk(sk)->inet_num; @@ -435,6 +441,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct inet_bind_bucket *tb; int ret; struct net *net = sock_net(sk); + int twrefcnt = 1; if (!snum) { int i, remaining, low, high, port; @@ -493,13 +500,18 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); - hash(sk); + twrefcnt += hash(sk, tw); } + if (tw) + twrefcnt += inet_twsk_bind_unhash(tw, hinfo); spin_unlock(&head->lock); if (tw) { inet_twsk_deschedule(tw, death_row); - inet_twsk_put(tw); + while (twrefcnt) { + twrefcnt--; + inet_twsk_put(tw); + } } ret = 0; @@ -510,7 +522,7 @@ ok: tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - hash(sk); + hash(sk, NULL); spin_unlock_bh(&head->lock); return 0; } else { diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 0fdf45e4c90c..cc94cc2d8b2d 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -15,9 +15,13 @@ #include <net/ip.h> -/* - * unhash a timewait socket from established hash - * lock must be hold by caller +/** + * inet_twsk_unhash - unhash a timewait socket from established hash + * @tw: timewait socket + * + * unhash a timewait socket from established hash, if hashed. + * ehash lock must be held by caller. + * Returns 1 if caller should call inet_twsk_put() after lock release. */ int inet_twsk_unhash(struct inet_timewait_sock *tw) { @@ -26,6 +30,37 @@ int inet_twsk_unhash(struct inet_timewait_sock *tw) hlist_nulls_del_rcu(&tw->tw_node); sk_nulls_node_init(&tw->tw_node); + /* + * We cannot call inet_twsk_put() ourself under lock, + * caller must call it for us. + */ + return 1; +} + +/** + * inet_twsk_bind_unhash - unhash a timewait socket from bind hash + * @tw: timewait socket + * @hashinfo: hashinfo pointer + * + * unhash a timewait socket from bind hash, if hashed. + * bind hash lock must be held by caller. + * Returns 1 if caller should call inet_twsk_put() after lock release. + */ +int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, + struct inet_hashinfo *hashinfo) +{ + struct inet_bind_bucket *tb = tw->tw_tb; + + if (!tb) + return 0; + + __hlist_del(&tw->tw_bind_node); + tw->tw_tb = NULL; + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + /* + * We cannot call inet_twsk_put() ourself under lock, + * caller must call it for us. + */ return 1; } @@ -34,7 +69,6 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) { struct inet_bind_hashbucket *bhead; - struct inet_bind_bucket *tb; int refcnt; /* Unlink from established hashes. */ spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); @@ -46,15 +80,11 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, hashinfo->bhash_size)]; + spin_lock(&bhead->lock); - tb = tw->tw_tb; - if (tb) { - __hlist_del(&tw->tw_bind_node); - tw->tw_tb = NULL; - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); - refcnt++; - } + refcnt += inet_twsk_bind_unhash(tw, hashinfo); spin_unlock(&bhead->lock); + #ifdef SOCK_REFCNT_DEBUG if (atomic_read(&tw->tw_refcnt) != 1) { printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", @@ -126,7 +156,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, /* * Notes : - * - We initially set tw_refcnt to 0 in inet_twsk_alloc() + * - We initially set tw_refcnt to 0 in inet_twsk_alloc() * - We add one reference for the bhash link * - We add one reference for the ehash link * - We want this refcnt update done before allowing other @@ -136,7 +166,6 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, spin_unlock(lock); } - EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) @@ -177,7 +206,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat return tw; } - EXPORT_SYMBOL_GPL(inet_twsk_alloc); /* Returns non-zero if quota exceeded. */ @@ -256,7 +284,6 @@ void inet_twdr_hangman(unsigned long data) out: spin_unlock(&twdr->death_lock); } - EXPORT_SYMBOL_GPL(inet_twdr_hangman); void inet_twdr_twkill_work(struct work_struct *work) @@ -287,7 +314,6 @@ void inet_twdr_twkill_work(struct work_struct *work) spin_unlock_bh(&twdr->death_lock); } } - EXPORT_SYMBOL_GPL(inet_twdr_twkill_work); /* These are always called from BH context. See callers in @@ -307,7 +333,6 @@ void inet_twsk_deschedule(struct inet_timewait_sock *tw, spin_unlock(&twdr->death_lock); __inet_twsk_kill(tw, twdr->hashinfo); } - EXPORT_SYMBOL(inet_twsk_deschedule); void inet_twsk_schedule(struct inet_timewait_sock *tw, @@ -388,7 +413,6 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, mod_timer(&twdr->tw_timer, jiffies + twdr->period); spin_unlock(&twdr->death_lock); } - EXPORT_SYMBOL_GPL(inet_twsk_schedule); void inet_twdr_twcal_tick(unsigned long data) @@ -449,7 +473,6 @@ out: #endif spin_unlock(&twdr->death_lock); } - EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); void inet_twsk_purge(struct inet_hashinfo *hashinfo, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e34013a78ef4..3451799e3dbf 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -254,7 +254,7 @@ int ip_mc_output(struct sk_buff *skb) */ if (rt->rt_flags&RTCF_MULTICAST) { - if ((!sk || inet_sk(sk)->mc_loop) + if (sk_mc_loop(sk) #ifdef CONFIG_IP_MROUTE /* Small optimization: do not loopback not local frames, which returned after forwarding; they will be dropped diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 4e08b7f2331c..10a6a604bf32 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1446,7 +1446,7 @@ late_initcall(ip_auto_config); /* * Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel - * command line parameter. See Documentation/filesystems/nfsroot.txt. + * command line parameter. See Documentation/filesystems/nfs/nfsroot.txt. */ static int __init ic_proto_name(char *name) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e446496f564f..d62b05d33384 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -586,7 +586,9 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net) { remove_proc_entry("rt_cache", net->proc_net_stat); remove_proc_entry("rt_cache", net->proc_net); +#ifdef CONFIG_NET_CLS_ROUTE remove_proc_entry("rt_acct", net->proc_net); +#endif } static struct pernet_operations ip_rt_proc_ops __net_initdata = { diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 26399ad2a289..66fd80ef2473 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -277,6 +277,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + /* check for timestamp cookie support */ + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0); + + if (tcp_opt.saw_tstamp) + cookie_check_timestamp(&tcp_opt); + ret = NULL; req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ if (!req) @@ -292,6 +299,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; ireq->ecn_ok = 0; + ireq->snd_wscale = tcp_opt.snd_wscale; + ireq->rcv_wscale = tcp_opt.rcv_wscale; + ireq->sack_ok = tcp_opt.sack_ok; + ireq->wscale_ok = tcp_opt.wscale_ok; + ireq->tstamp_ok = tcp_opt.saw_tstamp; + req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -340,20 +353,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } } - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, &rt->u.dst); - - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); - - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - /* Try to redo what tcp_v4_send_synack did. */ req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c8666b70cde0..b0a26bb25e2e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2540,11 +2540,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level, ctd.tcpct_cookie_desired = cvp->cookie_desired; ctd.tcpct_s_data_desired = cvp->s_data_desired; - /* Cookie(s) saved, return as nonce */ - if (sizeof(ctd.tcpct_value) < cvp->cookie_pair_size) { - /* impossible? */ - return -EINVAL; - } memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0], cvp->cookie_pair_size); ctd.tcpct_used = cvp->cookie_pair_size; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 57ae96a04220..28e029632493 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2717,6 +2717,35 @@ static void tcp_try_undo_dsack(struct sock *sk) } } +/* We can clear retrans_stamp when there are no retransmissions in the + * window. It would seem that it is trivially available for us in + * tp->retrans_out, however, that kind of assumptions doesn't consider + * what will happen if errors occur when sending retransmission for the + * second time. ...It could the that such segment has only + * TCPCB_EVER_RETRANS set at the present time. It seems that checking + * the head skb is enough except for some reneging corner cases that + * are not worth the effort. + * + * Main reason for all this complexity is the fact that connection dying + * time now depends on the validity of the retrans_stamp, in particular, + * that successive retransmissions of a segment must not advance + * retrans_stamp under any conditions. + */ +static int tcp_any_retrans_done(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + + if (tp->retrans_out) + return 1; + + skb = tcp_write_queue_head(sk); + if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) + return 1; + + return 0; +} + /* Undo during fast recovery after partial ACK. */ static int tcp_try_undo_partial(struct sock *sk, int acked) @@ -2729,7 +2758,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) /* Plain luck! Hole if filled with delayed * packet, rather than with a retransmit. */ - if (tp->retrans_out == 0) + if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); @@ -2788,7 +2817,7 @@ static void tcp_try_keep_open(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); int state = TCP_CA_Open; - if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) + if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker) state = TCP_CA_Disorder; if (inet_csk(sk)->icsk_ca_state != state) { @@ -2803,7 +2832,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) tcp_verify_left_out(tp); - if (!tp->frto_counter && tp->retrans_out == 0) + if (!tp->frto_counter && !tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; if (flag & FLAG_ECE) @@ -3698,7 +3727,7 @@ old_ack: * the fast version below fails. */ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - u8 **hvpp, int estab, struct dst_entry *dst) + u8 **hvpp, int estab) { unsigned char *ptr; struct tcphdr *th = tcp_hdr(skb); @@ -3737,8 +3766,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && sysctl_tcp_window_scaling && - !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) { + !estab && sysctl_tcp_window_scaling) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > 14) { @@ -3754,8 +3782,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && sysctl_tcp_timestamps && - !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) { + (!estab && sysctl_tcp_timestamps))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); @@ -3763,8 +3790,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && sysctl_tcp_sack && - !dst_feature(dst, RTAX_FEATURE_NO_SACK)) { + !estab && sysctl_tcp_sack) { opt_rx->sack_ok = 1; tcp_sack_reset(opt_rx); } @@ -3849,7 +3875,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, if (tcp_parse_aligned_timestamp(tp, th)) return 1; } - tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); + tcp_parse_options(skb, &tp->rx_opt, hvpp, 1); return 1; } @@ -4104,10 +4130,8 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack && - !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4136,15 +4160,13 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack && - !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) @@ -5399,11 +5421,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); struct tcp_cookie_values *cvp = tp->cookie_values; int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst); + tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0); if (th->ack) { /* rfc793: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 29002ab26e0d..65b8ebfd078a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1262,20 +1262,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; #endif - ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; - ireq->opt = tcp_v4_save_options(sk, skb); - - dst = inet_csk_route_req(sk, req); - if(!dst) - goto drop_and_free; - tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0); if (tmp_opt.cookie_plus > 0 && tmp_opt.saw_tstamp && @@ -1319,8 +1309,14 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb); + ireq = inet_rsk(req); + ireq->loc_addr = daddr; + ireq->rmt_addr = saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; + ireq->opt = tcp_v4_save_options(sk, skb); + if (security_inet_conn_request(sk, skb, req)) - goto drop_and_release; + goto drop_and_free; if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); @@ -1345,6 +1341,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && + (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && @@ -1464,7 +1461,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } #endif - __inet_hash_nolisten(newsk); + __inet_hash_nolisten(newsk, NULL); __inet_inherit_port(sk, newsk); return newsk; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 87accec8d097..f206ee5dda80 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -95,9 +95,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); int paws_reject = 0; + tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = tcptw->tw_ts_recent; @@ -526,9 +526,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); int paws_reject = 0; - if ((th->doff > (sizeof(*th) >> 2)) && (req->ts_recent)) { - tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); + tmp_opt.saw_tstamp = 0; + if (th->doff > (sizeof(struct tcphdr)>>2)) { + tcp_parse_options(skb, &tmp_opt, &hash_location, 0); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 93316a96d820..383ce237640f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -553,7 +553,6 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_cookie_values *cvp = tp->cookie_values; - struct dst_entry *dst = __sk_dst_get(sk); unsigned remaining = MAX_TCP_OPTION_SPACE; u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? tcp_cookie_size_check(cvp->cookie_desired) : @@ -581,22 +580,18 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sysctl_tcp_timestamps && - !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) && - *md5 == NULL)) { + if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { opts->options |= OPTION_TS; opts->tsval = TCP_SKB_CB(skb)->when; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sysctl_tcp_window_scaling && - !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) { + if (likely(sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } - if (likely(sysctl_tcp_sack && - !dst_feature(dst, RTAX_FEATURE_NO_SACK))) { + if (likely(sysctl_tcp_sack)) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) remaining -= TCPOLEN_SACKPERM_ALIGNED; @@ -2527,9 +2522,7 @@ static void tcp_connect_init(struct sock *sk) * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr) + - (sysctl_tcp_timestamps && - (!dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) ? - TCPOLEN_TSTAMP_ALIGNED : 0)); + (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); #ifdef CONFIG_TCP_MD5SIG if (tp->af_specific->md5_lookup(sk, sk) != NULL) @@ -2555,8 +2548,7 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - (sysctl_tcp_window_scaling && - !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)), + sysctl_tcp_window_scaling, &rcv_wscale); tp->rx_opt.rcv_wscale = rcv_wscale; diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index bb110c5ce1d2..9bc805df95d2 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -39,9 +39,9 @@ static int port __read_mostly = 0; MODULE_PARM_DESC(port, "Port to match (0=all)"); module_param(port, int, 0); -static int bufsize __read_mostly = 4096; +static unsigned int bufsize __read_mostly = 4096; MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); -module_param(bufsize, int, 0); +module_param(bufsize, uint, 0); static int full __read_mostly; MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); @@ -75,12 +75,12 @@ static struct { static inline int tcp_probe_used(void) { - return (tcp_probe.head - tcp_probe.tail) % bufsize; + return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1); } static inline int tcp_probe_avail(void) { - return bufsize - tcp_probe_used(); + return bufsize - tcp_probe_used() - 1; } /* @@ -116,7 +116,7 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, p->ssthresh = tcp_current_ssthresh(sk); p->srtt = tp->srtt >> 3; - tcp_probe.head = (tcp_probe.head + 1) % bufsize; + tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); } tcp_probe.lastcwnd = tp->snd_cwnd; spin_unlock(&tcp_probe.lock); @@ -149,7 +149,7 @@ static int tcpprobe_open(struct inode * inode, struct file * file) static int tcpprobe_sprint(char *tbuf, int n) { const struct tcp_log *p - = tcp_probe.log + tcp_probe.tail % bufsize; + = tcp_probe.log + tcp_probe.tail; struct timespec tv = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); @@ -192,7 +192,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf, width = tcpprobe_sprint(tbuf, sizeof(tbuf)); if (cnt + width < len) - tcp_probe.tail = (tcp_probe.tail + 1) % bufsize; + tcp_probe.tail = (tcp_probe.tail + 1) & (bufsize - 1); spin_unlock_bh(&tcp_probe.lock); @@ -222,9 +222,10 @@ static __init int tcpprobe_init(void) init_waitqueue_head(&tcp_probe.wait); spin_lock_init(&tcp_probe.lock); - if (bufsize < 0) + if (bufsize == 0) return -EINVAL; + bufsize = roundup_pow_of_two(bufsize); tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL); if (!tcp_probe.log) goto err0; @@ -236,7 +237,7 @@ static __init int tcpprobe_init(void) if (ret) goto err1; - pr_info("TCP probe registered (port=%d)\n", port); + pr_info("TCP probe registered (port=%d) bufsize=%u\n", port, bufsize); return 0; err1: proc_net_remove(&init_net, procname); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8353a538cd4c..8816a20c2597 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -132,6 +132,35 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) } } +/* This function calculates a "timeout" which is equivalent to the timeout of a + * TCP connection after "boundary" unsucessful, exponentially backed-off + * retransmissions with an initial RTO of TCP_RTO_MIN. + */ +static bool retransmits_timed_out(struct sock *sk, + unsigned int boundary) +{ + unsigned int timeout, linear_backoff_thresh; + unsigned int start_ts; + + if (!inet_csk(sk)->icsk_retransmits) + return false; + + if (unlikely(!tcp_sk(sk)->retrans_stamp)) + start_ts = TCP_SKB_CB(tcp_write_queue_head(sk))->when; + else + start_ts = tcp_sk(sk)->retrans_stamp; + + linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); + + if (boundary <= linear_backoff_thresh) + timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; + else + timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + + return (tcp_time_stamp - start_ts) >= timeout; +} + /* A write timeout has occurred. Process the after effects. */ static int tcp_write_timeout(struct sock *sk) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1f9534846ca9..f0126fdd7e04 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -216,9 +216,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, * force rand to be an odd multiple of UDP_HTABLE_SIZE */ rand = (rand | 1) * (udptable->mask + 1); - for (last = first + udptable->mask + 1; - first != last; - first++) { + last = first + udptable->mask + 1; + do { hslot = udp_hashslot(udptable, net, first); bitmap_zero(bitmap, PORTS_PER_CHAIN); spin_lock_bh(&hslot->lock); @@ -238,7 +237,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, snum += rand; } while (snum != first); spin_unlock_bh(&hslot->lock); - } + } while (++first != last); goto fail; } else { hslot = udp_hashslot(udptable, net, snum); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8c08a28d8f83..67107d63c1cd 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -15,7 +15,6 @@ #include <net/xfrm.h> #include <net/ip.h> -static struct dst_ops xfrm4_dst_ops; static struct xfrm_policy_afinfo xfrm4_policy_afinfo; static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, @@ -190,8 +189,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm4_garbage_collect(struct dst_ops *ops) { - xfrm4_policy_afinfo.garbage_collect(&init_net); - return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); + struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); + + xfrm4_policy_afinfo.garbage_collect(net); + return (atomic_read(&ops->entries) > ops->gc_thresh * 2); } static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -268,7 +269,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { static struct ctl_table xfrm4_policy_table[] = { { .procname = "xfrm4_gc_thresh", - .data = &xfrm4_dst_ops.gc_thresh, + .data = &init_net.xfrm.xfrm4_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, @@ -295,8 +296,6 @@ static void __exit xfrm4_policy_fini(void) void __init xfrm4_init(int rt_max_size) { - xfrm4_state_init(); - xfrm4_policy_init(); /* * Select a default value for the gc_thresh based on the main route * table hash size. It seems to me the worst case scenario is when @@ -308,6 +307,9 @@ void __init xfrm4_init(int rt_max_size) * and start cleaning when were 1/2 full */ xfrm4_dst_ops.gc_thresh = rt_max_size/2; + + xfrm4_state_init(); + xfrm4_policy_init(); #ifdef CONFIG_SYSCTL sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, xfrm4_policy_table); |