summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/ax25/af_ax25.c33
-rw-r--r--net/bluetooth/hci_core.c3
-rw-r--r--net/bluetooth/hci_sync.c1
-rw-r--r--net/bridge/br_netfilter_hooks.c21
-rw-r--r--net/can/bcm.c18
-rw-r--r--net/core/dev.c25
-rw-r--r--net/core/filter.c34
-rw-r--r--net/core/net-sysfs.c1
-rw-r--r--net/core/skmsg.c5
-rw-r--r--net/dccp/proto.c33
-rw-r--r--net/ethtool/eeprom.c2
-rw-r--r--net/ipv4/inet_connection_sock.c247
-rw-r--r--net/ipv4/inet_hashtables.c193
-rw-r--r--net/ipv4/ip_gre.c15
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/ping.c10
-rw-r--r--net/ipv4/tcp.c14
-rw-r--r--net/ipv4/tcp_bpf.c3
-rw-r--r--net/ipv4/tcp_ipv4.c6
-rw-r--r--net/ipv6/addrconf.c8
-rw-r--r--net/ipv6/ip6_gre.c15
-rw-r--r--net/ipv6/route.c9
-rw-r--r--net/ipv6/seg6_hmac.c1
-rw-r--r--net/ipv6/sit.c8
-rw-r--r--net/mptcp/options.c10
-rw-r--r--net/mptcp/pm.c10
-rw-r--r--net/mptcp/pm_netlink.c46
-rw-r--r--net/mptcp/pm_userspace.c51
-rw-r--r--net/mptcp/protocol.c93
-rw-r--r--net/mptcp/protocol.h33
-rw-r--r--net/mptcp/subflow.c127
-rw-r--r--net/ncsi/ncsi-manage.c3
-rw-r--r--net/netfilter/nf_dup_netdev.c25
-rw-r--r--net/netfilter/nf_tables_api.c9
-rw-r--r--net/netfilter/nf_tables_core.c24
-rw-r--r--net/netfilter/nf_tables_trace.c44
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c2
-rw-r--r--net/netfilter/nft_meta.c13
-rw-r--r--net/netfilter/nft_numgen.c12
-rw-r--r--net/netfilter/nft_set_hash.c2
-rw-r--r--net/netfilter/nft_set_pipapo.c48
-rw-r--r--net/openvswitch/flow.c2
-rw-r--r--net/rose/rose_route.c4
-rw-r--r--net/rose/rose_timer.c34
-rw-r--r--net/sched/act_api.c22
-rw-r--r--net/sched/act_police.c2
-rw-r--r--net/sched/sch_netem.c4
-rw-r--r--net/socket.c16
-rw-r--r--net/sunrpc/clnt.c1
-rw-r--r--net/sunrpc/xdr.c2
-rw-r--r--net/tipc/core.c3
-rw-r--r--net/tipc/node.c41
-rw-r--r--net/tipc/socket.c1
-rw-r--r--net/tls/tls_main.c2
-rw-r--r--net/tls/tls_sw.c8
-rw-r--r--net/xdp/xsk.c16
-rw-r--r--net/xdp/xsk_buff_pool.c1
57 files changed, 713 insertions, 705 deletions
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 95393bb2760b..4c7030ed8d33 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1661,9 +1661,12 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
struct sock *sk = sock->sk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
+ struct sk_buff_head *sk_queue;
int copied;
int err = 0;
+ int off = 0;
+ long timeo;
lock_sock(sk);
/*
@@ -1675,10 +1678,29 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
goto out;
}
- /* Now we can treat all alike */
- skb = skb_recv_datagram(sk, flags, &err);
- if (skb == NULL)
- goto out;
+ /* We need support for non-blocking reads. */
+ sk_queue = &sk->sk_receive_queue;
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last);
+ /* If no packet is available, release_sock(sk) and try again. */
+ if (!skb) {
+ if (err != -EAGAIN)
+ goto out;
+ release_sock(sk);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err,
+ &timeo, last)) {
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off,
+ &err, &last);
+ if (skb)
+ break;
+
+ if (err != -EAGAIN)
+ goto done;
+ }
+ if (!skb)
+ goto done;
+ lock_sock(sk);
+ }
if (!sk_to_ax25(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
@@ -1725,6 +1747,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
out:
release_sock(sk);
+done:
return err;
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 59a5c1341c26..a0f99baafd35 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -571,6 +571,7 @@ int hci_dev_close(__u16 dev)
goto done;
}
+ cancel_work_sync(&hdev->power_on);
if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF))
cancel_delayed_work(&hdev->power_off);
@@ -2675,6 +2676,8 @@ void hci_unregister_dev(struct hci_dev *hdev)
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
+ cancel_work_sync(&hdev->power_on);
+
hci_cmd_sync_clear(hdev);
if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks))
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 286d6767f017..1739e8cb3291 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -4088,7 +4088,6 @@ int hci_dev_close_sync(struct hci_dev *hdev)
bt_dev_dbg(hdev, "");
- cancel_work_sync(&hdev->power_on);
cancel_delayed_work(&hdev->power_off);
cancel_delayed_work(&hdev->ncmd_timer);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 4fd882686b04..ff4779036649 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -1012,9 +1012,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
return okfn(net, sk, skb);
ops = nf_hook_entries_get_hook_ops(e);
- for (i = 0; i < e->num_hook_entries &&
- ops[i]->priority <= NF_BR_PRI_BRNF; i++)
- ;
+ for (i = 0; i < e->num_hook_entries; i++) {
+ /* These hooks have already been called */
+ if (ops[i]->priority < NF_BR_PRI_BRNF)
+ continue;
+
+ /* These hooks have not been called yet, run them. */
+ if (ops[i]->priority > NF_BR_PRI_BRNF)
+ break;
+
+ /* take a closer look at NF_BR_PRI_BRNF. */
+ if (ops[i]->hook == br_nf_pre_routing) {
+ /* This hook diverted the skb to this function,
+ * hooks after this have not been run yet.
+ */
+ i++;
+ break;
+ }
+ }
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 65ee1b784a30..e60161bec850 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -100,6 +100,7 @@ static inline u64 get_u64(const struct canfd_frame *cp, int offset)
struct bcm_op {
struct list_head list;
+ struct rcu_head rcu;
int ifindex;
canid_t can_id;
u32 flags;
@@ -718,10 +719,9 @@ static struct bcm_op *bcm_find_op(struct list_head *ops,
return NULL;
}
-static void bcm_remove_op(struct bcm_op *op)
+static void bcm_free_op_rcu(struct rcu_head *rcu_head)
{
- hrtimer_cancel(&op->timer);
- hrtimer_cancel(&op->thrtimer);
+ struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
if ((op->frames) && (op->frames != &op->sframe))
kfree(op->frames);
@@ -732,6 +732,14 @@ static void bcm_remove_op(struct bcm_op *op)
kfree(op);
}
+static void bcm_remove_op(struct bcm_op *op)
+{
+ hrtimer_cancel(&op->timer);
+ hrtimer_cancel(&op->thrtimer);
+
+ call_rcu(&op->rcu, bcm_free_op_rcu);
+}
+
static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
{
if (op->rx_reg_dev == dev) {
@@ -757,6 +765,9 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) &&
(op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) {
+ /* disable automatic timer on frame reception */
+ op->flags |= RX_NO_AUTOTIMER;
+
/*
* Don't care if we're bound or not (due to netdev
* problems) can_rx_unregister() is always a save
@@ -785,7 +796,6 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
bcm_rx_handler, op);
list_del(&op->list);
- synchronize_rcu();
bcm_remove_op(op);
return 1; /* done */
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 08ce317fcec8..8e6f22961206 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -397,16 +397,18 @@ static void list_netdevice(struct net_device *dev)
/* Device list removal
* caller must respect a RCU grace period before freeing/reusing dev
*/
-static void unlist_netdevice(struct net_device *dev)
+static void unlist_netdevice(struct net_device *dev, bool lock)
{
ASSERT_RTNL();
/* Unlink dev from the device chain */
- write_lock(&dev_base_lock);
+ if (lock)
+ write_lock(&dev_base_lock);
list_del_rcu(&dev->dev_list);
netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
- write_unlock(&dev_base_lock);
+ if (lock)
+ write_unlock(&dev_base_lock);
dev_base_seq_inc(dev_net(dev));
}
@@ -10043,11 +10045,11 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
ret = netdev_register_kobject(dev);
- if (ret) {
- dev->reg_state = NETREG_UNREGISTERED;
+ write_lock(&dev_base_lock);
+ dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
+ write_unlock(&dev_base_lock);
+ if (ret)
goto err_uninit;
- }
- dev->reg_state = NETREG_REGISTERED;
__netdev_update_features(dev);
@@ -10329,7 +10331,9 @@ void netdev_run_todo(void)
continue;
}
+ write_lock(&dev_base_lock);
dev->reg_state = NETREG_UNREGISTERED;
+ write_unlock(&dev_base_lock);
linkwatch_forget_dev(dev);
}
@@ -10810,9 +10814,10 @@ void unregister_netdevice_many(struct list_head *head)
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
- unlist_netdevice(dev);
-
+ write_lock(&dev_base_lock);
+ unlist_netdevice(dev, false);
dev->reg_state = NETREG_UNREGISTERING;
+ write_unlock(&dev_base_lock);
}
flush_all_backlogs();
@@ -10959,7 +10964,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_close(dev);
/* And unlink it from device chain */
- unlist_netdevice(dev);
+ unlist_netdevice(dev, true);
synchronize_net();
diff --git a/net/core/filter.c b/net/core/filter.c
index 5af58eb48587..5d16d66727fc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6516,10 +6516,21 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
ifindex, proto, netns_id, flags);
if (sk) {
- sk = sk_to_full_sk(sk);
- if (!sk_fullsock(sk)) {
+ struct sock *sk2 = sk_to_full_sk(sk);
+
+ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
+ * sock refcnt is decremented to prevent a request_sock leak.
+ */
+ if (!sk_fullsock(sk2))
+ sk2 = NULL;
+ if (sk2 != sk) {
sock_gen_put(sk);
- return NULL;
+ /* Ensure there is no need to bump sk2 refcnt */
+ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ return NULL;
+ }
+ sk = sk2;
}
}
@@ -6553,10 +6564,21 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
flags);
if (sk) {
- sk = sk_to_full_sk(sk);
- if (!sk_fullsock(sk)) {
+ struct sock *sk2 = sk_to_full_sk(sk);
+
+ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
+ * sock refcnt is decremented to prevent a request_sock leak.
+ */
+ if (!sk_fullsock(sk2))
+ sk2 = NULL;
+ if (sk2 != sk) {
sock_gen_put(sk);
- return NULL;
+ /* Ensure there is no need to bump sk2 refcnt */
+ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ return NULL;
+ }
+ sk = sk2;
}
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e319e242dddf..a3642569fe53 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -33,6 +33,7 @@ static const char fmt_dec[] = "%d\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
+/* Caller holds RTNL or dev_base_lock */
static inline int dev_isalive(const struct net_device *dev)
{
return dev->reg_state <= NETREG_REGISTERED;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 22b983ade0e7..b0fcd0200e84 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -699,6 +699,11 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
write_lock_bh(&sk->sk_callback_lock);
+ if (sk_is_inet(sk) && inet_csk_has_ulp(sk)) {
+ psock = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
if (sk->sk_user_data) {
psock = ERR_PTR(-EBUSY);
goto out;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 2e78458900f2..eb8e128e43e8 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1120,12 +1120,6 @@ static int __init dccp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
- dccp_hashinfo.bind2_bucket_cachep =
- kmem_cache_create("dccp_bind2_bucket",
- sizeof(struct inet_bind2_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
- if (!dccp_hashinfo.bind2_bucket_cachep)
- goto out_free_bind_bucket_cachep;
/*
* Size and allocate the main established and bind bucket
@@ -1156,7 +1150,7 @@ static int __init dccp_init(void)
if (!dccp_hashinfo.ehash) {
DCCP_CRIT("Failed to allocate DCCP established hash table");
- goto out_free_bind2_bucket_cachep;
+ goto out_free_bind_bucket_cachep;
}
for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
@@ -1182,23 +1176,14 @@ static int __init dccp_init(void)
goto out_free_dccp_locks;
}
- dccp_hashinfo.bhash2 = (struct inet_bind2_hashbucket *)
- __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
-
- if (!dccp_hashinfo.bhash2) {
- DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
- goto out_free_dccp_bhash;
- }
-
for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
spin_lock_init(&dccp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
- INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
}
rc = dccp_mib_init();
if (rc)
- goto out_free_dccp_bhash2;
+ goto out_free_dccp_bhash;
rc = dccp_ackvec_init();
if (rc)
@@ -1222,38 +1207,30 @@ out_ackvec_exit:
dccp_ackvec_exit();
out_free_dccp_mib:
dccp_mib_exit();
-out_free_dccp_bhash2:
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
-out_free_bind2_bucket_cachep:
- kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
inet_hashinfo2_free_mod(&dccp_hashinfo);
out_fail:
dccp_hashinfo.bhash = NULL;
- dccp_hashinfo.bhash2 = NULL;
dccp_hashinfo.ehash = NULL;
dccp_hashinfo.bind_bucket_cachep = NULL;
- dccp_hashinfo.bind2_bucket_cachep = NULL;
return rc;
}
static void __exit dccp_fini(void)
{
- int bhash_order = get_order(dccp_hashinfo.bhash_size *
- sizeof(struct inet_bind_hashbucket));
-
ccid_cleanup_builtins();
dccp_mib_exit();
- free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
+ free_pages((unsigned long)dccp_hashinfo.bhash,
+ get_order(dccp_hashinfo.bhash_size *
+ sizeof(struct inet_bind_hashbucket)));
free_pages((unsigned long)dccp_hashinfo.ehash,
get_order((dccp_hashinfo.ehash_mask + 1) *
sizeof(struct inet_ehash_bucket)));
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 7e6b37a54add..1c94bb8ea03f 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -36,7 +36,7 @@ static int fallback_set_params(struct eeprom_req_info *request,
if (request->page)
offset = request->page * ETH_MODULE_EEPROM_PAGE_LEN + offset;
- if (modinfo->type == ETH_MODULE_SFF_8079 &&
+ if (modinfo->type == ETH_MODULE_SFF_8472 &&
request->i2c_address == 0x51)
offset += ETH_MODULE_EEPROM_PAGE_LEN * 2;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c0b7e6c21360..53f5f956d948 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -117,32 +117,6 @@ bool inet_rcv_saddr_any(const struct sock *sk)
return !sk->sk_rcv_saddr;
}
-static bool use_bhash2_on_bind(const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- int addr_type;
-
- if (sk->sk_family == AF_INET6) {
- addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- return addr_type != IPV6_ADDR_ANY &&
- addr_type != IPV6_ADDR_MAPPED;
- }
-#endif
- return sk->sk_rcv_saddr != htonl(INADDR_ANY);
-}
-
-static u32 get_bhash2_nulladdr_hash(const struct sock *sk, struct net *net,
- int port)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr nulladdr = {};
-
- if (sk->sk_family == AF_INET6)
- return ipv6_portaddr_hash(net, &nulladdr, port);
-#endif
- return ipv4_portaddr_hash(net, 0, port);
-}
-
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
unsigned int seq;
@@ -156,71 +130,16 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);
-static bool bind_conflict_exist(const struct sock *sk, struct sock *sk2,
- kuid_t sk_uid, bool relax,
- bool reuseport_cb_ok, bool reuseport_ok)
-{
- int bound_dev_if2;
-
- if (sk == sk2)
- return false;
-
- bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
-
- if (!sk->sk_bound_dev_if || !bound_dev_if2 ||
- sk->sk_bound_dev_if == bound_dev_if2) {
- if (sk->sk_reuse && sk2->sk_reuse &&
- sk2->sk_state != TCP_LISTEN) {
- if (!relax || (!reuseport_ok && sk->sk_reuseport &&
- sk2->sk_reuseport && reuseport_cb_ok &&
- (sk2->sk_state == TCP_TIME_WAIT ||
- uid_eq(sk_uid, sock_i_uid(sk2)))))
- return true;
- } else if (!reuseport_ok || !sk->sk_reuseport ||
- !sk2->sk_reuseport || !reuseport_cb_ok ||
- (sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(sk_uid, sock_i_uid(sk2)))) {
- return true;
- }
- }
- return false;
-}
-
-static bool check_bhash2_conflict(const struct sock *sk,
- struct inet_bind2_bucket *tb2, kuid_t sk_uid,
- bool relax, bool reuseport_cb_ok,
- bool reuseport_ok)
-{
- struct sock *sk2;
-
- sk_for_each_bound_bhash2(sk2, &tb2->owners) {
- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
- continue;
-
- if (bind_conflict_exist(sk, sk2, sk_uid, relax,
- reuseport_cb_ok, reuseport_ok))
- return true;
- }
- return false;
-}
-
-/* This should be called only when the corresponding inet_bind_bucket spinlock
- * is held
- */
-static int inet_csk_bind_conflict(const struct sock *sk, int port,
- struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, /* may be null */
+static int inet_csk_bind_conflict(const struct sock *sk,
+ const struct inet_bind_bucket *tb,
bool relax, bool reuseport_ok)
{
- struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- kuid_t uid = sock_i_uid((struct sock *)sk);
- struct sock_reuseport *reuseport_cb;
- struct inet_bind2_hashbucket *head2;
- bool reuseport_cb_ok;
struct sock *sk2;
- struct net *net;
- int l3mdev;
- u32 hash;
+ bool reuseport_cb_ok;
+ bool reuse = sk->sk_reuse;
+ bool reuseport = !!sk->sk_reuseport;
+ struct sock_reuseport *reuseport_cb;
+ kuid_t uid = sock_i_uid((struct sock *)sk);
rcu_read_lock();
reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
@@ -231,42 +150,40 @@ static int inet_csk_bind_conflict(const struct sock *sk, int port,
/*
* Unlike other sk lookup places we do not check
* for sk_net here, since _all_ the socks listed
- * in tb->owners and tb2->owners list belong
- * to the same net
+ * in tb->owners list belong to the same net - the
+ * one this bucket belongs to.
*/
- if (!use_bhash2_on_bind(sk)) {
- sk_for_each_bound(sk2, &tb->owners)
- if (bind_conflict_exist(sk, sk2, uid, relax,
- reuseport_cb_ok, reuseport_ok) &&
- inet_rcv_saddr_equal(sk, sk2, true))
- return true;
+ sk_for_each_bound(sk2, &tb->owners) {
+ int bound_dev_if2;
- return false;
+ if (sk == sk2)
+ continue;
+ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
+ if ((!sk->sk_bound_dev_if ||
+ !bound_dev_if2 ||
+ sk->sk_bound_dev_if == bound_dev_if2)) {
+ if (reuse && sk2->sk_reuse &&
+ sk2->sk_state != TCP_LISTEN) {
+ if ((!relax ||
+ (!reuseport_ok &&
+ reuseport && sk2->sk_reuseport &&
+ reuseport_cb_ok &&
+ (sk2->sk_state == TCP_TIME_WAIT ||
+ uid_eq(uid, sock_i_uid(sk2))))) &&
+ inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ } else if (!reuseport_ok ||
+ !reuseport || !sk2->sk_reuseport ||
+ !reuseport_cb_ok ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
+ !uid_eq(uid, sock_i_uid(sk2)))) {
+ if (inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ }
+ }
}
-
- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok))
- return true;
-
- net = sock_net(sk);
-
- /* check there's no conflict with an existing IPV6_ADDR_ANY (if ipv6) or
- * INADDR_ANY (if ipv4) socket.
- */
- hash = get_bhash2_nulladdr_hash(sk, net, port);
- head2 = &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-
- l3mdev = inet_sk_bound_l3mdev(sk);
- inet_bind_bucket_for_each(tb2, &head2->chain)
- if (check_bind2_bucket_match_nulladdr(tb2, net, port, l3mdev, sk))
- break;
-
- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok))
- return true;
-
- return false;
+ return sk2 != NULL;
}
/*
@@ -274,20 +191,16 @@ static int inet_csk_bind_conflict(const struct sock *sk, int port,
* inet_bind_hashbucket lock held.
*/
static struct inet_bind_hashbucket *
-inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret,
- struct inet_bind2_bucket **tb2_ret,
- struct inet_bind2_hashbucket **head2_ret, int *port_ret)
+inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
{
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- struct inet_bind2_hashbucket *head2;
+ int port = 0;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
+ bool relax = false;
int i, low, high, attempt_half;
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
u32 remaining, offset;
- bool relax = false;
- int port = 0;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
@@ -326,12 +239,10 @@ other_parity_scan:
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
- &head2);
inet_bind_bucket_for_each(tb, &head->chain)
- if (check_bind_bucket_match(tb, net, port, l3mdev)) {
- if (!inet_csk_bind_conflict(sk, port, tb, tb2,
- relax, false))
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
+ if (!inet_csk_bind_conflict(sk, tb, relax, false))
goto success;
goto next_port;
}
@@ -361,8 +272,6 @@ next_port:
success:
*port_ret = port;
*tb_ret = tb;
- *tb2_ret = tb2;
- *head2_ret = head2;
return head;
}
@@ -458,81 +367,54 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- bool bhash_created = false, bhash2_created = false;
- struct inet_bind2_bucket *tb2 = NULL;
- struct inet_bind2_hashbucket *head2;
- struct inet_bind_bucket *tb = NULL;
+ int ret = 1, port = snum;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
- int ret = 1, port = snum;
- bool found_port = false;
+ struct inet_bind_bucket *tb = NULL;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
if (!port) {
- head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port);
+ head = inet_csk_find_open_port(sk, &tb, &port);
if (!head)
return ret;
- if (tb && tb2)
- goto success;
- found_port = true;
- } else {
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- spin_lock_bh(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (check_bind_bucket_match(tb, net, port, l3mdev))
- break;
-
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
- &head2);
- }
-
- if (!tb) {
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net,
- head, port, l3mdev);
if (!tb)
- goto fail_unlock;
- bhash_created = true;
- }
-
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
- net, head2, port, l3mdev, sk);
- if (!tb2)
- goto fail_unlock;
- bhash2_created = true;
+ goto tb_not_found;
+ goto success;
}
-
- /* If we had to find an open port, we already checked for conflicts */
- if (!found_port && !hlist_empty(&tb->owners)) {
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port)
+ goto tb_found;
+tb_not_found:
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+ net, head, port, l3mdev);
+ if (!tb)
+ goto fail_unlock;
+tb_found:
+ if (!hlist_empty(&tb->owners)) {
if (sk->sk_reuse == SK_FORCE_REUSE)
goto success;
if ((tb->fastreuse > 0 && reuse) ||
sk_reuseport_match(tb, sk))
goto success;
- if (inet_csk_bind_conflict(sk, port, tb, tb2, true, true))
+ if (inet_csk_bind_conflict(sk, tb, true, true))
goto fail_unlock;
}
success:
inet_csk_update_fastreuse(tb, sk);
if (!inet_csk(sk)->icsk_bind_hash)
- inet_bind_hash(sk, tb, tb2, port);
+ inet_bind_hash(sk, tb, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
- WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2);
ret = 0;
fail_unlock:
- if (ret) {
- if (bhash_created)
- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- if (bhash2_created)
- inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
- tb2);
- }
spin_unlock_bh(&head->lock);
return ret;
}
@@ -1079,7 +961,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
- newicsk->icsk_bind2_hash = NULL;
inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 545f91b6cb5e..b9d995b5ce24 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -81,41 +81,6 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
return tb;
}
-struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
- struct net *net,
- struct inet_bind2_hashbucket *head,
- const unsigned short port,
- int l3mdev,
- const struct sock *sk)
-{
- struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
-
- if (tb) {
- write_pnet(&tb->ib_net, net);
- tb->l3mdev = l3mdev;
- tb->port = port;
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- else
-#endif
- tb->rcv_saddr = sk->sk_rcv_saddr;
- INIT_HLIST_HEAD(&tb->owners);
- hlist_add_head(&tb->node, &head->chain);
- }
- return tb;
-}
-
-static bool bind2_bucket_addr_match(struct inet_bind2_bucket *tb2, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- return ipv6_addr_equal(&tb2->v6_rcv_saddr,
- &sk->sk_v6_rcv_saddr);
-#endif
- return tb2->rcv_saddr == sk->sk_rcv_saddr;
-}
-
/*
* Caller must hold hashbucket lock for this tb with local BH disabled
*/
@@ -127,25 +92,12 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
}
}
-/* Caller must hold the lock for the corresponding hashbucket in the bhash table
- * with local BH disabled
- */
-void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
-{
- if (hlist_empty(&tb->owners)) {
- __hlist_del(&tb->node);
- kmem_cache_free(cachep, tb);
- }
-}
-
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, const unsigned short snum)
+ const unsigned short snum)
{
inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &tb->owners);
inet_csk(sk)->icsk_bind_hash = tb;
- sk_add_bind2_node(sk, &tb2->owners);
- inet_csk(sk)->icsk_bind2_hash = tb2;
}
/*
@@ -157,7 +109,6 @@ static void __inet_put_port(struct sock *sk)
const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
hashinfo->bhash_size);
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
spin_lock(&head->lock);
@@ -166,13 +117,6 @@ static void __inet_put_port(struct sock *sk)
inet_csk(sk)->icsk_bind_hash = NULL;
inet_sk(sk)->inet_num = 0;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
-
- if (inet_csk(sk)->icsk_bind2_hash) {
- tb2 = inet_csk(sk)->icsk_bind2_hash;
- __sk_del_bind2_node(sk);
- inet_csk(sk)->icsk_bind2_hash = NULL;
- inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
- }
spin_unlock(&head->lock);
}
@@ -189,19 +133,14 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
unsigned short port = inet_sk(child)->inet_num;
const int bhash = inet_bhashfn(sock_net(sk), port,
- table->bhash_size);
+ table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
- struct inet_bind2_hashbucket *head_bhash2;
- bool created_inet_bind_bucket = false;
- struct net *net = sock_net(sk);
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
int l3mdev;
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
- tb2 = inet_csk(sk)->icsk_bind2_hash;
- if (unlikely(!tb || !tb2)) {
+ if (unlikely(!tb)) {
spin_unlock(&head->lock);
return -ENOENT;
}
@@ -214,45 +153,25 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
* as that of the child socket. We have to look up or
* create a new bind bucket for the child here. */
inet_bind_bucket_for_each(tb, &head->chain) {
- if (check_bind_bucket_match(tb, net, port, l3mdev))
+ if (net_eq(ib_net(tb), sock_net(sk)) &&
+ tb->l3mdev == l3mdev && tb->port == port)
break;
}
if (!tb) {
tb = inet_bind_bucket_create(table->bind_bucket_cachep,
- net, head, port, l3mdev);
+ sock_net(sk), head, port,
+ l3mdev);
if (!tb) {
spin_unlock(&head->lock);
return -ENOMEM;
}
- created_inet_bind_bucket = true;
}
inet_csk_update_fastreuse(tb, child);
-
- goto bhash2_find;
- } else if (!bind2_bucket_addr_match(tb2, child)) {
- l3mdev = inet_sk_bound_l3mdev(sk);
-
-bhash2_find:
- tb2 = inet_bind2_bucket_find(table, net, port, l3mdev, child,
- &head_bhash2);
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
- net, head_bhash2, port,
- l3mdev, child);
- if (!tb2)
- goto error;
- }
}
- inet_bind_hash(child, tb, tb2, port);
+ inet_bind_hash(child, tb, port);
spin_unlock(&head->lock);
return 0;
-
-error:
- if (created_inet_bind_bucket)
- inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
- spin_unlock(&head->lock);
- return -ENOMEM;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);
@@ -756,76 +675,6 @@ void inet_unhash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_unhash);
-static bool check_bind2_bucket_match(struct inet_bind2_bucket *tb,
- struct net *net, unsigned short port,
- int l3mdev, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
- else
-#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
-}
-
-bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
- struct net *net, const unsigned short port,
- int l3mdev, const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr nulladdr = {};
-
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &nulladdr);
- else
-#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
-}
-
-static struct inet_bind2_hashbucket *
-inet_bhashfn_portaddr(struct inet_hashinfo *hinfo, const struct sock *sk,
- const struct net *net, unsigned short port)
-{
- u32 hash;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);
- else
-#endif
- hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);
- return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-}
-
-/* This should only be called when the spinlock for the socket's corresponding
- * bind_hashbucket is held
- */
-struct inet_bind2_bucket *
-inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
- const unsigned short port, int l3mdev, struct sock *sk,
- struct inet_bind2_hashbucket **head)
-{
- struct inet_bind2_bucket *bhash2 = NULL;
- struct inet_bind2_hashbucket *h;
-
- h = inet_bhashfn_portaddr(hinfo, sk, net, port);
- inet_bind_bucket_for_each(bhash2, &h->chain) {
- if (check_bind2_bucket_match(bhash2, net, port, l3mdev, sk))
- break;
- }
-
- if (head)
- *head = h;
-
- return bhash2;
-}
-
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
@@ -846,13 +695,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_timewait_sock *tw = NULL;
- struct inet_bind2_hashbucket *head2;
struct inet_bind_hashbucket *head;
int port = inet_sk(sk)->inet_num;
struct net *net = sock_net(sk);
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
- bool tb_created = false;
u32 remaining, offset;
int ret, i, low, high;
int l3mdev;
@@ -909,7 +755,8 @@ other_parity_scan:
* the established check is already unique enough.
*/
inet_bind_bucket_for_each(tb, &head->chain) {
- if (check_bind_bucket_match(tb, net, port, l3mdev)) {
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
if (tb->fastreuse >= 0 ||
tb->fastreuseport >= 0)
goto next_port;
@@ -927,7 +774,6 @@ other_parity_scan:
spin_unlock_bh(&head->lock);
return -ENOMEM;
}
- tb_created = true;
tb->fastreuse = -1;
tb->fastreuseport = -1;
goto ok;
@@ -943,17 +789,6 @@ next_port:
return -EADDRNOTAVAIL;
ok:
- /* Find the corresponding tb2 bucket since we need to
- * add the socket to the bhash2 table as well
- */
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, &head2);
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
- head2, port, l3mdev, sk);
- if (!tb2)
- goto error;
- }
-
/* Here we want to add a little bit of randomness to the next source
* port that will be chosen. We use a max() with a random here so that
* on low contention the randomness is maximal and on high contention
@@ -963,7 +798,7 @@ ok:
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
- inet_bind_hash(sk, tb, tb2, port);
+ inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
@@ -975,12 +810,6 @@ ok:
inet_twsk_deschedule_put(tw);
local_bh_enable();
return 0;
-
-error:
- if (tb_created)
- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- spin_unlock_bh(&head->lock);
- return -ENOMEM;
}
/*
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 3b9cd487075a..5c58e21f724e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -524,7 +524,6 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
int tunnel_hlen;
int version;
int nhoff;
- int thoff;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -558,10 +557,16 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
(ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
truncate = true;
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ int thoff;
+
+ if (skb_transport_header_was_set(skb))
+ thoff = skb_transport_header(skb) - skb_mac_header(skb);
+ else
+ thoff = nhoff + sizeof(struct ipv6hdr);
+ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
+ truncate = true;
+ }
if (version == 1) {
erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 6b2dc7b2b612..cc1caab4a654 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -410,7 +410,7 @@ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
u32 mtu = dst_mtu(encap_dst) - headroom;
if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) ||
- (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu))
+ (!skb_is_gso(skb) && (skb->len - skb_network_offset(skb)) <= mtu))
return 0;
skb_dst_update_pmtu_no_confirm(skb, mtu);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 1a43ca73f94d..3c6101def7d6 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -319,12 +319,16 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
+ if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
+ return 0;
+
tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
- if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk),
- addr->sin_addr.s_addr,
- chk_addr_ret))
+ if (chk_addr_ret == RTN_MULTICAST ||
+ chk_addr_ret == RTN_BROADCAST ||
+ (chk_addr_ret != RTN_LOCAL &&
+ !inet_can_nonlocal_bind(net, isk)))
return -EADDRNOTAVAIL;
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9984d23a7f3e..028513d3e2a2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4604,12 +4604,6 @@ void __init tcp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT,
NULL);
- tcp_hashinfo.bind2_bucket_cachep =
- kmem_cache_create("tcp_bind2_bucket",
- sizeof(struct inet_bind2_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_PANIC |
- SLAB_ACCOUNT,
- NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
@@ -4632,9 +4626,8 @@ void __init tcp_init(void)
if (inet_ehash_locks_alloc(&tcp_hashinfo))
panic("TCP: failed to alloc ehash_locks");
tcp_hashinfo.bhash =
- alloc_large_system_hash("TCP bind bhash tables",
- sizeof(struct inet_bind_hashbucket) +
- sizeof(struct inet_bind2_hashbucket),
+ alloc_large_system_hash("TCP bind",
+ sizeof(struct inet_bind_hashbucket),
tcp_hashinfo.ehash_mask + 1,
17, /* one slot per 128 KB of memory */
0,
@@ -4643,12 +4636,9 @@ void __init tcp_init(void)
0,
64 * 1024);
tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
- tcp_hashinfo.bhash2 =
- (struct inet_bind2_hashbucket *)(tcp_hashinfo.bhash + tcp_hashinfo.bhash_size);
for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
spin_lock_init(&tcp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
- INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index be3947e70fec..0d3f68bb51c0 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -611,9 +611,6 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
return 0;
}
- if (inet_csk_has_ulp(sk))
- return -EINVAL;
-
if (sk->sk_family == AF_INET6) {
if (tcp_bpf_assert_proto_ops(psock->sk_proto))
return -EINVAL;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fe8f23b95d32..da5a3c44c4fb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1964,7 +1964,10 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- drop_reason = tcp_inbound_md5_hash(sk, skb,
+ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
+ else
+ drop_reason = tcp_inbound_md5_hash(sk, skb,
&iph->saddr, &iph->daddr,
AF_INET, dif, sdif);
if (unlikely(drop_reason)) {
@@ -2016,6 +2019,7 @@ process:
}
goto discard_and_relse;
}
+ nf_reset_ct(skb);
if (nsk == sk) {
reqsk_put(req);
tcp_v4_restore_cb(skb);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1b1932502e9e..49cc6587dd77 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1109,10 +1109,6 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
goto out;
}
- if (net->ipv6.devconf_all->disable_policy ||
- idev->cnf.disable_policy)
- f6i->dst_nopolicy = true;
-
neigh_parms_data_state_setall(idev->nd_parms);
ifa->addr = *cfg->pfx;
@@ -5172,9 +5168,9 @@ next:
fillargs->event = RTM_GETMULTICAST;
/* multicast address */
- for (ifmca = rcu_dereference(idev->mc_list);
+ for (ifmca = rtnl_dereference(idev->mc_list);
ifmca;
- ifmca = rcu_dereference(ifmca->next), ip_idx++) {
+ ifmca = rtnl_dereference(ifmca->next), ip_idx++) {
if (ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4e37f7c29900..a9051df0625d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -939,7 +939,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
__be16 proto;
__u32 mtu;
int nhoff;
- int thoff;
if (!pskb_inet_may_pull(skb))
goto tx_err;
@@ -960,10 +959,16 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
(ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
truncate = true;
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ int thoff;
+
+ if (skb_transport_header_was_set(skb))
+ thoff = skb_transport_header(skb) - skb_mac_header(skb);
+ else
+ thoff = nhoff + sizeof(struct ipv6hdr);
+ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
+ truncate = true;
+ }
if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
goto tx_err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d25dc83bac62..828355710c57 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4569,8 +4569,15 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
}
f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
- if (!IS_ERR(f6i))
+ if (!IS_ERR(f6i)) {
f6i->dst_nocount = true;
+
+ if (!anycast &&
+ (net->ipv6.devconf_all->disable_policy ||
+ idev->cnf.disable_policy))
+ f6i->dst_nopolicy = true;
+ }
+
return f6i;
}
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 6de01185cc68..d43c50a7310d 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -406,7 +406,6 @@ int __net_init seg6_hmac_net_init(struct net *net)
return rhashtable_init(&sdata->hmac_infos, &rht_params);
}
-EXPORT_SYMBOL(seg6_hmac_net_init);
void seg6_hmac_exit(void)
{
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index c0b138c20992..6bcd5e419a08 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -323,8 +323,6 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u
kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) :
NULL;
- rcu_read_lock();
-
ca = min(t->prl_count, cmax);
if (!kp) {
@@ -341,7 +339,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u
}
}
- c = 0;
+ rcu_read_lock();
for_each_prl_rcu(t->prl) {
if (c >= cmax)
break;
@@ -353,7 +351,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u
if (kprl.addr != htonl(INADDR_ANY))
break;
}
-out:
+
rcu_read_unlock();
len = sizeof(*kp) * c;
@@ -362,7 +360,7 @@ out:
ret = -EFAULT;
kfree(kp);
-
+out:
return ret;
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index be3b918a6d15..bd8f0f425be4 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -765,6 +765,7 @@ static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_bu
opts->suboptions |= OPTION_MPTCP_RST;
opts->reset_transient = subflow->reset_transient;
opts->reset_reason = subflow->reset_reason;
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX);
return true;
}
@@ -788,6 +789,7 @@ static bool mptcp_established_options_fastclose(struct sock *sk,
opts->rcvr_key = msk->remote_key;
pr_debug("FASTCLOSE key=%llu", opts->rcvr_key);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX);
return true;
}
@@ -809,6 +811,7 @@ static bool mptcp_established_options_mp_fail(struct sock *sk,
opts->fail_seq = subflow->map_seq;
pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
return true;
}
@@ -833,13 +836,11 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX);
}
/* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if there is room */
if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX);
}
return true;
}
@@ -966,7 +967,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
goto reset;
subflow->mp_capable = 0;
pr_fallback(msk);
- __mptcp_do_fallback(msk);
+ mptcp_do_fallback(ssk);
return false;
}
@@ -1583,6 +1584,9 @@ mp_rst:
*ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
TCPOLEN_MPTCP_PRIO,
opts->backup, TCPOPT_NOP);
+
+ MPTCP_INC_STATS(sock_net((const struct sock *)tp),
+ MPTCP_MIB_MPPRIOTX);
}
mp_capable_done:
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 59a85220edc9..45e2a48397b9 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -299,23 +299,21 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- struct sock *s = (struct sock *)msk;
pr_debug("fail_seq=%llu", fail_seq);
if (!READ_ONCE(msk->allow_infinite_fallback))
return;
- if (!READ_ONCE(subflow->mp_fail_response_expect)) {
+ if (!subflow->fail_tout) {
pr_debug("send MP_FAIL response and infinite map");
subflow->send_mp_fail = 1;
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
subflow->send_infinite_map = 1;
- } else if (!sock_flag(sk, SOCK_DEAD)) {
+ tcp_send_ack(sk);
+ } else {
pr_debug("MP_FAIL response received");
-
- sk_stop_timer(s, &s->sk_timer);
+ WRITE_ONCE(subflow->fail_tout, 0);
}
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index e099f2a12504..7c7395b58944 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -717,9 +717,10 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
}
}
-static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr,
- u8 bkup)
+int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr,
+ struct mptcp_addr_info *rem,
+ u8 bkup)
{
struct mptcp_subflow_context *subflow;
@@ -727,24 +728,29 @@ static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- struct sock *sk = (struct sock *)msk;
- struct mptcp_addr_info local;
+ struct mptcp_addr_info local, remote;
+ bool slow;
local_address((struct sock_common *)ssk, &local);
if (!mptcp_addresses_equal(&local, addr, addr->port))
continue;
+ if (rem && rem->family != AF_UNSPEC) {
+ remote_address((struct sock_common *)ssk, &remote);
+ if (!mptcp_addresses_equal(&remote, rem, rem->port))
+ continue;
+ }
+
+ slow = lock_sock_fast(ssk);
if (subflow->backup != bkup)
msk->last_snd = NULL;
subflow->backup = bkup;
subflow->send_mp_prio = 1;
subflow->request_bkup = bkup;
- __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIOTX);
- spin_unlock_bh(&msk->pm.lock);
pr_debug("send ack for mp_prio");
- mptcp_subflow_send_ack(ssk);
- spin_lock_bh(&msk->pm.lock);
+ __mptcp_subflow_send_ack(ssk);
+ unlock_sock_fast(ssk, slow);
return 0;
}
@@ -801,7 +807,8 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
removed = true;
__MPTCP_INC_STATS(sock_net(sk), rm_type);
}
- __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
+ if (rm_type == MPTCP_MIB_RMSUBFLOW)
+ __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
if (!removed)
continue;
@@ -1816,8 +1823,10 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk,
list.ids[list.nr++] = addr->id;
+ spin_lock_bh(&msk->pm.lock);
mptcp_pm_nl_rm_subflow_received(msk, &list);
mptcp_pm_create_subflow_or_signal_addr(msk);
+ spin_unlock_bh(&msk->pm.lock);
}
static int mptcp_nl_set_flags(struct net *net,
@@ -1835,12 +1844,10 @@ static int mptcp_nl_set_flags(struct net *net,
goto next;
lock_sock(sk);
- spin_lock_bh(&msk->pm.lock);
if (changed & MPTCP_PM_ADDR_FLAG_BACKUP)
- ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup);
+ ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, NULL, bkup);
if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)
mptcp_pm_nl_fullmesh(msk, addr);
- spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
next:
@@ -1854,6 +1861,9 @@ next:
static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
{
struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
+ struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, };
+ struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
@@ -1866,6 +1876,12 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
+ if (attr_rem) {
+ ret = mptcp_pm_parse_entry(attr_rem, info, false, &remote);
+ if (ret < 0)
+ return ret;
+ }
+
if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
bkup = 1;
if (addr.addr.family == AF_UNSPEC) {
@@ -1874,6 +1890,10 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
}
+ if (token)
+ return mptcp_userspace_pm_set_flags(sock_net(skb->sk),
+ token, &addr, &remote, bkup);
+
spin_lock_bh(&pernet->lock);
entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
if (!entry) {
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index f56378e4f597..9e82250cbb70 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -5,6 +5,7 @@
*/
#include "protocol.h"
+#include "mib.h"
void mptcp_free_local_addr_list(struct mptcp_sock *msk)
{
@@ -306,15 +307,11 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk,
const struct mptcp_addr_info *local,
const struct mptcp_addr_info *remote)
{
- struct sock *sk = &msk->sk.icsk_inet.sk;
struct mptcp_subflow_context *subflow;
- struct sock *found = NULL;
if (local->family != remote->family)
return NULL;
- lock_sock(sk);
-
mptcp_for_each_subflow(msk, subflow) {
const struct inet_sock *issk;
struct sock *ssk;
@@ -347,16 +344,11 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk,
}
if (issk->inet_sport == local->port &&
- issk->inet_dport == remote->port) {
- found = ssk;
- goto found;
- }
+ issk->inet_dport == remote->port)
+ return ssk;
}
-found:
- release_sock(sk);
-
- return found;
+ return NULL;
}
int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
@@ -412,18 +404,51 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
}
sk = &msk->sk.icsk_inet.sk;
+ lock_sock(sk);
ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r);
if (ssk) {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN);
mptcp_close_ssk(sk, ssk, subflow);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
err = 0;
} else {
err = -ESRCH;
}
+ release_sock(sk);
- destroy_err:
+destroy_err:
sock_put((struct sock *)msk);
return err;
}
+
+int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
+ struct mptcp_pm_addr_entry *loc,
+ struct mptcp_pm_addr_entry *rem, u8 bkup)
+{
+ struct mptcp_sock *msk;
+ int ret = -EINVAL;
+ u32 token_val;
+
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(net, token_val);
+ if (!msk)
+ return ret;
+
+ if (!mptcp_pm_is_userspace(msk))
+ goto set_flags_err;
+
+ if (loc->addr.family == AF_UNSPEC ||
+ rem->addr.family == AF_UNSPEC)
+ goto set_flags_err;
+
+ lock_sock((struct sock *)msk);
+ ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc->addr, &rem->addr, bkup);
+ release_sock((struct sock *)msk);
+
+set_flags_err:
+ sock_put((struct sock *)msk);
+ return ret;
+}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 17e13396024a..cc21fafd9726 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -500,19 +500,24 @@ static void mptcp_set_timeout(struct sock *sk)
__mptcp_set_timeout(sk, tout);
}
-static bool tcp_can_send_ack(const struct sock *ssk)
+static inline bool tcp_can_send_ack(const struct sock *ssk)
{
return !((1 << inet_sk_state_load(ssk)) &
(TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN));
}
+void __mptcp_subflow_send_ack(struct sock *ssk)
+{
+ if (tcp_can_send_ack(ssk))
+ tcp_send_ack(ssk);
+}
+
void mptcp_subflow_send_ack(struct sock *ssk)
{
bool slow;
slow = lock_sock_fast(ssk);
- if (tcp_can_send_ack(ssk))
- tcp_send_ack(ssk);
+ __mptcp_subflow_send_ack(ssk);
unlock_sock_fast(ssk, slow);
}
@@ -1245,7 +1250,7 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk,
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);
mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
pr_fallback(msk);
- __mptcp_do_fallback(msk);
+ mptcp_do_fallback(ssk);
}
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
@@ -2175,21 +2180,6 @@ static void mptcp_retransmit_timer(struct timer_list *t)
sock_put(sk);
}
-static struct mptcp_subflow_context *
-mp_fail_response_expect_subflow(struct mptcp_sock *msk)
-{
- struct mptcp_subflow_context *subflow, *ret = NULL;
-
- mptcp_for_each_subflow(msk, subflow) {
- if (READ_ONCE(subflow->mp_fail_response_expect)) {
- ret = subflow;
- break;
- }
- }
-
- return ret;
-}
-
static void mptcp_timeout_timer(struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
@@ -2346,6 +2336,11 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
kfree_rcu(subflow, rcu);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
+ if (ssk->sk_state == TCP_LISTEN) {
+ tcp_set_state(ssk, TCP_CLOSE);
+ mptcp_subflow_queue_clean(ssk);
+ inet_csk_listen_stop(ssk);
+ }
__tcp_close(ssk, 0);
/* close acquired an extra ref */
@@ -2518,27 +2513,50 @@ reset_timer:
mptcp_reset_timer(sk);
}
+/* schedule the timeout timer for the relevant event: either close timeout
+ * or mp_fail timeout. The close timeout takes precedence on the mp_fail one
+ */
+void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout)
+{
+ struct sock *sk = (struct sock *)msk;
+ unsigned long timeout, close_timeout;
+
+ if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
+ return;
+
+ close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
+
+ /* the close timeout takes precedence on the fail one, and here at least one of
+ * them is active
+ */
+ timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
+
+ sk_reset_timer(sk, &sk->sk_timer, timeout);
+}
+
static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
{
- struct mptcp_subflow_context *subflow;
- struct sock *ssk;
+ struct sock *ssk = msk->first;
bool slow;
- subflow = mp_fail_response_expect_subflow(msk);
- if (subflow) {
- pr_debug("MP_FAIL doesn't respond, reset the subflow");
+ if (!ssk)
+ return;
- ssk = mptcp_subflow_tcp_sock(subflow);
- slow = lock_sock_fast(ssk);
- mptcp_subflow_reset(ssk);
- unlock_sock_fast(ssk, slow);
- }
+ pr_debug("MP_FAIL doesn't respond, reset the subflow");
+
+ slow = lock_sock_fast(ssk);
+ mptcp_subflow_reset(ssk);
+ WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
+ unlock_sock_fast(ssk, slow);
+
+ mptcp_reset_timeout(msk, 0);
}
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
struct sock *sk = &msk->sk.icsk_inet.sk;
+ unsigned long fail_tout;
int state;
lock_sock(sk);
@@ -2575,7 +2593,9 @@ static void mptcp_worker(struct work_struct *work)
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
__mptcp_retrans(sk);
- mptcp_mp_fail_no_response(msk);
+ fail_tout = msk->first ? READ_ONCE(mptcp_subflow_ctx(msk->first)->fail_tout) : 0;
+ if (fail_tout && time_after(jiffies, fail_tout))
+ mptcp_mp_fail_no_response(msk);
unlock:
release_sock(sk);
@@ -2822,6 +2842,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
static void mptcp_close(struct sock *sk, long timeout)
{
struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
bool do_cancel_work = false;
lock_sock(sk);
@@ -2840,10 +2861,16 @@ static void mptcp_close(struct sock *sk, long timeout)
cleanup:
/* orphan all the subflows */
inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
- mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
+ mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
+ /* since the close timeout takes precedence on the fail one,
+ * cancel the latter
+ */
+ if (ssk == msk->first)
+ subflow->fail_tout = 0;
+
sock_orphan(ssk);
unlock_sock_fast(ssk, slow);
}
@@ -2852,13 +2879,13 @@ cleanup:
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
if (mptcp_sk(sk)->token)
- mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+ mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
do_cancel_work = true;
} else {
- sk_reset_timer(sk, &sk->sk_timer, jiffies + TCP_TIMEWAIT_LEN);
+ mptcp_reset_timeout(msk, 0);
}
release_sock(sk);
if (do_cancel_work)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 200f89f6d62f..480c5320b86e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -306,6 +306,7 @@ struct mptcp_sock {
u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
+ struct mptcp_sock *dl_next;
};
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
@@ -468,7 +469,6 @@ struct mptcp_subflow_context {
local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1; /* at least one csum validated */
enum mptcp_data_avail data_avail;
- bool mp_fail_response_expect;
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
@@ -482,6 +482,7 @@ struct mptcp_subflow_context {
u8 stale_count;
long delegated_status;
+ unsigned long fail_tout;
);
@@ -606,8 +607,10 @@ void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
+void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
+void mptcp_subflow_queue_clean(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
@@ -662,6 +665,7 @@ void mptcp_get_options(const struct sk_buff *skb,
void mptcp_finish_connect(struct sock *sk);
void __mptcp_set_connected(struct sock *sk);
+void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout);
static inline bool mptcp_is_fully_established(struct sock *sk)
{
return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
@@ -768,6 +772,10 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
+int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr,
+ struct mptcp_addr_info *rem,
+ u8 bkup);
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
const struct mptcp_pm_addr_entry *entry);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
@@ -784,7 +792,9 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
unsigned int id,
u8 *flags, int *ifindex);
-
+int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
+ struct mptcp_pm_addr_entry *loc,
+ struct mptcp_pm_addr_entry *rem, u8 bkup);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
bool echo);
@@ -926,12 +936,25 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}
-static inline void mptcp_do_fallback(struct sock *sk)
+static inline void mptcp_do_fallback(struct sock *ssk)
{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = subflow->conn;
+ struct mptcp_sock *msk;
+ msk = mptcp_sk(sk);
__mptcp_do_fallback(msk);
+ if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) {
+ gfp_t saved_allocation = ssk->sk_allocation;
+
+ /* we are in a atomic (BH) scope, override ssk default for data
+ * fin allocation
+ */
+ ssk->sk_allocation = GFP_ATOMIC;
+ ssk->sk_shutdown |= SEND_SHUTDOWN;
+ tcp_shutdown(ssk, SEND_SHUTDOWN);
+ ssk->sk_allocation = saved_allocation;
+ }
}
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 8841e8cd9ad8..63e8892ec807 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -843,7 +843,8 @@ enum mapping_status {
MAPPING_INVALID,
MAPPING_EMPTY,
MAPPING_DATA_FIN,
- MAPPING_DUMMY
+ MAPPING_DUMMY,
+ MAPPING_BAD_CSUM
};
static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
@@ -958,11 +959,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
subflow->map_data_csum);
if (unlikely(csum)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
- if (subflow->mp_join || subflow->valid_csum_seen) {
- subflow->send_mp_fail = 1;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
- }
- return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
+ return MAPPING_BAD_CSUM;
}
subflow->valid_csum_seen = 1;
@@ -974,7 +971,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
bool csum_reqd = READ_ONCE(msk->csum_enabled);
- struct sock *sk = (struct sock *)msk;
struct mptcp_ext *mpext;
struct sk_buff *skb;
u16 data_len;
@@ -1016,9 +1012,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
pr_debug("infinite mapping received");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
subflow->map_data_len = 0;
- if (!sock_flag(ssk, SOCK_DEAD))
- sk_stop_timer(sk, &sk->sk_timer);
-
return MAPPING_INVALID;
}
@@ -1165,6 +1158,33 @@ static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
return !subflow->fully_established;
}
+static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ unsigned long fail_tout;
+
+ /* greceful failure can happen only on the MPC subflow */
+ if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
+ return;
+
+ /* since the close timeout take precedence on the fail one,
+ * no need to start the latter when the first is already set
+ */
+ if (sock_flag((struct sock *)msk, SOCK_DEAD))
+ return;
+
+ /* we don't need extreme accuracy here, use a zero fail_tout as special
+ * value meaning no fail timeout at all;
+ */
+ fail_tout = jiffies + TCP_RTO_MAX;
+ if (!fail_tout)
+ fail_tout = 1;
+ WRITE_ONCE(subflow->fail_tout, fail_tout);
+ tcp_send_ack(ssk);
+
+ mptcp_reset_timeout(msk, subflow->fail_tout);
+}
+
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -1184,10 +1204,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
status = get_mapping_status(ssk, msk);
trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
- if (unlikely(status == MAPPING_INVALID))
- goto fallback;
-
- if (unlikely(status == MAPPING_DUMMY))
+ if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY ||
+ status == MAPPING_BAD_CSUM))
goto fallback;
if (status != MAPPING_OK)
@@ -1229,22 +1247,17 @@ no_data:
fallback:
if (!__mptcp_check_fallback(msk)) {
/* RFC 8684 section 3.7. */
- if (subflow->send_mp_fail) {
+ if (status == MAPPING_BAD_CSUM &&
+ (subflow->mp_join || subflow->valid_csum_seen)) {
+ subflow->send_mp_fail = 1;
+
if (!READ_ONCE(msk->allow_infinite_fallback)) {
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- while ((skb = skb_peek(&ssk->sk_receive_queue)))
- sk_eat_skb(ssk, skb);
- } else if (!sock_flag(ssk, SOCK_DEAD)) {
- WRITE_ONCE(subflow->mp_fail_response_expect, true);
- sk_reset_timer((struct sock *)msk,
- &((struct sock *)msk)->sk_timer,
- jiffies + TCP_RTO_MAX);
+ goto reset;
}
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ mptcp_subflow_fail(msk, ssk);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
return true;
}
@@ -1252,16 +1265,20 @@ fallback:
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
*/
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMPTCP;
+
+reset:
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
tcp_send_active_reset(ssk, GFP_ATOMIC);
WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
return false;
}
- __mptcp_do_fallback(msk);
+ mptcp_do_fallback(ssk);
}
skb = skb_peek(&ssk->sk_receive_queue);
@@ -1706,6 +1723,58 @@ static void subflow_state_change(struct sock *sk)
}
}
+void mptcp_subflow_queue_clean(struct sock *listener_ssk)
+{
+ struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
+ struct mptcp_sock *msk, *next, *head = NULL;
+ struct request_sock *req;
+
+ /* build a list of all unaccepted mptcp sockets */
+ spin_lock_bh(&queue->rskq_lock);
+ for (req = queue->rskq_accept_head; req; req = req->dl_next) {
+ struct mptcp_subflow_context *subflow;
+ struct sock *ssk = req->sk;
+ struct mptcp_sock *msk;
+
+ if (!sk_is_mptcp(ssk))
+ continue;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ if (!subflow || !subflow->conn)
+ continue;
+
+ /* skip if already in list */
+ msk = mptcp_sk(subflow->conn);
+ if (msk->dl_next || msk == head)
+ continue;
+
+ msk->dl_next = head;
+ head = msk;
+ }
+ spin_unlock_bh(&queue->rskq_lock);
+ if (!head)
+ return;
+
+ /* can't acquire the msk socket lock under the subflow one,
+ * or will cause ABBA deadlock
+ */
+ release_sock(listener_ssk);
+
+ for (msk = head; msk; msk = next) {
+ struct sock *sk = (struct sock *)msk;
+ bool slow;
+
+ slow = lock_sock_fast_nested(sk);
+ next = msk->dl_next;
+ msk->first = NULL;
+ msk->dl_next = NULL;
+ unlock_sock_fast(sk, slow);
+ }
+
+ /* we are still under the listener msk socket lock */
+ lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+}
+
static int subflow_ulp_init(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 78814417d753..80713febfac6 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -1803,7 +1803,8 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
pdev = to_platform_device(dev->dev.parent);
if (pdev) {
np = pdev->dev.of_node;
- if (np && of_get_property(np, "mlx,multi-host", NULL))
+ if (np && (of_get_property(np, "mellanox,multi-host", NULL) ||
+ of_get_property(np, "mlx,multi-host", NULL)))
ndp->mlx_multi_host = true;
}
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index 7873bd1389c3..a8e2425e43b0 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -13,14 +13,31 @@
#include <net/netfilter/nf_tables_offload.h>
#include <net/netfilter/nf_dup_netdev.h>
-static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
+#define NF_RECURSION_LIMIT 2
+
+static DEFINE_PER_CPU(u8, nf_dup_skb_recursion);
+
+static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev,
+ enum nf_dev_hooks hook)
{
- if (skb_mac_header_was_set(skb))
+ if (__this_cpu_read(nf_dup_skb_recursion) > NF_RECURSION_LIMIT)
+ goto err;
+
+ if (hook == NF_NETDEV_INGRESS && skb_mac_header_was_set(skb)) {
+ if (skb_cow_head(skb, skb->mac_len))
+ goto err;
+
skb_push(skb, skb->mac_len);
+ }
skb->dev = dev;
skb_clear_tstamp(skb);
+ __this_cpu_inc(nf_dup_skb_recursion);
dev_queue_xmit(skb);
+ __this_cpu_dec(nf_dup_skb_recursion);
+ return;
+err:
+ kfree_skb(skb);
}
void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif)
@@ -33,7 +50,7 @@ void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif)
return;
}
- nf_do_netdev_egress(pkt->skb, dev);
+ nf_do_netdev_egress(pkt->skb, dev, nft_hook(pkt));
}
EXPORT_SYMBOL_GPL(nf_fwd_netdev_egress);
@@ -48,7 +65,7 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
skb = skb_clone(pkt->skb, GFP_ATOMIC);
if (skb)
- nf_do_netdev_egress(skb, dev);
+ nf_do_netdev_egress(skb, dev, nft_hook(pkt));
}
EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 51144fc66889..d6b59beab3a9 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5213,13 +5213,20 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data *data,
struct nlattr *attr)
{
+ u32 dtype;
int err;
err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr);
if (err < 0)
return err;
- if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) {
+ if (set->dtype == NFT_DATA_VERDICT)
+ dtype = NFT_DATA_VERDICT;
+ else
+ dtype = NFT_DATA_VALUE;
+
+ if (dtype != desc->type ||
+ set->dlen != desc->len) {
nft_data_release(data, desc->type);
return -EINVAL;
}
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 53f40e473855..3ddce24ac76d 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -25,9 +25,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
enum nft_trace_types type)
{
- const struct nft_pktinfo *pkt = info->pkt;
-
- if (!info->trace || !pkt->skb->nf_trace)
+ if (!info->trace || !info->nf_trace)
return;
info->chain = chain;
@@ -42,11 +40,24 @@ static inline void nft_trace_packet(struct nft_traceinfo *info,
enum nft_trace_types type)
{
if (static_branch_unlikely(&nft_trace_enabled)) {
+ const struct nft_pktinfo *pkt = info->pkt;
+
+ info->nf_trace = pkt->skb->nf_trace;
info->rule = rule;
__nft_trace_packet(info, chain, type);
}
}
+static inline void nft_trace_copy_nftrace(struct nft_traceinfo *info)
+{
+ if (static_branch_unlikely(&nft_trace_enabled)) {
+ const struct nft_pktinfo *pkt = info->pkt;
+
+ if (info->trace)
+ info->nf_trace = pkt->skb->nf_trace;
+ }
+}
+
static void nft_bitwise_fast_eval(const struct nft_expr *expr,
struct nft_regs *regs)
{
@@ -85,6 +96,7 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
const struct nft_chain *chain,
const struct nft_regs *regs)
{
+ const struct nft_pktinfo *pkt = info->pkt;
enum nft_trace_types type;
switch (regs->verdict.code) {
@@ -92,8 +104,13 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
case NFT_RETURN:
type = NFT_TRACETYPE_RETURN;
break;
+ case NF_STOLEN:
+ type = NFT_TRACETYPE_RULE;
+ /* can't access skb->nf_trace; use copy */
+ break;
default:
type = NFT_TRACETYPE_RULE;
+ info->nf_trace = pkt->skb->nf_trace;
break;
}
@@ -254,6 +271,7 @@ next_rule:
switch (regs.verdict.code) {
case NFT_BREAK:
regs.verdict.code = NFT_CONTINUE;
+ nft_trace_copy_nftrace(&info);
continue;
case NFT_CONTINUE:
nft_trace_packet(&info, chain, rule,
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 5041725423c2..1163ba9c1401 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -7,7 +7,7 @@
#include <linux/module.h>
#include <linux/static_key.h>
#include <linux/hash.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/skbuff.h>
@@ -25,22 +25,6 @@
DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
EXPORT_SYMBOL_GPL(nft_trace_enabled);
-static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb)
-{
- __be32 id;
-
- /* using skb address as ID results in a limited number of
- * values (and quick reuse).
- *
- * So we attempt to use as many skb members that will not
- * change while skb is with netfilter.
- */
- id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb),
- skb->skb_iif);
-
- return nla_put_be32(nlskb, NFTA_TRACE_ID, id);
-}
-
static int trace_fill_header(struct sk_buff *nlskb, u16 type,
const struct sk_buff *skb,
int off, unsigned int len)
@@ -186,6 +170,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
struct nlmsghdr *nlh;
struct sk_buff *skb;
unsigned int size;
+ u32 mark = 0;
u16 event;
if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE))
@@ -229,7 +214,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
goto nla_put_failure;
- if (trace_fill_id(skb, pkt->skb))
+ if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name))
@@ -249,16 +234,24 @@ void nft_trace_notify(struct nft_traceinfo *info)
case NFT_TRACETYPE_RULE:
if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict))
goto nla_put_failure;
+
+ /* pkt->skb undefined iff NF_STOLEN, disable dump */
+ if (info->verdict->code == NF_STOLEN)
+ info->packet_dumped = true;
+ else
+ mark = pkt->skb->mark;
+
break;
case NFT_TRACETYPE_POLICY:
+ mark = pkt->skb->mark;
+
if (nla_put_be32(skb, NFTA_TRACE_POLICY,
htonl(info->basechain->policy)))
goto nla_put_failure;
break;
}
- if (pkt->skb->mark &&
- nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark)))
+ if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark)))
goto nla_put_failure;
if (!info->packet_dumped) {
@@ -283,9 +276,20 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
const struct nft_verdict *verdict,
const struct nft_chain *chain)
{
+ static siphash_key_t trace_key __read_mostly;
+ struct sk_buff *skb = pkt->skb;
+
info->basechain = nft_base_chain(chain);
info->trace = true;
+ info->nf_trace = pkt->skb->nf_trace;
info->packet_dumped = false;
info->pkt = pkt;
info->verdict = verdict;
+
+ net_get_random_once(&trace_key, sizeof(trace_key));
+
+ info->skbid = (u32)siphash_3u32(hash32_ptr(skb),
+ skb_get_hash(skb),
+ skb->skb_iif,
+ &trace_key);
}
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index af15102bc696..f466af4f8531 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -614,7 +614,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
nf_ct_untimeout(net, NULL);
- list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_freelist, head) {
+ list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_freelist, free_head) {
list_del(&cur->free_head);
if (refcount_dec_and_test(&cur->refcnt))
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index ac4859241e17..55d2d49c3425 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -14,6 +14,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/random.h>
#include <linux/smp.h>
#include <linux/static_key.h>
#include <net/dst.h>
@@ -32,8 +33,6 @@
#define NFT_META_SECS_PER_DAY 86400
#define NFT_META_DAYS_PER_WEEK 7
-static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
-
static u8 nft_meta_weekday(void)
{
time64_t secs = ktime_get_real_seconds();
@@ -271,13 +270,6 @@ static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest,
return true;
}
-static noinline u32 nft_prandom_u32(void)
-{
- struct rnd_state *state = this_cpu_ptr(&nft_prandom_state);
-
- return prandom_u32_state(state);
-}
-
#ifdef CONFIG_IP_ROUTE_CLASSID
static noinline bool
nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest)
@@ -389,7 +381,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
break;
#endif
case NFT_META_PRANDOM:
- *dest = nft_prandom_u32();
+ *dest = get_random_u32();
break;
#ifdef CONFIG_XFRM
case NFT_META_SECPATH:
@@ -518,7 +510,6 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
len = IFNAMSIZ;
break;
case NFT_META_PRANDOM:
- prandom_init_once(&nft_prandom_state);
len = sizeof(u32);
break;
#ifdef CONFIG_XFRM
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 81b40c663d86..45d3dc9e96f2 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -9,12 +9,11 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <linux/random.h>
#include <linux/static_key.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
-static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
-
struct nft_ng_inc {
u8 dreg;
u32 modulus;
@@ -135,12 +134,9 @@ struct nft_ng_random {
u32 offset;
};
-static u32 nft_ng_random_gen(struct nft_ng_random *priv)
+static u32 nft_ng_random_gen(const struct nft_ng_random *priv)
{
- struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
-
- return reciprocal_scale(prandom_u32_state(state), priv->modulus) +
- priv->offset;
+ return reciprocal_scale(get_random_u32(), priv->modulus) + priv->offset;
}
static void nft_ng_random_eval(const struct nft_expr *expr,
@@ -168,8 +164,6 @@ static int nft_ng_random_init(const struct nft_ctx *ctx,
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- prandom_init_once(&nft_numgen_prandom_state);
-
return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
NULL, NFT_DATA_VALUE, sizeof(u32));
}
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index df40314de21f..76de6c8d9865 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -143,6 +143,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
/* Another cpu may race to insert the element with the same key */
if (prev) {
nft_set_elem_destroy(set, he, true);
+ atomic_dec(&set->nelems);
he = prev;
}
@@ -152,6 +153,7 @@ out:
err2:
nft_set_elem_destroy(set, he, true);
+ atomic_dec(&set->nelems);
err1:
return false;
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 2c8051d8cca6..4f9299b9dcdd 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -2125,6 +2125,32 @@ out_scratch:
}
/**
+ * nft_set_pipapo_match_destroy() - Destroy elements from key mapping array
+ * @set: nftables API set representation
+ * @m: matching data pointing to key mapping array
+ */
+static void nft_set_pipapo_match_destroy(const struct nft_set *set,
+ struct nft_pipapo_match *m)
+{
+ struct nft_pipapo_field *f;
+ int i, r;
+
+ for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
+ ;
+
+ for (r = 0; r < f->rules; r++) {
+ struct nft_pipapo_elem *e;
+
+ if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
+ continue;
+
+ e = f->mt[r].e;
+
+ nft_set_elem_destroy(set, e, true);
+ }
+}
+
+/**
* nft_pipapo_destroy() - Free private data for set and all committed elements
* @set: nftables API set representation
*/
@@ -2132,26 +2158,13 @@ static void nft_pipapo_destroy(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
- struct nft_pipapo_field *f;
- int i, r, cpu;
+ int cpu;
m = rcu_dereference_protected(priv->match, true);
if (m) {
rcu_barrier();
- for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
- ;
-
- for (r = 0; r < f->rules; r++) {
- struct nft_pipapo_elem *e;
-
- if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
- continue;
-
- e = f->mt[r].e;
-
- nft_set_elem_destroy(set, e, true);
- }
+ nft_set_pipapo_match_destroy(set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
@@ -2165,6 +2178,11 @@ static void nft_pipapo_destroy(const struct nft_set *set)
}
if (priv->clone) {
+ m = priv->clone;
+
+ if (priv->dirty)
+ nft_set_pipapo_match_destroy(set, m);
+
#ifdef NFT_PIPAPO_ALIGN
free_percpu(priv->clone->scratch_aligned);
#endif
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 372bf54a0ca9..e20d1a973417 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -407,7 +407,7 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
if (flags & IP6_FH_F_FRAG) {
if (frag_off) {
key->ip.frag = OVS_FRAG_TYPE_LATER;
- key->ip.proto = nexthdr;
+ key->ip.proto = NEXTHDR_FRAGMENT;
return 0;
}
key->ip.frag = OVS_FRAG_TYPE_FIRST;
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index fee6409c2bb3..eb0b8197ac82 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -227,8 +227,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh)
{
struct rose_neigh *s;
- rose_stop_ftimer(rose_neigh);
- rose_stop_t0timer(rose_neigh);
+ del_timer_sync(&rose_neigh->ftimer);
+ del_timer_sync(&rose_neigh->t0timer);
skb_queue_purge(&rose_neigh->queue);
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index b3138fc2e552..f06ddbed3fed 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -31,89 +31,89 @@ static void rose_idletimer_expiry(struct timer_list *);
void rose_start_heartbeat(struct sock *sk)
{
- del_timer(&sk->sk_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
sk->sk_timer.function = rose_heartbeat_expiry;
sk->sk_timer.expires = jiffies + 5 * HZ;
- add_timer(&sk->sk_timer);
+ sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires);
}
void rose_start_t1timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t1;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_t2timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t2;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_t3timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t3;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_hbtimer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->hb;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_idletimer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->idletimer);
+ sk_stop_timer(sk, &rose->idletimer);
if (rose->idle > 0) {
rose->idletimer.function = rose_idletimer_expiry;
rose->idletimer.expires = jiffies + rose->idle;
- add_timer(&rose->idletimer);
+ sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires);
}
}
void rose_stop_heartbeat(struct sock *sk)
{
- del_timer(&sk->sk_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
}
void rose_stop_timer(struct sock *sk)
{
- del_timer(&rose_sk(sk)->timer);
+ sk_stop_timer(sk, &rose_sk(sk)->timer);
}
void rose_stop_idletimer(struct sock *sk)
{
- del_timer(&rose_sk(sk)->idletimer);
+ sk_stop_timer(sk, &rose_sk(sk)->idletimer);
}
static void rose_heartbeat_expiry(struct timer_list *t)
@@ -130,6 +130,7 @@ static void rose_heartbeat_expiry(struct timer_list *t)
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
bh_unlock_sock(sk);
rose_destroy_socket(sk);
+ sock_put(sk);
return;
}
break;
@@ -152,6 +153,7 @@ static void rose_heartbeat_expiry(struct timer_list *t)
rose_start_heartbeat(sk);
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void rose_timer_expiry(struct timer_list *t)
@@ -181,6 +183,7 @@ static void rose_timer_expiry(struct timer_list *t)
break;
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void rose_idletimer_expiry(struct timer_list *t)
@@ -205,4 +208,5 @@ static void rose_idletimer_expiry(struct timer_list *t)
sock_set_flag(sk, SOCK_DEAD);
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index da9733da9868..817065aa2833 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -588,7 +588,8 @@ static int tcf_idr_release_unsafe(struct tc_action *p)
}
static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct nlattr *nest;
int n_i = 0;
@@ -604,20 +605,25 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
if (nla_put_string(skb, TCA_KIND, ops->kind))
goto nla_put_failure;
+ ret = 0;
mutex_lock(&idrinfo->lock);
idr_for_each_entry_ul(idr, p, tmp, id) {
if (IS_ERR(p))
continue;
ret = tcf_idr_release_unsafe(p);
- if (ret == ACT_P_DELETED) {
+ if (ret == ACT_P_DELETED)
module_put(ops->owner);
- n_i++;
- } else if (ret < 0) {
- mutex_unlock(&idrinfo->lock);
- goto nla_put_failure;
- }
+ else if (ret < 0)
+ break;
+ n_i++;
}
mutex_unlock(&idrinfo->lock);
+ if (ret < 0) {
+ if (n_i)
+ NL_SET_ERR_MSG(extack, "Unable to flush all TC actions");
+ else
+ goto nla_put_failure;
+ }
ret = nla_put_u32(skb, TCA_FCNT, n_i);
if (ret)
@@ -638,7 +644,7 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct tcf_idrinfo *idrinfo = tn->idrinfo;
if (type == RTM_DELACTION) {
- return tcf_del_walker(idrinfo, skb, ops);
+ return tcf_del_walker(idrinfo, skb, ops, extack);
} else if (type == RTM_GETACTION) {
return tcf_dump_walker(idrinfo, skb, cb);
} else {
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 79c8901f66ab..b759628a47c2 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -442,7 +442,7 @@ static int tcf_police_act_to_flow_act(int tc_act, u32 *extval,
act_id = FLOW_ACTION_JUMP;
*extval = tc_act & TC_ACT_EXT_VAL_MASK;
} else if (tc_act == TC_ACT_UNSPEC) {
- NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform/exceed action is \"continue\"");
+ act_id = FLOW_ACTION_CONTINUE;
} else {
NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload");
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index ed4ccef5d6a8..5449ed114e40 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -1146,9 +1146,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
struct tc_netem_rate rate;
struct tc_netem_slot slot;
- qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
+ qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
UINT_MAX);
- qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
+ qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
UINT_MAX);
qopt.limit = q->limit;
qopt.loss = q->loss;
diff --git a/net/socket.c b/net/socket.c
index 2bc8773d9dc5..96300cdc0625 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2149,10 +2149,13 @@ SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
struct sockaddr __user *addr, int __user *addr_len)
{
+ struct sockaddr_storage address;
+ struct msghdr msg = {
+ /* Save some cycles and don't copy the address if not needed */
+ .msg_name = addr ? (struct sockaddr *)&address : NULL,
+ };
struct socket *sock;
struct iovec iov;
- struct msghdr msg;
- struct sockaddr_storage address;
int err, err2;
int fput_needed;
@@ -2163,14 +2166,6 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
if (!sock)
goto out;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- /* Save some cycles and don't copy the address if not needed */
- msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
- /* We assume all kernel code knows the size of sockaddr_storage */
- msg.msg_namelen = 0;
- msg.msg_iocb = NULL;
- msg.msg_flags = 0;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err = sock_recvmsg(sock, &msg, flags);
@@ -2375,6 +2370,7 @@ int __copy_msghdr_from_user(struct msghdr *kmsg,
return -EFAULT;
kmsg->msg_control_is_user = true;
+ kmsg->msg_get_inq = 0;
kmsg->msg_control_user = msg.msg_control;
kmsg->msg_controllen = msg.msg_controllen;
kmsg->msg_flags = msg.msg_flags;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e2c6eca0271b..b6781ada3aa8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -651,6 +651,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
new->cl_principal = clnt->cl_principal;
+ new->cl_max_connect = clnt->cl_max_connect;
return new;
out_err:
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index f87a2d8f23a7..5d2b3e6979fb 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -984,7 +984,7 @@ static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
p = page_address(*xdr->page_ptr);
xdr->p = p + frag2bytes;
space_left = xdr->buf->buflen - xdr->buf->len;
- if (space_left - nbytes >= PAGE_SIZE)
+ if (space_left - frag1bytes >= PAGE_SIZE)
xdr->end = p + PAGE_SIZE;
else
xdr->end = p + space_left - frag1bytes;
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 3f4542e0f065..434e70eabe08 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -109,10 +109,9 @@ static void __net_exit tipc_exit_net(struct net *net)
struct tipc_net *tn = tipc_net(net);
tipc_detach_loopback(net);
+ tipc_net_stop(net);
/* Make sure the tipc_net_finalize_work() finished */
cancel_work_sync(&tn->work);
- tipc_net_stop(net);
-
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 6ef95ce565bd..b48d97cbbe29 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -472,8 +472,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
bool preliminary)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_link *l, *snd_l = tipc_bc_sndlink(net);
struct tipc_node *n, *temp_node;
- struct tipc_link *l;
unsigned long intv;
int bearer_id;
int i;
@@ -488,6 +488,16 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
goto exit;
/* A preliminary node becomes "real" now, refresh its data */
tipc_node_write_lock(n);
+ if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
+ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
+ n->capabilities, &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link refresh failed, no memory\n");
+ tipc_node_write_unlock_fast(n);
+ tipc_node_put(n);
+ n = NULL;
+ goto exit;
+ }
n->preliminary = false;
n->addr = addr;
hlist_del_rcu(&n->hash);
@@ -567,7 +577,16 @@ update:
n->signature = INVALID_NODE_SIG;
n->active_links[0] = INVALID_BEARER_ID;
n->active_links[1] = INVALID_BEARER_ID;
- n->bc_entry.link = NULL;
+ if (!preliminary &&
+ !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
+ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
+ n->capabilities, &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link creation failed, no memory\n");
+ kfree(n);
+ n = NULL;
+ goto exit;
+ }
tipc_node_get(n);
timer_setup(&n->timer, tipc_node_timeout, 0);
/* Start a slow timer anyway, crypto needs it */
@@ -1155,7 +1174,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
bool *respond, bool *dupl_addr)
{
struct tipc_node *n;
- struct tipc_link *l, *snd_l;
+ struct tipc_link *l;
struct tipc_link_entry *le;
bool addr_match = false;
bool sign_match = false;
@@ -1175,22 +1194,6 @@ void tipc_node_check_dest(struct net *net, u32 addr,
return;
tipc_node_write_lock(n);
- if (unlikely(!n->bc_entry.link)) {
- snd_l = tipc_bc_sndlink(net);
- if (!tipc_link_bc_create(net, tipc_own_addr(net),
- addr, peer_id, U16_MAX,
- tipc_link_min_win(snd_l),
- tipc_link_max_win(snd_l),
- n->capabilities,
- &n->bc_entry.inputq1,
- &n->bc_entry.namedq, snd_l,
- &n->bc_entry.link)) {
- pr_warn("Broadcast rcv link creation failed, no mem\n");
- tipc_node_write_unlock_fast(n);
- tipc_node_put(n);
- return;
- }
- }
le = &n->links[b->identity];
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 17f8c523e33b..43509c7e90fc 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -502,6 +502,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
sock_init_data(sock, sk);
tipc_set_sk_state(sk, TIPC_OPEN);
if (tipc_sk_insert(tsk)) {
+ sk_free(sk);
pr_warn("Socket create failed; port number exhausted\n");
return -EINVAL;
}
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index da176411c1b5..2ffede463e4a 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -921,6 +921,8 @@ static void tls_update(struct sock *sk, struct proto *p,
{
struct tls_context *ctx;
+ WARN_ON_ONCE(sk->sk_prot == p);
+
ctx = tls_get_ctx(sk);
if (likely(ctx)) {
ctx->sk_write_space = write_space;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 0513f82b8537..e30649f6dde5 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -267,9 +267,6 @@ static int tls_do_decryption(struct sock *sk,
}
darg->async = false;
- if (ret == -EBADMSG)
- TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
-
return ret;
}
@@ -1579,8 +1576,11 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
}
err = decrypt_internal(sk, skb, dest, NULL, darg);
- if (err < 0)
+ if (err < 0) {
+ if (err == -EBADMSG)
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
return err;
+ }
if (darg->async)
goto decrypt_next;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 19ac872a6624..09002387987e 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -538,12 +538,6 @@ static int xsk_generic_xmit(struct sock *sk)
goto out;
}
- skb = xsk_build_skb(xs, &desc);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- goto out;
- }
-
/* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
* if there is space in it. This avoids having to implement
@@ -552,11 +546,19 @@ static int xsk_generic_xmit(struct sock *sk)
spin_lock_irqsave(&xs->pool->cq_lock, flags);
if (xskq_prod_reserve(xs->pool->cq)) {
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
- kfree_skb(skb);
goto out;
}
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ skb = xsk_build_skb(xs, &desc);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ xskq_prod_cancel(xs->pool->cq);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ goto out;
+ }
+
err = __dev_direct_xmit(skb, xs->queue_id);
if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 87bdd71c7bb6..f70112176b7c 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -332,6 +332,7 @@ static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
for (i = 0; i < dma_map->dma_pages_cnt; i++) {
dma = &dma_map->dma_pages[i];
if (*dma) {
+ *dma &= ~XSK_NEXT_PG_CONTIG_MASK;
dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
DMA_BIDIRECTIONAL, attrs);
*dma = 0;