From 746a508861f77efe43c9c30158c063ece6f96ad8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:20 -0500 Subject: wireguard: allowedips: add missing __rcu annotation to satisfy sparse A __rcu annotation got lost during refactoring, which caused sparse to become enraged. Fixes: aa6fe5251561 ("wireguard: allowedips: free empty intermediate nodes when removing single node") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/allowedips.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c index b7197e8..9a4c8ff 100644 --- a/drivers/net/wireguard/allowedips.c +++ b/drivers/net/wireguard/allowedips.c @@ -163,7 +163,7 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key, return exact; } -static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node) +static inline void connect_node(struct allowedips_node __rcu **parent, u8 bit, struct allowedips_node *node) { node->parent_bit_packed = (unsigned long)parent | bit; rcu_assign_pointer(*parent, node); -- cgit v1.2.3 From e05c003da143cb7f567a46406ed41c4295027544 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 29 Nov 2021 10:39:23 -0500 Subject: wireguard: main: rename 'mod_init' & 'mod_exit' functions to be module-specific Rename module_init & module_exit functions that are named "mod_init" and "mod_exit" so that they are unique in both the System.map file and in initcall_debug output instead of showing up as almost anonymous "mod_init". This is helpful for debugging and in determining how long certain module_init calls take to execute. Signed-off-by: Randy Dunlap Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c index 75dbe77..ee4da9a 100644 --- a/drivers/net/wireguard/main.c +++ b/drivers/net/wireguard/main.c @@ -17,7 +17,7 @@ #include #include -static int __init mod_init(void) +static int __init wg_mod_init(void) { int ret; @@ -60,7 +60,7 @@ err_allowedips: return ret; } -static void __exit mod_exit(void) +static void __exit wg_mod_exit(void) { wg_genetlink_uninit(); wg_device_uninit(); @@ -68,8 +68,8 @@ static void __exit mod_exit(void) wg_allowedips_slab_uninit(); } -module_init(mod_init); -module_exit(mod_exit); +module_init(wg_mod_init); +module_exit(wg_mod_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("WireGuard secure network tunnel"); MODULE_AUTHOR("Jason A. Donenfeld "); -- cgit v1.2.3 From 7ba34d3e96f64a6ad7f164e9b930abf6e45874d4 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:25 -0500 Subject: wireguard: device: reset peer src endpoint when netns exits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each peer's endpoint contains a dst_cache entry that takes a reference to another netdev. When the containing namespace exits, we take down the socket and prevent future sockets from being created (by setting creating_net to NULL), which removes that potential reference on the netns. However, it doesn't release references to the netns that a netdev cached in dst_cache might be taking, so the netns still might fail to exit. Since the socket is gimped anyway, we can simply clear all the dst_caches (by way of clearing the endpoint src), which will release all references. However, the current dst_cache_reset function only releases those references lazily. But it turns out that all of our usages of wg_socket_clear_peer_endpoint_src are called from contexts that are not exactly high-speed or bottle-necked. For example, when there's connection difficulty, or when userspace is reconfiguring the interface. And in particular for this patch, when the netns is exiting. So for those cases, it makes more sense to call dst_release immediately. For that, we add a small helper function to dst_cache. This patch also adds a test to netns.sh from Hangbin Liu to ensure this doesn't regress. Tested-by: Hangbin Liu Reported-by: Xiumei Mu Cc: Toke Høiland-Jørgensen Cc: Paolo Abeni Fixes: be0d977e56c7 ("wireguard: device: avoid circular netns references") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/device.c | 3 +++ drivers/net/wireguard/socket.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 551ddaa..77e64ea 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -398,6 +398,7 @@ static struct rtnl_link_ops link_ops __read_mostly = { static void wg_netns_pre_exit(struct net *net) { struct wg_device *wg; + struct wg_peer *peer; rtnl_lock(); list_for_each_entry(wg, &device_list, device_list) { @@ -407,6 +408,8 @@ static void wg_netns_pre_exit(struct net *net) mutex_lock(&wg->device_update_lock); rcu_assign_pointer(wg->creating_net, NULL); wg_socket_reinit(wg, NULL, NULL); + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_socket_clear_peer_endpoint_src(peer); mutex_unlock(&wg->device_update_lock); } } diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index 8c496b7..6f07b94 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -308,7 +308,7 @@ void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) { write_lock_bh(&peer->endpoint_lock); memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); - dst_cache_reset(&peer->endpoint_cache); + dst_cache_reset_now(&peer->endpoint_cache); write_unlock_bh(&peer->endpoint_lock); } -- cgit v1.2.3 From 0188f76c8d48dd77fad54172b9459c2e21020c7b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:26 -0500 Subject: wireguard: receive: use ring buffer for incoming handshakes Apparently the spinlock on incoming_handshake's skb_queue is highly contended, and a torrent of handshake or cookie packets can bring the data plane to its knees, simply by virtue of enqueueing the handshake packets to be processed asynchronously. So, we try switching this to a ring buffer to hopefully have less lock contention. This alleviates the problem somewhat, though it still isn't perfect, so future patches will have to improve this further. However, it at least doesn't completely diminish the data plane. Reported-by: Streun Fabio Reported-by: Joel Wanner Fixes: a8f1bc7bdea3 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/device.c | 36 ++++++++++++++++++------------------ drivers/net/wireguard/device.h | 9 +++------ drivers/net/wireguard/queueing.c | 6 +++--- drivers/net/wireguard/queueing.h | 2 +- drivers/net/wireguard/receive.c | 27 ++++++++++++--------------- 5 files changed, 37 insertions(+), 43 deletions(-) diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 77e64ea..a46067c 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -98,6 +98,7 @@ static int wg_stop(struct net_device *dev) { struct wg_device *wg = netdev_priv(dev); struct wg_peer *peer; + struct sk_buff *skb; mutex_lock(&wg->device_update_lock); list_for_each_entry(peer, &wg->peer_list, peer_list) { @@ -108,7 +109,9 @@ static int wg_stop(struct net_device *dev) wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); } mutex_unlock(&wg->device_update_lock); - skb_queue_purge(&wg->incoming_handshakes); + while ((skb = ptr_ring_consume(&wg->handshake_queue.ring)) != NULL) + kfree_skb(skb); + atomic_set(&wg->handshake_queue_len, 0); wg_socket_reinit(wg, NULL, NULL); return 0; } @@ -235,14 +238,13 @@ static void wg_destruct(struct net_device *dev) destroy_workqueue(wg->handshake_receive_wq); destroy_workqueue(wg->handshake_send_wq); destroy_workqueue(wg->packet_crypt_wq); - wg_packet_queue_free(&wg->decrypt_queue); - wg_packet_queue_free(&wg->encrypt_queue); + wg_packet_queue_free(&wg->handshake_queue, true); + wg_packet_queue_free(&wg->decrypt_queue, false); + wg_packet_queue_free(&wg->encrypt_queue, false); rcu_barrier(); /* Wait for all the peers to be actually freed. */ wg_ratelimiter_uninit(); memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); - skb_queue_purge(&wg->incoming_handshakes); free_percpu(dev->tstats); - free_percpu(wg->incoming_handshakes_worker); kvfree(wg->index_hashtable); kvfree(wg->peer_hashtable); mutex_unlock(&wg->device_update_lock); @@ -298,7 +300,6 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, init_rwsem(&wg->static_identity.lock); mutex_init(&wg->socket_update_lock); mutex_init(&wg->device_update_lock); - skb_queue_head_init(&wg->incoming_handshakes); wg_allowedips_init(&wg->peer_allowedips); wg_cookie_checker_init(&wg->cookie_checker, wg); INIT_LIST_HEAD(&wg->peer_list); @@ -316,16 +317,10 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, if (!dev->tstats) goto err_free_index_hashtable; - wg->incoming_handshakes_worker = - wg_packet_percpu_multicore_worker_alloc( - wg_packet_handshake_receive_worker, wg); - if (!wg->incoming_handshakes_worker) - goto err_free_tstats; - wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s", WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name); if (!wg->handshake_receive_wq) - goto err_free_incoming_handshakes; + goto err_free_tstats; wg->handshake_send_wq = alloc_workqueue("wg-kex-%s", WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name); @@ -347,10 +342,15 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, if (ret < 0) goto err_free_encrypt_queue; - ret = wg_ratelimiter_init(); + ret = wg_packet_queue_init(&wg->handshake_queue, wg_packet_handshake_receive_worker, + MAX_QUEUED_INCOMING_HANDSHAKES); if (ret < 0) goto err_free_decrypt_queue; + ret = wg_ratelimiter_init(); + if (ret < 0) + goto err_free_handshake_queue; + ret = register_netdevice(dev); if (ret < 0) goto err_uninit_ratelimiter; @@ -367,18 +367,18 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, err_uninit_ratelimiter: wg_ratelimiter_uninit(); +err_free_handshake_queue: + wg_packet_queue_free(&wg->handshake_queue, false); err_free_decrypt_queue: - wg_packet_queue_free(&wg->decrypt_queue); + wg_packet_queue_free(&wg->decrypt_queue, false); err_free_encrypt_queue: - wg_packet_queue_free(&wg->encrypt_queue); + wg_packet_queue_free(&wg->encrypt_queue, false); err_destroy_packet_crypt: destroy_workqueue(wg->packet_crypt_wq); err_destroy_handshake_send: destroy_workqueue(wg->handshake_send_wq); err_destroy_handshake_receive: destroy_workqueue(wg->handshake_receive_wq); -err_free_incoming_handshakes: - free_percpu(wg->incoming_handshakes_worker); err_free_tstats: free_percpu(dev->tstats); err_free_index_hashtable: diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h index 854bc3d..43c7ceb 100644 --- a/drivers/net/wireguard/device.h +++ b/drivers/net/wireguard/device.h @@ -39,21 +39,18 @@ struct prev_queue { struct wg_device { struct net_device *dev; - struct crypt_queue encrypt_queue, decrypt_queue; + struct crypt_queue encrypt_queue, decrypt_queue, handshake_queue; struct sock __rcu *sock4, *sock6; struct net __rcu *creating_net; struct noise_static_identity static_identity; - struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; - struct workqueue_struct *packet_crypt_wq; - struct sk_buff_head incoming_handshakes; - int incoming_handshake_cpu; - struct multicore_worker __percpu *incoming_handshakes_worker; + struct workqueue_struct *packet_crypt_wq,*handshake_receive_wq, *handshake_send_wq; struct cookie_checker cookie_checker; struct pubkey_hashtable *peer_hashtable; struct index_hashtable *index_hashtable; struct allowedips peer_allowedips; struct mutex device_update_lock, socket_update_lock; struct list_head device_list, peer_list; + atomic_t handshake_queue_len; unsigned int num_peers, device_update_gen; u32 fwmark; u16 incoming_port; diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c index 48e7b98..1de413b 100644 --- a/drivers/net/wireguard/queueing.c +++ b/drivers/net/wireguard/queueing.c @@ -38,11 +38,11 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, return 0; } -void wg_packet_queue_free(struct crypt_queue *queue) +void wg_packet_queue_free(struct crypt_queue *queue, bool purge) { free_percpu(queue->worker); - WARN_ON(!__ptr_ring_empty(&queue->ring)); - ptr_ring_cleanup(&queue->ring, NULL); + WARN_ON(!purge && !__ptr_ring_empty(&queue->ring)); + ptr_ring_cleanup(&queue->ring, purge ? (void(*)(void*))kfree_skb : NULL); } #define NEXT(skb) ((skb)->prev) diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index 4ef2944..e238810 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -23,7 +23,7 @@ struct sk_buff; /* queueing.c APIs: */ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, unsigned int len); -void wg_packet_queue_free(struct crypt_queue *queue); +void wg_packet_queue_free(struct crypt_queue *queue, bool purge); struct multicore_worker __percpu * wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 7dc84bc..f4e537e 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -116,8 +116,8 @@ static void wg_receive_handshake_packet(struct wg_device *wg, return; } - under_load = skb_queue_len(&wg->incoming_handshakes) >= - MAX_QUEUED_INCOMING_HANDSHAKES / 8; + under_load = atomic_read(&wg->handshake_queue_len) >= + MAX_QUEUED_INCOMING_HANDSHAKES / 8; if (under_load) { last_under_load = ktime_get_coarse_boottime_ns(); } else if (last_under_load) { @@ -212,13 +212,14 @@ static void wg_receive_handshake_packet(struct wg_device *wg, void wg_packet_handshake_receive_worker(struct work_struct *work) { - struct wg_device *wg = container_of(work, struct multicore_worker, - work)->ptr; + struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr; + struct wg_device *wg = container_of(queue, struct wg_device, handshake_queue); struct sk_buff *skb; - while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) { + while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { wg_receive_handshake_packet(wg, skb); dev_kfree_skb(skb); + atomic_dec(&wg->handshake_queue_len); cond_resched(); } } @@ -554,21 +555,17 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): { int cpu; - - if (skb_queue_len(&wg->incoming_handshakes) > - MAX_QUEUED_INCOMING_HANDSHAKES || - unlikely(!rng_is_initialized())) { + if (unlikely(!rng_is_initialized() || + ptr_ring_produce_bh(&wg->handshake_queue.ring, skb))) { net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n", wg->dev->name, skb); goto err; } - skb_queue_tail(&wg->incoming_handshakes, skb); - /* Queues up a call to packet_process_queued_handshake_ - * packets(skb): - */ - cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu); + atomic_inc(&wg->handshake_queue_len); + cpu = wg_cpumask_next_online(&wg->handshake_queue.last_cpu); + /* Queues up a call to packet_process_queued_handshake_packets(skb): */ queue_work_on(cpu, wg->handshake_receive_wq, - &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work); + &per_cpu_ptr(wg->handshake_queue.worker, cpu)->work); break; } case cpu_to_le32(MESSAGE_DATA): -- cgit v1.2.3 From a0b44acb448d7a4805f062fe4f776142305f89aa Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:27 -0500 Subject: wireguard: receive: drop handshakes if queue lock is contended If we're being delivered packets from multiple CPUs so quickly that the ring lock is contended for CPU tries, then it's safe to assume that the queue is near capacity anyway, so just drop the packet rather than spinning. This helps deal with multicore DoS that can interfere with data path performance. It _still_ does not completely fix the issue, but it again chips away at it. Reported-by: Streun Fabio Fixes: a8f1bc7bdea3 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/receive.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index f4e537e..7b8df40 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -554,9 +554,19 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): { - int cpu; - if (unlikely(!rng_is_initialized() || - ptr_ring_produce_bh(&wg->handshake_queue.ring, skb))) { + int cpu, ret = -EBUSY; + + if (unlikely(!rng_is_initialized())) + goto drop; + if (atomic_read(&wg->handshake_queue_len) > MAX_QUEUED_INCOMING_HANDSHAKES / 2) { + if (spin_trylock_bh(&wg->handshake_queue.ring.producer_lock)) { + ret = __ptr_ring_produce(&wg->handshake_queue.ring, skb); + spin_unlock_bh(&wg->handshake_queue.ring.producer_lock); + } + } else + ret = ptr_ring_produce_bh(&wg->handshake_queue.ring, skb); + if (ret) { + drop: net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n", wg->dev->name, skb); goto err; -- cgit v1.2.3 From 0065ed65b309439c1ea2d70c717c73672550ae6a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Nov 2021 10:39:28 -0500 Subject: wireguard: ratelimiter: use kvcalloc() instead of kvzalloc() Use 2-factor argument form kvcalloc() instead of kvzalloc(). Link: https://github.com/KSPP/linux/issues/162 Fixes: a8f1bc7bdea3 ("net: WireGuard secure network tunnel") Signed-off-by: Gustavo A. R. Silva [Jason: Gustavo's link above is for KSPP, but this isn't actually a security fix, as table_size is bounded to 8192 anyway, and gcc realizes this, so the codegen comes out to be about the same.] Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/ratelimiter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireguard/ratelimiter.c b/drivers/net/wireguard/ratelimiter.c index 3fedd1d..dd55e5c 100644 --- a/drivers/net/wireguard/ratelimiter.c +++ b/drivers/net/wireguard/ratelimiter.c @@ -176,12 +176,12 @@ int wg_ratelimiter_init(void) (1U << 14) / sizeof(struct hlist_head))); max_entries = table_size * 8; - table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL); + table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL); if (unlikely(!table_v4)) goto err_kmemcache; #if IS_ENABLED(CONFIG_IPV6) - table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL); + table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL); if (unlikely(!table_v6)) { kvfree(table_v4); goto err_kmemcache; -- cgit v1.2.3