diff options
Diffstat (limited to 'net/ipv4')
| -rw-r--r-- | net/ipv4/Kconfig | 2 | ||||
| -rw-r--r-- | net/ipv4/af_inet.c | 6 | ||||
| -rw-r--r-- | net/ipv4/esp4.c | 9 | ||||
| -rw-r--r-- | net/ipv4/icmp.c | 4 | ||||
| -rw-r--r-- | net/ipv4/inet_connection_sock.c | 20 | ||||
| -rw-r--r-- | net/ipv4/inet_hashtables.c | 8 | ||||
| -rw-r--r-- | net/ipv4/ip_gre.c | 3 | ||||
| -rw-r--r-- | net/ipv4/ip_tunnel_core.c | 15 | ||||
| -rw-r--r-- | net/ipv4/nexthop.c | 14 | ||||
| -rw-r--r-- | net/ipv4/syncookies.c | 11 | ||||
| -rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 5 | ||||
| -rw-r--r-- | net/ipv4/tcp.c | 7 | ||||
| -rw-r--r-- | net/ipv4/tcp_ao.c | 3 | ||||
| -rw-r--r-- | net/ipv4/tcp_bpf.c | 2 | ||||
| -rw-r--r-- | net/ipv4/tcp_diag.c | 2 | ||||
| -rw-r--r-- | net/ipv4/tcp_input.c | 38 | ||||
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 40 | ||||
| -rw-r--r-- | net/ipv4/tcp_minisocks.c | 2 | ||||
| -rw-r--r-- | net/ipv4/udp.c | 29 | ||||
| -rw-r--r-- | net/ipv4/udp_bpf.c | 2 |
20 files changed, 124 insertions, 98 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index b71c22475c51..df922f9f5289 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -748,6 +748,7 @@ config TCP_SIGPOOL config TCP_AO bool "TCP: Authentication Option (RFC5925)" select CRYPTO + select CRYPTO_LIB_UTILS select TCP_SIGPOOL depends on 64BIT && IPV6 != m # seq-number extension needs WRITE_ONCE(u64) help @@ -761,6 +762,7 @@ config TCP_AO config TCP_MD5SIG bool "TCP: MD5 Signature Option support (RFC2385)" select CRYPTO_LIB_MD5 + select CRYPTO_LIB_UTILS help RFC2385 specifies a method of giving MD5 protection to TCP sessions. Its main (only?) use is to protect BGP sessions between core routers diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8036e76aa1e4..c7731e300a44 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -124,6 +124,12 @@ #include <trace/events/sock.h> +/* Keep the definition of IPv6 disable here for now, to avoid annoying linker + * issues in case IPv6=m + */ +int disable_ipv6_mod; +EXPORT_SYMBOL(disable_ipv6_mod); + /* The inetsw table contains everything that inet_create needs to * build a new socket. */ diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 2c922afadb8f..6dfc0bcdef65 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -235,10 +235,13 @@ static void esp_output_done(void *data, int err) xfrm_dev_resume(skb); } else { if (!err && - x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) - esp_output_tail_tcp(x, skb); - else + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) { + err = esp_output_tail_tcp(x, skb); + if (err != -EINPROGRESS) + kfree_skb(skb); + } else { xfrm_output_resume(skb_to_full_sk(skb), skb, err); + } } } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a62b4c4033cc..568bd1e95d44 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1079,10 +1079,12 @@ out: static bool icmp_tag_validation(int proto) { + const struct net_protocol *ipprot; bool ok; rcu_read_lock(); - ok = rcu_dereference(inet_protos[proto])->icmp_strict_tag_validation; + ipprot = rcu_dereference(inet_protos[proto]); + ok = ipprot ? ipprot->icmp_strict_tag_validation : false; rcu_read_unlock(); return ok; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5dfac6ce1110..e961936b6be7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -154,20 +154,6 @@ bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high) } EXPORT_SYMBOL(inet_sk_get_local_port_range); -static bool inet_use_bhash2_on_bind(const struct sock *sk) -{ -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) - return false; - - if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) - return true; - } -#endif - return sk->sk_rcv_saddr != htonl(INADDR_ANY); -} - static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, kuid_t uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) @@ -259,7 +245,7 @@ static int inet_csk_bind_conflict(const struct sock *sk, * checks separately because their spinlocks have to be acquired/released * independently of each other, to prevent possible deadlocks */ - if (inet_use_bhash2_on_bind(sk)) + if (inet_use_hash2_on_bind(sk)) return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok); @@ -376,7 +362,7 @@ other_parity_scan: head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock_bh(&head->lock); - if (inet_use_bhash2_on_bind(sk)) { + if (inet_use_hash2_on_bind(sk)) { if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false)) goto next_port; } @@ -562,7 +548,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) check_bind_conflict = false; } - if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) { + if (check_bind_conflict && inet_use_hash2_on_bind(sk)) { if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true)) goto fail_unlock; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fca980772c81..9bfccc283fa6 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -200,7 +200,7 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2, void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, struct inet_bind2_bucket *tb2, unsigned short port) { - inet_sk(sk)->inet_num = port; + WRITE_ONCE(inet_sk(sk)->inet_num, port); inet_csk(sk)->icsk_bind_hash = tb; inet_csk(sk)->icsk_bind2_hash = tb2; sk_add_bind_node(sk, &tb2->owners); @@ -224,7 +224,7 @@ static void __inet_put_port(struct sock *sk) spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; inet_csk(sk)->icsk_bind_hash = NULL; - inet_sk(sk)->inet_num = 0; + WRITE_ONCE(inet_sk(sk)->inet_num, 0); sk->sk_userlocks &= ~SOCK_CONNECT_BIND; spin_lock(&head2->lock); @@ -352,7 +352,7 @@ static inline int compute_score(struct sock *sk, const struct net *net, { int score = -1; - if (net_eq(sock_net(sk), net) && sk->sk_num == hnum && + if (net_eq(sock_net(sk), net) && READ_ONCE(sk->sk_num) == hnum && !ipv6_only_sock(sk)) { if (sk->sk_rcv_saddr != daddr) return -1; @@ -1206,7 +1206,7 @@ error: sk->sk_hash = 0; inet_sk(sk)->inet_sport = 0; - inet_sk(sk)->inet_num = 0; + WRITE_ONCE(inet_sk(sk)->inet_num, 0); if (tw) inet_twsk_bind_unhash(tw, hinfo); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e13244729ad8..35f0baa99d40 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -919,7 +919,8 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, return -(t->hlen + sizeof(*iph)); } -static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) +static int ipgre_header_parse(const struct sk_buff *skb, const struct net_device *dev, + unsigned char *haddr) { const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb); memcpy(haddr, &iph->saddr, 4); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 2e61ac137128..5683c328990f 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -58,6 +58,19 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, struct iphdr *iph; int err; + if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) { + if (dev) { + net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", + dev->name); + DEV_STATS_INC(dev, tx_errors); + } + ip_rt_put(rt); + kfree_skb(skb); + return; + } + + dev_xmit_recursion_inc(); + skb_scrub_packet(skb, xnet); skb_clear_hash_if_not_l4(skb); @@ -88,6 +101,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, pkt_len = 0; iptunnel_xmit_stats(dev, pkt_len); } + + dev_xmit_recursion_dec(); } EXPORT_SYMBOL_GPL(iptunnel_xmit); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 1aa2b05ee8de..c942f1282236 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2002,7 +2002,8 @@ static void nh_hthr_group_rebalance(struct nh_group *nhg) } static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, - struct nl_info *nlinfo) + struct nl_info *nlinfo, + struct list_head *deferred_free) { struct nh_grp_entry *nhges, *new_nhges; struct nexthop *nhp = nhge->nh_parent; @@ -2062,8 +2063,8 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, rcu_assign_pointer(nhp->nh_grp, newg); list_del(&nhge->nh_list); - free_percpu(nhge->stats); nexthop_put(nhge->nh); + list_add(&nhge->nh_list, deferred_free); /* Removal of a NH from a resilient group is notified through * bucket notifications. @@ -2083,6 +2084,7 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, struct nl_info *nlinfo) { struct nh_grp_entry *nhge, *tmp; + LIST_HEAD(deferred_free); /* If there is nothing to do, let's avoid the costly call to * synchronize_net() @@ -2091,10 +2093,16 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, return; list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) - remove_nh_grp_entry(net, nhge, nlinfo); + remove_nh_grp_entry(net, nhge, nlinfo, &deferred_free); /* make sure all see the newly published array before releasing rtnl */ synchronize_net(); + + /* Now safe to free percpu stats — all RCU readers have finished */ + list_for_each_entry_safe(nhge, tmp, &deferred_free, nh_list) { + list_del(&nhge->nh_list); + free_percpu(nhge->stats); + } } static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 061751aabc8e..fc3affd9c801 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -378,9 +378,14 @@ static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, tcp_parse_options(net, skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcp_ts_off(net, - ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr); + union tcp_seq_and_ts_off st; + + st = secure_tcp_seq_and_ts_off(net, + ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); + tsoff = st.ts_off; tcp_opt.rcv_tsecr -= tsoff; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 643763bc2142..5654cc9c8a0b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -486,7 +486,8 @@ static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed) proc_fib_multipath_hash_rand_seed), }; - WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new); + WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.user_seed, new.user_seed); + WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed, new.mp_seed); } static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write, @@ -500,7 +501,7 @@ static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write int ret; mphs = &net->ipv4.sysctl_fib_multipath_hash_seed; - user_seed = mphs->user_seed; + user_seed = READ_ONCE(mphs->user_seed); tmp = *table; tmp.data = &user_seed; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f84d9a45cc9d..202a4e57a218 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -244,6 +244,7 @@ #define pr_fmt(fmt) "TCP: " fmt #include <crypto/md5.h> +#include <crypto/utils.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> @@ -1446,7 +1447,7 @@ out_err: err = sk_stream_error(sk, flags, err); /* make sure we wake any epoll edge trigger waiter */ if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { - sk->sk_write_space(sk); + READ_ONCE(sk->sk_write_space)(sk); tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); } if (binding) @@ -4181,7 +4182,7 @@ ao_parse: break; case TCP_NOTSENT_LOWAT: WRITE_ONCE(tp->notsent_lowat, val); - sk->sk_write_space(sk); + READ_ONCE(sk->sk_write_space)(sk); break; case TCP_INQ: if (val > 1 || val < 0) @@ -4970,7 +4971,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, tcp_v4_md5_hash_skb(newhash, key, NULL, skb); else tp->af_specific->calc_md5_hash(newhash, key, NULL, skb); - if (memcmp(hash_location, newhash, 16) != 0) { + if (crypto_memneq(hash_location, newhash, 16)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); trace_tcp_hash_md5_mismatch(sk, skb); return SKB_DROP_REASON_TCP_MD5FAILURE; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 4980caddb0fc..a97cdf3e6af4 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) "TCP: " fmt #include <crypto/hash.h> +#include <crypto/utils.h> #include <linux/inetdevice.h> #include <linux/tcp.h> @@ -922,7 +923,7 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, /* XXX: make it per-AF callback? */ tcp_ao_hash_skb(family, hash_buf, key, sk, skb, traffic_key, (phash - (u8 *)th), sne); - if (memcmp(phash, hash_buf, maclen)) { + if (crypto_memneq(phash, hash_buf, maclen)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); atomic64_inc(&info->counters.pkt_bad); atomic64_inc(&key->pkt_bad); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index c449a044895e..813d2e498c93 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -725,7 +725,7 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash); tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); } else { - sk->sk_write_space = psock->saved_write_space; + WRITE_ONCE(sk->sk_write_space, psock->saved_write_space); /* Pairs with lockless read in sk_clone_lock() */ sock_replace_proto(sk, psock->sk_proto); } diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index d83efd91f461..7935702e394b 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -509,7 +509,7 @@ next_chunk: if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) goto next_normal; - if (r->id.idiag_sport != htons(sk->sk_num) && + if (r->id.idiag_sport != htons(READ_ONCE(sk->sk_num)) && r->id.idiag_sport) goto next_normal; if (r->id.idiag_dport != sk->sk_dport && diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 41c57efd125c..cba89733d121 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5374,25 +5374,11 @@ static void tcp_ofo_queue(struct sock *sk) static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb); static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb); -/* Check if this incoming skb can be added to socket receive queues - * while satisfying sk->sk_rcvbuf limit. - * - * In theory we should use skb->truesize, but this can cause problems - * when applications use too small SO_RCVBUF values. - * When LRO / hw gro is used, the socket might have a high tp->scaling_ratio, - * allowing RWIN to be close to available space. - * Whenever the receive queue gets full, we can receive a small packet - * filling RWIN, but with a high skb->truesize, because most NIC use 4K page - * plus sk_buff metadata even when receiving less than 1500 bytes of payload. - * - * Note that we use skb->len to decide to accept or drop this packet, - * but sk->sk_rmem_alloc is the sum of all skb->truesize. - */ static bool tcp_can_ingest(const struct sock *sk, const struct sk_buff *skb) { unsigned int rmem = atomic_read(&sk->sk_rmem_alloc); - return rmem + skb->len <= sk->sk_rcvbuf; + return rmem <= sk->sk_rcvbuf; } static int tcp_try_rmem_schedule(struct sock *sk, const struct sk_buff *skb, @@ -5425,7 +5411,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM); return; } @@ -5635,7 +5621,7 @@ err: void tcp_data_ready(struct sock *sk) { if (tcp_epollin_ready(sk, sk->sk_rcvlowat) || sock_flag(sk, SOCK_DONE)) - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); } static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) @@ -5691,7 +5677,7 @@ queue_and_out: inet_csk(sk)->icsk_ack.pending |= (ICSK_ACK_NOMEM | ICSK_ACK_NOW); inet_csk_schedule_ack(sk); - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); if (skb_queue_len(&sk->sk_receive_queue) && skb->len) { reason = SKB_DROP_REASON_PROTO_MEM; @@ -6114,7 +6100,9 @@ static void tcp_new_space(struct sock *sk) tp->snd_cwnd_stamp = tcp_jiffies32; } - INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk); + INDIRECT_CALL_1(READ_ONCE(sk->sk_write_space), + sk_stream_write_space, + sk); } /* Caller made space either from: @@ -6325,7 +6313,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t BUG(); WRITE_ONCE(tp->urg_data, TCP_URG_VALID | tmp); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); } } } @@ -7658,6 +7646,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct sock *fastopen_sk = NULL; + union tcp_seq_and_ts_off st; struct request_sock *req; bool want_cookie = false; struct dst_entry *dst; @@ -7727,9 +7716,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (!dst) goto drop_and_free; + if (tmp_opt.tstamp_ok || (!want_cookie && !isn)) + st = af_ops->init_seq_and_ts_off(net, skb); + if (tmp_opt.tstamp_ok) { tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); - tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); + tcp_rsk(req)->ts_off = st.ts_off; } if (!want_cookie && !isn) { int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); @@ -7751,7 +7743,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_release; } - isn = af_ops->init_seq(skb); + isn = st.seq; } tcp_ecn_create_request(req, skb, sk, dst); @@ -7792,7 +7784,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, sock_put(fastopen_sk); goto drop_and_free; } - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); bh_unlock_sock(fastopen_sk); sock_put(fastopen_sk); } else { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d53d39be291a..c7b2463c2e25 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -88,6 +88,7 @@ #include <linux/skbuff_ref.h> #include <crypto/md5.h> +#include <crypto/utils.h> #include <trace/events/tcp.h> @@ -104,17 +105,14 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { static DEFINE_MUTEX(tcp_exit_batch_mutex); -static u32 tcp_v4_init_seq(const struct sk_buff *skb) +static union tcp_seq_and_ts_off +tcp_v4_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb) { - return secure_tcp_seq(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source); -} - -static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) -{ - return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); + return secure_tcp_seq_and_ts_off(net, + ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -326,15 +324,16 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len rt = NULL; if (likely(!tp->repair)) { + union tcp_seq_and_ts_off st; + + st = secure_tcp_seq_and_ts_off(net, + inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + usin->sin_port); if (!tp->write_seq) - WRITE_ONCE(tp->write_seq, - secure_tcp_seq(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - usin->sin_port)); - WRITE_ONCE(tp->tsoffset, - secure_tcp_ts_off(net, inet->inet_saddr, - inet->inet_daddr)); + WRITE_ONCE(tp->write_seq, st.seq); + WRITE_ONCE(tp->tsoffset, st.ts_off); } atomic_set(&inet->inet_id, get_random_u16()); @@ -839,7 +838,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, goto out; tcp_v4_md5_hash_skb(newhash, key, NULL, skb); - if (memcmp(md5_hash_location, newhash, 16) != 0) + if (crypto_memneq(md5_hash_location, newhash, 16)) goto out; } @@ -1676,8 +1675,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .cookie_init_seq = cookie_v4_init_sequence, #endif .route_req = tcp_v4_route_req, - .init_seq = tcp_v4_init_seq, - .init_ts_off = tcp_v4_init_ts_off, + .init_seq_and_ts_off = tcp_v4_init_seq_and_ts_off, .send_synack = tcp_v4_send_synack, }; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d9c5a43bd281..dafb63b923d0 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -1004,7 +1004,7 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, reason = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ if (state == TCP_SYN_RECV && child->sk_state != state) - parent->sk_data_ready(parent); + READ_ONCE(parent->sk_data_ready)(parent); } else { /* Alas, it is possible again, because we do lookup * in main socket hash table and lock on listening diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6c6b68a66dcd..cb99a3c27053 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -287,7 +287,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, } else { hslot = udp_hashslot(udptable, net, snum); spin_lock_bh(&hslot->lock); - if (hslot->count > 10) { + if (inet_use_hash2_on_bind(sk) && hslot->count > 10) { int exist; unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum; @@ -1787,7 +1787,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) * using prepare_to_wait_exclusive(). */ while (nb) { - INDIRECT_CALL_1(sk->sk_data_ready, + INDIRECT_CALL_1(READ_ONCE(sk->sk_data_ready), sock_def_readable, sk); nb--; } @@ -2287,7 +2287,6 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) udp_sk(sk)->udp_port_hash); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); nhslot2 = udp_hashslot2(udptable, newhash); - udp_sk(sk)->udp_portaddr_hash = newhash; if (hslot2 != nhslot2 || rcu_access_pointer(sk->sk_reuseport_cb)) { @@ -2321,19 +2320,25 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) if (udp_hashed4(sk)) { spin_lock_bh(&hslot->lock); - udp_rehash4(udptable, sk, newhash4); - if (hslot2 != nhslot2) { - spin_lock(&hslot2->lock); - udp_hash4_dec(hslot2); - spin_unlock(&hslot2->lock); - - spin_lock(&nhslot2->lock); - udp_hash4_inc(nhslot2); - spin_unlock(&nhslot2->lock); + if (inet_rcv_saddr_any(sk)) { + udp_unhash4(udptable, sk); + } else { + udp_rehash4(udptable, sk, newhash4); + if (hslot2 != nhslot2) { + spin_lock(&hslot2->lock); + udp_hash4_dec(hslot2); + spin_unlock(&hslot2->lock); + + spin_lock(&nhslot2->lock); + udp_hash4_inc(nhslot2); + spin_unlock(&nhslot2->lock); + } } spin_unlock_bh(&hslot->lock); } + + udp_sk(sk)->udp_portaddr_hash = newhash; } } EXPORT_IPV6_MOD(udp_lib_rehash); diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 91233e37cd97..779a3a03762f 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -158,7 +158,7 @@ int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6; if (restore) { - sk->sk_write_space = psock->saved_write_space; + WRITE_ONCE(sk->sk_write_space, psock->saved_write_space); sock_replace_proto(sk, psock->sk_proto); return 0; } |
