diff options
86 files changed, 1057 insertions, 387 deletions
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 246b6205c5e0..ab146894ba4e 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -251,11 +251,13 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code) bt_dev_err(hdev, "Hardware error 0x%2.2x", code); + hci_req_sync_lock(hdev); + skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { bt_dev_err(hdev, "Reset after hardware error failed (%ld)", PTR_ERR(skb)); - return; + goto unlock; } kfree_skb(skb); @@ -263,18 +265,21 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code) if (IS_ERR(skb)) { bt_dev_err(hdev, "Retrieving Intel exception info failed (%ld)", PTR_ERR(skb)); - return; + goto unlock; } if (skb->len != 13) { bt_dev_err(hdev, "Exception info size mismatch"); kfree_skb(skb); - return; + goto unlock; } bt_dev_err(hdev, "Exception info %s", (char *)(skb->data + 1)); kfree_skb(skb); + +unlock: + hci_req_sync_unlock(hdev); } EXPORT_SYMBOL_GPL(btintel_hw_error); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a1c5eb993e47..5c535f3ab722 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2376,8 +2376,11 @@ static void btusb_work(struct work_struct *work) if (data->air_mode == HCI_NOTIFY_ENABLE_SCO_CVSD) { if (hdev->voice_setting & 0x0020) { static const int alts[3] = { 2, 4, 5 }; + unsigned int sco_idx; - new_alts = alts[data->sco_num - 1]; + sco_idx = min_t(unsigned int, data->sco_num - 1, + ARRAY_SIZE(alts) - 1); + new_alts = alts[sco_idx]; } else { new_alts = data->sco_num; } diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index 91acf24f1ef5..91c96ad12342 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -541,6 +541,8 @@ static int download_firmware(struct ll_device *lldev) if (err || !fw->data || !fw->size) { bt_dev_err(lldev->hu.hdev, "request_firmware failed(errno %d) for %s", err, bts_scr_name); + if (!err) + release_firmware(fw); return -EINVAL; } ptr = (void *)fw->data; diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 0498198a4696..766d455950f5 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -601,7 +601,9 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], /* We need synchronization with dev->stop() */ ASSERT_RTNL(); - can_ctrlmode_changelink(dev, data, extack); + err = can_ctrlmode_changelink(dev, data, extack); + if (err) + return err; if (data[IFLA_CAN_BITTIMING]) { struct can_bittiming bt; diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index bb7782582f40..0d0190ae094a 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1225,7 +1225,11 @@ static int mcp251x_open(struct net_device *net) } mutex_lock(&priv->mcp_lock); - mcp251x_power_enable(priv->transceiver, 1); + ret = mcp251x_power_enable(priv->transceiver, 1); + if (ret) { + dev_err(&spi->dev, "failed to enable transceiver power: %pe\n", ERR_PTR(ret)); + goto out_close_candev; + } priv->force_quit = 0; priv->tx_skb = NULL; @@ -1272,6 +1276,7 @@ out_free_irq: mcp251x_hw_sleep(spi); out_close: mcp251x_power_enable(priv->transceiver, 0); +out_close_candev: close_candev(net); mutex_unlock(&priv->mcp_lock); if (release_irq) @@ -1516,11 +1521,25 @@ static int __maybe_unused mcp251x_can_resume(struct device *dev) { struct spi_device *spi = to_spi_device(dev); struct mcp251x_priv *priv = spi_get_drvdata(spi); + int ret = 0; - if (priv->after_suspend & AFTER_SUSPEND_POWER) - mcp251x_power_enable(priv->power, 1); - if (priv->after_suspend & AFTER_SUSPEND_UP) - mcp251x_power_enable(priv->transceiver, 1); + if (priv->after_suspend & AFTER_SUSPEND_POWER) { + ret = mcp251x_power_enable(priv->power, 1); + if (ret) { + dev_err(dev, "failed to restore power: %pe\n", ERR_PTR(ret)); + return ret; + } + } + + if (priv->after_suspend & AFTER_SUSPEND_UP) { + ret = mcp251x_power_enable(priv->transceiver, 1); + if (ret) { + dev_err(dev, "failed to restore transceiver power: %pe\n", ERR_PTR(ret)); + if (priv->after_suspend & AFTER_SUSPEND_POWER) + mcp251x_power_enable(priv->power, 0); + return ret; + } + } if (priv->after_suspend & (AFTER_SUSPEND_POWER | AFTER_SUSPEND_UP)) queue_work(priv->wq, &priv->restart_work); diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 42dbe8f93231..5724f8f2defd 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -227,7 +227,9 @@ static int airoha_ppe_get_wdma_info(struct net_device *dev, const u8 *addr, if (!dev) return -ENODEV; + rcu_read_lock(); err = dev_fill_forward_path(dev, addr, &stack); + rcu_read_unlock(); if (err) return err; diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index cd7dddeb91dd..9787c1857e13 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -25,7 +25,7 @@ config B44 select SSB select MII select PHYLIB - select FIXED_PHY if BCM47XX + select FIXED_PHY help If you have a network (Ethernet) controller of this type, say Y or M here. diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index aa6d8606849f..972474893a6b 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -1152,12 +1152,6 @@ void bcmasp_enable_wol(struct bcmasp_intf *intf, bool en) } } -static void bcmasp_wol_irq_destroy(struct bcmasp_priv *priv) -{ - if (priv->wol_irq > 0) - free_irq(priv->wol_irq, priv); -} - static void bcmasp_eee_fixup(struct bcmasp_intf *intf, bool en) { u32 reg, phy_lpi_overwrite; @@ -1255,7 +1249,7 @@ static int bcmasp_probe(struct platform_device *pdev) if (priv->irq <= 0) return -EINVAL; - priv->clk = devm_clk_get_optional_enabled(dev, "sw_asp"); + priv->clk = devm_clk_get_optional(dev, "sw_asp"); if (IS_ERR(priv->clk)) return dev_err_probe(dev, PTR_ERR(priv->clk), "failed to request clock\n"); @@ -1283,6 +1277,10 @@ static int bcmasp_probe(struct platform_device *pdev) bcmasp_set_pdata(priv, pdata); + ret = clk_prepare_enable(priv->clk); + if (ret) + return dev_err_probe(dev, ret, "failed to start clock\n"); + /* Enable all clocks to ensure successful probing */ bcmasp_core_clock_set(priv, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE, 0); @@ -1294,8 +1292,10 @@ static int bcmasp_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, priv->irq, bcmasp_isr, 0, pdev->name, priv); - if (ret) - return dev_err_probe(dev, ret, "failed to request ASP interrupt: %d", ret); + if (ret) { + dev_err(dev, "Failed to request ASP interrupt: %d", ret); + goto err_clock_disable; + } /* Register mdio child nodes */ of_platform_populate(dev->of_node, bcmasp_mdio_of_match, NULL, dev); @@ -1307,13 +1307,17 @@ static int bcmasp_probe(struct platform_device *pdev) priv->mda_filters = devm_kcalloc(dev, priv->num_mda_filters, sizeof(*priv->mda_filters), GFP_KERNEL); - if (!priv->mda_filters) - return -ENOMEM; + if (!priv->mda_filters) { + ret = -ENOMEM; + goto err_clock_disable; + } priv->net_filters = devm_kcalloc(dev, priv->num_net_filters, sizeof(*priv->net_filters), GFP_KERNEL); - if (!priv->net_filters) - return -ENOMEM; + if (!priv->net_filters) { + ret = -ENOMEM; + goto err_clock_disable; + } bcmasp_core_init_filters(priv); @@ -1322,7 +1326,8 @@ static int bcmasp_probe(struct platform_device *pdev) ports_node = of_find_node_by_name(dev->of_node, "ethernet-ports"); if (!ports_node) { dev_warn(dev, "No ports found\n"); - return -EINVAL; + ret = -EINVAL; + goto err_clock_disable; } i = 0; @@ -1344,8 +1349,6 @@ static int bcmasp_probe(struct platform_device *pdev) */ bcmasp_core_clock_set(priv, 0, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE); - clk_disable_unprepare(priv->clk); - /* Now do the registration of the network ports which will take care * of managing the clock properly. */ @@ -1358,13 +1361,16 @@ static int bcmasp_probe(struct platform_device *pdev) count++; } + clk_disable_unprepare(priv->clk); + dev_info(dev, "Initialized %d port(s)\n", count); return ret; err_cleanup: - bcmasp_wol_irq_destroy(priv); bcmasp_remove_intfs(priv); +err_clock_disable: + clk_disable_unprepare(priv->clk); return ret; } @@ -1376,7 +1382,6 @@ static void bcmasp_remove(struct platform_device *pdev) if (!priv) return; - bcmasp_wol_irq_destroy(priv); bcmasp_remove_intfs(priv); } diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index c16ac9c76aa3..99e7d5cf3786 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1071,7 +1071,7 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budge } if (tx_skb->skb) { - napi_consume_skb(tx_skb->skb, budget); + dev_consume_skb_any(tx_skb->skb); tx_skb->skb = NULL; } } @@ -3224,7 +3224,7 @@ static void gem_get_ethtool_stats(struct net_device *dev, spin_lock_irq(&bp->stats_lock); gem_update_stats(bp); memcpy(data, &bp->ethtool_stats, sizeof(u64) - * (GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES)); + * (GEM_STATS_LEN + QUEUE_STATS_LEN * bp->num_queues)); spin_unlock_irq(&bp->stats_lock); } @@ -5776,9 +5776,9 @@ static int __maybe_unused macb_suspend(struct device *dev) struct macb_queue *queue; struct in_device *idev; unsigned long flags; + u32 tmp, ifa_local; unsigned int q; int err; - u32 tmp; if (!device_may_wakeup(&bp->dev->dev)) phy_exit(bp->phy); @@ -5787,14 +5787,21 @@ static int __maybe_unused macb_suspend(struct device *dev) return 0; if (bp->wol & MACB_WOL_ENABLED) { - /* Check for IP address in WOL ARP mode */ - idev = __in_dev_get_rcu(bp->dev); - if (idev) - ifa = rcu_dereference(idev->ifa_list); - if ((bp->wolopts & WAKE_ARP) && !ifa) { - netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n"); - return -EOPNOTSUPP; + if (bp->wolopts & WAKE_ARP) { + /* Check for IP address in WOL ARP mode */ + rcu_read_lock(); + idev = __in_dev_get_rcu(bp->dev); + if (idev) + ifa = rcu_dereference(idev->ifa_list); + if (!ifa) { + rcu_read_unlock(); + netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n"); + return -EOPNOTSUPP; + } + ifa_local = be32_to_cpu(ifa->ifa_local); + rcu_read_unlock(); } + spin_lock_irqsave(&bp->lock, flags); /* Disable Tx and Rx engines before disabling the queues, @@ -5833,8 +5840,9 @@ static int __maybe_unused macb_suspend(struct device *dev) if (bp->wolopts & WAKE_ARP) { tmp |= MACB_BIT(ARP); /* write IP address into register */ - tmp |= MACB_BFEXT(IP, be32_to_cpu(ifa->ifa_local)); + tmp |= MACB_BFEXT(IP, ifa_local); } + spin_unlock_irqrestore(&bp->lock, flags); /* Change interrupt handler and * Enable WoL IRQ on queue 0 @@ -5847,11 +5855,12 @@ static int __maybe_unused macb_suspend(struct device *dev) dev_err(dev, "Unable to request IRQ %d (error %d)\n", bp->queues[0].irq, err); - spin_unlock_irqrestore(&bp->lock, flags); return err; } + spin_lock_irqsave(&bp->lock, flags); queue_writel(bp->queues, IER, GEM_BIT(WOL)); gem_writel(bp, WOL, tmp); + spin_unlock_irqrestore(&bp->lock, flags); } else { err = devm_request_irq(dev, bp->queues[0].irq, macb_wol_interrupt, IRQF_SHARED, netdev->name, bp->queues); @@ -5859,13 +5868,13 @@ static int __maybe_unused macb_suspend(struct device *dev) dev_err(dev, "Unable to request IRQ %d (error %d)\n", bp->queues[0].irq, err); - spin_unlock_irqrestore(&bp->lock, flags); return err; } + spin_lock_irqsave(&bp->lock, flags); queue_writel(bp->queues, IER, MACB_BIT(WOL)); macb_writel(bp, WOL, tmp); + spin_unlock_irqrestore(&bp->lock, flags); } - spin_unlock_irqrestore(&bp->lock, flags); enable_irq_wake(bp->queues[0].irq); } @@ -5932,6 +5941,8 @@ static int __maybe_unused macb_resume(struct device *dev) queue_readl(bp->queues, ISR); if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(bp->queues, ISR, -1); + spin_unlock_irqrestore(&bp->lock, flags); + /* Replace interrupt handler on queue 0 */ devm_free_irq(dev, bp->queues[0].irq, bp->queues); err = devm_request_irq(dev, bp->queues[0].irq, macb_interrupt, @@ -5940,10 +5951,8 @@ static int __maybe_unused macb_resume(struct device *dev) dev_err(dev, "Unable to request IRQ %d (error %d)\n", bp->queues[0].irq, err); - spin_unlock_irqrestore(&bp->lock, flags); return err; } - spin_unlock_irqrestore(&bp->lock, flags); disable_irq_wake(bp->queues[0].irq); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index fed89d4f1e1d..2fe140ddebb2 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -813,6 +813,8 @@ static void enetc_get_ringparam(struct net_device *ndev, { struct enetc_ndev_priv *priv = netdev_priv(ndev); + ring->rx_max_pending = priv->rx_bd_count; + ring->tx_max_pending = priv->tx_bd_count; ring->rx_pending = priv->rx_bd_count; ring->tx_pending = priv->tx_bd_count; diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index ab67c709d5a0..1cd1f3f2930a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -313,14 +313,13 @@ static int iavf_get_sset_count(struct net_device *netdev, int sset) { /* Report the maximum number queues, even if not every queue is * currently configured. Since allocation of queues is in pairs, - * use netdev->real_num_tx_queues * 2. The real_num_tx_queues is set - * at device creation and never changes. + * use netdev->num_tx_queues * 2. The num_tx_queues is set at + * device creation and never changes. */ if (sset == ETH_SS_STATS) return IAVF_STATS_LEN + - (IAVF_QUEUE_STATS_LEN * 2 * - netdev->real_num_tx_queues); + (IAVF_QUEUE_STATS_LEN * 2 * netdev->num_tx_queues); else return -EINVAL; } @@ -345,19 +344,19 @@ static void iavf_get_ethtool_stats(struct net_device *netdev, iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats); rcu_read_lock(); - /* As num_active_queues describe both tx and rx queues, we can use - * it to iterate over rings' stats. + /* Use num_tx_queues to report stats for the maximum number of queues. + * Queues beyond num_active_queues will report zero. */ - for (i = 0; i < adapter->num_active_queues; i++) { - struct iavf_ring *ring; + for (i = 0; i < netdev->num_tx_queues; i++) { + struct iavf_ring *tx_ring = NULL, *rx_ring = NULL; - /* Tx rings stats */ - ring = &adapter->tx_rings[i]; - iavf_add_queue_stats(&data, ring); + if (i < adapter->num_active_queues) { + tx_ring = &adapter->tx_rings[i]; + rx_ring = &adapter->rx_rings[i]; + } - /* Rx rings stats */ - ring = &adapter->rx_rings[i]; - iavf_add_queue_stats(&data, ring); + iavf_add_queue_stats(&data, tx_ring); + iavf_add_queue_stats(&data, rx_ring); } rcu_read_unlock(); } @@ -376,9 +375,9 @@ static void iavf_get_stat_strings(struct net_device *netdev, u8 *data) iavf_add_stat_strings(&data, iavf_gstrings_stats); /* Queues are always allocated in pairs, so we just use - * real_num_tx_queues for both Tx and Rx queues. + * num_tx_queues for both Tx and Rx queues. */ - for (i = 0; i < netdev->real_num_tx_queues; i++) { + for (i = 0; i < netdev->num_tx_queues; i++) { iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, "tx", i); iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 2b2b22af42be..eb3a48330cc1 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -840,6 +840,28 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) } /** + * ice_get_max_txq - return the maximum number of Tx queues for in a PF + * @pf: PF structure + * + * Return: maximum number of Tx queues + */ +static inline int ice_get_max_txq(struct ice_pf *pf) +{ + return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq); +} + +/** + * ice_get_max_rxq - return the maximum number of Rx queues for in a PF + * @pf: PF structure + * + * Return: maximum number of Rx queues + */ +static inline int ice_get_max_rxq(struct ice_pf *pf) +{ + return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq); +} + +/** * ice_get_main_vsi - Get the PF VSI * @pf: PF instance * diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 301947d53ede..e6a20af6f63d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1930,6 +1930,17 @@ __ice_get_ethtool_stats(struct net_device *netdev, int i = 0; char *p; + if (ice_is_port_repr_netdev(netdev)) { + ice_update_eth_stats(vsi); + + for (j = 0; j < ICE_VSI_STATS_LEN; j++) { + p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset; + data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + return; + } + ice_update_pf_stats(pf); ice_update_vsi_stats(vsi); @@ -1939,9 +1950,6 @@ __ice_get_ethtool_stats(struct net_device *netdev, sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } - if (ice_is_port_repr_netdev(netdev)) - return; - /* populate per queue stats */ rcu_read_lock(); @@ -3774,24 +3782,6 @@ ice_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) } /** - * ice_get_max_txq - return the maximum number of Tx queues for in a PF - * @pf: PF structure - */ -static int ice_get_max_txq(struct ice_pf *pf) -{ - return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq); -} - -/** - * ice_get_max_rxq - return the maximum number of Rx queues for in a PF - * @pf: PF structure - */ -static int ice_get_max_rxq(struct ice_pf *pf) -{ - return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq); -} - -/** * ice_get_combined_cnt - return the current number of combined channels * @vsi: PF VSI pointer * diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index e7308e381e2f..3c36e3641b9e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4699,8 +4699,8 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) struct net_device *netdev; u8 mac_addr[ETH_ALEN]; - netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq, - vsi->alloc_rxq); + netdev = alloc_etherdev_mqs(sizeof(*np), ice_get_max_txq(vsi->back), + ice_get_max_rxq(vsi->back)); if (!netdev) return -ENOMEM; diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 90f99443a922..096566c697f4 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -2,6 +2,7 @@ /* Copyright (C) 2019-2021, Intel Corporation. */ #include "ice.h" +#include "ice_lib.h" #include "ice_eswitch.h" #include "devlink/devlink.h" #include "devlink/port.h" @@ -67,7 +68,7 @@ ice_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) return; vsi = repr->src_vsi; - ice_update_vsi_stats(vsi); + ice_update_eth_stats(vsi); eth_stats = &vsi->eth_stats; stats->tx_packets = eth_stats->tx_unicast + eth_stats->tx_broadcast + @@ -315,7 +316,7 @@ ice_repr_reg_netdev(struct net_device *netdev, const struct net_device_ops *ops) static int ice_repr_ready_vf(struct ice_repr *repr) { - return !ice_check_vf_ready_for_cfg(repr->vf); + return ice_check_vf_ready_for_cfg(repr->vf); } static int ice_repr_ready_sf(struct ice_repr *repr) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index b206fba092c8..ec1b75f039bb 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -1066,7 +1066,7 @@ bool idpf_vport_set_hsplit(const struct idpf_vport *vport, u8 val); int idpf_idc_init(struct idpf_adapter *adapter); int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter, enum iidc_function_type ftype); -void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info); +void idpf_idc_deinit_core_aux_device(struct idpf_adapter *adapter); void idpf_idc_deinit_vport_aux_device(struct iidc_rdma_vport_dev_info *vdev_info); void idpf_idc_issue_reset_event(struct iidc_rdma_core_dev_info *cdev_info); void idpf_idc_vdev_mtu_event(struct iidc_rdma_vport_dev_info *vdev_info, diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c index bd4785fb8d3e..7e4f4ac92653 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_idc.c +++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c @@ -470,10 +470,11 @@ err_privd_alloc: /** * idpf_idc_deinit_core_aux_device - de-initialize Auxiliary Device(s) - * @cdev_info: IDC core device info pointer + * @adapter: driver private data structure */ -void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info) +void idpf_idc_deinit_core_aux_device(struct idpf_adapter *adapter) { + struct iidc_rdma_core_dev_info *cdev_info = adapter->cdev_info; struct iidc_rdma_priv_dev_info *privd; if (!cdev_info) @@ -485,6 +486,7 @@ void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info) kfree(privd->mapped_mem_regions); kfree(privd); kfree(cdev_info); + adapter->cdev_info = NULL; } /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 252259993022..f6b3b15364ff 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -1860,13 +1860,13 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, idpf_queue_assign(HSPLIT_EN, q, hs); idpf_queue_assign(RSC_EN, q, rsc); - bufq_set->num_refillqs = num_rxq; bufq_set->refillqs = kcalloc(num_rxq, swq_size, GFP_KERNEL); if (!bufq_set->refillqs) { err = -ENOMEM; goto err_alloc; } + bufq_set->num_refillqs = num_rxq; for (unsigned int k = 0; k < bufq_set->num_refillqs; k++) { struct idpf_sw_queue *refillq = &bufq_set->refillqs[k]; diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index d5a877e1fef8..113ecfc16dd7 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -3668,7 +3668,7 @@ void idpf_vc_core_deinit(struct idpf_adapter *adapter) idpf_ptp_release(adapter); idpf_deinit_task(adapter); - idpf_idc_deinit_core_aux_device(adapter->cdev_info); + idpf_idc_deinit_core_aux_device(adapter); idpf_rel_rx_pt_lkup(adapter); idpf_intr_rel(adapter); diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index a3845edf0e48..f0b5dd752f08 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3053,6 +3053,11 @@ static void lan743x_phylink_mac_link_up(struct phylink_config *config, else if (speed == SPEED_100) mac_cr |= MAC_CR_CFG_L_; + if (duplex == DUPLEX_FULL) + mac_cr |= MAC_CR_DPX_; + else + mac_cr &= ~MAC_CR_DPX_; + lan743x_csr_write(adapter, MAC_CR, mac_cr); lan743x_ptp_update_latency(adapter, speed); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 9017e806ecda..dca62fb9a3a9 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -3425,6 +3425,7 @@ static int add_adev(struct gdma_dev *gd, const char *name) struct auxiliary_device *adev; struct mana_adev *madev; int ret; + int id; madev = kzalloc_obj(*madev); if (!madev) @@ -3434,7 +3435,8 @@ static int add_adev(struct gdma_dev *gd, const char *name) ret = mana_adev_idx_alloc(); if (ret < 0) goto idx_fail; - adev->id = ret; + id = ret; + adev->id = id; adev->name = name; adev->dev.parent = gd->gdma_context->dev; @@ -3460,7 +3462,7 @@ add_fail: auxiliary_device_uninit(adev); init_fail: - mana_adev_idx_free(adev->id); + mana_adev_idx_free(id); idx_fail: kfree(madev); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 8d040e611d5a..637e635bbf03 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1719,13 +1719,18 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa) if (ether_addr_equal(netdev->dev_addr, mac)) return 0; - err = ionic_program_mac(lif, mac); - if (err < 0) - return err; + /* Only program macs for virtual functions to avoid losing the permanent + * Mac across warm reset/reboot. + */ + if (lif->ionic->pdev->is_virtfn) { + err = ionic_program_mac(lif, mac); + if (err < 0) + return err; - if (err > 0) - netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n", - __func__); + if (err > 0) + netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n", + __func__); + } err = eth_prepare_mac_addr_change(netdev, addr); if (err) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 0a3cf2f848a5..fd4e7622f123 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -962,7 +962,6 @@ static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id, pkt_len -= 4; cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); psdata = cppi5_hdesc_get_psdata(desc_rx); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); count++; xsk_buff_set_size(xdp, pkt_len); xsk_buff_dma_sync_for_cpu(xdp); @@ -988,6 +987,7 @@ static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id, emac_dispatch_skb_zc(emac, xdp, psdata); xsk_buff_free(xdp); } + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); } if (xdp_status & ICSSG_XDP_REDIR) @@ -1057,7 +1057,6 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) /* firmware adds 4 CRC bytes, strip them */ pkt_len -= 4; cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); /* if allocation fails we drop the packet but push the * descriptor back to the ring with old page to prevent a stall @@ -1115,6 +1114,7 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) ndev->stats.rx_packets++; requeue: + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); /* queue another RX DMA */ ret = prueth_dma_rx_push_mapped(emac, &emac->rx_chns, new_page, PRUETH_MAX_PKT_SIZE); diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index b7282f5c9632..120aeb539d9f 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -2058,6 +2058,68 @@ static const struct ethtool_ops team_ethtool_ops = { * rt netlink interface ***********************/ +/* For tx path we need a linkup && enabled port and for parse any port + * suffices. + */ +static struct team_port *team_header_port_get_rcu(struct team *team, + bool txable) +{ + struct team_port *port; + + list_for_each_entry_rcu(port, &team->port_list, list) { + if (!txable || team_port_txable(port)) + return port; + } + + return NULL; +} + +static int team_header_create(struct sk_buff *skb, struct net_device *team_dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + struct team *team = netdev_priv(team_dev); + const struct header_ops *port_ops; + struct team_port *port; + int ret = 0; + + rcu_read_lock(); + port = team_header_port_get_rcu(team, true); + if (port) { + port_ops = READ_ONCE(port->dev->header_ops); + if (port_ops && port_ops->create) + ret = port_ops->create(skb, port->dev, + type, daddr, saddr, len); + } + rcu_read_unlock(); + return ret; +} + +static int team_header_parse(const struct sk_buff *skb, + const struct net_device *team_dev, + unsigned char *haddr) +{ + struct team *team = netdev_priv(team_dev); + const struct header_ops *port_ops; + struct team_port *port; + int ret = 0; + + rcu_read_lock(); + port = team_header_port_get_rcu(team, false); + if (port) { + port_ops = READ_ONCE(port->dev->header_ops); + if (port_ops && port_ops->parse) + ret = port_ops->parse(skb, port->dev, haddr); + } + rcu_read_unlock(); + return ret; +} + +static const struct header_ops team_header_ops = { + .create = team_header_create, + .parse = team_header_parse, +}; + static void team_setup_by_port(struct net_device *dev, struct net_device *port_dev) { @@ -2066,7 +2128,8 @@ static void team_setup_by_port(struct net_device *dev, if (port_dev->type == ARPHRD_ETHER) dev->header_ops = team->header_ops_cache; else - dev->header_ops = port_dev->header_ops; + dev->header_ops = port_dev->header_ops ? + &team_header_ops : NULL; dev->type = port_dev->type; dev->hard_header_len = port_dev->hard_header_len; dev->needed_headroom = port_dev->needed_headroom; diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h index a5f93b6c4482..fa5cab9d3e55 100644 --- a/drivers/net/tun_vnet.h +++ b/drivers/net/tun_vnet.h @@ -244,7 +244,7 @@ tun_vnet_hdr_tnl_from_skb(unsigned int flags, if (virtio_net_hdr_tnl_from_skb(skb, tnl_hdr, has_tnl_offload, tun_vnet_is_little_endian(flags), - vlan_hlen, true)) { + vlan_hlen, true, false)) { struct virtio_net_hdr_v1 *hdr = &tnl_hdr->hash_hdr.hdr; struct skb_shared_info *sinfo = skb_shinfo(skb); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 72d6a9c6a5a2..ab2108ee206a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3267,8 +3267,12 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) struct virtio_net_hdr_v1_hash_tunnel *hdr; int num_sg; unsigned hdr_len = vi->hdr_len; + bool feature_hdrlen; bool can_push; + feature_hdrlen = virtio_has_feature(vi->vdev, + VIRTIO_NET_F_GUEST_HDRLEN); + pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); /* Make sure it's safe to cast between formats */ @@ -3288,7 +3292,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl, virtio_is_little_endian(vi->vdev), 0, - false)) + false, feature_hdrlen)) return -EPROTO; if (vi->mergeable_rx_bufs) @@ -3351,6 +3355,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Don't wait up for transmitted skbs to be freed. */ if (!use_napi) { skb_orphan(skb); + skb_dst_drop(skb); nf_reset_ct(skb); } diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 75dabb763c65..f36d21b5bc19 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -207,6 +207,39 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, return __virtio_net_hdr_to_skb(skb, hdr, little_endian, hdr->gso_type); } +/* This function must be called after virtio_net_hdr_from_skb(). */ +static inline void __virtio_net_set_hdrlen(const struct sk_buff *skb, + struct virtio_net_hdr *hdr, + bool little_endian) +{ + u16 hdr_len; + + hdr_len = skb_transport_offset(skb); + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) + hdr_len += sizeof(struct udphdr); + else + hdr_len += tcp_hdrlen(skb); + + hdr->hdr_len = __cpu_to_virtio16(little_endian, hdr_len); +} + +/* This function must be called after virtio_net_hdr_from_skb(). */ +static inline void __virtio_net_set_tnl_hdrlen(const struct sk_buff *skb, + struct virtio_net_hdr *hdr) +{ + u16 hdr_len; + + hdr_len = skb_inner_transport_offset(skb); + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) + hdr_len += sizeof(struct udphdr); + else + hdr_len += inner_tcp_hdrlen(skb); + + hdr->hdr_len = __cpu_to_virtio16(true, hdr_len); +} + static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, bool little_endian, @@ -385,7 +418,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, bool tnl_hdr_negotiated, bool little_endian, int vlan_hlen, - bool has_data_valid) + bool has_data_valid, + bool feature_hdrlen) { struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr; unsigned int inner_nh, outer_th; @@ -394,9 +428,17 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM); - if (!tnl_gso_type) - return virtio_net_hdr_from_skb(skb, hdr, little_endian, - has_data_valid, vlan_hlen); + if (!tnl_gso_type) { + ret = virtio_net_hdr_from_skb(skb, hdr, little_endian, + has_data_valid, vlan_hlen); + if (ret) + return ret; + + if (feature_hdrlen && hdr->hdr_len) + __virtio_net_set_hdrlen(skb, hdr, little_endian); + + return ret; + } /* Tunnel support not negotiated but skb ask for it. */ if (!tnl_hdr_negotiated) @@ -414,6 +456,9 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, if (ret) return ret; + if (feature_hdrlen && hdr->hdr_len) + __virtio_net_set_tnl_hdrlen(skb, hdr); + if (skb->protocol == htons(ETH_P_IPV6)) hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6; else diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 010f1a8fd15f..5172afee5494 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -658,6 +658,7 @@ struct l2cap_conn { struct sk_buff *rx_skb; __u32 rx_len; struct ida tx_ida; + __u8 tx_ident; struct sk_buff_head pending_rx; struct work_struct pending_rx_work; diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index 78a27ac73070..b2c359c6dd1b 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h @@ -158,6 +158,7 @@ static struct sk_buff *codel_dequeue(void *ctx, bool drop; if (!skb) { + vars->first_above_time = 0; vars->dropping = false; return skb; } diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 5a979dcab538..6d936e9f2fd3 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -264,6 +264,20 @@ inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk, return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; } +static inline bool inet_use_hash2_on_bind(const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + return false; + + if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + return true; + } +#endif + return sk->sk_rcv_saddr != htonl(INADDR_ANY); +} + struct inet_bind_hashbucket * inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 88b0dd4d8e09..9f8b6814a96a 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -507,12 +507,14 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt, void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); +void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now); void fib6_run_gc(unsigned long expires, struct net *net, bool force); - void fib6_gc_cleanup(void); int fib6_init(void); +#if IS_ENABLED(CONFIG_IPV6) /* Add the route to the gc list if it is not already there * * The callers should hold f6i->fib6_table->tb6_lock. @@ -545,6 +547,23 @@ static inline void fib6_remove_gc_list(struct fib6_info *f6i) hlist_del_init(&f6i->gc_link); } +static inline void fib6_may_remove_gc_list(struct net *net, + struct fib6_info *f6i) +{ + struct fib6_gc_args gc_args; + + if (hlist_unhashed(&f6i->gc_link)) + return; + + gc_args.timeout = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_interval); + gc_args.more = 0; + + rcu_read_lock(); + fib6_age_exceptions(f6i, &gc_args, jiffies); + rcu_read_unlock(); +} +#endif + struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3384859a8921..8883575adcc1 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -83,6 +83,11 @@ void nf_conntrack_lock(spinlock_t *lock); extern spinlock_t nf_conntrack_expect_lock; +static inline void lockdep_nfct_expect_lock_held(void) +{ + lockdep_assert_held(&nf_conntrack_expect_lock); +} + /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout) diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 165e7a03b8e9..e9a8350e7ccf 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -22,10 +22,16 @@ struct nf_conntrack_expect { /* Hash member */ struct hlist_node hnode; + /* Network namespace */ + possible_net_t net; + /* We expect this tuple, with the following mask */ struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_mask mask; +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone zone; +#endif /* Usage count. */ refcount_t use; @@ -40,7 +46,7 @@ struct nf_conntrack_expect { struct nf_conntrack_expect *this); /* Helper to assign to new connection */ - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper __rcu *helper; /* The conntrack of the master connection */ struct nf_conn *master; @@ -62,7 +68,17 @@ struct nf_conntrack_expect { static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) { - return nf_ct_net(exp->master); + return read_pnet(&exp->net); +} + +static inline bool nf_ct_exp_zone_equal_any(const struct nf_conntrack_expect *a, + const struct nf_conntrack_zone *b) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + return a->zone.id == b->id; +#else + return true; +#endif } #define NF_CT_EXP_POLICY_NAME_LEN 16 diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 23dd647fe024..b73983a17e08 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -59,7 +59,7 @@ struct netns_xfrm { struct list_head inexact_bins; - struct sock *nlsk; + struct sock __rcu *nlsk; struct sock *nlsk_stash; u32 sysctl_aevent_etime; diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 26071021e986..56b6b60a814f 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -159,5 +159,9 @@ enum ip_conntrack_expect_events { #define NF_CT_EXPECT_INACTIVE 0x2 #define NF_CT_EXPECT_USERSPACE 0x4 +#ifdef __KERNEL__ +#define NF_CT_EXPECT_MASK (NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE | \ + NF_CT_EXPECT_USERSPACE) +#endif #endif /* _UAPI_NF_CONNTRACK_COMMON_H */ diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6eb59e9f2aa8..e6393f17576b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -3095,7 +3095,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) * hci_connect_le serializes the connection attempts so only one * connection can be in BT_CONNECT at time. */ - if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) { + if (conn->state == BT_CONNECT && READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { switch (hci_skb_event(hdev->sent_cmd)) { case HCI_EV_CONN_COMPLETE: case HCI_EV_LE_CONN_COMPLETE: diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 31308c1de4ec..01f8ceeb1c0c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4126,7 +4126,7 @@ static int hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb); } - if (hdev->req_status == HCI_REQ_PEND && + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND && !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { kfree_skb(hdev->req_skb); hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 3166914b0d6c..45d16639874a 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -25,11 +25,11 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, { bt_dev_dbg(hdev, "result 0x%2.2x", result); - if (hdev->req_status != HCI_REQ_PEND) + if (READ_ONCE(hdev->req_status) != HCI_REQ_PEND) return; hdev->req_result = result; - hdev->req_status = HCI_REQ_DONE; + WRITE_ONCE(hdev->req_status, HCI_REQ_DONE); /* Free the request command so it is not used as response */ kfree_skb(hdev->req_skb); @@ -167,20 +167,20 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, hci_cmd_sync_add(&req, opcode, plen, param, event, sk); - hdev->req_status = HCI_REQ_PEND; + WRITE_ONCE(hdev->req_status, HCI_REQ_PEND); err = hci_req_sync_run(&req); if (err < 0) return ERR_PTR(err); err = wait_event_interruptible_timeout(hdev->req_wait_q, - hdev->req_status != HCI_REQ_PEND, + READ_ONCE(hdev->req_status) != HCI_REQ_PEND, timeout); if (err == -ERESTARTSYS) return ERR_PTR(-EINTR); - switch (hdev->req_status) { + switch (READ_ONCE(hdev->req_status)) { case HCI_REQ_DONE: err = -bt_to_errno(hdev->req_result); break; @@ -194,7 +194,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, break; } - hdev->req_status = 0; + WRITE_ONCE(hdev->req_status, 0); hdev->req_result = 0; skb = hdev->req_rsp; hdev->req_rsp = NULL; @@ -665,9 +665,9 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); - if (hdev->req_status == HCI_REQ_PEND) { + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { hdev->req_result = err; - hdev->req_status = HCI_REQ_CANCELED; + WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED); queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); } @@ -683,12 +683,12 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); - if (hdev->req_status == HCI_REQ_PEND) { + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { /* req_result is __u32 so error must be positive to be properly * propagated. */ hdev->req_result = err < 0 ? -err : err; - hdev->req_status = HCI_REQ_CANCELED; + WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED); wake_up_interruptible(&hdev->req_wait_q); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 5deb6c4f1e41..95c65fece39b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -926,16 +926,39 @@ int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator) static int l2cap_get_ident(struct l2cap_conn *conn) { + u8 max; + int ident; + /* LE link does not support tools like l2ping so use the full range */ if (conn->hcon->type == LE_LINK) - return ida_alloc_range(&conn->tx_ida, 1, 255, GFP_ATOMIC); - + max = 255; /* Get next available identificator. * 1 - 128 are used by kernel. * 129 - 199 are reserved. * 200 - 254 are used by utilities like l2ping, etc. */ - return ida_alloc_range(&conn->tx_ida, 1, 128, GFP_ATOMIC); + else + max = 128; + + /* Allocate ident using min as last used + 1 (cyclic) */ + ident = ida_alloc_range(&conn->tx_ida, READ_ONCE(conn->tx_ident) + 1, + max, GFP_ATOMIC); + /* Force min 1 to start over */ + if (ident <= 0) { + ident = ida_alloc_range(&conn->tx_ida, 1, max, GFP_ATOMIC); + if (ident <= 0) { + /* If all idents are in use, log an error, this is + * extremely unlikely to happen and would indicate a bug + * in the code that idents are not being freed properly. + */ + BT_ERR("Unable to allocate ident: %d", ident); + return 0; + } + } + + WRITE_ONCE(conn->tx_ident, ident); + + return ident; } static void l2cap_send_acl(struct l2cap_conn *conn, struct sk_buff *skb, @@ -1748,6 +1771,9 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); + disable_delayed_work_sync(&conn->info_timer); + disable_delayed_work_sync(&conn->id_addr_timer); + mutex_lock(&conn->lock); kfree_skb(conn->rx_skb); @@ -1763,8 +1789,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) ida_destroy(&conn->tx_ida); - cancel_delayed_work_sync(&conn->id_addr_timer); - l2cap_unregister_all_users(conn); /* Force the connection to be immediately dropped */ @@ -1783,9 +1807,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) l2cap_chan_put(chan); } - if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) - cancel_delayed_work_sync(&conn->info_timer); - hci_chan_del(conn->hchan); conn->hchan = NULL; @@ -2377,6 +2398,9 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan, /* Remote device may have requested smaller PDUs */ pdu_len = min_t(size_t, pdu_len, chan->remote_mps); + if (!pdu_len) + return -EINVAL; + if (len <= pdu_len) { sar = L2CAP_SAR_UNSEGMENTED; sdu_len = 0; @@ -4312,14 +4336,16 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, if (test_bit(CONF_INPUT_DONE, &chan->conf_state)) { set_default_fcs(chan); - if (chan->mode == L2CAP_MODE_ERTM || - chan->mode == L2CAP_MODE_STREAMING) - err = l2cap_ertm_init(chan); + if (chan->state != BT_CONNECTED) { + if (chan->mode == L2CAP_MODE_ERTM || + chan->mode == L2CAP_MODE_STREAMING) + err = l2cap_ertm_init(chan); - if (err < 0) - l2cap_send_disconn_req(chan, -err); - else - l2cap_chan_ready(chan); + if (err < 0) + l2cap_send_disconn_req(chan, -err); + else + l2cap_chan_ready(chan); + } goto unlock; } @@ -5081,14 +5107,14 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); - /* Always respond with the same number of scids as in the request */ - rsp_len = cmd_len; - if (num_scid > L2CAP_ECRED_MAX_CID) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; } + /* Always respond with the same number of scids as in the request */ + rsp_len = cmd_len; + mtu = __le16_to_cpu(req->mtu); mps = __le16_to_cpu(req->mps); @@ -6607,6 +6633,10 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) struct l2cap_le_credits pkt; u16 return_credits = l2cap_le_rx_credits(chan); + if (chan->mode != L2CAP_MODE_LE_FLOWCTL && + chan->mode != L2CAP_MODE_EXT_FLOWCTL) + return; + if (chan->rx_credits >= return_credits) return; @@ -6690,6 +6720,11 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) if (!chan->sdu) { u16 sdu_len; + if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE)) { + err = -EINVAL; + goto failed; + } + sdu_len = get_unaligned_le16(skb->data); skb_pull(skb, L2CAP_SDULEN_SIZE); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 597686790371..71e8c1b45bce 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1698,6 +1698,9 @@ static void l2cap_sock_ready_cb(struct l2cap_chan *chan) struct sock *sk = chan->data; struct sock *parent; + if (!sk) + return; + lock_sock(sk); parent = bt_sk(sk)->parent; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d52238ce6a9a..e5f9287fb826 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5355,7 +5355,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, * hci_adv_monitors_clear is about to be called which will take care of * freeing the adv_monitor instances. */ - if (status == -ECANCELED && !mgmt_pending_valid(hdev, cmd)) + if (status == -ECANCELED || !mgmt_pending_valid(hdev, cmd)) return; monitor = cmd->user_data; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index e7db50165879..584e059de20a 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -401,7 +401,7 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) struct sock *sk; sco_conn_lock(conn); - sk = conn->sk; + sk = sco_sock_hold(conn); sco_conn_unlock(conn); if (!sk) @@ -410,11 +410,15 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) BT_DBG("sk %p len %u", sk, skb->len); if (sk->sk_state != BT_CONNECTED) - goto drop; + goto drop_put; - if (!sock_queue_rcv_skb(sk, skb)) + if (!sock_queue_rcv_skb(sk, skb)) { + sock_put(sk); return; + } +drop_put: + sock_put(sk); drop: kfree_skb(skb); } diff --git a/net/can/af_can.c b/net/can/af_can.c index f70e2ba0aadc..7bc86b176b4d 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -469,7 +469,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, rcv->can_id = can_id; rcv->mask = mask; - rcv->matches = 0; + atomic_long_set(&rcv->matches, 0); rcv->func = func; rcv->data = data; rcv->ident = ident; @@ -573,7 +573,7 @@ EXPORT_SYMBOL(can_rx_unregister); static inline void deliver(struct sk_buff *skb, struct receiver *rcv) { rcv->func(skb, rcv->data); - rcv->matches++; + atomic_long_inc(&rcv->matches); } static int can_rcv_filter(struct can_dev_rcv_lists *dev_rcv_lists, struct sk_buff *skb) diff --git a/net/can/af_can.h b/net/can/af_can.h index 22f3352c77fe..87887014f562 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -52,7 +52,7 @@ struct receiver { struct hlist_node list; canid_t can_id; canid_t mask; - unsigned long matches; + atomic_long_t matches; void (*func)(struct sk_buff *skb, void *data); void *data; char *ident; diff --git a/net/can/gw.c b/net/can/gw.c index 8ee4d67a07d3..0ec99f68aa45 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -375,10 +375,10 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf, return; if (from <= to) { - for (i = crc8->from_idx; i <= crc8->to_idx; i++) + for (i = from; i <= to; i++) crc = crc8->crctab[crc ^ cf->data[i]]; } else { - for (i = crc8->from_idx; i >= crc8->to_idx; i--) + for (i = from; i >= to; i--) crc = crc8->crctab[crc ^ cf->data[i]]; } @@ -397,7 +397,7 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf, break; } - cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; + cf->data[res] = crc ^ crc8->final_xor_val; } static void cgw_csum_crc8_pos(struct canfd_frame *cf, diff --git a/net/can/isotp.c b/net/can/isotp.c index da3b72e7afcc..2770f43f4951 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1248,12 +1248,6 @@ static int isotp_release(struct socket *sock) so->ifindex = 0; so->bound = 0; - if (so->rx.buf != so->rx.sbuf) - kfree(so->rx.buf); - - if (so->tx.buf != so->tx.sbuf) - kfree(so->tx.buf); - sock_orphan(sk); sock->sk = NULL; @@ -1622,6 +1616,21 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg, return NOTIFY_DONE; } +static void isotp_sock_destruct(struct sock *sk) +{ + struct isotp_sock *so = isotp_sk(sk); + + /* do the standard CAN sock destruct work */ + can_sock_destruct(sk); + + /* free potential extended PDU buffers */ + if (so->rx.buf != so->rx.sbuf) + kfree(so->rx.buf); + + if (so->tx.buf != so->tx.sbuf) + kfree(so->tx.buf); +} + static int isotp_init(struct sock *sk) { struct isotp_sock *so = isotp_sk(sk); @@ -1666,6 +1675,9 @@ static int isotp_init(struct sock *sk) list_add_tail(&so->notifier, &isotp_notifier_list); spin_unlock(&isotp_notifier_lock); + /* re-assign default can_sock_destruct() reference */ + sk->sk_destruct = isotp_sock_destruct; + return 0; } diff --git a/net/can/proc.c b/net/can/proc.c index 0938bf7dd646..de4d05ae3459 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -196,7 +196,8 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list, " %-5s %03x %08x %pK %pK %8ld %s\n"; seq_printf(m, fmt, DNAME(dev), r->can_id, r->mask, - r->func, r->data, r->matches, r->ident); + r->func, r->data, atomic_long_read(&r->matches), + r->ident); } } diff --git a/net/core/dev.c b/net/core/dev.c index 14a83f2035b9..fc5557062414 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3769,6 +3769,22 @@ static netdev_features_t dflt_features_check(struct sk_buff *skb, return vlan_features_check(skb, features); } +static bool skb_gso_has_extension_hdr(const struct sk_buff *skb) +{ + if (!skb->encapsulation) + return ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && + skb_transport_header_was_set(skb) && + skb_network_header_len(skb) != sizeof(struct ipv6hdr)); + else + return (!skb_inner_network_header_was_set(skb) || + ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + inner_ip_hdr(skb)->version == 6)) && + skb_inner_network_header_len(skb) != sizeof(struct ipv6hdr))); +} + static netdev_features_t gso_features_check(const struct sk_buff *skb, struct net_device *dev, netdev_features_t features) @@ -3816,11 +3832,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, * so neither does TSO that depends on it. */ if (features & NETIF_F_IPV6_CSUM && - (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || - (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && - vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && - skb_transport_header_was_set(skb) && - skb_network_header_len(skb) != sizeof(struct ipv6hdr)) + skb_gso_has_extension_hdr(skb)) features &= ~(NETIF_F_IPV6_CSUM | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4); return features; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dad4b1054955..fae8034efbff 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -629,6 +629,9 @@ int rtnl_link_register(struct rtnl_link_ops *ops) unlock: mutex_unlock(&link_ops_mutex); + if (err) + cleanup_srcu_struct(&ops->srcu); + return err; } EXPORT_SYMBOL_GPL(rtnl_link_register); @@ -707,11 +710,14 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev) goto out; ops = master_dev->rtnl_link_ops; - if (!ops || !ops->get_slave_size) + if (!ops) + goto out; + size += nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_SLAVE_KIND */ + if (!ops->get_slave_size) goto out; /* IFLA_INFO_SLAVE_DATA + nested data */ - size = nla_total_size(sizeof(struct nlattr)) + - ops->get_slave_size(master_dev, dev); + size += nla_total_size(sizeof(struct nlattr)) + + ops->get_slave_size(master_dev, dev); out: rcu_read_unlock(); @@ -1267,6 +1273,21 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev) return size; } +static size_t rtnl_dev_parent_size(const struct net_device *dev) +{ + size_t size = 0; + + /* IFLA_PARENT_DEV_NAME */ + if (dev->dev.parent) + size += nla_total_size(strlen(dev_name(dev->dev.parent)) + 1); + + /* IFLA_PARENT_DEV_BUS_NAME */ + if (dev->dev.parent && dev->dev.parent->bus) + size += nla_total_size(strlen(dev->dev.parent->bus->name) + 1); + + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1328,6 +1349,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(8) /* IFLA_MAX_PACING_OFFLOAD_HORIZON */ + nla_total_size(2) /* IFLA_HEADROOM */ + nla_total_size(2) /* IFLA_TAILROOM */ + + rtnl_dev_parent_size(dev) + 0; if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS)) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 2c922afadb8f..6dfc0bcdef65 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -235,10 +235,13 @@ static void esp_output_done(void *data, int err) xfrm_dev_resume(skb); } else { if (!err && - x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) - esp_output_tail_tcp(x, skb); - else + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) { + err = esp_output_tail_tcp(x, skb); + if (err != -EINPROGRESS) + kfree_skb(skb); + } else { xfrm_output_resume(skb_to_full_sk(skb), skb, err); + } } } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5dfac6ce1110..e961936b6be7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -154,20 +154,6 @@ bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high) } EXPORT_SYMBOL(inet_sk_get_local_port_range); -static bool inet_use_bhash2_on_bind(const struct sock *sk) -{ -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) - return false; - - if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) - return true; - } -#endif - return sk->sk_rcv_saddr != htonl(INADDR_ANY); -} - static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, kuid_t uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) @@ -259,7 +245,7 @@ static int inet_csk_bind_conflict(const struct sock *sk, * checks separately because their spinlocks have to be acquired/released * independently of each other, to prevent possible deadlocks */ - if (inet_use_bhash2_on_bind(sk)) + if (inet_use_hash2_on_bind(sk)) return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok); @@ -376,7 +362,7 @@ other_parity_scan: head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock_bh(&head->lock); - if (inet_use_bhash2_on_bind(sk)) { + if (inet_use_hash2_on_bind(sk)) { if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false)) goto next_port; } @@ -562,7 +548,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) check_bind_conflict = false; } - if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) { + if (check_bind_conflict && inet_use_hash2_on_bind(sk)) { if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true)) goto fail_unlock; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b60fad393e18..cb99a3c27053 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -287,7 +287,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, } else { hslot = udp_hashslot(udptable, net, snum); spin_lock_bh(&hslot->lock); - if (hslot->count > 10) { + if (inet_use_hash2_on_bind(sk) && hslot->count > 10) { int exist; unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e55f139e05d..f4e23b543585 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2862,7 +2862,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) fib6_add_gc_list(rt); } else { fib6_clean_expires(rt); - fib6_remove_gc_list(rt); + fib6_may_remove_gc_list(net, rt); } spin_unlock_bh(&table->tb6_lock); @@ -4840,7 +4840,7 @@ static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp, if (!(flags & RTF_EXPIRES)) { fib6_clean_expires(f6i); - fib6_remove_gc_list(f6i); + fib6_may_remove_gc_list(net, f6i); } else { fib6_set_expires(f6i, expires); fib6_add_gc_list(f6i); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e75da98f5283..9f75313734f8 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -271,10 +271,13 @@ static void esp_output_done(void *data, int err) xfrm_dev_resume(skb); } else { if (!err && - x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) - esp_output_tail_tcp(x, skb); - else + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) { + err = esp_output_tail_tcp(x, skb); + if (err != -EINPROGRESS) + kfree_skb(skb); + } else { xfrm_output_resume(skb_to_full_sk(skb), skb, err); + } } } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 9058e71241dc..dd26657b6a4a 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1133,7 +1133,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) { fib6_clean_expires(iter); - fib6_remove_gc_list(iter); + fib6_may_remove_gc_list(info->nl_net, iter); } else { fib6_set_expires(iter, rt->expires); fib6_add_gc_list(iter); @@ -2348,6 +2348,17 @@ static void fib6_flush_trees(struct net *net) /* * Garbage collection */ +void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now) +{ + bool may_expire = rt->fib6_flags & RTF_EXPIRES && rt->expires; + int old_more = gc_args->more; + + rt6_age_exceptions(rt, gc_args, now); + + if (!may_expire && old_more == gc_args->more) + fib6_remove_gc_list(rt); +} static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) { @@ -2370,7 +2381,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) * Note, that clones are aged out * only if they are not in use now. */ - rt6_age_exceptions(rt, gc_args, now); + fib6_age_exceptions(rt, gc_args, now); return 0; } diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 4ad8b2032f1f..5561bd9cea81 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -157,6 +157,10 @@ static int rt_mt6_check(const struct xt_mtchk_param *par) pr_debug("unknown flags %X\n", rtinfo->invflags); return -EINVAL; } + if (rtinfo->addrnr > IP6T_RT_HOPS) { + pr_debug("too many addresses specified\n"); + return -EINVAL; + } if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) && (!(rtinfo->flags & IP6T_RT_TYP) || (rtinfo->rt_type != 0) || diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 08cd86f49bf9..cb521700cee7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1033,7 +1033,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (!addrconf_finite_timeout(lifetime)) { fib6_clean_expires(rt); - fib6_remove_gc_list(rt); + fib6_may_remove_gc_list(net, rt); } else { fib6_set_expires(rt, jiffies + HZ * lifetime); fib6_add_gc_list(rt); diff --git a/net/key/af_key.c b/net/key/af_key.c index 0756bac62f7c..72ac2ace419d 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3518,7 +3518,7 @@ static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress * static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, - uint32_t reqid, uint8_t family, + uint32_t reqid, sa_family_t family, const xfrm_address_t *src, const xfrm_address_t *dst) { struct sadb_x_ipsecrequest *rq; @@ -3583,12 +3583,17 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, /* ipsecrequests */ for (i = 0, mp = m; i < num_bundles; i++, mp++) { - /* old locator pair */ - size_pol += sizeof(struct sadb_x_ipsecrequest) + - pfkey_sockaddr_pair_size(mp->old_family); - /* new locator pair */ - size_pol += sizeof(struct sadb_x_ipsecrequest) + - pfkey_sockaddr_pair_size(mp->new_family); + int pair_size; + + pair_size = pfkey_sockaddr_pair_size(mp->old_family); + if (!pair_size) + return -EINVAL; + size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size; + + pair_size = pfkey_sockaddr_pair_size(mp->new_family); + if (!pair_size) + return -EINVAL; + size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size; } size += sizeof(struct sadb_msg) + size_pol; diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index a7552a46d6ac..4f39bf7c843f 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -21,6 +21,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, unsigned int timeout) { const struct nf_conntrack_helper *helper; + struct net *net = read_pnet(&ct->ct_net); struct nf_conntrack_expect *exp; struct iphdr *iph = ip_hdr(skb); struct rtable *rt = skb_rtable(skb); @@ -70,8 +71,11 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, exp->expectfn = NULL; exp->flags = NF_CT_EXPECT_PERMANENT; exp->class = NF_CT_EXPECT_CLASS_DEFAULT; - exp->helper = NULL; - + rcu_assign_pointer(exp->helper, helper); + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif nf_ct_expect_related(exp, 0); nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 81baf2082604..9df159448b89 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -247,6 +247,8 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; + lockdep_nfct_expect_lock_held(); + rcu_read_lock(); notify = rcu_dereference(net->ct.nf_conntrack_event_cb); if (!notify) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index cfc2daa3fc7f..24d0576d84b7 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -51,6 +51,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, struct net *net = nf_ct_exp_net(exp); struct nf_conntrack_net *cnet; + lockdep_nfct_expect_lock_held(); WARN_ON(!master_help); WARN_ON(timer_pending(&exp->timeout)); @@ -112,12 +113,14 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple, const struct net *net) { return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - net_eq(net, nf_ct_net(i->master)) && - nf_ct_zone_equal_any(i->master, zone); + net_eq(net, read_pnet(&i->net)) && + nf_ct_exp_zone_equal_any(i, zone); } bool nf_ct_remove_expect(struct nf_conntrack_expect *exp) { + lockdep_nfct_expect_lock_held(); + if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); @@ -177,6 +180,8 @@ nf_ct_find_expectation(struct net *net, struct nf_conntrack_expect *i, *exp = NULL; unsigned int h; + lockdep_nfct_expect_lock_held(); + if (!cnet->expect_count) return NULL; @@ -309,12 +314,20 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) } EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); +/* This function can only be used from packet path, where accessing + * master's helper is safe, because the packet holds a reference on + * the conntrack object. Never use it from control plane. + */ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, u_int8_t family, const union nf_inet_addr *saddr, const union nf_inet_addr *daddr, u_int8_t proto, const __be16 *src, const __be16 *dst) { + struct nf_conntrack_helper *helper = NULL; + struct nf_conn *ct = exp->master; + struct net *net = read_pnet(&ct->ct_net); + struct nf_conn_help *help; int len; if (family == AF_INET) @@ -325,7 +338,16 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, exp->flags = 0; exp->class = class; exp->expectfn = NULL; - exp->helper = NULL; + + help = nfct_help(ct); + if (help) + helper = rcu_dereference(help->helper); + + rcu_assign_pointer(exp->helper, helper); + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif exp->tuple.src.l3num = family; exp->tuple.dst.protonum = proto; @@ -442,6 +464,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect, unsigned int h; int ret = 0; + lockdep_nfct_expect_lock_held(); + if (!master_help) { ret = -ESHUTDOWN; goto out; @@ -498,8 +522,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, nf_ct_expect_insert(expect); - spin_unlock_bh(&nf_conntrack_expect_lock); nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); + spin_unlock_bh(&nf_conntrack_expect_lock); + return 0; out: spin_unlock_bh(&nf_conntrack_expect_lock); @@ -627,11 +652,15 @@ static int exp_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_expect *expect; struct nf_conntrack_helper *helper; + struct net *net = seq_file_net(s); struct hlist_node *n = v; char *delim = ""; expect = hlist_entry(n, struct nf_conntrack_expect, hnode); + if (!net_eq(nf_ct_exp_net(expect), net)) + return 0; + if (expect->timeout.function) seq_printf(s, "%ld ", timer_pending(&expect->timeout) ? (long)(expect->timeout.expires - jiffies)/HZ : 0); @@ -654,7 +683,7 @@ static int exp_seq_show(struct seq_file *s, void *v) if (expect->flags & NF_CT_EXPECT_USERSPACE) seq_printf(s, "%sUSERSPACE", delim); - helper = rcu_dereference(nfct_help(expect->master)->helper); + helper = rcu_dereference(expect->helper); if (helper) { seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name); if (helper->expect_policy[expect->class].name[0]) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index a2a0e22ccee1..3f5c50455b71 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - exp->helper = &nf_conntrack_helper_h245; + rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3 : NULL, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ nathook = rcu_dereference(nfct_h323_nat_hook); @@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_UDP, NULL, &port); - exp->helper = nf_conntrack_helper_ras; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect RAS "); @@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index ceb48c3ca0a4..1b330ba6613b 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -395,14 +395,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) { - struct nf_conn_help *help = nfct_help(exp->master); const struct nf_conntrack_helper *me = data; const struct nf_conntrack_helper *this; - if (exp->helper == me) - return true; - - this = rcu_dereference_protected(help->helper, + this = rcu_dereference_protected(exp->helper, lockdep_is_held(&nf_conntrack_expect_lock)); return this == me; } @@ -421,6 +417,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); + + /* nf_ct_iterate_destroy() does an unconditional synchronize_rcu() as + * last step, this ensures rcu readers of exp->helper are done. + * No need for another synchronize_rcu() here. + */ } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c156574e1273..3f408f3713bb 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -910,8 +910,8 @@ struct ctnetlink_filter { }; static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = { - [CTA_FILTER_ORIG_FLAGS] = { .type = NLA_U32 }, - [CTA_FILTER_REPLY_FLAGS] = { .type = NLA_U32 }, + [CTA_FILTER_ORIG_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), + [CTA_FILTER_REPLY_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), }; static int ctnetlink_parse_filter(const struct nlattr *attr, @@ -925,17 +925,11 @@ static int ctnetlink_parse_filter(const struct nlattr *attr, if (ret) return ret; - if (tb[CTA_FILTER_ORIG_FLAGS]) { + if (tb[CTA_FILTER_ORIG_FLAGS]) filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]); - if (filter->orig_flags & ~CTA_FILTER_F_ALL) - return -EOPNOTSUPP; - } - if (tb[CTA_FILTER_REPLY_FLAGS]) { + if (tb[CTA_FILTER_REPLY_FLAGS]) filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]); - if (filter->reply_flags & ~CTA_FILTER_F_ALL) - return -EOPNOTSUPP; - } return 0; } @@ -2634,7 +2628,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN - 1 }, [CTA_EXPECT_ZONE] = { .type = NLA_U16 }, - [CTA_EXPECT_FLAGS] = { .type = NLA_U32 }, + [CTA_EXPECT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_CT_EXPECT_MASK), [CTA_EXPECT_CLASS] = { .type = NLA_U32 }, [CTA_EXPECT_NAT] = { .type = NLA_NESTED }, [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING }, @@ -3012,7 +3006,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, { struct nf_conn *master = exp->master; long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ; - struct nf_conn_help *help; + struct nf_conntrack_helper *helper; #if IS_ENABLED(CONFIG_NF_NAT) struct nlattr *nest_parms; struct nf_conntrack_tuple nat_tuple = {}; @@ -3057,15 +3051,12 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; - help = nfct_help(master); - if (help) { - struct nf_conntrack_helper *helper; - helper = rcu_dereference(help->helper); - if (helper && - nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name)) - goto nla_put_failure; - } + helper = rcu_dereference(exp->helper); + if (helper && + nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name)) + goto nla_put_failure; + expfn = nf_ct_helper_expectfn_find_by_symbol(exp->expectfn); if (expfn != NULL && nla_put_string(skb, CTA_EXPECT_FN, expfn->name)) @@ -3358,31 +3349,37 @@ static int ctnetlink_get_expect(struct sk_buff *skb, if (err < 0) return err; + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + spin_lock_bh(&nf_conntrack_expect_lock); exp = nf_ct_expect_find_get(info->net, &zone, &tuple); - if (!exp) + if (!exp) { + spin_unlock_bh(&nf_conntrack_expect_lock); + kfree_skb(skb2); return -ENOENT; + } if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); + kfree_skb(skb2); return -ENOENT; } } - skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb2) { - nf_ct_expect_put(exp); - return -ENOMEM; - } - rcu_read_lock(); err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); rcu_read_unlock(); nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); + if (err <= 0) { kfree_skb(skb2); return -ENOMEM; @@ -3394,12 +3391,9 @@ static int ctnetlink_get_expect(struct sk_buff *skb, static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data) { struct nf_conntrack_helper *helper; - const struct nf_conn_help *m_help; const char *name = data; - m_help = nfct_help(exp->master); - - helper = rcu_dereference(m_help->helper); + helper = rcu_dereference(exp->helper); if (!helper) return false; @@ -3432,22 +3426,26 @@ static int ctnetlink_del_expect(struct sk_buff *skb, if (err < 0) return err; + spin_lock_bh(&nf_conntrack_expect_lock); + /* bump usage count to 2 */ exp = nf_ct_expect_find_get(info->net, &zone, &tuple); - if (!exp) + if (!exp) { + spin_unlock_bh(&nf_conntrack_expect_lock); return -ENOENT; + } if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); return -ENOENT; } } /* after list removal, usage count == 1 */ - spin_lock_bh(&nf_conntrack_expect_lock); if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, nlmsg_report(info->nlh)); @@ -3534,9 +3532,10 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask) { - u_int32_t class = 0; + struct net *net = read_pnet(&ct->ct_net); struct nf_conntrack_expect *exp; struct nf_conn_help *help; + u32 class = 0; int err; help = nfct_help(ct); @@ -3573,7 +3572,13 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, exp->class = class; exp->master = ct; - exp->helper = helper; + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif + if (!helper) + helper = rcu_dereference(help->helper); + rcu_assign_pointer(exp->helper, helper); exp->tuple = *tuple; exp->mask.src.u3 = mask->src.u3; exp->mask.src.u.all = mask->src.u.all; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 0c1d086e96cb..b67426c2189b 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1385,9 +1385,9 @@ nla_put_failure: } static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { - [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 }, - [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, - [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, + [CTA_PROTOINFO_TCP_STATE] = NLA_POLICY_MAX(NLA_U8, TCP_CONNTRACK_SYN_SENT2), + [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), + [CTA_PROTOINFO_TCP_WSCALE_REPLY] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, }; @@ -1414,10 +1414,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) if (err < 0) return err; - if (tb[CTA_PROTOINFO_TCP_STATE] && - nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX) - return -EINVAL; - spin_lock_bh(&ct->lock); if (tb[CTA_PROTOINFO_TCP_STATE]) ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 4ab5ef71d96d..939502ff7c87 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -924,7 +924,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (!exp || exp->master == ct || - nfct_help(exp->master)->helper != nfct_help(ct)->helper || + exp->helper != nfct_help(ct)->helper || exp->class != class) break; #if IS_ENABLED(CONFIG_NF_NAT) @@ -1040,6 +1040,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, unsigned int port; const struct sdp_media_type *t; int ret = NF_ACCEPT; + bool have_rtp_addr = false; hooks = rcu_dereference(nf_nat_sip_hooks); @@ -1056,8 +1057,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, caddr_len = 0; if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen, SDP_HDR_CONNECTION, SDP_HDR_MEDIA, - &matchoff, &matchlen, &caddr) > 0) + &matchoff, &matchlen, &caddr) > 0) { caddr_len = matchlen; + memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); + have_rtp_addr = true; + } mediaoff = sdpoff; for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) { @@ -1091,9 +1095,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, &matchoff, &matchlen, &maddr) > 0) { maddr_len = matchlen; memcpy(&rtp_addr, &maddr, sizeof(rtp_addr)); - } else if (caddr_len) + have_rtp_addr = true; + } else if (caddr_len) { memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); - else { + have_rtp_addr = true; + } else { nf_ct_helper_log(skb, ct, "cannot parse SDP message"); return NF_DROP; } @@ -1125,7 +1131,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, /* Update session connection and owner addresses */ hooks = rcu_dereference(nf_nat_sip_hooks); - if (hooks && ct->status & IPS_NAT_MASK) + if (hooks && ct->status & IPS_NAT_MASK && have_rtp_addr) ret = hooks->sdp_session(skb, protoff, dataoff, dptr, datalen, sdpoff, &rtp_addr); @@ -1297,7 +1303,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct), saddr, &daddr, proto, NULL, &port); exp->timeout.expires = sip_timeout * HZ; - exp->helper = helper; + rcu_assign_pointer(exp->helper, helper); exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; hooks = rcu_dereference(nf_nat_sip_hooks); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b35a90955e2e..fcbe54940b2e 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -647,15 +647,11 @@ __build_packet_message(struct nfnl_log_net *log, if (data_len) { struct nlattr *nla; - int size = nla_attr_size(data_len); - if (skb_tailroom(inst->skb) < nla_total_size(data_len)) + nla = nla_reserve(inst->skb, NFULA_PAYLOAD, data_len); + if (!nla) goto nla_put_failure; - nla = skb_put(inst->skb, nla_total_size(data_len)); - nla->nla_type = NFULA_PAYLOAD; - nla->nla_len = size; - if (skb_copy_bits(skb, 0, nla_data(nla), data_len)) BUG(); } diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 7ff90325c97f..6395982e4d95 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -242,7 +242,7 @@ static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -319,7 +319,7 @@ static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -414,7 +414,7 @@ static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -505,7 +505,7 @@ static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -641,7 +641,7 @@ static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -699,7 +699,7 @@ static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -764,7 +764,7 @@ static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -839,7 +839,7 @@ static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -925,7 +925,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -1019,7 +1019,7 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index fe8bd497d74a..737c339decd0 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -572,14 +572,12 @@ static struct nft_array *nft_array_alloc(u32 max_intervals) return array; } -#define NFT_ARRAY_EXTRA_SIZE 10240 - /* Similar to nft_rbtree_{u,k}size to hide details to userspace, but consider * packed representation coming from userspace for anonymous sets too. */ static u32 nft_array_elems(const struct nft_set *set) { - u32 nelems = atomic_read(&set->nelems); + u32 nelems = atomic_read(&set->nelems) - set->ndeact; /* Adjacent intervals are represented with a single start element in * anonymous sets, use the current element counter as is. @@ -595,27 +593,87 @@ static u32 nft_array_elems(const struct nft_set *set) return (nelems / 2) + 2; } -static int nft_array_may_resize(const struct nft_set *set) +#define NFT_ARRAY_INITIAL_SIZE 1024 +#define NFT_ARRAY_INITIAL_ANON_SIZE 16 +#define NFT_ARRAY_INITIAL_ANON_THRESH (8192U / sizeof(struct nft_array_interval)) + +static int nft_array_may_resize(const struct nft_set *set, bool flush) { - u32 nelems = nft_array_elems(set), new_max_intervals; + u32 initial_intervals, max_intervals, new_max_intervals, delta; + u32 shrinked_max_intervals, nelems = nft_array_elems(set); struct nft_rbtree *priv = nft_set_priv(set); struct nft_array *array; - if (!priv->array_next) { - array = nft_array_alloc(nelems + NFT_ARRAY_EXTRA_SIZE); - if (!array) - return -ENOMEM; + if (nft_set_is_anonymous(set)) + initial_intervals = NFT_ARRAY_INITIAL_ANON_SIZE; + else + initial_intervals = NFT_ARRAY_INITIAL_SIZE; + + if (priv->array_next) { + max_intervals = priv->array_next->max_intervals; + new_max_intervals = priv->array_next->max_intervals; + } else { + if (priv->array) { + max_intervals = priv->array->max_intervals; + new_max_intervals = priv->array->max_intervals; + } else { + max_intervals = 0; + new_max_intervals = initial_intervals; + } + } - priv->array_next = array; + if (nft_set_is_anonymous(set)) + goto maybe_grow; + + if (flush) { + /* Set flush just started, nelems still report elements.*/ + nelems = 0; + new_max_intervals = NFT_ARRAY_INITIAL_SIZE; + goto realloc_array; } - if (nelems < priv->array_next->max_intervals) - return 0; + if (check_add_overflow(new_max_intervals, new_max_intervals, + &shrinked_max_intervals)) + return -EOVERFLOW; + + shrinked_max_intervals = DIV_ROUND_UP(shrinked_max_intervals, 3); - new_max_intervals = priv->array_next->max_intervals + NFT_ARRAY_EXTRA_SIZE; - if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0) + if (shrinked_max_intervals > NFT_ARRAY_INITIAL_SIZE && + nelems < shrinked_max_intervals) { + new_max_intervals = shrinked_max_intervals; + goto realloc_array; + } +maybe_grow: + if (nelems > new_max_intervals) { + if (nft_set_is_anonymous(set) && + new_max_intervals < NFT_ARRAY_INITIAL_ANON_THRESH) { + new_max_intervals <<= 1; + } else { + delta = new_max_intervals >> 1; + if (check_add_overflow(new_max_intervals, delta, + &new_max_intervals)) + return -EOVERFLOW; + } + } + +realloc_array: + if (WARN_ON_ONCE(nelems > new_max_intervals)) return -ENOMEM; + if (priv->array_next) { + if (max_intervals == new_max_intervals) + return 0; + + if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0) + return -ENOMEM; + } else { + array = nft_array_alloc(new_max_intervals); + if (!array) + return -ENOMEM; + + priv->array_next = array; + } + return 0; } @@ -630,7 +688,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, nft_rbtree_maybe_reset_start_cookie(priv, tstamp); - if (nft_array_may_resize(set) < 0) + if (nft_array_may_resize(set, false) < 0) return -ENOMEM; do { @@ -741,7 +799,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, nft_rbtree_interval_null(set, this)) priv->start_rbe_cookie = 0; - if (nft_array_may_resize(set) < 0) + if (nft_array_may_resize(set, false) < 0) return NULL; while (parent != NULL) { @@ -811,7 +869,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, switch (iter->type) { case NFT_ITER_UPDATE_CLONE: - if (nft_array_may_resize(set) < 0) { + if (nft_array_may_resize(set, true) < 0) { iter->err = -ENOMEM; break; } diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 43d871525dbc..5f46c4b5720f 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -579,8 +579,7 @@ static int nci_close_device(struct nci_dev *ndev) skb_queue_purge(&ndev->rx_q); skb_queue_purge(&ndev->tx_q); - /* Flush RX and TX wq */ - flush_workqueue(ndev->rx_wq); + /* Flush TX wq, RX wq flush can't be under the lock */ flush_workqueue(ndev->tx_wq); /* Reset device */ @@ -592,13 +591,13 @@ static int nci_close_device(struct nci_dev *ndev) msecs_to_jiffies(NCI_RESET_TIMEOUT)); /* After this point our queues are empty - * and no works are scheduled. + * rx work may be running but will see that NCI_UP was cleared */ ndev->ops->close(ndev); clear_bit(NCI_INIT, &ndev->flags); - /* Flush cmd wq */ + /* Flush cmd and tx wq */ flush_workqueue(ndev->cmd_wq); timer_delete_sync(&ndev->cmd_timer); @@ -613,6 +612,9 @@ static int nci_close_device(struct nci_dev *ndev) mutex_unlock(&ndev->req_lock); + /* rx_work may take req_lock via nci_deactivate_target */ + flush_workqueue(ndev->rx_wq); + return 0; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 67fbf6e48a30..13052408a132 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2953,6 +2953,8 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_MPLS: if (!eth_p_mpls(eth_type)) return -EINVAL; + if (key_len != sizeof(struct ovs_key_mpls)) + return -EINVAL; break; case OVS_KEY_ATTR_SCTP: diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 6574f9bcdc02..12055af832dc 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -151,11 +151,15 @@ static void vport_netdev_free(struct rcu_head *rcu) void ovs_netdev_detach_dev(struct vport *vport) { ASSERT_RTNL(); - vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; netdev_rx_handler_unregister(vport->dev); netdev_upper_dev_unlink(vport->dev, netdev_master_upper_dev_get(vport->dev)); dev_set_promiscuity(vport->dev, -1); + + /* paired with smp_mb() in netdev_destroy() */ + smp_wmb(); + + vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; } static void netdev_destroy(struct vport *vport) @@ -174,6 +178,9 @@ static void netdev_destroy(struct vport *vport) rtnl_unlock(); } + /* paired with smp_wmb() in ovs_netdev_detach_dev() */ + smp_mb(); + call_rcu(&vport->rcu, vport_netdev_free); } @@ -189,8 +196,6 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == NETREG_REGISTERED) rtnl_delete_link(vport->dev, 0, NULL); - netdev_put(vport->dev, &vport->dev_tracker); - vport->dev = NULL; rtnl_unlock(); call_rcu(&vport->rcu, vport_netdev_free); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 72d0935139f0..bb2d88205e5a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3135,6 +3135,7 @@ static int packet_release(struct socket *sock) spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); + WRITE_ONCE(po->num, 0); packet_cached_dev_reset(po); if (po->prot_hook.dev) { diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index d833e36f7fd4..c1d9b923938d 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -135,9 +135,16 @@ out: sock_put(sk); } +static bool smc_rx_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + /* smc_spd_priv in buf->private is not shareable; disallow cloning. */ + return false; +} + static const struct pipe_buf_operations smc_pipe_ops = { .release = smc_rx_pipe_buf_release, - .get = generic_pipe_buf_get + .get = smc_rx_pipe_buf_get, }; static void smc_rx_spd_release(struct splice_pipe_desc *spd, diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 5fe07f110fe8..dd9dda759bbb 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -246,6 +246,7 @@ static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); atomic_inc(&ctx->decrypt_pending); + __skb_queue_purge(&ctx->async_hold); return ctx->async_wait.err; } @@ -2225,7 +2226,6 @@ recv_end: /* Wait for all previously submitted records to be decrypted */ ret = tls_decrypt_async_wait(ctx); - __skb_queue_purge(&ctx->async_hold); if (ret) { if (err >= 0 || err == -EINPROGRESS) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 4ed346e682c7..dc1312ed5a09 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -75,7 +75,10 @@ int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo) spin_lock_bh(&xfrm_input_afinfo_lock); if (likely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family])) { - if (unlikely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family] != afinfo)) + const struct xfrm_input_afinfo *cur; + + cur = rcu_access_pointer(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family]); + if (unlikely(cur != afinfo)) err = -EINVAL; else RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family], NULL); diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 050a82101ca5..97bc979e55ba 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -901,6 +901,12 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq, iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) { iptfs_skb_add_frags(newskb, fragwalk, data, copylen); } else { + if (skb_linearize(newskb)) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINBUFFERERROR); + goto abandon; + } + /* copy fragment data into newskb */ if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), copylen)) { @@ -991,6 +997,11 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, iplen = be16_to_cpu(iph->tot_len); iphlen = iph->ihl << 2; + if (iplen < iphlen || iphlen < sizeof(*iph)) { + XFRM_INC_STATS(net, + LINUX_MIB_XFRMINHDRERROR); + goto done; + } protocol = cpu_to_be16(ETH_P_IP); XFRM_MODE_SKB_CB(skbseq->root_skb)->tos = iph->tos; } else if (iph->version == 0x6) { @@ -2653,9 +2664,6 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) if (!xtfs) return -ENOMEM; - x->mode_data = xtfs; - xtfs->x = x; - xtfs->ra_newskb = NULL; if (xtfs->cfg.reorder_win_size) { xtfs->w_saved = kzalloc_objs(*xtfs->w_saved, @@ -2666,6 +2674,9 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) } } + x->mode_data = xtfs; + xtfs->x = x; + return 0; } diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c index ebf95d48e86c..1856beee0149 100644 --- a/net/xfrm/xfrm_nat_keepalive.c +++ b/net/xfrm/xfrm_nat_keepalive.c @@ -261,7 +261,7 @@ int __net_init xfrm_nat_keepalive_net_init(struct net *net) int xfrm_nat_keepalive_net_fini(struct net *net) { - cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work); + disable_delayed_work_sync(&net->xfrm.nat_keepalive_work); return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9e1a522ab1c5..362939aa56cf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4156,7 +4156,7 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo) int i; for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) { - if (xfrm_policy_afinfo[i] != afinfo) + if (rcu_access_pointer(xfrm_policy_afinfo[i]) != afinfo) continue; RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL); break; @@ -4242,7 +4242,7 @@ static int __net_init xfrm_policy_init(struct net *net) net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0; htab = &net->xfrm.policy_bydst[dir]; - htab->table = xfrm_hash_alloc(sz); + rcu_assign_pointer(htab->table, xfrm_hash_alloc(sz)); if (!htab->table) goto out_bydst; htab->hmask = hmask; @@ -4269,7 +4269,7 @@ out_bydst: struct xfrm_policy_hash *htab; htab = &net->xfrm.policy_bydst[dir]; - xfrm_hash_free(htab->table, sz); + xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz); } xfrm_hash_free(net->xfrm.policy_byidx, sz); out_byidx: @@ -4282,6 +4282,8 @@ static void xfrm_policy_fini(struct net *net) unsigned int sz; int dir; + disable_work_sync(&net->xfrm.policy_hthresh.work); + flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); @@ -4295,8 +4297,8 @@ static void xfrm_policy_fini(struct net *net) htab = &net->xfrm.policy_bydst[dir]; sz = (htab->hmask + 1) * sizeof(struct hlist_head); - WARN_ON(!hlist_empty(htab->table)); - xfrm_hash_free(htab->table, sz); + WARN_ON(!hlist_empty(rcu_dereference_protected(htab->table, true))); + xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz); } sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 98b362d51836..1748d374abca 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -53,7 +53,7 @@ static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); static HLIST_HEAD(xfrm_state_gc_list); static HLIST_HEAD(xfrm_state_dev_gc_list); -static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) +static inline bool xfrm_state_hold_rcu(struct xfrm_state *x) { return refcount_inc_not_zero(&x->refcnt); } @@ -870,7 +870,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, task_valid); @@ -891,7 +891,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool struct xfrm_state *x; struct xfrm_dev_offload *xso; - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { xso = &x->xso; if (xso->dev == dev && @@ -931,7 +931,7 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; restart: - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -973,7 +973,7 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { restart: - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { xso = &x->xso; if (!xfrm_state_kern(x) && xso->dev == dev) { @@ -1563,23 +1563,23 @@ found: list_add(&x->km.all, &net->xfrm.state_all); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); XFRM_STATE_INSERT(bydst, &x->bydst, - net->xfrm.state_bydst + h, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, daddr, saddr, encap_family); XFRM_STATE_INSERT(bysrc, &x->bysrc, - net->xfrm.state_bysrc + h, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); INIT_HLIST_NODE(&x->state_cache); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); XFRM_STATE_INSERT(byspi, &x->byspi, - net->xfrm.state_byspi + h, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, x->xso.type); } if (x->km.seq) { h = xfrm_seq_hash(net, x->km.seq); XFRM_STATE_INSERT(byseq, &x->byseq, - net->xfrm.state_byseq + h, + xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, x->xso.type); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; @@ -1652,7 +1652,7 @@ xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id, spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, reqid, family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && @@ -1703,18 +1703,12 @@ static struct xfrm_state *xfrm_state_lookup_spi_proto(struct net *net, __be32 sp struct xfrm_state *x; unsigned int i; - rcu_read_lock(); for (i = 0; i <= net->xfrm.state_hmask; i++) { - hlist_for_each_entry_rcu(x, &net->xfrm.state_byspi[i], byspi) { - if (x->id.spi == spi && x->id.proto == proto) { - if (!xfrm_state_hold_rcu(x)) - continue; - rcu_read_unlock(); + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byspi, net) + i, byspi) { + if (x->id.spi == spi && x->id.proto == proto) return x; - } } } - rcu_read_unlock(); return NULL; } @@ -1730,25 +1724,29 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); - XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, + XFRM_STATE_INSERT(bydst, &x->bydst, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); - XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h, + XFRM_STATE_INSERT(bysrc, &x->bysrc, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, + XFRM_STATE_INSERT(byspi, &x->byspi, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, x->xso.type); } if (x->km.seq) { h = xfrm_seq_hash(net, x->km.seq); - XFRM_STATE_INSERT(byseq, &x->byseq, net->xfrm.state_byseq + h, + XFRM_STATE_INSERT(byseq, &x->byseq, + xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, x->xso.type); } @@ -1775,7 +1773,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) u32 cpu_id = xnew->pcpu_num; h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.family == family && x->props.reqid == reqid && x->if_id == if_id && @@ -1811,7 +1809,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_state *x; u32 mark = m->v & m->m; - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1868,10 +1866,12 @@ static struct xfrm_state *__find_acq_core(struct net *net, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL_SOFT); list_add(&x->km.all, &net->xfrm.state_all); - XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, + XFRM_STATE_INSERT(bydst, &x->bydst, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, daddr, saddr, family); - XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h, + XFRM_STATE_INSERT(bysrc, &x->bysrc, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); net->xfrm.state_num++; @@ -2091,7 +2091,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n if (m->reqid) { h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -2110,7 +2110,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n } else { h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -2264,6 +2264,7 @@ out: err = 0; x->km.state = XFRM_STATE_DEAD; + xfrm_dev_state_delete(x); __xfrm_state_put(x); } @@ -2312,7 +2313,7 @@ void xfrm_state_update_stats(struct net *net) spin_lock_bh(&net->xfrm.xfrm_state_lock); for (i = 0; i <= net->xfrm.state_hmask; i++) { - hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst) + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) xfrm_dev_state_update_stats(x); } spin_unlock_bh(&net->xfrm.xfrm_state_lock); @@ -2503,7 +2504,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s unsigned int h = xfrm_seq_hash(net, seq); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, byseq) { if (x->km.seq == seq && (mark & x->mark.m) == x->mark.v && x->pcpu_num == pcpu_num && @@ -2602,12 +2603,13 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high, if (!x0) { x->id.spi = newspi; h = xfrm_spi_hash(net, &x->id.daddr, newspi, x->id.proto, x->props.family); - XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, x->xso.type); + XFRM_STATE_INSERT(byspi, &x->byspi, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, + x->xso.type); spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = 0; goto unlock; } - xfrm_state_put(x0); spin_unlock_bh(&net->xfrm.xfrm_state_lock); next: @@ -3258,6 +3260,7 @@ EXPORT_SYMBOL(xfrm_init_state); int __net_init xfrm_state_init(struct net *net) { + struct hlist_head *ndst, *nsrc, *nspi, *nseq; unsigned int sz; if (net_eq(net, &init_net)) @@ -3268,18 +3271,25 @@ int __net_init xfrm_state_init(struct net *net) sz = sizeof(struct hlist_head) * 8; - net->xfrm.state_bydst = xfrm_hash_alloc(sz); - if (!net->xfrm.state_bydst) + ndst = xfrm_hash_alloc(sz); + if (!ndst) goto out_bydst; - net->xfrm.state_bysrc = xfrm_hash_alloc(sz); - if (!net->xfrm.state_bysrc) + rcu_assign_pointer(net->xfrm.state_bydst, ndst); + + nsrc = xfrm_hash_alloc(sz); + if (!nsrc) goto out_bysrc; - net->xfrm.state_byspi = xfrm_hash_alloc(sz); - if (!net->xfrm.state_byspi) + rcu_assign_pointer(net->xfrm.state_bysrc, nsrc); + + nspi = xfrm_hash_alloc(sz); + if (!nspi) goto out_byspi; - net->xfrm.state_byseq = xfrm_hash_alloc(sz); - if (!net->xfrm.state_byseq) + rcu_assign_pointer(net->xfrm.state_byspi, nspi); + + nseq = xfrm_hash_alloc(sz); + if (!nseq) goto out_byseq; + rcu_assign_pointer(net->xfrm.state_byseq, nseq); net->xfrm.state_cache_input = alloc_percpu(struct hlist_head); if (!net->xfrm.state_cache_input) @@ -3295,17 +3305,19 @@ int __net_init xfrm_state_init(struct net *net) return 0; out_state_cache_input: - xfrm_hash_free(net->xfrm.state_byseq, sz); + xfrm_hash_free(nseq, sz); out_byseq: - xfrm_hash_free(net->xfrm.state_byspi, sz); + xfrm_hash_free(nspi, sz); out_byspi: - xfrm_hash_free(net->xfrm.state_bysrc, sz); + xfrm_hash_free(nsrc, sz); out_bysrc: - xfrm_hash_free(net->xfrm.state_bydst, sz); + xfrm_hash_free(ndst, sz); out_bydst: return -ENOMEM; } +#define xfrm_state_deref_netexit(table) \ + rcu_dereference_protected((table), true /* netns is going away */) void xfrm_state_fini(struct net *net) { unsigned int sz; @@ -3318,17 +3330,17 @@ void xfrm_state_fini(struct net *net) WARN_ON(!list_empty(&net->xfrm.state_all)); for (i = 0; i <= net->xfrm.state_hmask; i++) { - WARN_ON(!hlist_empty(net->xfrm.state_byseq + i)); - WARN_ON(!hlist_empty(net->xfrm.state_byspi + i)); - WARN_ON(!hlist_empty(net->xfrm.state_bysrc + i)); - WARN_ON(!hlist_empty(net->xfrm.state_bydst + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byseq) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byspi) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bysrc) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bydst) + i)); } sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); - xfrm_hash_free(net->xfrm.state_byseq, sz); - xfrm_hash_free(net->xfrm.state_byspi, sz); - xfrm_hash_free(net->xfrm.state_bysrc, sz); - xfrm_hash_free(net->xfrm.state_bydst, sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byseq), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byspi), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bysrc), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bydst), sz); free_percpu(net->xfrm.state_cache_input); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 403b5ecac2c5..1656b487f833 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -35,6 +35,15 @@ #endif #include <linux/unaligned.h> +static struct sock *xfrm_net_nlsk(const struct net *net, const struct sk_buff *skb) +{ + /* get the source of this request, see netlink_unicast_kernel */ + const struct sock *sk = NETLINK_CB(skb).sk; + + /* sk is refcounted, the netns stays alive and nlsk with it */ + return rcu_dereference_protected(net->xfrm.nlsk, sk->sk_net_refcnt); +} + static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type, struct netlink_ext_ack *extack) { @@ -1727,7 +1736,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_spdinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid); } static inline unsigned int xfrm_sadinfo_msgsize(void) @@ -1787,7 +1796,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_sadinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1807,7 +1816,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1850,6 +1859,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); if (pcpu_num >= num_possible_cpus()) { err = -EINVAL; + NL_SET_ERR_MSG(extack, "pCPU number too big"); goto out_noput; } } @@ -1897,7 +1907,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } } - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -2542,7 +2552,7 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, r_up->out = net->xfrm.policy_default[XFRM_POLICY_OUT]; nlmsg_end(r_skb, r_nlh); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, portid); } static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2608,7 +2618,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); } } else { @@ -2781,7 +2791,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_aevent(r_skb, x, &c); BUG_ON(err < 0); - err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -3001,8 +3011,10 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_SA_PCPU]) { x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); err = -EINVAL; - if (x->pcpu_num >= num_possible_cpus()) + if (x->pcpu_num >= num_possible_cpus()) { + NL_SET_ERR_MSG(extack, "pCPU number too big"); goto free_state; + } } err = verify_newpolicy_info(&ua->policy, extack); @@ -3483,7 +3495,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, goto err; } - err = netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); + err = netlink_dump_start(xfrm_net_nlsk(net, skb), skb, nlh, &c); goto err; } @@ -3673,7 +3685,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) } if (x->if_id) l += nla_total_size(sizeof(x->if_id)); - if (x->pcpu_num) + if (x->pcpu_num != UINT_MAX) l += nla_total_size(sizeof(x->pcpu_num)); /* Must count x->lastused as it may become non-zero behind our back. */ diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index 45a3e7ad3dcb..02d6f51d5a06 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -3,6 +3,7 @@ TEST_PROGS := \ dev_addr_lists.sh \ + non_ether_header_ops.sh \ options.sh \ propagation.sh \ refleak.sh \ diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 558e1d0cf565..5d36a22ef080 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -1,7 +1,9 @@ +CONFIG_BONDING=y CONFIG_DUMMY=y CONFIG_IPV6=y CONFIG_MACVLAN=y CONFIG_NETDEVSIM=m +CONFIG_NET_IPGRE=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y diff --git a/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh new file mode 100755 index 000000000000..948a43576bdc --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2154 +# +# Reproduce the non-Ethernet header_ops confusion scenario with: +# g0 (gre) -> b0 (bond) -> t0 (team) +# +# Before the fix, direct header_ops inheritance in this stack could call +# callbacks with the wrong net_device context and crash. + +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/lib.sh + +trap cleanup_all_ns EXIT + +setup_ns ns1 + +ip -n "$ns1" link add d0 type dummy +ip -n "$ns1" addr add 10.10.10.1/24 dev d0 +ip -n "$ns1" link set d0 up + +ip -n "$ns1" link add g0 type gre local 10.10.10.1 +ip -n "$ns1" link add b0 type bond mode active-backup +ip -n "$ns1" link add t0 type team + +ip -n "$ns1" link set g0 master b0 +ip -n "$ns1" link set b0 master t0 + +ip -n "$ns1" link set g0 up +ip -n "$ns1" link set b0 up +ip -n "$ns1" link set t0 up + +# IPv6 address assignment triggers MLD join reports that call +# dev_hard_header() on t0, exercising the inherited header_ops path. +ip -n "$ns1" -6 addr add 2001:db8:1::1/64 dev t0 nodad +for i in $(seq 1 20); do + ip netns exec "$ns1" ping -6 -I t0 ff02::1 -c1 -W1 &>/dev/null || true +done + +echo "PASS: non-Ethernet header_ops stacking did not crash" +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index c5694cc4ddd2..829f72c8ee07 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -868,6 +868,64 @@ fib6_gc_test() check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l) log_test $ret 0 "ipv6 route garbage collection (replace with permanent)" + # Delete dummy_10 and remove all routes + $IP link del dev dummy_10 + + # rd6 is required for the next test. (ipv6toolkit) + if [ ! -x "$(command -v rd6)" ]; then + echo "SKIP: rd6 not found." + set +e + cleanup &> /dev/null + return + fi + + setup_ns ns2 + $IP link add veth1 type veth peer veth2 netns $ns2 + $IP link set veth1 up + ip -netns $ns2 link set veth2 up + $IP addr add fe80:dead::1/64 dev veth1 + ip -netns $ns2 addr add fe80:dead::2/64 dev veth2 + + # Add NTF_ROUTER neighbour to prevent rt6_age_examine_exception() + # from removing not-yet-expired exceptions. + ip -netns $ns2 link set veth2 address 00:11:22:33:44:55 + $IP neigh add fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router + + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_redirects=1 + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.forwarding=0 + + # Temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route add 2001:10::$i \ + via fe80:dead::2 dev veth1 expires $EXPIRE + + ip netns exec $ns2 rd6 -i veth2 \ + -s fe80:dead::2 -d fe80:dead::1 \ + -r 2001:10::$i -t fe80:dead::3 -p ICMP6 + done + + check_rt_num 5 $($IP -6 route list | grep expires | grep 2001:10:: | wc -l) + + # Promote to permanent routes by "prepend" (w/o NLM_F_EXCL and NLM_F_REPLACE) + for i in $(seq 1 5); do + # -EEXIST, but the temporary route becomes the permanent route. + $IP -6 route append 2001:10::$i \ + via fe80:dead::2 dev veth1 2>/dev/null || true + done + + check_rt_num 5 $($IP -6 route list | grep -v expires | grep 2001:10:: | wc -l) + check_rt_num 5 $($IP -6 route list cache | grep 2001:10:: | wc -l) + + # Trigger GC instead of waiting $GC_WAIT_TIME. + # rt6_nh_dump_exceptions() just skips expired exceptions. + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + check_rt_num 0 $($IP -6 route list cache | grep 2001:10:: | wc -l) + log_test $ret 0 "ipv6 route garbage collection (promote to permanent routes)" + + $IP neigh del fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router + $IP link del veth1 + # ra6 is required for the next test. (ipv6toolkit) if [ ! -x "$(command -v ra6)" ]; then echo "SKIP: ra6 not found." @@ -876,9 +934,6 @@ fib6_gc_test() return fi - # Delete dummy_10 and remove all routes - $IP link del dev dummy_10 - # Create a pair of veth devices to send a RA message from one # device to another. $IP link add veth1 type veth peer name veth2 diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 394166f224a4..ffdc6ccc6511 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -29,7 +29,8 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate insert_overlap" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate + insert_overlap load_flush_load4 load_flush_load8" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -432,6 +433,30 @@ race_repeat 0 perf_duration 0 " +TYPE_load_flush_load4=" +display reload with flush, 4bit groups +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_load_flush_load8=" +display reload with flush, 8bit groups +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1997,6 +2022,49 @@ test_bug_insert_overlap() return 0 } +test_bug_load_flush_load4() +{ + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + for i in $(seq 0 255); do + local addelem="add element inet filter test" + local j + + for j in $(seq 0 20); do + echo "$addelem { 10.$j.0.$i . 10.$j.1.$i }" + echo "$addelem { 10.$j.0.$i . 10.$j.2.$i }" + done + done > "$tmp" + + nft -f "$tmp" || return 1 + + ( echo "flush set inet filter test";cat "$tmp") | nft -f - + [ $? -eq 0 ] || return 1 + + return 0 +} + +test_bug_load_flush_load8() +{ + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + for i in $(seq 1 100); do + echo "add element inet filter test { 10.0.0.$i . 10.0.1.$i }" + echo "add element inet filter test { 10.0.0.$i . 10.0.2.$i }" + done > "$tmp" + + nft -f "$tmp" || return 1 + + ( echo "flush set inet filter test";cat "$tmp") | nft -f - + [ $? -eq 0 ] || return 1 + + return 0 +} + test_reported_issues() { eval test_bug_"${subtest}" } |
