summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/bluetooth/btintel.c11
-rw-r--r--drivers/bluetooth/btusb.c5
-rw-r--r--drivers/bluetooth/hci_ll.c2
-rw-r--r--drivers/net/can/dev/netlink.c4
-rw-r--r--drivers/net/can/spi/mcp251x.c29
-rw-r--r--drivers/net/ethernet/airoha/airoha_ppe.c2
-rw-r--r--drivers/net/ethernet/broadcom/Kconfig2
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp.c41
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c41
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_ethtool.c31
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h22
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c32
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_repr.c5
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf.h2
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_idc.c6
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c2
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_virtchnl.c2
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c5
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c6
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c17
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_common.c4
-rw-r--r--drivers/net/team/team_core.c65
-rw-r--r--drivers/net/tun_vnet.h2
-rw-r--r--drivers/net/virtio_net.c7
-rw-r--r--include/linux/virtio_net.h53
-rw-r--r--include/net/bluetooth/l2cap.h1
-rw-r--r--include/net/codel_impl.h1
-rw-r--r--include/net/inet_hashtables.h14
-rw-r--r--include/net/ip6_fib.h21
-rw-r--r--include/net/netfilter/nf_conntrack_core.h5
-rw-r--r--include/net/netfilter/nf_conntrack_expect.h20
-rw-r--r--include/net/netns/xfrm.h2
-rw-r--r--include/uapi/linux/netfilter/nf_conntrack_common.h4
-rw-r--r--net/bluetooth/hci_conn.c2
-rw-r--r--net/bluetooth/hci_core.c2
-rw-r--r--net/bluetooth/hci_sync.c20
-rw-r--r--net/bluetooth/l2cap_core.c71
-rw-r--r--net/bluetooth/l2cap_sock.c3
-rw-r--r--net/bluetooth/mgmt.c2
-rw-r--r--net/bluetooth/sco.c10
-rw-r--r--net/can/af_can.c4
-rw-r--r--net/can/af_can.h2
-rw-r--r--net/can/gw.c6
-rw-r--r--net/can/isotp.c24
-rw-r--r--net/can/proc.c3
-rw-r--r--net/core/dev.c22
-rw-r--r--net/core/rtnetlink.c28
-rw-r--r--net/ipv4/esp4.c9
-rw-r--r--net/ipv4/inet_connection_sock.c20
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv6/addrconf.c4
-rw-r--r--net/ipv6/esp6.c9
-rw-r--r--net/ipv6/ip6_fib.c15
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c4
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/key/af_key.c19
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c8
-rw-r--r--net/netfilter/nf_conntrack_ecache.c2
-rw-r--r--net/netfilter/nf_conntrack_expect.c39
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c12
-rw-r--r--net/netfilter/nf_conntrack_helper.c11
-rw-r--r--net/netfilter/nf_conntrack_netlink.c75
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c10
-rw-r--r--net/netfilter/nf_conntrack_sip.c18
-rw-r--r--net/netfilter/nfnetlink_log.c8
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c20
-rw-r--r--net/netfilter/nft_set_rbtree.c92
-rw-r--r--net/nfc/nci/core.c10
-rw-r--r--net/openvswitch/flow_netlink.c2
-rw-r--r--net/openvswitch/vport-netdev.c11
-rw-r--r--net/packet/af_packet.c1
-rw-r--r--net/smc/smc_rx.c9
-rw-r--r--net/tls/tls_sw.c2
-rw-r--r--net/xfrm/xfrm_input.c5
-rw-r--r--net/xfrm/xfrm_iptfs.c17
-rw-r--r--net/xfrm/xfrm_nat_keepalive.c2
-rw-r--r--net/xfrm/xfrm_policy.c12
-rw-r--r--net/xfrm/xfrm_state.c116
-rw-r--r--net/xfrm/xfrm_user.c32
-rw-r--r--tools/testing/selftests/drivers/net/team/Makefile1
-rw-r--r--tools/testing/selftests/drivers/net/team/config2
-rwxr-xr-xtools/testing/selftests/drivers/net/team/non_ether_header_ops.sh41
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh61
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh70
86 files changed, 1057 insertions, 387 deletions
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index 246b6205c5e0..ab146894ba4e 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -251,11 +251,13 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code)
bt_dev_err(hdev, "Hardware error 0x%2.2x", code);
+ hci_req_sync_lock(hdev);
+
skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Reset after hardware error failed (%ld)",
PTR_ERR(skb));
- return;
+ goto unlock;
}
kfree_skb(skb);
@@ -263,18 +265,21 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code)
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Retrieving Intel exception info failed (%ld)",
PTR_ERR(skb));
- return;
+ goto unlock;
}
if (skb->len != 13) {
bt_dev_err(hdev, "Exception info size mismatch");
kfree_skb(skb);
- return;
+ goto unlock;
}
bt_dev_err(hdev, "Exception info %s", (char *)(skb->data + 1));
kfree_skb(skb);
+
+unlock:
+ hci_req_sync_unlock(hdev);
}
EXPORT_SYMBOL_GPL(btintel_hw_error);
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index a1c5eb993e47..5c535f3ab722 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2376,8 +2376,11 @@ static void btusb_work(struct work_struct *work)
if (data->air_mode == HCI_NOTIFY_ENABLE_SCO_CVSD) {
if (hdev->voice_setting & 0x0020) {
static const int alts[3] = { 2, 4, 5 };
+ unsigned int sco_idx;
- new_alts = alts[data->sco_num - 1];
+ sco_idx = min_t(unsigned int, data->sco_num - 1,
+ ARRAY_SIZE(alts) - 1);
+ new_alts = alts[sco_idx];
} else {
new_alts = data->sco_num;
}
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index 91acf24f1ef5..91c96ad12342 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -541,6 +541,8 @@ static int download_firmware(struct ll_device *lldev)
if (err || !fw->data || !fw->size) {
bt_dev_err(lldev->hu.hdev, "request_firmware failed(errno %d) for %s",
err, bts_scr_name);
+ if (!err)
+ release_firmware(fw);
return -EINVAL;
}
ptr = (void *)fw->data;
diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c
index 0498198a4696..766d455950f5 100644
--- a/drivers/net/can/dev/netlink.c
+++ b/drivers/net/can/dev/netlink.c
@@ -601,7 +601,9 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
/* We need synchronization with dev->stop() */
ASSERT_RTNL();
- can_ctrlmode_changelink(dev, data, extack);
+ err = can_ctrlmode_changelink(dev, data, extack);
+ if (err)
+ return err;
if (data[IFLA_CAN_BITTIMING]) {
struct can_bittiming bt;
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index bb7782582f40..0d0190ae094a 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -1225,7 +1225,11 @@ static int mcp251x_open(struct net_device *net)
}
mutex_lock(&priv->mcp_lock);
- mcp251x_power_enable(priv->transceiver, 1);
+ ret = mcp251x_power_enable(priv->transceiver, 1);
+ if (ret) {
+ dev_err(&spi->dev, "failed to enable transceiver power: %pe\n", ERR_PTR(ret));
+ goto out_close_candev;
+ }
priv->force_quit = 0;
priv->tx_skb = NULL;
@@ -1272,6 +1276,7 @@ out_free_irq:
mcp251x_hw_sleep(spi);
out_close:
mcp251x_power_enable(priv->transceiver, 0);
+out_close_candev:
close_candev(net);
mutex_unlock(&priv->mcp_lock);
if (release_irq)
@@ -1516,11 +1521,25 @@ static int __maybe_unused mcp251x_can_resume(struct device *dev)
{
struct spi_device *spi = to_spi_device(dev);
struct mcp251x_priv *priv = spi_get_drvdata(spi);
+ int ret = 0;
- if (priv->after_suspend & AFTER_SUSPEND_POWER)
- mcp251x_power_enable(priv->power, 1);
- if (priv->after_suspend & AFTER_SUSPEND_UP)
- mcp251x_power_enable(priv->transceiver, 1);
+ if (priv->after_suspend & AFTER_SUSPEND_POWER) {
+ ret = mcp251x_power_enable(priv->power, 1);
+ if (ret) {
+ dev_err(dev, "failed to restore power: %pe\n", ERR_PTR(ret));
+ return ret;
+ }
+ }
+
+ if (priv->after_suspend & AFTER_SUSPEND_UP) {
+ ret = mcp251x_power_enable(priv->transceiver, 1);
+ if (ret) {
+ dev_err(dev, "failed to restore transceiver power: %pe\n", ERR_PTR(ret));
+ if (priv->after_suspend & AFTER_SUSPEND_POWER)
+ mcp251x_power_enable(priv->power, 0);
+ return ret;
+ }
+ }
if (priv->after_suspend & (AFTER_SUSPEND_POWER | AFTER_SUSPEND_UP))
queue_work(priv->wq, &priv->restart_work);
diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c
index 42dbe8f93231..5724f8f2defd 100644
--- a/drivers/net/ethernet/airoha/airoha_ppe.c
+++ b/drivers/net/ethernet/airoha/airoha_ppe.c
@@ -227,7 +227,9 @@ static int airoha_ppe_get_wdma_info(struct net_device *dev, const u8 *addr,
if (!dev)
return -ENODEV;
+ rcu_read_lock();
err = dev_fill_forward_path(dev, addr, &stack);
+ rcu_read_unlock();
if (err)
return err;
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index cd7dddeb91dd..9787c1857e13 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -25,7 +25,7 @@ config B44
select SSB
select MII
select PHYLIB
- select FIXED_PHY if BCM47XX
+ select FIXED_PHY
help
If you have a network (Ethernet) controller of this type, say Y
or M here.
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
index aa6d8606849f..972474893a6b 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
@@ -1152,12 +1152,6 @@ void bcmasp_enable_wol(struct bcmasp_intf *intf, bool en)
}
}
-static void bcmasp_wol_irq_destroy(struct bcmasp_priv *priv)
-{
- if (priv->wol_irq > 0)
- free_irq(priv->wol_irq, priv);
-}
-
static void bcmasp_eee_fixup(struct bcmasp_intf *intf, bool en)
{
u32 reg, phy_lpi_overwrite;
@@ -1255,7 +1249,7 @@ static int bcmasp_probe(struct platform_device *pdev)
if (priv->irq <= 0)
return -EINVAL;
- priv->clk = devm_clk_get_optional_enabled(dev, "sw_asp");
+ priv->clk = devm_clk_get_optional(dev, "sw_asp");
if (IS_ERR(priv->clk))
return dev_err_probe(dev, PTR_ERR(priv->clk),
"failed to request clock\n");
@@ -1283,6 +1277,10 @@ static int bcmasp_probe(struct platform_device *pdev)
bcmasp_set_pdata(priv, pdata);
+ ret = clk_prepare_enable(priv->clk);
+ if (ret)
+ return dev_err_probe(dev, ret, "failed to start clock\n");
+
/* Enable all clocks to ensure successful probing */
bcmasp_core_clock_set(priv, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE, 0);
@@ -1294,8 +1292,10 @@ static int bcmasp_probe(struct platform_device *pdev)
ret = devm_request_irq(&pdev->dev, priv->irq, bcmasp_isr, 0,
pdev->name, priv);
- if (ret)
- return dev_err_probe(dev, ret, "failed to request ASP interrupt: %d", ret);
+ if (ret) {
+ dev_err(dev, "Failed to request ASP interrupt: %d", ret);
+ goto err_clock_disable;
+ }
/* Register mdio child nodes */
of_platform_populate(dev->of_node, bcmasp_mdio_of_match, NULL, dev);
@@ -1307,13 +1307,17 @@ static int bcmasp_probe(struct platform_device *pdev)
priv->mda_filters = devm_kcalloc(dev, priv->num_mda_filters,
sizeof(*priv->mda_filters), GFP_KERNEL);
- if (!priv->mda_filters)
- return -ENOMEM;
+ if (!priv->mda_filters) {
+ ret = -ENOMEM;
+ goto err_clock_disable;
+ }
priv->net_filters = devm_kcalloc(dev, priv->num_net_filters,
sizeof(*priv->net_filters), GFP_KERNEL);
- if (!priv->net_filters)
- return -ENOMEM;
+ if (!priv->net_filters) {
+ ret = -ENOMEM;
+ goto err_clock_disable;
+ }
bcmasp_core_init_filters(priv);
@@ -1322,7 +1326,8 @@ static int bcmasp_probe(struct platform_device *pdev)
ports_node = of_find_node_by_name(dev->of_node, "ethernet-ports");
if (!ports_node) {
dev_warn(dev, "No ports found\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_clock_disable;
}
i = 0;
@@ -1344,8 +1349,6 @@ static int bcmasp_probe(struct platform_device *pdev)
*/
bcmasp_core_clock_set(priv, 0, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE);
- clk_disable_unprepare(priv->clk);
-
/* Now do the registration of the network ports which will take care
* of managing the clock properly.
*/
@@ -1358,13 +1361,16 @@ static int bcmasp_probe(struct platform_device *pdev)
count++;
}
+ clk_disable_unprepare(priv->clk);
+
dev_info(dev, "Initialized %d port(s)\n", count);
return ret;
err_cleanup:
- bcmasp_wol_irq_destroy(priv);
bcmasp_remove_intfs(priv);
+err_clock_disable:
+ clk_disable_unprepare(priv->clk);
return ret;
}
@@ -1376,7 +1382,6 @@ static void bcmasp_remove(struct platform_device *pdev)
if (!priv)
return;
- bcmasp_wol_irq_destroy(priv);
bcmasp_remove_intfs(priv);
}
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index c16ac9c76aa3..99e7d5cf3786 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1071,7 +1071,7 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budge
}
if (tx_skb->skb) {
- napi_consume_skb(tx_skb->skb, budget);
+ dev_consume_skb_any(tx_skb->skb);
tx_skb->skb = NULL;
}
}
@@ -3224,7 +3224,7 @@ static void gem_get_ethtool_stats(struct net_device *dev,
spin_lock_irq(&bp->stats_lock);
gem_update_stats(bp);
memcpy(data, &bp->ethtool_stats, sizeof(u64)
- * (GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES));
+ * (GEM_STATS_LEN + QUEUE_STATS_LEN * bp->num_queues));
spin_unlock_irq(&bp->stats_lock);
}
@@ -5776,9 +5776,9 @@ static int __maybe_unused macb_suspend(struct device *dev)
struct macb_queue *queue;
struct in_device *idev;
unsigned long flags;
+ u32 tmp, ifa_local;
unsigned int q;
int err;
- u32 tmp;
if (!device_may_wakeup(&bp->dev->dev))
phy_exit(bp->phy);
@@ -5787,14 +5787,21 @@ static int __maybe_unused macb_suspend(struct device *dev)
return 0;
if (bp->wol & MACB_WOL_ENABLED) {
- /* Check for IP address in WOL ARP mode */
- idev = __in_dev_get_rcu(bp->dev);
- if (idev)
- ifa = rcu_dereference(idev->ifa_list);
- if ((bp->wolopts & WAKE_ARP) && !ifa) {
- netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n");
- return -EOPNOTSUPP;
+ if (bp->wolopts & WAKE_ARP) {
+ /* Check for IP address in WOL ARP mode */
+ rcu_read_lock();
+ idev = __in_dev_get_rcu(bp->dev);
+ if (idev)
+ ifa = rcu_dereference(idev->ifa_list);
+ if (!ifa) {
+ rcu_read_unlock();
+ netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n");
+ return -EOPNOTSUPP;
+ }
+ ifa_local = be32_to_cpu(ifa->ifa_local);
+ rcu_read_unlock();
}
+
spin_lock_irqsave(&bp->lock, flags);
/* Disable Tx and Rx engines before disabling the queues,
@@ -5833,8 +5840,9 @@ static int __maybe_unused macb_suspend(struct device *dev)
if (bp->wolopts & WAKE_ARP) {
tmp |= MACB_BIT(ARP);
/* write IP address into register */
- tmp |= MACB_BFEXT(IP, be32_to_cpu(ifa->ifa_local));
+ tmp |= MACB_BFEXT(IP, ifa_local);
}
+ spin_unlock_irqrestore(&bp->lock, flags);
/* Change interrupt handler and
* Enable WoL IRQ on queue 0
@@ -5847,11 +5855,12 @@ static int __maybe_unused macb_suspend(struct device *dev)
dev_err(dev,
"Unable to request IRQ %d (error %d)\n",
bp->queues[0].irq, err);
- spin_unlock_irqrestore(&bp->lock, flags);
return err;
}
+ spin_lock_irqsave(&bp->lock, flags);
queue_writel(bp->queues, IER, GEM_BIT(WOL));
gem_writel(bp, WOL, tmp);
+ spin_unlock_irqrestore(&bp->lock, flags);
} else {
err = devm_request_irq(dev, bp->queues[0].irq, macb_wol_interrupt,
IRQF_SHARED, netdev->name, bp->queues);
@@ -5859,13 +5868,13 @@ static int __maybe_unused macb_suspend(struct device *dev)
dev_err(dev,
"Unable to request IRQ %d (error %d)\n",
bp->queues[0].irq, err);
- spin_unlock_irqrestore(&bp->lock, flags);
return err;
}
+ spin_lock_irqsave(&bp->lock, flags);
queue_writel(bp->queues, IER, MACB_BIT(WOL));
macb_writel(bp, WOL, tmp);
+ spin_unlock_irqrestore(&bp->lock, flags);
}
- spin_unlock_irqrestore(&bp->lock, flags);
enable_irq_wake(bp->queues[0].irq);
}
@@ -5932,6 +5941,8 @@ static int __maybe_unused macb_resume(struct device *dev)
queue_readl(bp->queues, ISR);
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
queue_writel(bp->queues, ISR, -1);
+ spin_unlock_irqrestore(&bp->lock, flags);
+
/* Replace interrupt handler on queue 0 */
devm_free_irq(dev, bp->queues[0].irq, bp->queues);
err = devm_request_irq(dev, bp->queues[0].irq, macb_interrupt,
@@ -5940,10 +5951,8 @@ static int __maybe_unused macb_resume(struct device *dev)
dev_err(dev,
"Unable to request IRQ %d (error %d)\n",
bp->queues[0].irq, err);
- spin_unlock_irqrestore(&bp->lock, flags);
return err;
}
- spin_unlock_irqrestore(&bp->lock, flags);
disable_irq_wake(bp->queues[0].irq);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index fed89d4f1e1d..2fe140ddebb2 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -813,6 +813,8 @@ static void enetc_get_ringparam(struct net_device *ndev,
{
struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ ring->rx_max_pending = priv->rx_bd_count;
+ ring->tx_max_pending = priv->tx_bd_count;
ring->rx_pending = priv->rx_bd_count;
ring->tx_pending = priv->tx_bd_count;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index ab67c709d5a0..1cd1f3f2930a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -313,14 +313,13 @@ static int iavf_get_sset_count(struct net_device *netdev, int sset)
{
/* Report the maximum number queues, even if not every queue is
* currently configured. Since allocation of queues is in pairs,
- * use netdev->real_num_tx_queues * 2. The real_num_tx_queues is set
- * at device creation and never changes.
+ * use netdev->num_tx_queues * 2. The num_tx_queues is set at
+ * device creation and never changes.
*/
if (sset == ETH_SS_STATS)
return IAVF_STATS_LEN +
- (IAVF_QUEUE_STATS_LEN * 2 *
- netdev->real_num_tx_queues);
+ (IAVF_QUEUE_STATS_LEN * 2 * netdev->num_tx_queues);
else
return -EINVAL;
}
@@ -345,19 +344,19 @@ static void iavf_get_ethtool_stats(struct net_device *netdev,
iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats);
rcu_read_lock();
- /* As num_active_queues describe both tx and rx queues, we can use
- * it to iterate over rings' stats.
+ /* Use num_tx_queues to report stats for the maximum number of queues.
+ * Queues beyond num_active_queues will report zero.
*/
- for (i = 0; i < adapter->num_active_queues; i++) {
- struct iavf_ring *ring;
+ for (i = 0; i < netdev->num_tx_queues; i++) {
+ struct iavf_ring *tx_ring = NULL, *rx_ring = NULL;
- /* Tx rings stats */
- ring = &adapter->tx_rings[i];
- iavf_add_queue_stats(&data, ring);
+ if (i < adapter->num_active_queues) {
+ tx_ring = &adapter->tx_rings[i];
+ rx_ring = &adapter->rx_rings[i];
+ }
- /* Rx rings stats */
- ring = &adapter->rx_rings[i];
- iavf_add_queue_stats(&data, ring);
+ iavf_add_queue_stats(&data, tx_ring);
+ iavf_add_queue_stats(&data, rx_ring);
}
rcu_read_unlock();
}
@@ -376,9 +375,9 @@ static void iavf_get_stat_strings(struct net_device *netdev, u8 *data)
iavf_add_stat_strings(&data, iavf_gstrings_stats);
/* Queues are always allocated in pairs, so we just use
- * real_num_tx_queues for both Tx and Rx queues.
+ * num_tx_queues for both Tx and Rx queues.
*/
- for (i = 0; i < netdev->real_num_tx_queues; i++) {
+ for (i = 0; i < netdev->num_tx_queues; i++) {
iavf_add_stat_strings(&data, iavf_gstrings_queue_stats,
"tx", i);
iavf_add_stat_strings(&data, iavf_gstrings_queue_stats,
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 2b2b22af42be..eb3a48330cc1 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -840,6 +840,28 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid)
}
/**
+ * ice_get_max_txq - return the maximum number of Tx queues for in a PF
+ * @pf: PF structure
+ *
+ * Return: maximum number of Tx queues
+ */
+static inline int ice_get_max_txq(struct ice_pf *pf)
+{
+ return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq);
+}
+
+/**
+ * ice_get_max_rxq - return the maximum number of Rx queues for in a PF
+ * @pf: PF structure
+ *
+ * Return: maximum number of Rx queues
+ */
+static inline int ice_get_max_rxq(struct ice_pf *pf)
+{
+ return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq);
+}
+
+/**
* ice_get_main_vsi - Get the PF VSI
* @pf: PF instance
*
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 301947d53ede..e6a20af6f63d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -1930,6 +1930,17 @@ __ice_get_ethtool_stats(struct net_device *netdev,
int i = 0;
char *p;
+ if (ice_is_port_repr_netdev(netdev)) {
+ ice_update_eth_stats(vsi);
+
+ for (j = 0; j < ICE_VSI_STATS_LEN; j++) {
+ p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset;
+ data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+ return;
+ }
+
ice_update_pf_stats(pf);
ice_update_vsi_stats(vsi);
@@ -1939,9 +1950,6 @@ __ice_get_ethtool_stats(struct net_device *netdev,
sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
}
- if (ice_is_port_repr_netdev(netdev))
- return;
-
/* populate per queue stats */
rcu_read_lock();
@@ -3774,24 +3782,6 @@ ice_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info)
}
/**
- * ice_get_max_txq - return the maximum number of Tx queues for in a PF
- * @pf: PF structure
- */
-static int ice_get_max_txq(struct ice_pf *pf)
-{
- return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq);
-}
-
-/**
- * ice_get_max_rxq - return the maximum number of Rx queues for in a PF
- * @pf: PF structure
- */
-static int ice_get_max_rxq(struct ice_pf *pf)
-{
- return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq);
-}
-
-/**
* ice_get_combined_cnt - return the current number of combined channels
* @vsi: PF VSI pointer
*
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index e7308e381e2f..3c36e3641b9e 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4699,8 +4699,8 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
struct net_device *netdev;
u8 mac_addr[ETH_ALEN];
- netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
- vsi->alloc_rxq);
+ netdev = alloc_etherdev_mqs(sizeof(*np), ice_get_max_txq(vsi->back),
+ ice_get_max_rxq(vsi->back));
if (!netdev)
return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c
index 90f99443a922..096566c697f4 100644
--- a/drivers/net/ethernet/intel/ice/ice_repr.c
+++ b/drivers/net/ethernet/intel/ice/ice_repr.c
@@ -2,6 +2,7 @@
/* Copyright (C) 2019-2021, Intel Corporation. */
#include "ice.h"
+#include "ice_lib.h"
#include "ice_eswitch.h"
#include "devlink/devlink.h"
#include "devlink/port.h"
@@ -67,7 +68,7 @@ ice_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
return;
vsi = repr->src_vsi;
- ice_update_vsi_stats(vsi);
+ ice_update_eth_stats(vsi);
eth_stats = &vsi->eth_stats;
stats->tx_packets = eth_stats->tx_unicast + eth_stats->tx_broadcast +
@@ -315,7 +316,7 @@ ice_repr_reg_netdev(struct net_device *netdev, const struct net_device_ops *ops)
static int ice_repr_ready_vf(struct ice_repr *repr)
{
- return !ice_check_vf_ready_for_cfg(repr->vf);
+ return ice_check_vf_ready_for_cfg(repr->vf);
}
static int ice_repr_ready_sf(struct ice_repr *repr)
diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
index b206fba092c8..ec1b75f039bb 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -1066,7 +1066,7 @@ bool idpf_vport_set_hsplit(const struct idpf_vport *vport, u8 val);
int idpf_idc_init(struct idpf_adapter *adapter);
int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter,
enum iidc_function_type ftype);
-void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info);
+void idpf_idc_deinit_core_aux_device(struct idpf_adapter *adapter);
void idpf_idc_deinit_vport_aux_device(struct iidc_rdma_vport_dev_info *vdev_info);
void idpf_idc_issue_reset_event(struct iidc_rdma_core_dev_info *cdev_info);
void idpf_idc_vdev_mtu_event(struct iidc_rdma_vport_dev_info *vdev_info,
diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c
index bd4785fb8d3e..7e4f4ac92653 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_idc.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c
@@ -470,10 +470,11 @@ err_privd_alloc:
/**
* idpf_idc_deinit_core_aux_device - de-initialize Auxiliary Device(s)
- * @cdev_info: IDC core device info pointer
+ * @adapter: driver private data structure
*/
-void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info)
+void idpf_idc_deinit_core_aux_device(struct idpf_adapter *adapter)
{
+ struct iidc_rdma_core_dev_info *cdev_info = adapter->cdev_info;
struct iidc_rdma_priv_dev_info *privd;
if (!cdev_info)
@@ -485,6 +486,7 @@ void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info)
kfree(privd->mapped_mem_regions);
kfree(privd);
kfree(cdev_info);
+ adapter->cdev_info = NULL;
}
/**
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 252259993022..f6b3b15364ff 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -1860,13 +1860,13 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport,
idpf_queue_assign(HSPLIT_EN, q, hs);
idpf_queue_assign(RSC_EN, q, rsc);
- bufq_set->num_refillqs = num_rxq;
bufq_set->refillqs = kcalloc(num_rxq, swq_size,
GFP_KERNEL);
if (!bufq_set->refillqs) {
err = -ENOMEM;
goto err_alloc;
}
+ bufq_set->num_refillqs = num_rxq;
for (unsigned int k = 0; k < bufq_set->num_refillqs; k++) {
struct idpf_sw_queue *refillq =
&bufq_set->refillqs[k];
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
index d5a877e1fef8..113ecfc16dd7 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
@@ -3668,7 +3668,7 @@ void idpf_vc_core_deinit(struct idpf_adapter *adapter)
idpf_ptp_release(adapter);
idpf_deinit_task(adapter);
- idpf_idc_deinit_core_aux_device(adapter->cdev_info);
+ idpf_idc_deinit_core_aux_device(adapter);
idpf_rel_rx_pt_lkup(adapter);
idpf_intr_rel(adapter);
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index a3845edf0e48..f0b5dd752f08 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -3053,6 +3053,11 @@ static void lan743x_phylink_mac_link_up(struct phylink_config *config,
else if (speed == SPEED_100)
mac_cr |= MAC_CR_CFG_L_;
+ if (duplex == DUPLEX_FULL)
+ mac_cr |= MAC_CR_DPX_;
+ else
+ mac_cr &= ~MAC_CR_DPX_;
+
lan743x_csr_write(adapter, MAC_CR, mac_cr);
lan743x_ptp_update_latency(adapter, speed);
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 9017e806ecda..dca62fb9a3a9 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -3425,6 +3425,7 @@ static int add_adev(struct gdma_dev *gd, const char *name)
struct auxiliary_device *adev;
struct mana_adev *madev;
int ret;
+ int id;
madev = kzalloc_obj(*madev);
if (!madev)
@@ -3434,7 +3435,8 @@ static int add_adev(struct gdma_dev *gd, const char *name)
ret = mana_adev_idx_alloc();
if (ret < 0)
goto idx_fail;
- adev->id = ret;
+ id = ret;
+ adev->id = id;
adev->name = name;
adev->dev.parent = gd->gdma_context->dev;
@@ -3460,7 +3462,7 @@ add_fail:
auxiliary_device_uninit(adev);
init_fail:
- mana_adev_idx_free(adev->id);
+ mana_adev_idx_free(id);
idx_fail:
kfree(madev);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 8d040e611d5a..637e635bbf03 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1719,13 +1719,18 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa)
if (ether_addr_equal(netdev->dev_addr, mac))
return 0;
- err = ionic_program_mac(lif, mac);
- if (err < 0)
- return err;
+ /* Only program macs for virtual functions to avoid losing the permanent
+ * Mac across warm reset/reboot.
+ */
+ if (lif->ionic->pdev->is_virtfn) {
+ err = ionic_program_mac(lif, mac);
+ if (err < 0)
+ return err;
- if (err > 0)
- netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n",
- __func__);
+ if (err > 0)
+ netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n",
+ __func__);
+ }
err = eth_prepare_mac_addr_change(netdev, addr);
if (err)
diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c
index 0a3cf2f848a5..fd4e7622f123 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_common.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
@@ -962,7 +962,6 @@ static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id,
pkt_len -= 4;
cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL);
psdata = cppi5_hdesc_get_psdata(desc_rx);
- k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx);
count++;
xsk_buff_set_size(xdp, pkt_len);
xsk_buff_dma_sync_for_cpu(xdp);
@@ -988,6 +987,7 @@ static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id,
emac_dispatch_skb_zc(emac, xdp, psdata);
xsk_buff_free(xdp);
}
+ k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx);
}
if (xdp_status & ICSSG_XDP_REDIR)
@@ -1057,7 +1057,6 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state)
/* firmware adds 4 CRC bytes, strip them */
pkt_len -= 4;
cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL);
- k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx);
/* if allocation fails we drop the packet but push the
* descriptor back to the ring with old page to prevent a stall
@@ -1115,6 +1114,7 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state)
ndev->stats.rx_packets++;
requeue:
+ k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx);
/* queue another RX DMA */
ret = prueth_dma_rx_push_mapped(emac, &emac->rx_chns, new_page,
PRUETH_MAX_PKT_SIZE);
diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index b7282f5c9632..120aeb539d9f 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -2058,6 +2058,68 @@ static const struct ethtool_ops team_ethtool_ops = {
* rt netlink interface
***********************/
+/* For tx path we need a linkup && enabled port and for parse any port
+ * suffices.
+ */
+static struct team_port *team_header_port_get_rcu(struct team *team,
+ bool txable)
+{
+ struct team_port *port;
+
+ list_for_each_entry_rcu(port, &team->port_list, list) {
+ if (!txable || team_port_txable(port))
+ return port;
+ }
+
+ return NULL;
+}
+
+static int team_header_create(struct sk_buff *skb, struct net_device *team_dev,
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
+{
+ struct team *team = netdev_priv(team_dev);
+ const struct header_ops *port_ops;
+ struct team_port *port;
+ int ret = 0;
+
+ rcu_read_lock();
+ port = team_header_port_get_rcu(team, true);
+ if (port) {
+ port_ops = READ_ONCE(port->dev->header_ops);
+ if (port_ops && port_ops->create)
+ ret = port_ops->create(skb, port->dev,
+ type, daddr, saddr, len);
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+static int team_header_parse(const struct sk_buff *skb,
+ const struct net_device *team_dev,
+ unsigned char *haddr)
+{
+ struct team *team = netdev_priv(team_dev);
+ const struct header_ops *port_ops;
+ struct team_port *port;
+ int ret = 0;
+
+ rcu_read_lock();
+ port = team_header_port_get_rcu(team, false);
+ if (port) {
+ port_ops = READ_ONCE(port->dev->header_ops);
+ if (port_ops && port_ops->parse)
+ ret = port_ops->parse(skb, port->dev, haddr);
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+static const struct header_ops team_header_ops = {
+ .create = team_header_create,
+ .parse = team_header_parse,
+};
+
static void team_setup_by_port(struct net_device *dev,
struct net_device *port_dev)
{
@@ -2066,7 +2128,8 @@ static void team_setup_by_port(struct net_device *dev,
if (port_dev->type == ARPHRD_ETHER)
dev->header_ops = team->header_ops_cache;
else
- dev->header_ops = port_dev->header_ops;
+ dev->header_ops = port_dev->header_ops ?
+ &team_header_ops : NULL;
dev->type = port_dev->type;
dev->hard_header_len = port_dev->hard_header_len;
dev->needed_headroom = port_dev->needed_headroom;
diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h
index a5f93b6c4482..fa5cab9d3e55 100644
--- a/drivers/net/tun_vnet.h
+++ b/drivers/net/tun_vnet.h
@@ -244,7 +244,7 @@ tun_vnet_hdr_tnl_from_skb(unsigned int flags,
if (virtio_net_hdr_tnl_from_skb(skb, tnl_hdr, has_tnl_offload,
tun_vnet_is_little_endian(flags),
- vlan_hlen, true)) {
+ vlan_hlen, true, false)) {
struct virtio_net_hdr_v1 *hdr = &tnl_hdr->hash_hdr.hdr;
struct skb_shared_info *sinfo = skb_shinfo(skb);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 72d6a9c6a5a2..ab2108ee206a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3267,8 +3267,12 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
struct virtio_net_hdr_v1_hash_tunnel *hdr;
int num_sg;
unsigned hdr_len = vi->hdr_len;
+ bool feature_hdrlen;
bool can_push;
+ feature_hdrlen = virtio_has_feature(vi->vdev,
+ VIRTIO_NET_F_GUEST_HDRLEN);
+
pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
/* Make sure it's safe to cast between formats */
@@ -3288,7 +3292,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl,
virtio_is_little_endian(vi->vdev), 0,
- false))
+ false, feature_hdrlen))
return -EPROTO;
if (vi->mergeable_rx_bufs)
@@ -3351,6 +3355,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
/* Don't wait up for transmitted skbs to be freed. */
if (!use_napi) {
skb_orphan(skb);
+ skb_dst_drop(skb);
nf_reset_ct(skb);
}
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 75dabb763c65..f36d21b5bc19 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -207,6 +207,39 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
return __virtio_net_hdr_to_skb(skb, hdr, little_endian, hdr->gso_type);
}
+/* This function must be called after virtio_net_hdr_from_skb(). */
+static inline void __virtio_net_set_hdrlen(const struct sk_buff *skb,
+ struct virtio_net_hdr *hdr,
+ bool little_endian)
+{
+ u16 hdr_len;
+
+ hdr_len = skb_transport_offset(skb);
+
+ if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4)
+ hdr_len += sizeof(struct udphdr);
+ else
+ hdr_len += tcp_hdrlen(skb);
+
+ hdr->hdr_len = __cpu_to_virtio16(little_endian, hdr_len);
+}
+
+/* This function must be called after virtio_net_hdr_from_skb(). */
+static inline void __virtio_net_set_tnl_hdrlen(const struct sk_buff *skb,
+ struct virtio_net_hdr *hdr)
+{
+ u16 hdr_len;
+
+ hdr_len = skb_inner_transport_offset(skb);
+
+ if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4)
+ hdr_len += sizeof(struct udphdr);
+ else
+ hdr_len += inner_tcp_hdrlen(skb);
+
+ hdr->hdr_len = __cpu_to_virtio16(true, hdr_len);
+}
+
static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
struct virtio_net_hdr *hdr,
bool little_endian,
@@ -385,7 +418,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
bool tnl_hdr_negotiated,
bool little_endian,
int vlan_hlen,
- bool has_data_valid)
+ bool has_data_valid,
+ bool feature_hdrlen)
{
struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr;
unsigned int inner_nh, outer_th;
@@ -394,9 +428,17 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM);
- if (!tnl_gso_type)
- return virtio_net_hdr_from_skb(skb, hdr, little_endian,
- has_data_valid, vlan_hlen);
+ if (!tnl_gso_type) {
+ ret = virtio_net_hdr_from_skb(skb, hdr, little_endian,
+ has_data_valid, vlan_hlen);
+ if (ret)
+ return ret;
+
+ if (feature_hdrlen && hdr->hdr_len)
+ __virtio_net_set_hdrlen(skb, hdr, little_endian);
+
+ return ret;
+ }
/* Tunnel support not negotiated but skb ask for it. */
if (!tnl_hdr_negotiated)
@@ -414,6 +456,9 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
if (ret)
return ret;
+ if (feature_hdrlen && hdr->hdr_len)
+ __virtio_net_set_tnl_hdrlen(skb, hdr);
+
if (skb->protocol == htons(ETH_P_IPV6))
hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6;
else
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 010f1a8fd15f..5172afee5494 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -658,6 +658,7 @@ struct l2cap_conn {
struct sk_buff *rx_skb;
__u32 rx_len;
struct ida tx_ida;
+ __u8 tx_ident;
struct sk_buff_head pending_rx;
struct work_struct pending_rx_work;
diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h
index 78a27ac73070..b2c359c6dd1b 100644
--- a/include/net/codel_impl.h
+++ b/include/net/codel_impl.h
@@ -158,6 +158,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
bool drop;
if (!skb) {
+ vars->first_above_time = 0;
vars->dropping = false;
return skb;
}
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 5a979dcab538..6d936e9f2fd3 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -264,6 +264,20 @@ inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk,
return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
}
+static inline bool inet_use_hash2_on_bind(const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6) {
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ return false;
+
+ if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+ return true;
+ }
+#endif
+ return sk->sk_rcv_saddr != htonl(INADDR_ANY);
+}
+
struct inet_bind_hashbucket *
inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 88b0dd4d8e09..9f8b6814a96a 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -507,12 +507,14 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt,
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int flags);
+void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args,
+ unsigned long now);
void fib6_run_gc(unsigned long expires, struct net *net, bool force);
-
void fib6_gc_cleanup(void);
int fib6_init(void);
+#if IS_ENABLED(CONFIG_IPV6)
/* Add the route to the gc list if it is not already there
*
* The callers should hold f6i->fib6_table->tb6_lock.
@@ -545,6 +547,23 @@ static inline void fib6_remove_gc_list(struct fib6_info *f6i)
hlist_del_init(&f6i->gc_link);
}
+static inline void fib6_may_remove_gc_list(struct net *net,
+ struct fib6_info *f6i)
+{
+ struct fib6_gc_args gc_args;
+
+ if (hlist_unhashed(&f6i->gc_link))
+ return;
+
+ gc_args.timeout = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_interval);
+ gc_args.more = 0;
+
+ rcu_read_lock();
+ fib6_age_exceptions(f6i, &gc_args, jiffies);
+ rcu_read_unlock();
+}
+#endif
+
struct ipv6_route_iter {
struct seq_net_private p;
struct fib6_walker w;
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 3384859a8921..8883575adcc1 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -83,6 +83,11 @@ void nf_conntrack_lock(spinlock_t *lock);
extern spinlock_t nf_conntrack_expect_lock;
+static inline void lockdep_nfct_expect_lock_held(void)
+{
+ lockdep_assert_held(&nf_conntrack_expect_lock);
+}
+
/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 165e7a03b8e9..e9a8350e7ccf 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -22,10 +22,16 @@ struct nf_conntrack_expect {
/* Hash member */
struct hlist_node hnode;
+ /* Network namespace */
+ possible_net_t net;
+
/* We expect this tuple, with the following mask */
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_mask mask;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ struct nf_conntrack_zone zone;
+#endif
/* Usage count. */
refcount_t use;
@@ -40,7 +46,7 @@ struct nf_conntrack_expect {
struct nf_conntrack_expect *this);
/* Helper to assign to new connection */
- struct nf_conntrack_helper *helper;
+ struct nf_conntrack_helper __rcu *helper;
/* The conntrack of the master connection */
struct nf_conn *master;
@@ -62,7 +68,17 @@ struct nf_conntrack_expect {
static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp)
{
- return nf_ct_net(exp->master);
+ return read_pnet(&exp->net);
+}
+
+static inline bool nf_ct_exp_zone_equal_any(const struct nf_conntrack_expect *a,
+ const struct nf_conntrack_zone *b)
+{
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ return a->zone.id == b->id;
+#else
+ return true;
+#endif
}
#define NF_CT_EXP_POLICY_NAME_LEN 16
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 23dd647fe024..b73983a17e08 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -59,7 +59,7 @@ struct netns_xfrm {
struct list_head inexact_bins;
- struct sock *nlsk;
+ struct sock __rcu *nlsk;
struct sock *nlsk_stash;
u32 sysctl_aevent_etime;
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 26071021e986..56b6b60a814f 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -159,5 +159,9 @@ enum ip_conntrack_expect_events {
#define NF_CT_EXPECT_INACTIVE 0x2
#define NF_CT_EXPECT_USERSPACE 0x4
+#ifdef __KERNEL__
+#define NF_CT_EXPECT_MASK (NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE | \
+ NF_CT_EXPECT_USERSPACE)
+#endif
#endif /* _UAPI_NF_CONNTRACK_COMMON_H */
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 6eb59e9f2aa8..e6393f17576b 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -3095,7 +3095,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
* hci_connect_le serializes the connection attempts so only one
* connection can be in BT_CONNECT at time.
*/
- if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) {
+ if (conn->state == BT_CONNECT && READ_ONCE(hdev->req_status) == HCI_REQ_PEND) {
switch (hci_skb_event(hdev->sent_cmd)) {
case HCI_EV_CONN_COMPLETE:
case HCI_EV_LE_CONN_COMPLETE:
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 31308c1de4ec..01f8ceeb1c0c 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -4126,7 +4126,7 @@ static int hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb)
kfree_skb(skb);
}
- if (hdev->req_status == HCI_REQ_PEND &&
+ if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND &&
!hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) {
kfree_skb(hdev->req_skb);
hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL);
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 3166914b0d6c..45d16639874a 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -25,11 +25,11 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
{
bt_dev_dbg(hdev, "result 0x%2.2x", result);
- if (hdev->req_status != HCI_REQ_PEND)
+ if (READ_ONCE(hdev->req_status) != HCI_REQ_PEND)
return;
hdev->req_result = result;
- hdev->req_status = HCI_REQ_DONE;
+ WRITE_ONCE(hdev->req_status, HCI_REQ_DONE);
/* Free the request command so it is not used as response */
kfree_skb(hdev->req_skb);
@@ -167,20 +167,20 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
hci_cmd_sync_add(&req, opcode, plen, param, event, sk);
- hdev->req_status = HCI_REQ_PEND;
+ WRITE_ONCE(hdev->req_status, HCI_REQ_PEND);
err = hci_req_sync_run(&req);
if (err < 0)
return ERR_PTR(err);
err = wait_event_interruptible_timeout(hdev->req_wait_q,
- hdev->req_status != HCI_REQ_PEND,
+ READ_ONCE(hdev->req_status) != HCI_REQ_PEND,
timeout);
if (err == -ERESTARTSYS)
return ERR_PTR(-EINTR);
- switch (hdev->req_status) {
+ switch (READ_ONCE(hdev->req_status)) {
case HCI_REQ_DONE:
err = -bt_to_errno(hdev->req_result);
break;
@@ -194,7 +194,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
break;
}
- hdev->req_status = 0;
+ WRITE_ONCE(hdev->req_status, 0);
hdev->req_result = 0;
skb = hdev->req_rsp;
hdev->req_rsp = NULL;
@@ -665,9 +665,9 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err)
{
bt_dev_dbg(hdev, "err 0x%2.2x", err);
- if (hdev->req_status == HCI_REQ_PEND) {
+ if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) {
hdev->req_result = err;
- hdev->req_status = HCI_REQ_CANCELED;
+ WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED);
queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work);
}
@@ -683,12 +683,12 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err)
{
bt_dev_dbg(hdev, "err 0x%2.2x", err);
- if (hdev->req_status == HCI_REQ_PEND) {
+ if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) {
/* req_result is __u32 so error must be positive to be properly
* propagated.
*/
hdev->req_result = err < 0 ? -err : err;
- hdev->req_status = HCI_REQ_CANCELED;
+ WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED);
wake_up_interruptible(&hdev->req_wait_q);
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 5deb6c4f1e41..95c65fece39b 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -926,16 +926,39 @@ int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator)
static int l2cap_get_ident(struct l2cap_conn *conn)
{
+ u8 max;
+ int ident;
+
/* LE link does not support tools like l2ping so use the full range */
if (conn->hcon->type == LE_LINK)
- return ida_alloc_range(&conn->tx_ida, 1, 255, GFP_ATOMIC);
-
+ max = 255;
/* Get next available identificator.
* 1 - 128 are used by kernel.
* 129 - 199 are reserved.
* 200 - 254 are used by utilities like l2ping, etc.
*/
- return ida_alloc_range(&conn->tx_ida, 1, 128, GFP_ATOMIC);
+ else
+ max = 128;
+
+ /* Allocate ident using min as last used + 1 (cyclic) */
+ ident = ida_alloc_range(&conn->tx_ida, READ_ONCE(conn->tx_ident) + 1,
+ max, GFP_ATOMIC);
+ /* Force min 1 to start over */
+ if (ident <= 0) {
+ ident = ida_alloc_range(&conn->tx_ida, 1, max, GFP_ATOMIC);
+ if (ident <= 0) {
+ /* If all idents are in use, log an error, this is
+ * extremely unlikely to happen and would indicate a bug
+ * in the code that idents are not being freed properly.
+ */
+ BT_ERR("Unable to allocate ident: %d", ident);
+ return 0;
+ }
+ }
+
+ WRITE_ONCE(conn->tx_ident, ident);
+
+ return ident;
}
static void l2cap_send_acl(struct l2cap_conn *conn, struct sk_buff *skb,
@@ -1748,6 +1771,9 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
+ disable_delayed_work_sync(&conn->info_timer);
+ disable_delayed_work_sync(&conn->id_addr_timer);
+
mutex_lock(&conn->lock);
kfree_skb(conn->rx_skb);
@@ -1763,8 +1789,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
ida_destroy(&conn->tx_ida);
- cancel_delayed_work_sync(&conn->id_addr_timer);
-
l2cap_unregister_all_users(conn);
/* Force the connection to be immediately dropped */
@@ -1783,9 +1807,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
l2cap_chan_put(chan);
}
- if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)
- cancel_delayed_work_sync(&conn->info_timer);
-
hci_chan_del(conn->hchan);
conn->hchan = NULL;
@@ -2377,6 +2398,9 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan,
/* Remote device may have requested smaller PDUs */
pdu_len = min_t(size_t, pdu_len, chan->remote_mps);
+ if (!pdu_len)
+ return -EINVAL;
+
if (len <= pdu_len) {
sar = L2CAP_SAR_UNSEGMENTED;
sdu_len = 0;
@@ -4312,14 +4336,16 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
if (test_bit(CONF_INPUT_DONE, &chan->conf_state)) {
set_default_fcs(chan);
- if (chan->mode == L2CAP_MODE_ERTM ||
- chan->mode == L2CAP_MODE_STREAMING)
- err = l2cap_ertm_init(chan);
+ if (chan->state != BT_CONNECTED) {
+ if (chan->mode == L2CAP_MODE_ERTM ||
+ chan->mode == L2CAP_MODE_STREAMING)
+ err = l2cap_ertm_init(chan);
- if (err < 0)
- l2cap_send_disconn_req(chan, -err);
- else
- l2cap_chan_ready(chan);
+ if (err < 0)
+ l2cap_send_disconn_req(chan, -err);
+ else
+ l2cap_chan_ready(chan);
+ }
goto unlock;
}
@@ -5081,14 +5107,14 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
cmd_len -= sizeof(*req);
num_scid = cmd_len / sizeof(u16);
- /* Always respond with the same number of scids as in the request */
- rsp_len = cmd_len;
-
if (num_scid > L2CAP_ECRED_MAX_CID) {
result = L2CAP_CR_LE_INVALID_PARAMS;
goto response;
}
+ /* Always respond with the same number of scids as in the request */
+ rsp_len = cmd_len;
+
mtu = __le16_to_cpu(req->mtu);
mps = __le16_to_cpu(req->mps);
@@ -6607,6 +6633,10 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan)
struct l2cap_le_credits pkt;
u16 return_credits = l2cap_le_rx_credits(chan);
+ if (chan->mode != L2CAP_MODE_LE_FLOWCTL &&
+ chan->mode != L2CAP_MODE_EXT_FLOWCTL)
+ return;
+
if (chan->rx_credits >= return_credits)
return;
@@ -6690,6 +6720,11 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
if (!chan->sdu) {
u16 sdu_len;
+ if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE)) {
+ err = -EINVAL;
+ goto failed;
+ }
+
sdu_len = get_unaligned_le16(skb->data);
skb_pull(skb, L2CAP_SDULEN_SIZE);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 597686790371..71e8c1b45bce 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1698,6 +1698,9 @@ static void l2cap_sock_ready_cb(struct l2cap_chan *chan)
struct sock *sk = chan->data;
struct sock *parent;
+ if (!sk)
+ return;
+
lock_sock(sk);
parent = bt_sk(sk)->parent;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d52238ce6a9a..e5f9287fb826 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -5355,7 +5355,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev,
* hci_adv_monitors_clear is about to be called which will take care of
* freeing the adv_monitor instances.
*/
- if (status == -ECANCELED && !mgmt_pending_valid(hdev, cmd))
+ if (status == -ECANCELED || !mgmt_pending_valid(hdev, cmd))
return;
monitor = cmd->user_data;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index e7db50165879..584e059de20a 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -401,7 +401,7 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
struct sock *sk;
sco_conn_lock(conn);
- sk = conn->sk;
+ sk = sco_sock_hold(conn);
sco_conn_unlock(conn);
if (!sk)
@@ -410,11 +410,15 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
BT_DBG("sk %p len %u", sk, skb->len);
if (sk->sk_state != BT_CONNECTED)
- goto drop;
+ goto drop_put;
- if (!sock_queue_rcv_skb(sk, skb))
+ if (!sock_queue_rcv_skb(sk, skb)) {
+ sock_put(sk);
return;
+ }
+drop_put:
+ sock_put(sk);
drop:
kfree_skb(skb);
}
diff --git a/net/can/af_can.c b/net/can/af_can.c
index f70e2ba0aadc..7bc86b176b4d 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -469,7 +469,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
rcv->can_id = can_id;
rcv->mask = mask;
- rcv->matches = 0;
+ atomic_long_set(&rcv->matches, 0);
rcv->func = func;
rcv->data = data;
rcv->ident = ident;
@@ -573,7 +573,7 @@ EXPORT_SYMBOL(can_rx_unregister);
static inline void deliver(struct sk_buff *skb, struct receiver *rcv)
{
rcv->func(skb, rcv->data);
- rcv->matches++;
+ atomic_long_inc(&rcv->matches);
}
static int can_rcv_filter(struct can_dev_rcv_lists *dev_rcv_lists, struct sk_buff *skb)
diff --git a/net/can/af_can.h b/net/can/af_can.h
index 22f3352c77fe..87887014f562 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -52,7 +52,7 @@ struct receiver {
struct hlist_node list;
canid_t can_id;
canid_t mask;
- unsigned long matches;
+ atomic_long_t matches;
void (*func)(struct sk_buff *skb, void *data);
void *data;
char *ident;
diff --git a/net/can/gw.c b/net/can/gw.c
index 8ee4d67a07d3..0ec99f68aa45 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -375,10 +375,10 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf,
return;
if (from <= to) {
- for (i = crc8->from_idx; i <= crc8->to_idx; i++)
+ for (i = from; i <= to; i++)
crc = crc8->crctab[crc ^ cf->data[i]];
} else {
- for (i = crc8->from_idx; i >= crc8->to_idx; i--)
+ for (i = from; i >= to; i--)
crc = crc8->crctab[crc ^ cf->data[i]];
}
@@ -397,7 +397,7 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf,
break;
}
- cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val;
+ cf->data[res] = crc ^ crc8->final_xor_val;
}
static void cgw_csum_crc8_pos(struct canfd_frame *cf,
diff --git a/net/can/isotp.c b/net/can/isotp.c
index da3b72e7afcc..2770f43f4951 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1248,12 +1248,6 @@ static int isotp_release(struct socket *sock)
so->ifindex = 0;
so->bound = 0;
- if (so->rx.buf != so->rx.sbuf)
- kfree(so->rx.buf);
-
- if (so->tx.buf != so->tx.sbuf)
- kfree(so->tx.buf);
-
sock_orphan(sk);
sock->sk = NULL;
@@ -1622,6 +1616,21 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
return NOTIFY_DONE;
}
+static void isotp_sock_destruct(struct sock *sk)
+{
+ struct isotp_sock *so = isotp_sk(sk);
+
+ /* do the standard CAN sock destruct work */
+ can_sock_destruct(sk);
+
+ /* free potential extended PDU buffers */
+ if (so->rx.buf != so->rx.sbuf)
+ kfree(so->rx.buf);
+
+ if (so->tx.buf != so->tx.sbuf)
+ kfree(so->tx.buf);
+}
+
static int isotp_init(struct sock *sk)
{
struct isotp_sock *so = isotp_sk(sk);
@@ -1666,6 +1675,9 @@ static int isotp_init(struct sock *sk)
list_add_tail(&so->notifier, &isotp_notifier_list);
spin_unlock(&isotp_notifier_lock);
+ /* re-assign default can_sock_destruct() reference */
+ sk->sk_destruct = isotp_sock_destruct;
+
return 0;
}
diff --git a/net/can/proc.c b/net/can/proc.c
index 0938bf7dd646..de4d05ae3459 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -196,7 +196,8 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list,
" %-5s %03x %08x %pK %pK %8ld %s\n";
seq_printf(m, fmt, DNAME(dev), r->can_id, r->mask,
- r->func, r->data, r->matches, r->ident);
+ r->func, r->data, atomic_long_read(&r->matches),
+ r->ident);
}
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 14a83f2035b9..fc5557062414 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3769,6 +3769,22 @@ static netdev_features_t dflt_features_check(struct sk_buff *skb,
return vlan_features_check(skb, features);
}
+static bool skb_gso_has_extension_hdr(const struct sk_buff *skb)
+{
+ if (!skb->encapsulation)
+ return ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 ||
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
+ vlan_get_protocol(skb) == htons(ETH_P_IPV6))) &&
+ skb_transport_header_was_set(skb) &&
+ skb_network_header_len(skb) != sizeof(struct ipv6hdr));
+ else
+ return (!skb_inner_network_header_was_set(skb) ||
+ ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 ||
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
+ inner_ip_hdr(skb)->version == 6)) &&
+ skb_inner_network_header_len(skb) != sizeof(struct ipv6hdr)));
+}
+
static netdev_features_t gso_features_check(const struct sk_buff *skb,
struct net_device *dev,
netdev_features_t features)
@@ -3816,11 +3832,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb,
* so neither does TSO that depends on it.
*/
if (features & NETIF_F_IPV6_CSUM &&
- (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 ||
- (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
- vlan_get_protocol(skb) == htons(ETH_P_IPV6))) &&
- skb_transport_header_was_set(skb) &&
- skb_network_header_len(skb) != sizeof(struct ipv6hdr))
+ skb_gso_has_extension_hdr(skb))
features &= ~(NETIF_F_IPV6_CSUM | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4);
return features;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dad4b1054955..fae8034efbff 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -629,6 +629,9 @@ int rtnl_link_register(struct rtnl_link_ops *ops)
unlock:
mutex_unlock(&link_ops_mutex);
+ if (err)
+ cleanup_srcu_struct(&ops->srcu);
+
return err;
}
EXPORT_SYMBOL_GPL(rtnl_link_register);
@@ -707,11 +710,14 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev)
goto out;
ops = master_dev->rtnl_link_ops;
- if (!ops || !ops->get_slave_size)
+ if (!ops)
+ goto out;
+ size += nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_SLAVE_KIND */
+ if (!ops->get_slave_size)
goto out;
/* IFLA_INFO_SLAVE_DATA + nested data */
- size = nla_total_size(sizeof(struct nlattr)) +
- ops->get_slave_size(master_dev, dev);
+ size += nla_total_size(sizeof(struct nlattr)) +
+ ops->get_slave_size(master_dev, dev);
out:
rcu_read_unlock();
@@ -1267,6 +1273,21 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev)
return size;
}
+static size_t rtnl_dev_parent_size(const struct net_device *dev)
+{
+ size_t size = 0;
+
+ /* IFLA_PARENT_DEV_NAME */
+ if (dev->dev.parent)
+ size += nla_total_size(strlen(dev_name(dev->dev.parent)) + 1);
+
+ /* IFLA_PARENT_DEV_BUS_NAME */
+ if (dev->dev.parent && dev->dev.parent->bus)
+ size += nla_total_size(strlen(dev->dev.parent->bus->name) + 1);
+
+ return size;
+}
+
static noinline size_t if_nlmsg_size(const struct net_device *dev,
u32 ext_filter_mask)
{
@@ -1328,6 +1349,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(8) /* IFLA_MAX_PACING_OFFLOAD_HORIZON */
+ nla_total_size(2) /* IFLA_HEADROOM */
+ nla_total_size(2) /* IFLA_TAILROOM */
+ + rtnl_dev_parent_size(dev)
+ 0;
if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS))
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 2c922afadb8f..6dfc0bcdef65 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -235,10 +235,13 @@ static void esp_output_done(void *data, int err)
xfrm_dev_resume(skb);
} else {
if (!err &&
- x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
- esp_output_tail_tcp(x, skb);
- else
+ x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) {
+ err = esp_output_tail_tcp(x, skb);
+ if (err != -EINPROGRESS)
+ kfree_skb(skb);
+ } else {
xfrm_output_resume(skb_to_full_sk(skb), skb, err);
+ }
}
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5dfac6ce1110..e961936b6be7 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -154,20 +154,6 @@ bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
}
EXPORT_SYMBOL(inet_sk_get_local_port_range);
-static bool inet_use_bhash2_on_bind(const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6) {
- if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
- return false;
-
- if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
- return true;
- }
-#endif
- return sk->sk_rcv_saddr != htonl(INADDR_ANY);
-}
-
static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
kuid_t uid, bool relax,
bool reuseport_cb_ok, bool reuseport_ok)
@@ -259,7 +245,7 @@ static int inet_csk_bind_conflict(const struct sock *sk,
* checks separately because their spinlocks have to be acquired/released
* independently of each other, to prevent possible deadlocks
*/
- if (inet_use_bhash2_on_bind(sk))
+ if (inet_use_hash2_on_bind(sk))
return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax,
reuseport_cb_ok, reuseport_ok);
@@ -376,7 +362,7 @@ other_parity_scan:
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
- if (inet_use_bhash2_on_bind(sk)) {
+ if (inet_use_hash2_on_bind(sk)) {
if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false))
goto next_port;
}
@@ -562,7 +548,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
check_bind_conflict = false;
}
- if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) {
+ if (check_bind_conflict && inet_use_hash2_on_bind(sk)) {
if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true))
goto fail_unlock;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b60fad393e18..cb99a3c27053 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -287,7 +287,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
} else {
hslot = udp_hashslot(udptable, net, snum);
spin_lock_bh(&hslot->lock);
- if (hslot->count > 10) {
+ if (inet_use_hash2_on_bind(sk) && hslot->count > 10) {
int exist;
unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0e55f139e05d..f4e23b543585 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2862,7 +2862,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
fib6_add_gc_list(rt);
} else {
fib6_clean_expires(rt);
- fib6_remove_gc_list(rt);
+ fib6_may_remove_gc_list(net, rt);
}
spin_unlock_bh(&table->tb6_lock);
@@ -4840,7 +4840,7 @@ static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp,
if (!(flags & RTF_EXPIRES)) {
fib6_clean_expires(f6i);
- fib6_remove_gc_list(f6i);
+ fib6_may_remove_gc_list(net, f6i);
} else {
fib6_set_expires(f6i, expires);
fib6_add_gc_list(f6i);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index e75da98f5283..9f75313734f8 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -271,10 +271,13 @@ static void esp_output_done(void *data, int err)
xfrm_dev_resume(skb);
} else {
if (!err &&
- x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
- esp_output_tail_tcp(x, skb);
- else
+ x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) {
+ err = esp_output_tail_tcp(x, skb);
+ if (err != -EINPROGRESS)
+ kfree_skb(skb);
+ } else {
xfrm_output_resume(skb_to_full_sk(skb), skb, err);
+ }
}
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 9058e71241dc..dd26657b6a4a 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1133,7 +1133,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
return -EEXIST;
if (!(rt->fib6_flags & RTF_EXPIRES)) {
fib6_clean_expires(iter);
- fib6_remove_gc_list(iter);
+ fib6_may_remove_gc_list(info->nl_net, iter);
} else {
fib6_set_expires(iter, rt->expires);
fib6_add_gc_list(iter);
@@ -2348,6 +2348,17 @@ static void fib6_flush_trees(struct net *net)
/*
* Garbage collection
*/
+void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ bool may_expire = rt->fib6_flags & RTF_EXPIRES && rt->expires;
+ int old_more = gc_args->more;
+
+ rt6_age_exceptions(rt, gc_args, now);
+
+ if (!may_expire && old_more == gc_args->more)
+ fib6_remove_gc_list(rt);
+}
static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
{
@@ -2370,7 +2381,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
* Note, that clones are aged out
* only if they are not in use now.
*/
- rt6_age_exceptions(rt, gc_args, now);
+ fib6_age_exceptions(rt, gc_args, now);
return 0;
}
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 4ad8b2032f1f..5561bd9cea81 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -157,6 +157,10 @@ static int rt_mt6_check(const struct xt_mtchk_param *par)
pr_debug("unknown flags %X\n", rtinfo->invflags);
return -EINVAL;
}
+ if (rtinfo->addrnr > IP6T_RT_HOPS) {
+ pr_debug("too many addresses specified\n");
+ return -EINVAL;
+ }
if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) &&
(!(rtinfo->flags & IP6T_RT_TYP) ||
(rtinfo->rt_type != 0) ||
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 08cd86f49bf9..cb521700cee7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1033,7 +1033,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
if (!addrconf_finite_timeout(lifetime)) {
fib6_clean_expires(rt);
- fib6_remove_gc_list(rt);
+ fib6_may_remove_gc_list(net, rt);
} else {
fib6_set_expires(rt, jiffies + HZ * lifetime);
fib6_add_gc_list(rt);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 0756bac62f7c..72ac2ace419d 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3518,7 +3518,7 @@ static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress *
static int set_ipsecrequest(struct sk_buff *skb,
uint8_t proto, uint8_t mode, int level,
- uint32_t reqid, uint8_t family,
+ uint32_t reqid, sa_family_t family,
const xfrm_address_t *src, const xfrm_address_t *dst)
{
struct sadb_x_ipsecrequest *rq;
@@ -3583,12 +3583,17 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
/* ipsecrequests */
for (i = 0, mp = m; i < num_bundles; i++, mp++) {
- /* old locator pair */
- size_pol += sizeof(struct sadb_x_ipsecrequest) +
- pfkey_sockaddr_pair_size(mp->old_family);
- /* new locator pair */
- size_pol += sizeof(struct sadb_x_ipsecrequest) +
- pfkey_sockaddr_pair_size(mp->new_family);
+ int pair_size;
+
+ pair_size = pfkey_sockaddr_pair_size(mp->old_family);
+ if (!pair_size)
+ return -EINVAL;
+ size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size;
+
+ pair_size = pfkey_sockaddr_pair_size(mp->new_family);
+ if (!pair_size)
+ return -EINVAL;
+ size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size;
}
size += sizeof(struct sadb_msg) + size_pol;
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index a7552a46d6ac..4f39bf7c843f 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -21,6 +21,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
unsigned int timeout)
{
const struct nf_conntrack_helper *helper;
+ struct net *net = read_pnet(&ct->ct_net);
struct nf_conntrack_expect *exp;
struct iphdr *iph = ip_hdr(skb);
struct rtable *rt = skb_rtable(skb);
@@ -70,8 +71,11 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
exp->expectfn = NULL;
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
- exp->helper = NULL;
-
+ rcu_assign_pointer(exp->helper, helper);
+ write_pnet(&exp->net, net);
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ exp->zone = ct->zone;
+#endif
nf_ct_expect_related(exp, 0);
nf_ct_expect_put(exp);
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 81baf2082604..9df159448b89 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -247,6 +247,8 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
+ lockdep_nfct_expect_lock_held();
+
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (!notify)
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index cfc2daa3fc7f..24d0576d84b7 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -51,6 +51,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
struct net *net = nf_ct_exp_net(exp);
struct nf_conntrack_net *cnet;
+ lockdep_nfct_expect_lock_held();
WARN_ON(!master_help);
WARN_ON(timer_pending(&exp->timeout));
@@ -112,12 +113,14 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
const struct net *net)
{
return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
- net_eq(net, nf_ct_net(i->master)) &&
- nf_ct_zone_equal_any(i->master, zone);
+ net_eq(net, read_pnet(&i->net)) &&
+ nf_ct_exp_zone_equal_any(i, zone);
}
bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
{
+ lockdep_nfct_expect_lock_held();
+
if (timer_delete(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
@@ -177,6 +180,8 @@ nf_ct_find_expectation(struct net *net,
struct nf_conntrack_expect *i, *exp = NULL;
unsigned int h;
+ lockdep_nfct_expect_lock_held();
+
if (!cnet->expect_count)
return NULL;
@@ -309,12 +314,20 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
+/* This function can only be used from packet path, where accessing
+ * master's helper is safe, because the packet holds a reference on
+ * the conntrack object. Never use it from control plane.
+ */
void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
u_int8_t family,
const union nf_inet_addr *saddr,
const union nf_inet_addr *daddr,
u_int8_t proto, const __be16 *src, const __be16 *dst)
{
+ struct nf_conntrack_helper *helper = NULL;
+ struct nf_conn *ct = exp->master;
+ struct net *net = read_pnet(&ct->ct_net);
+ struct nf_conn_help *help;
int len;
if (family == AF_INET)
@@ -325,7 +338,16 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
exp->flags = 0;
exp->class = class;
exp->expectfn = NULL;
- exp->helper = NULL;
+
+ help = nfct_help(ct);
+ if (help)
+ helper = rcu_dereference(help->helper);
+
+ rcu_assign_pointer(exp->helper, helper);
+ write_pnet(&exp->net, net);
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ exp->zone = ct->zone;
+#endif
exp->tuple.src.l3num = family;
exp->tuple.dst.protonum = proto;
@@ -442,6 +464,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
unsigned int h;
int ret = 0;
+ lockdep_nfct_expect_lock_held();
+
if (!master_help) {
ret = -ESHUTDOWN;
goto out;
@@ -498,8 +522,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
nf_ct_expect_insert(expect);
- spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+
return 0;
out:
spin_unlock_bh(&nf_conntrack_expect_lock);
@@ -627,11 +652,15 @@ static int exp_seq_show(struct seq_file *s, void *v)
{
struct nf_conntrack_expect *expect;
struct nf_conntrack_helper *helper;
+ struct net *net = seq_file_net(s);
struct hlist_node *n = v;
char *delim = "";
expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
+ if (!net_eq(nf_ct_exp_net(expect), net))
+ return 0;
+
if (expect->timeout.function)
seq_printf(s, "%ld ", timer_pending(&expect->timeout)
? (long)(expect->timeout.expires - jiffies)/HZ : 0);
@@ -654,7 +683,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
if (expect->flags & NF_CT_EXPECT_USERSPACE)
seq_printf(s, "%sUSERSPACE", delim);
- helper = rcu_dereference(nfct_help(expect->master)->helper);
+ helper = rcu_dereference(expect->helper);
if (helper) {
seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
if (helper->expect_policy[expect->class].name[0])
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index a2a0e22ccee1..3f5c50455b71 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3,
IPPROTO_TCP, NULL, &port);
- exp->helper = &nf_conntrack_helper_h245;
+ rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245);
nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb,
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
- exp->helper = nf_conntrack_helper_q931;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3 : NULL,
&ct->tuplehash[!dir].tuple.dst.u3,
IPPROTO_TCP, NULL, &port);
- exp->helper = nf_conntrack_helper_q931;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */
nathook = rcu_dereference(nfct_h323_nat_hook);
@@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_UDP, NULL, &port);
- exp->helper = nf_conntrack_helper_ras;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect RAS ");
@@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
exp->flags = NF_CT_EXPECT_PERMANENT;
- exp->helper = nf_conntrack_helper_q931;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
@@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
exp->flags = NF_CT_EXPECT_PERMANENT;
- exp->helper = nf_conntrack_helper_q931;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index ceb48c3ca0a4..1b330ba6613b 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -395,14 +395,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
{
- struct nf_conn_help *help = nfct_help(exp->master);
const struct nf_conntrack_helper *me = data;
const struct nf_conntrack_helper *this;
- if (exp->helper == me)
- return true;
-
- this = rcu_dereference_protected(help->helper,
+ this = rcu_dereference_protected(exp->helper,
lockdep_is_held(&nf_conntrack_expect_lock));
return this == me;
}
@@ -421,6 +417,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
nf_ct_iterate_destroy(unhelp, me);
+
+ /* nf_ct_iterate_destroy() does an unconditional synchronize_rcu() as
+ * last step, this ensures rcu readers of exp->helper are done.
+ * No need for another synchronize_rcu() here.
+ */
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c156574e1273..3f408f3713bb 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -910,8 +910,8 @@ struct ctnetlink_filter {
};
static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = {
- [CTA_FILTER_ORIG_FLAGS] = { .type = NLA_U32 },
- [CTA_FILTER_REPLY_FLAGS] = { .type = NLA_U32 },
+ [CTA_FILTER_ORIG_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL),
+ [CTA_FILTER_REPLY_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL),
};
static int ctnetlink_parse_filter(const struct nlattr *attr,
@@ -925,17 +925,11 @@ static int ctnetlink_parse_filter(const struct nlattr *attr,
if (ret)
return ret;
- if (tb[CTA_FILTER_ORIG_FLAGS]) {
+ if (tb[CTA_FILTER_ORIG_FLAGS])
filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]);
- if (filter->orig_flags & ~CTA_FILTER_F_ALL)
- return -EOPNOTSUPP;
- }
- if (tb[CTA_FILTER_REPLY_FLAGS]) {
+ if (tb[CTA_FILTER_REPLY_FLAGS])
filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]);
- if (filter->reply_flags & ~CTA_FILTER_F_ALL)
- return -EOPNOTSUPP;
- }
return 0;
}
@@ -2634,7 +2628,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
[CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN - 1 },
[CTA_EXPECT_ZONE] = { .type = NLA_U16 },
- [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
+ [CTA_EXPECT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_CT_EXPECT_MASK),
[CTA_EXPECT_CLASS] = { .type = NLA_U32 },
[CTA_EXPECT_NAT] = { .type = NLA_NESTED },
[CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
@@ -3012,7 +3006,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
{
struct nf_conn *master = exp->master;
long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ;
- struct nf_conn_help *help;
+ struct nf_conntrack_helper *helper;
#if IS_ENABLED(CONFIG_NF_NAT)
struct nlattr *nest_parms;
struct nf_conntrack_tuple nat_tuple = {};
@@ -3057,15 +3051,12 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
goto nla_put_failure;
- help = nfct_help(master);
- if (help) {
- struct nf_conntrack_helper *helper;
- helper = rcu_dereference(help->helper);
- if (helper &&
- nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name))
- goto nla_put_failure;
- }
+ helper = rcu_dereference(exp->helper);
+ if (helper &&
+ nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name))
+ goto nla_put_failure;
+
expfn = nf_ct_helper_expectfn_find_by_symbol(exp->expectfn);
if (expfn != NULL &&
nla_put_string(skb, CTA_EXPECT_FN, expfn->name))
@@ -3358,31 +3349,37 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
if (err < 0)
return err;
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
- if (!exp)
+ if (!exp) {
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ kfree_skb(skb2);
return -ENOENT;
+ }
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ kfree_skb(skb2);
return -ENOENT;
}
}
- skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!skb2) {
- nf_ct_expect_put(exp);
- return -ENOMEM;
- }
-
rcu_read_lock();
err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
exp);
rcu_read_unlock();
nf_ct_expect_put(exp);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+
if (err <= 0) {
kfree_skb(skb2);
return -ENOMEM;
@@ -3394,12 +3391,9 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
{
struct nf_conntrack_helper *helper;
- const struct nf_conn_help *m_help;
const char *name = data;
- m_help = nfct_help(exp->master);
-
- helper = rcu_dereference(m_help->helper);
+ helper = rcu_dereference(exp->helper);
if (!helper)
return false;
@@ -3432,22 +3426,26 @@ static int ctnetlink_del_expect(struct sk_buff *skb,
if (err < 0)
return err;
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
/* bump usage count to 2 */
exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
- if (!exp)
+ if (!exp) {
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return -ENOENT;
+ }
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return -ENOENT;
}
}
/* after list removal, usage count == 1 */
- spin_lock_bh(&nf_conntrack_expect_lock);
if (timer_delete(&exp->timeout)) {
nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
nlmsg_report(info->nlh));
@@ -3534,9 +3532,10 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask)
{
- u_int32_t class = 0;
+ struct net *net = read_pnet(&ct->ct_net);
struct nf_conntrack_expect *exp;
struct nf_conn_help *help;
+ u32 class = 0;
int err;
help = nfct_help(ct);
@@ -3573,7 +3572,13 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
exp->class = class;
exp->master = ct;
- exp->helper = helper;
+ write_pnet(&exp->net, net);
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ exp->zone = ct->zone;
+#endif
+ if (!helper)
+ helper = rcu_dereference(help->helper);
+ rcu_assign_pointer(exp->helper, helper);
exp->tuple = *tuple;
exp->mask.src.u3 = mask->src.u3;
exp->mask.src.u.all = mask->src.u.all;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 0c1d086e96cb..b67426c2189b 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1385,9 +1385,9 @@ nla_put_failure:
}
static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
- [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
- [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
- [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_TCP_STATE] = NLA_POLICY_MAX(NLA_U8, TCP_CONNTRACK_SYN_SENT2),
+ [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE),
+ [CTA_PROTOINFO_TCP_WSCALE_REPLY] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE),
[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
[CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
};
@@ -1414,10 +1414,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
if (err < 0)
return err;
- if (tb[CTA_PROTOINFO_TCP_STATE] &&
- nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
- return -EINVAL;
-
spin_lock_bh(&ct->lock);
if (tb[CTA_PROTOINFO_TCP_STATE])
ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 4ab5ef71d96d..939502ff7c87 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -924,7 +924,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
if (!exp || exp->master == ct ||
- nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
+ exp->helper != nfct_help(ct)->helper ||
exp->class != class)
break;
#if IS_ENABLED(CONFIG_NF_NAT)
@@ -1040,6 +1040,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
unsigned int port;
const struct sdp_media_type *t;
int ret = NF_ACCEPT;
+ bool have_rtp_addr = false;
hooks = rcu_dereference(nf_nat_sip_hooks);
@@ -1056,8 +1057,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
caddr_len = 0;
if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen,
SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
- &matchoff, &matchlen, &caddr) > 0)
+ &matchoff, &matchlen, &caddr) > 0) {
caddr_len = matchlen;
+ memcpy(&rtp_addr, &caddr, sizeof(rtp_addr));
+ have_rtp_addr = true;
+ }
mediaoff = sdpoff;
for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) {
@@ -1091,9 +1095,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
&matchoff, &matchlen, &maddr) > 0) {
maddr_len = matchlen;
memcpy(&rtp_addr, &maddr, sizeof(rtp_addr));
- } else if (caddr_len)
+ have_rtp_addr = true;
+ } else if (caddr_len) {
memcpy(&rtp_addr, &caddr, sizeof(rtp_addr));
- else {
+ have_rtp_addr = true;
+ } else {
nf_ct_helper_log(skb, ct, "cannot parse SDP message");
return NF_DROP;
}
@@ -1125,7 +1131,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
/* Update session connection and owner addresses */
hooks = rcu_dereference(nf_nat_sip_hooks);
- if (hooks && ct->status & IPS_NAT_MASK)
+ if (hooks && ct->status & IPS_NAT_MASK && have_rtp_addr)
ret = hooks->sdp_session(skb, protoff, dataoff,
dptr, datalen, sdpoff,
&rtp_addr);
@@ -1297,7 +1303,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct),
saddr, &daddr, proto, NULL, &port);
exp->timeout.expires = sip_timeout * HZ;
- exp->helper = helper;
+ rcu_assign_pointer(exp->helper, helper);
exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
hooks = rcu_dereference(nf_nat_sip_hooks);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index b35a90955e2e..fcbe54940b2e 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -647,15 +647,11 @@ __build_packet_message(struct nfnl_log_net *log,
if (data_len) {
struct nlattr *nla;
- int size = nla_attr_size(data_len);
- if (skb_tailroom(inst->skb) < nla_total_size(data_len))
+ nla = nla_reserve(inst->skb, NFULA_PAYLOAD, data_len);
+ if (!nla)
goto nla_put_failure;
- nla = skb_put(inst->skb, nla_total_size(data_len));
- nla->nla_type = NFULA_PAYLOAD;
- nla->nla_len = size;
-
if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
BUG();
}
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 7ff90325c97f..6395982e4d95 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -242,7 +242,7 @@ static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -319,7 +319,7 @@ static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -414,7 +414,7 @@ static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -505,7 +505,7 @@ static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -641,7 +641,7 @@ static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -699,7 +699,7 @@ static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -764,7 +764,7 @@ static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -839,7 +839,7 @@ static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -925,7 +925,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -1019,7 +1019,7 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index fe8bd497d74a..737c339decd0 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -572,14 +572,12 @@ static struct nft_array *nft_array_alloc(u32 max_intervals)
return array;
}
-#define NFT_ARRAY_EXTRA_SIZE 10240
-
/* Similar to nft_rbtree_{u,k}size to hide details to userspace, but consider
* packed representation coming from userspace for anonymous sets too.
*/
static u32 nft_array_elems(const struct nft_set *set)
{
- u32 nelems = atomic_read(&set->nelems);
+ u32 nelems = atomic_read(&set->nelems) - set->ndeact;
/* Adjacent intervals are represented with a single start element in
* anonymous sets, use the current element counter as is.
@@ -595,27 +593,87 @@ static u32 nft_array_elems(const struct nft_set *set)
return (nelems / 2) + 2;
}
-static int nft_array_may_resize(const struct nft_set *set)
+#define NFT_ARRAY_INITIAL_SIZE 1024
+#define NFT_ARRAY_INITIAL_ANON_SIZE 16
+#define NFT_ARRAY_INITIAL_ANON_THRESH (8192U / sizeof(struct nft_array_interval))
+
+static int nft_array_may_resize(const struct nft_set *set, bool flush)
{
- u32 nelems = nft_array_elems(set), new_max_intervals;
+ u32 initial_intervals, max_intervals, new_max_intervals, delta;
+ u32 shrinked_max_intervals, nelems = nft_array_elems(set);
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_array *array;
- if (!priv->array_next) {
- array = nft_array_alloc(nelems + NFT_ARRAY_EXTRA_SIZE);
- if (!array)
- return -ENOMEM;
+ if (nft_set_is_anonymous(set))
+ initial_intervals = NFT_ARRAY_INITIAL_ANON_SIZE;
+ else
+ initial_intervals = NFT_ARRAY_INITIAL_SIZE;
+
+ if (priv->array_next) {
+ max_intervals = priv->array_next->max_intervals;
+ new_max_intervals = priv->array_next->max_intervals;
+ } else {
+ if (priv->array) {
+ max_intervals = priv->array->max_intervals;
+ new_max_intervals = priv->array->max_intervals;
+ } else {
+ max_intervals = 0;
+ new_max_intervals = initial_intervals;
+ }
+ }
- priv->array_next = array;
+ if (nft_set_is_anonymous(set))
+ goto maybe_grow;
+
+ if (flush) {
+ /* Set flush just started, nelems still report elements.*/
+ nelems = 0;
+ new_max_intervals = NFT_ARRAY_INITIAL_SIZE;
+ goto realloc_array;
}
- if (nelems < priv->array_next->max_intervals)
- return 0;
+ if (check_add_overflow(new_max_intervals, new_max_intervals,
+ &shrinked_max_intervals))
+ return -EOVERFLOW;
+
+ shrinked_max_intervals = DIV_ROUND_UP(shrinked_max_intervals, 3);
- new_max_intervals = priv->array_next->max_intervals + NFT_ARRAY_EXTRA_SIZE;
- if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0)
+ if (shrinked_max_intervals > NFT_ARRAY_INITIAL_SIZE &&
+ nelems < shrinked_max_intervals) {
+ new_max_intervals = shrinked_max_intervals;
+ goto realloc_array;
+ }
+maybe_grow:
+ if (nelems > new_max_intervals) {
+ if (nft_set_is_anonymous(set) &&
+ new_max_intervals < NFT_ARRAY_INITIAL_ANON_THRESH) {
+ new_max_intervals <<= 1;
+ } else {
+ delta = new_max_intervals >> 1;
+ if (check_add_overflow(new_max_intervals, delta,
+ &new_max_intervals))
+ return -EOVERFLOW;
+ }
+ }
+
+realloc_array:
+ if (WARN_ON_ONCE(nelems > new_max_intervals))
return -ENOMEM;
+ if (priv->array_next) {
+ if (max_intervals == new_max_intervals)
+ return 0;
+
+ if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0)
+ return -ENOMEM;
+ } else {
+ array = nft_array_alloc(new_max_intervals);
+ if (!array)
+ return -ENOMEM;
+
+ priv->array_next = array;
+ }
+
return 0;
}
@@ -630,7 +688,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
nft_rbtree_maybe_reset_start_cookie(priv, tstamp);
- if (nft_array_may_resize(set) < 0)
+ if (nft_array_may_resize(set, false) < 0)
return -ENOMEM;
do {
@@ -741,7 +799,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_null(set, this))
priv->start_rbe_cookie = 0;
- if (nft_array_may_resize(set) < 0)
+ if (nft_array_may_resize(set, false) < 0)
return NULL;
while (parent != NULL) {
@@ -811,7 +869,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
switch (iter->type) {
case NFT_ITER_UPDATE_CLONE:
- if (nft_array_may_resize(set) < 0) {
+ if (nft_array_may_resize(set, true) < 0) {
iter->err = -ENOMEM;
break;
}
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 43d871525dbc..5f46c4b5720f 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -579,8 +579,7 @@ static int nci_close_device(struct nci_dev *ndev)
skb_queue_purge(&ndev->rx_q);
skb_queue_purge(&ndev->tx_q);
- /* Flush RX and TX wq */
- flush_workqueue(ndev->rx_wq);
+ /* Flush TX wq, RX wq flush can't be under the lock */
flush_workqueue(ndev->tx_wq);
/* Reset device */
@@ -592,13 +591,13 @@ static int nci_close_device(struct nci_dev *ndev)
msecs_to_jiffies(NCI_RESET_TIMEOUT));
/* After this point our queues are empty
- * and no works are scheduled.
+ * rx work may be running but will see that NCI_UP was cleared
*/
ndev->ops->close(ndev);
clear_bit(NCI_INIT, &ndev->flags);
- /* Flush cmd wq */
+ /* Flush cmd and tx wq */
flush_workqueue(ndev->cmd_wq);
timer_delete_sync(&ndev->cmd_timer);
@@ -613,6 +612,9 @@ static int nci_close_device(struct nci_dev *ndev)
mutex_unlock(&ndev->req_lock);
+ /* rx_work may take req_lock via nci_deactivate_target */
+ flush_workqueue(ndev->rx_wq);
+
return 0;
}
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 67fbf6e48a30..13052408a132 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2953,6 +2953,8 @@ static int validate_set(const struct nlattr *a,
case OVS_KEY_ATTR_MPLS:
if (!eth_p_mpls(eth_type))
return -EINVAL;
+ if (key_len != sizeof(struct ovs_key_mpls))
+ return -EINVAL;
break;
case OVS_KEY_ATTR_SCTP:
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 6574f9bcdc02..12055af832dc 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -151,11 +151,15 @@ static void vport_netdev_free(struct rcu_head *rcu)
void ovs_netdev_detach_dev(struct vport *vport)
{
ASSERT_RTNL();
- vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
netdev_rx_handler_unregister(vport->dev);
netdev_upper_dev_unlink(vport->dev,
netdev_master_upper_dev_get(vport->dev));
dev_set_promiscuity(vport->dev, -1);
+
+ /* paired with smp_mb() in netdev_destroy() */
+ smp_wmb();
+
+ vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
}
static void netdev_destroy(struct vport *vport)
@@ -174,6 +178,9 @@ static void netdev_destroy(struct vport *vport)
rtnl_unlock();
}
+ /* paired with smp_wmb() in ovs_netdev_detach_dev() */
+ smp_mb();
+
call_rcu(&vport->rcu, vport_netdev_free);
}
@@ -189,8 +196,6 @@ void ovs_netdev_tunnel_destroy(struct vport *vport)
*/
if (vport->dev->reg_state == NETREG_REGISTERED)
rtnl_delete_link(vport->dev, 0, NULL);
- netdev_put(vport->dev, &vport->dev_tracker);
- vport->dev = NULL;
rtnl_unlock();
call_rcu(&vport->rcu, vport_netdev_free);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 72d0935139f0..bb2d88205e5a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3135,6 +3135,7 @@ static int packet_release(struct socket *sock)
spin_lock(&po->bind_lock);
unregister_prot_hook(sk, false);
+ WRITE_ONCE(po->num, 0);
packet_cached_dev_reset(po);
if (po->prot_hook.dev) {
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index d833e36f7fd4..c1d9b923938d 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -135,9 +135,16 @@ out:
sock_put(sk);
}
+static bool smc_rx_pipe_buf_get(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ /* smc_spd_priv in buf->private is not shareable; disallow cloning. */
+ return false;
+}
+
static const struct pipe_buf_operations smc_pipe_ops = {
.release = smc_rx_pipe_buf_release,
- .get = generic_pipe_buf_get
+ .get = smc_rx_pipe_buf_get,
};
static void smc_rx_spd_release(struct splice_pipe_desc *spd,
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 5fe07f110fe8..dd9dda759bbb 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -246,6 +246,7 @@ static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx)
crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
atomic_inc(&ctx->decrypt_pending);
+ __skb_queue_purge(&ctx->async_hold);
return ctx->async_wait.err;
}
@@ -2225,7 +2226,6 @@ recv_end:
/* Wait for all previously submitted records to be decrypted */
ret = tls_decrypt_async_wait(ctx);
- __skb_queue_purge(&ctx->async_hold);
if (ret) {
if (err >= 0 || err == -EINPROGRESS)
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 4ed346e682c7..dc1312ed5a09 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -75,7 +75,10 @@ int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo)
spin_lock_bh(&xfrm_input_afinfo_lock);
if (likely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family])) {
- if (unlikely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family] != afinfo))
+ const struct xfrm_input_afinfo *cur;
+
+ cur = rcu_access_pointer(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family]);
+ if (unlikely(cur != afinfo))
err = -EINVAL;
else
RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family], NULL);
diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c
index 050a82101ca5..97bc979e55ba 100644
--- a/net/xfrm/xfrm_iptfs.c
+++ b/net/xfrm/xfrm_iptfs.c
@@ -901,6 +901,12 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) {
iptfs_skb_add_frags(newskb, fragwalk, data, copylen);
} else {
+ if (skb_linearize(newskb)) {
+ XFRM_INC_STATS(xs_net(xtfs->x),
+ LINUX_MIB_XFRMINBUFFERERROR);
+ goto abandon;
+ }
+
/* copy fragment data into newskb */
if (skb_copy_seq_read(st, data, skb_put(newskb, copylen),
copylen)) {
@@ -991,6 +997,11 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
iplen = be16_to_cpu(iph->tot_len);
iphlen = iph->ihl << 2;
+ if (iplen < iphlen || iphlen < sizeof(*iph)) {
+ XFRM_INC_STATS(net,
+ LINUX_MIB_XFRMINHDRERROR);
+ goto done;
+ }
protocol = cpu_to_be16(ETH_P_IP);
XFRM_MODE_SKB_CB(skbseq->root_skb)->tos = iph->tos;
} else if (iph->version == 0x6) {
@@ -2653,9 +2664,6 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig)
if (!xtfs)
return -ENOMEM;
- x->mode_data = xtfs;
- xtfs->x = x;
-
xtfs->ra_newskb = NULL;
if (xtfs->cfg.reorder_win_size) {
xtfs->w_saved = kzalloc_objs(*xtfs->w_saved,
@@ -2666,6 +2674,9 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig)
}
}
+ x->mode_data = xtfs;
+ xtfs->x = x;
+
return 0;
}
diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c
index ebf95d48e86c..1856beee0149 100644
--- a/net/xfrm/xfrm_nat_keepalive.c
+++ b/net/xfrm/xfrm_nat_keepalive.c
@@ -261,7 +261,7 @@ int __net_init xfrm_nat_keepalive_net_init(struct net *net)
int xfrm_nat_keepalive_net_fini(struct net *net)
{
- cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work);
+ disable_delayed_work_sync(&net->xfrm.nat_keepalive_work);
return 0;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9e1a522ab1c5..362939aa56cf 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -4156,7 +4156,7 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo)
int i;
for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) {
- if (xfrm_policy_afinfo[i] != afinfo)
+ if (rcu_access_pointer(xfrm_policy_afinfo[i]) != afinfo)
continue;
RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL);
break;
@@ -4242,7 +4242,7 @@ static int __net_init xfrm_policy_init(struct net *net)
net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
htab = &net->xfrm.policy_bydst[dir];
- htab->table = xfrm_hash_alloc(sz);
+ rcu_assign_pointer(htab->table, xfrm_hash_alloc(sz));
if (!htab->table)
goto out_bydst;
htab->hmask = hmask;
@@ -4269,7 +4269,7 @@ out_bydst:
struct xfrm_policy_hash *htab;
htab = &net->xfrm.policy_bydst[dir];
- xfrm_hash_free(htab->table, sz);
+ xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz);
}
xfrm_hash_free(net->xfrm.policy_byidx, sz);
out_byidx:
@@ -4282,6 +4282,8 @@ static void xfrm_policy_fini(struct net *net)
unsigned int sz;
int dir;
+ disable_work_sync(&net->xfrm.policy_hthresh.work);
+
flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
@@ -4295,8 +4297,8 @@ static void xfrm_policy_fini(struct net *net)
htab = &net->xfrm.policy_bydst[dir];
sz = (htab->hmask + 1) * sizeof(struct hlist_head);
- WARN_ON(!hlist_empty(htab->table));
- xfrm_hash_free(htab->table, sz);
+ WARN_ON(!hlist_empty(rcu_dereference_protected(htab->table, true)));
+ xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz);
}
sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 98b362d51836..1748d374abca 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -53,7 +53,7 @@ static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
static HLIST_HEAD(xfrm_state_gc_list);
static HLIST_HEAD(xfrm_state_dev_gc_list);
-static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
+static inline bool xfrm_state_hold_rcu(struct xfrm_state *x)
{
return refcount_inc_not_zero(&x->refcnt);
}
@@ -870,7 +870,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
for (i = 0; i <= net->xfrm.state_hmask; i++) {
struct xfrm_state *x;
- hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) {
if (xfrm_id_proto_match(x->id.proto, proto) &&
(err = security_xfrm_state_delete(x)) != 0) {
xfrm_audit_state_delete(x, 0, task_valid);
@@ -891,7 +891,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool
struct xfrm_state *x;
struct xfrm_dev_offload *xso;
- hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) {
xso = &x->xso;
if (xso->dev == dev &&
@@ -931,7 +931,7 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
for (i = 0; i <= net->xfrm.state_hmask; i++) {
struct xfrm_state *x;
restart:
- hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) {
if (!xfrm_state_kern(x) &&
xfrm_id_proto_match(x->id.proto, proto)) {
xfrm_state_hold(x);
@@ -973,7 +973,7 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali
err = -ESRCH;
for (i = 0; i <= net->xfrm.state_hmask; i++) {
restart:
- hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) {
xso = &x->xso;
if (!xfrm_state_kern(x) && xso->dev == dev) {
@@ -1563,23 +1563,23 @@ found:
list_add(&x->km.all, &net->xfrm.state_all);
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
XFRM_STATE_INSERT(bydst, &x->bydst,
- net->xfrm.state_bydst + h,
+ xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h,
x->xso.type);
h = xfrm_src_hash(net, daddr, saddr, encap_family);
XFRM_STATE_INSERT(bysrc, &x->bysrc,
- net->xfrm.state_bysrc + h,
+ xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h,
x->xso.type);
INIT_HLIST_NODE(&x->state_cache);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
XFRM_STATE_INSERT(byspi, &x->byspi,
- net->xfrm.state_byspi + h,
+ xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h,
x->xso.type);
}
if (x->km.seq) {
h = xfrm_seq_hash(net, x->km.seq);
XFRM_STATE_INSERT(byseq, &x->byseq,
- net->xfrm.state_byseq + h,
+ xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h,
x->xso.type);
}
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
@@ -1652,7 +1652,7 @@ xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
spin_lock_bh(&net->xfrm.xfrm_state_lock);
h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
- hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) {
if (x->props.family == family &&
x->props.reqid == reqid &&
(mark & x->mark.m) == x->mark.v &&
@@ -1703,18 +1703,12 @@ static struct xfrm_state *xfrm_state_lookup_spi_proto(struct net *net, __be32 sp
struct xfrm_state *x;
unsigned int i;
- rcu_read_lock();
for (i = 0; i <= net->xfrm.state_hmask; i++) {
- hlist_for_each_entry_rcu(x, &net->xfrm.state_byspi[i], byspi) {
- if (x->id.spi == spi && x->id.proto == proto) {
- if (!xfrm_state_hold_rcu(x))
- continue;
- rcu_read_unlock();
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byspi, net) + i, byspi) {
+ if (x->id.spi == spi && x->id.proto == proto)
return x;
- }
}
}
- rcu_read_unlock();
return NULL;
}
@@ -1730,25 +1724,29 @@ static void __xfrm_state_insert(struct xfrm_state *x)
h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family);
- XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
+ XFRM_STATE_INSERT(bydst, &x->bydst,
+ xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h,
x->xso.type);
h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
- XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
+ XFRM_STATE_INSERT(bysrc, &x->bysrc,
+ xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h,
x->xso.type);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
x->props.family);
- XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h,
+ XFRM_STATE_INSERT(byspi, &x->byspi,
+ xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h,
x->xso.type);
}
if (x->km.seq) {
h = xfrm_seq_hash(net, x->km.seq);
- XFRM_STATE_INSERT(byseq, &x->byseq, net->xfrm.state_byseq + h,
+ XFRM_STATE_INSERT(byseq, &x->byseq,
+ xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h,
x->xso.type);
}
@@ -1775,7 +1773,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
u32 cpu_id = xnew->pcpu_num;
h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
- hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) {
if (x->props.family == family &&
x->props.reqid == reqid &&
x->if_id == if_id &&
@@ -1811,7 +1809,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
struct xfrm_state *x;
u32 mark = m->v & m->m;
- hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) {
if (x->props.reqid != reqid ||
x->props.mode != mode ||
x->props.family != family ||
@@ -1868,10 +1866,12 @@ static struct xfrm_state *__find_acq_core(struct net *net,
ktime_set(net->xfrm.sysctl_acq_expires, 0),
HRTIMER_MODE_REL_SOFT);
list_add(&x->km.all, &net->xfrm.state_all);
- XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
+ XFRM_STATE_INSERT(bydst, &x->bydst,
+ xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h,
x->xso.type);
h = xfrm_src_hash(net, daddr, saddr, family);
- XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
+ XFRM_STATE_INSERT(bysrc, &x->bysrc,
+ xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h,
x->xso.type);
net->xfrm.state_num++;
@@ -2091,7 +2091,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
if (m->reqid) {
h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,
m->reqid, m->old_family);
- hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) {
if (x->props.mode != m->mode ||
x->id.proto != m->proto)
continue;
@@ -2110,7 +2110,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
} else {
h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,
m->old_family);
- hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, bysrc) {
if (x->props.mode != m->mode ||
x->id.proto != m->proto)
continue;
@@ -2264,6 +2264,7 @@ out:
err = 0;
x->km.state = XFRM_STATE_DEAD;
+ xfrm_dev_state_delete(x);
__xfrm_state_put(x);
}
@@ -2312,7 +2313,7 @@ void xfrm_state_update_stats(struct net *net)
spin_lock_bh(&net->xfrm.xfrm_state_lock);
for (i = 0; i <= net->xfrm.state_hmask; i++) {
- hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst)
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst)
xfrm_dev_state_update_stats(x);
}
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
@@ -2503,7 +2504,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
unsigned int h = xfrm_seq_hash(net, seq);
struct xfrm_state *x;
- hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, byseq) {
if (x->km.seq == seq &&
(mark & x->mark.m) == x->mark.v &&
x->pcpu_num == pcpu_num &&
@@ -2602,12 +2603,13 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high,
if (!x0) {
x->id.spi = newspi;
h = xfrm_spi_hash(net, &x->id.daddr, newspi, x->id.proto, x->props.family);
- XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, x->xso.type);
+ XFRM_STATE_INSERT(byspi, &x->byspi,
+ xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h,
+ x->xso.type);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
err = 0;
goto unlock;
}
- xfrm_state_put(x0);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
next:
@@ -3258,6 +3260,7 @@ EXPORT_SYMBOL(xfrm_init_state);
int __net_init xfrm_state_init(struct net *net)
{
+ struct hlist_head *ndst, *nsrc, *nspi, *nseq;
unsigned int sz;
if (net_eq(net, &init_net))
@@ -3268,18 +3271,25 @@ int __net_init xfrm_state_init(struct net *net)
sz = sizeof(struct hlist_head) * 8;
- net->xfrm.state_bydst = xfrm_hash_alloc(sz);
- if (!net->xfrm.state_bydst)
+ ndst = xfrm_hash_alloc(sz);
+ if (!ndst)
goto out_bydst;
- net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
- if (!net->xfrm.state_bysrc)
+ rcu_assign_pointer(net->xfrm.state_bydst, ndst);
+
+ nsrc = xfrm_hash_alloc(sz);
+ if (!nsrc)
goto out_bysrc;
- net->xfrm.state_byspi = xfrm_hash_alloc(sz);
- if (!net->xfrm.state_byspi)
+ rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
+
+ nspi = xfrm_hash_alloc(sz);
+ if (!nspi)
goto out_byspi;
- net->xfrm.state_byseq = xfrm_hash_alloc(sz);
- if (!net->xfrm.state_byseq)
+ rcu_assign_pointer(net->xfrm.state_byspi, nspi);
+
+ nseq = xfrm_hash_alloc(sz);
+ if (!nseq)
goto out_byseq;
+ rcu_assign_pointer(net->xfrm.state_byseq, nseq);
net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
if (!net->xfrm.state_cache_input)
@@ -3295,17 +3305,19 @@ int __net_init xfrm_state_init(struct net *net)
return 0;
out_state_cache_input:
- xfrm_hash_free(net->xfrm.state_byseq, sz);
+ xfrm_hash_free(nseq, sz);
out_byseq:
- xfrm_hash_free(net->xfrm.state_byspi, sz);
+ xfrm_hash_free(nspi, sz);
out_byspi:
- xfrm_hash_free(net->xfrm.state_bysrc, sz);
+ xfrm_hash_free(nsrc, sz);
out_bysrc:
- xfrm_hash_free(net->xfrm.state_bydst, sz);
+ xfrm_hash_free(ndst, sz);
out_bydst:
return -ENOMEM;
}
+#define xfrm_state_deref_netexit(table) \
+ rcu_dereference_protected((table), true /* netns is going away */)
void xfrm_state_fini(struct net *net)
{
unsigned int sz;
@@ -3318,17 +3330,17 @@ void xfrm_state_fini(struct net *net)
WARN_ON(!list_empty(&net->xfrm.state_all));
for (i = 0; i <= net->xfrm.state_hmask; i++) {
- WARN_ON(!hlist_empty(net->xfrm.state_byseq + i));
- WARN_ON(!hlist_empty(net->xfrm.state_byspi + i));
- WARN_ON(!hlist_empty(net->xfrm.state_bysrc + i));
- WARN_ON(!hlist_empty(net->xfrm.state_bydst + i));
+ WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byseq) + i));
+ WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byspi) + i));
+ WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bysrc) + i));
+ WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bydst) + i));
}
sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
- xfrm_hash_free(net->xfrm.state_byseq, sz);
- xfrm_hash_free(net->xfrm.state_byspi, sz);
- xfrm_hash_free(net->xfrm.state_bysrc, sz);
- xfrm_hash_free(net->xfrm.state_bydst, sz);
+ xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byseq), sz);
+ xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byspi), sz);
+ xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bysrc), sz);
+ xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bydst), sz);
free_percpu(net->xfrm.state_cache_input);
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 403b5ecac2c5..1656b487f833 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -35,6 +35,15 @@
#endif
#include <linux/unaligned.h>
+static struct sock *xfrm_net_nlsk(const struct net *net, const struct sk_buff *skb)
+{
+ /* get the source of this request, see netlink_unicast_kernel */
+ const struct sock *sk = NETLINK_CB(skb).sk;
+
+ /* sk is refcounted, the netns stays alive and nlsk with it */
+ return rcu_dereference_protected(net->xfrm.nlsk, sk->sk_net_refcnt);
+}
+
static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type,
struct netlink_ext_ack *extack)
{
@@ -1727,7 +1736,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
err = build_spdinfo(r_skb, net, sportid, seq, *flags);
BUG_ON(err < 0);
- return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
+ return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid);
}
static inline unsigned int xfrm_sadinfo_msgsize(void)
@@ -1787,7 +1796,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
err = build_sadinfo(r_skb, net, sportid, seq, *flags);
BUG_ON(err < 0);
- return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
+ return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid);
}
static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1807,7 +1816,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
if (IS_ERR(resp_skb)) {
err = PTR_ERR(resp_skb);
} else {
- err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);
+ err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid);
}
xfrm_state_put(x);
out_noput:
@@ -1850,6 +1859,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
if (pcpu_num >= num_possible_cpus()) {
err = -EINVAL;
+ NL_SET_ERR_MSG(extack, "pCPU number too big");
goto out_noput;
}
}
@@ -1897,7 +1907,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
- err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);
+ err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid);
out:
xfrm_state_put(x);
@@ -2542,7 +2552,7 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
r_up->out = net->xfrm.policy_default[XFRM_POLICY_OUT];
nlmsg_end(r_skb, r_nlh);
- return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid);
+ return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, portid);
}
static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2608,7 +2618,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
if (IS_ERR(resp_skb)) {
err = PTR_ERR(resp_skb);
} else {
- err = nlmsg_unicast(net->xfrm.nlsk, resp_skb,
+ err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb,
NETLINK_CB(skb).portid);
}
} else {
@@ -2781,7 +2791,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
err = build_aevent(r_skb, x, &c);
BUG_ON(err < 0);
- err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid);
+ err = nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, NETLINK_CB(skb).portid);
spin_unlock_bh(&x->lock);
xfrm_state_put(x);
return err;
@@ -3001,8 +3011,10 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (attrs[XFRMA_SA_PCPU]) {
x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
err = -EINVAL;
- if (x->pcpu_num >= num_possible_cpus())
+ if (x->pcpu_num >= num_possible_cpus()) {
+ NL_SET_ERR_MSG(extack, "pCPU number too big");
goto free_state;
+ }
}
err = verify_newpolicy_info(&ua->policy, extack);
@@ -3483,7 +3495,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err;
}
- err = netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c);
+ err = netlink_dump_start(xfrm_net_nlsk(net, skb), skb, nlh, &c);
goto err;
}
@@ -3673,7 +3685,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
}
if (x->if_id)
l += nla_total_size(sizeof(x->if_id));
- if (x->pcpu_num)
+ if (x->pcpu_num != UINT_MAX)
l += nla_total_size(sizeof(x->pcpu_num));
/* Must count x->lastused as it may become non-zero behind our back. */
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile
index 45a3e7ad3dcb..02d6f51d5a06 100644
--- a/tools/testing/selftests/drivers/net/team/Makefile
+++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -3,6 +3,7 @@
TEST_PROGS := \
dev_addr_lists.sh \
+ non_ether_header_ops.sh \
options.sh \
propagation.sh \
refleak.sh \
diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config
index 558e1d0cf565..5d36a22ef080 100644
--- a/tools/testing/selftests/drivers/net/team/config
+++ b/tools/testing/selftests/drivers/net/team/config
@@ -1,7 +1,9 @@
+CONFIG_BONDING=y
CONFIG_DUMMY=y
CONFIG_IPV6=y
CONFIG_MACVLAN=y
CONFIG_NETDEVSIM=m
+CONFIG_NET_IPGRE=y
CONFIG_NET_TEAM=y
CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y
CONFIG_NET_TEAM_MODE_LOADBALANCE=y
diff --git a/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh
new file mode 100755
index 000000000000..948a43576bdc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# shellcheck disable=SC2154
+#
+# Reproduce the non-Ethernet header_ops confusion scenario with:
+# g0 (gre) -> b0 (bond) -> t0 (team)
+#
+# Before the fix, direct header_ops inheritance in this stack could call
+# callbacks with the wrong net_device context and crash.
+
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/lib.sh
+
+trap cleanup_all_ns EXIT
+
+setup_ns ns1
+
+ip -n "$ns1" link add d0 type dummy
+ip -n "$ns1" addr add 10.10.10.1/24 dev d0
+ip -n "$ns1" link set d0 up
+
+ip -n "$ns1" link add g0 type gre local 10.10.10.1
+ip -n "$ns1" link add b0 type bond mode active-backup
+ip -n "$ns1" link add t0 type team
+
+ip -n "$ns1" link set g0 master b0
+ip -n "$ns1" link set b0 master t0
+
+ip -n "$ns1" link set g0 up
+ip -n "$ns1" link set b0 up
+ip -n "$ns1" link set t0 up
+
+# IPv6 address assignment triggers MLD join reports that call
+# dev_hard_header() on t0, exercising the inherited header_ops path.
+ip -n "$ns1" -6 addr add 2001:db8:1::1/64 dev t0 nodad
+for i in $(seq 1 20); do
+ ip netns exec "$ns1" ping -6 -I t0 ff02::1 -c1 -W1 &>/dev/null || true
+done
+
+echo "PASS: non-Ethernet header_ops stacking did not crash"
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index c5694cc4ddd2..829f72c8ee07 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -868,6 +868,64 @@ fib6_gc_test()
check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l)
log_test $ret 0 "ipv6 route garbage collection (replace with permanent)"
+ # Delete dummy_10 and remove all routes
+ $IP link del dev dummy_10
+
+ # rd6 is required for the next test. (ipv6toolkit)
+ if [ ! -x "$(command -v rd6)" ]; then
+ echo "SKIP: rd6 not found."
+ set +e
+ cleanup &> /dev/null
+ return
+ fi
+
+ setup_ns ns2
+ $IP link add veth1 type veth peer veth2 netns $ns2
+ $IP link set veth1 up
+ ip -netns $ns2 link set veth2 up
+ $IP addr add fe80:dead::1/64 dev veth1
+ ip -netns $ns2 addr add fe80:dead::2/64 dev veth2
+
+ # Add NTF_ROUTER neighbour to prevent rt6_age_examine_exception()
+ # from removing not-yet-expired exceptions.
+ ip -netns $ns2 link set veth2 address 00:11:22:33:44:55
+ $IP neigh add fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router
+
+ $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_redirects=1
+ $NS_EXEC sysctl -wq net.ipv6.conf.veth1.forwarding=0
+
+ # Temporary routes
+ for i in $(seq 1 5); do
+ # Expire route after $EXPIRE seconds
+ $IP -6 route add 2001:10::$i \
+ via fe80:dead::2 dev veth1 expires $EXPIRE
+
+ ip netns exec $ns2 rd6 -i veth2 \
+ -s fe80:dead::2 -d fe80:dead::1 \
+ -r 2001:10::$i -t fe80:dead::3 -p ICMP6
+ done
+
+ check_rt_num 5 $($IP -6 route list | grep expires | grep 2001:10:: | wc -l)
+
+ # Promote to permanent routes by "prepend" (w/o NLM_F_EXCL and NLM_F_REPLACE)
+ for i in $(seq 1 5); do
+ # -EEXIST, but the temporary route becomes the permanent route.
+ $IP -6 route append 2001:10::$i \
+ via fe80:dead::2 dev veth1 2>/dev/null || true
+ done
+
+ check_rt_num 5 $($IP -6 route list | grep -v expires | grep 2001:10:: | wc -l)
+ check_rt_num 5 $($IP -6 route list cache | grep 2001:10:: | wc -l)
+
+ # Trigger GC instead of waiting $GC_WAIT_TIME.
+ # rt6_nh_dump_exceptions() just skips expired exceptions.
+ $NS_EXEC sysctl -wq net.ipv6.route.flush=1
+ check_rt_num 0 $($IP -6 route list cache | grep 2001:10:: | wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (promote to permanent routes)"
+
+ $IP neigh del fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router
+ $IP link del veth1
+
# ra6 is required for the next test. (ipv6toolkit)
if [ ! -x "$(command -v ra6)" ]; then
echo "SKIP: ra6 not found."
@@ -876,9 +934,6 @@ fib6_gc_test()
return
fi
- # Delete dummy_10 and remove all routes
- $IP link del dev dummy_10
-
# Create a pair of veth devices to send a RA message from one
# device to another.
$IP link add veth1 type veth peer name veth2
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 394166f224a4..ffdc6ccc6511 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -29,7 +29,8 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate insert_overlap"
+BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate
+ insert_overlap load_flush_load4 load_flush_load8"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
@@ -432,6 +433,30 @@ race_repeat 0
perf_duration 0
"
+TYPE_load_flush_load4="
+display reload with flush, 4bit groups
+type_spec ipv4_addr . ipv4_addr
+chain_spec ip saddr . ip daddr
+dst addr4
+proto icmp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_load_flush_load8="
+display reload with flush, 8bit groups
+type_spec ipv4_addr . ipv4_addr
+chain_spec ip saddr . ip daddr
+dst addr4
+proto icmp
+
+race_repeat 0
+
+perf_duration 0
+"
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -1997,6 +2022,49 @@ test_bug_insert_overlap()
return 0
}
+test_bug_load_flush_load4()
+{
+ local i
+
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ for i in $(seq 0 255); do
+ local addelem="add element inet filter test"
+ local j
+
+ for j in $(seq 0 20); do
+ echo "$addelem { 10.$j.0.$i . 10.$j.1.$i }"
+ echo "$addelem { 10.$j.0.$i . 10.$j.2.$i }"
+ done
+ done > "$tmp"
+
+ nft -f "$tmp" || return 1
+
+ ( echo "flush set inet filter test";cat "$tmp") | nft -f -
+ [ $? -eq 0 ] || return 1
+
+ return 0
+}
+
+test_bug_load_flush_load8()
+{
+ local i
+
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ for i in $(seq 1 100); do
+ echo "add element inet filter test { 10.0.0.$i . 10.0.1.$i }"
+ echo "add element inet filter test { 10.0.0.$i . 10.0.2.$i }"
+ done > "$tmp"
+
+ nft -f "$tmp" || return 1
+
+ ( echo "flush set inet filter test";cat "$tmp") | nft -f -
+ [ $? -eq 0 ] || return 1
+
+ return 0
+}
+
test_reported_issues() {
eval test_bug_"${subtest}"
}