- max-frame-size: See ethernet.txt in the same directory.
The MAC address will be determined using the optional properties
-defined in ethernet.txt.
+defined in ethernet.txt. The 'phy-mode' property is required, but may
+be set to the empty string if the PHY configuration is programmed by
+the firmware or set by hardware straps, and needs to be preserved.
Example:
eth0: ethernet@522d0000 {
icmp_msgs_per_sec - INTEGER
Limit maximal number of ICMP packets sent per second from this host.
Only messages whose type matches icmp_ratemask (see below) are
- controlled by this limit.
+ controlled by this limit. For security reasons, the precise count
+ of messages per second is randomized.
Default: 1000
icmp_msgs_burst - INTEGER
icmp_msgs_per_sec controls number of ICMP packets sent per second,
while icmp_msgs_burst controls the burst size of these packets.
+ For security reasons, the precise burst size is randomized.
Default: 50
This documentation is based on the LWN.net articles [1]_\ [2]_. Rafal Milecki
also made a very complete and comprehensive summary called "A state of network
acceleration" that describes how things were before this infrastructure was
-mailined [3]_ and it also makes a rough summary of this work [4]_.
+mainlined [3]_ and it also makes a rough summary of this work [4]_.
.. [1] https://lwn.net/Articles/738214/
.. [2] https://lwn.net/Articles/742164/
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Supported
-Q: https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
+W: https://bpf.io/
+Q: https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
F: Documentation/bpf/
unsigned long new_rate;
unsigned int ports_active;
/* Frequenty in Mhz */
- const unsigned long rate_table[] = {
+ static const unsigned long rate_table[] = {
59220000,
60820000,
62500000,
.stats_layout = vsc9953_stats_layout,
.num_stats = ARRAY_SIZE(vsc9953_stats_layout),
.vcap = vsc9953_vcap_props,
- .shared_queue_sz = 2048 * 1024,
+ .shared_queue_sz = 256 * 1024,
.num_mact_rows = 2048,
.num_ports = 10,
.mdio_bus_alloc = vsc9953_mdio_bus_alloc,
default:
err = -1;
goto err_exit;
- break;
}
if (!(self->aq_nic_cfg.aq_hw_caps->link_speed_msk & rate)) {
err = -1;
config CRYPTO_DEV_CHELSIO_TLS
tristate "Chelsio Crypto Inline TLS Driver"
depends on CHELSIO_T4
+ depends on TLS
depends on TLS_TOE
help
Support Chelsio Inline TLS with Chelsio crypto accelerator.
static struct net_device *chtls_find_netdev(struct chtls_dev *cdev,
struct sock *sk)
{
+ struct adapter *adap = pci_get_drvdata(cdev->pdev);
struct net_device *ndev = cdev->ports[0];
#if IS_ENABLED(CONFIG_IPV6)
struct net_device *temp;
int addr_type;
#endif
+ int i;
switch (sk->sk_family) {
case PF_INET:
return NULL;
if (is_vlan_dev(ndev))
- return vlan_dev_real_dev(ndev);
- return ndev;
+ ndev = vlan_dev_real_dev(ndev);
+
+ for_each_port(adap, i)
+ if (cdev->ports[i] == ndev)
+ return ndev;
+ return NULL;
}
static void assign_rxopt(struct sock *sk, unsigned int opt)
chtls_purge_write_queue(sk);
free_tls_keyid(sk);
kref_put(&csk->kref, chtls_sock_release);
- csk->cdev = NULL;
if (sk->sk_family == AF_INET)
sk->sk_prot = &tcp_prot;
#if IS_ENABLED(CONFIG_IPV6)
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == PF_INET6) {
- struct chtls_sock *csk;
+ struct net_device *ndev = chtls_find_netdev(cdev, sk);
int addr_type = 0;
- csk = rcu_dereference_sk_user_data(sk);
addr_type = ipv6_addr_type((const struct in6_addr *)
&sk->sk_v6_rcv_saddr);
if (addr_type != IPV6_ADDR_ANY)
- cxgb4_clip_release(csk->egress_dev, (const u32 *)
+ cxgb4_clip_release(ndev, (const u32 *)
&sk->sk_v6_rcv_saddr, 1);
}
#endif
ndev = n->dev;
if (!ndev)
goto free_dst;
+ if (is_vlan_dev(ndev))
+ ndev = vlan_dev_real_dev(ndev);
+
port_id = cxgb4_port_idx(ndev);
csk = chtls_sock_create(cdev);
return 0;
}
-static int csk_mem_free(struct chtls_dev *cdev, struct sock *sk)
+static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk)
{
- return (cdev->max_host_sndbuf - sk->sk_wmem_queued);
+ return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0);
}
static int csk_wait_memory(struct chtls_dev *cdev,
copied = 0;
csk = rcu_dereference_sk_user_data(sk);
cdev = csk->cdev;
+ lock_sock(sk);
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
err = sk_stream_wait_connect(sk, &timeo);
break;
default:
return -EINVAL;
- break;
}
fsp->h_u.tcp_ip4_spec.ip4src = flow_get_u32_src(&n->keys);
priv->rxdes0_edorr_mask = BIT(30);
priv->txdes0_edotr_mask = BIT(30);
priv->is_aspeed = true;
+ /* Disable ast2600 problematic HW arbitration */
+ if (of_device_is_compatible(np, "aspeed,ast2600-mac")) {
+ iowrite32(FTGMAC100_TM_DEFAULT,
+ priv->base + FTGMAC100_OFFSET_TM);
+ }
} else {
priv->rxdes0_edorr_mask = BIT(15);
priv->txdes0_edotr_mask = BIT(15);
#define FTGMAC100_MACCR_FAST_MODE (1 << 19)
#define FTGMAC100_MACCR_SW_RST (1 << 31)
+/*
+ * test mode control register
+ */
+#define FTGMAC100_TM_RQ_TX_VALID_DIS (1 << 28)
+#define FTGMAC100_TM_RQ_RR_IDLE_PREV (1 << 27)
+#define FTGMAC100_TM_DEFAULT \
+ (FTGMAC100_TM_RQ_TX_VALID_DIS | FTGMAC100_TM_RQ_RR_IDLE_PREV)
+
/*
* PHY control register
*/
dev_err(dev, "Error %ld in CHANGE_MAC_ADDR_RSP\n", rc);
goto out;
}
+ /* crq->change_mac_addr.mac_addr is the requested one
+ * crq->change_mac_addr_rsp.mac_addr is the returned valid one.
+ */
ether_addr_copy(netdev->dev_addr,
&crq->change_mac_addr_rsp.mac_addr[0]);
+ ether_addr_copy(adapter->mac_addr,
+ &crq->change_mac_addr_rsp.mac_addr[0]);
out:
complete(&adapter->fw_done);
return rc;
**/
s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw)
{
+ s32 (*write)(struct mii_bus *bus, int addr, int regnum, u16 val);
+ s32 (*read)(struct mii_bus *bus, int addr, int regnum);
struct ixgbe_adapter *adapter = hw->back;
struct pci_dev *pdev = adapter->pdev;
struct device *dev = &adapter->netdev->dev;
struct mii_bus *bus;
- bus = devm_mdiobus_alloc(dev);
- if (!bus)
- return -ENOMEM;
-
switch (hw->device_id) {
/* C3000 SoCs */
case IXGBE_DEV_ID_X550EM_A_KR:
case IXGBE_DEV_ID_X550EM_A_1G_T:
case IXGBE_DEV_ID_X550EM_A_1G_T_L:
if (!ixgbe_x550em_a_has_mii(hw))
- return -ENODEV;
- bus->read = &ixgbe_x550em_a_mii_bus_read;
- bus->write = &ixgbe_x550em_a_mii_bus_write;
+ return 0;
+ read = &ixgbe_x550em_a_mii_bus_read;
+ write = &ixgbe_x550em_a_mii_bus_write;
break;
default:
- bus->read = &ixgbe_mii_bus_read;
- bus->write = &ixgbe_mii_bus_write;
+ read = &ixgbe_mii_bus_read;
+ write = &ixgbe_mii_bus_write;
break;
}
+ bus = devm_mdiobus_alloc(dev);
+ if (!bus)
+ return -ENOMEM;
+
+ bus->read = read;
+ bus->write = write;
+
/* Use the position of the device in the PCI hierarchy as the id */
snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mdio-%s", ixgbe_driver_name,
pci_name(pdev));
if (ixgbe_read_eerd_generic(hw, pointer, &length)) {
hw_dbg(hw, "EEPROM read failed\n");
return IXGBE_ERR_EEPROM;
- break;
}
/* Skip pointer section if length is invalid. */
return rc;
probe_err_register:
- kfree(KSEG0ADDR(lp->td_ring));
+ kfree((struct dma_desc *)KSEG0ADDR(lp->td_ring));
probe_err_td_ring:
iounmap(lp->tx_dma_regs);
probe_err_dma_tx:
iounmap(lp->eth_regs);
iounmap(lp->rx_dma_regs);
iounmap(lp->tx_dma_regs);
- kfree(KSEG0ADDR(lp->td_ring));
+ kfree((struct dma_desc *)KSEG0ADDR(lp->td_ring));
unregister_netdev(bif->dev);
free_netdev(bif->dev);
config NET_MEDIATEK_STAR_EMAC
tristate "MediaTek STAR Ethernet MAC support"
select PHYLIB
+ select REGMAP_MMIO
help
This driver supports the ethernet MAC IP first used on
MediaTek MT85** SoCs.
phy_disconnect(tp->phydev);
- pci_free_irq(pdev, 0, tp);
+ free_irq(pci_irq_vector(pdev, 0), tp);
dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
tp->RxPhyAddr);
rtl_request_firmware(tp);
- retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, tp,
- dev->name);
+ retval = request_irq(pci_irq_vector(pdev, 0), rtl8169_interrupt,
+ IRQF_NO_THREAD | IRQF_SHARED, dev->name, tp);
if (retval < 0)
goto err_release_fw_2;
return retval;
err_free_irq:
- pci_free_irq(pdev, 0, tp);
+ free_irq(pci_irq_vector(pdev, 0), tp);
err_release_fw_2:
rtl_release_firmware(tp);
rtl8169_rx_clear(tp);
efx->num_mac_stats = MC_CMD_MAC_NSTATS;
BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
mutex_init(&efx->mac_lock);
+ init_rwsem(&efx->filter_sem);
#ifdef CONFIG_RFS_ACCEL
mutex_init(&efx->rps_mutex);
spin_lock_init(&efx->rps_hash_lock);
{
int rc;
- init_rwsem(&efx->filter_sem);
mutex_lock(&efx->mac_lock);
down_write(&efx->filter_sem);
rc = efx->type->filter_table_probe(efx);
#include <linux/pm_runtime.h>
#include <linux/acpi.h>
#include <linux/of_mdio.h>
+#include <linux/of_net.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/io.h>
static int netsec_of_probe(struct platform_device *pdev,
struct netsec_priv *priv, u32 *phy_addr)
{
+ int err;
+
+ err = of_get_phy_mode(pdev->dev.of_node, &priv->phy_interface);
+ if (err) {
+ dev_err(&pdev->dev, "missing required property 'phy-mode'\n");
+ return err;
+ }
+
priv->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
if (!priv->phy_np) {
dev_err(&pdev->dev, "missing required property 'phy-handle'\n");
if (!IS_ENABLED(CONFIG_ACPI))
return -ENODEV;
+ /* ACPI systems are assumed to configure the PHY in firmware, so
+ * there is really no need to discover the PHY mode from the DSDT.
+ * Since firmware is known to exist in the field that configures the
+ * PHY correctly but passes the wrong mode string in the phy-mode
+ * device property, we have no choice but to ignore it.
+ */
+ priv->phy_interface = PHY_INTERFACE_MODE_NA;
+
ret = device_property_read_u32(&pdev->dev, "phy-channel", phy_addr);
if (ret) {
dev_err(&pdev->dev,
priv->msg_enable = NETIF_MSG_TX_ERR | NETIF_MSG_HW | NETIF_MSG_DRV |
NETIF_MSG_LINK | NETIF_MSG_PROBE;
- priv->phy_interface = device_get_phy_mode(&pdev->dev);
- if ((int)priv->phy_interface < 0) {
- dev_err(&pdev->dev, "missing required property 'phy-mode'\n");
- ret = -ENODEV;
- goto free_ndev;
- }
-
priv->ioaddr = devm_ioremap(&pdev->dev, mmio_res->start,
resource_size(mmio_res));
if (!priv->ioaddr) {
config PCS_XPCS
tristate "Synopsys DesignWare XPCS controller"
- select MDIO_BUS
- depends on MDIO_DEVICE
+ depends on MDIO_DEVICE && MDIO_BUS
help
This module provides helper functions for Synopsys DesignWare XPCS
controllers.
ret = get_registers(dev, IDR, sizeof(node_id), node_id);
- if (ret == sizeof(node_id)) {
+ if (!ret) {
ether_addr_copy(dev->netdev->dev_addr, node_id);
} else {
eth_hw_addr_random(dev->netdev);
(1ULL << VIRTIO_NET_F_GUEST_ECN) | \
(1ULL << VIRTIO_NET_F_GUEST_UFO))
-#define GUEST_OFFLOAD_CSUM_MASK (1ULL << VIRTIO_NET_F_GUEST_CSUM)
-
struct virtnet_stat_desc {
char desc[ETH_GSTRING_LEN];
size_t offset;
return 0;
}
-static netdev_features_t virtnet_fix_features(struct net_device *netdev,
- netdev_features_t features)
-{
- /* If Rx checksum is disabled, LRO should also be disabled. */
- if (!(features & NETIF_F_RXCSUM))
- features &= ~NETIF_F_LRO;
-
- return features;
-}
-
static int virtnet_set_features(struct net_device *dev,
netdev_features_t features)
{
struct virtnet_info *vi = netdev_priv(dev);
- u64 offloads = vi->guest_offloads;
+ u64 offloads;
int err;
- /* Don't allow configuration while XDP is active. */
- if (vi->xdp_queue_pairs)
- return -EBUSY;
-
if ((dev->features ^ features) & NETIF_F_LRO) {
+ if (vi->xdp_queue_pairs)
+ return -EBUSY;
+
if (features & NETIF_F_LRO)
- offloads |= GUEST_OFFLOAD_LRO_MASK &
- vi->guest_offloads_capable;
+ offloads = vi->guest_offloads_capable;
else
- offloads &= ~GUEST_OFFLOAD_LRO_MASK;
- }
+ offloads = vi->guest_offloads_capable &
+ ~GUEST_OFFLOAD_LRO_MASK;
- if ((dev->features ^ features) & NETIF_F_RXCSUM) {
- if (features & NETIF_F_RXCSUM)
- offloads |= GUEST_OFFLOAD_CSUM_MASK &
- vi->guest_offloads_capable;
- else
- offloads &= ~GUEST_OFFLOAD_CSUM_MASK;
+ err = virtnet_set_guest_offloads(vi, offloads);
+ if (err)
+ return err;
+ vi->guest_offloads = offloads;
}
- err = virtnet_set_guest_offloads(vi, offloads);
- if (err)
- return err;
-
- vi->guest_offloads = offloads;
return 0;
}
.ndo_features_check = passthru_features_check,
.ndo_get_phys_port_name = virtnet_get_phys_port_name,
.ndo_set_features = virtnet_set_features,
- .ndo_fix_features = virtnet_fix_features,
};
static void virtnet_config_changed_work(struct work_struct *work)
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
dev->features |= NETIF_F_LRO;
- if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
- dev->hw_features |= NETIF_F_RXCSUM;
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
dev->hw_features |= NETIF_F_LRO;
- }
dev->vlan_features = dev->features;
static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *p, struct net_device *orig_dev)
{
- struct hdlc_device *hdlc = dev_to_hdlc(dev);
+ struct hdlc_device *hdlc;
+
+ /* First make sure "dev" is an HDLC device */
+ if (!(dev->priv_flags & IFF_WAN_HDLC)) {
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
+ hdlc = dev_to_hdlc(dev);
if (!net_eq(dev_net(dev), &init_net)) {
kfree_skb(skb);
old_qlen = dev->tx_queue_len;
ether_setup(dev);
dev->tx_queue_len = old_qlen;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
eth_hw_addr_random(dev);
call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev);
netif_dormant_off(dev);
switch(sc->if_type){
case LMC_PPP:
return hdlc_type_trans(skb, sc->lmc_device);
- break;
case LMC_NET:
return htons(ETH_P_802_2);
- break;
case LMC_RAW: /* Packet type for skbuff kind of useless */
return htons(ETH_P_802_2);
- break;
default:
printk(KERN_WARNING "%s: No protocol set for this interface, assuming 802.2 (which is wrong!!)\n", sc->name);
return htons(ETH_P_802_2);
- break;
}
}
skb->len,
st21nfca_hci_data_exchange_cb,
info);
- break;
default:
return 1;
}
case ISO15693_CMD_WRITE_DSFID:
case ISO15693_CMD_LOCK_DSFID:
return 1;
- break;
default:
return 0;
}
void *data_end;
};
+struct bpf_nh_params {
+ u32 nh_family;
+ union {
+ u32 ipv4_nh;
+ struct in6_addr ipv6_nh;
+ };
+};
+
struct bpf_redirect_info {
u32 flags;
u32 tgt_index;
void *tgt_value;
struct bpf_map *map;
u32 kern_flags;
+ struct bpf_nh_params nh;
};
DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
int (*done)(struct netlink_callback *);
void *data;
struct module *module;
- u16 min_dump_alloc;
+ u32 min_dump_alloc;
};
int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
return (struct nft_expr *)&rule->data[rule->dlen];
}
+static inline bool nft_expr_more(const struct nft_rule *rule,
+ const struct nft_expr *expr)
+{
+ return expr != nft_expr_last(rule) && expr->ops;
+}
+
static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
{
return (void *)&rule->data[rule->dlen];
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
* Description
* Redirect the packet to another net device of index *ifindex*
* and fill in L2 addresses from neighboring subsystem. This helper
* is somewhat similar to **bpf_redirect**\ (), except that it
* populates L2 addresses as well, meaning, internally, the helper
- * performs a FIB lookup based on the skb's networking header to
- * get the address of the next hop and then relies on the neighbor
- * lookup for the L2 address of the nexthop.
+ * relies on the neighbor lookup for the L2 address of the nexthop.
+ *
+ * The helper will perform a FIB lookup based on the skb's
+ * networking header to get the address of the next hop, unless
+ * this is supplied by the caller in the *params* argument. The
+ * *plen* argument indicates the len of *params* and should be set
+ * to 0 if *params* is NULL.
*
* The *flags* argument is reserved and must be 0. The helper is
* currently only supported for tc BPF program types, and enabled
__u8 dmac[6]; /* ETH_ALEN */
};
+struct bpf_redir_neigh {
+ /* network family for lookup (AF_INET, AF_INET6) */
+ __u32 nh_family;
+ /* network address of nexthop; skips fib lookup to find gateway */
+ union {
+ __be32 ipv4_nh;
+ __u32 ipv6_nh[4]; /* in6_addr; network order */
+ };
+};
+
enum bpf_task_fd_type {
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
BPF_FD_TYPE_TRACEPOINT, /* tp name */
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
return BPF_PROG_TYPE_CGROUP_SKB;
- break;
case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET_SOCK_RELEASE:
case BPF_CGROUP_INET4_POST_BIND:
regs[BPF_REG_0].id = ++env->id_gen;
} else {
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
- regs[BPF_REG_0].id = ++env->id_gen;
}
} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
- regs[BPF_REG_0].id = ++env->id_gen;
} else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
- regs[BPF_REG_0].id = ++env->id_gen;
} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
- regs[BPF_REG_0].id = ++env->id_gen;
} else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
- regs[BPF_REG_0].id = ++env->id_gen;
regs[BPF_REG_0].mem_size = meta.mem_size;
} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
return -EINVAL;
}
+ if (reg_type_may_be_null(regs[BPF_REG_0].type))
+ regs[BPF_REG_0].id = ++env->id_gen;
+
if (is_ptr_cast_function(func_id)) {
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
struct bpf_reg_state *reg, u32 id,
bool is_null)
{
- if (reg_type_may_be_null(reg->type) && reg->id == id) {
+ if (reg_type_may_be_null(reg->type) && reg->id == id &&
+ !WARN_ON_ONCE(!reg->id)) {
/* Old offset (both fixed and variable parts) should
* have been known-zero, because we don't allow pointer
* arithmetic on pointers that might be NULL.
{
const struct ebt_nat_info *info = par->targinfo;
- if (skb_ensure_writable(skb, ETH_ALEN))
+ if (skb_ensure_writable(skb, 0))
return EBT_DROP;
ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);
{
const struct ebt_redirect_info *info = par->targinfo;
- if (skb_ensure_writable(skb, ETH_ALEN))
+ if (skb_ensure_writable(skb, 0))
return EBT_DROP;
if (xt_hooknum(par) != NF_BR_BROUTING)
{
const struct ebt_nat_info *info = par->targinfo;
- if (skb_ensure_writable(skb, ETH_ALEN * 2))
+ if (skb_ensure_writable(skb, 0))
return EBT_DROP;
ether_addr_copy(eth_hdr(skb)->h_source, info->mac);
struct net_device *dev = list_first_entry(&unlink_list,
struct net_device,
unlink_list);
- list_del(&dev->unlink_list);
+ list_del_init(&dev->unlink_list);
dev->nested_level = dev->lower_level - 1;
}
#endif
}
#if IS_ENABLED(CONFIG_IPV6)
-static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
+static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
+ struct net_device *dev, struct bpf_nh_params *nh)
{
- struct dst_entry *dst = skb_dst(skb);
- struct net_device *dev = dst->dev;
u32 hh_len = LL_RESERVED_SPACE(dev);
const struct in6_addr *nexthop;
+ struct dst_entry *dst = NULL;
struct neighbour *neigh;
if (dev_xmit_recursion()) {
}
rcu_read_lock_bh();
- nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
- &ipv6_hdr(skb)->daddr);
+ if (!nh) {
+ dst = skb_dst(skb);
+ nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
+ &ipv6_hdr(skb)->daddr);
+ } else {
+ nexthop = &nh->ipv6_nh;
+ }
neigh = ip_neigh_gw6(dev, nexthop);
if (likely(!IS_ERR(neigh))) {
int ret;
return ret;
}
rcu_read_unlock_bh();
- IP6_INC_STATS(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ if (dst)
+ IP6_INC_STATS(dev_net(dst->dev),
+ ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
out_drop:
kfree_skb(skb);
return -ENETDOWN;
}
-static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
+ struct bpf_nh_params *nh)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct net *net = dev_net(dev);
int err, ret = NET_XMIT_DROP;
- struct dst_entry *dst;
- struct flowi6 fl6 = {
- .flowi6_flags = FLOWI_FLAG_ANYSRC,
- .flowi6_mark = skb->mark,
- .flowlabel = ip6_flowinfo(ip6h),
- .flowi6_oif = dev->ifindex,
- .flowi6_proto = ip6h->nexthdr,
- .daddr = ip6h->daddr,
- .saddr = ip6h->saddr,
- };
- dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
- if (IS_ERR(dst))
- goto out_drop;
+ if (!nh) {
+ struct dst_entry *dst;
+ struct flowi6 fl6 = {
+ .flowi6_flags = FLOWI_FLAG_ANYSRC,
+ .flowi6_mark = skb->mark,
+ .flowlabel = ip6_flowinfo(ip6h),
+ .flowi6_oif = dev->ifindex,
+ .flowi6_proto = ip6h->nexthdr,
+ .daddr = ip6h->daddr,
+ .saddr = ip6h->saddr,
+ };
+
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
+ if (IS_ERR(dst))
+ goto out_drop;
- skb_dst_set(skb, dst);
+ skb_dst_set(skb, dst);
+ } else if (nh->nh_family != AF_INET6) {
+ goto out_drop;
+ }
- err = bpf_out_neigh_v6(net, skb);
+ err = bpf_out_neigh_v6(net, skb, dev, nh);
if (unlikely(net_xmit_eval(err)))
dev->stats.tx_errors++;
else
return ret;
}
#else
-static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
+ struct bpf_nh_params *nh)
{
kfree_skb(skb);
return NET_XMIT_DROP;
#endif /* CONFIG_IPV6 */
#if IS_ENABLED(CONFIG_INET)
-static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
+static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
+ struct net_device *dev, struct bpf_nh_params *nh)
{
- struct dst_entry *dst = skb_dst(skb);
- struct rtable *rt = container_of(dst, struct rtable, dst);
- struct net_device *dev = dst->dev;
u32 hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
bool is_v6gw = false;
}
rcu_read_lock_bh();
- neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+ if (!nh) {
+ struct dst_entry *dst = skb_dst(skb);
+ struct rtable *rt = container_of(dst, struct rtable, dst);
+
+ neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+ } else if (nh->nh_family == AF_INET6) {
+ neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
+ is_v6gw = true;
+ } else if (nh->nh_family == AF_INET) {
+ neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
+ } else {
+ rcu_read_unlock_bh();
+ goto out_drop;
+ }
+
if (likely(!IS_ERR(neigh))) {
int ret;
return -ENETDOWN;
}
-static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
+ struct bpf_nh_params *nh)
{
const struct iphdr *ip4h = ip_hdr(skb);
struct net *net = dev_net(dev);
int err, ret = NET_XMIT_DROP;
- struct rtable *rt;
- struct flowi4 fl4 = {
- .flowi4_flags = FLOWI_FLAG_ANYSRC,
- .flowi4_mark = skb->mark,
- .flowi4_tos = RT_TOS(ip4h->tos),
- .flowi4_oif = dev->ifindex,
- .flowi4_proto = ip4h->protocol,
- .daddr = ip4h->daddr,
- .saddr = ip4h->saddr,
- };
- rt = ip_route_output_flow(net, &fl4, NULL);
- if (IS_ERR(rt))
- goto out_drop;
- if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
- ip_rt_put(rt);
- goto out_drop;
- }
+ if (!nh) {
+ struct flowi4 fl4 = {
+ .flowi4_flags = FLOWI_FLAG_ANYSRC,
+ .flowi4_mark = skb->mark,
+ .flowi4_tos = RT_TOS(ip4h->tos),
+ .flowi4_oif = dev->ifindex,
+ .flowi4_proto = ip4h->protocol,
+ .daddr = ip4h->daddr,
+ .saddr = ip4h->saddr,
+ };
+ struct rtable *rt;
+
+ rt = ip_route_output_flow(net, &fl4, NULL);
+ if (IS_ERR(rt))
+ goto out_drop;
+ if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
+ ip_rt_put(rt);
+ goto out_drop;
+ }
- skb_dst_set(skb, &rt->dst);
+ skb_dst_set(skb, &rt->dst);
+ }
- err = bpf_out_neigh_v4(net, skb);
+ err = bpf_out_neigh_v4(net, skb, dev, nh);
if (unlikely(net_xmit_eval(err)))
dev->stats.tx_errors++;
else
return ret;
}
#else
-static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
+ struct bpf_nh_params *nh)
{
kfree_skb(skb);
return NET_XMIT_DROP;
}
#endif /* CONFIG_INET */
-static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev,
+ struct bpf_nh_params *nh)
{
struct ethhdr *ethh = eth_hdr(skb);
skb_reset_network_header(skb);
if (skb->protocol == htons(ETH_P_IP))
- return __bpf_redirect_neigh_v4(skb, dev);
+ return __bpf_redirect_neigh_v4(skb, dev, nh);
else if (skb->protocol == htons(ETH_P_IPV6))
- return __bpf_redirect_neigh_v6(skb, dev);
+ return __bpf_redirect_neigh_v6(skb, dev, nh);
out:
kfree_skb(skb);
return -ENOTSUPP;
enum {
BPF_F_NEIGH = (1ULL << 1),
BPF_F_PEER = (1ULL << 2),
-#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER)
+ BPF_F_NEXTHOP = (1ULL << 3),
+#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP)
};
BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
return -EAGAIN;
}
return flags & BPF_F_NEIGH ?
- __bpf_redirect_neigh(skb, dev) :
+ __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ?
+ &ri->nh : NULL) :
__bpf_redirect(skb, dev, flags);
out_drop:
kfree_skb(skb);
.arg2_type = ARG_ANYTHING,
};
-BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
+BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
+ int, plen, u64, flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
- if (unlikely(flags))
+ if (unlikely((plen && plen < sizeof(*params)) || flags))
return TC_ACT_SHOT;
- ri->flags = BPF_F_NEIGH;
+ ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0);
ri->tgt_index = ifindex;
+ BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
+ if (plen)
+ memcpy(&ri->nh, params, sizeof(ri->nh));
+
return TC_ACT_REDIRECT;
}
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
- .arg2_type = ARG_ANYTHING,
+ .arg2_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg4_type = ARG_ANYTHING,
};
BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
cmpxchg(&sk->sk_pacing_status,
SK_PACING_NONE,
SK_PACING_NEEDED);
- sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val;
+ sk->sk_max_pacing_rate = (val == ~0U) ?
+ ~0UL : (unsigned int)val;
sk->sk_pacing_rate = min(sk->sk_pacing_rate,
sk->sk_max_pacing_rate);
break;
return rtnl_linkprop(RTM_DELLINKPROP, skb, nlh, extack);
}
-static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
+static u32 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
- struct net_device *dev;
+ size_t min_ifinfo_dump_size = 0;
struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
- u16 min_ifinfo_dump_size = 0;
+ struct net_device *dev;
int hdrlen;
/* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */
*/
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
- min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
- if_nlmsg_size(dev,
- ext_filter_mask));
+ min_ifinfo_dump_size = max(min_ifinfo_dump_size,
+ if_nlmsg_size(dev, ext_filter_mask));
}
rcu_read_unlock();
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
- u16 min_dump_alloc = 0;
+ u32 min_dump_alloc = 0;
link = rtnl_get_link(family, type);
if (!link || !link->dumpit) {
case SO_MAX_PACING_RATE:
{
- unsigned long ulval = (val == ~0U) ? ~0UL : val;
+ unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
if (sizeof(ulval) != sizeof(val) &&
optlen >= sizeof(ulval) &&
.xmit = ksz8795_xmit,
.rcv = ksz8795_rcv,
.overhead = KSZ_INGRESS_TAG_LEN,
+ .tail_tag = true,
};
DSA_TAG_DRIVER(ksz8795_netdev_ops);
.xmit = ksz9477_xmit,
.rcv = ksz9477_rcv,
.overhead = KSZ9477_INGRESS_TAG_LEN,
+ .tail_tag = true,
};
DSA_TAG_DRIVER(ksz9477_netdev_ops);
/**
* icmp_global_allow - Are we allowed to send one more ICMP message ?
*
- * Uses a token bucket to limit our ICMP messages to sysctl_icmp_msgs_per_sec.
+ * Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
* Returns false if we reached the limit and can not send another packet.
* Note: called with BH disabled
*/
}
credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
if (credit) {
- credit--;
+ /* We want to use a credit of one in average, but need to randomize
+ * it for security reasons.
+ */
+ credit = max_t(int, credit - prandom_u32_max(3), 0);
rc = true;
}
WRITE_ONCE(icmp_global.credit, credit);
remove_nh_grp_entry(net, nhge, nlinfo);
/* make sure all see the newly published array before releasing rtnl */
- synchronize_rcu();
+ synchronize_net();
}
static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
tcp_data_snd_check(sk);
if (!inet_csk_ack_scheduled(sk))
goto no_ack;
+ } else {
+ tcp_update_wl(tp, TCP_SKB_CB(skb)->seq);
}
__tcp_ack_snd_check(sk, 0);
ipv6_hdr(skb)->payload_len = htons(payload_len);
ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
+ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
/* Yes, and fold redundant checksum back. 8) */
if (skb->ip_summed == CHECKSUM_COMPLETE)
module_exit(mpls_iptunnel_exit);
MODULE_ALIAS_RTNL_LWT(MPLS);
+MODULE_SOFTDEP("post: mpls_gso");
MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels");
MODULE_LICENSE("GPL v2");
config MPTCP_IPV6
bool "MPTCP: IPv6 support for Multipath TCP"
- select IPV6
+ depends on IPV6=y
default y
-endif
-
config MPTCP_KUNIT_TESTS
tristate "This builds the MPTCP KUnit tests" if !KUNIT_ALL_TESTS
- select MPTCP
depends on KUNIT
default KUNIT_ALL_TESTS
help
If unsure, say N.
+endif
}
mp_opt->add_addr = 1;
- mp_opt->port = 0;
mp_opt->addr_id = *ptr++;
pr_debug("ADD_ADDR: id=%d, echo=%d", mp_opt->addr_id, mp_opt->echo);
if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
mp_opt->mp_capable = 0;
mp_opt->mp_join = 0;
mp_opt->add_addr = 0;
+ mp_opt->ahmac = 0;
+ mp_opt->port = 0;
mp_opt->rm_addr = 0;
mp_opt->dss = 0;
if (new_state != cp->state) {
struct ip_vs_dest *dest = cp->dest;
- IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
- "%s:%d state: %s->%s conn->refcnt:%d\n",
+ IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] c:%s:%d v:%s:%d "
+ "d:%s:%d state: %s->%s conn->refcnt:%d\n",
pd->pp->name,
((state_off == TCP_DIR_OUTPUT) ?
"output " : "input "),
th->fin ? 'F' : '.',
th->ack ? 'A' : '.',
th->rst ? 'R' : '.',
- IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
- ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
+ ntohs(cp->dport),
tcp_state_name(cp->state),
tcp_state_name(new_state),
refcount_read(&cp->refcnt));
swin = win << sender->td_scale;
sender->td_maxwin = (swin == 0 ? 1 : swin);
sender->td_maxend = end + sender->td_maxwin;
- /*
- * We haven't seen traffic in the other direction yet
- * but we have to tweak window tracking to pass III
- * and IV until that happens.
- */
- if (receiver->td_maxwin == 0)
+ if (receiver->td_maxwin == 0) {
+ /* We haven't seen traffic in the other
+ * direction yet but we have to tweak window
+ * tracking to pass III and IV until that
+ * happens.
+ */
receiver->td_end = receiver->td_maxend = sack;
+ } else if (sack == receiver->td_end + 1) {
+ /* Likely a reply to a keepalive.
+ * Needed for III.
+ */
+ receiver->td_end++;
+ }
+
}
} else if (((state->state == TCP_CONNTRACK_SYN_SENT
&& dir == IP_CT_DIR_ORIGINAL)
skb_push(skb, skb->mac_len);
skb->dev = dev;
+ skb->tstamp = 0;
dev_queue_xmit(skb);
}
struct nft_expr *expr;
expr = nft_expr_first(rule);
- while (expr != nft_expr_last(rule) && expr->ops) {
+ while (nft_expr_more(rule, expr)) {
if (expr->ops->activate)
expr->ops->activate(ctx, expr);
struct nft_expr *expr;
expr = nft_expr_first(rule);
- while (expr != nft_expr_last(rule) && expr->ops) {
+ while (nft_expr_more(rule, expr)) {
if (expr->ops->deactivate)
expr->ops->deactivate(ctx, expr, phase);
* is called on error from nf_tables_newrule().
*/
expr = nft_expr_first(rule);
- while (expr != nft_expr_last(rule) && expr->ops) {
+ while (nft_expr_more(rule, expr)) {
next = nft_expr_next(expr);
nf_tables_expr_destroy(ctx, expr);
expr = next;
struct nft_expr *expr;
expr = nft_expr_first(rule);
- while (expr->ops && expr != nft_expr_last(rule)) {
+ while (nft_expr_more(rule, expr)) {
if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION)
num_actions++;
ctx->net = net;
ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;
- while (expr->ops && expr != nft_expr_last(rule)) {
+ while (nft_expr_more(rule, expr)) {
if (!expr->ops->offload) {
err = -EOPNOTSUPP;
goto err_out;
return;
skb->dev = dev;
+ skb->tstamp = 0;
neigh_xmit(neigh_table, dev, addr, skb);
out:
regs->verdict.code = verdict;
u32 idx;
char firmware_name[NFC_FIRMWARE_NAME_MAXSIZE + 1];
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_FIRMWARE_NAME])
return -EINVAL;
idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
static void __mask_array_destroy(struct mask_array *ma)
{
- free_percpu(ma->masks_usage_cntr);
+ free_percpu(ma->masks_usage_stats);
kfree(ma);
}
ma->masks_usage_zero_cntr[i] = 0;
for_each_possible_cpu(cpu) {
- u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
- cpu);
+ struct mask_array_stats *stats;
unsigned int start;
u64 counter;
+ stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
do {
- start = u64_stats_fetch_begin_irq(&ma->syncp);
- counter = usage_counters[i];
- } while (u64_stats_fetch_retry_irq(&ma->syncp, start));
+ start = u64_stats_fetch_begin_irq(&stats->syncp);
+ counter = stats->usage_cntrs[i];
+ } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
ma->masks_usage_zero_cntr[i] += counter;
}
sizeof(struct sw_flow_mask *) *
size);
- new->masks_usage_cntr = __alloc_percpu(sizeof(u64) * size,
- __alignof__(u64));
- if (!new->masks_usage_cntr) {
+ new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) +
+ sizeof(u64) * size,
+ __alignof__(u64));
+ if (!new->masks_usage_stats) {
kfree(new);
return NULL;
}
/* Flow lookup does full lookup on flow table. It starts with
* mask from index passed in *index.
+ * This function MUST be called with BH disabled due to the use
+ * of CPU specific variables.
*/
static struct sw_flow *flow_lookup(struct flow_table *tbl,
struct table_instance *ti,
u32 *n_cache_hit,
u32 *index)
{
- u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr);
+ struct mask_array_stats *stats = this_cpu_ptr(ma->masks_usage_stats);
struct sw_flow *flow;
struct sw_flow_mask *mask;
int i;
if (mask) {
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
if (flow) {
- u64_stats_update_begin(&ma->syncp);
- usage_counters[*index]++;
- u64_stats_update_end(&ma->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ stats->usage_cntrs[*index]++;
+ u64_stats_update_end(&stats->syncp);
(*n_cache_hit)++;
return flow;
}
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
if (flow) { /* Found */
*index = i;
- u64_stats_update_begin(&ma->syncp);
- usage_counters[*index]++;
- u64_stats_update_end(&ma->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ stats->usage_cntrs[*index]++;
+ u64_stats_update_end(&stats->syncp);
return flow;
}
}
struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
u32 __always_unused n_mask_hit;
u32 __always_unused n_cache_hit;
+ struct sw_flow *flow;
u32 index = 0;
- return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
+ /* This function gets called trough the netlink interface and therefore
+ * is preemptible. However, flow_lookup() function needs to be called
+ * with BH disabled due to CPU specific variables.
+ */
+ local_bh_disable();
+ flow = flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
+ local_bh_enable();
+ return flow;
}
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
for (i = 0; i < ma->max; i++) {
struct sw_flow_mask *mask;
- unsigned int start;
int cpu;
mask = rcu_dereference_ovsl(ma->masks[i]);
masks_and_count[i].counter = 0;
for_each_possible_cpu(cpu) {
- u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
- cpu);
+ struct mask_array_stats *stats;
+ unsigned int start;
u64 counter;
+ stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
do {
- start = u64_stats_fetch_begin_irq(&ma->syncp);
- counter = usage_counters[i];
- } while (u64_stats_fetch_retry_irq(&ma->syncp, start));
+ start = u64_stats_fetch_begin_irq(&stats->syncp);
+ counter = stats->usage_cntrs[i];
+ } while (u64_stats_fetch_retry_irq(&stats->syncp,
+ start));
masks_and_count[i].counter += counter;
}
u64 counter;
};
+struct mask_array_stats {
+ struct u64_stats_sync syncp;
+ u64 usage_cntrs[];
+};
+
struct mask_array {
struct rcu_head rcu;
int count, max;
- u64 __percpu *masks_usage_cntr;
+ struct mask_array_stats __percpu *masks_usage_stats;
u64 *masks_usage_zero_cntr;
- struct u64_stats_sync syncp;
struct sw_flow_mask __rcu *masks[];
};
__be16 target_dst = target.dst.u.udp.port;
if (target_src != tuple->src.u.udp.port)
- tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP,
+ tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_UDP,
offsetof(struct udphdr, source),
0xFFFF, be16_to_cpu(target_src));
if (target_dst != tuple->dst.u.udp.port)
- tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP,
+ tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_UDP,
offsetof(struct udphdr, dest),
0xFFFF, be16_to_cpu(target_dst));
}
metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port,
0, flags,
- key_id, 0);
+ key_id, opts_len);
} else {
NL_SET_ERR_MSG(extack, "Missing either ipv4 or ipv6 src and dst");
ret = -EINVAL;
entry->gate.num_entries = tcf_gate_num_entries(act);
err = tcf_gate_get_entries(entry, act);
if (err)
- goto err_out;
+ goto err_out_locked;
} else {
err = -EOPNOTSUPP;
goto err_out_locked;
{
struct tipc_bc_base *bb = tipc_bc_base(net);
int all_dests = tipc_link_bc_peers(bb->link);
+ int max_win = tipc_link_max_win(bb->link);
+ int min_win = tipc_link_min_win(bb->link);
int i, mtu, prim;
bb->primary_bearer = INVALID_BEARER_ID;
continue;
mtu = tipc_bearer_mtu(net, i);
- if (mtu < tipc_link_mtu(bb->link))
+ if (mtu < tipc_link_mtu(bb->link)) {
tipc_link_set_mtu(bb->link, mtu);
+ tipc_link_set_queue_limits(bb->link,
+ min_win,
+ max_win);
+ }
bb->bcast_support &= tipc_bearer_bcast_support(net, i);
if (bb->dests[i] < all_dests)
continue;
if (max_win > TIPC_MAX_LINK_WIN)
return -EINVAL;
tipc_bcast_lock(net);
- tipc_link_set_queue_limits(l, BCLINK_WIN_MIN, max_win);
+ tipc_link_set_queue_limits(l, tipc_link_min_win(l), max_win);
tipc_bcast_unlock(net);
return 0;
}
switch (proto) {
case ETH_P_8021Q:
case ETH_P_8021AD:
- bpf_tail_call_static(skb, &jmp_table, PARSE_VLAN);
+ bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
break;
case ETH_P_MPLS_UC:
case ETH_P_MPLS_MC:
- bpf_tail_call_static(skb, &jmp_table, PARSE_MPLS);
+ bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
break;
case ETH_P_IP:
- bpf_tail_call_static(skb, &jmp_table, PARSE_IP);
+ bpf_tail_call(skb, &jmp_table, PARSE_IP);
break;
case ETH_P_IPV6:
- bpf_tail_call_static(skb, &jmp_table, PARSE_IPV6);
+ bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
break;
}
}
'struct bpf_perf_event_data',
'struct bpf_perf_event_value',
'struct bpf_pidns_info',
+ 'struct bpf_redir_neigh',
'struct bpf_sk_lookup',
'struct bpf_sock',
'struct bpf_sock_addr',
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
* Description
* Redirect the packet to another net device of index *ifindex*
* and fill in L2 addresses from neighboring subsystem. This helper
* is somewhat similar to **bpf_redirect**\ (), except that it
* populates L2 addresses as well, meaning, internally, the helper
- * performs a FIB lookup based on the skb's networking header to
- * get the address of the next hop and then relies on the neighbor
- * lookup for the L2 address of the nexthop.
+ * relies on the neighbor lookup for the L2 address of the nexthop.
+ *
+ * The helper will perform a FIB lookup based on the skb's
+ * networking header to get the address of the next hop, unless
+ * this is supplied by the caller in the *params* argument. The
+ * *plen* argument indicates the len of *params* and should be set
+ * to 0 if *params* is NULL.
*
* The *flags* argument is reserved and must be 0. The helper is
* currently only supported for tc BPF program types, and enabled
__u8 dmac[6]; /* ETH_ALEN */
};
+struct bpf_redir_neigh {
+ /* network family for lookup (AF_INET, AF_INET6) */
+ __u32 nh_family;
+ /* network address of nexthop; skips fib lookup to find gateway */
+ union {
+ __be32 ipv4_nh;
+ __u32 ipv6_nh[4]; /* in6_addr; network order */
+ };
+};
+
enum bpf_task_fd_type {
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
BPF_FD_TYPE_TRACEPOINT, /* tp name */
/*
* Helper function to perform a tail call with a constant/immediate map slot.
*/
+#if __clang_major__ >= 8 && defined(__bpf__)
static __always_inline void
bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
{
:: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
: "r0", "r1", "r2", "r3", "r4", "r5");
}
+#endif
/*
* Helper structure used by eBPF C program
#include <bpf/libbpf.h>
#include <bpf/btf.h>
#include "test_ksyms_btf.skel.h"
+#include "test_ksyms_btf_null_check.skel.h"
static int duration;
-void test_ksyms_btf(void)
+static void test_basic(void)
{
__u64 runqueues_addr, bpf_prog_active_addr;
__u32 this_rq_cpu;
int this_bpf_prog_active;
struct test_ksyms_btf *skel = NULL;
struct test_ksyms_btf__data *data;
- struct btf *btf;
- int percpu_datasec;
int err;
err = kallsyms_find("runqueues", &runqueues_addr);
if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n"))
return;
- btf = libbpf_find_kernel_btf();
- if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
- PTR_ERR(btf)))
- return;
-
- percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
- BTF_KIND_DATASEC);
- if (percpu_datasec < 0) {
- printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n",
- __func__);
- test__skip();
- goto cleanup;
- }
-
skel = test_ksyms_btf__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
goto cleanup;
data->out__bpf_prog_active);
cleanup:
- btf__free(btf);
test_ksyms_btf__destroy(skel);
}
+
+static void test_null_check(void)
+{
+ struct test_ksyms_btf_null_check *skel;
+
+ skel = test_ksyms_btf_null_check__open_and_load();
+ CHECK(skel, "skel_open", "unexpected load of a prog missing null check\n");
+
+ test_ksyms_btf_null_check__destroy(skel);
+}
+
+void test_ksyms_btf(void)
+{
+ int percpu_datasec;
+ struct btf *btf;
+
+ btf = libbpf_find_kernel_btf();
+ if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
+ PTR_ERR(btf)))
+ return;
+
+ percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
+ BTF_KIND_DATASEC);
+ btf__free(btf);
+ if (percpu_datasec < 0) {
+ printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n",
+ __func__);
+ test__skip();
+ return;
+ }
+
+ if (test__start_subtest("basic"))
+ test_basic();
+
+ if (test__start_subtest("null_check"))
+ test_null_check();
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+extern const struct rq runqueues __ksym; /* struct type global var. */
+extern const int bpf_prog_active __ksym; /* int type global var. */
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ struct rq *rq;
+ int *active;
+ __u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu);
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* READ_ONCE */
+ *(volatile int *)active;
+ /* !rq has not been tested, so verifier should reject. */
+ *(volatile int *)(&rq->cpu);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
return TC_ACT_SHOT;
- return bpf_redirect_neigh(get_dev_ifindex(dev_src), 0);
+ return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0);
}
SEC("src_ingress") int tc_src(struct __sk_buff *skb)
if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
return TC_ACT_SHOT;
- return bpf_redirect_neigh(get_dev_ifindex(dev_dst), 0);
+ return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0);
}
char __license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#ifndef ctx_ptr
+# define ctx_ptr(field) (void *)(long)(field)
+#endif
+
+#define AF_INET 2
+#define AF_INET6 10
+
+static __always_inline int fill_fib_params_v4(struct __sk_buff *skb,
+ struct bpf_fib_lookup *fib_params)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct iphdr *ip4h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return -1;
+
+ ip4h = (struct iphdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip4h + 1) > data_end)
+ return -1;
+
+ fib_params->family = AF_INET;
+ fib_params->tos = ip4h->tos;
+ fib_params->l4_protocol = ip4h->protocol;
+ fib_params->sport = 0;
+ fib_params->dport = 0;
+ fib_params->tot_len = bpf_ntohs(ip4h->tot_len);
+ fib_params->ipv4_src = ip4h->saddr;
+ fib_params->ipv4_dst = ip4h->daddr;
+
+ return 0;
+}
+
+static __always_inline int fill_fib_params_v6(struct __sk_buff *skb,
+ struct bpf_fib_lookup *fib_params)
+{
+ struct in6_addr *src = (struct in6_addr *)fib_params->ipv6_src;
+ struct in6_addr *dst = (struct in6_addr *)fib_params->ipv6_dst;
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct ipv6hdr *ip6h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return -1;
+
+ ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip6h + 1) > data_end)
+ return -1;
+
+ fib_params->family = AF_INET6;
+ fib_params->flowinfo = 0;
+ fib_params->l4_protocol = ip6h->nexthdr;
+ fib_params->sport = 0;
+ fib_params->dport = 0;
+ fib_params->tot_len = bpf_ntohs(ip6h->payload_len);
+ *src = ip6h->saddr;
+ *dst = ip6h->daddr;
+
+ return 0;
+}
+
+SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ __u32 *raw = data;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return TC_ACT_SHOT;
+
+ return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+static __always_inline int tc_redir(struct __sk_buff *skb)
+{
+ struct bpf_fib_lookup fib_params = { .ifindex = skb->ingress_ifindex };
+ __u8 zero[ETH_ALEN * 2];
+ int ret = -1;
+
+ switch (skb->protocol) {
+ case __bpf_constant_htons(ETH_P_IP):
+ ret = fill_fib_params_v4(skb, &fib_params);
+ break;
+ case __bpf_constant_htons(ETH_P_IPV6):
+ ret = fill_fib_params_v6(skb, &fib_params);
+ break;
+ }
+
+ if (ret)
+ return TC_ACT_OK;
+
+ ret = bpf_fib_lookup(skb, &fib_params, sizeof(fib_params), 0);
+ if (ret == BPF_FIB_LKUP_RET_NOT_FWDED || ret < 0)
+ return TC_ACT_OK;
+
+ __builtin_memset(&zero, 0, sizeof(zero));
+ if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+ return TC_ACT_SHOT;
+
+ if (ret == BPF_FIB_LKUP_RET_NO_NEIGH) {
+ struct bpf_redir_neigh nh_params = {};
+
+ nh_params.nh_family = fib_params.family;
+ __builtin_memcpy(&nh_params.ipv6_nh, &fib_params.ipv6_dst,
+ sizeof(nh_params.ipv6_nh));
+
+ return bpf_redirect_neigh(fib_params.ifindex, &nh_params,
+ sizeof(nh_params), 0);
+
+ } else if (ret == BPF_FIB_LKUP_RET_SUCCESS) {
+ void *data_end = ctx_ptr(skb->data_end);
+ struct ethhdr *eth = ctx_ptr(skb->data);
+
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ __builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
+
+ return bpf_redirect(fib_params.ifindex, 0);
+ }
+
+ return TC_ACT_SHOT;
+}
+
+/* these are identical, but keep them separate for compatibility with the
+ * section names expected by test_tc_redirect.sh
+ */
+SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+{
+ return tc_redir(skb);
+}
+
+SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+{
+ return tc_redir(skb);
+}
+
+char __license[] SEC("license") = "GPL";
{ echo >&2 "timeout is not available"; exit 1; }
command -v ping >/dev/null 2>&1 || \
{ echo >&2 "ping is not available"; exit 1; }
-command -v ping6 >/dev/null 2>&1 || \
- { echo >&2 "ping6 is not available"; exit 1; }
+if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi
command -v perl >/dev/null 2>&1 || \
{ echo >&2 "perl is not available"; exit 1; }
command -v jq >/dev/null 2>&1 || \
echo -e "${TEST}: ${GREEN}PASS${NC}"
TEST="ICMPv6 connectivity test"
- ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST}
+ ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST}
if [ $? -ne 0 ]; then
echo -e "${TEST}: ${RED}FAIL${NC}"
exit 1
netns_setup_bpf()
{
local obj=$1
+ local use_forwarding=${2:-0}
ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress
+ if [ "$use_forwarding" -eq "1" ]; then
+ # bpf_fib_lookup() checks if forwarding is enabled
+ ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1
+ ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1
+ ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1
+ return 0
+ fi
+
veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)
netns_test_connectivity
netns_cleanup
netns_setup
+netns_setup_bpf test_tc_neigh_fib.o 1
+netns_test_connectivity
+netns_cleanup
+netns_setup
netns_setup_bpf test_tc_peer.o
netns_test_connectivity
.prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
.result = ACCEPT,
},
+{
+ "mark null check on return value of bpf_skc_to helpers",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_request_sock),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "invalid mem access",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "unknown func",
+},
CONFIG_TRACEPOINTS=y
CONFIG_NET_DROP_MONITOR=m
CONFIG_NETDEVSIM=m
+CONFIG_NET_FOU=m
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
switch_destroy()
{
+ sysctl_restore net.ipv4.conf.all.rp_filter
+
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
export -f ns_switch_create
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
switch_destroy()
{
+ sysctl_restore net.ipv4.conf.all.rp_filter
+
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
export -f ns_switch_create
CONFIG_MPTCP=y
+CONFIG_IPV6=y
CONFIG_MPTCP_IPV6=y
CONFIG_INET_DIAG=m
CONFIG_INET_MPTCP_DIAG=m
return $ksft_skip
fi
+ if ! /sbin/modprobe -q -n fou; then
+ echo "SKIP: module fou is not found"
+ return $ksft_skip
+ fi
+ /sbin/modprobe -q fou
ip -netns "$testns" fou add port 7777 ipproto 47 2>/dev/null
if [ $? -ne 0 ];then
echo "FAIL: can't add fou port 7777, skipping test"