Merge tag 'mac80211-next-for-davem-2018-03-02' of git://git.kernel.org/pub/scm/linux...
authorDavid S. Miller <davem@davemloft.net>
Fri, 2 Mar 2018 14:50:21 +0000 (09:50 -0500)
committerDavid S. Miller <davem@davemloft.net>
Fri, 2 Mar 2018 14:50:21 +0000 (09:50 -0500)
Johannes Berg says:

====================
Only a few new things:
 * hwsim net namespace stuff from Kirill Tkhai
 * A-MSDU support in fast-RX
 * 4-addr mode support in fast-RX
 * support for a spec quirk in Add-BA negotiation
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
273 files changed:
Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/sff,sfp.txt
MAINTAINERS
arch/arm/boot/dts/armada-370-rd.dts
arch/m68k/mac/config.c
arch/x86/net/bpf_jit_comp.c
drivers/infiniband/core/cma.c
drivers/infiniband/hw/mlx5/Makefile
drivers/infiniband/hw/mlx5/ib_rep.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/ib_rep.h [new file with mode: 0644]
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/net/Kconfig
drivers/net/Space.c
drivers/net/bonding/bond_main.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/chip.h
drivers/net/ethernet/apple/macmace.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
drivers/net/ethernet/cirrus/mac89x0.c
drivers/net/ethernet/emulex/benet/be_cmds.c
drivers/net/ethernet/emulex/benet/be_cmds.h
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
drivers/net/ethernet/freescale/fman/fman_dtsec.c
drivers/net/ethernet/freescale/fman/fman_dtsec.h
drivers/net/ethernet/freescale/fman/fman_memac.c
drivers/net/ethernet/freescale/fman/fman_memac.h
drivers/net/ethernet/freescale/fman/fman_tgec.c
drivers/net/ethernet/freescale/fman/fman_tgec.h
drivers/net/ethernet/freescale/fman/mac.c
drivers/net/ethernet/freescale/fman/mac.h
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/intel/fm10k/fm10k_common.c
drivers/net/ethernet/intel/fm10k/fm10k_main.c
drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
drivers/net/ethernet/intel/fm10k/fm10k_pci.c
drivers/net/ethernet/intel/fm10k/fm10k_pf.c
drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_txrx.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/i40evf/i40e_txrx.c
drivers/net/ethernet/intel/i40evf/i40evf_main.c
drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbevf/ethtool.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/marvell/mvpp2.c
drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_port.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
drivers/net/ethernet/mellanox/mlx5/core/dev.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlxsw/Kconfig
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/natsemi/jazzsonic.c
drivers/net/ethernet/natsemi/macsonic.c
drivers/net/ethernet/natsemi/sonic.c
drivers/net/ethernet/natsemi/sonic.h
drivers/net/ethernet/natsemi/xtsonic.c
drivers/net/ethernet/qlogic/qed/qed_iwarp.c
drivers/net/ethernet/realtek/r8169.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/renesas/sh_eth.h
drivers/net/ethernet/sfc/falcon/enum.h
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/geneve.c
drivers/net/gtp.c
drivers/net/ieee802154/Kconfig
drivers/net/ieee802154/Makefile
drivers/net/ieee802154/mcr20a.c [new file with mode: 0644]
drivers/net/ieee802154/mcr20a.h [new file with mode: 0644]
drivers/net/ipvlan/ipvlan.h
drivers/net/ipvlan/ipvlan_core.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/phy/aquantia.c
drivers/net/phy/cortina.c
drivers/net/phy/marvell10g.c
drivers/net/phy/phy-c45.c
drivers/net/phy/phylink.c
drivers/net/phy/sfp-bus.c
drivers/net/phy/sfp.c
drivers/net/phy/teranetics.c
drivers/net/ppp/ppp_generic.c
drivers/net/ppp/pppoe.c
drivers/net/team/team.c
drivers/net/vrf.c
drivers/net/vxlan.c
drivers/net/xen-netback/rx.c
fs/lockd/svc.c
fs/nfs/inode.c
fs/nfs_common/grace.c
include/linux/bpf-cgroup.h
include/linux/filter.h
include/linux/mlx5/driver.h
include/linux/mlx5/eswitch.h [new file with mode: 0644]
include/linux/mroute.h
include/linux/mroute6.h
include/linux/mroute_base.h [new file with mode: 0644]
include/linux/phy.h
include/linux/sfp.h
include/net/Space.h
include/net/ethoc.h
include/net/fib_rules.h
include/net/flow.h
include/net/gre.h
include/net/inet_connection_sock.h
include/net/ip.h
include/net/ip6_fib.h
include/net/ip6_route.h
include/net/ip_fib.h
include/net/ip_tunnels.h
include/net/ipv6.h
include/net/netns/ipv4.h
include/net/netns/ipv6.h
include/net/pkt_cls.h
include/net/sch_generic.h
include/net/tcp.h
include/net/xfrm.h
include/uapi/linux/errqueue.h
include/uapi/linux/fib_rules.h
include/uapi/linux/rds.h
kernel/bpf/verifier.c
net/8021q/vlan.c
net/bridge/br.c
net/bridge/br_netfilter_hooks.c
net/can/bcm.c
net/core/dev.c
net/core/fib_rules.c
net/core/net_namespace.c
net/core/skbuff.c
net/ieee802154/6lowpan/core.c
net/ieee802154/core.c
net/ipv4/Kconfig
net/ipv4/Makefile
net/ipv4/fib_rules.c
net/ipv4/fib_semantics.c
net/ipv4/fib_trie.c
net/ipv4/inetpeer.c
net/ipv4/ip_gre.c
net/ipv4/ip_tunnel.c
net/ipv4/ip_vti.c
net/ipv4/ipip.c
net/ipv4/ipmr.c
net/ipv4/ipmr_base.c [new file with mode: 0644]
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/netfilter/nf_defrag_ipv4.c
net/ipv4/proc.c
net/ipv4/route.c
net/ipv4/tcp_bbr.c
net/ipv4/tcp_output.c
net/ipv4/tunnel4.c
net/ipv4/xfrm4_policy.c
net/ipv6/Kconfig
net/ipv6/addrconf.c
net/ipv6/anycast.c
net/ipv6/exthdrs_core.c
net/ipv6/fib6_rules.c
net/ipv6/icmp.c
net/ipv6/ila/ila_xlat.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ip6_vti.c
net/ipv6/ip6mr.c
net/ipv6/ipv6_sockglue.c
net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
net/ipv6/proc.c
net/ipv6/route.c
net/ipv6/sit.c
net/ipv6/xfrm6_state.c
net/ipv6/xfrm6_tunnel.c
net/kcm/kcmproc.c
net/kcm/kcmsock.c
net/key/af_key.c
net/l2tp/l2tp_ppp.c
net/netfilter/ipvs/ip_vs_lblc.c
net/netfilter/ipvs/ip_vs_lblcr.c
net/netfilter/nf_synproxy_core.c
net/netfilter/xt_hashlimit.c
net/netfilter/xt_recent.c
net/phonet/pn_dev.c
net/rds/af_rds.c
net/rds/message.c
net/rds/rds.h
net/rds/recv.c
net/sched/act_bpf.c
net/sched/act_connmark.c
net/sched/act_csum.c
net/sched/act_gact.c
net/sched/act_ife.c
net/sched/act_ipt.c
net/sched/act_mirred.c
net/sched/act_nat.c
net/sched/act_pedit.c
net/sched/act_police.c
net/sched/act_sample.c
net/sched/act_simple.c
net/sched/act_skbedit.c
net/sched/act_skbmod.c
net/sched/act_tunnel_key.c
net/sched/act_vlan.c
net/sched/cls_api.c
net/sched/sch_api.c
net/sched/sch_prio.c
net/smc/af_smc.c
net/smc/smc.h
net/smc/smc_clc.c
net/smc/smc_clc.h
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_llc.c
net/smc/smc_llc.h
net/socket.c
samples/bpf/Makefile
samples/bpf/cpustat_kern.c [new file with mode: 0644]
samples/bpf/cpustat_user.c [new file with mode: 0644]
samples/bpf/xdp_redirect_user.c
samples/sockmap/Makefile
samples/sockmap/sockmap_user.c
security/selinux/hooks.c
security/smack/smack_netfilter.c
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/test_tcpbpf_user.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/forwarding/.gitignore [new file with mode: 0644]
tools/testing/selftests/net/forwarding/README [new file with mode: 0644]
tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/config [new file with mode: 0644]
tools/testing/selftests/net/forwarding/forwarding.config.sample [new file with mode: 0644]
tools/testing/selftests/net/forwarding/lib.sh [new file with mode: 0644]
tools/testing/selftests/net/forwarding/router.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/router_multipath.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/tc_actions.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/tc_chains.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/tc_common.sh [new file with mode: 0644]
tools/testing/selftests/net/forwarding/tc_flower.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/tc_shblocks.sh [new file with mode: 0755]
tools/testing/selftests/net/msg_zerocopy.c
tools/testing/selftests/tc-testing/tdc.py
tools/testing/selftests/tc-testing/tdc_batch.py

diff --git a/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
new file mode 100644 (file)
index 0000000..2aaef56
--- /dev/null
@@ -0,0 +1,23 @@
+* MCR20A IEEE 802.15.4 *
+
+Required properties:
+  - compatible:                should be "nxp,mcr20a"
+  - spi-max-frequency: maximal bus speed, should be set to a frequency
+                       lower than 9000000, depending on the sync or async
+                       operation mode
+  - reg:               the chipselect index
+  - interrupts:                the interrupt generated by the device. Non high-level
+                       interrupts can cause deadlocks while handling the ISR.
+
+Optional properties:
+  - rst_b-gpio:                GPIO spec for the RST_B pin
+
+Example:
+
+       mcr20a@0 {
+               compatible = "nxp,mcr20a";
+               spi-max-frequency = <9000000>;
+               reg = <0>;
+               interrupts = <17 2>;
+               interrupt-parent = <&gpio>;
+               rst_b-gpio = <&gpio 27 1>;
+       };
index f1c441b..929591d 100644 (file)
@@ -33,6 +33,10 @@ Optional Properties:
   Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
   high Tx rate. Must not be present for SFF modules
 
+- maximum-power-milliwatt : Maximum module power consumption
+  Specifies the maximum power consumption allowable by a module in the
+  slot, in milli-Watts.  Presently, modules can be up to 1W, 1.5W or 2W.
+
 Example #1: Direct serdes to SFP connection
 
 sfp_eth3: sfp-eth3 {
@@ -40,6 +44,7 @@ sfp_eth3: sfp-eth3 {
        i2c-bus = <&sfp_1g_i2c>;
        los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>;
        mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>;
+       maximum-power-milliwatt = <1000>;
        pinctrl-names = "default";
        pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>;
        tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>;
index 93a12af..e0b3900 100644 (file)
@@ -8592,6 +8592,15 @@ S:       Maintained
 F:     Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531
 F:     drivers/iio/potentiometer/mcp4531.c
 
+MCR20A IEEE-802.15.4 RADIO DRIVER
+M:     Xue Liu <liuxuenetmail@gmail.com>
+L:     linux-wpan@vger.kernel.org
+W:     https://github.com/xueliu/mcr20a-linux
+S:     Maintained
+F:     drivers/net/ieee802154/mcr20a.c
+F:     drivers/net/ieee802154/mcr20a.h
+F:     Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
+
 MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
 M:     William Breathitt Gray <vilhelm.gray@gmail.com>
 L:     linux-iio@vger.kernel.org
index 8b2fa9a..c28afb2 100644 (file)
@@ -56,6 +56,7 @@
 
 /dts-v1/;
 #include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
 #include <dt-bindings/gpio/gpio.h>
 #include "armada-370.dtsi"
 
                #address-cells = <1>;
                #size-cells = <0>;
                reg = <0x10>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
 
                ports {
                        #address-cells = <1>;
                                };
                        };
                };
+
+               mdio {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       switchphy0: switchphy@0 {
+                               reg = <0>;
+                               interrupt-parent = <&switch>;
+                               interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+                       };
+
+                       switchphy1: switchphy@1 {
+                               reg = <1>;
+                               interrupt-parent = <&switch>;
+                               interrupts = <1 IRQ_TYPE_LEVEL_HIGH>;
+                       };
+
+                       switchphy2: switchphy@2 {
+                               reg = <2>;
+                               interrupt-parent = <&switch>;
+                               interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+                       };
+
+                       switchphy3: switchphy@3 {
+                               reg = <3>;
+                               interrupt-parent = <&switch>;
+                               interrupts = <3 IRQ_TYPE_LEVEL_HIGH>;
+                       };
+               };
        };
 };
 
index d3d4352..c73eb82 100644 (file)
@@ -1088,6 +1088,10 @@ int __init mac_platform_init(void)
            macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
                platform_device_register_simple("macsonic", -1, NULL, 0);
 
+       if (macintosh_config->expansion_type == MAC_EXP_PDS ||
+           macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+               platform_device_register_simple("mac89x0", -1, NULL, 0);
+
        if (macintosh_config->ether_type == MAC_ETHER_MACE)
                platform_device_register_simple("macmace", -1, NULL, 0);
 
index 45e4eb5..cbf94d4 100644 (file)
@@ -61,7 +61,12 @@ static bool is_imm8(int value)
 
 static bool is_simm32(s64 value)
 {
-       return value == (s64) (s32) value;
+       return value == (s64)(s32)value;
+}
+
+static bool is_uimm32(u64 value)
+{
+       return value == (u64)(u32)value;
 }
 
 /* mov dst, src */
@@ -212,7 +217,7 @@ struct jit_context {
 /* emit x64 prologue code for BPF program and check it's size.
  * bpf_tail_call helper will skip it while jumping into another program
  */
-static void emit_prologue(u8 **pprog, u32 stack_depth)
+static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
 {
        u8 *prog = *pprog;
        int cnt = 0;
@@ -247,18 +252,21 @@ static void emit_prologue(u8 **pprog, u32 stack_depth)
        /* mov qword ptr [rbp+24],r15 */
        EMIT4(0x4C, 0x89, 0x7D, 24);
 
-       /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
-        * we need to reset the counter to 0. It's done in two instructions,
-        * resetting rax register to 0 (xor on eax gets 0 extended), and
-        * moving it to the counter location.
-        */
+       if (!ebpf_from_cbpf) {
+               /* Clear the tail call counter (tail_call_cnt): for eBPF tail
+                * calls we need to reset the counter to 0. It's done in two
+                * instructions, resetting rax register to 0, and moving it
+                * to the counter location.
+                */
+
+               /* xor eax, eax */
+               EMIT2(0x31, 0xc0);
+               /* mov qword ptr [rbp+32], rax */
+               EMIT4(0x48, 0x89, 0x45, 32);
 
-       /* xor eax, eax */
-       EMIT2(0x31, 0xc0);
-       /* mov qword ptr [rbp+32], rax */
-       EMIT4(0x48, 0x89, 0x45, 32);
+               BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+       }
 
-       BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
        *pprog = prog;
 }
 
@@ -356,6 +364,86 @@ static void emit_load_skb_data_hlen(u8 **pprog)
        *pprog = prog;
 }
 
+static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
+                          u32 dst_reg, const u32 imm32)
+{
+       u8 *prog = *pprog;
+       u8 b1, b2, b3;
+       int cnt = 0;
+
+       /* optimization: if imm32 is positive, use 'mov %eax, imm32'
+        * (which zero-extends imm32) to save 2 bytes.
+        */
+       if (sign_propagate && (s32)imm32 < 0) {
+               /* 'mov %rax, imm32' sign extends imm32 */
+               b1 = add_1mod(0x48, dst_reg);
+               b2 = 0xC7;
+               b3 = 0xC0;
+               EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
+               goto done;
+       }
+
+       /* optimization: if imm32 is zero, use 'xor %eax, %eax'
+        * to save 3 bytes.
+        */
+       if (imm32 == 0) {
+               if (is_ereg(dst_reg))
+                       EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+               b2 = 0x31; /* xor */
+               b3 = 0xC0;
+               EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
+               goto done;
+       }
+
+       /* mov %eax, imm32 */
+       if (is_ereg(dst_reg))
+               EMIT1(add_1mod(0x40, dst_reg));
+       EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+done:
+       *pprog = prog;
+}
+
+static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
+                          const u32 imm32_hi, const u32 imm32_lo)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+
+       if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
+               /* For emitting plain u32, where sign bit must not be
+                * propagated LLVM tends to load imm64 over mov32
+                * directly, so save couple of bytes by just doing
+                * 'mov %eax, imm32' instead.
+                */
+               emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
+       } else {
+               /* movabsq %rax, imm64 */
+               EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
+               EMIT(imm32_lo, 4);
+               EMIT(imm32_hi, 4);
+       }
+
+       *pprog = prog;
+}
+
+static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+
+       if (is64) {
+               /* mov dst, src */
+               EMIT_mov(dst_reg, src_reg);
+       } else {
+               /* mov32 dst, src */
+               if (is_ereg(dst_reg) || is_ereg(src_reg))
+                       EMIT1(add_2mod(0x40, dst_reg, src_reg));
+               EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+       }
+
+       *pprog = prog;
+}
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                  int oldproglen, struct jit_context *ctx)
 {
@@ -369,7 +457,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
        int proglen = 0;
        u8 *prog = temp;
 
-       emit_prologue(&prog, bpf_prog->aux->stack_depth);
+       emit_prologue(&prog, bpf_prog->aux->stack_depth,
+                     bpf_prog_was_classic(bpf_prog));
 
        if (seen_ld_abs)
                emit_load_skb_data_hlen(&prog);
@@ -378,7 +467,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                const s32 imm32 = insn->imm;
                u32 dst_reg = insn->dst_reg;
                u32 src_reg = insn->src_reg;
-               u8 b1 = 0, b2 = 0, b3 = 0;
+               u8 b2 = 0, b3 = 0;
                s64 jmp_offset;
                u8 jmp_cond;
                bool reload_skb_data;
@@ -414,16 +503,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
                        break;
 
-                       /* mov dst, src */
                case BPF_ALU64 | BPF_MOV | BPF_X:
-                       EMIT_mov(dst_reg, src_reg);
-                       break;
-
-                       /* mov32 dst, src */
                case BPF_ALU | BPF_MOV | BPF_X:
-                       if (is_ereg(dst_reg) || is_ereg(src_reg))
-                               EMIT1(add_2mod(0x40, dst_reg, src_reg));
-                       EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+                       emit_mov_reg(&prog,
+                                    BPF_CLASS(insn->code) == BPF_ALU64,
+                                    dst_reg, src_reg);
                        break;
 
                        /* neg dst */
@@ -486,58 +570,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        break;
 
                case BPF_ALU64 | BPF_MOV | BPF_K:
-                       /* optimization: if imm32 is positive,
-                        * use 'mov eax, imm32' (which zero-extends imm32)
-                        * to save 2 bytes
-                        */
-                       if (imm32 < 0) {
-                               /* 'mov rax, imm32' sign extends imm32 */
-                               b1 = add_1mod(0x48, dst_reg);
-                               b2 = 0xC7;
-                               b3 = 0xC0;
-                               EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
-                               break;
-                       }
-
                case BPF_ALU | BPF_MOV | BPF_K:
-                       /* optimization: if imm32 is zero, use 'xor <dst>,<dst>'
-                        * to save 3 bytes.
-                        */
-                       if (imm32 == 0) {
-                               if (is_ereg(dst_reg))
-                                       EMIT1(add_2mod(0x40, dst_reg, dst_reg));
-                               b2 = 0x31; /* xor */
-                               b3 = 0xC0;
-                               EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
-                               break;
-                       }
-
-                       /* mov %eax, imm32 */
-                       if (is_ereg(dst_reg))
-                               EMIT1(add_1mod(0x40, dst_reg));
-                       EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+                       emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
+                                      dst_reg, imm32);
                        break;
 
                case BPF_LD | BPF_IMM | BPF_DW:
-                       /* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
-                        * to save 7 bytes.
-                        */
-                       if (insn[0].imm == 0 && insn[1].imm == 0) {
-                               b1 = add_2mod(0x48, dst_reg, dst_reg);
-                               b2 = 0x31; /* xor */
-                               b3 = 0xC0;
-                               EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg));
-
-                               insn++;
-                               i++;
-                               break;
-                       }
-
-                       /* movabsq %rax, imm64 */
-                       EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
-                       EMIT(insn[0].imm, 4);
-                       EMIT(insn[1].imm, 4);
-
+                       emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
                        insn++;
                        i++;
                        break;
@@ -594,36 +633,38 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                case BPF_ALU | BPF_MUL | BPF_X:
                case BPF_ALU64 | BPF_MUL | BPF_K:
                case BPF_ALU64 | BPF_MUL | BPF_X:
-                       EMIT1(0x50); /* push rax */
-                       EMIT1(0x52); /* push rdx */
+               {
+                       bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
+                       if (dst_reg != BPF_REG_0)
+                               EMIT1(0x50); /* push rax */
+                       if (dst_reg != BPF_REG_3)
+                               EMIT1(0x52); /* push rdx */
 
                        /* mov r11, dst_reg */
                        EMIT_mov(AUX_REG, dst_reg);
 
                        if (BPF_SRC(insn->code) == BPF_X)
-                               /* mov rax, src_reg */
-                               EMIT_mov(BPF_REG_0, src_reg);
+                               emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
                        else
-                               /* mov rax, imm32 */
-                               EMIT3_off32(0x48, 0xC7, 0xC0, imm32);
+                               emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);
 
-                       if (BPF_CLASS(insn->code) == BPF_ALU64)
+                       if (is64)
                                EMIT1(add_1mod(0x48, AUX_REG));
                        else if (is_ereg(AUX_REG))
                                EMIT1(add_1mod(0x40, AUX_REG));
                        /* mul(q) r11 */
                        EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
 
-                       /* mov r11, rax */
-                       EMIT_mov(AUX_REG, BPF_REG_0);
-
-                       EMIT1(0x5A); /* pop rdx */
-                       EMIT1(0x58); /* pop rax */
-
-                       /* mov dst_reg, r11 */
-                       EMIT_mov(dst_reg, AUX_REG);
+                       if (dst_reg != BPF_REG_3)
+                               EMIT1(0x5A); /* pop rdx */
+                       if (dst_reg != BPF_REG_0) {
+                               /* mov dst_reg, rax */
+                               EMIT_mov(dst_reg, BPF_REG_0);
+                               EMIT1(0x58); /* pop rax */
+                       }
                        break;
-
+               }
                        /* shifts */
                case BPF_ALU | BPF_LSH | BPF_K:
                case BPF_ALU | BPF_RSH | BPF_K:
@@ -641,7 +682,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        case BPF_RSH: b3 = 0xE8; break;
                        case BPF_ARSH: b3 = 0xF8; break;
                        }
-                       EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
+
+                       if (imm32 == 1)
+                               EMIT2(0xD1, add_1reg(b3, dst_reg));
+                       else
+                               EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
                        break;
 
                case BPF_ALU | BPF_LSH | BPF_X:
index e66963c..3ae32d1 100644 (file)
@@ -4549,6 +4549,7 @@ static struct pernet_operations cma_pernet_operations = {
        .exit = cma_exit_net,
        .id = &cma_pernet_id,
        .size = sizeof(struct cma_pernet),
+       .async = true,
 };
 
 static int __init cma_init(void)
index bc62996..d42b922 100644 (file)
@@ -2,3 +2,4 @@ obj-$(CONFIG_MLX5_INFINIBAND)   += mlx5_ib.o
 
 mlx5_ib-y :=   main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
+mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
new file mode 100644 (file)
index 0000000..61cc3d7
--- /dev/null
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include "ib_rep.h"
+
+static const struct mlx5_ib_profile rep_profile = {
+       STAGE_CREATE(MLX5_IB_STAGE_INIT,
+                    mlx5_ib_stage_init_init,
+                    mlx5_ib_stage_init_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+                    mlx5_ib_stage_rep_flow_db_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+                    mlx5_ib_stage_caps_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+                    mlx5_ib_stage_rep_non_default_cb,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+                    mlx5_ib_stage_rep_roce_init,
+                    mlx5_ib_stage_rep_roce_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+                    mlx5_ib_stage_dev_res_init,
+                    mlx5_ib_stage_dev_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+                    mlx5_ib_stage_counters_init,
+                    mlx5_ib_stage_counters_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+                    mlx5_ib_stage_bfrag_init,
+                    mlx5_ib_stage_bfrag_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+                    mlx5_ib_stage_ib_reg_init,
+                    mlx5_ib_stage_ib_reg_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
+                    mlx5_ib_stage_umr_res_init,
+                    mlx5_ib_stage_umr_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+                    mlx5_ib_stage_class_attr_init,
+                    NULL),
+};
+
+static int
+mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+       return 0;
+}
+
+static void
+mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+       rep->rep_if[REP_IB].priv = NULL;
+}
+
+static int
+mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+       struct mlx5_ib_dev *ibdev;
+
+       ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+       if (!ibdev)
+               return -ENOMEM;
+
+       ibdev->rep = rep;
+       ibdev->mdev = dev;
+       ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
+                              MLX5_CAP_GEN(dev, num_vhca_ports));
+       if (!__mlx5_ib_add(ibdev, &rep_profile))
+               return -EINVAL;
+
+       rep->rep_if[REP_IB].priv = ibdev;
+
+       return 0;
+}
+
+static void
+mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+       struct mlx5_ib_dev *dev;
+
+       if (!rep->rep_if[REP_IB].priv)
+               return;
+
+       dev = mlx5_ib_rep_to_dev(rep);
+       __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+       rep->rep_if[REP_IB].priv = NULL;
+}
+
+static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+       return mlx5_ib_rep_to_dev(rep);
+}
+
+static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
+       int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+       int vport;
+
+       for (vport = 1; vport < total_vfs; vport++) {
+               struct mlx5_eswitch_rep_if rep_if = {};
+
+               rep_if.load = mlx5_ib_vport_rep_load;
+               rep_if.unload = mlx5_ib_vport_rep_unload;
+               rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+               mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
+       }
+}
+
+static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
+       int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+       int vport;
+
+       for (vport = 1; vport < total_vfs; vport++)
+               mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
+}
+
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+       struct mlx5_eswitch_rep_if rep_if = {};
+
+       rep_if.load = mlx5_ib_nic_rep_load;
+       rep_if.unload = mlx5_ib_nic_rep_unload;
+       rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+       rep_if.priv = dev;
+
+       mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB);
+
+       mlx5_ib_rep_register_vf_vports(dev);
+}
+
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
+
+       mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */
+       mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/
+}
+
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+       return mlx5_eswitch_mode(esw);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+                                         int vport_index)
+{
+       return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
+}
+
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+                                         int vport_index)
+{
+       return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+       return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
+}
+
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+{
+       return mlx5_eswitch_vport_rep(esw, vport);
+}
+
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+                             struct mlx5_ib_sq *sq)
+{
+       struct mlx5_flow_handle *flow_rule;
+       struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+       if (!dev->rep)
+               return 0;
+
+       flow_rule =
+               mlx5_eswitch_add_send_to_vport_rule(esw,
+                                                   dev->rep->vport,
+                                                   sq->base.mqp.qpn);
+       if (IS_ERR(flow_rule))
+               return PTR_ERR(flow_rule);
+       sq->flow_rule = flow_rule;
+
+       return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
new file mode 100644 (file)
index 0000000..046fd94
--- /dev/null
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLX5_IB_REP_H__
+#define __MLX5_IB_REP_H__
+
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+                                         int vport_index);
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+                                          int vport_index);
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+                             struct mlx5_ib_sq *sq);
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+                                         int vport_index);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+       return SRIOV_NONE;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+                                         int vport_index)
+{
+       return NULL;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+       return NULL;
+}
+
+static inline
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+                                          int vport_index)
+{
+       return NULL;
+}
+
+static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+                                           struct mlx5_ib_sq *sq)
+{
+       return 0;
+}
+
+static inline
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+                                         int vport_index)
+{
+       return NULL;
+}
+#endif
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
+{
+       return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv;
+}
+#endif /* __MLX5_IB_REP_H__ */
index 4236c80..ee55d7d 100644 (file)
@@ -57,6 +57,7 @@
 #include <linux/in.h>
 #include <linux/etherdevice.h>
 #include "mlx5_ib.h"
+#include "ib_rep.h"
 #include "cmd.h"
 
 #define DRIVER_NAME "mlx5_ib"
@@ -130,7 +131,7 @@ static int get_port_state(struct ib_device *ibdev,
        int ret;
 
        memset(&attr, 0, sizeof(attr));
-       ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+       ret = ibdev->query_port(ibdev, port_num, &attr);
        if (!ret)
                *state = attr.state;
        return ret;
@@ -154,10 +155,19 @@ static int mlx5_netdev_event(struct notifier_block *this,
        case NETDEV_REGISTER:
        case NETDEV_UNREGISTER:
                write_lock(&roce->netdev_lock);
-
-               if (ndev->dev.parent == &mdev->pdev->dev)
-                       roce->netdev = (event == NETDEV_UNREGISTER) ?
+               if (ibdev->rep) {
+                       struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
+                       struct net_device *rep_ndev;
+
+                       rep_ndev = mlx5_ib_get_rep_netdev(esw,
+                                                         ibdev->rep->vport);
+                       if (rep_ndev == ndev)
+                               roce->netdev = (event == NETDEV_UNREGISTER) ?
                                        NULL : ndev;
+               } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
+                       roce->netdev = (event == NETDEV_UNREGISTER) ?
+                               NULL : ndev;
+               }
                write_unlock(&roce->netdev_lock);
                break;
 
@@ -1268,6 +1278,22 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
        return ret;
 }
 
+static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
+                                 struct ib_port_attr *props)
+{
+       int ret;
+
+       /* Only link layer == ethernet is valid for representors */
+       ret = mlx5_query_port_roce(ibdev, port, props);
+       if (ret || !props)
+               return ret;
+
+       /* We don't support GIDS */
+       props->gid_tbl_len = 0;
+
+       return ret;
+}
+
 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
                             union ib_gid *gid)
 {
@@ -2631,7 +2657,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
                                                          ibflow);
        struct mlx5_ib_flow_handler *iter, *tmp;
 
-       mutex_lock(&dev->flow_db.lock);
+       mutex_lock(&dev->flow_db->lock);
 
        list_for_each_entry_safe(iter, tmp, &handler->list, list) {
                mlx5_del_flow_rules(iter->rule);
@@ -2642,7 +2668,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 
        mlx5_del_flow_rules(handler->rule);
        put_flow_table(dev, handler->prio, true);
-       mutex_unlock(&dev->flow_db.lock);
+       mutex_unlock(&dev->flow_db->lock);
 
        kfree(handler);
 
@@ -2691,7 +2717,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                                             MLX5_FLOW_NAMESPACE_BYPASS);
                num_entries = MLX5_FS_MAX_ENTRIES;
                num_groups = MLX5_FS_MAX_TYPES;
-               prio = &dev->flow_db.prios[priority];
+               prio = &dev->flow_db->prios[priority];
        } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
                   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
                ns = mlx5_get_flow_namespace(dev->mdev,
@@ -2699,7 +2725,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                build_leftovers_ft_param(&priority,
                                         &num_entries,
                                         &num_groups);
-               prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+               prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
        } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
                if (!MLX5_CAP_FLOWTABLE(dev->mdev,
                                        allow_sniffer_and_nic_rx_shared_tir))
@@ -2709,7 +2735,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                                             MLX5_FLOW_NAMESPACE_SNIFFER_RX :
                                             MLX5_FLOW_NAMESPACE_SNIFFER_TX);
 
-               prio = &dev->flow_db.sniffer[ft_type];
+               prio = &dev->flow_db->sniffer[ft_type];
                priority = 0;
                num_entries = 1;
                num_groups = 1;
@@ -2802,6 +2828,18 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
        if (!flow_is_multicast_only(flow_attr))
                set_underlay_qp(dev, spec, underlay_qpn);
 
+       if (dev->rep) {
+               void *misc;
+
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                   misc_parameters);
+               MLX5_SET(fte_match_set_misc, misc, source_port,
+                        dev->rep->vport);
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                                   misc_parameters);
+               MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+       }
+
        spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
        if (is_drop) {
                flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
@@ -2999,7 +3037,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
        if (!dst)
                return ERR_PTR(-ENOMEM);
 
-       mutex_lock(&dev->flow_db.lock);
+       mutex_lock(&dev->flow_db->lock);
 
        ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
        if (IS_ERR(ft_prio)) {
@@ -3048,7 +3086,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
                goto destroy_ft;
        }
 
-       mutex_unlock(&dev->flow_db.lock);
+       mutex_unlock(&dev->flow_db->lock);
        kfree(dst);
 
        return &handler->ibflow;
@@ -3058,7 +3096,7 @@ destroy_ft:
        if (ft_prio_tx)
                put_flow_table(dev, ft_prio_tx, false);
 unlock:
-       mutex_unlock(&dev->flow_db.lock);
+       mutex_unlock(&dev->flow_db->lock);
        kfree(dst);
        kfree(handler);
        return ERR_PTR(err);
@@ -3772,6 +3810,25 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
        return 0;
 }
 
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
+                                  struct ib_port_immutable *immutable)
+{
+       struct ib_port_attr attr;
+       int err;
+
+       immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+       err = ib_query_port(ibdev, port_num, &attr);
+       if (err)
+               return err;
+
+       immutable->pkey_tbl_len = attr.pkey_tbl_len;
+       immutable->gid_tbl_len = attr.gid_tbl_len;
+       immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+       return 0;
+}
+
 static void get_dev_fw_str(struct ib_device *ibdev, char *str)
 {
        struct mlx5_ib_dev *dev =
@@ -3802,7 +3859,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
                goto err_destroy_vport_lag;
        }
 
-       dev->flow_db.lag_demux_ft = ft;
+       dev->flow_db->lag_demux_ft = ft;
        return 0;
 
 err_destroy_vport_lag:
@@ -3814,9 +3871,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
 {
        struct mlx5_core_dev *mdev = dev->mdev;
 
-       if (dev->flow_db.lag_demux_ft) {
-               mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
-               dev->flow_db.lag_demux_ft = NULL;
+       if (dev->flow_db->lag_demux_ft) {
+               mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
+               dev->flow_db->lag_demux_ft = NULL;
 
                mlx5_cmd_destroy_vport_lag(mdev);
        }
@@ -3848,14 +3905,10 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
 {
        int err;
 
-       err = mlx5_add_netdev_notifier(dev, port_num);
-       if (err)
-               return err;
-
        if (MLX5_CAP_GEN(dev->mdev, roce)) {
                err = mlx5_nic_vport_enable_roce(dev->mdev);
                if (err)
-                       goto err_unregister_netdevice_notifier;
+                       return err;
        }
 
        err = mlx5_eth_lag_init(dev);
@@ -3868,8 +3921,6 @@ err_disable_roce:
        if (MLX5_CAP_GEN(dev->mdev, roce))
                mlx5_nic_vport_disable_roce(dev->mdev);
 
-err_unregister_netdevice_notifier:
-       mlx5_remove_netdev_notifier(dev, port_num);
        return err;
 }
 
@@ -4503,7 +4554,7 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
        mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
-static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
        mlx5_ib_cleanup_multiport_master(dev);
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -4512,7 +4563,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
        kfree(dev->port);
 }
 
-static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 {
        struct mlx5_core_dev *mdev = dev->mdev;
        const char *name;
@@ -4564,7 +4615,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
                dev->mdev->priv.eq_table.num_comp_vectors;
        dev->ib_dev.dev.parent          = &mdev->pdev->dev;
 
-       mutex_init(&dev->flow_db.lock);
        mutex_init(&dev->cap_mask_mutex);
        INIT_LIST_HEAD(&dev->qp_list);
        spin_lock_init(&dev->reset_flow_resource_lock);
@@ -4585,7 +4635,38 @@ err_free_port:
        return -ENOMEM;
 }
 
-static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+{
+       dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+       if (!dev->flow_db)
+               return -ENOMEM;
+
+       mutex_init(&dev->flow_db->lock);
+
+       return 0;
+}
+
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_ib_dev *nic_dev;
+
+       nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch);
+
+       if (!nic_dev)
+               return -EINVAL;
+
+       dev->flow_db = nic_dev->flow_db;
+
+       return 0;
+}
+
+static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
+{
+       kfree(dev->flow_db);
+}
+
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 {
        struct mlx5_core_dev *mdev = dev->mdev;
        int err;
@@ -4626,7 +4707,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
                (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
 
        dev->ib_dev.query_device        = mlx5_ib_query_device;
-       dev->ib_dev.query_port          = mlx5_ib_query_port;
        dev->ib_dev.get_link_layer      = mlx5_ib_port_link_layer;
        dev->ib_dev.query_gid           = mlx5_ib_query_gid;
        dev->ib_dev.add_gid             = mlx5_ib_add_gid;
@@ -4669,7 +4749,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
        dev->ib_dev.alloc_mr            = mlx5_ib_alloc_mr;
        dev->ib_dev.map_mr_sg           = mlx5_ib_map_mr_sg;
        dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
-       dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
        dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
        dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
        if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
@@ -4720,6 +4799,80 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
        return 0;
 }
 
+static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
+{
+       dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
+       dev->ib_dev.query_port          = mlx5_ib_query_port;
+
+       return 0;
+}
+
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+{
+       dev->ib_dev.get_port_immutable  = mlx5_port_rep_immutable;
+       dev->ib_dev.query_port          = mlx5_ib_rep_query_port;
+
+       return 0;
+}
+
+static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
+                                         u8 port_num)
+{
+       int i;
+
+       for (i = 0; i < dev->num_ports; i++) {
+               dev->roce[i].dev = dev;
+               dev->roce[i].native_port_num = i + 1;
+               dev->roce[i].last_port_state = IB_PORT_DOWN;
+       }
+
+       dev->ib_dev.get_netdev  = mlx5_ib_get_netdev;
+       dev->ib_dev.create_wq    = mlx5_ib_create_wq;
+       dev->ib_dev.modify_wq    = mlx5_ib_modify_wq;
+       dev->ib_dev.destroy_wq   = mlx5_ib_destroy_wq;
+       dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
+       dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
+
+       dev->ib_dev.uverbs_ex_cmd_mask |=
+                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+                       (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+
+       return mlx5_add_netdev_notifier(dev, port_num);
+}
+
+static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+       u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+       mlx5_remove_netdev_notifier(dev, port_num);
+}
+
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_core_dev *mdev = dev->mdev;
+       enum rdma_link_layer ll;
+       int port_type_cap;
+       int err = 0;
+       u8 port_num;
+
+       port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+       port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+       ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+       if (ll == IB_LINK_LAYER_ETHERNET)
+               err = mlx5_ib_stage_common_roce_init(dev, port_num);
+
+       return err;
+}
+
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+       mlx5_ib_stage_common_roce_cleanup(dev);
+}
+
 static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
 {
        struct mlx5_core_dev *mdev = dev->mdev;
@@ -4727,37 +4880,26 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
        int port_type_cap;
        u8 port_num;
        int err;
-       int i;
 
        port_num = mlx5_core_native_port_num(dev->mdev) - 1;
        port_type_cap = MLX5_CAP_GEN(mdev, port_type);
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
        if (ll == IB_LINK_LAYER_ETHERNET) {
-               for (i = 0; i < dev->num_ports; i++) {
-                       dev->roce[i].dev = dev;
-                       dev->roce[i].native_port_num = i + 1;
-                       dev->roce[i].last_port_state = IB_PORT_DOWN;
-               }
+               err = mlx5_ib_stage_common_roce_init(dev, port_num);
+               if (err)
+                       return err;
 
-               dev->ib_dev.get_netdev  = mlx5_ib_get_netdev;
-               dev->ib_dev.create_wq    = mlx5_ib_create_wq;
-               dev->ib_dev.modify_wq    = mlx5_ib_modify_wq;
-               dev->ib_dev.destroy_wq   = mlx5_ib_destroy_wq;
-               dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
-               dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
-               dev->ib_dev.uverbs_ex_cmd_mask |=
-                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
-                       (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
-                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
-                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
-                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
                err = mlx5_enable_eth(dev, port_num);
                if (err)
-                       return err;
+                       goto cleanup;
        }
 
        return 0;
+cleanup:
+       mlx5_ib_stage_common_roce_cleanup(dev);
+
+       return err;
 }
 
 static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
@@ -4773,16 +4915,16 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
 
        if (ll == IB_LINK_LAYER_ETHERNET) {
                mlx5_disable_eth(dev);
-               mlx5_remove_netdev_notifier(dev, port_num);
+               mlx5_ib_stage_common_roce_cleanup(dev);
        }
 }
 
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
 {
        return create_dev_resources(&dev->devr);
 }
 
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
 {
        destroy_dev_resources(&dev->devr);
 }
@@ -4794,7 +4936,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
        return mlx5_ib_odp_init_one(dev);
 }
 
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
 {
        if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
                dev->ib_dev.get_hw_stats        = mlx5_ib_get_hw_stats;
@@ -4806,7 +4948,7 @@ static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
        return 0;
 }
 
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
 {
        if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
                mlx5_ib_dealloc_counters(dev);
@@ -4837,7 +4979,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
        mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
 }
 
-static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
 {
        int err;
 
@@ -4852,28 +4994,28 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
        return err;
 }
 
-static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
 {
        mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
        mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 }
 
-static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 {
        return ib_register_device(&dev->ib_dev, NULL);
 }
 
-static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
 {
        ib_unregister_device(&dev->ib_dev);
 }
 
-static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
 {
        return create_umr_res(dev);
 }
 
-static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
 {
        destroy_umrc_res(dev);
 }
@@ -4890,7 +5032,7 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
        cancel_delay_drop(dev);
 }
 
-static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
 {
        int err;
        int i;
@@ -4905,9 +5047,21 @@ static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
        return 0;
 }
 
-static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
-                            const struct mlx5_ib_profile *profile,
-                            int stage)
+static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
+{
+       mlx5_ib_register_vport_reps(dev);
+
+       return 0;
+}
+
+static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
+{
+       mlx5_ib_unregister_vport_reps(dev);
+}
+
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+                     const struct mlx5_ib_profile *profile,
+                     int stage)
 {
        /* Number of stages to cleanup */
        while (stage) {
@@ -4921,23 +5075,14 @@ static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
 
 static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
 
-static void *__mlx5_ib_add(struct mlx5_core_dev *mdev,
-                          const struct mlx5_ib_profile *profile)
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+                   const struct mlx5_ib_profile *profile)
 {
-       struct mlx5_ib_dev *dev;
        int err;
        int i;
 
        printk_once(KERN_INFO "%s", mlx5_version);
 
-       dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
-       if (!dev)
-               return NULL;
-
-       dev->mdev = mdev;
-       dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
-                            MLX5_CAP_GEN(mdev, num_vhca_ports));
-
        for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
                if (profile->stage[i].init) {
                        err = profile->stage[i].init(dev);
@@ -4961,9 +5106,15 @@ static const struct mlx5_ib_profile pf_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_INIT,
                     mlx5_ib_stage_init_init,
                     mlx5_ib_stage_init_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+                    mlx5_ib_stage_flow_db_init,
+                    mlx5_ib_stage_flow_db_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_CAPS,
                     mlx5_ib_stage_caps_init,
                     NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+                    mlx5_ib_stage_non_default_cb,
+                    NULL),
        STAGE_CREATE(MLX5_IB_STAGE_ROCE,
                     mlx5_ib_stage_roce_init,
                     mlx5_ib_stage_roce_cleanup),
@@ -4999,6 +5150,48 @@ static const struct mlx5_ib_profile pf_profile = {
                     NULL),
 };
 
+static const struct mlx5_ib_profile nic_rep_profile = {
+       STAGE_CREATE(MLX5_IB_STAGE_INIT,
+                    mlx5_ib_stage_init_init,
+                    mlx5_ib_stage_init_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+                    mlx5_ib_stage_flow_db_init,
+                    mlx5_ib_stage_flow_db_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+                    mlx5_ib_stage_caps_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+                    mlx5_ib_stage_rep_non_default_cb,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+                    mlx5_ib_stage_rep_roce_init,
+                    mlx5_ib_stage_rep_roce_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+                    mlx5_ib_stage_dev_res_init,
+                    mlx5_ib_stage_dev_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+                    mlx5_ib_stage_counters_init,
+                    mlx5_ib_stage_counters_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_UAR,
+                    mlx5_ib_stage_uar_init,
+                    mlx5_ib_stage_uar_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+                    mlx5_ib_stage_bfrag_init,
+                    mlx5_ib_stage_bfrag_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+                    mlx5_ib_stage_ib_reg_init,
+                    mlx5_ib_stage_ib_reg_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
+                    mlx5_ib_stage_umr_res_init,
+                    mlx5_ib_stage_umr_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+                    mlx5_ib_stage_class_attr_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
+                    mlx5_ib_stage_rep_reg_init,
+                    mlx5_ib_stage_rep_reg_cleanup),
+};
+
 static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
 {
        struct mlx5_ib_multiport_info *mpi;
@@ -5044,8 +5237,11 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 {
        enum rdma_link_layer ll;
+       struct mlx5_ib_dev *dev;
        int port_type_cap;
 
+       printk_once(KERN_INFO "%s", mlx5_version);
+
        port_type_cap = MLX5_CAP_GEN(mdev, port_type);
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
@@ -5055,7 +5251,22 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
                return mlx5_ib_add_slave_port(mdev, port_num);
        }
 
-       return __mlx5_ib_add(mdev, &pf_profile);
+       dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+       if (!dev)
+               return NULL;
+
+       dev->mdev = mdev;
+       dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+                            MLX5_CAP_GEN(mdev, num_vhca_ports));
+
+       if (MLX5_VPORT_MANAGER(mdev) &&
+           mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+               dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
+
+               return __mlx5_ib_add(dev, &nic_rep_profile);
+       }
+
+       return __mlx5_ib_add(dev, &pf_profile);
 }
 
 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
index eafb975..e0bad28 100644 (file)
@@ -343,6 +343,7 @@ struct mlx5_ib_sq {
        struct mlx5_ib_wq       *sq;
        struct mlx5_ib_ubuffer  ubuffer;
        struct mlx5_db          *doorbell;
+       struct mlx5_flow_handle *flow_rule;
        u32                     tisn;
        u8                      state;
 };
@@ -731,7 +732,9 @@ struct mlx5_ib_delay_drop {
 
 enum mlx5_ib_stages {
        MLX5_IB_STAGE_INIT,
+       MLX5_IB_STAGE_FLOW_DB,
        MLX5_IB_STAGE_CAPS,
+       MLX5_IB_STAGE_NON_DEFAULT_CB,
        MLX5_IB_STAGE_ROCE,
        MLX5_IB_STAGE_DEVICE_RESOURCES,
        MLX5_IB_STAGE_ODP,
@@ -743,6 +746,7 @@ enum mlx5_ib_stages {
        MLX5_IB_STAGE_UMR_RESOURCES,
        MLX5_IB_STAGE_DELAY_DROP,
        MLX5_IB_STAGE_CLASS_ATTR,
+       MLX5_IB_STAGE_REP_REG,
        MLX5_IB_STAGE_MAX,
 };
 
@@ -797,7 +801,7 @@ struct mlx5_ib_dev {
        struct srcu_struct      mr_srcu;
        u32                     null_mkey;
 #endif
-       struct mlx5_ib_flow_db  flow_db;
+       struct mlx5_ib_flow_db  *flow_db;
        /* protect resources needed as part of reset flow */
        spinlock_t              reset_flow_resource_lock;
        struct list_head        qp_list;
@@ -807,6 +811,7 @@ struct mlx5_ib_dev {
        struct mlx5_sq_bfreg    fp_bfreg;
        struct mlx5_ib_delay_drop       delay_drop;
        const struct mlx5_ib_profile    *profile;
+       struct mlx5_eswitch_rep         *rep;
 
        /* protect the user_td */
        struct mutex            lb_mutex;
@@ -1049,6 +1054,31 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
+/* Needed for rep profile */
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+                     const struct mlx5_ib_profile *profile,
+                     int stage);
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+                   const struct mlx5_ib_profile *profile);
+
 int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
                          u8 port, struct ifla_vf_info *info);
 int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
index 556e015..a5fad3e 100644 (file)
@@ -587,7 +587,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 
 static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 {
-       if (!mlx5_debugfs_root)
+       if (!mlx5_debugfs_root || dev->rep)
                return;
 
        debugfs_remove_recursive(dev->cache.root);
@@ -600,7 +600,7 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
        struct mlx5_cache_ent *ent;
        int i;
 
-       if (!mlx5_debugfs_root)
+       if (!mlx5_debugfs_root || dev->rep)
                return 0;
 
        cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
@@ -690,6 +690,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
                           MLX5_IB_UMR_OCTOWORD;
                ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
                if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+                   !dev->rep &&
                    mlx5_core_is_pf(dev->mdev))
                        ent->limit = dev->mdev->profile->mr_cache[i].limit;
                else
index 39d24bf..5663530 100644 (file)
@@ -36,6 +36,7 @@
 #include <rdma/ib_user_verbs.h>
 #include <linux/mlx5/fs.h>
 #include "mlx5_ib.h"
+#include "ib_rep.h"
 
 /* not supported currently */
 static int wq_signature;
@@ -1082,6 +1083,13 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
        mlx5_core_destroy_tis(dev->mdev, sq->tisn);
 }
 
+/* Remove the eswitch vport flow rule attached to a raw-packet SQ, if any.
+ * NULL the pointer afterwards so a repeated teardown (e.g. error path
+ * followed by destroy) cannot delete the same rule twice.
+ */
+static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+                                      struct mlx5_ib_sq *sq)
+{
+       if (!sq->flow_rule)
+               return;
+
+       mlx5_del_flow_rules(sq->flow_rule);
+       sq->flow_rule = NULL;
+}
+
 static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
                                   struct mlx5_ib_sq *sq, void *qpin,
                                   struct ib_pd *pd)
@@ -1145,8 +1153,15 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
        if (err)
                goto err_umem;
 
+       err = create_flow_rule_vport_sq(dev, sq);
+       if (err)
+               goto err_flow;
+
        return 0;
 
+err_flow:
+       mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+
 err_umem:
        ib_umem_release(sq->ubuffer.umem);
        sq->ubuffer.umem = NULL;
@@ -1157,6 +1172,7 @@ err_umem:
 static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
                                     struct mlx5_ib_sq *sq)
 {
+       destroy_flow_rule_vport_sq(dev, sq);
        mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
        ib_umem_release(sq->ubuffer.umem);
 }
@@ -1263,6 +1279,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
        if (tunnel_offload_en)
                MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
 
+       if (dev->rep)
+               MLX5_SET(tirc, tirc, self_lb_block,
+                        MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
        err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
 
        kvfree(in);
@@ -1554,6 +1574,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
        MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
 
 create_tir:
+       if (dev->rep)
+               MLX5_SET(tirc, tirc, self_lb_block,
+                        MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
        err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
 
        if (err)
index d88b78a..08b8521 100644 (file)
@@ -149,6 +149,7 @@ config MACVTAP
 config IPVLAN
     tristate "IP-VLAN support"
     depends on INET
+    depends on IPV6 || !IPV6
     depends on NETFILTER
     select NET_L3_MASTER_DEV
     ---help---
index 64333ec..3afda65 100644 (file)
@@ -113,9 +113,6 @@ static struct devprobe2 m68k_probes[] __initdata = {
 #endif
 #ifdef CONFIG_MVME147_NET      /* MVME147 internal Ethernet */
        {mvme147lance_probe, 0},
-#endif
-#ifdef CONFIG_MAC89x0
-       {mac89x0_probe, 0},
 #endif
        {NULL, 0},
 };
index c669554..4c19d23 100644 (file)
@@ -4791,6 +4791,7 @@ static struct pernet_operations bond_net_ops = {
        .exit = bond_net_exit,
        .id   = &bond_net_id,
        .size = sizeof(struct bond_net),
+       .async = true,
 };
 
 static int __init bonding_init(void)
index e1b5c5c..24486f9 100644 (file)
@@ -253,9 +253,8 @@ static void mv88e6xxx_g1_irq_unmask(struct irq_data *d)
        chip->g1_irq.masked &= ~(1 << n);
 }
 
-static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip)
 {
-       struct mv88e6xxx_chip *chip = dev_id;
        unsigned int nhandled = 0;
        unsigned int sub_irq;
        unsigned int n;
@@ -280,6 +279,13 @@ out:
        return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
 }
 
+/* Threaded IRQ entry point registered via request_threaded_irq();
+ * dev_id is the chip pointer we passed at registration time.
+ */
+static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+{
+       return mv88e6xxx_g1_irq_thread_work(dev_id);
+}
+
 static void mv88e6xxx_g1_irq_bus_lock(struct irq_data *d)
 {
        struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d);
@@ -335,7 +341,7 @@ static const struct irq_domain_ops mv88e6xxx_g1_irq_domain_ops = {
        .xlate  = irq_domain_xlate_twocell,
 };
 
-static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip)
 {
        int irq, virq;
        u16 mask;
@@ -344,8 +350,6 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
        mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
        mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
 
-       free_irq(chip->irq, chip);
-
        for (irq = 0; irq < chip->g1_irq.nirqs; irq++) {
                virq = irq_find_mapping(chip->g1_irq.domain, irq);
                irq_dispose_mapping(virq);
@@ -354,7 +358,14 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
        irq_domain_remove(chip->g1_irq.domain);
 }
 
-static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+{
+       /* Release the threaded handler first so it can no longer run,
+        * then tear down the shared g1 interrupt state.  This must call
+        * mv88e6xxx_g1_irq_free_common(), not itself: the original body
+        * recursed into mv88e6xxx_g1_irq_free() forever.
+        */
+       free_irq(chip->irq, chip);
+
+       mv88e6xxx_g1_irq_free_common(chip);
+}
+
+static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
 {
        int err, irq, virq;
        u16 reg, mask;
@@ -387,13 +398,6 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
        if (err)
                goto out_disable;
 
-       err = request_threaded_irq(chip->irq, NULL,
-                                  mv88e6xxx_g1_irq_thread_fn,
-                                  IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
-                                  dev_name(chip->dev), chip);
-       if (err)
-               goto out_disable;
-
        return 0;
 
 out_disable:
@@ -411,6 +415,62 @@ out_mapping:
        return err;
 }
 
+/* Set up the g1 interrupt controller and attach the threaded handler.
+ * On handler-registration failure the common state is unwound so the
+ * caller sees either full success or no residue.
+ */
+static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+{
+       int err = mv88e6xxx_g1_irq_setup_common(chip);
+
+       if (err)
+               return err;
+
+       err = request_threaded_irq(chip->irq, NULL,
+                                  mv88e6xxx_g1_irq_thread_fn,
+                                  IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
+                                  dev_name(chip->dev), chip);
+       if (err) {
+               mv88e6xxx_g1_irq_free_common(chip);
+               return err;
+       }
+
+       return 0;
+}
+
+/* Polling substitute for the interrupt handler, used when no IRQ line
+ * is wired up: run the same servicing work, then re-arm ourselves.
+ */
+static void mv88e6xxx_irq_poll(struct kthread_work *work)
+{
+       struct kthread_delayed_work *dw;
+       struct mv88e6xxx_chip *chip;
+
+       dw = container_of(work, struct kthread_delayed_work, work);
+       chip = container_of(dw, struct mv88e6xxx_chip, irq_poll_work);
+
+       mv88e6xxx_g1_irq_thread_work(chip);
+
+       /* poll at roughly 10 Hz */
+       kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+                                  msecs_to_jiffies(100));
+}
+
+/* Set up interrupt servicing by polling, for chips with no IRQ line.
+ * Returns 0 on success or a negative errno; on failure no g1 interrupt
+ * state is left behind.
+ */
+static int mv88e6xxx_irq_poll_setup(struct mv88e6xxx_chip *chip)
+{
+       int err;
+
+       err = mv88e6xxx_g1_irq_setup_common(chip);
+       if (err)
+               return err;
+
+       kthread_init_delayed_work(&chip->irq_poll_work,
+                                 mv88e6xxx_irq_poll);
+
+       /* "%s" guard: dev_name() is not a trusted format string */
+       chip->kworker = kthread_create_worker(0, "%s", dev_name(chip->dev));
+       if (IS_ERR(chip->kworker)) {
+               /* don't leak the g1 state set up above */
+               mv88e6xxx_g1_irq_free_common(chip);
+               return PTR_ERR(chip->kworker);
+       }
+
+       kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+                                  msecs_to_jiffies(100));
+
+       return 0;
+}
+
+/* Stop and destroy the polling worker started by mv88e6xxx_irq_poll_setup().
+ * The sync cancel guarantees mv88e6xxx_irq_poll() is not running (and cannot
+ * re-queue itself) before the worker is torn down.
+ * NOTE(review): unlike mv88e6xxx_g1_irq_free(), this does not call
+ * mv88e6xxx_g1_irq_free_common() even though the setup path called the
+ * common setup — confirm whether the g1 state is intentionally left alive.
+ */
+static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip)
+{
+       kthread_cancel_delayed_work_sync(&chip->irq_poll_work);
+       kthread_destroy_worker(chip->kworker);
+}
+
 int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
 {
        int i;
@@ -4034,33 +4094,34 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
                goto out;
        }
 
-       if (chip->irq > 0) {
-               /* Has to be performed before the MDIO bus is created,
-                * because the PHYs will link there interrupts to these
-                * interrupt controllers
-                */
-               mutex_lock(&chip->reg_lock);
+       /* Has to be performed before the MDIO bus is created, because
+        * the PHYs will link their interrupts to these interrupt
+        * controllers
+        */
+       mutex_lock(&chip->reg_lock);
+       if (chip->irq > 0)
                err = mv88e6xxx_g1_irq_setup(chip);
-               mutex_unlock(&chip->reg_lock);
-
-               if (err)
-                       goto out;
-
-               if (chip->info->g2_irqs > 0) {
-                       err = mv88e6xxx_g2_irq_setup(chip);
-                       if (err)
-                               goto out_g1_irq;
-               }
+       else
+               err = mv88e6xxx_irq_poll_setup(chip);
+       mutex_unlock(&chip->reg_lock);
 
-               err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
-               if (err)
-                       goto out_g2_irq;
+       if (err)
+               goto out;
 
-               err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+       if (chip->info->g2_irqs > 0) {
+               err = mv88e6xxx_g2_irq_setup(chip);
                if (err)
-                       goto out_g1_atu_prob_irq;
+                       goto out_g1_irq;
        }
 
+       err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
+       if (err)
+               goto out_g2_irq;
+
+       err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+       if (err)
+               goto out_g1_atu_prob_irq;
+
        err = mv88e6xxx_mdios_register(chip, np);
        if (err)
                goto out_g1_vtu_prob_irq;
@@ -4074,20 +4135,19 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 out_mdio:
        mv88e6xxx_mdios_unregister(chip);
 out_g1_vtu_prob_irq:
-       if (chip->irq > 0)
-               mv88e6xxx_g1_vtu_prob_irq_free(chip);
+       mv88e6xxx_g1_vtu_prob_irq_free(chip);
 out_g1_atu_prob_irq:
-       if (chip->irq > 0)
-               mv88e6xxx_g1_atu_prob_irq_free(chip);
+       mv88e6xxx_g1_atu_prob_irq_free(chip);
 out_g2_irq:
-       if (chip->info->g2_irqs > 0 && chip->irq > 0)
+       if (chip->info->g2_irqs > 0)
                mv88e6xxx_g2_irq_free(chip);
 out_g1_irq:
-       if (chip->irq > 0) {
-               mutex_lock(&chip->reg_lock);
+       mutex_lock(&chip->reg_lock);
+       if (chip->irq > 0)
                mv88e6xxx_g1_irq_free(chip);
-               mutex_unlock(&chip->reg_lock);
-       }
+       else
+               mv88e6xxx_irq_poll_free(chip);
+       mutex_unlock(&chip->reg_lock);
 out:
        return err;
 }
index 97d7915..d6a1391 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/if_vlan.h>
 #include <linux/irq.h>
 #include <linux/gpio/consumer.h>
+#include <linux/kthread.h>
 #include <linux/phy.h>
 #include <linux/ptp_clock_kernel.h>
 #include <linux/timecounter.h>
@@ -245,6 +246,8 @@ struct mv88e6xxx_chip {
        int watchdog_irq;
        int atu_prob_irq;
        int vtu_prob_irq;
+       struct kthread_worker *kworker;
+       struct kthread_delayed_work irq_poll_work;
 
        /* GPIO resources */
        u8 gpio_data[2];
index f17a160..137cbb4 100644 (file)
@@ -247,8 +247,8 @@ static int mace_probe(struct platform_device *pdev)
        dev->netdev_ops         = &mace_netdev_ops;
        dev->watchdog_timeo     = TX_TIMEOUT;
 
-       printk(KERN_INFO "%s: 68K MACE, hardware address %pM\n",
-              dev->name, dev->dev_addr);
+       pr_info("Onboard MACE, hardware address %pM, chip revision 0x%04X\n",
+               dev->dev_addr, mp->chipid);
 
        err = register_netdev(dev);
        if (!err)
@@ -589,7 +589,6 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
                        else if (fs & (UFLO|LCOL|RTRY)) {
                                ++dev->stats.tx_aborted_errors;
                                if (mb->xmtfs & UFLO) {
-                                       printk(KERN_ERR "%s: DMA underrun.\n", dev->name);
                                        dev->stats.tx_fifo_errors++;
                                        mace_txdma_reset(dev);
                                }
@@ -644,10 +643,8 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
 
        if (frame_status & (RS_OFLO | RS_CLSN | RS_FRAMERR | RS_FCSERR)) {
                dev->stats.rx_errors++;
-               if (frame_status & RS_OFLO) {
-                       printk(KERN_DEBUG "%s: fifo overflow.\n", dev->name);
+               if (frame_status & RS_OFLO)
                        dev->stats.rx_fifo_errors++;
-               }
                if (frame_status & RS_CLSN)
                        dev->stats.collisions++;
                if (frame_status & RS_FRAMERR)
@@ -770,18 +767,4 @@ static struct platform_driver mac_mace_driver = {
        },
 };
 
-static int __init mac_mace_init_module(void)
-{
-       if (!MACH_IS_MAC)
-               return -ENODEV;
-
-       return platform_driver_register(&mac_mace_driver);
-}
-
-static void __exit mac_mace_cleanup_module(void)
-{
-       platform_driver_unregister(&mac_mace_driver);
-}
-
-module_init(mac_mace_init_module);
-module_exit(mac_mace_cleanup_module);
+module_platform_driver(mac_mace_driver);
index 3177b0c..db92f18 100644 (file)
@@ -1335,12 +1335,6 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
                return ret;
        }
 
-       /* Clear out any old resources being used by the filter before
-        * we start constructing the new filter.
-        */
-       if (f->valid)
-               clear_filter(adapter, f);
-
        if (is_t6(adapter->params.chip) && fs->type &&
            ipv6_addr_type((const struct in6_addr *)fs->val.lip) !=
            IPV6_ADDR_ANY) {
index 977d4c2..3f8fe8f 100644 (file)
   local_irq_{dis,en}able()
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 static const char version[] =
 "cs89x0.c:v1.02 11/26/96 Russell Nelson <nelson@crynwr.com>\n";
 
-/* ======================= configure the driver here ======================= */
-
-/* use 0 for production, 1 for verification, >2 for debug */
-#ifndef NET_DEBUG
-#define NET_DEBUG 0
-#endif
-
-/* ======================= end of configuration ======================= */
-
-
-/* Always include 'config.h' first in case the user wants to turn on
-   or override something. */
 #include <linux/module.h>
 
 /*
@@ -93,6 +83,7 @@ static const char version[] =
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/netdevice.h>
+#include <linux/platform_device.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/delay.h>
@@ -105,24 +96,22 @@ static const char version[] =
 
 #include "cs89x0.h"
 
-static unsigned int net_debug = NET_DEBUG;
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "debug message level");
 
 /* Information that need to be kept for each board. */
 struct net_local {
+       int msg_enable;
        int chip_type;          /* one of: CS8900, CS8920, CS8920M */
        char chip_revision;     /* revision letter of the chip ('A'...) */
        int send_cmd;           /* the propercommand used to send a packet. */
        int rx_mode;
        int curr_rx_cfg;
         int send_underrun;      /* keep track of how many underruns in a row we get */
-       struct sk_buff *skb;
 };
 
 /* Index to functions, as function prototypes. */
-
-#if 0
-extern void reset_chip(struct net_device *dev);
-#endif
 static int net_open(struct net_device *dev);
 static int net_send_packet(struct sk_buff *skb, struct net_device *dev);
 static irqreturn_t net_interrupt(int irq, void *dev_id);
@@ -132,10 +121,6 @@ static int net_close(struct net_device *dev);
 static struct net_device_stats *net_get_stats(struct net_device *dev);
 static int set_mac_address(struct net_device *dev, void *addr);
 
-
-/* Example routines you must write ;->. */
-#define tx_done(dev) 1
-
 /* For reading/writing registers ISA-style */
 static inline int
 readreg_io(struct net_device *dev, int portno)
@@ -176,12 +161,10 @@ static const struct net_device_ops mac89x0_netdev_ops = {
 
 /* Probe for the CS8900 card in slot E.  We won't bother looking
    anywhere else until we have a really good reason to do so. */
-struct net_device * __init mac89x0_probe(int unit)
+static int mac89x0_device_probe(struct platform_device *pdev)
 {
        struct net_device *dev;
-       static int once_is_enough;
        struct net_local *lp;
-       static unsigned version_printed;
        int i, slot;
        unsigned rev_type = 0;
        unsigned long ioaddr;
@@ -189,21 +172,9 @@ struct net_device * __init mac89x0_probe(int unit)
        int err = -ENODEV;
        struct nubus_rsrc *fres;
 
-       if (!MACH_IS_MAC)
-               return ERR_PTR(-ENODEV);
-
        dev = alloc_etherdev(sizeof(struct net_local));
        if (!dev)
-               return ERR_PTR(-ENOMEM);
-
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
-
-       if (once_is_enough)
-               goto out;
-       once_is_enough = 1;
+               return -ENOMEM;
 
        /* We might have to parameterize this later */
        slot = 0xE;
@@ -230,9 +201,13 @@ struct net_device * __init mac89x0_probe(int unit)
        if (sig != swab16(CHIP_EISA_ID_SIG))
                goto out;
 
+       SET_NETDEV_DEV(dev, &pdev->dev);
+
        /* Initialize the net_device structure. */
        lp = netdev_priv(dev);
 
+       lp->msg_enable = netif_msg_init(debug, 0);
+
        /* Fill in the 'dev' fields. */
        dev->base_addr = ioaddr;
        dev->mem_start = (unsigned long)
@@ -255,19 +230,16 @@ struct net_device * __init mac89x0_probe(int unit)
        if (lp->chip_type != CS8900 && lp->chip_revision >= 'C')
                lp->send_cmd = TX_NOW;
 
-       if (net_debug && version_printed++ == 0)
-               printk(version);
+       netif_dbg(lp, drv, dev, "%s", version);
 
-       printk(KERN_INFO "%s: cs89%c0%s rev %c found at %#8lx",
-              dev->name,
-              lp->chip_type==CS8900?'0':'2',
-              lp->chip_type==CS8920M?"M":"",
-              lp->chip_revision,
-              dev->base_addr);
+       pr_info("cs89%c0%s rev %c found at %#8lx\n",
+               lp->chip_type == CS8900 ? '0' : '2',
+               lp->chip_type == CS8920M ? "M" : "",
+               lp->chip_revision, dev->base_addr);
 
        /* Try to read the MAC address */
        if ((readreg(dev, PP_SelfST) & (EEPROM_PRESENT | EEPROM_OK)) == 0) {
-               printk("\nmac89x0: No EEPROM, giving up now.\n");
+               pr_info("No EEPROM, giving up now.\n");
                goto out1;
         } else {
                 for (i = 0; i < ETH_ALEN; i += 2) {
@@ -282,39 +254,23 @@ struct net_device * __init mac89x0_probe(int unit)
 
        /* print the IRQ and ethernet address. */
 
-       printk(" IRQ %d ADDR %pM\n", dev->irq, dev->dev_addr);
+       pr_info("MAC %pM, IRQ %d\n", dev->dev_addr, dev->irq);
 
        dev->netdev_ops         = &mac89x0_netdev_ops;
 
        err = register_netdev(dev);
        if (err)
                goto out1;
-       return NULL;
+
+       platform_set_drvdata(pdev, dev);
+       return 0;
 out1:
        nubus_writew(0, dev->base_addr + ADD_PORT);
 out:
        free_netdev(dev);
-       return ERR_PTR(err);
+       return err;
 }
 
-#if 0
-/* This is useful for something, but I don't know what yet. */
-void __init reset_chip(struct net_device *dev)
-{
-       int reset_start_time;
-
-       writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
-
-       /* wait 30 ms */
-       msleep_interruptible(30);
-
-       /* Wait until the chip is reset */
-       reset_start_time = jiffies;
-       while( (readreg(dev, PP_SelfST) & INIT_DONE) == 0 && jiffies - reset_start_time < 2)
-               ;
-}
-#endif
-
 /* Open/initialize the board.  This is called (in the current kernel)
    sometime after booting when the 'ifconfig' program is run.
 
@@ -374,11 +330,9 @@ net_send_packet(struct sk_buff *skb, struct net_device *dev)
        struct net_local *lp = netdev_priv(dev);
        unsigned long flags;
 
-       if (net_debug > 3)
-               printk("%s: sent %d byte packet of type %x\n",
-                      dev->name, skb->len,
-                      (skb->data[ETH_ALEN+ETH_ALEN] << 8)
-                      | skb->data[ETH_ALEN+ETH_ALEN+1]);
+       netif_dbg(lp, tx_queued, dev, "sent %d byte packet of type %x\n",
+                 skb->len, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+                 skb->data[ETH_ALEN + ETH_ALEN + 1]);
 
        /* keep the upload from being interrupted, since we
           ask the chip to start transmitting before the
@@ -416,11 +370,6 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
        struct net_local *lp;
        int ioaddr, status;
 
-       if (dev == NULL) {
-               printk ("net_interrupt(): irq %d for unknown device.\n", irq);
-               return IRQ_NONE;
-       }
-
        ioaddr = dev->base_addr;
        lp = netdev_priv(dev);
 
@@ -432,7 +381,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
            faster than you can read them off, you're screwed.  Hasta la
            vista, baby!  */
        while ((status = swab16(nubus_readw(dev->base_addr + ISQ_PORT)))) {
-               if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
+               netif_dbg(lp, intr, dev, "status=%04x\n", status);
                switch(status & ISQ_EVENT_MASK) {
                case ISQ_RECEIVER_EVENT:
                        /* Got a packet(s). */
@@ -462,7 +411,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
                                netif_wake_queue(dev);
                        }
                        if (status & TX_UNDERRUN) {
-                               if (net_debug > 0) printk("%s: transmit underrun\n", dev->name);
+                               netif_dbg(lp, tx_err, dev, "transmit underrun\n");
                                 lp->send_underrun++;
                                 if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381;
                                 else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL;
@@ -483,6 +432,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
 static void
 net_rx(struct net_device *dev)
 {
+       struct net_local *lp = netdev_priv(dev);
        struct sk_buff *skb;
        int status, length;
 
@@ -506,7 +456,6 @@ net_rx(struct net_device *dev)
        /* Malloc up new buffer. */
        skb = alloc_skb(length, GFP_ATOMIC);
        if (skb == NULL) {
-               printk("%s: Memory squeeze, dropping packet.\n", dev->name);
                dev->stats.rx_dropped++;
                return;
        }
@@ -515,10 +464,9 @@ net_rx(struct net_device *dev)
        skb_copy_to_linear_data(skb, (void *)(dev->mem_start + PP_RxFrame),
                                length);
 
-       if (net_debug > 3)printk("%s: received %d byte packet of type %x\n",
-                                 dev->name, length,
-                                 (skb->data[ETH_ALEN+ETH_ALEN] << 8)
-                                | skb->data[ETH_ALEN+ETH_ALEN+1]);
+       netif_dbg(lp, rx_status, dev, "received %d byte packet of type %x\n",
+                 length, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+                 skb->data[ETH_ALEN + ETH_ALEN + 1]);
 
         skb->protocol=eth_type_trans(skb,dev);
        netif_rx(skb);
@@ -594,7 +542,7 @@ static int set_mac_address(struct net_device *dev, void *addr)
                return -EADDRNOTAVAIL;
 
        memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
-       printk("%s: Setting MAC address to %pM\n", dev->name, dev->dev_addr);
+       netdev_info(dev, "Setting MAC address to %pM\n", dev->dev_addr);
 
        /* set the Ethernet address */
        for (i=0; i < ETH_ALEN/2; i++)
@@ -603,32 +551,24 @@ static int set_mac_address(struct net_device *dev, void *addr)
        return 0;
 }
 
-#ifdef MODULE
-
-static struct net_device *dev_cs89x0;
-static int debug;
-
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)");
 MODULE_LICENSE("GPL");
 
-int __init
-init_module(void)
+static int mac89x0_device_remove(struct platform_device *pdev)
 {
-       net_debug = debug;
-        dev_cs89x0 = mac89x0_probe(-1);
-       if (IS_ERR(dev_cs89x0)) {
-                printk(KERN_WARNING "mac89x0.c: No card found\n");
-               return PTR_ERR(dev_cs89x0);
-       }
+       struct net_device *dev = platform_get_drvdata(pdev);
+
+       unregister_netdev(dev);
+       nubus_writew(0, dev->base_addr + ADD_PORT);
+       free_netdev(dev);
        return 0;
 }
 
-void
-cleanup_module(void)
-{
-       unregister_netdev(dev_cs89x0);
-       nubus_writew(0, dev_cs89x0->base_addr + ADD_PORT);
-       free_netdev(dev_cs89x0);
-}
-#endif /* MODULE */
+static struct platform_driver mac89x0_platform_driver = {
+       .probe = mac89x0_device_probe,
+       .remove = mac89x0_device_remove,
+       .driver = {
+               .name = "mac89x0",
+       },
+};
+
+module_platform_driver(mac89x0_platform_driver);
index 1a49297..ff92ab1 100644 (file)
@@ -19,7 +19,7 @@
 #include "be.h"
 #include "be_cmds.h"
 
-char *be_misconfig_evt_port_state[] = {
+const char * const be_misconfig_evt_port_state[] = {
        "Physical Link is functional",
        "Optics faulted/incorrectly installed/not installed - Reseat optics. If issue not resolved, replace.",
        "Optics of two types installed â€“ Remove one optic or install matching pair of optics.",
index 09da2d8..e8b43cf 100644 (file)
@@ -201,7 +201,7 @@ enum {
                         phy_state == BE_PHY_UNQUALIFIED ||     \
                         phy_state == BE_PHY_UNCERTIFIED)
 
-extern  char *be_misconfig_evt_port_state[];
+extern const  char * const be_misconfig_evt_port_state[];
 
 /* async event indicating misconfigured port */
 struct be_async_event_misconfig_port {
index a998c36..159dc2d 100644 (file)
@@ -454,6 +454,16 @@ static void dpaa_set_rx_mode(struct net_device *net_dev)
                                  err);
        }
 
+       if (!!(net_dev->flags & IFF_ALLMULTI) != priv->mac_dev->allmulti) {
+               priv->mac_dev->allmulti = !priv->mac_dev->allmulti;
+               err = priv->mac_dev->set_allmulti(priv->mac_dev->fman_mac,
+                                                 priv->mac_dev->allmulti);
+               if (err < 0)
+                       netif_err(priv, drv, net_dev,
+                                 "mac_dev->set_allmulti() = %d\n",
+                                 err);
+       }
+
        err = priv->mac_dev->set_multi(net_dev, priv->mac_dev);
        if (err < 0)
                netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
@@ -1916,8 +1926,10 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
                goto csum_failed;
        }
 
+       /* SGT[0] is used by the linear part */
        sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
-       qm_sg_entry_set_len(&sgt[0], skb_headlen(skb));
+       frag_len = skb_headlen(skb);
+       qm_sg_entry_set_len(&sgt[0], frag_len);
        sgt[0].bpid = FSL_DPAA_BPID_INV;
        sgt[0].offset = 0;
        addr = dma_map_single(dev, skb->data,
@@ -1930,9 +1942,9 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
        qm_sg_entry_set64(&sgt[0], addr);
 
        /* populate the rest of SGT entries */
-       frag = &skb_shinfo(skb)->frags[0];
-       frag_len = frag->size;
-       for (i = 1; i <= nr_frags; i++, frag++) {
+       for (i = 0; i < nr_frags; i++) {
+               frag = &skb_shinfo(skb)->frags[i];
+               frag_len = frag->size;
                WARN_ON(!skb_frag_page(frag));
                addr = skb_frag_dma_map(dev, frag, 0,
                                        frag_len, dma_dir);
@@ -1942,15 +1954,16 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
                        goto sg_map_failed;
                }
 
-               qm_sg_entry_set_len(&sgt[i], frag_len);
-               sgt[i].bpid = FSL_DPAA_BPID_INV;
-               sgt[i].offset = 0;
+               qm_sg_entry_set_len(&sgt[i + 1], frag_len);
+               sgt[i + 1].bpid = FSL_DPAA_BPID_INV;
+               sgt[i + 1].offset = 0;
 
                /* keep the offset in the address */
-               qm_sg_entry_set64(&sgt[i], addr);
-               frag_len = frag->size;
+               qm_sg_entry_set64(&sgt[i + 1], addr);
        }
-       qm_sg_entry_set_f(&sgt[i - 1], frag_len);
+
+       /* Set the final bit in the last used entry of the SGT */
+       qm_sg_entry_set_f(&sgt[nr_frags], frag_len);
 
        qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
 
@@ -2052,19 +2065,23 @@ static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
        /* MAX_SKB_FRAGS is equal or larger than our dpaa_SGT_MAX_ENTRIES;
         * make sure we don't feed FMan with more fragments than it supports.
         */
-       if (nonlinear &&
-           likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) {
-               /* Just create a S/G fd based on the skb */
-               err = skb_to_sg_fd(priv, skb, &fd);
-               percpu_priv->tx_frag_skbuffs++;
-       } else {
+       if (unlikely(nonlinear &&
+                    (skb_shinfo(skb)->nr_frags >= DPAA_SGT_MAX_ENTRIES))) {
                /* If the egress skb contains more fragments than we support
                 * we have no choice but to linearize it ourselves.
                 */
-               if (unlikely(nonlinear) && __skb_linearize(skb))
+               if (__skb_linearize(skb))
                        goto enomem;
 
-               /* Finally, create a contig FD from this skb */
+               nonlinear = skb_is_nonlinear(skb);
+       }
+
+       if (nonlinear) {
+               /* Just create a S/G fd based on the skb */
+               err = skb_to_sg_fd(priv, skb, &fd);
+               percpu_priv->tx_frag_skbuffs++;
+       } else {
+               /* Create a contig FD from this skb */
                err = skb_to_contig_fd(priv, skb, &fd, &offset);
        }
        if (unlikely(err < 0))
@@ -2201,14 +2218,8 @@ static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal,
        if (dpaa_eth_napi_schedule(percpu_priv, portal))
                return qman_cb_dqrr_stop;
 
-       if (dpaa_eth_refill_bpools(priv))
-               /* Unable to refill the buffer pool due to insufficient
-                * system memory. Just release the frame back into the pool,
-                * otherwise we'll soon end up with an empty buffer pool.
-                */
-               dpaa_fd_release(net_dev, &dq->fd);
-       else
-               dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+       dpaa_eth_refill_bpools(priv);
+       dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
 
        return qman_cb_dqrr_consume;
 }
index ea43b49..9a581fa 100644 (file)
@@ -1117,6 +1117,25 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
        return 0;
 }
 
+/* Enable or disable all-multicast reception on a dTSEC MAC by toggling
+ * the RCTRL_MPROM (multicast promiscuous) bit in the receive control
+ * register.  Returns 0 on success, -EINVAL if the MAC is not initialized.
+ */
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable)
+{
+       struct dtsec_regs __iomem *regs = dtsec->regs;
+       u32 rctrl;
+
+       if (!is_init_done(dtsec->dtsec_drv_param))
+               return -EINVAL;
+
+       rctrl = ioread32be(&regs->rctrl);
+       if (enable)
+               rctrl |= RCTRL_MPROM;
+       else
+               rctrl &= ~RCTRL_MPROM;
+       iowrite32be(rctrl, &regs->rctrl);
+
+       return 0;
+}
+
 int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
 {
        struct dtsec_regs __iomem *regs = dtsec->regs;
index c4467c0..1a689ad 100644 (file)
@@ -55,5 +55,6 @@ int dtsec_set_exception(struct fman_mac *dtsec,
 int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
 int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
 int dtsec_get_version(struct fman_mac *dtsec, u32 *mac_version);
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable);
 
 #endif /* __DTSEC_H */
index c029688..446a97b 100644 (file)
@@ -350,6 +350,7 @@ struct fman_mac {
        struct fman_rev_info fm_rev_info;
        bool basex_if;
        struct phy_device *pcsphy;
+       bool allmulti_enabled;
 };
 
 static void add_addr_in_paddr(struct memac_regs __iomem *regs, u8 *adr,
@@ -940,6 +941,29 @@ int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
        return 0;
 }
 
+int memac_set_allmulti(struct fman_mac *memac, bool enable)
+{
+       u32 entry;
+       struct memac_regs __iomem *regs = memac->regs;
+
+       if (!is_init_done(memac->memac_drv_param))
+               return -EINVAL;
+
+       if (enable) {
+               for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+                       iowrite32be(entry | HASH_CTRL_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       } else {
+               for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+                       iowrite32be(entry & ~HASH_CTRL_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       }
+
+       memac->allmulti_enabled = enable;
+
+       return 0;
+}
+
 int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
 {
        struct memac_regs __iomem *regs = memac->regs;
@@ -963,8 +987,12 @@ int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
                        break;
                }
        }
-       if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
-               iowrite32be(hash & ~HASH_CTRL_MCAST_EN, &regs->hashtable_ctrl);
+
+       if (!memac->allmulti_enabled) {
+               if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
+                       iowrite32be(hash & ~HASH_CTRL_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       }
 
        return 0;
 }
index c4a6646..b5a5033 100644 (file)
@@ -57,5 +57,6 @@ int memac_set_exception(struct fman_mac *memac,
                        enum fman_mac_exceptions exception, bool enable);
 int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
 int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
+int memac_set_allmulti(struct fman_mac *memac, bool enable);
 
 #endif /* __MEMAC_H */
index 4b0f3a5..284735d 100644 (file)
@@ -217,6 +217,7 @@ struct fman_mac {
        struct tgec_cfg *cfg;
        void *fm;
        struct fman_rev_info fm_rev_info;
+       bool allmulti_enabled;
 };
 
 static void set_mac_address(struct tgec_regs __iomem *regs, u8 *adr)
@@ -564,6 +565,29 @@ int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
        return 0;
 }
 
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable)
+{
+       u32 entry;
+       struct tgec_regs __iomem *regs = tgec->regs;
+
+       if (!is_init_done(tgec->cfg))
+               return -EINVAL;
+
+       if (enable) {
+               for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+                       iowrite32be(entry | TGEC_HASH_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       } else {
+               for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+                       iowrite32be(entry & ~TGEC_HASH_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       }
+
+       tgec->allmulti_enabled = enable;
+
+       return 0;
+}
+
 int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
 {
        struct tgec_regs __iomem *regs = tgec->regs;
@@ -591,9 +615,12 @@ int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
                        break;
                }
        }
-       if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
-               iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
-                           &regs->hashtable_ctrl);
+
+       if (!tgec->allmulti_enabled) {
+               if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
+                       iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
+                                   &regs->hashtable_ctrl);
+       }
 
        return 0;
 }
index 514bba9..cbbd3b4 100644 (file)
@@ -51,5 +51,6 @@ int tgec_set_exception(struct fman_mac *tgec,
 int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
 int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
 int tgec_get_version(struct fman_mac *tgec, u32 *mac_version);
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable);
 
 #endif /* __TGEC_H */
index 88c0a06..4829dcd 100644 (file)
@@ -470,6 +470,7 @@ static void setup_dtsec(struct mac_device *mac_dev)
        mac_dev->set_tx_pause           = dtsec_set_tx_pause_frames;
        mac_dev->set_rx_pause           = dtsec_accept_rx_pause_frames;
        mac_dev->set_exception          = dtsec_set_exception;
+       mac_dev->set_allmulti           = dtsec_set_allmulti;
        mac_dev->set_multi              = set_multi;
        mac_dev->start                  = start;
        mac_dev->stop                   = stop;
@@ -488,6 +489,7 @@ static void setup_tgec(struct mac_device *mac_dev)
        mac_dev->set_tx_pause           = tgec_set_tx_pause_frames;
        mac_dev->set_rx_pause           = tgec_accept_rx_pause_frames;
        mac_dev->set_exception          = tgec_set_exception;
+       mac_dev->set_allmulti           = tgec_set_allmulti;
        mac_dev->set_multi              = set_multi;
        mac_dev->start                  = start;
        mac_dev->stop                   = stop;
@@ -506,6 +508,7 @@ static void setup_memac(struct mac_device *mac_dev)
        mac_dev->set_tx_pause           = memac_set_tx_pause_frames;
        mac_dev->set_rx_pause           = memac_accept_rx_pause_frames;
        mac_dev->set_exception          = memac_set_exception;
+       mac_dev->set_allmulti           = memac_set_allmulti;
        mac_dev->set_multi              = set_multi;
        mac_dev->start                  = start;
        mac_dev->stop                   = stop;
index eefb335..b520cec 100644 (file)
@@ -59,6 +59,7 @@ struct mac_device {
        bool rx_pause_active;
        bool tx_pause_active;
        bool promisc;
+       bool allmulti;
 
        int (*init)(struct mac_device *mac_dev);
        int (*start)(struct mac_device *mac_dev);
@@ -66,6 +67,7 @@ struct mac_device {
        void (*adjust_link)(struct mac_device *mac_dev);
        int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
        int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr);
+       int (*set_allmulti)(struct fman_mac *mac_dev, bool enable);
        int (*set_multi)(struct net_device *net_dev,
                         struct mac_device *mac_dev);
        int (*set_rx_pause)(struct fman_mac *mac_dev, bool en);
index 5a86a91..7654071 100644 (file)
@@ -111,7 +111,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int data);
 static void send_map_query(struct ibmvnic_adapter *adapter);
 static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
 static void send_request_unmap(struct ibmvnic_adapter *, u8);
-static void send_login(struct ibmvnic_adapter *adapter);
+static int send_login(struct ibmvnic_adapter *adapter);
 static void send_cap_queries(struct ibmvnic_adapter *adapter);
 static int init_sub_crqs(struct ibmvnic_adapter *);
 static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
@@ -809,8 +809,11 @@ static int ibmvnic_login(struct net_device *netdev)
                }
 
                reinit_completion(&adapter->init_done);
-               send_login(adapter);
-               if (!wait_for_completion_timeout(&adapter->init_done,
+               rc = send_login(adapter);
+               if (rc) {
+                       dev_err(dev, "Unable to attempt device login\n");
+                       return rc;
+               } else if (!wait_for_completion_timeout(&adapter->init_done,
                                                 timeout)) {
                        dev_err(dev, "Login timeout\n");
                        return -1;
@@ -845,8 +848,6 @@ static void release_resources(struct ibmvnic_adapter *adapter)
        release_tx_pools(adapter);
        release_rx_pools(adapter);
 
-       release_stats_token(adapter);
-       release_stats_buffers(adapter);
        release_error_buffers(adapter);
        release_napi(adapter);
        release_login_rsp_buffer(adapter);
@@ -974,14 +975,6 @@ static int init_resources(struct ibmvnic_adapter *adapter)
        if (rc)
                return rc;
 
-       rc = init_stats_buffers(adapter);
-       if (rc)
-               return rc;
-
-       rc = init_stats_token(adapter);
-       if (rc)
-               return rc;
-
        adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
        if (!adapter->vpd)
                return -ENOMEM;
@@ -1091,6 +1084,7 @@ static int ibmvnic_open(struct net_device *netdev)
 static void clean_rx_pools(struct ibmvnic_adapter *adapter)
 {
        struct ibmvnic_rx_pool *rx_pool;
+       struct ibmvnic_rx_buff *rx_buff;
        u64 rx_entries;
        int rx_scrqs;
        int i, j;
@@ -1104,14 +1098,15 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter)
        /* Free any remaining skbs in the rx buffer pools */
        for (i = 0; i < rx_scrqs; i++) {
                rx_pool = &adapter->rx_pool[i];
-               if (!rx_pool)
+               if (!rx_pool || !rx_pool->rx_buff)
                        continue;
 
                netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i);
                for (j = 0; j < rx_entries; j++) {
-                       if (rx_pool->rx_buff[j].skb) {
-                               dev_kfree_skb_any(rx_pool->rx_buff[j].skb);
-                               rx_pool->rx_buff[j].skb = NULL;
+                       rx_buff = &rx_pool->rx_buff[j];
+                       if (rx_buff && rx_buff->skb) {
+                               dev_kfree_skb_any(rx_buff->skb);
+                               rx_buff->skb = NULL;
                        }
                }
        }
@@ -1120,6 +1115,7 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter)
 static void clean_tx_pools(struct ibmvnic_adapter *adapter)
 {
        struct ibmvnic_tx_pool *tx_pool;
+       struct ibmvnic_tx_buff *tx_buff;
        u64 tx_entries;
        int tx_scrqs;
        int i, j;
@@ -1133,14 +1129,15 @@ static void clean_tx_pools(struct ibmvnic_adapter *adapter)
        /* Free any remaining skbs in the tx buffer pools */
        for (i = 0; i < tx_scrqs; i++) {
                tx_pool = &adapter->tx_pool[i];
-               if (!tx_pool)
+               if (!tx_pool || !tx_pool->tx_buff)
                        continue;
 
                netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
                for (j = 0; j < tx_entries; j++) {
-                       if (tx_pool->tx_buff[j].skb) {
-                               dev_kfree_skb_any(tx_pool->tx_buff[j].skb);
-                               tx_pool->tx_buff[j].skb = NULL;
+                       tx_buff = &tx_pool->tx_buff[j];
+                       if (tx_buff && tx_buff->skb) {
+                               dev_kfree_skb_any(tx_buff->skb);
+                               tx_buff->skb = NULL;
                        }
                }
        }
@@ -1482,6 +1479,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
        if ((*hdrs >> 7) & 1) {
                build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
                tx_crq.v1.n_crq_elem = num_entries;
+               tx_buff->num_entries = num_entries;
                tx_buff->indir_arr[0] = tx_crq;
                tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
                                                    sizeof(tx_buff->indir_arr),
@@ -1500,6 +1498,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
                                               (u64)tx_buff->indir_dma,
                                               (u64)num_entries);
        } else {
+               tx_buff->num_entries = num_entries;
                lpar_rc = send_subcrq(adapter, handle_array[queue_num],
                                      &tx_crq);
        }
@@ -1532,11 +1531,10 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
        if (atomic_add_return(num_entries, &tx_scrq->used)
                                        >= adapter->req_tx_entries_per_subcrq) {
-               netdev_info(netdev, "Stopping queue %d\n", queue_num);
+               netdev_dbg(netdev, "Stopping queue %d\n", queue_num);
                netif_stop_subqueue(netdev, queue_num);
        }
 
-       tx_buff->num_entries = num_entries;
        tx_packets++;
        tx_bytes += skb->len;
        txq->trans_start = jiffies;
@@ -2546,8 +2544,8 @@ restart_loop:
                    __netif_subqueue_stopped(adapter->netdev,
                                             scrq->pool_index)) {
                        netif_wake_subqueue(adapter->netdev, scrq->pool_index);
-                       netdev_info(adapter->netdev, "Started queue %d\n",
-                                   scrq->pool_index);
+                       netdev_dbg(adapter->netdev, "Started queue %d\n",
+                                  scrq->pool_index);
                }
        }
 
@@ -3079,7 +3077,7 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter,
        strncpy(&vlcd->name, adapter->netdev->name, len);
 }
 
-static void send_login(struct ibmvnic_adapter *adapter)
+static int send_login(struct ibmvnic_adapter *adapter)
 {
        struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
        struct ibmvnic_login_buffer *login_buffer;
@@ -3095,6 +3093,12 @@ static void send_login(struct ibmvnic_adapter *adapter)
        struct vnic_login_client_data *vlcd;
        int i;
 
+       if (!adapter->tx_scrq || !adapter->rx_scrq) {
+               netdev_err(adapter->netdev,
+                          "RX or TX queues are not allocated, device login failed\n");
+               return -1;
+       }
+
        release_login_rsp_buffer(adapter);
        client_data_len = vnic_client_data_len(adapter);
 
@@ -3192,7 +3196,7 @@ static void send_login(struct ibmvnic_adapter *adapter)
        crq.login.len = cpu_to_be32(buffer_size);
        ibmvnic_send_crq(adapter, &crq);
 
-       return;
+       return 0;
 
 buf_rsp_map_failed:
        kfree(login_rsp_buffer);
@@ -3201,7 +3205,7 @@ buf_rsp_alloc_failed:
 buf_map_failed:
        kfree(login_buffer);
 buf_alloc_failed:
-       return;
+       return -1;
 }
 
 static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
@@ -4430,6 +4434,14 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
                release_crq_queue(adapter);
        }
 
+       rc = init_stats_buffers(adapter);
+       if (rc)
+               return rc;
+
+       rc = init_stats_token(adapter);
+       if (rc)
+               return rc;
+
        return rc;
 }
 
@@ -4537,6 +4549,9 @@ static int ibmvnic_remove(struct vio_dev *dev)
        release_sub_crqs(adapter, 1);
        release_crq_queue(adapter);
 
+       release_stats_token(adapter);
+       release_stats_buffers(adapter);
+
        adapter->state = VNIC_REMOVED;
 
        mutex_unlock(&adapter->reset_lock);
index 736a9f0..c58a537 100644 (file)
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -262,6 +262,7 @@ s32 fm10k_stop_hw_generic(struct fm10k_hw *hw)
  *  fm10k_read_hw_stats_32b - Reads value of 32-bit registers
  *  @hw: pointer to the hardware structure
  *  @addr: address of register containing a 32-bit value
+ *  @stat: pointer to structure holding hw stat information
  *
  *  Function reads the content of the register and returns the delta
  *  between the base and the current value.
@@ -281,6 +282,7 @@ u32 fm10k_read_hw_stats_32b(struct fm10k_hw *hw, u32 addr,
  *  fm10k_read_hw_stats_48b - Reads value of 48-bit registers
  *  @hw: pointer to the hardware structure
  *  @addr: address of register containing the lower 32-bit value
+ *  @stat: pointer to structure holding hw stat information
  *
  *  Function reads the content of 2 registers, combined to represent a 48-bit
  *  statistical value. Extra processing is required to handle overflowing.
@@ -461,7 +463,6 @@ void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q,
 
 /**
  *  fm10k_unbind_hw_stats_q - Unbind the queue counters from their queues
- *  @hw: pointer to the hardware structure
  *  @q: pointer to the ring of hardware statistics queue
  *  @idx: index pointing to the start of the ring iteration
  *  @count: number of queues to iterate over
index 8e12aae..2c93d71 100644 (file)
 
 #include "fm10k.h"
 
-#define DRV_VERSION    "0.22.1-k"
+#define DRV_VERSION    "0.23.4-k"
 #define DRV_SUMMARY    "Intel(R) Ethernet Switch Host Interface Driver"
 const char fm10k_driver_version[] = DRV_VERSION;
 char fm10k_driver_name[] = "fm10k";
 static const char fm10k_driver_string[] = DRV_SUMMARY;
 static const char fm10k_copyright[] =
-       "Copyright(c) 2013 - 2017 Intel Corporation.";
+       "Copyright(c) 2013 - 2018 Intel Corporation.";
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
 MODULE_DESCRIPTION(DRV_SUMMARY);
index a38ae5c..75c99ae 100644 (file)
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -486,7 +486,7 @@ static void fm10k_insert_tunnel_port(struct list_head *ports,
 
 /**
  * fm10k_udp_tunnel_add
- * @netdev: network interface device structure
+ * @dev: network interface device structure
  * @ti: Tunnel endpoint information
  *
  * This function is called when a new UDP tunnel port has been added.
@@ -518,8 +518,8 @@ static void fm10k_udp_tunnel_add(struct net_device *dev,
 
 /**
  * fm10k_udp_tunnel_del
- * @netdev: network interface device structure
- * @ti: Tunnel endpoint information
+ * @dev: network interface device structure
+ * @ti: Tunnel end point information
  *
  * This function is called when a new UDP tunnel port is deleted. The freed
  * port will be removed from the list, then we reprogram the offloaded port
@@ -803,7 +803,7 @@ int fm10k_queue_vlan_request(struct fm10k_intfc *interface,
  * @glort: the target glort for this update
  * @addr: the address to update
  * @vid: the vid to update
- * @sync: whether to add or remove
+ * @set: whether to add or remove
  *
  * This function queues up a MAC request for sending to the switch manager.
  * A separate thread monitors the queue and sends updates to the switch
index a434fec..50f53e4 100644 (file)
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -29,7 +29,7 @@ static const struct fm10k_info *fm10k_info_tbl[] = {
        [fm10k_device_vf] = &fm10k_vf_info,
 };
 
-/**
+/*
  * fm10k_pci_tbl - PCI Device ID Table
  *
  * Wildcard entries (PCI_ANY_ID) should come last
@@ -211,7 +211,7 @@ static void fm10k_start_service_event(struct fm10k_intfc *interface)
 
 /**
  * fm10k_service_timer - Timer Call-back
- * @data: pointer to interface cast into an unsigned long
+ * @t: pointer to timer data
  **/
 static void fm10k_service_timer(struct timer_list *t)
 {
@@ -649,7 +649,7 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
 
 /**
  * fm10k_watchdog_flush_tx - flush queues on host not ready
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
  **/
 static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
 {
@@ -679,7 +679,7 @@ static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
 
 /**
  * fm10k_watchdog_subtask - check and bring link up
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
  **/
 static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
 {
@@ -703,7 +703,7 @@ static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
 
 /**
  * fm10k_check_hang_subtask - check for hung queues and dropped interrupts
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
  *
  * This function serves two purposes.  First it strobes the interrupt lines
  * in order to make certain interrupts are occurring.  Secondly it sets the
@@ -1995,6 +1995,7 @@ skip_tx_dma_drain:
 /**
  * fm10k_sw_init - Initialize general software structures
  * @interface: host interface private structure to initialize
+ * @ent: PCI device ID entry
  *
  * fm10k_sw_init initializes the interface private data structure.
  * Fields are initialized based on PCI device information and
index d6406fc..bee192f 100644 (file)
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -1180,7 +1180,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results,
 
 /**
  * fm10k_iov_select_vid - Select correct default VLAN ID
- * @hw: Pointer to hardware structure
+ * @vf_info: pointer to VF information structure
  * @vid: VLAN ID to correct
  *
  * Will report an error if the VLAN ID is out of range. For VID = 0, it will
index f8e87bf..9d0d31d 100644 (file)
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -120,6 +120,7 @@ static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string)
  *  @msg: Pointer to message block
  *  @attr_id: Attribute ID
  *  @mac_addr: MAC address to be stored
+ *  @vlan: VLAN to be stored
  *
  *  This function will reorder a MAC address to be CPU endian and store it
  *  in the attribute buffer.  It will return success if provided with a
@@ -155,8 +156,8 @@ s32 fm10k_tlv_attr_put_mac_vlan(u32 *msg, u16 attr_id,
 /**
  *  fm10k_tlv_attr_get_mac_vlan - Get MAC/VLAN stored in attribute
  *  @attr: Pointer to attribute
- *  @attr_id: Attribute ID
  *  @mac_addr: location of buffer to store MAC address
+ *  @vlan: location of buffer to store VLAN
  *
  *  This function pulls the MAC address back out of the attribute and will
  *  place it in the array pointed by by mac_addr.  It will return success
@@ -549,7 +550,7 @@ static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results,
  *  @hw: Pointer to hardware structure
  *  @msg: Pointer to message
  *  @mbx: Pointer to mailbox information structure
- *  @func: Function array containing list of message handling functions
+ *  @data: Pointer to message handler data structure
  *
  *  This function should be the first function called upon receiving a
  *  message.  The handler will identify the message type and call the correct
index a852775..0dfc527 100644 (file)
@@ -1914,6 +1914,43 @@ enum i40e_aq_phy_type {
        I40E_PHY_TYPE_DEFAULT                   = 0xFF,
 };
 
+#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_XAUI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XFI) | \
+                               BIT_ULL(I40E_PHY_TYPE_SFI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XLAUI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XLPPI) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \
+                               BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \
+                               BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \
+                               BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC))
+
 #define I40E_LINK_SPEED_100MB_SHIFT    0x1
 #define I40E_LINK_SPEED_1000MB_SHIFT   0x2
 #define I40E_LINK_SPEED_10GB_SHIFT     0x3
index 0dcbbda..89807e3 100644 (file)
@@ -230,6 +230,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
        I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
        I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
        I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
+       I40E_PRIV_FLAG("link-down-on-close",
+                      I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0),
        I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
        I40E_PRIV_FLAG("disable-source-pruning",
                       I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
index f6d3745..be9a146 100644 (file)
@@ -6546,6 +6546,75 @@ int i40e_up(struct i40e_vsi *vsi)
        return err;
 }
 
+/**
+ * i40e_force_link_state - Force the link status
+ * @pf: board private structure
+ * @is_up: whether the link state should be forced up or down
+ **/
+static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+{
+       struct i40e_aq_get_phy_abilities_resp abilities;
+       struct i40e_aq_set_phy_config config = {0};
+       struct i40e_hw *hw = &pf->hw;
+       i40e_status err;
+       u64 mask;
+
+       /* Get the current phy config */
+       err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+                                          NULL);
+       if (err) {
+               dev_err(&pf->pdev->dev,
+                       "failed to get phy cap., ret =  %s last_status =  %s\n",
+                       i40e_stat_str(hw, err),
+                       i40e_aq_str(hw, hw->aq.asq_last_status));
+               return err;
+       }
+
+       /* If link needs to go up, but was not forced to go down,
+        * no need for a flap
+        */
+       if (is_up && abilities.phy_type != 0)
+               return I40E_SUCCESS;
+
+       /* To force link we need to set bits for all supported PHY types,
+        * but there are now more than 32, so we need to split the bitmap
+        * across two fields.
+        */
+       mask = I40E_PHY_TYPES_BITMASK;
+       config.phy_type = is_up ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0;
+       config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0;
+       /* Copy the old settings, except of phy_type */
+       config.abilities = abilities.abilities;
+       config.link_speed = abilities.link_speed;
+       config.eee_capability = abilities.eee_capability;
+       config.eeer = abilities.eeer_val;
+       config.low_power_ctrl = abilities.d3_lpan;
+       err = i40e_aq_set_phy_config(hw, &config, NULL);
+
+       if (err) {
+               dev_err(&pf->pdev->dev,
+                       "set phy config ret =  %s last_status =  %s\n",
+                       i40e_stat_str(&pf->hw, err),
+                       i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+               return err;
+       }
+
+       /* Update the link info */
+       err = i40e_update_link_info(hw);
+       if (err) {
+               /* Wait a little bit (on 40G cards it sometimes takes a really
+                * long time for link to come back from the atomic reset)
+                * and try once more
+                */
+               msleep(1000);
+               i40e_update_link_info(hw);
+       }
+
+       i40e_aq_set_link_restart_an(hw, true, NULL);
+
+       return I40E_SUCCESS;
+}
+
 /**
  * i40e_down - Shutdown the connection processing
  * @vsi: the VSI being stopped
@@ -6563,6 +6632,9 @@ void i40e_down(struct i40e_vsi *vsi)
        }
        i40e_vsi_disable_irq(vsi);
        i40e_vsi_stop_rings(vsi);
+       if (vsi->type == I40E_VSI_MAIN &&
+           vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED)
+               i40e_force_link_state(vsi->back, false);
        i40e_napi_disable_all(vsi);
 
        for (i = 0; i < vsi->num_queue_pairs; i++) {
@@ -7524,6 +7596,9 @@ int i40e_open(struct net_device *netdev)
 
        netif_carrier_off(netdev);
 
+       if (i40e_force_link_state(pf, true))
+               return -EAGAIN;
+
        err = i40e_vsi_open(vsi);
        if (err)
                return err;
index 1ec9b1d..97cfe94 100644 (file)
@@ -708,16 +708,22 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
 /**
  * i40e_get_tx_pending - how many tx descriptors not processed
  * @tx_ring: the ring of descriptors
+ * @in_sw: use SW variables
  *
  * Since there is no access to the ring head register
  * in XL710, we need to use our local copies
  **/
-u32 i40e_get_tx_pending(struct i40e_ring *ring)
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
 {
        u32 head, tail;
 
-       head = i40e_get_head(ring);
-       tail = readl(ring->tail);
+       if (!in_sw) {
+               head = i40e_get_head(ring);
+               tail = readl(ring->tail);
+       } else {
+               head = ring->next_to_clean;
+               tail = ring->next_to_use;
+       }
 
        if (head != tail)
                return (head < tail) ?
@@ -774,7 +780,7 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
                         */
                        smp_rmb();
                        tx_ring->tx_stats.prev_pkt_ctr =
-                           i40e_get_tx_pending(tx_ring) ? packets : -1;
+                           i40e_get_tx_pending(tx_ring, true) ? packets : -1;
                }
        }
 }
@@ -898,7 +904,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
                 * them to be written back in case we stay in NAPI.
                 * In this mode on X722 we do not enable Interrupt.
                 */
-               unsigned int j = i40e_get_tx_pending(tx_ring);
+               unsigned int j = i40e_get_tx_pending(tx_ring, false);
 
                if (budget &&
                    ((j / WB_STRIDE) == 0) && (j > 0) &&
index f75a8fe..3c80ea7 100644 (file)
@@ -505,7 +505,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring);
 void i40e_free_rx_resources(struct i40e_ring *rx_ring);
 int i40e_napi_poll(struct napi_struct *napi, int budget);
 void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
-u32 i40e_get_tx_pending(struct i40e_ring *ring);
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 void i40e_detect_recover_hung(struct i40e_vsi *vsi);
 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
index 5cca083..e23975c 100644 (file)
@@ -3062,7 +3062,7 @@ static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid)
 
        for (i = 0; i < vf->num_tc ; i++) {
                vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id);
-               if (vsi->seid == seid)
+               if (vsi && vsi->seid == seid)
                        return vsi;
        }
        return NULL;
@@ -3146,8 +3146,8 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
                dev_info(&pf->pdev->dev,
                         "VF %d: Invalid input, can't apply cloud filter\n",
                         vf->vf_id);
-                       aq_ret = I40E_ERR_PARAM;
-                       goto err;
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
        }
 
        memset(&cfilter, 0, sizeof(cfilter));
index eb8f3e3..e088d23 100644 (file)
@@ -196,7 +196,7 @@ void i40evf_detect_recover_hung(struct i40e_vsi *vsi)
                         */
                        smp_rmb();
                        tx_ring->tx_stats.prev_pkt_ctr =
-                         i40evf_get_tx_pending(tx_ring, false) ? packets : -1;
+                         i40evf_get_tx_pending(tx_ring, true) ? packets : -1;
                }
        }
 }
index 4955ce3..dae1218 100644 (file)
@@ -815,13 +815,11 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
        if (!macaddr)
                return NULL;
 
-       spin_lock_bh(&adapter->mac_vlan_list_lock);
-
        f = i40evf_find_filter(adapter, macaddr);
        if (!f) {
                f = kzalloc(sizeof(*f), GFP_ATOMIC);
                if (!f)
-                       goto clearout;
+                       return f;
 
                ether_addr_copy(f->macaddr, macaddr);
 
@@ -832,8 +830,6 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
                f->remove = false;
        }
 
-clearout:
-       spin_unlock_bh(&adapter->mac_vlan_list_lock);
        return f;
 }
 
@@ -868,9 +864,10 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
                adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
        }
 
+       f = i40evf_add_filter(adapter, addr->sa_data);
+
        spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
-       f = i40evf_add_filter(adapter, addr->sa_data);
        if (f) {
                ether_addr_copy(hw->mac.addr, addr->sa_data);
                ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
@@ -2493,6 +2490,7 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
        u16 addr_type = 0;
        u16 n_proto = 0;
        int i = 0;
+       struct virtchnl_filter *vf = &filter->f;
 
        if (f->dissector->used_keys &
            ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
@@ -2540,7 +2538,7 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                        return -EINVAL;
                if (n_proto == ETH_P_IPV6) {
                        /* specify flow type as TCP IPv6 */
-                       filter->f.flow_type = VIRTCHNL_TCP_V6_FLOW;
+                       vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
                }
 
                if (key->ip_proto != IPPROTO_TCP) {
@@ -2585,9 +2583,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                            is_multicast_ether_addr(key->dst)) {
                                /* set the mask if a valid dst_mac address */
                                for (i = 0; i < ETH_ALEN; i++)
-                                       filter->f.mask.tcp_spec.dst_mac[i] |=
-                                                                       0xff;
-                               ether_addr_copy(filter->f.data.tcp_spec.dst_mac,
+                                       vf->mask.tcp_spec.dst_mac[i] |= 0xff;
+                               ether_addr_copy(vf->data.tcp_spec.dst_mac,
                                                key->dst);
                        }
 
@@ -2596,9 +2593,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                            is_multicast_ether_addr(key->src)) {
                                /* set the mask if a valid dst_mac address */
                                for (i = 0; i < ETH_ALEN; i++)
-                                       filter->f.mask.tcp_spec.src_mac[i] |=
-                                                                       0xff;
-                               ether_addr_copy(filter->f.data.tcp_spec.src_mac,
+                                       vf->mask.tcp_spec.src_mac[i] |= 0xff;
+                               ether_addr_copy(vf->data.tcp_spec.src_mac,
                                                key->src);
                }
        }
@@ -2622,8 +2618,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                                return I40E_ERR_CONFIG;
                        }
                }
-               filter->f.mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
-               filter->f.data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
+               vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+               vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
        }
 
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
@@ -2670,14 +2666,12 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                        return I40E_ERR_CONFIG;
                }
                if (key->dst) {
-                       filter->f.mask.tcp_spec.dst_ip[0] |=
-                                                       cpu_to_be32(0xffffffff);
-                       filter->f.data.tcp_spec.dst_ip[0] = key->dst;
+                       vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
+                       vf->data.tcp_spec.dst_ip[0] = key->dst;
                }
                if (key->src) {
-                       filter->f.mask.tcp_spec.src_ip[0] |=
-                                                       cpu_to_be32(0xffffffff);
-                       filter->f.data.tcp_spec.src_ip[0] = key->src;
+                       vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
+                       vf->data.tcp_spec.src_ip[0] = key->src;
                }
        }
 
@@ -2710,22 +2704,14 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
                        field_flags |= I40EVF_CLOUD_FIELD_IIP;
 
-               if (key->dst.s6_addr) {
-                       for (i = 0; i < 4; i++)
-                               filter->f.mask.tcp_spec.dst_ip[i] |=
-                                                       cpu_to_be32(0xffffffff);
-                       memcpy(&filter->f.data.tcp_spec.dst_ip,
-                              &key->dst.s6_addr32,
-                              sizeof(filter->f.data.tcp_spec.dst_ip));
-               }
-               if (key->src.s6_addr) {
-                       for (i = 0; i < 4; i++)
-                               filter->f.mask.tcp_spec.src_ip[i] |=
-                                                       cpu_to_be32(0xffffffff);
-                       memcpy(&filter->f.data.tcp_spec.src_ip,
-                              &key->src.s6_addr32,
-                              sizeof(filter->f.data.tcp_spec.src_ip));
-               }
+               for (i = 0; i < 4; i++)
+                       vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff);
+               memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32,
+                      sizeof(vf->data.tcp_spec.dst_ip));
+               for (i = 0; i < 4; i++)
+                       vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff);
+               memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32,
+                      sizeof(vf->data.tcp_spec.src_ip));
        }
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_dissector_key_ports *key =
@@ -2757,16 +2743,16 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                        }
                }
                if (key->dst) {
-                       filter->f.mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
-                       filter->f.data.tcp_spec.dst_port = key->dst;
+                       vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
+                       vf->data.tcp_spec.dst_port = key->dst;
                }
 
                if (key->src) {
-                       filter->f.mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
-                       filter->f.data.tcp_spec.src_port = key->dst;
+                       vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
+                       vf->data.tcp_spec.src_port = key->dst;
                }
        }
-       filter->f.field_flags = field_flags;
+       vf->field_flags = field_flags;
 
        return 0;
 }
@@ -3040,7 +3026,12 @@ static int i40evf_open(struct net_device *netdev)
        if (err)
                goto err_req_irq;
 
+       spin_lock_bh(&adapter->mac_vlan_list_lock);
+
        i40evf_add_filter(adapter, adapter->hw.mac.addr);
+
+       spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
        i40evf_configure(adapter);
 
        i40evf_up_complete(adapter);
index 6134b61..3c76c81 100644 (file)
@@ -1048,24 +1048,28 @@ void i40evf_disable_channels(struct i40evf_adapter *adapter)
  * Print the cloud filter
  **/
 static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter,
-                                     struct virtchnl_filter f)
+                                     struct virtchnl_filter *f)
 {
-       switch (f.flow_type) {
+       switch (f->flow_type) {
        case VIRTCHNL_TCP_V4_FLOW:
                dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
-                        &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
-                        ntohs(f.data.tcp_spec.vlan_id),
-                        &f.data.tcp_spec.dst_ip[0], &f.data.tcp_spec.src_ip[0],
-                        ntohs(f.data.tcp_spec.dst_port),
-                        ntohs(f.data.tcp_spec.src_port));
+                        &f->data.tcp_spec.dst_mac,
+                        &f->data.tcp_spec.src_mac,
+                        ntohs(f->data.tcp_spec.vlan_id),
+                        &f->data.tcp_spec.dst_ip[0],
+                        &f->data.tcp_spec.src_ip[0],
+                        ntohs(f->data.tcp_spec.dst_port),
+                        ntohs(f->data.tcp_spec.src_port));
                break;
        case VIRTCHNL_TCP_V6_FLOW:
                dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
-                        &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
-                        ntohs(f.data.tcp_spec.vlan_id),
-                        &f.data.tcp_spec.dst_ip, &f.data.tcp_spec.src_ip,
-                        ntohs(f.data.tcp_spec.dst_port),
-                        ntohs(f.data.tcp_spec.src_port));
+                        &f->data.tcp_spec.dst_mac,
+                        &f->data.tcp_spec.src_mac,
+                        ntohs(f->data.tcp_spec.vlan_id),
+                        &f->data.tcp_spec.dst_ip,
+                        &f->data.tcp_spec.src_ip,
+                        ntohs(f->data.tcp_spec.dst_port),
+                        ntohs(f->data.tcp_spec.src_port));
                break;
        }
 }
@@ -1303,7 +1307,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                                                 i40evf_stat_str(&adapter->hw,
                                                                 v_retval));
                                        i40evf_print_cloud_filter(adapter,
-                                                                 cf->f);
+                                                                 &cf->f);
                                        list_del(&cf->list);
                                        kfree(cf);
                                        adapter->num_cloud_filters--;
@@ -1322,7 +1326,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                                                 i40evf_stat_str(&adapter->hw,
                                                                 v_retval));
                                        i40evf_print_cloud_filter(adapter,
-                                                                 cf->f);
+                                                                 &cf->f);
                                }
                        }
                        }
index 221f158..89edb9f 100644 (file)
@@ -3059,6 +3059,8 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
 
                for (i = 0; i < reta_entries; i++)
                        adapter->rss_indir_tbl[i] = indir[i];
+
+               ixgbe_store_reta(adapter);
        }
 
        /* Fill out the rss hash key */
@@ -3067,8 +3069,6 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
                ixgbe_store_key(adapter);
        }
 
-       ixgbe_store_reta(adapter);
-
        return 0;
 }
 
index 4242f02..ed4cbe9 100644 (file)
@@ -58,7 +58,6 @@ static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
                return false;
 
        /* start at VMDq register offset for SR-IOV enabled setups */
-       pool = 0;
        reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
        for (i = 0, pool = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
                /* If we are greater than indices move to next pool */
index 0da5aa2..b032091 100644 (file)
@@ -7703,7 +7703,8 @@ static void ixgbe_service_task(struct work_struct *work)
 
        if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) {
                ixgbe_ptp_overflow_check(adapter);
-               ixgbe_ptp_rx_hang(adapter);
+               if (adapter->flags & IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER)
+                       ixgbe_ptp_rx_hang(adapter);
                ixgbe_ptp_tx_hang(adapter);
        }
 
index 4400e49..e7623fe 100644 (file)
@@ -94,6 +94,13 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
 
 #define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
 
+static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IXGBEVF_PRIV_FLAGS_LEGACY_RX   BIT(0)
+       "legacy-rx",
+};
+
+#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings)
+
 static int ixgbevf_get_link_ksettings(struct net_device *netdev,
                                      struct ethtool_link_ksettings *cmd)
 {
@@ -241,6 +248,8 @@ static void ixgbevf_get_drvinfo(struct net_device *netdev,
                sizeof(drvinfo->version));
        strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
                sizeof(drvinfo->bus_info));
+
+       drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN;
 }
 
 static void ixgbevf_get_ringparam(struct net_device *netdev,
@@ -392,6 +401,8 @@ static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset)
                return IXGBEVF_TEST_LEN;
        case ETH_SS_STATS:
                return IXGBEVF_STATS_LEN;
+       case ETH_SS_PRIV_FLAGS:
+               return IXGBEVF_PRIV_FLAGS_STR_LEN;
        default:
                return -EINVAL;
        }
@@ -496,6 +507,10 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset,
                        p += ETH_GSTRING_LEN;
                }
                break;
+       case ETH_SS_PRIV_FLAGS:
+               memcpy(data, ixgbevf_priv_flags_strings,
+                      IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+               break;
        }
 }
 
@@ -888,6 +903,37 @@ static int ixgbevf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
        return err;
 }
 
+static u32 ixgbevf_get_priv_flags(struct net_device *netdev)
+{
+       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+       u32 priv_flags = 0;
+
+       if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+               priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX;
+
+       return priv_flags;
+}
+
+static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+       unsigned int flags = adapter->flags;
+
+       flags &= ~IXGBEVF_FLAGS_LEGACY_RX;
+       if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX)
+               flags |= IXGBEVF_FLAGS_LEGACY_RX;
+
+       if (flags != adapter->flags) {
+               adapter->flags = flags;
+
+               /* reset interface to repopulate queues */
+               if (netif_running(netdev))
+                       ixgbevf_reinit_locked(adapter);
+       }
+
+       return 0;
+}
+
 static const struct ethtool_ops ixgbevf_ethtool_ops = {
        .get_drvinfo            = ixgbevf_get_drvinfo,
        .get_regs_len           = ixgbevf_get_regs_len,
@@ -909,6 +955,8 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = {
        .get_rxfh_key_size      = ixgbevf_get_rxfh_key_size,
        .get_rxfh               = ixgbevf_get_rxfh,
        .get_link_ksettings     = ixgbevf_get_link_ksettings,
+       .get_priv_flags         = ixgbevf_get_priv_flags,
+       .set_priv_flags         = ixgbevf_set_priv_flags,
 };
 
 void ixgbevf_set_ethtool_ops(struct net_device *netdev)
index f695242..f65ca15 100644 (file)
@@ -89,19 +89,15 @@ struct ixgbevf_rx_queue_stats {
 };
 
 enum ixgbevf_ring_state_t {
+       __IXGBEVF_RX_3K_BUFFER,
+       __IXGBEVF_RX_BUILD_SKB_ENABLED,
        __IXGBEVF_TX_DETECT_HANG,
        __IXGBEVF_HANG_CHECK_ARMED,
 };
 
-#define check_for_tx_hang(ring) \
-       test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define set_check_for_tx_hang(ring) \
-       set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define clear_check_for_tx_hang(ring) \
-       clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-
 struct ixgbevf_ring {
        struct ixgbevf_ring *next;
+       struct ixgbevf_q_vector *q_vector;      /* backpointer to q_vector */
        struct net_device *netdev;
        struct device *dev;
        void *desc;                     /* descriptor ring memory */
@@ -133,7 +129,7 @@ struct ixgbevf_ring {
         */
        u16 reg_idx;
        int queue_index; /* needed for multiqueue queue management */
-};
+} ____cacheline_internodealigned_in_smp;
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
 #define IXGBEVF_RX_BUFFER_WRITE        16      /* Must be power of 2 */
@@ -156,12 +152,20 @@ struct ixgbevf_ring {
 /* Supported Rx Buffer Sizes */
 #define IXGBEVF_RXBUFFER_256   256    /* Used for packet split */
 #define IXGBEVF_RXBUFFER_2048  2048
+#define IXGBEVF_RXBUFFER_3072  3072
 
 #define IXGBEVF_RX_HDR_SIZE    IXGBEVF_RXBUFFER_256
-#define IXGBEVF_RX_BUFSZ       IXGBEVF_RXBUFFER_2048
 
 #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
 
+#define IXGBEVF_SKB_PAD                (NET_SKB_PAD + NET_IP_ALIGN)
+#if (PAGE_SIZE < 8192)
+#define IXGBEVF_MAX_FRAME_BUILD_SKB \
+       (SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD)
+#else
+#define IXGBEVF_MAX_FRAME_BUILD_SKB    IXGBEVF_RXBUFFER_2048
+#endif
+
 #define IXGBE_TX_FLAGS_CSUM            BIT(0)
 #define IXGBE_TX_FLAGS_VLAN            BIT(1)
 #define IXGBE_TX_FLAGS_TSO             BIT(2)
@@ -170,6 +174,50 @@ struct ixgbevf_ring {
 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK  0x0000e000
 #define IXGBE_TX_FLAGS_VLAN_SHIFT      16
 
+#define ring_uses_large_buffer(ring) \
+       test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define set_ring_uses_large_buffer(ring) \
+       set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define clear_ring_uses_large_buffer(ring) \
+       clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+
+#define ring_uses_build_skb(ring) \
+       test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define set_ring_build_skb_enabled(ring) \
+       set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define clear_ring_build_skb_enabled(ring) \
+       clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+
+static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+       if (ring_uses_large_buffer(ring))
+               return IXGBEVF_RXBUFFER_3072;
+
+       if (ring_uses_build_skb(ring))
+               return IXGBEVF_MAX_FRAME_BUILD_SKB;
+#endif
+       return IXGBEVF_RXBUFFER_2048;
+}
+
+static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+       if (ring_uses_large_buffer(ring))
+               return 1;
+#endif
+       return 0;
+}
+
+#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring))
+
+#define check_for_tx_hang(ring) \
+       test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+       set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+       clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+
 struct ixgbevf_ring_container {
        struct ixgbevf_ring *ring;      /* pointer to linked list of rings */
        unsigned int total_bytes;       /* total bytes processed this int */
@@ -194,7 +242,11 @@ struct ixgbevf_q_vector {
        u16 itr; /* Interrupt throttle rate written to EITR */
        struct napi_struct napi;
        struct ixgbevf_ring_container rx, tx;
+       struct rcu_head rcu;    /* to avoid race with update stats on free */
        char name[IFNAMSIZ + 9];
+
+       /* for dynamic allocation of rings associated with this q_vector */
+       struct ixgbevf_ring ring[0] ____cacheline_internodealigned_in_smp;
 #ifdef CONFIG_NET_RX_BUSY_POLL
        unsigned int state;
 #define IXGBEVF_QV_STATE_IDLE          0
@@ -331,6 +383,8 @@ struct ixgbevf_adapter {
 
        u32 *rss_key;
        u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE];
+       u32 flags;
+#define IXGBEVF_FLAGS_LEGACY_RX                BIT(1)
 };
 
 enum ixbgevf_state_t {
index 9b3d43d..f373071 100644 (file)
@@ -130,6 +130,9 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter)
 static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter);
 static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector);
 static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter);
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer);
+static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
+                                 struct ixgbevf_rx_buffer *old_buff);
 
 static void ixgbevf_remove_adapter(struct ixgbe_hw *hw)
 {
@@ -527,6 +530,49 @@ static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring,
        skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 }
 
+static
+struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring,
+                                               const unsigned int size)
+{
+       struct ixgbevf_rx_buffer *rx_buffer;
+
+       rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+       prefetchw(rx_buffer->page);
+
+       /* we are reusing so sync this buffer for CPU use */
+       dma_sync_single_range_for_cpu(rx_ring->dev,
+                                     rx_buffer->dma,
+                                     rx_buffer->page_offset,
+                                     size,
+                                     DMA_FROM_DEVICE);
+
+       rx_buffer->pagecnt_bias--;
+
+       return rx_buffer;
+}
+
+static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring,
+                                 struct ixgbevf_rx_buffer *rx_buffer)
+{
+       if (ixgbevf_can_reuse_rx_page(rx_buffer)) {
+               /* hand second half of page back to the ring */
+               ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+       } else {
+               /* We are not reusing the buffer so unmap it and free
+                * any references we are holding to it
+                */
+               dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+                                    ixgbevf_rx_pg_size(rx_ring),
+                                    DMA_FROM_DEVICE,
+                                    IXGBEVF_RX_DMA_ATTR);
+               __page_frag_cache_drain(rx_buffer->page,
+                                       rx_buffer->pagecnt_bias);
+       }
+
+       /* clear contents of rx_buffer */
+       rx_buffer->page = NULL;
+}
+
 /**
  * ixgbevf_is_non_eop - process handling of non-EOP buffers
  * @rx_ring: Rx ring being processed
@@ -554,32 +600,38 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring,
        return true;
 }
 
+static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring)
+{
+       return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0;
+}
+
 static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
                                      struct ixgbevf_rx_buffer *bi)
 {
        struct page *page = bi->page;
-       dma_addr_t dma = bi->dma;
+       dma_addr_t dma;
 
        /* since we are recycling buffers we should seldom need to alloc */
        if (likely(page))
                return true;
 
        /* alloc new page for storage */
-       page = dev_alloc_page();
+       page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring));
        if (unlikely(!page)) {
                rx_ring->rx_stats.alloc_rx_page_failed++;
                return false;
        }
 
        /* map page for use */
-       dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+       dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+                                ixgbevf_rx_pg_size(rx_ring),
                                 DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
 
        /* if mapping failed free memory back to system since
         * there isn't much point in holding memory we can't use
         */
        if (dma_mapping_error(rx_ring->dev, dma)) {
-               __free_page(page);
+               __free_pages(page, ixgbevf_rx_pg_order(rx_ring));
 
                rx_ring->rx_stats.alloc_rx_page_failed++;
                return false;
@@ -587,7 +639,7 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
 
        bi->dma = dma;
        bi->page = page;
-       bi->page_offset = 0;
+       bi->page_offset = ixgbevf_rx_offset(rx_ring);
        bi->pagecnt_bias = 1;
        rx_ring->rx_stats.alloc_rx_page++;
 
@@ -621,7 +673,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
                /* sync the buffer for use by the device */
                dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
                                                 bi->page_offset,
-                                                IXGBEVF_RX_BUFSZ,
+                                                ixgbevf_rx_bufsz(rx_ring),
                                                 DMA_FROM_DEVICE);
 
                /* Refresh the desc even if pkt_addr didn't change
@@ -734,11 +786,10 @@ static inline bool ixgbevf_page_is_reserved(struct page *page)
        return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
 }
 
-static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
-                                     struct page *page,
-                                     const unsigned int truesize)
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer)
 {
-       unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
+       unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+       struct page *page = rx_buffer->page;
 
        /* avoid re-using remote pages */
        if (unlikely(ixgbevf_page_is_reserved(page)))
@@ -746,17 +797,13 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
 
 #if (PAGE_SIZE < 8192)
        /* if we are only owner of page we can reuse it */
-       if (unlikely(page_ref_count(page) != pagecnt_bias))
+       if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
                return false;
-
-       /* flip page offset to other buffer */
-       rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
-
 #else
-       /* move offset up to the next cache line */
-       rx_buffer->page_offset += truesize;
+#define IXGBEVF_LAST_OFFSET \
+       (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048)
 
-       if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
+       if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET)
                return false;
 
 #endif
@@ -765,7 +812,7 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
         * the pagecnt_bias and page count so that we fully restock the
         * number of references the driver holds.
         */
-       if (unlikely(pagecnt_bias == 1)) {
+       if (unlikely(!pagecnt_bias)) {
                page_ref_add(page, USHRT_MAX);
                rx_buffer->pagecnt_bias = USHRT_MAX;
        }
@@ -777,127 +824,81 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
  * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_ring: rx descriptor ring to transact packets on
  * @rx_buffer: buffer containing page to add
- * @rx_desc: descriptor containing length of buffer written by hardware
  * @skb: sk_buff to place the data into
+ * @size: size of buffer to be added
  *
  * This function will add the data contained in rx_buffer->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
- *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
  **/
-static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
+static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
                                struct ixgbevf_rx_buffer *rx_buffer,
-                               u16 size,
-                               union ixgbe_adv_rx_desc *rx_desc,
-                               struct sk_buff *skb)
+                               struct sk_buff *skb,
+                               unsigned int size)
 {
-       struct page *page = rx_buffer->page;
-       unsigned char *va = page_address(page) + rx_buffer->page_offset;
 #if (PAGE_SIZE < 8192)
-       unsigned int truesize = IXGBEVF_RX_BUFSZ;
+       unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
 #else
-       unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+       unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+                               SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
+                               SKB_DATA_ALIGN(size);
+#endif
+       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+                       rx_buffer->page_offset, size, truesize);
+#if (PAGE_SIZE < 8192)
+       rx_buffer->page_offset ^= truesize;
+#else
+       rx_buffer->page_offset += truesize;
 #endif
-       unsigned int pull_len;
-
-       if (unlikely(skb_is_nonlinear(skb)))
-               goto add_tail_frag;
-
-       if (likely(size <= IXGBEVF_RX_HDR_SIZE)) {
-               memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
-
-               /* page is not reserved, we can reuse buffer as is */
-               if (likely(!ixgbevf_page_is_reserved(page)))
-                       return true;
-
-               /* this page cannot be reused so discard it */
-               return false;
-       }
-
-       /* we need the header to contain the greater of either ETH_HLEN or
-        * 60 bytes if the skb->len is less than 60 for skb_pad.
-        */
-       pull_len = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
-
-       /* align pull length to size of long to optimize memcpy performance */
-       memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
-
-       /* update all of the pointers */
-       va += pull_len;
-       size -= pull_len;
-
-add_tail_frag:
-       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-                       (unsigned long)va & ~PAGE_MASK, size, truesize);
-
-       return ixgbevf_can_reuse_rx_page(rx_buffer, page, truesize);
 }
 
-static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
-                                              union ixgbe_adv_rx_desc *rx_desc,
-                                              struct sk_buff *skb)
+static
+struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
+                                     struct ixgbevf_rx_buffer *rx_buffer,
+                                     union ixgbe_adv_rx_desc *rx_desc,
+                                     unsigned int size)
 {
-       struct ixgbevf_rx_buffer *rx_buffer;
-       struct page *page;
-       u16 size = le16_to_cpu(rx_desc->wb.upper.length);
-
-       rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
-       page = rx_buffer->page;
-       prefetchw(page);
-
-       /* we are reusing so sync this buffer for CPU use */
-       dma_sync_single_range_for_cpu(rx_ring->dev,
-                                     rx_buffer->dma,
-                                     rx_buffer->page_offset,
-                                     size,
-                                     DMA_FROM_DEVICE);
-
-       if (likely(!skb)) {
-               void *page_addr = page_address(page) +
-                                 rx_buffer->page_offset;
+       void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+       unsigned int truesize = SKB_DATA_ALIGN(size);
+#endif
+       unsigned int headlen;
+       struct sk_buff *skb;
 
-               /* prefetch first cache line of first page */
-               prefetch(page_addr);
+       /* prefetch first cache line of first page */
+       prefetch(va);
 #if L1_CACHE_BYTES < 128
-               prefetch(page_addr + L1_CACHE_BYTES);
+       prefetch(va + L1_CACHE_BYTES);
 #endif
 
-               /* allocate a skb to store the frags */
-               skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-                                               IXGBEVF_RX_HDR_SIZE);
-               if (unlikely(!skb)) {
-                       rx_ring->rx_stats.alloc_rx_buff_failed++;
-                       return NULL;
-               }
+       /* allocate a skb to store the frags */
+       skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE);
+       if (unlikely(!skb))
+               return NULL;
 
-               /* we will be copying header into skb->data in
-                * pskb_may_pull so it is in our interest to prefetch
-                * it now to avoid a possible cache miss
-                */
-               prefetchw(skb->data);
-       }
+       /* Determine available headroom for copy */
+       headlen = size;
+       if (headlen > IXGBEVF_RX_HDR_SIZE)
+               headlen = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
 
-       /* pull page into skb */
-       if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
-               /* hand second half of page back to the ring */
-               ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+       /* align pull length to size of long to optimize memcpy performance */
+       memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+       /* update all of the pointers */
+       size -= headlen;
+       if (size) {
+               skb_add_rx_frag(skb, 0, rx_buffer->page,
+                               (va + headlen) - page_address(rx_buffer->page),
+                               size, truesize);
+#if (PAGE_SIZE < 8192)
+               rx_buffer->page_offset ^= truesize;
+#else
+               rx_buffer->page_offset += truesize;
+#endif
        } else {
-               /* We are not reusing the buffer so unmap it and free
-                * any references we are holding to it
-                */
-               dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
-                                    PAGE_SIZE, DMA_FROM_DEVICE,
-                                    IXGBEVF_RX_DMA_ATTR);
-               __page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
+               rx_buffer->pagecnt_bias++;
        }
 
-       /* clear contents of buffer_info */
-       rx_buffer->dma = 0;
-       rx_buffer->page = NULL;
-
        return skb;
 }
 
@@ -909,6 +910,44 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
        IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask);
 }
 
+static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
+                                        struct ixgbevf_rx_buffer *rx_buffer,
+                                        union ixgbe_adv_rx_desc *rx_desc,
+                                        unsigned int size)
+{
+       void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+       unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+                               SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size);
+#endif
+       struct sk_buff *skb;
+
+       /* prefetch first cache line of first page */
+       prefetch(va);
+#if L1_CACHE_BYTES < 128
+       prefetch(va + L1_CACHE_BYTES);
+#endif
+
+       /* build an skb to around the page buffer */
+       skb = build_skb(va - IXGBEVF_SKB_PAD, truesize);
+       if (unlikely(!skb))
+               return NULL;
+
+       /* update pointers within the skb to store the data */
+       skb_reserve(skb, IXGBEVF_SKB_PAD);
+       __skb_put(skb, size);
+
+       /* update buffer offset */
+#if (PAGE_SIZE < 8192)
+       rx_buffer->page_offset ^= truesize;
+#else
+       rx_buffer->page_offset += truesize;
+#endif
+
+       return skb;
+}
 static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
                                struct ixgbevf_ring *rx_ring,
                                int budget)
@@ -919,6 +958,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 
        while (likely(total_rx_packets < budget)) {
                union ixgbe_adv_rx_desc *rx_desc;
+               struct ixgbevf_rx_buffer *rx_buffer;
+               unsigned int size;
 
                /* return some buffers to hardware, one at a time is too slow */
                if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) {
@@ -927,8 +968,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
                }
 
                rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
-
-               if (!rx_desc->wb.upper.length)
+               size = le16_to_cpu(rx_desc->wb.upper.length);
+               if (!size)
                        break;
 
                /* This memory barrier is needed to keep us from reading
@@ -937,15 +978,26 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
                 */
                rmb();
 
+               rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size);
+
                /* retrieve a buffer from the ring */
-               skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb);
+               if (skb)
+                       ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size);
+               else if (ring_uses_build_skb(rx_ring))
+                       skb = ixgbevf_build_skb(rx_ring, rx_buffer,
+                                               rx_desc, size);
+               else
+                       skb = ixgbevf_construct_skb(rx_ring, rx_buffer,
+                                                   rx_desc, size);
 
                /* exit if we failed to retrieve a buffer */
                if (!skb) {
                        rx_ring->rx_stats.alloc_rx_buff_failed++;
+                       rx_buffer->pagecnt_bias++;
                        break;
                }
 
+               ixgbevf_put_rx_buffer(rx_ring, rx_buffer);
                cleaned_count++;
 
                /* fetch next buffer in frame if non-eop */
@@ -1260,85 +1312,6 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data)
        return IRQ_HANDLED;
 }
 
-static inline void map_vector_to_rxq(struct ixgbevf_adapter *a, int v_idx,
-                                    int r_idx)
-{
-       struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
-       a->rx_ring[r_idx]->next = q_vector->rx.ring;
-       q_vector->rx.ring = a->rx_ring[r_idx];
-       q_vector->rx.count++;
-}
-
-static inline void map_vector_to_txq(struct ixgbevf_adapter *a, int v_idx,
-                                    int t_idx)
-{
-       struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
-       a->tx_ring[t_idx]->next = q_vector->tx.ring;
-       q_vector->tx.ring = a->tx_ring[t_idx];
-       q_vector->tx.count++;
-}
-
-/**
- * ixgbevf_map_rings_to_vectors - Maps descriptor rings to vectors
- * @adapter: board private structure to initialize
- *
- * This function maps descriptor rings to the queue-specific vectors
- * we were allotted through the MSI-X enabling code.  Ideally, we'd have
- * one vector per ring/queue, but on a constrained vector budget, we
- * group the rings as "efficiently" as possible.  You would add new
- * mapping configurations in here.
- **/
-static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
-{
-       int q_vectors;
-       int v_start = 0;
-       int rxr_idx = 0, txr_idx = 0;
-       int rxr_remaining = adapter->num_rx_queues;
-       int txr_remaining = adapter->num_tx_queues;
-       int i, j;
-       int rqpv, tqpv;
-
-       q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
-       /* The ideal configuration...
-        * We have enough vectors to map one per queue.
-        */
-       if (q_vectors == adapter->num_rx_queues + adapter->num_tx_queues) {
-               for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++)
-                       map_vector_to_rxq(adapter, v_start, rxr_idx);
-
-               for (; txr_idx < txr_remaining; v_start++, txr_idx++)
-                       map_vector_to_txq(adapter, v_start, txr_idx);
-               return 0;
-       }
-
-       /* If we don't have enough vectors for a 1-to-1
-        * mapping, we'll have to group them so there are
-        * multiple queues per vector.
-        */
-       /* Re-adjusting *qpv takes care of the remainder. */
-       for (i = v_start; i < q_vectors; i++) {
-               rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i);
-               for (j = 0; j < rqpv; j++) {
-                       map_vector_to_rxq(adapter, i, rxr_idx);
-                       rxr_idx++;
-                       rxr_remaining--;
-               }
-       }
-       for (i = v_start; i < q_vectors; i++) {
-               tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i);
-               for (j = 0; j < tqpv; j++) {
-                       map_vector_to_txq(adapter, i, txr_idx);
-                       txr_idx++;
-                       txr_remaining--;
-               }
-       }
-
-       return 0;
-}
-
 /**
  * ixgbevf_request_msix_irqs - Initialize MSI-X interrupts
  * @adapter: board private structure
@@ -1411,20 +1384,6 @@ free_queue_irqs:
        return err;
 }
 
-static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter)
-{
-       int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
-       for (i = 0; i < q_vectors; i++) {
-               struct ixgbevf_q_vector *q_vector = adapter->q_vector[i];
-
-               q_vector->rx.ring = NULL;
-               q_vector->tx.ring = NULL;
-               q_vector->rx.count = 0;
-               q_vector->tx.count = 0;
-       }
-}
-
 /**
  * ixgbevf_request_irq - initialize interrupts
  * @adapter: board private structure
@@ -1464,8 +1423,6 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter)
                free_irq(adapter->msix_entries[i].vector,
                         adapter->q_vector[i]);
        }
-
-       ixgbevf_reset_q_vectors(adapter);
 }
 
 /**
@@ -1587,7 +1544,8 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter)
 
 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT        2
 
-static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
+static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter,
+                                    struct ixgbevf_ring *ring, int index)
 {
        struct ixgbe_hw *hw = &adapter->hw;
        u32 srrctl;
@@ -1595,7 +1553,10 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
        srrctl = IXGBE_SRRCTL_DROP_EN;
 
        srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
-       srrctl |= IXGBEVF_RX_BUFSZ >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+       if (ring_uses_large_buffer(ring))
+               srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+       else
+               srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
        srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
 
        IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl);
@@ -1767,10 +1728,21 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
        ring->next_to_use = 0;
        ring->next_to_alloc = 0;
 
-       ixgbevf_configure_srrctl(adapter, reg_idx);
+       ixgbevf_configure_srrctl(adapter, ring, reg_idx);
+
+       /* RXDCTL.RLPML does not work on 82599 */
+       if (adapter->hw.mac.type != ixgbe_mac_82599_vf) {
+               rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
+                           IXGBE_RXDCTL_RLPML_EN);
 
-       /* allow any size packet since we can handle overflow */
-       rxdctl &= ~IXGBE_RXDCTL_RLPML_EN;
+#if (PAGE_SIZE < 8192)
+               /* Limit the maximum frame size so we don't overrun the skb */
+               if (ring_uses_build_skb(ring) &&
+                   !ring_uses_large_buffer(ring))
+                       rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB |
+                                 IXGBE_RXDCTL_RLPML_EN;
+#endif
+       }
 
        rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME;
        IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl);
@@ -1779,6 +1751,29 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
        ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring));
 }
 
+static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
+                                     struct ixgbevf_ring *rx_ring)
+{
+       struct net_device *netdev = adapter->netdev;
+       unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+       /* set build_skb and buffer size flags */
+       clear_ring_build_skb_enabled(rx_ring);
+       clear_ring_uses_large_buffer(rx_ring);
+
+       if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+               return;
+
+       set_ring_build_skb_enabled(rx_ring);
+
+#if (PAGE_SIZE < 8192)
+       if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
+               return;
+
+       set_ring_uses_large_buffer(rx_ring);
+#endif
+}
+
 /**
  * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset
  * @adapter: board private structure
@@ -1806,8 +1801,12 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
        /* Setup the HW Rx Head and Tail Descriptor Pointers and
         * the Base and Length of the Rx Descriptor Ring
         */
-       for (i = 0; i < adapter->num_rx_queues; i++)
-               ixgbevf_configure_rx_ring(adapter, adapter->rx_ring[i]);
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               struct ixgbevf_ring *rx_ring = adapter->rx_ring[i];
+
+               ixgbevf_set_rx_buffer_len(adapter, rx_ring);
+               ixgbevf_configure_rx_ring(adapter, rx_ring);
+       }
 }
 
 static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev,
@@ -2136,13 +2135,13 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
                dma_sync_single_range_for_cpu(rx_ring->dev,
                                              rx_buffer->dma,
                                              rx_buffer->page_offset,
-                                             IXGBEVF_RX_BUFSZ,
+                                             ixgbevf_rx_bufsz(rx_ring),
                                              DMA_FROM_DEVICE);
 
                /* free resources associated with mapping */
                dma_unmap_page_attrs(rx_ring->dev,
                                     rx_buffer->dma,
-                                    PAGE_SIZE,
+                                    ixgbevf_rx_pg_size(rx_ring),
                                     DMA_FROM_DEVICE,
                                     IXGBEVF_RX_DMA_ATTR);
 
@@ -2405,105 +2404,171 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
 }
 
 /**
- * ixgbevf_alloc_queues - Allocate memory for all rings
+ * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * @adapter: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
+{
+       int vector, v_budget;
+
+       /* It's easy to be greedy for MSI-X vectors, but it really
+        * doesn't do us much good if we have a lot more vectors
+        * than CPU's.  So let's be conservative and only ask for
+        * (roughly) the same number of vectors as there are CPU's.
+        * The default is to use pairs of vectors.
+        */
+       v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
+       v_budget = min_t(int, v_budget, num_online_cpus());
+       v_budget += NON_Q_VECTORS;
+
+       adapter->msix_entries = kcalloc(v_budget,
+                                       sizeof(struct msix_entry), GFP_KERNEL);
+       if (!adapter->msix_entries)
+               return -ENOMEM;
+
+       for (vector = 0; vector < v_budget; vector++)
+               adapter->msix_entries[vector].entry = vector;
+
+       /* A failure in MSI-X entry allocation isn't fatal, but the VF driver
+        * does not support any other modes, so we will simply fail here. Note
+        * that we clean up the msix_entries pointer else-where.
+        */
+       return ixgbevf_acquire_msix_vectors(adapter, v_budget);
+}
+
+static void ixgbevf_add_ring(struct ixgbevf_ring *ring,
+                            struct ixgbevf_ring_container *head)
+{
+       ring->next = head->ring;
+       head->ring = ring;
+       head->count++;
+}
+
+/**
+ * ixgbevf_alloc_q_vector - Allocate memory for a single interrupt vector
  * @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: number of Tx rings for q vector
+ * @txr_idx: index of first Tx ring to assign
+ * @rxr_count: number of Rx rings for q vector
+ * @rxr_idx: index of first Rx ring to assign
  *
- * We allocate one ring per queue at run-time since we don't know the
- * number of queues at compile-time.  The polling_netdev array is
- * intended for Multiqueue, but should work fine with a single queue.
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
  **/
-static int ixgbevf_alloc_queues(struct ixgbevf_adapter *adapter)
+static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx,
+                                 int txr_count, int txr_idx,
+                                 int rxr_count, int rxr_idx)
 {
+       struct ixgbevf_q_vector *q_vector;
        struct ixgbevf_ring *ring;
-       int rx = 0, tx = 0;
+       int ring_count, size;
+
+       ring_count = txr_count + rxr_count;
+       size = sizeof(*q_vector) + (sizeof(*ring) * ring_count);
+
+       /* allocate q_vector and rings */
+       q_vector = kzalloc(size, GFP_KERNEL);
+       if (!q_vector)
+               return -ENOMEM;
+
+       /* initialize NAPI */
+       netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64);
+
+       /* tie q_vector and adapter together */
+       adapter->q_vector[v_idx] = q_vector;
+       q_vector->adapter = adapter;
+       q_vector->v_idx = v_idx;
 
-       for (; tx < adapter->num_tx_queues; tx++) {
-               ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-               if (!ring)
-                       goto err_allocation;
+       /* initialize pointer to rings */
+       ring = q_vector->ring;
 
+       while (txr_count) {
+               /* assign generic ring traits */
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
+
+               /* configure backlink on ring */
+               ring->q_vector = q_vector;
+
+               /* update q_vector Tx values */
+               ixgbevf_add_ring(ring, &q_vector->tx);
+
+               /* apply Tx specific ring traits */
                ring->count = adapter->tx_ring_count;
-               ring->queue_index = tx;
-               ring->reg_idx = tx;
+               ring->queue_index = txr_idx;
+               ring->reg_idx = txr_idx;
 
-               adapter->tx_ring[tx] = ring;
-       }
+               /* assign ring to adapter */
+                adapter->tx_ring[txr_idx] = ring;
+
+               /* update count and index */
+               txr_count--;
+               txr_idx++;
 
-       for (; rx < adapter->num_rx_queues; rx++) {
-               ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-               if (!ring)
-                       goto err_allocation;
+               /* push pointer to next ring */
+               ring++;
+       }
 
+       while (rxr_count) {
+               /* assign generic ring traits */
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
 
+               /* configure backlink on ring */
+               ring->q_vector = q_vector;
+
+               /* update q_vector Rx values */
+               ixgbevf_add_ring(ring, &q_vector->rx);
+
+               /* apply Rx specific ring traits */
                ring->count = adapter->rx_ring_count;
-               ring->queue_index = rx;
-               ring->reg_idx = rx;
+               ring->queue_index = rxr_idx;
+               ring->reg_idx = rxr_idx;
 
-               adapter->rx_ring[rx] = ring;
-       }
+               /* assign ring to adapter */
+               adapter->rx_ring[rxr_idx] = ring;
 
-       return 0;
+               /* update count and index */
+               rxr_count--;
+               rxr_idx++;
 
-err_allocation:
-       while (tx) {
-               kfree(adapter->tx_ring[--tx]);
-               adapter->tx_ring[tx] = NULL;
+               /* push pointer to next ring */
+               ring++;
        }
 
-       while (rx) {
-               kfree(adapter->rx_ring[--rx]);
-               adapter->rx_ring[rx] = NULL;
-       }
-       return -ENOMEM;
+       return 0;
 }
 
 /**
- * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * ixgbevf_free_q_vector - Free memory allocated for specific interrupt vector
  * @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
  *
- * Attempt to configure the interrupts using the best available
- * capabilities of the hardware and the kernel.
+ * This function frees the memory allocated to the q_vector.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
  **/
-static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
+static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx)
 {
-       struct net_device *netdev = adapter->netdev;
-       int err;
-       int vector, v_budget;
-
-       /* It's easy to be greedy for MSI-X vectors, but it really
-        * doesn't do us much good if we have a lot more vectors
-        * than CPU's.  So let's be conservative and only ask for
-        * (roughly) the same number of vectors as there are CPU's.
-        * The default is to use pairs of vectors.
-        */
-       v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
-       v_budget = min_t(int, v_budget, num_online_cpus());
-       v_budget += NON_Q_VECTORS;
-
-       /* A failure in MSI-X entry allocation isn't fatal, but it does
-        * mean we disable MSI-X capabilities of the adapter.
-        */
-       adapter->msix_entries = kcalloc(v_budget,
-                                       sizeof(struct msix_entry), GFP_KERNEL);
-       if (!adapter->msix_entries)
-               return -ENOMEM;
+       struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx];
+       struct ixgbevf_ring *ring;
 
-       for (vector = 0; vector < v_budget; vector++)
-               adapter->msix_entries[vector].entry = vector;
+       ixgbevf_for_each_ring(ring, q_vector->tx)
+               adapter->tx_ring[ring->queue_index] = NULL;
 
-       err = ixgbevf_acquire_msix_vectors(adapter, v_budget);
-       if (err)
-               return err;
+       ixgbevf_for_each_ring(ring, q_vector->rx)
+               adapter->rx_ring[ring->queue_index] = NULL;
 
-       err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
-       if (err)
-               return err;
+       adapter->q_vector[v_idx] = NULL;
+       netif_napi_del(&q_vector->napi);
 
-       return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+       /* ixgbevf_get_stats() might access the rings on this vector,
+        * we must wait a grace period before freeing it.
+        */
+       kfree_rcu(q_vector, rcu);
 }
 
 /**
@@ -2515,35 +2580,53 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
  **/
 static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
 {
-       int q_idx, num_q_vectors;
-       struct ixgbevf_q_vector *q_vector;
+       int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+       int rxr_remaining = adapter->num_rx_queues;
+       int txr_remaining = adapter->num_tx_queues;
+       int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+       int err;
+
+       if (q_vectors >= (rxr_remaining + txr_remaining)) {
+               for (; rxr_remaining; v_idx++, q_vectors--) {
+                       int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+
+                       err = ixgbevf_alloc_q_vector(adapter, v_idx,
+                                                    0, 0, rqpv, rxr_idx);
+                       if (err)
+                               goto err_out;
 
-       num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+                       /* update counts and index */
+                       rxr_remaining -= rqpv;
+                       rxr_idx += rqpv;
+               }
+       }
+
+       for (; q_vectors; v_idx++, q_vectors--) {
+               int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+               int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors);
 
-       for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
-               q_vector = kzalloc(sizeof(struct ixgbevf_q_vector), GFP_KERNEL);
-               if (!q_vector)
+               err = ixgbevf_alloc_q_vector(adapter, v_idx,
+                                            tqpv, txr_idx,
+                                            rqpv, rxr_idx);
+
+               if (err)
                        goto err_out;
-               q_vector->adapter = adapter;
-               q_vector->v_idx = q_idx;
-               netif_napi_add(adapter->netdev, &q_vector->napi,
-                              ixgbevf_poll, 64);
-               adapter->q_vector[q_idx] = q_vector;
+
+               /* update counts and index */
+               rxr_remaining -= rqpv;
+               rxr_idx += rqpv;
+               txr_remaining -= tqpv;
+               txr_idx += tqpv;
        }
 
        return 0;
 
 err_out:
-       while (q_idx) {
-               q_idx--;
-               q_vector = adapter->q_vector[q_idx];
-#ifdef CONFIG_NET_RX_BUSY_POLL
-               napi_hash_del(&q_vector->napi);
-#endif
-               netif_napi_del(&q_vector->napi);
-               kfree(q_vector);
-               adapter->q_vector[q_idx] = NULL;
+       while (v_idx) {
+               v_idx--;
+               ixgbevf_free_q_vector(adapter, v_idx);
        }
+
        return -ENOMEM;
 }
 
@@ -2557,17 +2640,11 @@ err_out:
  **/
 static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter)
 {
-       int q_idx, num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
-       for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
-               struct ixgbevf_q_vector *q_vector = adapter->q_vector[q_idx];
+       int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
 
-               adapter->q_vector[q_idx] = NULL;
-#ifdef CONFIG_NET_RX_BUSY_POLL
-               napi_hash_del(&q_vector->napi);
-#endif
-               netif_napi_del(&q_vector->napi);
-               kfree(q_vector);
+       while (q_vectors) {
+               q_vectors--;
+               ixgbevf_free_q_vector(adapter, q_vectors);
        }
 }
 
@@ -2611,12 +2688,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
                goto err_alloc_q_vectors;
        }
 
-       err = ixgbevf_alloc_queues(adapter);
-       if (err) {
-               pr_err("Unable to allocate memory for queues\n");
-               goto err_alloc_queues;
-       }
-
        hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
               (adapter->num_rx_queues > 1) ? "Enabled" :
               "Disabled", adapter->num_rx_queues, adapter->num_tx_queues);
@@ -2624,8 +2695,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
        set_bit(__IXGBEVF_DOWN, &adapter->state);
 
        return 0;
-err_alloc_queues:
-       ixgbevf_free_q_vectors(adapter);
 err_alloc_q_vectors:
        ixgbevf_reset_interrupt_capability(adapter);
 err_set_interrupt:
@@ -2641,17 +2710,6 @@ err_set_interrupt:
  **/
 static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter)
 {
-       int i;
-
-       for (i = 0; i < adapter->num_tx_queues; i++) {
-               kfree(adapter->tx_ring[i]);
-               adapter->tx_ring[i] = NULL;
-       }
-       for (i = 0; i < adapter->num_rx_queues; i++) {
-               kfree(adapter->rx_ring[i]);
-               adapter->rx_ring[i] = NULL;
-       }
-
        adapter->num_tx_queues = 0;
        adapter->num_rx_queues = 0;
 
@@ -3088,9 +3146,14 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
                if (!err)
                        continue;
                hw_dbg(&adapter->hw, "Allocation for Tx Queue %u failed\n", i);
-               break;
+               goto err_setup_tx;
        }
 
+       return 0;
+err_setup_tx:
+       /* rewind the index freeing the rings as we go */
+       while (i--)
+               ixgbevf_free_tx_resources(adapter->tx_ring[i]);
        return err;
 }
 
@@ -3148,8 +3211,14 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter)
                if (!err)
                        continue;
                hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i);
-               break;
+               goto err_setup_rx;
        }
+
+       return 0;
+err_setup_rx:
+       /* rewind the index freeing the rings as we go */
+       while (i--)
+               ixgbevf_free_rx_resources(adapter->rx_ring[i]);
        return err;
 }
 
@@ -3244,28 +3313,31 @@ int ixgbevf_open(struct net_device *netdev)
 
        ixgbevf_configure(adapter);
 
-       /* Map the Tx/Rx rings to the vectors we were allotted.
-        * if request_irq will be called in this function map_rings
-        * must be called *before* up_complete
-        */
-       ixgbevf_map_rings_to_vectors(adapter);
-
        err = ixgbevf_request_irq(adapter);
        if (err)
                goto err_req_irq;
 
+       /* Notify the stack of the actual queue counts. */
+       err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+       if (err)
+               goto err_set_queues;
+
+       err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+       if (err)
+               goto err_set_queues;
+
        ixgbevf_up_complete(adapter);
 
        return 0;
 
+err_set_queues:
+       ixgbevf_free_irq(adapter);
 err_req_irq:
-       ixgbevf_down(adapter);
-err_setup_rx:
        ixgbevf_free_all_rx_resources(adapter);
-err_setup_tx:
+err_setup_rx:
        ixgbevf_free_all_tx_resources(adapter);
+err_setup_tx:
        ixgbevf_reset(adapter);
-
 err_setup_reset:
 
        return err;
@@ -3707,11 +3779,10 @@ static int ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size)
        return __ixgbevf_maybe_stop_tx(tx_ring, size);
 }
 
-static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
+                                  struct ixgbevf_ring *tx_ring)
 {
-       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
        struct ixgbevf_tx_buffer *first;
-       struct ixgbevf_ring *tx_ring;
        int tso;
        u32 tx_flags = 0;
        u16 count = TXD_USE_COUNT(skb_headlen(skb));
@@ -3726,8 +3797,6 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
                return NETDEV_TX_OK;
        }
 
-       tx_ring = adapter->tx_ring[skb->queue_mapping];
-
        /* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
         *       + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
         *       + 2 desc gap to keep tail from touching head,
@@ -3780,6 +3849,29 @@ out_drop:
        return NETDEV_TX_OK;
 }
 
+static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+       struct ixgbevf_ring *tx_ring;
+
+       if (skb->len <= 0) {
+               dev_kfree_skb_any(skb);
+               return NETDEV_TX_OK;
+       }
+
+       /* The minimum packet size for olinfo paylen is 17 so pad the skb
+        * in order to meet this minimum size requirement.
+        */
+       if (skb->len < 17) {
+               if (skb_padto(skb, 17))
+                       return NETDEV_TX_OK;
+               skb->len = 17;
+       }
+
+       tx_ring = adapter->tx_ring[skb->queue_mapping];
+       return ixgbevf_xmit_frame_ring(skb, tx_ring);
+}
+
 /**
  * ixgbevf_set_mac - Change the Ethernet Address of the NIC
  * @netdev: network interface device structure
@@ -3839,6 +3931,9 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
        /* must set new MTU before calling down or up */
        netdev->mtu = new_mtu;
 
+       if (netif_running(netdev))
+               ixgbevf_reinit_locked(adapter);
+
        return 0;
 }
 
@@ -3917,17 +4012,11 @@ static int ixgbevf_resume(struct pci_dev *pdev)
 
        rtnl_lock();
        err = ixgbevf_init_interrupt_scheme(adapter);
+       if (!err && netif_running(netdev))
+               err = ixgbevf_open(netdev);
        rtnl_unlock();
-       if (err) {
-               dev_err(&pdev->dev, "Cannot initialize interrupts\n");
+       if (err)
                return err;
-       }
-
-       if (netif_running(netdev)) {
-               err = ixgbevf_open(netdev);
-               if (err)
-                       return err;
-       }
 
        netif_device_attach(netdev);
 
@@ -3953,6 +4042,7 @@ static void ixgbevf_get_stats(struct net_device *netdev,
 
        stats->multicast = adapter->stats.vfmprc - adapter->stats.base_vfmprc;
 
+       rcu_read_lock();
        for (i = 0; i < adapter->num_rx_queues; i++) {
                ring = adapter->rx_ring[i];
                do {
@@ -3974,6 +4064,7 @@ static void ixgbevf_get_stats(struct net_device *netdev,
                stats->tx_bytes += bytes;
                stats->tx_packets += packets;
        }
+       rcu_read_unlock();
 }
 
 #define IXGBEVF_MAX_MAC_HDR_LEN                127
index 5a1668c..9418f6e 100644 (file)
 #define     MVPP2_RXQ_PACKET_OFFSET_MASK       0x70000000
 #define     MVPP2_RXQ_DISABLE_MASK             BIT(31)
 
+/* Top Registers */
+#define MVPP2_MH_REG(port)                     (0x5040 + 4 * (port))
+#define MVPP2_DSA_EXTENDED                     BIT(5)
+
 /* Parser Registers */
 #define MVPP2_PRS_INIT_LOOKUP_REG              0x1000
 #define     MVPP2_PRS_PORT_LU_MAX              0xf
 #define MVPP2_ETH_TYPE_LEN             2
 #define MVPP2_PPPOE_HDR_SIZE           8
 #define MVPP2_VLAN_TAG_LEN             4
+#define MVPP2_VLAN_TAG_EDSA_LEN                8
 
 /* Lbtd 802.3 type */
 #define MVPP2_IP_LBDT_TYPE             0xfffa
@@ -609,35 +614,64 @@ enum mvpp2_tag_type {
 #define MVPP2_PRS_TCAM_LU_BYTE                 20
 #define MVPP2_PRS_TCAM_EN_OFFS(offs)           ((offs) + 2)
 #define MVPP2_PRS_TCAM_INV_WORD                        5
+
+#define MVPP2_PRS_VID_TCAM_BYTE         2
+
+/* There is a TCAM range reserved for VLAN filtering entries, range size is 33
+ * 10 VLAN ID filter entries per port
+ * 1 default VLAN filter entry per port
+ * It is assumed that there are 3 ports for filter, not including loopback port
+ */
+#define MVPP2_PRS_VLAN_FILT_MAX                11
+#define MVPP2_PRS_VLAN_FILT_RANGE_SIZE 33
+
+#define MVPP2_PRS_VLAN_FILT_MAX_ENTRY   (MVPP2_PRS_VLAN_FILT_MAX - 2)
+#define MVPP2_PRS_VLAN_FILT_DFLT_ENTRY  (MVPP2_PRS_VLAN_FILT_MAX - 1)
+
 /* Tcam entries ID */
 #define MVPP2_PE_DROP_ALL              0
 #define MVPP2_PE_FIRST_FREE_TID                1
-#define MVPP2_PE_LAST_FREE_TID         (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+
+/* VLAN filtering range */
+#define MVPP2_PE_VID_FILT_RANGE_END     (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+#define MVPP2_PE_VID_FILT_RANGE_START   (MVPP2_PE_VID_FILT_RANGE_END - \
+                                        MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1)
+#define MVPP2_PE_LAST_FREE_TID          (MVPP2_PE_VID_FILT_RANGE_START - 1)
 #define MVPP2_PE_IP6_EXT_PROTO_UN      (MVPP2_PRS_TCAM_SRAM_SIZE - 30)
 #define MVPP2_PE_MAC_MC_IP6            (MVPP2_PRS_TCAM_SRAM_SIZE - 29)
 #define MVPP2_PE_IP6_ADDR_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 28)
 #define MVPP2_PE_IP4_ADDR_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 27)
 #define MVPP2_PE_LAST_DEFAULT_FLOW     (MVPP2_PRS_TCAM_SRAM_SIZE - 26)
-#define MVPP2_PE_FIRST_DEFAULT_FLOW    (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
-#define MVPP2_PE_EDSA_TAGGED           (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
-#define MVPP2_PE_EDSA_UNTAGGED         (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
-#define MVPP2_PE_DSA_TAGGED            (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
-#define MVPP2_PE_DSA_UNTAGGED          (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
-#define MVPP2_PE_ETYPE_EDSA_TAGGED     (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
-#define MVPP2_PE_ETYPE_EDSA_UNTAGGED   (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
-#define MVPP2_PE_ETYPE_DSA_TAGGED      (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
-#define MVPP2_PE_ETYPE_DSA_UNTAGGED    (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
-#define MVPP2_PE_MH_DEFAULT            (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
-#define MVPP2_PE_DSA_DEFAULT           (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
-#define MVPP2_PE_IP6_PROTO_UN          (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
-#define MVPP2_PE_IP4_PROTO_UN          (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
-#define MVPP2_PE_ETH_TYPE_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
+#define MVPP2_PE_FIRST_DEFAULT_FLOW    (MVPP2_PRS_TCAM_SRAM_SIZE - 21)
+#define MVPP2_PE_EDSA_TAGGED           (MVPP2_PRS_TCAM_SRAM_SIZE - 20)
+#define MVPP2_PE_EDSA_UNTAGGED         (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
+#define MVPP2_PE_DSA_TAGGED            (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
+#define MVPP2_PE_DSA_UNTAGGED          (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
+#define MVPP2_PE_ETYPE_EDSA_TAGGED     (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
+#define MVPP2_PE_ETYPE_EDSA_UNTAGGED   (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
+#define MVPP2_PE_ETYPE_DSA_TAGGED      (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
+#define MVPP2_PE_ETYPE_DSA_UNTAGGED    (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
+#define MVPP2_PE_MH_DEFAULT            (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
+#define MVPP2_PE_DSA_DEFAULT           (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
+#define MVPP2_PE_IP6_PROTO_UN          (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
+#define MVPP2_PE_IP4_PROTO_UN          (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
+#define MVPP2_PE_ETH_TYPE_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
+#define MVPP2_PE_VID_FLTR_DEFAULT      (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
+#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
 #define MVPP2_PE_VLAN_DBL              (MVPP2_PRS_TCAM_SRAM_SIZE - 5)
 #define MVPP2_PE_VLAN_NONE             (MVPP2_PRS_TCAM_SRAM_SIZE - 4)
 #define MVPP2_PE_MAC_MC_ALL            (MVPP2_PRS_TCAM_SRAM_SIZE - 3)
 #define MVPP2_PE_MAC_PROMISCUOUS       (MVPP2_PRS_TCAM_SRAM_SIZE - 2)
 #define MVPP2_PE_MAC_NON_PROMISCUOUS   (MVPP2_PRS_TCAM_SRAM_SIZE - 1)
 
+#define MVPP2_PRS_VID_PORT_FIRST(port) (MVPP2_PE_VID_FILT_RANGE_START + \
+                                        ((port) * MVPP2_PRS_VLAN_FILT_MAX))
+#define MVPP2_PRS_VID_PORT_LAST(port)  (MVPP2_PRS_VID_PORT_FIRST(port) \
+                                        + MVPP2_PRS_VLAN_FILT_MAX_ENTRY)
+/* Index of default vid filter for given port */
+#define MVPP2_PRS_VID_PORT_DFLT(port)  (MVPP2_PRS_VID_PORT_FIRST(port) \
+                                        + MVPP2_PRS_VLAN_FILT_DFLT_ENTRY)
+
 /* Sram structure
  * The fields are represented by MVPP2_PRS_TCAM_DATA_REG(3)->(0).
  */
@@ -725,6 +759,7 @@ enum mvpp2_tag_type {
 #define MVPP2_PRS_IPV6_EXT_AH_L4_AI_BIT                BIT(4)
 #define MVPP2_PRS_SINGLE_VLAN_AI               0
 #define MVPP2_PRS_DBL_VLAN_AI_BIT              BIT(7)
+#define MVPP2_PRS_EDSA_VID_AI_BIT              BIT(0)
 
 /* DSA/EDSA type */
 #define MVPP2_PRS_TAGGED               true
@@ -747,6 +782,7 @@ enum mvpp2_prs_lookup {
        MVPP2_PRS_LU_MAC,
        MVPP2_PRS_LU_DSA,
        MVPP2_PRS_LU_VLAN,
+       MVPP2_PRS_LU_VID,
        MVPP2_PRS_LU_L2,
        MVPP2_PRS_LU_PPPOE,
        MVPP2_PRS_LU_IP4,
@@ -1662,6 +1698,14 @@ static void mvpp2_prs_match_etype(struct mvpp2_prs_entry *pe, int offset,
        mvpp2_prs_tcam_data_byte_set(pe, offset + 1, ethertype & 0xff, 0xff);
 }
 
+/* Set vid in tcam sw entry */
+static void mvpp2_prs_match_vid(struct mvpp2_prs_entry *pe, int offset,
+                               unsigned short vid)
+{
+       mvpp2_prs_tcam_data_byte_set(pe, offset + 0, (vid & 0xf00) >> 8, 0xf);
+       mvpp2_prs_tcam_data_byte_set(pe, offset + 1, vid & 0xff, 0xff);
+}
+
 /* Set bits in sram sw entry */
 static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num,
                                    int val)
@@ -2029,24 +2073,30 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add,
                mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA);
                pe.index = tid;
 
-               /* Shift 4 bytes if DSA tag or 8 bytes in case of EDSA tag*/
-               mvpp2_prs_sram_shift_set(&pe, shift,
-                                        MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
                /* Update shadow table */
                mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA);
 
                if (tagged) {
                        /* Set tagged bit in DSA tag */
                        mvpp2_prs_tcam_data_byte_set(&pe, 0,
-                                                    MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
-                                                    MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
-                       /* Clear all ai bits for next iteration */
-                       mvpp2_prs_sram_ai_update(&pe, 0,
-                                                MVPP2_PRS_SRAM_AI_MASK);
-                       /* If packet is tagged continue check vlans */
-                       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+                                            MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
+                                            MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
+
+                       /* Set ai bits for next iteration */
+                       if (extend)
+                               mvpp2_prs_sram_ai_update(&pe, 1,
+                                                       MVPP2_PRS_SRAM_AI_MASK);
+                       else
+                               mvpp2_prs_sram_ai_update(&pe, 0,
+                                                       MVPP2_PRS_SRAM_AI_MASK);
+
+                       /* If packet is tagged continue check vid filtering */
+                       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
                } else {
+                       /* Shift 4 bytes for DSA tag or 8 bytes for EDSA tag*/
+                       mvpp2_prs_sram_shift_set(&pe, shift,
+                                       MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
                        /* Set result info bits to 'no vlans' */
                        mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE,
                                                 MVPP2_PRS_RI_VLAN_MASK);
@@ -2231,10 +2281,9 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
 
                mvpp2_prs_match_etype(pe, 0, tpid);
 
-               mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_L2);
-               /* Shift 4 bytes - skip 1 vlan tag */
-               mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
-                                        MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+               /* VLAN tag detected, proceed with VID filtering */
+               mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VID);
+
                /* Clear all ai bits for next iteration */
                mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK);
 
@@ -2375,8 +2424,8 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
                mvpp2_prs_match_etype(pe, 4, tpid2);
 
                mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN);
-               /* Shift 8 bytes - skip 2 vlan tags */
-               mvpp2_prs_sram_shift_set(pe, 2 * MVPP2_VLAN_TAG_LEN,
+               /* Shift 4 bytes - skip outer vlan tag */
+               mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
                                         MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
                mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE,
                                         MVPP2_PRS_RI_VLAN_MASK);
@@ -2755,6 +2804,62 @@ static void mvpp2_prs_dsa_init(struct mvpp2 *priv)
        mvpp2_prs_hw_write(priv, &pe);
 }
 
+/* Initialize parser entries for VID filtering */
+static void mvpp2_prs_vid_init(struct mvpp2 *priv)
+{
+       struct mvpp2_prs_entry pe;
+
+       memset(&pe, 0, sizeof(pe));
+
+       /* Set default vid entry */
+       pe.index = MVPP2_PE_VID_FLTR_DEFAULT;
+       mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+       mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_EDSA_VID_AI_BIT);
+
+       /* Skip VLAN header - Set offset to 4 bytes */
+       mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_LEN,
+                                MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+       /* Clear all ai bits for next iteration */
+       mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+       /* Unmask all ports */
+       mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+       /* Update shadow table and hw entry */
+       mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+       mvpp2_prs_hw_write(priv, &pe);
+
+       /* Set default vid entry for extended DSA*/
+       memset(&pe, 0, sizeof(pe));
+
+       /* Set default vid entry */
+       pe.index = MVPP2_PE_VID_EDSA_FLTR_DEFAULT;
+       mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+       mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_EDSA_VID_AI_BIT,
+                                MVPP2_PRS_EDSA_VID_AI_BIT);
+
+       /* Skip VLAN header - Set offset to 8 bytes */
+       mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_EDSA_LEN,
+                                MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+       /* Clear all ai bits for next iteration */
+       mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+       /* Unmask all ports */
+       mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+       /* Update shadow table and hw entry */
+       mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+       mvpp2_prs_hw_write(priv, &pe);
+}
+
 /* Match basic ethertypes */
 static int mvpp2_prs_etype_init(struct mvpp2 *priv)
 {
@@ -3023,7 +3128,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv)
        mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
        pe.index = MVPP2_PE_VLAN_DBL;
 
-       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
+
        /* Clear ai for next iterations */
        mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
        mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE,
@@ -3386,6 +3492,192 @@ static int mvpp2_prs_ip6_init(struct mvpp2 *priv)
        return 0;
 }
 
+/* Find tcam entry with matched pair <vid,port> */
+static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
+                                   u16 mask)
+{
+       unsigned char byte[2], enable[2];
+       struct mvpp2_prs_entry pe;
+       u16 rvid, rmask;
+       int tid;
+
+       /* Go through the all entries with MVPP2_PRS_LU_VID */
+       for (tid = MVPP2_PE_VID_FILT_RANGE_START;
+            tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) {
+               if (!priv->prs_shadow[tid].valid ||
+                   priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID)
+                       continue;
+
+               pe.index = tid;
+
+               mvpp2_prs_hw_read(priv, &pe);
+               mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]);
+               mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]);
+
+               rvid = ((byte[0] & 0xf) << 8) + byte[1];
+               rmask = ((enable[0] & 0xf) << 8) + enable[1];
+
+               if (rvid != vid || rmask != mask)
+                       continue;
+
+               return tid;
+       }
+
+       return 0;
+}
+
+/* Write parser entry for VID filtering */
+static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid)
+{
+       unsigned int vid_start = MVPP2_PE_VID_FILT_RANGE_START +
+                                port->id * MVPP2_PRS_VLAN_FILT_MAX;
+       unsigned int mask = 0xfff, reg_val, shift;
+       struct mvpp2 *priv = port->priv;
+       struct mvpp2_prs_entry pe;
+       int tid;
+
+       /* Scan TCAM and see if entry with this <vid,port> already exist */
+       tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask);
+
+       reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+       if (reg_val & MVPP2_DSA_EXTENDED)
+               shift = MVPP2_VLAN_TAG_EDSA_LEN;
+       else
+               shift = MVPP2_VLAN_TAG_LEN;
+
+       /* No such entry */
+       if (!tid) {
+               memset(&pe, 0, sizeof(pe));
+
+               /* Go through all entries from first to last in vlan range */
+               tid = mvpp2_prs_tcam_first_free(priv, vid_start,
+                                               vid_start +
+                                               MVPP2_PRS_VLAN_FILT_MAX_ENTRY);
+
+               /* There isn't room for a new VID filter */
+               if (tid < 0)
+                       return tid;
+
+               mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+               pe.index = tid;
+
+               /* Mask all ports */
+               mvpp2_prs_tcam_port_map_set(&pe, 0);
+       } else {
+               mvpp2_prs_hw_read(priv, &pe);
+       }
+
+       /* Enable the current port */
+       mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+       /* Continue - set next lookup */
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+       /* Skip VLAN header - Set offset to 4 or 8 bytes */
+       mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+       /* Set match on VID */
+       mvpp2_prs_match_vid(&pe, MVPP2_PRS_VID_TCAM_BYTE, vid);
+
+       /* Clear all ai bits for next iteration */
+       mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+       /* Update shadow table */
+       mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+       mvpp2_prs_hw_write(priv, &pe);
+
+       return 0;
+}
+
+/* Write parser entry for VID filtering */
+static void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid)
+{
+       struct mvpp2 *priv = port->priv;
+       int tid;
+
+       /* Scan TCAM and see if entry with this <vid,port> already exist */
+       tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff);
+
+       /* No such entry */
+       if (tid)
+               return;
+
+       mvpp2_prs_hw_inv(priv, tid);
+       priv->prs_shadow[tid].valid = false;
+}
+
+/* Remove all existing VID filters on this port */
+static void mvpp2_prs_vid_remove_all(struct mvpp2_port *port)
+{
+       struct mvpp2 *priv = port->priv;
+       int tid;
+
+       for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id);
+            tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) {
+               if (priv->prs_shadow[tid].valid)
+                       mvpp2_prs_vid_entry_remove(port, tid);
+       }
+}
+
+/* Remove VID filering entry for this port */
+static void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port)
+{
+       unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+       struct mvpp2 *priv = port->priv;
+
+       /* Invalidate the guard entry */
+       mvpp2_prs_hw_inv(priv, tid);
+
+       priv->prs_shadow[tid].valid = false;
+}
+
+/* Add guard entry that drops packets when no VID is matched on this port */
+static void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port)
+{
+       unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+       struct mvpp2 *priv = port->priv;
+       unsigned int reg_val, shift;
+       struct mvpp2_prs_entry pe;
+
+       if (priv->prs_shadow[tid].valid)
+               return;
+
+       memset(&pe, 0, sizeof(pe));
+
+       pe.index = tid;
+
+       reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+       if (reg_val & MVPP2_DSA_EXTENDED)
+               shift = MVPP2_VLAN_TAG_EDSA_LEN;
+       else
+               shift = MVPP2_VLAN_TAG_LEN;
+
+       mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+       /* Mask all ports */
+       mvpp2_prs_tcam_port_map_set(&pe, 0);
+
+       /* Update port mask */
+       mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+       /* Continue - set next lookup */
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+       /* Skip VLAN header - Set offset to 4 or 8 bytes */
+       mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+       /* Drop VLAN packets that don't belong to any VIDs on this port */
+       mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK,
+                                MVPP2_PRS_RI_DROP_MASK);
+
+       /* Clear all ai bits for next iteration */
+       mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+       /* Update shadow table */
+       mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+       mvpp2_prs_hw_write(priv, &pe);
+}
+
 /* Parser default initialization */
 static int mvpp2_prs_default_init(struct platform_device *pdev,
                                  struct mvpp2 *priv)
@@ -3429,6 +3721,8 @@ static int mvpp2_prs_default_init(struct platform_device *pdev,
 
        mvpp2_prs_dsa_init(priv);
 
+       mvpp2_prs_vid_init(priv);
+
        err = mvpp2_prs_etype_init(priv);
        if (err)
                return err;
@@ -7153,6 +7447,12 @@ retry:
                        }
                }
        }
+
+       /* Disable VLAN filtering in promiscuous mode */
+       if (dev->flags & IFF_PROMISC)
+               mvpp2_prs_vid_disable_filtering(port);
+       else
+               mvpp2_prs_vid_enable_filtering(port);
 }
 
 static int mvpp2_set_mac_address(struct net_device *dev, void *p)
@@ -7292,6 +7592,48 @@ static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        return ret;
 }
 
+static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+       int ret;
+
+       ret = mvpp2_prs_vid_entry_add(port, vid);
+       if (ret)
+               netdev_err(dev, "rx-vlan-filter offloading cannot accept more than %d VIDs per port\n",
+                          MVPP2_PRS_VLAN_FILT_MAX - 1);
+       return ret;
+}
+
+static int mvpp2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       mvpp2_prs_vid_entry_remove(port, vid);
+       return 0;
+}
+
+static int mvpp2_set_features(struct net_device *dev,
+                             netdev_features_t features)
+{
+       netdev_features_t changed = dev->features ^ features;
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
+               if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+                       mvpp2_prs_vid_enable_filtering(port);
+               } else {
+                       /* Invalidate all registered VID filters for this
+                        * port
+                        */
+                       mvpp2_prs_vid_remove_all(port);
+
+                       mvpp2_prs_vid_disable_filtering(port);
+               }
+       }
+
+       return 0;
+}
+
 /* Ethtool methods */
 
 /* Set interrupt coalescing for ethtools */
@@ -7433,6 +7775,9 @@ static const struct net_device_ops mvpp2_netdev_ops = {
        .ndo_change_mtu         = mvpp2_change_mtu,
        .ndo_get_stats64        = mvpp2_get_stats64,
        .ndo_do_ioctl           = mvpp2_ioctl,
+       .ndo_vlan_rx_add_vid    = mvpp2_vlan_rx_add_vid,
+       .ndo_vlan_rx_kill_vid   = mvpp2_vlan_rx_kill_vid,
+       .ndo_set_features       = mvpp2_set_features,
 };
 
 static const struct ethtool_ops mvpp2_eth_tool_ops = {
@@ -7945,7 +8290,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
        features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
        dev->features = features | NETIF_F_RXCSUM;
-       dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
+       dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
+                           NETIF_F_HW_VLAN_CTAG_FILTER;
        dev->vlan_features |= features;
        dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
 
index ebc1f56..9a7a2f0 100644 (file)
@@ -199,6 +199,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = {
        "rx_xdp_drop",
        "rx_xdp_tx",
        "rx_xdp_tx_full",
+
+       /* phy statistics */
+       "rx_packets_phy", "rx_bytes_phy",
+       "tx_packets_phy", "tx_bytes_phy",
 };
 
 static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= {
@@ -411,6 +415,10 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
                if (bitmap_iterator_test(&it))
                        data[index++] = ((unsigned long *)&priv->xdp_stats)[i];
 
+       for (i = 0; i < NUM_PHY_STATS; i++, bitmap_iterator_inc(&it))
+               if (bitmap_iterator_test(&it))
+                       data[index++] = ((unsigned long *)&priv->phy_stats)[i];
+
        for (i = 0; i < priv->tx_ring_num[TX]; i++) {
                data[index++] = priv->tx_ring[TX][i]->packets;
                data[index++] = priv->tx_ring[TX][i]->bytes;
@@ -490,6 +498,12 @@ static void mlx4_en_get_strings(struct net_device *dev,
                                strcpy(data + (index++) * ETH_GSTRING_LEN,
                                       main_strings[strings]);
 
+               for (i = 0; i < NUM_PHY_STATS; i++, strings++,
+                    bitmap_iterator_inc(&it))
+                       if (bitmap_iterator_test(&it))
+                               strcpy(data + (index++) * ETH_GSTRING_LEN,
+                                      main_strings[strings]);
+
                for (i = 0; i < priv->tx_ring_num[TX]; i++) {
                        sprintf(data + (index++) * ETH_GSTRING_LEN,
                                "tx%d_packets", i);
index 8fc51bc..e0adac4 100644 (file)
@@ -3256,6 +3256,10 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
 
        bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS);
        last_i += NUM_XDP_STATS;
+
+       if (!mlx4_is_slave(dev))
+               bitmap_set(stats_bitmap->bitmap, last_i, NUM_PHY_STATS);
+       last_i += NUM_PHY_STATS;
 }
 
 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
@@ -3630,10 +3634,6 @@ int mlx4_en_reset_config(struct net_device *dev,
                mlx4_en_stop_port(dev, 1);
        }
 
-       en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n",
-               ts_config.rx_filter,
-               !!(features & NETIF_F_HW_VLAN_CTAG_RX));
-
        mlx4_en_safe_replace_resources(priv, tmp);
 
        if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
index 1fa4849..0158b88 100644 (file)
@@ -275,19 +275,31 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
                priv->port_stats.xmit_more         += READ_ONCE(ring->xmit_more);
        }
 
-       if (mlx4_is_master(mdev->dev)) {
-               stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
-                                                  &mlx4_en_stats->RTOT_prio_1,
-                                                  NUM_PRIORITIES);
-               stats->tx_packets = en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
-                                                  &mlx4_en_stats->TTOT_prio_1,
-                                                  NUM_PRIORITIES);
-               stats->rx_bytes = en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
-                                                &mlx4_en_stats->ROCT_prio_1,
-                                                NUM_PRIORITIES);
-               stats->tx_bytes = en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
-                                                &mlx4_en_stats->TOCT_prio_1,
-                                                NUM_PRIORITIES);
+       if (!mlx4_is_slave(mdev->dev)) {
+               struct mlx4_en_phy_stats *p_stats = &priv->phy_stats;
+
+               p_stats->rx_packets_phy =
+                       en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
+                                      &mlx4_en_stats->RTOT_prio_1,
+                                      NUM_PRIORITIES);
+               p_stats->tx_packets_phy =
+                       en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
+                                      &mlx4_en_stats->TTOT_prio_1,
+                                      NUM_PRIORITIES);
+               p_stats->rx_bytes_phy =
+                       en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
+                                      &mlx4_en_stats->ROCT_prio_1,
+                                      NUM_PRIORITIES);
+               p_stats->tx_bytes_phy =
+                       en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
+                                      &mlx4_en_stats->TOCT_prio_1,
+                                      NUM_PRIORITIES);
+               if (mlx4_is_master(mdev->dev)) {
+                       stats->rx_packets = p_stats->rx_packets_phy;
+                       stats->tx_packets = p_stats->tx_packets_phy;
+                       stats->rx_bytes = p_stats->rx_bytes_phy;
+                       stats->tx_bytes = p_stats->tx_bytes_phy;
+               }
        }
 
        /* net device stats */
index b4d144e..c2c6bd7 100644 (file)
@@ -649,6 +649,12 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
        return get_fixed_ipv4_csum(hw_checksum, skb, hdr);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6)
+#else
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4)
+#endif
+
 int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -662,12 +668,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
        int polled = 0;
        int index;
 
-       if (unlikely(!priv->port_up))
+       if (unlikely(!priv->port_up || budget <= 0))
                return 0;
 
-       if (unlikely(budget <= 0))
-               return polled;
-
        ring = priv->rx_ring[cq_ring];
 
        /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
@@ -838,12 +841,7 @@ xdp_drop_no_cnt:
                                ring->csum_ok++;
                        } else {
                                if (!(priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
-                                     (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
-#if IS_ENABLED(CONFIG_IPV6)
-                                                                MLX4_CQE_STATUS_IPV6))))
-#else
-                                                                0))))
-#endif
+                                     (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IP_ANY))))
                                        goto csum_none;
                                if (check_csum(cqe, skb, va, dev->features))
                                        goto csum_none;
index f470ae3..f7c8113 100644 (file)
@@ -608,6 +608,7 @@ struct mlx4_en_priv {
        struct mlx4_en_flow_stats_tx tx_flowstats;
        struct mlx4_en_port_stats port_stats;
        struct mlx4_en_xdp_stats xdp_stats;
+       struct mlx4_en_phy_stats phy_stats;
        struct mlx4_en_stats_bitmap stats_bitmap;
        struct list_head mc_list;
        struct list_head curr_list;
index aab28eb..86b6051 100644 (file)
@@ -63,6 +63,14 @@ struct mlx4_en_xdp_stats {
 #define NUM_XDP_STATS          3
 };
 
+struct mlx4_en_phy_stats {
+       unsigned long rx_packets_phy;
+       unsigned long rx_bytes_phy;
+       unsigned long tx_packets_phy;
+       unsigned long tx_bytes_phy;
+#define NUM_PHY_STATS          4
+};
+
 #define NUM_MAIN_STATS 21
 
 #define MLX4_NUM_PRIORITIES    8
@@ -116,7 +124,7 @@ enum {
 
 #define NUM_ALL_STATS  (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
                         NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \
-                        NUM_XDP_STATS)
+                        NUM_XDP_STATS + NUM_PHY_STATS)
 
 #define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
                                  sizeof(((struct net_device_stats *)0)->n))
index 17b7232..b994b80 100644 (file)
@@ -337,6 +337,14 @@ void mlx5_unregister_interface(struct mlx5_interface *intf)
 }
 EXPORT_SYMBOL(mlx5_unregister_interface);
 
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
+{
+       mutex_lock(&mlx5_intf_mutex);
+       mlx5_remove_dev_by_protocol(mdev, protocol);
+       mlx5_add_dev_by_protocol(mdev, protocol);
+       mutex_unlock(&mlx5_intf_mutex);
+}
+
 void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
 {
        struct mlx5_priv *priv = &mdev->priv;
index 363d8dc..ea4b255 100644 (file)
@@ -1156,6 +1156,15 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
        kfree(ppriv); /* mlx5e_rep_priv */
 }
 
+static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+       struct mlx5e_rep_priv *rpriv;
+
+       rpriv = mlx5e_rep_to_rep_priv(rep);
+
+       return rpriv->netdev;
+}
+
 static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
@@ -1168,6 +1177,7 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
 
                rep_if.load = mlx5e_vport_rep_load;
                rep_if.unload = mlx5e_vport_rep_unload;
+               rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
                mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
        }
 }
@@ -1195,6 +1205,7 @@ void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
 
        rep_if.load = mlx5e_nic_rep_load;
        rep_if.unload = mlx5e_nic_rep_unload;
+       rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
        rep_if.priv = rpriv;
        INIT_LIST_HEAD(&rpriv->vport_sqs_list);
        mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
index c2b1d7d..77b7272 100644 (file)
@@ -1619,10 +1619,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
        esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
        esw->mode = mode;
 
-       if (mode == SRIOV_LEGACY)
+       if (mode == SRIOV_LEGACY) {
                err = esw_create_legacy_fdb_table(esw, nvfs + 1);
-       else
+       } else {
+               mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
                err = esw_offloads_init(esw, nvfs + 1);
+       }
+
        if (err)
                goto abort;
 
@@ -1644,12 +1648,17 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 
 abort:
        esw->mode = SRIOV_NONE;
+
+       if (mode == SRIOV_OFFLOADS)
+               mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
        return err;
 }
 
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 {
        struct esw_mc_addr *mc_promisc;
+       int old_mode;
        int nvports;
        int i;
 
@@ -1675,7 +1684,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
        else if (esw->mode == SRIOV_OFFLOADS)
                esw_offloads_cleanup(esw, nvports);
 
+       old_mode = esw->mode;
        esw->mode = SRIOV_NONE;
+
+       if (old_mode == SRIOV_OFFLOADS)
+               mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
 }
 
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
@@ -2175,3 +2188,9 @@ free_out:
        kvfree(out);
        return err;
 }
+
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+{
+       return esw->mode;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
index 2fa0370..98d2177 100644 (file)
 #include <linux/if_link.h>
 #include <net/devlink.h>
 #include <linux/mlx5/device.h>
+#include <linux/mlx5/eswitch.h>
 #include "lib/mpfs.h"
 
-enum {
-       SRIOV_NONE,
-       SRIOV_LEGACY,
-       SRIOV_OFFLOADS
-};
-
-enum {
-       REP_ETH,
-       NUM_REP_TYPES,
-};
-
 #ifdef CONFIG_MLX5_ESWITCH
 
 #define MLX5_MAX_UC_PER_VPORT(dev) \
@@ -139,29 +129,13 @@ struct mlx5_eswitch_fdb {
                        struct mlx5_flow_table *fdb;
                        struct mlx5_flow_group *send_to_vport_grp;
                        struct mlx5_flow_group *miss_grp;
-                       struct mlx5_flow_handle *miss_rule;
+                       struct mlx5_flow_handle *miss_rule_uni;
+                       struct mlx5_flow_handle *miss_rule_multi;
                        int vlan_push_pop_refcount;
                } offloads;
        };
 };
 
-struct mlx5_eswitch_rep;
-struct mlx5_eswitch_rep_if {
-       int                    (*load)(struct mlx5_core_dev *dev,
-                                      struct mlx5_eswitch_rep *rep);
-       void                   (*unload)(struct mlx5_eswitch_rep *rep);
-       void                    *priv;
-       bool                   valid;
-};
-
-struct mlx5_eswitch_rep {
-       struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
-       u16                    vport;
-       u8                     hw_id[ETH_ALEN];
-       u16                    vlan;
-       u32                    vlan_refcount;
-};
-
 struct mlx5_esw_offload {
        struct mlx5_flow_table *ft_offloads;
        struct mlx5_flow_group *vport_rx_group;
@@ -231,9 +205,6 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
                                 int vport,
                                 struct ifla_vf_stats *vf_stats);
-struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport,
-                                   u32 sqn);
 void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
 
 struct mlx5_flow_spec;
@@ -278,13 +249,6 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
 int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
 int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
 int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
-                                    int vport_index,
-                                    struct mlx5_eswitch_rep_if *rep_if,
-                                    u8 rep_type);
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-                                      int vport_index,
-                                      u8 rep_type);
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
 
 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
index 99f583a..0a8303c 100644 (file)
@@ -338,6 +338,7 @@ out:
        kvfree(spec);
        return flow_rule;
 }
+EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
 
 void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
 {
@@ -350,7 +351,11 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
        struct mlx5_flow_destination dest = {};
        struct mlx5_flow_handle *flow_rule = NULL;
        struct mlx5_flow_spec *spec;
+       void *headers_c;
+       void *headers_v;
        int err = 0;
+       u8 *dmac_c;
+       u8 *dmac_v;
 
        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec) {
@@ -358,6 +363,13 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
                goto out;
        }
 
+       spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+       headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                                outer_headers);
+       dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
+                             outer_headers.dmac_47_16);
+       dmac_c[0] = 0x01;
+
        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
        dest.vport_num = 0;
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
@@ -366,11 +378,28 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
                                        &flow_act, &dest, 1);
        if (IS_ERR(flow_rule)) {
                err = PTR_ERR(flow_rule);
-               esw_warn(esw->dev,  "FDB: Failed to add miss flow rule err %d\n", err);
+               esw_warn(esw->dev,  "FDB: Failed to add unicast miss flow rule err %d\n", err);
                goto out;
        }
 
-       esw->fdb_table.offloads.miss_rule = flow_rule;
+       esw->fdb_table.offloads.miss_rule_uni = flow_rule;
+
+       headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                outer_headers);
+       dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
+                             outer_headers.dmac_47_16);
+       dmac_v[0] = 0x01;
+       flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+                                       &flow_act, &dest, 1);
+       if (IS_ERR(flow_rule)) {
+               err = PTR_ERR(flow_rule);
+               esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
+               mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+               goto out;
+       }
+
+       esw->fdb_table.offloads.miss_rule_multi = flow_rule;
+
 out:
        kvfree(spec);
        return err;
@@ -426,6 +455,7 @@ static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
 }
 
 #define MAX_PF_SQ 256
+#define MAX_SQ_NVPORTS 32
 
 static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 {
@@ -438,6 +468,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
        struct mlx5_flow_group *g;
        void *match_criteria;
        u32 *flow_group_in;
+       u8 *dmac;
 
        esw_debug(esw->dev, "Create offloads FDB Tables\n");
        flow_group_in = kvzalloc(inlen, GFP_KERNEL);
@@ -455,7 +486,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
        if (err)
                goto fast_fdb_err;
 
-       table_size = nvports + MAX_PF_SQ + 1;
+       table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
 
        ft_attr.max_fte = table_size;
        ft_attr.prio = FDB_SLOW_PATH;
@@ -478,7 +509,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
        MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
        MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
 
-       ix = nvports + MAX_PF_SQ;
+       ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
 
@@ -492,10 +523,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 
        /* create miss group */
        memset(flow_group_in, 0, inlen);
-       MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0);
+       MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+                MLX5_MATCH_OUTER_HEADERS);
+       match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+                                     match_criteria);
+       dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+                           outer_headers.dmac_47_16);
+       dmac[0] = 0x01;
 
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
-       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1);
+       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
 
        g = mlx5_create_flow_group(fdb, flow_group_in);
        if (IS_ERR(g)) {
@@ -531,7 +568,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
                return;
 
        esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
-       mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
+       mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
+       mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
        mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
        mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 
@@ -789,14 +827,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
 {
        int err;
 
-       /* disable PF RoCE so missed packets don't go through RoCE steering */
-       mlx5_dev_list_lock();
-       mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-       mlx5_dev_list_unlock();
-
        err = esw_create_offloads_fdb_tables(esw, nvports);
        if (err)
-               goto create_fdb_err;
+               return err;
 
        err = esw_create_offloads_table(esw);
        if (err)
@@ -821,12 +854,6 @@ create_fg_err:
 create_ft_err:
        esw_destroy_offloads_fdb_tables(esw);
 
-create_fdb_err:
-       /* enable back PF RoCE */
-       mlx5_dev_list_lock();
-       mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-       mlx5_dev_list_unlock();
-
        return err;
 }
 
@@ -844,9 +871,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw)
        }
 
        /* enable back PF RoCE */
-       mlx5_dev_list_lock();
-       mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-       mlx5_dev_list_unlock();
+       mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
 
        return err;
 }
@@ -1160,10 +1185,12 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 
        rep_if->load   = __rep_if->load;
        rep_if->unload = __rep_if->unload;
+       rep_if->get_proto_dev = __rep_if->get_proto_dev;
        rep_if->priv = __rep_if->priv;
 
        rep_if->valid = true;
 }
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
 
 void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
                                       int vport_index, u8 rep_type)
@@ -1178,6 +1205,7 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
 
        rep->rep_if[rep_type].valid = false;
 }
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
 
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
 {
@@ -1188,3 +1216,35 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
        rep = &offloads->vport_reps[UPLINK_REP_INDEX];
        return rep->rep_if[rep_type].priv;
 }
+
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+                                int vport,
+                                u8 rep_type)
+{
+       struct mlx5_esw_offload *offloads = &esw->offloads;
+       struct mlx5_eswitch_rep *rep;
+
+       if (vport == FDB_UPLINK_VPORT)
+               vport = UPLINK_REP_INDEX;
+
+       rep = &offloads->vport_reps[vport];
+
+       if (rep->rep_if[rep_type].valid &&
+           rep->rep_if[rep_type].get_proto_dev)
+               return rep->rep_if[rep_type].get_proto_dev(rep);
+       return NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
+
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
+{
+       return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+}
+EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
+
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+                                               int vport)
+{
+       return &esw->offloads.vport_reps[vport];
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
index 23e17ac..4e25f2b 100644 (file)
 #define DRIVER_NAME "mlx5_core"
 #define DRIVER_VERSION "5.0-0"
 
-#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev))
-#define MLX5_VPORT_MANAGER(mdev) \
-       (MLX5_CAP_GEN(mdev, vport_group_manager) && \
-       (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
-        mlx5_core_is_pf(mdev))
-
 extern uint mlx5_core_debug_mask;
 
 #define mlx5_core_dbg(__dev, format, ...)                              \
@@ -207,4 +201,5 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
 int mlx5_lag_allow(struct mlx5_core_dev *dev);
 int mlx5_lag_forbid(struct mlx5_core_dev *dev);
 
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
 #endif /* __MLX5_CORE_H__ */
index d56eea3..93d97b4 100644 (file)
@@ -78,6 +78,10 @@ config MLXSW_SPECTRUM
        depends on IPV6 || IPV6=n
        select PARMAN
        select MLXFW
+       depends on NET_IPGRE
+       depends on !(MLXSW_CORE=y && NET_IPGRE=m)
+       depends on IPV6_GRE
+       depends on !(MLXSW_CORE=y && IPV6_GRE=m)
        default m
        ---help---
          This driver supports Mellanox Technologies Spectrum Ethernet
index b698fb4..ba33842 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
@@ -838,7 +838,6 @@ struct mlxsw_afa_mirror {
        struct mlxsw_afa_resource resource;
        int span_id;
        u8 local_in_port;
-       u8 local_out_port;
        bool ingress;
 };
 
@@ -848,7 +847,7 @@ mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block,
 {
        block->afa->ops->mirror_del(block->afa->ops_priv,
                                    mirror->local_in_port,
-                                   mirror->local_out_port,
+                                   mirror->span_id,
                                    mirror->ingress);
        kfree(mirror);
 }
@@ -864,9 +863,8 @@ mlxsw_afa_mirror_destructor(struct mlxsw_afa_block *block,
 }
 
 static struct mlxsw_afa_mirror *
-mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
-                       u8 local_in_port, u8 local_out_port,
-                       bool ingress)
+mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, u8 local_in_port,
+                       const struct net_device *out_dev, bool ingress)
 {
        struct mlxsw_afa_mirror *mirror;
        int err;
@@ -876,13 +874,12 @@ mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
                return ERR_PTR(-ENOMEM);
 
        err = block->afa->ops->mirror_add(block->afa->ops_priv,
-                                         local_in_port, local_out_port,
+                                         local_in_port, out_dev,
                                          ingress, &mirror->span_id);
        if (err)
                goto err_mirror_add;
 
        mirror->ingress = ingress;
-       mirror->local_out_port = local_out_port;
        mirror->local_in_port = local_in_port;
        mirror->resource.destructor = mlxsw_afa_mirror_destructor;
        mlxsw_afa_resource_add(block, &mirror->resource);
@@ -909,13 +906,13 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block,
 }
 
 int
-mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
-                             u8 local_in_port, u8 local_out_port, bool ingress)
+mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u8 local_in_port,
+                             const struct net_device *out_dev, bool ingress)
 {
        struct mlxsw_afa_mirror *mirror;
        int err;
 
-       mirror = mlxsw_afa_mirror_create(block, local_in_port, local_out_port,
+       mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev,
                                         ingress);
        if (IS_ERR(mirror))
                return PTR_ERR(mirror);
index 4313229..6dd6017 100644 (file)
@@ -36,6 +36,7 @@
 #define _MLXSW_CORE_ACL_FLEX_ACTIONS_H
 
 #include <linux/types.h>
+#include <linux/netdevice.h>
 
 struct mlxsw_afa;
 struct mlxsw_afa_block;
@@ -48,9 +49,10 @@ struct mlxsw_afa_ops {
        void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index);
        int (*counter_index_get)(void *priv, unsigned int *p_counter_index);
        void (*counter_index_put)(void *priv, unsigned int counter_index);
-       int (*mirror_add)(void *priv, u8 locol_in_port, u8 local_out_port,
+       int (*mirror_add)(void *priv, u8 local_in_port,
+                         const struct net_device *out_dev,
                          bool ingress, int *p_span_id);
-       void (*mirror_del)(void *priv, u8 locol_in_port, u8 local_out_port,
+       void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
                           bool ingress);
 };
 
@@ -70,7 +72,8 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id);
 int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
                                            u16 trap_id);
 int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
-                                 u8 local_in_port, u8 local_out_port,
+                                 u8 local_in_port,
+                                 const struct net_device *out_dev,
                                  bool ingress);
 int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
                               u8 local_port, bool in_port);
index 0e08be4..cb5f77f 100644 (file)
@@ -1,11 +1,11 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/reg.h
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com>
  * Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
  * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -6772,8 +6772,104 @@ MLXSW_ITEM32(reg, mpat, qos, 0x04, 26, 1);
  */
 MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1);
 
+enum mlxsw_reg_mpat_span_type {
+       /* Local SPAN Ethernet.
+        * The original packet is not encapsulated.
+        */
+       MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH = 0x0,
+
+       /* Encapsulated Remote SPAN Ethernet L3 GRE.
+        * The packet is encapsulated with GRE header.
+        */
+       MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3 = 0x3,
+};
+
+/* reg_mpat_span_type
+ * SPAN type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, span_type, 0x04, 0, 4);
+
+/* Remote SPAN - Ethernet VLAN
+ * - - - - - - - - - - - - - -
+ */
+
+/* reg_mpat_eth_rspan_vid
+ * Encapsulation header VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_vid, 0x18, 0, 12);
+
+/* Encapsulated Remote SPAN - Ethernet L2
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_version {
+       MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER = 15,
+};
+
+/* reg_mpat_eth_rspan_version
+ * RSPAN mirror header version.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_version, 0x10, 18, 4);
+
+/* reg_mpat_eth_rspan_mac
+ * Destination MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_mac, 0x12, 6);
+
+/* reg_mpat_eth_rspan_tp
+ * Tag Packet. Indicates whether the mirroring header should be VLAN tagged.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_tp, 0x18, 16, 1);
+
+/* Encapsulated Remote SPAN - Ethernet L3
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_protocol {
+       MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4,
+       MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6,
+};
+
+/* reg_mpat_eth_rspan_protocol
+ * SPAN encapsulation protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_protocol, 0x18, 24, 4);
+
+/* reg_mpat_eth_rspan_ttl
+ * Encapsulation header Time-to-Live/HopLimit.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_ttl, 0x1C, 4, 8);
+
+/* reg_mpat_eth_rspan_smac
+ * Source MAC address
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_smac, 0x22, 6);
+
+/* reg_mpat_eth_rspan_dip*
+ * Destination IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_dip4, 0x4C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_dip6, 0x40, 16);
+
+/* reg_mpat_eth_rspan_sip*
+ * Source IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_sip4, 0x5C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_sip6, 0x50, 16);
+
 static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
-                                      u16 system_port, bool e)
+                                      u16 system_port, bool e,
+                                      enum mlxsw_reg_mpat_span_type span_type)
 {
        MLXSW_REG_ZERO(mpat, payload);
        mlxsw_reg_mpat_pa_id_set(payload, pa_id);
@@ -6781,6 +6877,49 @@ static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
        mlxsw_reg_mpat_e_set(payload, e);
        mlxsw_reg_mpat_qos_set(payload, 1);
        mlxsw_reg_mpat_be_set(payload, 1);
+       mlxsw_reg_mpat_span_type_set(payload, span_type);
+}
+
+static inline void mlxsw_reg_mpat_eth_rspan_pack(char *payload, u16 vid)
+{
+       mlxsw_reg_mpat_eth_rspan_vid_set(payload, vid);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l2_pack(char *payload,
+                                enum mlxsw_reg_mpat_eth_rspan_version version,
+                                const char *mac,
+                                bool tp)
+{
+       mlxsw_reg_mpat_eth_rspan_version_set(payload, version);
+       mlxsw_reg_mpat_eth_rspan_mac_memcpy_to(payload, mac);
+       mlxsw_reg_mpat_eth_rspan_tp_set(payload, tp);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(char *payload, u8 ttl,
+                                     const char *smac,
+                                     u32 sip, u32 dip)
+{
+       mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+       mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+       mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+                                   MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4);
+       mlxsw_reg_mpat_eth_rspan_sip4_set(payload, sip);
+       mlxsw_reg_mpat_eth_rspan_dip4_set(payload, dip);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl,
+                                     const char *smac,
+                                     struct in6_addr sip, struct in6_addr dip)
+{
+       mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+       mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+       mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+                                   MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6);
+       mlxsw_reg_mpat_eth_rspan_sip6_memcpy_to(payload, (void *)&sip);
+       mlxsw_reg_mpat_eth_rspan_dip6_memcpy_to(payload, (void *)&dip);
 }
 
 /* MPAR - Monitoring Port Analyzer Register
index bfde939..7c6204f 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum.c
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com>
  * Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
@@ -1040,6 +1040,16 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev,
                xstats->tail_drop[i] =
                        mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl);
        }
+
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT,
+                                                 i, ppcnt_pl);
+               if (err)
+                       continue;
+
+               xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl);
+               xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl);
+       }
 }
 
 static void update_stats_cache(struct work_struct *work)
@@ -1258,7 +1268,6 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
                                      bool ingress)
 {
        enum mlxsw_sp_span_type span_type;
-       struct mlxsw_sp_port *to_port;
        struct net_device *to_dev;
 
        to_dev = tcf_mirred_dev(a);
@@ -1267,17 +1276,10 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
                return -EINVAL;
        }
 
-       if (!mlxsw_sp_port_dev_check(to_dev)) {
-               netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port");
-               return -EOPNOTSUPP;
-       }
-       to_port = netdev_priv(to_dev);
-
-       mirror->to_local_port = to_port->local_port;
        mirror->ingress = ingress;
        span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-       return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type,
-                                       true);
+       return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type,
+                                       true, &mirror->span_id);
 }
 
 static void
@@ -1288,7 +1290,7 @@ mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
 
        span_type = mirror->ingress ?
                        MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-       mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->to_local_port,
+       mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id,
                                 span_type, true);
 }
 
@@ -3675,14 +3677,24 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                goto err_afa_init;
        }
 
+       err = mlxsw_sp_span_init(mlxsw_sp);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
+               goto err_span_init;
+       }
+
+       /* Initialize router after SPAN is initialized, so that the FIB and
+        * neighbor event handlers can issue SPAN respin.
+        */
        err = mlxsw_sp_router_init(mlxsw_sp);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
                goto err_router_init;
        }
 
-       /* Initialize netdevice notifier after router is initialized, so that
-        * the event handler can use router structures.
+       /* Initialize netdevice notifier after router and SPAN is initialized,
+        * so that the event handler can use router structures and call SPAN
+        * respin.
         */
        mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
        err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
@@ -3691,12 +3703,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                goto err_netdev_notifier;
        }
 
-       err = mlxsw_sp_span_init(mlxsw_sp);
-       if (err) {
-               dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
-               goto err_span_init;
-       }
-
        err = mlxsw_sp_acl_init(mlxsw_sp);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
@@ -3722,12 +3728,12 @@ err_ports_create:
 err_dpipe_init:
        mlxsw_sp_acl_fini(mlxsw_sp);
 err_acl_init:
-       mlxsw_sp_span_fini(mlxsw_sp);
-err_span_init:
        unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
 err_netdev_notifier:
        mlxsw_sp_router_fini(mlxsw_sp);
 err_router_init:
+       mlxsw_sp_span_fini(mlxsw_sp);
+err_span_init:
        mlxsw_sp_afa_fini(mlxsw_sp);
 err_afa_init:
        mlxsw_sp_counter_pool_fini(mlxsw_sp);
@@ -3753,9 +3759,9 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
        mlxsw_sp_ports_remove(mlxsw_sp);
        mlxsw_sp_dpipe_fini(mlxsw_sp);
        mlxsw_sp_acl_fini(mlxsw_sp);
-       mlxsw_sp_span_fini(mlxsw_sp);
        unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
        mlxsw_sp_router_fini(mlxsw_sp);
+       mlxsw_sp_span_fini(mlxsw_sp);
        mlxsw_sp_afa_fini(mlxsw_sp);
        mlxsw_sp_counter_pool_fini(mlxsw_sp);
        mlxsw_sp_switchdev_fini(mlxsw_sp);
@@ -4639,10 +4645,18 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
                                    unsigned long event, void *ptr)
 {
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+       struct mlxsw_sp_span_entry *span_entry;
        struct mlxsw_sp *mlxsw_sp;
        int err = 0;
 
        mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+       if (event == NETDEV_UNREGISTER) {
+               span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev);
+               if (span_entry)
+                       mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
+       }
+       mlxsw_sp_span_respin(mlxsw_sp);
+
        if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
                err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
                                                       event, ptr);
index 675e03a..d5e711d 100644 (file)
@@ -124,7 +124,7 @@ enum mlxsw_sp_port_mall_action_type {
 };
 
 struct mlxsw_sp_port_mall_mirror_tc_entry {
-       u8 to_local_port;
+       int span_id;
        bool ingress;
 };
 
@@ -210,6 +210,8 @@ struct mlxsw_sp_port_xstats {
        u64 wred_drop[TC_MAX_QUEUE];
        u64 tail_drop[TC_MAX_QUEUE];
        u64 backlog[TC_MAX_QUEUE];
+       u64 tx_bytes[IEEE_8021QAZ_MAX_TCS];
+       u64 tx_packets[IEEE_8021QAZ_MAX_TCS];
 };
 
 struct mlxsw_sp_port {
@@ -247,6 +249,7 @@ struct mlxsw_sp_port {
        struct mlxsw_sp_port_sample *sample;
        struct list_head vlans_list;
        struct mlxsw_sp_qdisc *root_qdisc;
+       struct mlxsw_sp_qdisc *tclass_qdiscs;
        unsigned acl_rule_count;
        struct mlxsw_sp_acl_block *ing_acl_block;
        struct mlxsw_sp_acl_block *eg_acl_block;
index 0897a54..21ed27a 100644 (file)
@@ -572,7 +572,6 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
                                  struct net_device *out_dev)
 {
        struct mlxsw_sp_acl_block_binding *binding;
-       struct mlxsw_sp_port *out_port;
        struct mlxsw_sp_port *in_port;
 
        if (!list_is_singular(&block->binding_list))
@@ -581,16 +580,10 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
        binding = list_first_entry(&block->binding_list,
                                   struct mlxsw_sp_acl_block_binding, list);
        in_port = binding->mlxsw_sp_port;
-       if (!mlxsw_sp_port_dev_check(out_dev))
-               return -EINVAL;
-
-       out_port = netdev_priv(out_dev);
-       if (out_port->mlxsw_sp != mlxsw_sp)
-               return -EINVAL;
 
        return mlxsw_afa_block_append_mirror(rulei->act_block,
                                             in_port->local_port,
-                                            out_port->local_port,
+                                            out_dev,
                                             binding->ingress);
 }
 
index f7e61ce..510ce48 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
  *
@@ -126,40 +126,23 @@ mlxsw_sp_act_counter_index_put(void *priv, unsigned int counter_index)
 }
 
 static int
-mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port, u8 local_out_port,
+mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port,
+                       const struct net_device *out_dev,
                        bool ingress, int *p_span_id)
 {
-       struct mlxsw_sp_port *in_port, *out_port;
-       struct mlxsw_sp_span_entry *span_entry;
+       struct mlxsw_sp_port *in_port;
        struct mlxsw_sp *mlxsw_sp = priv;
        enum mlxsw_sp_span_type type;
-       int err;
 
        type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-       out_port = mlxsw_sp->ports[local_out_port];
        in_port = mlxsw_sp->ports[local_in_port];
 
-       err = mlxsw_sp_span_mirror_add(in_port, out_port, type, false);
-       if (err)
-               return err;
-
-       span_entry = mlxsw_sp_span_entry_find(mlxsw_sp, local_out_port);
-       if (!span_entry) {
-               err = -ENOENT;
-               goto err_span_entry_find;
-       }
-
-       *p_span_id = span_entry->id;
-       return 0;
-
-err_span_entry_find:
-       mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
-       return err;
+       return mlxsw_sp_span_mirror_add(in_port, out_dev, type,
+                                       false, p_span_id);
 }
 
 static void
-mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
-                       bool ingress)
+mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress)
 {
        struct mlxsw_sp *mlxsw_sp = priv;
        struct mlxsw_sp_port *in_port;
@@ -168,7 +151,7 @@ mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
        type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
        in_port = mlxsw_sp->ports[local_in_port];
 
-       mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
+       mlxsw_sp_span_mirror_del(in_port, span_id, type, false);
 }
 
 static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
index a1c4b1e..98d896c 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -33,6 +33,7 @@
  */
 
 #include <net/ip_tunnels.h>
+#include <net/ip6_tunnel.h>
 
 #include "spectrum_ipip.h"
 
@@ -44,6 +45,14 @@ mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
        return tun->parms;
 }
 
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev)
+{
+       struct ip6_tnl *tun = netdev_priv(ol_dev);
+
+       return tun->parms;
+}
+
 static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
 {
        return !!(parms.i_flags & TUNNEL_KEY);
@@ -72,24 +81,38 @@ mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
        return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr };
 }
 
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms6_saddr(struct __ip6_tnl_parm parms)
+{
+       return (union mlxsw_sp_l3addr) { .addr6 = parms.laddr };
+}
+
 static union mlxsw_sp_l3addr
 mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
 {
        return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
 }
 
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms6_daddr(struct __ip6_tnl_parm parms)
+{
+       return (union mlxsw_sp_l3addr) { .addr6 = parms.raddr };
+}
+
 union mlxsw_sp_l3addr
 mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
                           const struct net_device *ol_dev)
 {
        struct ip_tunnel_parm parms4;
+       struct __ip6_tnl_parm parms6;
 
        switch (proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
                return mlxsw_sp_ipip_parms4_saddr(parms4);
        case MLXSW_SP_L3_PROTO_IPV6:
-               break;
+               parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+               return mlxsw_sp_ipip_parms6_saddr(parms6);
        }
 
        WARN_ON(1);
@@ -109,19 +132,28 @@ mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
                           const struct net_device *ol_dev)
 {
        struct ip_tunnel_parm parms4;
+       struct __ip6_tnl_parm parms6;
 
        switch (proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
                return mlxsw_sp_ipip_parms4_daddr(parms4);
        case MLXSW_SP_L3_PROTO_IPV6:
-               break;
+               parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+               return mlxsw_sp_ipip_parms6_daddr(parms6);
        }
 
        WARN_ON(1);
        return (union mlxsw_sp_l3addr) {0};
 }
 
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
+{
+       union mlxsw_sp_l3addr naddr = {0};
+
+       return !memcmp(&addr, &naddr, sizeof(naddr));
+}
+
 static int
 mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
                                  struct mlxsw_sp_ipip_entry *ipip_entry)
@@ -215,15 +247,14 @@ static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
 {
        union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
        union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
-       union mlxsw_sp_l3addr naddr = {0};
 
        /* Tunnels with unset local or remote address are valid in Linux and
         * used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
         * (NBMA) tunnels. In principle these can be offloaded, but the driver
         * currently doesn't support this. So punt.
         */
-       return memcmp(&saddr, &naddr, sizeof(naddr)) &&
-              memcmp(&daddr, &naddr, sizeof(naddr));
+       return !mlxsw_sp_l3addr_is_zero(saddr) &&
+              !mlxsw_sp_l3addr_is_zero(daddr);
 }
 
 static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
index a4ff573..6909d86 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
 
 struct ip_tunnel_parm
 mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev);
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev);
 
 union mlxsw_sp_l3addr
 mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
                           const struct net_device *ol_dev);
 
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr);
+
 enum mlxsw_sp_ipip_type {
        MLXSW_SP_IPIP_TYPE_GRE4,
        MLXSW_SP_IPIP_TYPE_MAX,
index d27fa57..059eb32 100644 (file)
@@ -270,6 +270,8 @@ static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
        case MLXSW_SP_KVDL_PART_LARGE_CHUNKS:
                resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS;
                break;
+       default:
+               return -EINVAL;
        }
 
        err = devlink_resource_size_get(devlink, resource_id, &resource_size);
@@ -278,7 +280,7 @@ static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
                resource_size = info->end_index - info->start_index + 1;
        }
 
-       nr_entries = resource_size / info->alloc_size;
+       nr_entries = div_u64(resource_size, info->alloc_size);
        usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
        part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
        if (!part)
index d20b143..978a3c7 100644 (file)
@@ -126,8 +126,8 @@ mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
 
        switch (mr_route->mr_table->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
-               ivif = mr_route->mfc4->mfc_parent;
-               return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255;
+               ivif = mr_route->mfc4->_c.mfc_parent;
+               return mr_route->mfc4->_c.mfc_un.res.ttls[ivif] != 255;
        case MLXSW_SP_L3_PROTO_IPV6:
                /* fall through */
        default:
@@ -364,7 +364,7 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
        mr_route->mfc4 = mfc;
        mr_route->mr_table = mr_table;
        for (i = 0; i < MAXVIFS; i++) {
-               if (mfc->mfc_un.res.ttls[i] != 255) {
+               if (mfc->_c.mfc_un.res.ttls[i] != 255) {
                        err = mlxsw_sp_mr_route_evif_link(mr_route,
                                                          &mr_table->vifs[i]);
                        if (err)
@@ -374,7 +374,8 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
                                mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
                }
        }
-       mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]);
+       mlxsw_sp_mr_route_ivif_link(mr_route,
+                                   &mr_table->vifs[mfc->_c.mfc_parent]);
 
        mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
        return mr_route;
@@ -418,9 +419,9 @@ static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
        switch (mr_route->mr_table->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                if (offload)
-                       mr_route->mfc4->mfc_flags |= MFC_OFFLOAD;
+                       mr_route->mfc4->_c.mfc_flags |= MFC_OFFLOAD;
                else
-                       mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD;
+                       mr_route->mfc4->_c.mfc_flags &= ~MFC_OFFLOAD;
                break;
        case MLXSW_SP_L3_PROTO_IPV6:
                /* fall through */
@@ -943,10 +944,10 @@ static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
 
        switch (mr_route->mr_table->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
-               if (mr_route->mfc4->mfc_un.res.pkt != packets)
-                       mr_route->mfc4->mfc_un.res.lastuse = jiffies;
-               mr_route->mfc4->mfc_un.res.pkt = packets;
-               mr_route->mfc4->mfc_un.res.bytes = bytes;
+               if (mr_route->mfc4->_c.mfc_un.res.pkt != packets)
+                       mr_route->mfc4->_c.mfc_un.res.lastuse = jiffies;
+               mr_route->mfc4->_c.mfc_un.res.pkt = packets;
+               mr_route->mfc4->_c.mfc_un.res.bytes = bytes;
                break;
        case MLXSW_SP_L3_PROTO_IPV6:
                /* fall through */
index 0b76704..91262b0 100644 (file)
@@ -42,6 +42,8 @@
 #include "reg.h"
 
 #define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1)
+#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \
+       MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1))
 
 enum mlxsw_sp_qdisc_type {
        MLXSW_SP_QDISC_NO_QDISC,
@@ -76,6 +78,7 @@ struct mlxsw_sp_qdisc_ops {
 struct mlxsw_sp_qdisc {
        u32 handle;
        u8 tclass_num;
+       u8 prio_bitmap;
        union {
                struct red_stats red;
        } xstats_base;
@@ -99,6 +102,44 @@ mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle,
               mlxsw_sp_qdisc->handle == handle;
 }
 
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
+                   bool root_only)
+{
+       int tclass, child_index;
+
+       if (parent == TC_H_ROOT)
+               return mlxsw_sp_port->root_qdisc;
+
+       if (root_only || !mlxsw_sp_port->root_qdisc ||
+           !mlxsw_sp_port->root_qdisc->ops ||
+           TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle ||
+           TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS)
+               return NULL;
+
+       child_index = TC_H_MIN(parent);
+       tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
+       return &mlxsw_sp_port->tclass_qdiscs[tclass];
+}
+
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
+{
+       int i;
+
+       if (mlxsw_sp_port->root_qdisc->handle == handle)
+               return mlxsw_sp_port->root_qdisc;
+
+       if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC)
+               return NULL;
+
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+               if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle)
+                       return &mlxsw_sp_port->tclass_qdiscs[i];
+
+       return NULL;
+}
+
 static int
 mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
                       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
@@ -185,6 +226,23 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
        return -EOPNOTSUPP;
 }
 
+static void
+mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats,
+                                      u8 prio_bitmap, u64 *tx_packets,
+                                      u64 *tx_bytes)
+{
+       int i;
+
+       *tx_packets = 0;
+       *tx_bytes = 0;
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               if (prio_bitmap & BIT(i)) {
+                       *tx_packets += xstats->tx_packets[i];
+                       *tx_bytes += xstats->tx_bytes[i];
+               }
+       }
+}
+
 static int
 mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
                                  int tclass_num, u32 min, u32 max,
@@ -230,17 +288,16 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
        u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
        struct mlxsw_sp_qdisc_stats *stats_base;
        struct mlxsw_sp_port_xstats *xstats;
-       struct rtnl_link_stats64 *stats;
        struct red_stats *red_base;
 
        xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
-       stats = &mlxsw_sp_port->periodic_hw_stats.stats;
        stats_base = &mlxsw_sp_qdisc->stats_base;
        red_base = &mlxsw_sp_qdisc->xstats_base.red;
 
-       stats_base->tx_packets = stats->tx_packets;
-       stats_base->tx_bytes = stats->tx_bytes;
-
+       mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+                                              mlxsw_sp_qdisc->prio_bitmap,
+                                              &stats_base->tx_packets,
+                                              &stats_base->tx_bytes);
        red_base->prob_mark = xstats->ecn;
        red_base->prob_drop = xstats->wred_drop[tclass_num];
        red_base->pdrop = xstats->tail_drop[tclass_num];
@@ -255,6 +312,12 @@ static int
 mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
                           struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
 {
+       struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc;
+
+       if (root_qdisc != mlxsw_sp_qdisc)
+               root_qdisc->stats_base.backlog -=
+                                       mlxsw_sp_qdisc->stats_base.backlog;
+
        return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
                                                  mlxsw_sp_qdisc->tclass_num);
 }
@@ -319,6 +382,7 @@ mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
        backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
                                       mlxsw_sp_qdisc->stats_base.backlog);
        p->qstats->backlog -= backlog;
+       mlxsw_sp_qdisc->stats_base.backlog = 0;
 }
 
 static int
@@ -357,14 +421,16 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
        u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
        struct mlxsw_sp_qdisc_stats *stats_base;
        struct mlxsw_sp_port_xstats *xstats;
-       struct rtnl_link_stats64 *stats;
 
        xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
-       stats = &mlxsw_sp_port->periodic_hw_stats.stats;
        stats_base = &mlxsw_sp_qdisc->stats_base;
 
-       tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
-       tx_packets = stats->tx_packets - stats_base->tx_packets;
+       mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+                                              mlxsw_sp_qdisc->prio_bitmap,
+                                              &tx_packets, &tx_bytes);
+       tx_bytes = tx_bytes - stats_base->tx_bytes;
+       tx_packets = tx_packets - stats_base->tx_packets;
+
        overlimits = xstats->wred_drop[tclass_num] + xstats->ecn -
                     stats_base->overlimits;
        drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] -
@@ -406,11 +472,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
 {
        struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
 
-       if (p->parent != TC_H_ROOT)
+       mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
+       if (!mlxsw_sp_qdisc)
                return -EOPNOTSUPP;
 
-       mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
-
        if (p->command == TC_RED_REPLACE)
                return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
                                              mlxsw_sp_qdisc,
@@ -441,9 +506,13 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 {
        int i;
 
-       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
                                          MLXSW_SP_PORT_DEFAULT_TCLASS);
+               mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+                                      &mlxsw_sp_port->tclass_qdiscs[i]);
+               mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
+       }
 
        return 0;
 }
@@ -467,16 +536,41 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
                            void *params)
 {
        struct tc_prio_qopt_offload_params *p = params;
-       int tclass, i;
+       struct mlxsw_sp_qdisc *child_qdisc;
+       int tclass, i, band, backlog;
+       u8 old_priomap;
        int err;
 
-       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-               tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]);
-               err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass);
-               if (err)
-                       return err;
+       for (band = 0; band < p->bands; band++) {
+               tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+               child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+               old_priomap = child_qdisc->prio_bitmap;
+               child_qdisc->prio_bitmap = 0;
+               for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+                       if (p->priomap[i] == band) {
+                               child_qdisc->prio_bitmap |= BIT(i);
+                               if (BIT(i) & old_priomap)
+                                       continue;
+                               err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port,
+                                                               i, tclass);
+                               if (err)
+                                       return err;
+                       }
+               }
+               if (old_priomap != child_qdisc->prio_bitmap &&
+                   child_qdisc->ops && child_qdisc->ops->clean_stats) {
+                       backlog = child_qdisc->stats_base.backlog;
+                       child_qdisc->ops->clean_stats(mlxsw_sp_port,
+                                                     child_qdisc);
+                       child_qdisc->stats_base.backlog = backlog;
+               }
+       }
+       for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
+               tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+               child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+               child_qdisc->prio_bitmap = 0;
+               mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
        }
-
        return 0;
 }
 
@@ -513,6 +607,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                drops += xstats->tail_drop[i];
+               drops += xstats->wred_drop[i];
                backlog += xstats->backlog[i];
        }
        drops = drops - stats_base->drops;
@@ -548,8 +643,10 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
        stats_base->tx_bytes = stats->tx_bytes;
 
        stats_base->drops = 0;
-       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                stats_base->drops += xstats->tail_drop[i];
+               stats_base->drops += xstats->wred_drop[i];
+       }
 
        mlxsw_sp_qdisc->stats_base.backlog = 0;
 }
@@ -564,15 +661,48 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
        .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
 };
 
+/* Grafting is not supported in mlxsw. It will result in un-offloading of the
+ * grafted qdisc as well as the qdisc in the qdisc new location.
+ * (However, if the graft is to the location where the qdisc is already at, it
+ * will be ignored completely and won't cause un-offloading).
+ */
+static int
+mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
+                         struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+                         struct tc_prio_qopt_offload_graft_params *p)
+{
+       int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band);
+       struct mlxsw_sp_qdisc *old_qdisc;
+
+       /* Check if the grafted qdisc is already in its "new" location. If so -
+        * nothing needs to be done.
+        */
+       if (p->band < IEEE_8021QAZ_MAX_TCS &&
+           mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle)
+               return 0;
+
+       /* See if the grafted qdisc is already offloaded on any tclass. If so,
+        * unoffload it.
+        */
+       old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port,
+                                                 p->child_handle);
+       if (old_qdisc)
+               mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
+
+       mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+                              &mlxsw_sp_port->tclass_qdiscs[tclass_num]);
+       return -EOPNOTSUPP;
+}
+
 int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
                           struct tc_prio_qopt_offload *p)
 {
        struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
 
-       if (p->parent != TC_H_ROOT)
+       mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
+       if (!mlxsw_sp_qdisc)
                return -EOPNOTSUPP;
 
-       mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
        if (p->command == TC_PRIO_REPLACE)
                return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
                                              mlxsw_sp_qdisc,
@@ -589,6 +719,9 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
        case TC_PRIO_STATS:
                return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
                                                &p->stats);
+       case TC_PRIO_GRAFT:
+               return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
+                                                &p->graft_params);
        default:
                return -EOPNOTSUPP;
        }
@@ -596,17 +729,36 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
 
 int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
 {
-       mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc),
-                                           GFP_KERNEL);
-       if (!mlxsw_sp_port->root_qdisc)
-               return -ENOMEM;
+       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+       int i;
 
+       mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL);
+       if (!mlxsw_sp_qdisc)
+               goto err_root_qdisc_init;
+
+       mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc;
+       mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff;
        mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
 
+       mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS,
+                                GFP_KERNEL);
+       if (!mlxsw_sp_qdisc)
+               goto err_tclass_qdiscs_init;
+
+       mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc;
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+               mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i;
+
        return 0;
+
+err_tclass_qdiscs_init:
+       kfree(mlxsw_sp_port->root_qdisc);
+err_root_qdisc_init:
+       return -ENOMEM;
 }
 
 void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+       kfree(mlxsw_sp_port->tclass_qdiscs);
        kfree(mlxsw_sp_port->root_qdisc);
 }
index 0514697..69f16c6 100644 (file)
@@ -70,6 +70,7 @@
 #include "spectrum_mr.h"
 #include "spectrum_mr_tcam.h"
 #include "spectrum_router.h"
+#include "spectrum_span.h"
 
 struct mlxsw_sp_fib;
 struct mlxsw_sp_vr;
@@ -2330,6 +2331,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
        read_unlock_bh(&n->lock);
 
        rtnl_lock();
+       mlxsw_sp_span_respin(mlxsw_sp);
+
        entry_connected = nud_state & NUD_VALID && !dead;
        neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
        if (!entry_connected && !neigh_entry)
@@ -5589,6 +5592,8 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
 
        /* Protect internal structures from changes */
        rtnl_lock();
+       mlxsw_sp_span_respin(mlxsw_sp);
+
        switch (fib_work->event) {
        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
        case FIB_EVENT_ENTRY_APPEND: /* fall through */
@@ -5631,6 +5636,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
        int err;
 
        rtnl_lock();
+       mlxsw_sp_span_respin(mlxsw_sp);
+
        switch (fib_work->event) {
        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
        case FIB_EVENT_ENTRY_ADD:
index c3bec37..f537e1d 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.c
  * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  */
 
 #include <linux/list.h>
+#include <net/arp.h>
+#include <net/gre.h>
+#include <net/ndisc.h>
+#include <net/ip6_tunnel.h>
 
 #include "spectrum.h"
 #include "spectrum_span.h"
+#include "spectrum_ipip.h"
 
 int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
 {
@@ -51,8 +57,12 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
        if (!mlxsw_sp->span.entries)
                return -ENOMEM;
 
-       for (i = 0; i < mlxsw_sp->span.entries_count; i++)
-               INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list);
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+               INIT_LIST_HEAD(&curr->bound_ports_list);
+               curr->id = i;
+       }
 
        return 0;
 }
@@ -69,80 +79,460 @@ void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
        kfree(mlxsw_sp->span.entries);
 }
 
-static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
+static int
+mlxsw_sp_span_entry_phys_parms(const struct net_device *to_dev,
+                              struct mlxsw_sp_span_parms *sparmsp)
 {
-       struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-       struct mlxsw_sp_span_entry *span_entry;
+       sparmsp->dest_port = netdev_priv(to_dev);
+       return 0;
+}
+
+static int
+mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry,
+                                  struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_port *dest_port = sparms.dest_port;
+       struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+       u8 local_port = dest_port->local_port;
+       char mpat_pl[MLXSW_REG_MPAT_LEN];
+       int pa_id = span_entry->id;
+
+       /* Create a new port analyzer entry for local_port. */
+       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+                           MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry,
+                                      enum mlxsw_reg_mpat_span_type span_type)
+{
+       struct mlxsw_sp_port *dest_port = span_entry->parms.dest_port;
+       struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+       u8 local_port = dest_port->local_port;
+       char mpat_pl[MLXSW_REG_MPAT_LEN];
+       int pa_id = span_entry->id;
+
+       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure_common(span_entry,
+                                           MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
+       .can_handle = mlxsw_sp_port_dev_check,
+       .parms = mlxsw_sp_span_entry_phys_parms,
+       .configure = mlxsw_sp_span_entry_phys_configure,
+       .deconfigure = mlxsw_sp_span_entry_phys_deconfigure,
+};
+
+static struct net_device *
+mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
+                           __be32 *saddrp, __be32 *daddrp)
+{
+       struct ip_tunnel *tun = netdev_priv(to_dev);
+       struct net_device *dev = NULL;
+       struct ip_tunnel_parm parms;
+       struct rtable *rt = NULL;
+       struct flowi4 fl4;
+
+       /* We assume "dev" stays valid after rt is put. */
+       ASSERT_RTNL();
+
+       parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
+       ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
+                           0, 0, parms.link, tun->fwmark);
+
+       rt = ip_route_output_key(tun->net, &fl4);
+       if (IS_ERR(rt))
+               return NULL;
+
+       if (rt->rt_type != RTN_UNICAST)
+               goto out;
+
+       dev = rt->dst.dev;
+       *saddrp = fl4.saddr;
+       *daddrp = rt->rt_gateway;
+
+out:
+       ip_rt_put(rt);
+       return dev;
+}
+
+static int mlxsw_sp_span_dmac(struct neigh_table *tbl,
+                             const void *pkey,
+                             struct net_device *l3edev,
+                             unsigned char dmac[ETH_ALEN])
+{
+       struct neighbour *neigh = neigh_lookup(tbl, pkey, l3edev);
+       int err = 0;
+
+       if (!neigh) {
+               neigh = neigh_create(tbl, pkey, l3edev);
+               if (IS_ERR(neigh))
+                       return PTR_ERR(neigh);
+       }
+
+       neigh_event_send(neigh, NULL);
+
+       read_lock_bh(&neigh->lock);
+       if ((neigh->nud_state & NUD_VALID) && !neigh->dead)
+               memcpy(dmac, neigh->ha, ETH_ALEN);
+       else
+               err = -ENOENT;
+       read_unlock_bh(&neigh->lock);
+
+       neigh_release(neigh);
+       return err;
+}
+
+static int
+mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp)
+{
+       sparmsp->dest_port = NULL;
+       return 0;
+}
+
+static int
+mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
+                                       union mlxsw_sp_l3addr saddr,
+                                       union mlxsw_sp_l3addr daddr,
+                                       union mlxsw_sp_l3addr gw,
+                                       __u8 ttl,
+                                       struct neigh_table *tbl,
+                                       struct mlxsw_sp_span_parms *sparmsp)
+{
+       unsigned char dmac[ETH_ALEN];
+
+       if (mlxsw_sp_l3addr_is_zero(gw))
+               gw = daddr;
+
+       if (!l3edev || !mlxsw_sp_port_dev_check(l3edev) ||
+           mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac))
+               return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+       sparmsp->dest_port = netdev_priv(l3edev);
+       sparmsp->ttl = ttl;
+       memcpy(sparmsp->dmac, dmac, ETH_ALEN);
+       memcpy(sparmsp->smac, l3edev->dev_addr, ETH_ALEN);
+       sparmsp->saddr = saddr;
+       sparmsp->daddr = daddr;
+       return 0;
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_parms(const struct net_device *to_dev,
+                                 struct mlxsw_sp_span_parms *sparmsp)
+{
+       struct ip_tunnel_parm tparm = mlxsw_sp_ipip_netdev_parms4(to_dev);
+       union mlxsw_sp_l3addr saddr = { .addr4 = tparm.iph.saddr };
+       union mlxsw_sp_l3addr daddr = { .addr4 = tparm.iph.daddr };
+       bool inherit_tos = tparm.iph.tos & 0x1;
+       bool inherit_ttl = !tparm.iph.ttl;
+       union mlxsw_sp_l3addr gw = daddr;
+       struct net_device *l3edev;
+
+       if (!(to_dev->flags & IFF_UP) ||
+           /* Reject tunnels with GRE keys, checksums, etc. */
+           tparm.i_flags || tparm.o_flags ||
+           /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+           inherit_ttl || !inherit_tos ||
+           /* A destination address may not be "any". */
+           mlxsw_sp_l3addr_is_zero(daddr))
+               return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+       l3edev = mlxsw_sp_span_gretap4_route(to_dev, &saddr.addr4, &gw.addr4);
+       return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+                                                      tparm.iph.ttl,
+                                                      &arp_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry,
+                                     struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_port *dest_port = sparms.dest_port;
+       struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+       u8 local_port = dest_port->local_port;
+       char mpat_pl[MLXSW_REG_MPAT_LEN];
+       int pa_id = span_entry->id;
+
+       /* Create a new port analyzer entry for local_port. */
+       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+                           MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+       mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+                                   MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+                                   sparms.dmac, false);
+       mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl,
+                                             sparms.ttl, sparms.smac,
+                                             be32_to_cpu(sparms.saddr.addr4),
+                                             be32_to_cpu(sparms.daddr.addr4));
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure_common(span_entry,
+                                       MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
+       .can_handle = is_gretap_dev,
+       .parms = mlxsw_sp_span_entry_gretap4_parms,
+       .configure = mlxsw_sp_span_entry_gretap4_configure,
+       .deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
+};
+
+static struct net_device *
+mlxsw_sp_span_gretap6_route(const struct net_device *to_dev,
+                           struct in6_addr *saddrp,
+                           struct in6_addr *daddrp)
+{
+       struct ip6_tnl *t = netdev_priv(to_dev);
+       struct flowi6 fl6 = t->fl.u.ip6;
+       struct net_device *dev = NULL;
+       struct dst_entry *dst;
+       struct rt6_info *rt6;
+
+       /* We assume "dev" stays valid after dst is released. */
+       ASSERT_RTNL();
+
+       fl6.flowi6_mark = t->parms.fwmark;
+       if (!ip6_tnl_xmit_ctl(t, &fl6.saddr, &fl6.daddr))
+               return NULL;
+
+       dst = ip6_route_output(t->net, NULL, &fl6);
+       if (!dst || dst->error)
+               goto out;
+
+       rt6 = container_of(dst, struct rt6_info, dst);
+
+       dev = dst->dev;
+       *saddrp = fl6.saddr;
+       *daddrp = rt6->rt6i_gateway;
+
+out:
+       dst_release(dst);
+       return dev;
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_parms(const struct net_device *to_dev,
+                                 struct mlxsw_sp_span_parms *sparmsp)
+{
+       struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(to_dev);
+       bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS;
+       union mlxsw_sp_l3addr saddr = { .addr6 = tparm.laddr };
+       union mlxsw_sp_l3addr daddr = { .addr6 = tparm.raddr };
+       bool inherit_ttl = !tparm.hop_limit;
+       union mlxsw_sp_l3addr gw = daddr;
+       struct net_device *l3edev;
+
+       if (!(to_dev->flags & IFF_UP) ||
+           /* Reject tunnels with GRE keys, checksums, etc. */
+           tparm.i_flags || tparm.o_flags ||
+           /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+           inherit_ttl || !inherit_tos ||
+           /* A destination address may not be "any". */
+           mlxsw_sp_l3addr_is_zero(daddr))
+               return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+       l3edev = mlxsw_sp_span_gretap6_route(to_dev, &saddr.addr6, &gw.addr6);
+       return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+                                                      tparm.hop_limit,
+                                                      &nd_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry,
+                                     struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_port *dest_port = sparms.dest_port;
+       struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+       u8 local_port = dest_port->local_port;
        char mpat_pl[MLXSW_REG_MPAT_LEN];
-       u8 local_port = port->local_port;
-       int index;
+       int pa_id = span_entry->id;
+
+       /* Create a new port analyzer entry for local_port. */
+       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+                           MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+       mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+                                   MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+                                   sparms.dmac, false);
+       mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac,
+                                             sparms.saddr.addr6,
+                                             sparms.daddr.addr6);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure_common(span_entry,
+                                       MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
+       .can_handle = is_ip6gretap_dev,
+       .parms = mlxsw_sp_span_entry_gretap6_parms,
+       .configure = mlxsw_sp_span_entry_gretap6_configure,
+       .deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure,
+};
+
+static const
+struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
+       &mlxsw_sp_span_entry_ops_phys,
+       &mlxsw_sp_span_entry_ops_gretap4,
+       &mlxsw_sp_span_entry_ops_gretap6,
+};
+
+static int
+mlxsw_sp_span_entry_nop_parms(const struct net_device *to_dev,
+                             struct mlxsw_sp_span_parms *sparmsp)
+{
+       return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_nop_configure(struct mlxsw_sp_span_entry *span_entry,
+                                 struct mlxsw_sp_span_parms sparms)
+{
+       return 0;
+}
+
+static void
+mlxsw_sp_span_entry_nop_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_nop = {
+       .parms = mlxsw_sp_span_entry_nop_parms,
+       .configure = mlxsw_sp_span_entry_nop_configure,
+       .deconfigure = mlxsw_sp_span_entry_nop_deconfigure,
+};
+
+static void
+mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
+                             struct mlxsw_sp_span_entry *span_entry,
+                             struct mlxsw_sp_span_parms sparms)
+{
+       if (sparms.dest_port) {
+               if (sparms.dest_port->mlxsw_sp != mlxsw_sp) {
+                       netdev_err(span_entry->to_dev, "Cannot mirror to %s, which belongs to a different mlxsw instance",
+                                  sparms.dest_port->dev->name);
+                       sparms.dest_port = NULL;
+               } else if (span_entry->ops->configure(span_entry, sparms)) {
+                       netdev_err(span_entry->to_dev, "Failed to offload mirror to %s",
+                                  sparms.dest_port->dev->name);
+                       sparms.dest_port = NULL;
+               }
+       }
+
+       span_entry->parms = sparms;
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+       if (span_entry->parms.dest_port)
+               span_entry->ops->deconfigure(span_entry);
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
+                          const struct net_device *to_dev,
+                          const struct mlxsw_sp_span_entry_ops *ops,
+                          struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_span_entry *span_entry = NULL;
        int i;
-       int err;
 
        /* find a free entry to use */
-       index = -1;
        for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
                if (!mlxsw_sp->span.entries[i].ref_count) {
-                       index = i;
                        span_entry = &mlxsw_sp->span.entries[i];
                        break;
                }
        }
-       if (index < 0)
-               return NULL;
-
-       /* create a new port analayzer entry for local_port */
-       mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true);
-       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
-       if (err)
+       if (!span_entry)
                return NULL;
 
-       span_entry->id = index;
+       span_entry->ops = ops;
        span_entry->ref_count = 1;
-       span_entry->local_port = local_port;
+       span_entry->to_dev = to_dev;
+       mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, sparms);
+
        return span_entry;
 }
 
-static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
-                                       struct mlxsw_sp_span_entry *span_entry)
+static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp_span_entry *span_entry)
 {
-       u8 local_port = span_entry->local_port;
-       char mpat_pl[MLXSW_REG_MPAT_LEN];
-       int pa_id = span_entry->id;
-
-       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
-       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+       mlxsw_sp_span_entry_deconfigure(span_entry);
 }
 
 struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+                                const struct net_device *to_dev)
 {
        int i;
 
        for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
                struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
 
-               if (curr->ref_count && curr->local_port == local_port)
+               if (curr->ref_count && curr->to_dev == to_dev)
                        return curr;
        }
        return NULL;
 }
 
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+                                   struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure(span_entry);
+       span_entry->ops = &mlxsw_sp_span_entry_ops_nop;
+}
+
 static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
+mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
+{
+       int i;
+
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+               if (curr->ref_count && curr->id == span_id)
+                       return curr;
+       }
+       return NULL;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
+                       const struct net_device *to_dev,
+                       const struct mlxsw_sp_span_entry_ops *ops,
+                       struct mlxsw_sp_span_parms sparms)
 {
        struct mlxsw_sp_span_entry *span_entry;
 
-       span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
-                                             port->local_port);
+       span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, to_dev);
        if (span_entry) {
                /* Already exists, just take a reference */
                span_entry->ref_count++;
                return span_entry;
        }
 
-       return mlxsw_sp_span_entry_create(port);
+       return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev, ops, sparms);
 }
 
 static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
@@ -150,7 +540,7 @@ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
 {
        WARN_ON(!span_entry->ref_count);
        if (--span_entry->ref_count == 0)
-               mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
+               mlxsw_sp_span_entry_destroy(span_entry);
        return 0;
 }
 
@@ -312,15 +702,41 @@ mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
        kfree(inspected_port);
 }
 
+static const struct mlxsw_sp_span_entry_ops *
+mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp,
+                       const struct net_device *to_dev)
+{
+       size_t i;
+
+       for (i = 0; i < ARRAY_SIZE(mlxsw_sp_span_entry_types); ++i)
+               if (mlxsw_sp_span_entry_types[i]->can_handle(to_dev))
+                       return mlxsw_sp_span_entry_types[i];
+
+       return NULL;
+}
+
 int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
-                            struct mlxsw_sp_port *to,
-                            enum mlxsw_sp_span_type type, bool bind)
+                            const struct net_device *to_dev,
+                            enum mlxsw_sp_span_type type, bool bind,
+                            int *p_span_id)
 {
        struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
+       const struct mlxsw_sp_span_entry_ops *ops;
+       struct mlxsw_sp_span_parms sparms = {0};
        struct mlxsw_sp_span_entry *span_entry;
        int err;
 
-       span_entry = mlxsw_sp_span_entry_get(to);
+       ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev);
+       if (!ops) {
+               netdev_err(to_dev, "Cannot mirror to %s", to_dev->name);
+               return -EOPNOTSUPP;
+       }
+
+       err = ops->parms(to_dev, &sparms);
+       if (err)
+               return err;
+
+       span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
        if (!span_entry)
                return -ENOENT;
 
@@ -331,6 +747,7 @@ int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
        if (err)
                goto err_port_bind;
 
+       *p_span_id = span_entry->id;
        return 0;
 
 err_port_bind:
@@ -338,13 +755,12 @@ err_port_bind:
        return err;
 }
 
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
                              enum mlxsw_sp_span_type type, bool bind)
 {
        struct mlxsw_sp_span_entry *span_entry;
 
-       span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
-                                             destination_port);
+       span_entry = mlxsw_sp_span_entry_find_by_id(from->mlxsw_sp, span_id);
        if (!span_entry) {
                netdev_err(from->dev, "no span entry found\n");
                return;
@@ -354,3 +770,27 @@ void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
                   span_entry->id);
        mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
 }
+
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp)
+{
+       int i;
+       int err;
+
+       ASSERT_RTNL();
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+               struct mlxsw_sp_span_parms sparms = {0};
+
+               if (!curr->ref_count)
+                       continue;
+
+               err = curr->ops->parms(curr->to_dev, &sparms);
+               if (err)
+                       continue;
+
+               if (memcmp(&sparms, &curr->parms, sizeof(sparms))) {
+                       mlxsw_sp_span_entry_deconfigure(curr);
+                       mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms);
+               }
+       }
+}
index 069050e..948aceb 100644 (file)
@@ -35,6 +35,9 @@
 #define _MLXSW_SPECTRUM_SPAN_H
 
 #include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "spectrum_router.h"
 
 struct mlxsw_sp;
 struct mlxsw_sp_port;
@@ -50,23 +53,51 @@ struct mlxsw_sp_span_inspected_port {
        u8 local_port;
 };
 
+struct mlxsw_sp_span_parms {
+       struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
+       unsigned int ttl;
+       unsigned char dmac[ETH_ALEN];
+       unsigned char smac[ETH_ALEN];
+       union mlxsw_sp_l3addr daddr;
+       union mlxsw_sp_l3addr saddr;
+};
+
+struct mlxsw_sp_span_entry_ops;
+
 struct mlxsw_sp_span_entry {
-       u8 local_port;
+       const struct net_device *to_dev;
+       const struct mlxsw_sp_span_entry_ops *ops;
+       struct mlxsw_sp_span_parms parms;
        struct list_head bound_ports_list;
        int ref_count;
        int id;
 };
 
+struct mlxsw_sp_span_entry_ops {
+       bool (*can_handle)(const struct net_device *to_dev);
+       int (*parms)(const struct net_device *to_dev,
+                    struct mlxsw_sp_span_parms *sparmsp);
+       int (*configure)(struct mlxsw_sp_span_entry *span_entry,
+                        struct mlxsw_sp_span_parms sparms);
+       void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry);
+};
+
 int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp);
 
 int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
-                            struct mlxsw_sp_port *to,
-                            enum mlxsw_sp_span_type type, bool bind);
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
+                            const struct net_device *to_dev,
+                            enum mlxsw_sp_span_type type,
+                            bool bind, int *p_span_id);
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
                              enum mlxsw_sp_span_type type, bool bind);
 struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+                                const struct net_device *to_dev);
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+                                   struct mlxsw_sp_span_entry *span_entry);
 
 int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
 
index f9f53af..917663a 100644 (file)
@@ -1882,14 +1882,10 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
                                struct netlink_ext_ack *extack)
 {
        struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+       struct net_device *dev = bridge_port->dev;
        u16 vid;
 
-       if (!is_vlan_dev(bridge_port->dev)) {
-               NL_SET_ERR_MSG_MOD(extack, "Only VLAN devices can be enslaved to a VLAN-unaware bridge");
-               return -EINVAL;
-       }
-       vid = vlan_dev_vlan_id(bridge_port->dev);
-
+       vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
        mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
        if (WARN_ON(!mlxsw_sp_port_vlan))
                return -EINVAL;
@@ -1912,8 +1908,10 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
                                 struct mlxsw_sp_port *mlxsw_sp_port)
 {
        struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
-       u16 vid = vlan_dev_vlan_id(bridge_port->dev);
+       struct net_device *dev = bridge_port->dev;
+       u16 vid;
 
+       vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
        mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
        if (WARN_ON(!mlxsw_sp_port_vlan))
                return;
index d5b2888..51fa82b 100644 (file)
@@ -60,14 +60,6 @@ do {                                                                 \
        *((volatile unsigned int *)dev->base_addr+(reg)) = (val);               \
 } while (0)
 
-
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
 /*
  * We cannot use station (ethernet) address prefixes to detect the
  * sonic controller since these are board manufacturer depended.
@@ -117,7 +109,6 @@ static const struct net_device_ops sonic_netdev_ops = {
 
 static int sonic_probe1(struct net_device *dev)
 {
-       static unsigned version_printed;
        unsigned int silicon_revision;
        unsigned int val;
        struct sonic_local *lp = netdev_priv(dev);
@@ -133,26 +124,17 @@ static int sonic_probe1(struct net_device *dev)
         * the expected location.
         */
        silicon_revision = SONIC_READ(SONIC_SR);
-       if (sonic_debug > 1)
-               printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
        i = 0;
        while (known_revisions[i] != 0xffff &&
               known_revisions[i] != silicon_revision)
                i++;
 
        if (known_revisions[i] == 0xffff) {
-               printk("SONIC ethernet controller not found (0x%4x)\n",
-                      silicon_revision);
+               pr_info("SONIC ethernet controller not found (0x%4x)\n",
+                       silicon_revision);
                goto out;
        }
 
-       if (sonic_debug  &&  version_printed++ == 0)
-               printk(version);
-
-       printk(KERN_INFO "%s: Sonic ethernet found at 0x%08lx, ",
-              dev_name(lp->device), dev->base_addr);
-
        /*
         * Put the sonic into software reset, then
         * retrieve and print the ethernet address.
@@ -245,12 +227,16 @@ static int jazz_sonic_probe(struct platform_device *pdev)
        err = sonic_probe1(dev);
        if (err)
                goto out;
+
+       pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+               dev->base_addr, dev->dev_addr, dev->irq);
+
+       sonic_msg_init(dev);
+
        err = register_netdev(dev);
        if (err)
                goto out1;
 
-       printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
        return 0;
 
 out1:
@@ -262,8 +248,6 @@ out:
 }
 
 MODULE_DESCRIPTION("Jazz SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "jazzsonic debug level (1-4)");
 MODULE_ALIAS("platform:jazzsonic");
 
 #include "sonic.c"
index b922ab5..0937fc2 100644 (file)
@@ -60,8 +60,6 @@
 #include <asm/macints.h>
 #include <asm/mac_via.h>
 
-static char mac_sonic_string[] = "macsonic";
-
 #include "sonic.h"
 
 /* These should basically be bus-size and endian independent (since
@@ -72,15 +70,6 @@ static char mac_sonic_string[] = "macsonic";
 #define SONIC_WRITE(reg,val) (nubus_writew(val, dev->base_addr + (reg * 4) \
              + lp->reg_offset))
 
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
-static int sonic_version_printed;
-
 /* For onboard SONIC */
 #define ONBOARD_SONIC_REGISTERS        0x50F0A000
 #define ONBOARD_SONIC_PROM_BASE        0x50f08000
@@ -313,11 +302,6 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
        int sr;
        bool commslot = macintosh_config->expansion_type == MAC_EXP_PDS_COMM;
 
-       if (!MACH_IS_MAC)
-               return -ENODEV;
-
-       printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. ");
-
        /* Bogus probing, on the models which may or may not have
           Ethernet (BTW, the Ethernet *is* always at the same
           address, and nothing else lives there, at least if Apple's
@@ -327,13 +311,11 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
 
                card_present = hwreg_present((void*)ONBOARD_SONIC_REGISTERS);
                if (!card_present) {
-                       printk("none.\n");
+                       pr_info("Onboard/comm-slot SONIC not found\n");
                        return -ENODEV;
                }
        }
 
-       printk("yes\n");
-
        /* Danger!  My arms are flailing wildly!  You *must* set lp->reg_offset
         * and dev->base_addr before using SONIC_READ() or SONIC_WRITE() */
        dev->base_addr = ONBOARD_SONIC_REGISTERS;
@@ -342,18 +324,10 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
        else
                dev->irq = IRQ_NUBUS_9;
 
-       if (!sonic_version_printed) {
-               printk(KERN_INFO "%s", version);
-               sonic_version_printed = 1;
-       }
-       printk(KERN_INFO "%s: onboard / comm-slot SONIC at 0x%08lx\n",
-              dev_name(lp->device), dev->base_addr);
-
        /* The PowerBook's SONIC is 16 bit always. */
        if (macintosh_config->ident == MAC_MODEL_PB520) {
                lp->reg_offset = 0;
                lp->dma_bitmode = SONIC_BITMODE16;
-               sr = SONIC_READ(SONIC_SR);
        } else if (commslot) {
                /* Some of the comm-slot cards are 16 bit.  But some
                   of them are not.  The 32-bit cards use offset 2 and
@@ -370,22 +344,21 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
                else {
                        lp->dma_bitmode = SONIC_BITMODE16;
                        lp->reg_offset = 0;
-                       sr = SONIC_READ(SONIC_SR);
                }
        } else {
                /* All onboard cards are at offset 2 with 32 bit DMA. */
                lp->reg_offset = 2;
                lp->dma_bitmode = SONIC_BITMODE32;
-               sr = SONIC_READ(SONIC_SR);
        }
-       printk(KERN_INFO
-              "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
-              dev_name(lp->device), sr, lp->dma_bitmode?32:16, lp->reg_offset);
 
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
-       printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
-              SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+       pr_info("Onboard/comm-slot SONIC, revision 0x%04x, %d bit DMA, register offset %d\n",
+               SONIC_READ(SONIC_SR), lp->dma_bitmode ? 32 : 16,
+               lp->reg_offset);
+
+       /* This is sometimes useful to find out how MacOS configured the card */
+       pr_debug("%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+                SONIC_READ(SONIC_DCR) & 0xffff,
+                SONIC_READ(SONIC_DCR2) & 0xffff);
 
        /* Software reset, then initialize control registers. */
        SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -406,11 +379,14 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
        /* Now look for the MAC address. */
        mac_onboard_sonic_ethernet_addr(dev);
 
+       pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+               dev->base_addr, dev->dev_addr, dev->irq);
+
        /* Shared init code */
        return macsonic_init(dev);
 }
 
-static int mac_nubus_sonic_ethernet_addr(struct net_device *dev,
+static int mac_sonic_nubus_ethernet_addr(struct net_device *dev,
                                         unsigned long prom_addr, int id)
 {
        int i;
@@ -449,70 +425,49 @@ static int macsonic_ident(struct nubus_rsrc *fres)
        return -1;
 }
 
-static int mac_nubus_sonic_probe(struct net_device *dev)
+static int mac_sonic_nubus_probe_board(struct nubus_board *board, int id,
+                                      struct net_device *dev)
 {
-       static int slots;
-       struct nubus_rsrc *ndev = NULL;
        struct sonic_local* lp = netdev_priv(dev);
        unsigned long base_addr, prom_addr;
        u16 sonic_dcr;
-       int id = -1;
        int reg_offset, dma_bitmode;
 
-       /* Find the first SONIC that hasn't been initialized already */
-       for_each_func_rsrc(ndev) {
-               if (ndev->category != NUBUS_CAT_NETWORK ||
-                   ndev->type != NUBUS_TYPE_ETHERNET)
-                       continue;
-
-               /* Have we seen it already? */
-               if (slots & (1<<ndev->board->slot))
-                       continue;
-               slots |= 1<<ndev->board->slot;
-
-               /* Is it one of ours? */
-               if ((id = macsonic_ident(ndev)) != -1)
-                       break;
-       }
-
-       if (ndev == NULL)
-               return -ENODEV;
-
        switch (id) {
        case MACSONIC_DUODOCK:
-               base_addr = ndev->board->slot_addr + DUODOCK_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + DUODOCK_SONIC_PROM_BASE;
+               base_addr = board->slot_addr + DUODOCK_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + DUODOCK_SONIC_PROM_BASE;
                sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT0 | SONIC_DCR_RFT1 |
                            SONIC_DCR_TFT0;
                reg_offset = 2;
                dma_bitmode = SONIC_BITMODE32;
                break;
        case MACSONIC_APPLE:
-               base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+               base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
                sonic_dcr = SONIC_DCR_BMS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0;
                reg_offset = 0;
                dma_bitmode = SONIC_BITMODE32;
                break;
        case MACSONIC_APPLE16:
-               base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+               base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
                sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
                            SONIC_DCR_PO1 | SONIC_DCR_BMS;
                reg_offset = 0;
                dma_bitmode = SONIC_BITMODE16;
                break;
        case MACSONIC_DAYNALINK:
-               base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + DAYNALINK_PROM_BASE;
+               base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + DAYNALINK_PROM_BASE;
                sonic_dcr = SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
                            SONIC_DCR_PO1 | SONIC_DCR_BMS;
                reg_offset = 0;
                dma_bitmode = SONIC_BITMODE16;
                break;
        case MACSONIC_DAYNA:
-               base_addr = ndev->board->slot_addr + DAYNA_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + DAYNA_SONIC_MAC_ADDR;
+               base_addr = board->slot_addr + DAYNA_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + DAYNA_SONIC_MAC_ADDR;
                sonic_dcr = SONIC_DCR_BMS |
                            SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1;
                reg_offset = 0;
@@ -528,21 +483,16 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
        dev->base_addr = base_addr;
        lp->reg_offset = reg_offset;
        lp->dma_bitmode = dma_bitmode;
-       dev->irq = SLOT2IRQ(ndev->board->slot);
+       dev->irq = SLOT2IRQ(board->slot);
 
-       if (!sonic_version_printed) {
-               printk(KERN_INFO "%s", version);
-               sonic_version_printed = 1;
-       }
-       printk(KERN_INFO "%s: %s in slot %X\n",
-              dev_name(lp->device), ndev->board->name, ndev->board->slot);
-       printk(KERN_INFO "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
-              dev_name(lp->device), SONIC_READ(SONIC_SR), dma_bitmode?32:16, reg_offset);
+       dev_info(&board->dev, "%s, revision 0x%04x, %d bit DMA, register offset %d\n",
+                board->name, SONIC_READ(SONIC_SR),
+                lp->dma_bitmode ? 32 : 16, lp->reg_offset);
 
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
-       printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
-              SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+       /* This is sometimes useful to find out how MacOS configured the card */
+       dev_dbg(&board->dev, "%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+               SONIC_READ(SONIC_DCR) & 0xffff,
+               SONIC_READ(SONIC_DCR2) & 0xffff);
 
        /* Software reset, then initialize control registers. */
        SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -557,14 +507,17 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
        SONIC_WRITE(SONIC_ISR, 0x7fff);
 
        /* Now look for the MAC address. */
-       if (mac_nubus_sonic_ethernet_addr(dev, prom_addr, id) != 0)
+       if (mac_sonic_nubus_ethernet_addr(dev, prom_addr, id) != 0)
                return -ENODEV;
 
+       dev_info(&board->dev, "SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+                dev->base_addr, dev->dev_addr, dev->irq);
+
        /* Shared init code */
        return macsonic_init(dev);
 }
 
-static int mac_sonic_probe(struct platform_device *pdev)
+static int mac_sonic_platform_probe(struct platform_device *pdev)
 {
        struct net_device *dev;
        struct sonic_local *lp;
@@ -579,22 +532,16 @@ static int mac_sonic_probe(struct platform_device *pdev)
        SET_NETDEV_DEV(dev, &pdev->dev);
        platform_set_drvdata(pdev, dev);
 
-       /* This will catch fatal stuff like -ENOMEM as well as success */
        err = mac_onboard_sonic_probe(dev);
-       if (err == 0)
-               goto found;
-       if (err != -ENODEV)
-               goto out;
-       err = mac_nubus_sonic_probe(dev);
        if (err)
                goto out;
-found:
+
+       sonic_msg_init(dev);
+
        err = register_netdev(dev);
        if (err)
                goto out;
 
-       printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
        return 0;
 
 out:
@@ -604,13 +551,11 @@ out:
 }
 
 MODULE_DESCRIPTION("Macintosh SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "macsonic debug level (1-4)");
 MODULE_ALIAS("platform:macsonic");
 
 #include "sonic.c"
 
-static int mac_sonic_device_remove(struct platform_device *pdev)
+static int mac_sonic_platform_remove(struct platform_device *pdev)
 {
        struct net_device *dev = platform_get_drvdata(pdev);
        struct sonic_local* lp = netdev_priv(dev);
@@ -623,12 +568,105 @@ static int mac_sonic_device_remove(struct platform_device *pdev)
        return 0;
 }
 
-static struct platform_driver mac_sonic_driver = {
-       .probe  = mac_sonic_probe,
-       .remove = mac_sonic_device_remove,
-       .driver = {
-               .name   = mac_sonic_string,
+static struct platform_driver mac_sonic_platform_driver = {
+       .probe  = mac_sonic_platform_probe,
+       .remove = mac_sonic_platform_remove,
+       .driver = {
+               .name = "macsonic",
+       },
+};
+
+static int mac_sonic_nubus_probe(struct nubus_board *board)
+{
+       struct net_device *ndev;
+       struct sonic_local *lp;
+       struct nubus_rsrc *fres;
+       int id = -1;
+       int err;
+
+       /* The platform driver will handle a PDS or Comm Slot card (even if
+        * it has a pseudoslot declaration ROM).
+        */
+       if (macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+               return -ENODEV;
+
+       for_each_board_func_rsrc(board, fres) {
+               if (fres->category != NUBUS_CAT_NETWORK ||
+                   fres->type != NUBUS_TYPE_ETHERNET)
+                       continue;
+
+               id = macsonic_ident(fres);
+               if (id != -1)
+                       break;
+       }
+       if (!fres)
+               return -ENODEV;
+
+       ndev = alloc_etherdev(sizeof(struct sonic_local));
+       if (!ndev)
+               return -ENOMEM;
+
+       lp = netdev_priv(ndev);
+       lp->device = &board->dev;
+       SET_NETDEV_DEV(ndev, &board->dev);
+
+       err = mac_sonic_nubus_probe_board(board, id, ndev);
+       if (err)
+               goto out;
+
+       sonic_msg_init(ndev);
+
+       err = register_netdev(ndev);
+       if (err)
+               goto out;
+
+       nubus_set_drvdata(board, ndev);
+
+       return 0;
+
+out:
+       free_netdev(ndev);
+       return err;
+}
+
+static int mac_sonic_nubus_remove(struct nubus_board *board)
+{
+       struct net_device *ndev = nubus_get_drvdata(board);
+       struct sonic_local *lp = netdev_priv(ndev);
+
+       unregister_netdev(ndev);
+       dma_free_coherent(lp->device,
+                         SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+                         lp->descriptors, lp->descriptors_laddr);
+       free_netdev(ndev);
+
+       return 0;
+}
+
+static struct nubus_driver mac_sonic_nubus_driver = {
+       .probe  = mac_sonic_nubus_probe,
+       .remove = mac_sonic_nubus_remove,
+       .driver = {
+               .name = "macsonic-nubus",
+               .owner = THIS_MODULE,
        },
 };
 
-module_platform_driver(mac_sonic_driver);
+static int perr, nerr;
+
+static int __init mac_sonic_init(void)
+{
+       perr = platform_driver_register(&mac_sonic_platform_driver);
+       nerr = nubus_driver_register(&mac_sonic_nubus_driver);
+       return 0;
+}
+module_init(mac_sonic_init);
+
+static void __exit mac_sonic_exit(void)
+{
+       if (!perr)
+               platform_driver_unregister(&mac_sonic_platform_driver);
+       if (!nerr)
+               nubus_driver_unregister(&mac_sonic_nubus_driver);
+}
+module_exit(mac_sonic_exit);
index 612c7a4..7ed0848 100644 (file)
  * the NetBSD file "sys/arch/mac68k/dev/if_sn.c".
  */
 
+static unsigned int version_printed;
 
+static int sonic_debug = -1;
+module_param(sonic_debug, int, 0);
+MODULE_PARM_DESC(sonic_debug, "debug message level");
+
+static void sonic_msg_init(struct net_device *dev)
+{
+       struct sonic_local *lp = netdev_priv(dev);
+
+       lp->msg_enable = netif_msg_init(sonic_debug, 0);
+
+       if (version_printed++ == 0)
+               netif_dbg(lp, drv, dev, "%s", version);
+}
 
 /*
  * Open/initialize the SONIC controller.
@@ -47,8 +61,7 @@ static int sonic_open(struct net_device *dev)
        struct sonic_local *lp = netdev_priv(dev);
        int i;
 
-       if (sonic_debug > 2)
-               printk("sonic_open: initializing sonic driver.\n");
+       netif_dbg(lp, ifup, dev, "%s: initializing sonic driver\n", __func__);
 
        for (i = 0; i < SONIC_NUM_RRS; i++) {
                struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2);
@@ -95,8 +108,7 @@ static int sonic_open(struct net_device *dev)
 
        netif_start_queue(dev);
 
-       if (sonic_debug > 2)
-               printk("sonic_open: Initialization done.\n");
+       netif_dbg(lp, ifup, dev, "%s: Initialization done\n", __func__);
 
        return 0;
 }
@@ -110,8 +122,7 @@ static int sonic_close(struct net_device *dev)
        struct sonic_local *lp = netdev_priv(dev);
        int i;
 
-       if (sonic_debug > 2)
-               printk("sonic_close\n");
+       netif_dbg(lp, ifdown, dev, "%s\n", __func__);
 
        netif_stop_queue(dev);
 
@@ -205,8 +216,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
        int length;
        int entry = lp->next_tx;
 
-       if (sonic_debug > 2)
-               printk("sonic_send_packet: skb=%p, dev=%p\n", skb, dev);
+       netif_dbg(lp, tx_queued, dev, "%s: skb=%p\n", __func__, skb);
 
        length = skb->len;
        if (length < ETH_ZLEN) {
@@ -252,14 +262,12 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
        lp->next_tx = (entry + 1) & SONIC_TDS_MASK;
        if (lp->tx_skb[lp->next_tx] != NULL) {
                /* The ring is full, the ISR has yet to process the next TD. */
-               if (sonic_debug > 3)
-                       printk("%s: stopping queue\n", dev->name);
+               netif_dbg(lp, tx_queued, dev, "%s: stopping queue\n", __func__);
                netif_stop_queue(dev);
                /* after this packet, wait for ISR to free up some TDAs */
        } else netif_start_queue(dev);
 
-       if (sonic_debug > 2)
-               printk("sonic_send_packet: issuing Tx command\n");
+       netif_dbg(lp, tx_queued, dev, "%s: issuing Tx command\n", __func__);
 
        SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP);
 
@@ -281,8 +289,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
 
        do {
                if (status & SONIC_INT_PKTRX) {
-                       if (sonic_debug > 2)
-                               printk("%s: packet rx\n", dev->name);
+                       netif_dbg(lp, intr, dev, "%s: packet rx\n", __func__);
                        sonic_rx(dev);  /* got packet(s) */
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */
                }
@@ -299,8 +306,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
                         *   still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear)
                         */
 
-                       if (sonic_debug > 2)
-                               printk("%s: tx done\n", dev->name);
+                       netif_dbg(lp, intr, dev, "%s: tx done\n", __func__);
 
                        while (lp->tx_skb[entry] != NULL) {
                                if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0)
@@ -346,20 +352,20 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
                 * check error conditions
                 */
                if (status & SONIC_INT_RFO) {
-                       if (sonic_debug > 1)
-                               printk("%s: rx fifo overrun\n", dev->name);
+                       netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n",
+                                 __func__);
                        lp->stats.rx_fifo_errors++;
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */
                }
                if (status & SONIC_INT_RDE) {
-                       if (sonic_debug > 1)
-                               printk("%s: rx descriptors exhausted\n", dev->name);
+                       netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n",
+                                 __func__);
                        lp->stats.rx_dropped++;
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */
                }
                if (status & SONIC_INT_RBAE) {
-                       if (sonic_debug > 1)
-                               printk("%s: rx buffer area exceeded\n", dev->name);
+                       netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n",
+                                 __func__);
                        lp->stats.rx_dropped++;
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */
                }
@@ -380,8 +386,9 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
 
                /* transmit error */
                if (status & SONIC_INT_TXER) {
-                       if ((SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) && (sonic_debug > 2))
-                               printk(KERN_ERR "%s: tx fifo underrun\n", dev->name);
+                       if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU)
+                               netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n",
+                                         __func__);
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */
                }
 
@@ -475,8 +482,8 @@ static void sonic_rx(struct net_device *dev)
                        if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff;
                        SONIC_WRITE(SONIC_RWP, lp->cur_rwp);
                        if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) {
-                               if (sonic_debug > 2)
-                                       printk("%s: rx buffer exhausted\n", dev->name);
+                               netif_dbg(lp, rx_err, dev, "%s: rx buffer exhausted\n",
+                                         __func__);
                                SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */
                        }
                } else
@@ -542,9 +549,8 @@ static void sonic_multicast_list(struct net_device *dev)
                    (netdev_mc_count(dev) > 15)) {
                        rcr |= SONIC_RCR_AMC;
                } else {
-                       if (sonic_debug > 2)
-                               printk("sonic_multicast_list: mc_count %d\n",
-                                      netdev_mc_count(dev));
+                       netif_dbg(lp, ifup, dev, "%s: mc_count %d\n", __func__,
+                                 netdev_mc_count(dev));
                        sonic_set_cam_enable(dev, 1);  /* always enable our own address */
                        i = 1;
                        netdev_for_each_mc_addr(ha, dev) {
@@ -562,8 +568,7 @@ static void sonic_multicast_list(struct net_device *dev)
                }
        }
 
-       if (sonic_debug > 2)
-               printk("sonic_multicast_list: setting RCR=%x\n", rcr);
+       netif_dbg(lp, ifup, dev, "%s: setting RCR=%x\n", __func__, rcr);
 
        SONIC_WRITE(SONIC_RCR, rcr);
 }
@@ -596,8 +601,8 @@ static int sonic_init(struct net_device *dev)
        /*
         * initialize the receive resource area
         */
-       if (sonic_debug > 2)
-               printk("sonic_init: initialize receive resource area\n");
+       netif_dbg(lp, ifup, dev, "%s: initialize receive resource area\n",
+                 __func__);
 
        for (i = 0; i < SONIC_NUM_RRS; i++) {
                u16 bufadr_l = (unsigned long)lp->rx_laddr[i] & 0xffff;
@@ -622,8 +627,7 @@ static int sonic_init(struct net_device *dev)
        SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 2 : 1));
 
        /* load the resource pointers */
-       if (sonic_debug > 3)
-               printk("sonic_init: issuing RRRA command\n");
+       netif_dbg(lp, ifup, dev, "%s: issuing RRRA command\n", __func__);
 
        SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA);
        i = 0;
@@ -632,16 +636,17 @@ static int sonic_init(struct net_device *dev)
                        break;
        }
 
-       if (sonic_debug > 2)
-               printk("sonic_init: status=%x i=%d\n", SONIC_READ(SONIC_CMD), i);
+       netif_dbg(lp, ifup, dev, "%s: status=%x, i=%d\n", __func__,
+                 SONIC_READ(SONIC_CMD), i);
 
        /*
         * Initialize the receive descriptors so that they
         * become a circular linked list, ie. let the last
         * descriptor point to the first again.
         */
-       if (sonic_debug > 2)
-               printk("sonic_init: initialize receive descriptors\n");
+       netif_dbg(lp, ifup, dev, "%s: initialize receive descriptors\n",
+                 __func__);
+
        for (i=0; i<SONIC_NUM_RDS; i++) {
                sonic_rda_put(dev, i, SONIC_RD_STATUS, 0);
                sonic_rda_put(dev, i, SONIC_RD_PKTLEN, 0);
@@ -664,8 +669,9 @@ static int sonic_init(struct net_device *dev)
        /*
         * initialize transmit descriptors
         */
-       if (sonic_debug > 2)
-               printk("sonic_init: initialize transmit descriptors\n");
+       netif_dbg(lp, ifup, dev, "%s: initialize transmit descriptors\n",
+                 __func__);
+
        for (i = 0; i < SONIC_NUM_TDS; i++) {
                sonic_tda_put(dev, i, SONIC_TD_STATUS, 0);
                sonic_tda_put(dev, i, SONIC_TD_CONFIG, 0);
@@ -712,10 +718,8 @@ static int sonic_init(struct net_device *dev)
                if (SONIC_READ(SONIC_ISR) & SONIC_INT_LCD)
                        break;
        }
-       if (sonic_debug > 2) {
-               printk("sonic_init: CMD=%x, ISR=%x\n, i=%d",
-                      SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
-       }
+       netif_dbg(lp, ifup, dev, "%s: CMD=%x, ISR=%x, i=%d\n", __func__,
+                 SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
 
        /*
         * enable receiver, disable loopback
@@ -731,9 +735,8 @@ static int sonic_init(struct net_device *dev)
        if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0)
                printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd);
 
-       if (sonic_debug > 2)
-               printk("sonic_init: new status=%x\n",
-                      SONIC_READ(SONIC_CMD));
+       netif_dbg(lp, ifup, dev, "%s: new status=%x\n", __func__,
+                 SONIC_READ(SONIC_CMD));
 
        return 0;
 }
index 421b1a2..2b27f70 100644 (file)
@@ -319,6 +319,7 @@ struct sonic_local {
        unsigned int eol_rx;
        unsigned int eol_tx;           /* last unacked transmit packet */
        unsigned int next_tx;          /* next free TD */
+       int msg_enable;
        struct device *device;         /* generic device */
        struct net_device_stats stats;
 };
@@ -336,6 +337,7 @@ static struct net_device_stats *sonic_get_stats(struct net_device *dev);
 static void sonic_multicast_list(struct net_device *dev);
 static int sonic_init(struct net_device *dev);
 static void sonic_tx_timeout(struct net_device *dev);
+static void sonic_msg_init(struct net_device *dev);
 
 /* Internal inlines for reading/writing DMA buffers.  Note that bus
    size and endianness matter here, whereas they don't for registers,
index 1817dee..e1b886e 100644 (file)
@@ -73,14 +73,6 @@ extern void xtboard_get_ether_addr(unsigned char *buf);
 #define SONIC_WRITE(reg,val) \
        *((volatile unsigned int *)dev->base_addr+reg) = val
 
-
-/* Use 0 for production, 1 for verification, and >2 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
 /*
  * We cannot use station (ethernet) address prefixes to detect the
  * sonic controller since these are board manufacturer depended.
@@ -130,7 +122,6 @@ static const struct net_device_ops xtsonic_netdev_ops = {
 
 static int __init sonic_probe1(struct net_device *dev)
 {
-       static unsigned version_printed = 0;
        unsigned int silicon_revision;
        struct sonic_local *lp = netdev_priv(dev);
        unsigned int base_addr = dev->base_addr;
@@ -146,23 +137,17 @@ static int __init sonic_probe1(struct net_device *dev)
         * the expected location.
         */
        silicon_revision = SONIC_READ(SONIC_SR);
-       if (sonic_debug > 1)
-               printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
        i = 0;
        while ((known_revisions[i] != 0xffff) &&
                        (known_revisions[i] != silicon_revision))
                i++;
 
        if (known_revisions[i] == 0xffff) {
-               printk("SONIC ethernet controller not found (0x%4x)\n",
-                               silicon_revision);
+               pr_info("SONIC ethernet controller not found (0x%4x)\n",
+                       silicon_revision);
                return -ENODEV;
        }
 
-       if (sonic_debug  &&  version_printed++ == 0)
-               printk(version);
-
        /*
         * Put the sonic into software reset, then retrieve ethernet address.
         * Note: we are assuming that the boot-loader has initialized the cam.
@@ -273,12 +258,15 @@ int xtsonic_probe(struct platform_device *pdev)
 
        if ((err = sonic_probe1(dev)))
                goto out;
+
+       pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+               dev->base_addr, dev->dev_addr, dev->irq);
+
+       sonic_msg_init(dev);
+
        if ((err = register_netdev(dev)))
                goto out1;
 
-       printk("%s: SONIC ethernet @%08lx, MAC %pM, IRQ %d\n", dev->name,
-              dev->base_addr, dev->dev_addr, dev->irq);
-
        return 0;
 
 out1:
@@ -290,8 +278,6 @@ out:
 }
 
 MODULE_DESCRIPTION("Xtensa XT2000 SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "xtsonic debug level (1-4)");
 
 #include "sonic.c"
 
index ca4a81d..03ad4ee 100644 (file)
@@ -1784,7 +1784,7 @@ enum qed_iwarp_mpa_pkt_type {
 /* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
 #define QED_IWARP_MAX_BDS_PER_FPDU 3
 
-char *pkt_type_str[] = {
+static const char * const pkt_type_str[] = {
        "QED_IWARP_MPA_PKT_PACKED",
        "QED_IWARP_MPA_PKT_PARTIAL",
        "QED_IWARP_MPA_PKT_UNALIGNED"
index 96db328..6fd1333 100644 (file)
@@ -735,11 +735,6 @@ struct ring_info {
        u8              __pad[sizeof(void *) - sizeof(u32)];
 };
 
-enum features {
-       RTL_FEATURE_MSI         = (1 << 0),
-       RTL_FEATURE_GMII        = (1 << 1),
-};
-
 struct rtl8169_counters {
        __le64  tx_packets;
        __le64  rx_packets;
@@ -7847,7 +7842,7 @@ static int rtl8169_close(struct net_device *dev)
 
        cancel_work_sync(&tp->wk.work);
 
-       free_irq(pdev->irq, dev);
+       pci_free_irq(pdev, 0, dev);
 
        dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
                          tp->RxPhyAddr);
@@ -7866,7 +7861,7 @@ static void rtl8169_netpoll(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
 
-       rtl8169_interrupt(tp->pci_dev->irq, dev);
+       rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), dev);
 }
 #endif
 
@@ -7903,9 +7898,8 @@ static int rtl_open(struct net_device *dev)
 
        rtl_request_firmware(tp);
 
-       retval = request_irq(pdev->irq, rtl8169_interrupt,
-                            (tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
-                            dev->name, dev);
+       retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev,
+                                dev->name);
        if (retval < 0)
                goto err_release_fw_2;
 
@@ -8235,7 +8229,7 @@ static const struct rtl_cfg_info {
        unsigned int region;
        unsigned int align;
        u16 event_slow;
-       unsigned features;
+       unsigned int has_gmii:1;
        const struct rtl_coalesce_info *coalesce_info;
        u8 default_ver;
 } rtl_cfg_infos [] = {
@@ -8244,7 +8238,7 @@ static const struct rtl_cfg_info {
                .region         = 1,
                .align          = 0,
                .event_slow     = SYSErr | LinkChg | RxOverflow | RxFIFOOver,
-               .features       = RTL_FEATURE_GMII,
+               .has_gmii       = 1,
                .coalesce_info  = rtl_coalesce_info_8169,
                .default_ver    = RTL_GIGA_MAC_VER_01,
        },
@@ -8253,7 +8247,7 @@ static const struct rtl_cfg_info {
                .region         = 2,
                .align          = 8,
                .event_slow     = SYSErr | LinkChg | RxOverflow,
-               .features       = RTL_FEATURE_GMII | RTL_FEATURE_MSI,
+               .has_gmii       = 1,
                .coalesce_info  = rtl_coalesce_info_8168_8136,
                .default_ver    = RTL_GIGA_MAC_VER_11,
        },
@@ -8263,32 +8257,26 @@ static const struct rtl_cfg_info {
                .align          = 8,
                .event_slow     = SYSErr | LinkChg | RxOverflow | RxFIFOOver |
                                  PCSTimeout,
-               .features       = RTL_FEATURE_MSI,
                .coalesce_info  = rtl_coalesce_info_8168_8136,
                .default_ver    = RTL_GIGA_MAC_VER_13,
        }
 };
 
-/* Cfg9346_Unlock assumed. */
-static unsigned rtl_try_msi(struct rtl8169_private *tp,
-                           const struct rtl_cfg_info *cfg)
+static int rtl_alloc_irq(struct rtl8169_private *tp)
 {
        void __iomem *ioaddr = tp->mmio_addr;
-       unsigned msi = 0;
-       u8 cfg2;
+       unsigned int flags;
 
-       cfg2 = RTL_R8(Config2) & ~MSIEnable;
-       if (cfg->features & RTL_FEATURE_MSI) {
-               if (pci_enable_msi(tp->pci_dev)) {
-                       netif_info(tp, hw, tp->dev, "no MSI. Back to INTx.\n");
-               } else {
-                       cfg2 |= MSIEnable;
-                       msi = RTL_FEATURE_MSI;
-               }
+       if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
+               RTL_W8(Cfg9346, Cfg9346_Unlock);
+               RTL_W8(Config2, RTL_R8(Config2) & ~MSIEnable);
+               RTL_W8(Cfg9346, Cfg9346_Lock);
+               flags = PCI_IRQ_LEGACY;
+       } else {
+               flags = PCI_IRQ_ALL_TYPES;
        }
-       if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
-               RTL_W8(Config2, cfg2);
-       return msi;
+
+       return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
 }
 
 DECLARE_RTL_COND(rtl_link_list_ready_cond)
@@ -8402,7 +8390,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        mii->mdio_write = rtl_mdio_write;
        mii->phy_id_mask = 0x1f;
        mii->reg_num_mask = 0x1f;
-       mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
+       mii->supports_gmii = cfg->has_gmii;
 
        /* disable ASPM completely as that cause random device stop working
         * problems as well as full system hangs for some PCIe devices users */
@@ -8497,9 +8485,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        chipset = tp->mac_version;
        tp->txd_version = rtl_chip_infos[chipset].txd_version;
 
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
-       tp->features |= rtl_try_msi(tp, cfg);
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       rc = rtl_alloc_irq(tp);
+       if (rc < 0) {
+               netif_err(tp, probe, dev, "Can't allocate interrupt\n");
+               return rc;
+       }
 
        /* override BIOS settings, use userspace tools to enable WOL */
        __rtl8169_set_wol(tp, 0);
@@ -8618,7 +8608,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
                   rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr,
-                  (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), pdev->irq);
+                  (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff),
+                  pci_irq_vector(pdev, 0));
        if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {
                netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, "
                           "tx checksumming: %s]\n",
index d7d5a6d..d3e1bc0 100644 (file)
@@ -123,8 +123,8 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
        [TSU_FWSL0]     = 0x0030,
        [TSU_FWSL1]     = 0x0034,
        [TSU_FWSLC]     = 0x0038,
-       [TSU_QTAG0]     = 0x0040,
-       [TSU_QTAG1]     = 0x0044,
+       [TSU_QTAGM0]    = 0x0040,
+       [TSU_QTAGM1]    = 0x0044,
        [TSU_FWSR]      = 0x0050,
        [TSU_FWINMK]    = 0x0054,
        [TSU_ADQT0]     = 0x0048,
@@ -752,6 +752,7 @@ static struct sh_eth_cpu_data sh7757_data = {
        .rpadir         = 1,
        .rpadir_value   = 2 << 16,
        .rtrate         = 1,
+       .dual_port      = 1,
 };
 
 #define SH_GIGA_ETH_BASE       0xfee00000UL
@@ -830,6 +831,7 @@ static struct sh_eth_cpu_data sh7757_data_giga = {
        .no_trimd       = 1,
        .no_ade         = 1,
        .tsu            = 1,
+       .dual_port      = 1,
 };
 
 /* SH7734 */
@@ -900,6 +902,7 @@ static struct sh_eth_cpu_data sh7763_data = {
        .tsu            = 1,
        .irq_flags      = IRQF_SHARED,
        .magic          = 1,
+       .dual_port      = 1,
 };
 
 static struct sh_eth_cpu_data sh7619_data = {
@@ -932,6 +935,7 @@ static struct sh_eth_cpu_data sh771x_data = {
                          EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP |
                          EESIPR_PREIP | EESIPR_CERFIP,
        .tsu            = 1,
+       .dual_port      = 1,
 };
 
 static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
@@ -2097,8 +2101,6 @@ static size_t __sh_eth_get_regs(struct net_device *ndev, u32 *buf)
                add_tsu_reg(TSU_FWSL0);
                add_tsu_reg(TSU_FWSL1);
                add_tsu_reg(TSU_FWSLC);
-               add_tsu_reg(TSU_QTAG0);
-               add_tsu_reg(TSU_QTAG1);
                add_tsu_reg(TSU_QTAGM0);
                add_tsu_reg(TSU_QTAGM1);
                add_tsu_reg(TSU_FWSR);
@@ -2917,7 +2919,7 @@ static int sh_eth_vlan_rx_kill_vid(struct net_device *ndev,
 /* SuperH's TSU register init function */
 static void sh_eth_tsu_init(struct sh_eth_private *mdp)
 {
-       if (sh_eth_is_rz_fast_ether(mdp)) {
+       if (!mdp->cd->dual_port) {
                sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
                sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL,
                                 TSU_FWSLC);    /* Enable POST registers */
@@ -2934,13 +2936,8 @@ static void sh_eth_tsu_init(struct sh_eth_private *mdp)
        sh_eth_tsu_write(mdp, 0, TSU_FWSL0);
        sh_eth_tsu_write(mdp, 0, TSU_FWSL1);
        sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL, TSU_FWSLC);
-       if (sh_eth_is_gether(mdp)) {
-               sh_eth_tsu_write(mdp, 0, TSU_QTAG0);    /* Disable QTAG(0->1) */
-               sh_eth_tsu_write(mdp, 0, TSU_QTAG1);    /* Disable QTAG(1->0) */
-       } else {
-               sh_eth_tsu_write(mdp, 0, TSU_QTAGM0);   /* Disable QTAG(0->1) */
-               sh_eth_tsu_write(mdp, 0, TSU_QTAGM1);   /* Disable QTAG(1->0) */
-       }
+       sh_eth_tsu_write(mdp, 0, TSU_QTAGM0);   /* Disable QTAG(0->1) */
+       sh_eth_tsu_write(mdp, 0, TSU_QTAGM1);   /* Disable QTAG(1->0) */
        sh_eth_tsu_write(mdp, 0, TSU_FWSR);     /* all interrupt status clear */
        sh_eth_tsu_write(mdp, 0, TSU_FWINMK);   /* Disable all interrupt */
        sh_eth_tsu_write(mdp, 0, TSU_TEN);      /* Disable all CAM entry */
index a6753cc..5bbaf9e 100644 (file)
@@ -118,8 +118,8 @@ enum {
        TSU_FWSL0,
        TSU_FWSL1,
        TSU_FWSLC,
-       TSU_QTAG0,
-       TSU_QTAG1,
+       TSU_QTAG0,                      /* Same as TSU_QTAGM0 */
+       TSU_QTAG1,                      /* Same as TSU_QTAGM1 */
        TSU_QTAGM0,
        TSU_QTAGM1,
        TSU_FWSR,
@@ -509,6 +509,7 @@ struct sh_eth_cpu_data {
        unsigned rmiimode:1;    /* EtherC has RMIIMODE register */
        unsigned rtrate:1;      /* EtherC has RTRATE register */
        unsigned magic:1;       /* EtherC has ECMR.MPDE and ECSR.MPD */
+       unsigned dual_port:1;   /* Dual EtherC/E-DMAC */
 };
 
 struct sh_eth_private {
index 30a1136..4824fcf 100644 (file)
@@ -81,7 +81,6 @@ enum ef4_loopback_mode {
                            (1 << LOOPBACK_XAUI) |              \
                            (1 << LOOPBACK_GMII) |              \
                            (1 << LOOPBACK_SGMII) |             \
-                           (1 << LOOPBACK_SGMII) |             \
                            (1 << LOOPBACK_XGBR) |              \
                            (1 << LOOPBACK_XFI) |               \
                            (1 << LOOPBACK_XAUI_FAR) |          \
index c728ffa..2a6521d 100644 (file)
@@ -389,6 +389,8 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
 
 static void dwmac4_release_tx_desc(struct dma_desc *p, int mode)
 {
+       p->des0 = 0;
+       p->des1 = 0;
        p->des2 = 0;
        p->des3 = 0;
 }
index c8d86d7..a9856a8 100644 (file)
@@ -1844,6 +1844,11 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
                if (unlikely(status & tx_dma_own))
                        break;
 
+               /* Make sure descriptor fields are read after reading
+                * the own bit.
+                */
+               dma_rmb();
+
                /* Just consider the last segment and ...*/
                if (likely(!(status & tx_not_ls))) {
                        /* ... verify the status error condition */
@@ -2983,14 +2988,21 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
                        tcp_hdrlen(skb) / 4, (skb->len - proto_hdr_len));
 
        /* If context desc is used to change MSS */
-       if (mss_desc)
+       if (mss_desc) {
+               /* Make sure that first descriptor has been completely
+                * written, including its own bit. This is because MSS is
+                * actually before first descriptor, so we need to make
+                * sure that MSS's own bit is the last thing written.
+                */
+               dma_wmb();
                priv->hw->desc->set_tx_owner(mss_desc);
+       }
 
        /* The own bit must be the latest setting done when prepare the
         * descriptor and then barrier is needed to make sure that
         * all is coherent before granting the DMA engine.
         */
-       dma_wmb();
+       wmb();
 
        if (netif_msg_pktdata(priv)) {
                pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n",
@@ -3214,7 +3226,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                 * descriptor and then barrier is needed to make sure that
                 * all is coherent before granting the DMA engine.
                 */
-               dma_wmb();
+               wmb();
        }
 
        netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
index b919e89..516dd59 100644 (file)
@@ -1694,6 +1694,7 @@ static struct pernet_operations geneve_net_ops = {
        .exit_batch = geneve_exit_batch_net,
        .id   = &geneve_net_id,
        .size = sizeof(struct geneve_net),
+       .async = true,
 };
 
 static int __init geneve_init_module(void)
index f38e32a..127edd2 100644 (file)
@@ -1325,6 +1325,7 @@ static struct pernet_operations gtp_net_ops = {
        .exit   = gtp_net_exit,
        .id     = &gtp_net_id,
        .size   = sizeof(struct gtp_net),
+       .async  = true,
 };
 
 static int __init gtp_init(void)
index 303ba41..8782f56 100644 (file)
@@ -104,3 +104,14 @@ config IEEE802154_CA8210_DEBUGFS
          exposes a debugfs node for each CA8210 instance which allows
          direct use of the Cascoda API, exposing the 802.15.4 MAC
          management entities.
+
+config IEEE802154_MCR20A
+       tristate "MCR20A transceiver driver"
+       depends on IEEE802154_DRIVERS && MAC802154
+       depends on SPI
+       ---help---
+         Say Y here to enable the MCR20A SPI 802.15.4 wireless
+         controller.
+
+         This driver can also be built as a module. To do so, say M here.
+         The module will be called 'mcr20a'.
index bea1de5..104744d 100644 (file)
@@ -6,3 +6,4 @@ obj-$(CONFIG_IEEE802154_CC2520) += cc2520.o
 obj-$(CONFIG_IEEE802154_ATUSB) += atusb.o
 obj-$(CONFIG_IEEE802154_ADF7242) += adf7242.o
 obj-$(CONFIG_IEEE802154_CA8210) += ca8210.o
+obj-$(CONFIG_IEEE802154_MCR20A) += mcr20a.o
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
new file mode 100644 (file)
index 0000000..d9eb22a
--- /dev/null
@@ -0,0 +1,1413 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/skbuff.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/ieee802154.h>
+#include <linux/debugfs.h>
+
+#include <net/mac802154.h>
+#include <net/cfg802154.h>
+
+#include <linux/device.h>
+
+#include "mcr20a.h"
+
+#define        SPI_COMMAND_BUFFER              3
+
+#define REGISTER_READ                  BIT(7)
+#define REGISTER_WRITE                 (0 << 7)
+#define REGISTER_ACCESS                        (0 << 6)
+#define PACKET_BUFF_BURST_ACCESS       BIT(6)
+#define PACKET_BUFF_BYTE_ACCESS                BIT(5)
+
+#define MCR20A_WRITE_REG(x)            (x)
+#define MCR20A_READ_REG(x)             (REGISTER_READ | (x))
+#define MCR20A_BURST_READ_PACKET_BUF   (0xC0)
+#define MCR20A_BURST_WRITE_PACKET_BUF  (0x40)
+
+#define MCR20A_CMD_REG         0x80
+#define MCR20A_CMD_REG_MASK    0x3f
+#define MCR20A_CMD_WRITE       0x40
+#define MCR20A_CMD_FB          0x20
+
+/* Number of Interrupt Request Status Register */
+#define MCR20A_IRQSTS_NUM 2 /* only IRQ_STS1 and IRQ_STS2 */
+
+/* MCR20A CCA Type */
+enum {
+       MCR20A_CCA_ED,    // energy detect - CCA bit not active,
+                         // not to be used for T and CCCA sequences
+       MCR20A_CCA_MODE1, // energy detect - CCA bit ACTIVE
+       MCR20A_CCA_MODE2, // 802.15.4 compliant signal detect - CCA bit ACTIVE
+       MCR20A_CCA_MODE3
+};
+
+enum {
+       MCR20A_XCVSEQ_IDLE      = 0x00,
+       MCR20A_XCVSEQ_RX        = 0x01,
+       MCR20A_XCVSEQ_TX        = 0x02,
+       MCR20A_XCVSEQ_CCA       = 0x03,
+       MCR20A_XCVSEQ_TR        = 0x04,
+       MCR20A_XCVSEQ_CCCA      = 0x05,
+};
+
+/* IEEE-802.15.4 defined constants (2.4 GHz logical channels) */
+#define        MCR20A_MIN_CHANNEL      (11)
+#define        MCR20A_MAX_CHANNEL      (26)
+#define        MCR20A_CHANNEL_SPACING  (5)
+
+/* MCR20A CCA Threshold constants */
+#define MCR20A_MIN_CCA_THRESHOLD (0x6EU)
+#define MCR20A_MAX_CCA_THRESHOLD (0x00U)
+
+/* version 0C */
+#define MCR20A_OVERWRITE_VERSION (0x0C)
+
+/* MCR20A PLL configurations */
+static const u8  PLL_INT[16] = {
+       /* 2405 */ 0x0B,        /* 2410 */ 0x0B,        /* 2415 */ 0x0B,
+       /* 2420 */ 0x0B,        /* 2425 */ 0x0B,        /* 2430 */ 0x0B,
+       /* 2435 */ 0x0C,        /* 2440 */ 0x0C,        /* 2445 */ 0x0C,
+       /* 2450 */ 0x0C,        /* 2455 */ 0x0C,        /* 2460 */ 0x0C,
+       /* 2465 */ 0x0D,        /* 2470 */ 0x0D,        /* 2475 */ 0x0D,
+       /* 2480 */ 0x0D
+};
+
+static const u8 PLL_FRAC[16] = {
+       /* 2405 */ 0x28,        /* 2410 */ 0x50,        /* 2415 */ 0x78,
+       /* 2420 */ 0xA0,        /* 2425 */ 0xC8,        /* 2430 */ 0xF0,
+       /* 2435 */ 0x18,        /* 2440 */ 0x40,        /* 2445 */ 0x68,
+       /* 2450 */ 0x90,        /* 2455 */ 0xB8,        /* 2460 */ 0xE0,
+       /* 2465 */ 0x08,        /* 2470 */ 0x30,        /* 2475 */ 0x58,
+       /* 2480 */ 0x80
+};
+
+static const struct reg_sequence mar20a_iar_overwrites[] = {
+       { IAR_MISC_PAD_CTRL,    0x02 },
+       { IAR_VCO_CTRL1,        0xB3 },
+       { IAR_VCO_CTRL2,        0x07 },
+       { IAR_PA_TUNING,        0x71 },
+       { IAR_CHF_IBUF,         0x2F },
+       { IAR_CHF_QBUF,         0x2F },
+       { IAR_CHF_IRIN,         0x24 },
+       { IAR_CHF_QRIN,         0x24 },
+       { IAR_CHF_IL,           0x24 },
+       { IAR_CHF_QL,           0x24 },
+       { IAR_CHF_CC1,          0x32 },
+       { IAR_CHF_CCL,          0x1D },
+       { IAR_CHF_CC2,          0x2D },
+       { IAR_CHF_IROUT,        0x24 },
+       { IAR_CHF_QROUT,        0x24 },
+       { IAR_PA_CAL,           0x28 },
+       { IAR_AGC_THR1,         0x55 },
+       { IAR_AGC_THR2,         0x2D },
+       { IAR_ATT_RSSI1,        0x5F },
+       { IAR_ATT_RSSI2,        0x8F },
+       { IAR_RSSI_OFFSET,      0x61 },
+       { IAR_CHF_PMA_GAIN,     0x03 },
+       { IAR_CCA1_THRESH,      0x50 },
+       { IAR_CORR_NVAL,        0x13 },
+       { IAR_ACKDELAY,         0x3D },
+};
+
+#define MCR20A_VALID_CHANNELS (0x07FFF800)
+
+struct mcr20a_platform_data {
+       int rst_gpio;
+};
+
+#define MCR20A_MAX_BUF         (127)
+
+#define printdev(X) (&X->spi->dev)
+
+/* regmap information for Direct Access Register (DAR) access */
+#define MCR20A_DAR_WRITE       0x01
+#define MCR20A_DAR_READ                0x00
+#define MCR20A_DAR_NUMREGS     0x3F
+
+/* regmap information for Indirect Access Register (IAR) access */
+#define MCR20A_IAR_ACCESS      0x80
+#define MCR20A_IAR_NUMREGS     0xBEFF
+
+/* Read/Write SPI Commands for DAR and IAR registers. */
+#define MCR20A_READSHORT(reg)  ((reg) << 1)
+#define MCR20A_WRITESHORT(reg) ((reg) << 1 | 1)
+#define MCR20A_READLONG(reg)   (1 << 15 | (reg) << 5)
+#define MCR20A_WRITELONG(reg)  (1 << 15 | (reg) << 5 | 1 << 4)
+
+/* Type definitions for link configuration of instantiable layers  */
+#define MCR20A_PHY_INDIRECT_QUEUE_SIZE (12)
+
+static bool
+mcr20a_dar_writeable(struct device *dev, unsigned int reg)
+{
+       switch (reg) {
+       case DAR_IRQ_STS1:
+       case DAR_IRQ_STS2:
+       case DAR_IRQ_STS3:
+       case DAR_PHY_CTRL1:
+       case DAR_PHY_CTRL2:
+       case DAR_PHY_CTRL3:
+       case DAR_PHY_CTRL4:
+       case DAR_SRC_CTRL:
+       case DAR_SRC_ADDRS_SUM_LSB:
+       case DAR_SRC_ADDRS_SUM_MSB:
+       case DAR_T3CMP_LSB:
+       case DAR_T3CMP_MSB:
+       case DAR_T3CMP_USB:
+       case DAR_T2PRIMECMP_LSB:
+       case DAR_T2PRIMECMP_MSB:
+       case DAR_T1CMP_LSB:
+       case DAR_T1CMP_MSB:
+       case DAR_T1CMP_USB:
+       case DAR_T2CMP_LSB:
+       case DAR_T2CMP_MSB:
+       case DAR_T2CMP_USB:
+       case DAR_T4CMP_LSB:
+       case DAR_T4CMP_MSB:
+       case DAR_T4CMP_USB:
+       case DAR_PLL_INT0:
+       case DAR_PLL_FRAC0_LSB:
+       case DAR_PLL_FRAC0_MSB:
+       case DAR_PA_PWR:
+       /* no DAR_ACM */
+       case DAR_OVERWRITE_VER:
+       case DAR_CLK_OUT_CTRL:
+       case DAR_PWR_MODES:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool
+mcr20a_dar_readable(struct device *dev, unsigned int reg)
+{
+       bool rc;
+
+       /* all writeable are also readable */
+       rc = mcr20a_dar_writeable(dev, reg);
+       if (rc)
+               return rc;
+
+       /* readonly regs */
+       switch (reg) {
+       case DAR_RX_FRM_LEN:
+       case DAR_CCA1_ED_FNL:
+       case DAR_EVENT_TMR_LSB:
+       case DAR_EVENT_TMR_MSB:
+       case DAR_EVENT_TMR_USB:
+       case DAR_TIMESTAMP_LSB:
+       case DAR_TIMESTAMP_MSB:
+       case DAR_TIMESTAMP_USB:
+       case DAR_SEQ_STATE:
+       case DAR_LQI_VALUE:
+       case DAR_RSSI_CCA_CONT:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool
+mcr20a_dar_volatile(struct device *dev, unsigned int reg)
+{
+       /* can be changed during runtime */
+       switch (reg) {
+       case DAR_IRQ_STS1:
+       case DAR_IRQ_STS2:
+       case DAR_IRQ_STS3:
+       /* use them in spi_async and regmap so it's volatile */
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool
+mcr20a_dar_precious(struct device *dev, unsigned int reg)
+{
+       /* don't clear irq line on read */
+       switch (reg) {
+       case DAR_IRQ_STS1:
+       case DAR_IRQ_STS2:
+       case DAR_IRQ_STS3:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static const struct regmap_config mcr20a_dar_regmap = {
+       .name                   = "mcr20a_dar",
+       .reg_bits               = 8,
+       .val_bits               = 8,
+       .write_flag_mask        = REGISTER_ACCESS | REGISTER_WRITE,
+       .read_flag_mask         = REGISTER_ACCESS | REGISTER_READ,
+       .cache_type             = REGCACHE_RBTREE,
+       .writeable_reg          = mcr20a_dar_writeable,
+       .readable_reg           = mcr20a_dar_readable,
+       .volatile_reg           = mcr20a_dar_volatile,
+       .precious_reg           = mcr20a_dar_precious,
+       .fast_io                = true,
+       .can_multi_write        = true,
+};
+
+static bool
+mcr20a_iar_writeable(struct device *dev, unsigned int reg)
+{
+       switch (reg) {
+       case IAR_XTAL_TRIM:
+       case IAR_PMC_LP_TRIM:
+       case IAR_MACPANID0_LSB:
+       case IAR_MACPANID0_MSB:
+       case IAR_MACSHORTADDRS0_LSB:
+       case IAR_MACSHORTADDRS0_MSB:
+       case IAR_MACLONGADDRS0_0:
+       case IAR_MACLONGADDRS0_8:
+       case IAR_MACLONGADDRS0_16:
+       case IAR_MACLONGADDRS0_24:
+       case IAR_MACLONGADDRS0_32:
+       case IAR_MACLONGADDRS0_40:
+       case IAR_MACLONGADDRS0_48:
+       case IAR_MACLONGADDRS0_56:
+       case IAR_RX_FRAME_FILTER:
+       case IAR_PLL_INT1:
+       case IAR_PLL_FRAC1_LSB:
+       case IAR_PLL_FRAC1_MSB:
+       case IAR_MACPANID1_LSB:
+       case IAR_MACPANID1_MSB:
+       case IAR_MACSHORTADDRS1_LSB:
+       case IAR_MACSHORTADDRS1_MSB:
+       case IAR_MACLONGADDRS1_0:
+       case IAR_MACLONGADDRS1_8:
+       case IAR_MACLONGADDRS1_16:
+       case IAR_MACLONGADDRS1_24:
+       case IAR_MACLONGADDRS1_32:
+       case IAR_MACLONGADDRS1_40:
+       case IAR_MACLONGADDRS1_48:
+       case IAR_MACLONGADDRS1_56:
+       case IAR_DUAL_PAN_CTRL:
+       case IAR_DUAL_PAN_DWELL:
+       case IAR_CCA1_THRESH:
+       case IAR_CCA1_ED_OFFSET_COMP:
+       case IAR_LQI_OFFSET_COMP:
+       case IAR_CCA_CTRL:
+       case IAR_CCA2_CORR_PEAKS:
+       case IAR_CCA2_CORR_THRESH:
+       case IAR_TMR_PRESCALE:
+       case IAR_ANT_PAD_CTRL:
+       case IAR_MISC_PAD_CTRL:
+       case IAR_BSM_CTRL:
+       case IAR_RNG:
+       case IAR_RX_WTR_MARK:
+       case IAR_SOFT_RESET:
+       case IAR_TXDELAY:
+       case IAR_ACKDELAY:
+       case IAR_CORR_NVAL:
+       case IAR_ANT_AGC_CTRL:
+       case IAR_AGC_THR1:
+       case IAR_AGC_THR2:
+       case IAR_PA_CAL:
+       case IAR_ATT_RSSI1:
+       case IAR_ATT_RSSI2:
+       case IAR_RSSI_OFFSET:
+       case IAR_XTAL_CTRL:
+       case IAR_CHF_PMA_GAIN:
+       case IAR_CHF_IBUF:
+       case IAR_CHF_QBUF:
+       case IAR_CHF_IRIN:
+       case IAR_CHF_QRIN:
+       case IAR_CHF_IL:
+       case IAR_CHF_QL:
+       case IAR_CHF_CC1:
+       case IAR_CHF_CCL:
+       case IAR_CHF_CC2:
+       case IAR_CHF_IROUT:
+       case IAR_CHF_QROUT:
+       case IAR_PA_TUNING:
+       case IAR_VCO_CTRL1:
+       case IAR_VCO_CTRL2:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool
+mcr20a_iar_readable(struct device *dev, unsigned int reg)
+{
+       bool rc;
+
+       /* all writeable are also readable */
+       rc = mcr20a_iar_writeable(dev, reg);
+       if (rc)
+               return rc;
+
+       /* readonly regs */
+       switch (reg) {
+       case IAR_PART_ID:
+       case IAR_DUAL_PAN_STS:
+       case IAR_RX_BYTE_COUNT:
+       case IAR_FILTERFAIL_CODE1:
+       case IAR_FILTERFAIL_CODE2:
+       case IAR_RSSI:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool
+mcr20a_iar_volatile(struct device *dev, unsigned int reg)
+{
+/* can be changed during runtime */
+       switch (reg) {
+       case IAR_DUAL_PAN_STS:
+       case IAR_RX_BYTE_COUNT:
+       case IAR_FILTERFAIL_CODE1:
+       case IAR_FILTERFAIL_CODE2:
+       case IAR_RSSI:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static const struct regmap_config mcr20a_iar_regmap = {
+       .name                   = "mcr20a_iar",
+       .reg_bits               = 16,
+       .val_bits               = 8,
+       .write_flag_mask        = REGISTER_ACCESS | REGISTER_WRITE | IAR_INDEX,
+       .read_flag_mask         = REGISTER_ACCESS | REGISTER_READ  | IAR_INDEX,
+       .cache_type             = REGCACHE_RBTREE,
+       .writeable_reg          = mcr20a_iar_writeable,
+       .readable_reg           = mcr20a_iar_readable,
+       .volatile_reg           = mcr20a_iar_volatile,
+       .fast_io                = true,
+};
+
+struct mcr20a_local {
+       struct spi_device *spi;
+
+       struct ieee802154_hw *hw;
+       struct mcr20a_platform_data *pdata;
+       struct regmap *regmap_dar;
+       struct regmap *regmap_iar;
+
+       u8 *buf;
+
+       bool is_tx;
+
+       /* for writing tx buffer */
+       struct spi_message tx_buf_msg;
+       u8 tx_header[1];
+       /* burst buffer write command */
+       struct spi_transfer tx_xfer_header;
+       u8 tx_len[1];
+       /* len of tx packet */
+       struct spi_transfer tx_xfer_len;
+       /* data of tx packet */
+       struct spi_transfer tx_xfer_buf;
+       struct sk_buff *tx_skb;
+
+       /* for read length rxfifo */
+       struct spi_message reg_msg;
+       u8 reg_cmd[1];
+       u8 reg_data[MCR20A_IRQSTS_NUM];
+       struct spi_transfer reg_xfer_cmd;
+       struct spi_transfer reg_xfer_data;
+
+       /* receive handling */
+       struct spi_message rx_buf_msg;
+       u8 rx_header[1];
+       struct spi_transfer rx_xfer_header;
+       u8 rx_lqi[1];
+       struct spi_transfer rx_xfer_lqi;
+       u8 rx_buf[MCR20A_MAX_BUF];
+       struct spi_transfer rx_xfer_buf;
+
+       /* isr handling for reading intstat */
+       struct spi_message irq_msg;
+       u8 irq_header[1];
+       u8 irq_data[MCR20A_IRQSTS_NUM];
+       struct spi_transfer irq_xfer_data;
+       struct spi_transfer irq_xfer_header;
+};
+
+static void
+mcr20a_write_tx_buf_complete(void *context)
+{
+       struct mcr20a_local *lp = context;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       lp->reg_msg.complete = NULL;
+       lp->reg_cmd[0]  = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+       lp->reg_data[0] = MCR20A_XCVSEQ_TX;
+       lp->reg_xfer_data.len = 1;
+
+       ret = spi_async(lp->spi, &lp->reg_msg);
+       if (ret)
+               dev_err(printdev(lp), "failed to set SEQ TX\n");
+}
+
+static int
+mcr20a_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
+{
+       struct mcr20a_local *lp = hw->priv;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       lp->tx_skb = skb;
+
+       print_hex_dump_debug("mcr20a tx: ", DUMP_PREFIX_OFFSET, 16, 1,
+                            skb->data, skb->len, 0);
+
+       lp->is_tx = 1;
+
+       lp->reg_msg.complete    = NULL;
+       lp->reg_cmd[0]          = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+       lp->reg_data[0]         = MCR20A_XCVSEQ_IDLE;
+       lp->reg_xfer_data.len   = 1;
+
+       return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_ed(struct ieee802154_hw *hw, u8 *level)
+{
+       WARN_ON(!level);
+       *level = 0xbe;
+       return 0;
+}
+
+static int
+mcr20a_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
+{
+       struct mcr20a_local *lp = hw->priv;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* frequency = ((PLL_INT+64) + (PLL_FRAC/65536)) * 32 MHz */
+       ret = regmap_write(lp->regmap_dar, DAR_PLL_INT0, PLL_INT[channel - 11]);
+       if (ret)
+               return ret;
+       ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_LSB, 0x00);
+       if (ret)
+               return ret;
+       ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_MSB,
+                          PLL_FRAC[channel - 11]);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static int
+mcr20a_start(struct ieee802154_hw *hw)
+{
+       struct mcr20a_local *lp = hw->priv;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* No slotted operation */
+       dev_dbg(printdev(lp), "no slotted operation\n");
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_SLOTTED, 0x0);
+
+       /* enable irq */
+       enable_irq(lp->spi->irq);
+
+       /* Unmask SEQ interrupt */
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL2,
+                                DAR_PHY_CTRL2_SEQMSK, 0x0);
+
+       /* Start the RX sequence */
+       dev_dbg(printdev(lp), "start the RX sequence\n");
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+       return 0;
+}
+
+static void
+mcr20a_stop(struct ieee802154_hw *hw)
+{
+       struct mcr20a_local *lp = hw->priv;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* stop all running sequence */
+       regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+                          DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+       /* disable irq */
+       disable_irq(lp->spi->irq);
+}
+
+static int
+mcr20a_set_hw_addr_filt(struct ieee802154_hw *hw,
+                       struct ieee802154_hw_addr_filt *filt,
+                       unsigned long changed)
+{
+       struct mcr20a_local *lp = hw->priv;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       if (changed & IEEE802154_AFILT_SADDR_CHANGED) {
+               u16 addr = le16_to_cpu(filt->short_addr);
+
+               regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_LSB, addr);
+               regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_MSB, addr >> 8);
+       }
+
+       if (changed & IEEE802154_AFILT_PANID_CHANGED) {
+               u16 pan = le16_to_cpu(filt->pan_id);
+
+               regmap_write(lp->regmap_iar, IAR_MACPANID0_LSB, pan);
+               regmap_write(lp->regmap_iar, IAR_MACPANID0_MSB, pan >> 8);
+       }
+
+       if (changed & IEEE802154_AFILT_IEEEADDR_CHANGED) {
+               u8 addr[8], i;
+
+               memcpy(addr, &filt->ieee_addr, 8);
+               for (i = 0; i < 8; i++)
+                       regmap_write(lp->regmap_iar,
+                                    IAR_MACLONGADDRS0_0 + i, addr[i]);
+       }
+
+       if (changed & IEEE802154_AFILT_PANC_CHANGED) {
+               if (filt->pan_coord) {
+                       regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                          DAR_PHY_CTRL4_PANCORDNTR0, 0x10);
+               } else {
+                       regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                          DAR_PHY_CTRL4_PANCORDNTR0, 0x00);
+               }
+       }
+
+       return 0;
+}
+
+/* -30 dBm to 10 dBm */
+#define MCR20A_MAX_TX_POWERS 0x14
+static const s32 mcr20a_powers[MCR20A_MAX_TX_POWERS + 1] = {
+       -3000, -2800, -2600, -2400, -2200, -2000, -1800, -1600, -1400,
+       -1200, -1000, -800, -600, -400, -200, 0, 200, 400, 600, 800, 1000
+};
+
+/* ieee802154_ops hook: set the TX power.
+ * @mbm must be one of the advertised supported.tx_powers entries; the
+ * matching table index, offset by 8 and clamped to 5 bits, is written
+ * to DAR_PA_PWR.  Returns -EINVAL for an unsupported level.
+ */
+static int
+mcr20a_set_txpower(struct ieee802154_hw *hw, s32 mbm)
+{
+       struct mcr20a_local *lp = hw->priv;
+       const s32 *powers = lp->hw->phy->supported.tx_powers;
+       u32 idx;
+
+       dev_dbg(printdev(lp), "%s(%d)\n", __func__, mbm);
+
+       for (idx = 0; idx < lp->hw->phy->supported.tx_powers_size; idx++)
+               if (powers[idx] == mbm)
+                       return regmap_write(lp->regmap_dar, DAR_PA_PWR,
+                                           ((idx + 8) & 0x1F));
+
+       return -EINVAL;
+}
+
+#define MCR20A_MAX_ED_LEVELS MCR20A_MIN_CCA_THRESHOLD
+/* CCA energy-detection levels in mbm; populated by mcr20a_hw_setup() */
+static s32 mcr20a_ed_levels[MCR20A_MAX_ED_LEVELS + 1];
+
+/* ieee802154_ops hook: select the CCA measurement mode.
+ * Maps the nl802154 CCA mode/option onto the chip's CCA type field in
+ * PHY_CTRL4 and, for the combined mode, the AND/OR selection bit in
+ * IAR_CCA_CTRL.  Returns -EINVAL for unsupported combinations.
+ */
+static int
+mcr20a_set_cca_mode(struct ieee802154_hw *hw,
+                   const struct wpan_phy_cca *cca)
+{
+       struct mcr20a_local *lp = hw->priv;
+       unsigned int cca_mode = 0xff;
+       bool cca_mode_and = false;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* mapping 802.15.4 to driver spec */
+       switch (cca->mode) {
+       case NL802154_CCA_ENERGY:
+               cca_mode = MCR20A_CCA_MODE1;
+               break;
+       case NL802154_CCA_CARRIER:
+               cca_mode = MCR20A_CCA_MODE2;
+               break;
+       case NL802154_CCA_ENERGY_CARRIER:
+               /* combined mode: the AND/OR flavour is programmed below */
+               switch (cca->opt) {
+               case NL802154_CCA_OPT_ENERGY_CARRIER_AND:
+                       cca_mode = MCR20A_CCA_MODE3;
+                       cca_mode_and = true;
+                       break;
+               case NL802154_CCA_OPT_ENERGY_CARRIER_OR:
+                       cca_mode = MCR20A_CCA_MODE3;
+                       cca_mode_and = false;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               break;
+       default:
+               return -EINVAL;
+       }
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                DAR_PHY_CTRL4_CCATYPE_MASK,
+                                cca_mode << DAR_PHY_CTRL4_CCATYPE_SHIFT);
+       if (ret < 0)
+               return ret;
+
+       /* mode 3 additionally needs the AND-vs-OR combination bit */
+       if (cca_mode == MCR20A_CCA_MODE3) {
+               if (cca_mode_and) {
+                       ret = regmap_update_bits(lp->regmap_iar, IAR_CCA_CTRL,
+                                                IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+                                                0x08);
+               } else {
+                       ret = regmap_update_bits(lp->regmap_iar,
+                                                IAR_CCA_CTRL,
+                                                IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+                                                0x00);
+               }
+               if (ret < 0)
+                       return ret;
+       }
+
+       return ret;
+}
+
+/* ieee802154_ops hook: set the CCA energy-detection threshold.
+ * @mbm must be one of the advertised supported.cca_ed_levels; the table
+ * index doubles as the IAR_CCA1_THRESH register value.
+ *
+ * Fix: an unsupported level used to fall through to "return 0",
+ * silently pretending success; return -EINVAL instead, consistent
+ * with mcr20a_set_txpower().
+ */
+static int
+mcr20a_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm)
+{
+       struct mcr20a_local *lp = hw->priv;
+       u32 i;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       for (i = 0; i < hw->phy->supported.cca_ed_levels_size; i++) {
+               if (hw->phy->supported.cca_ed_levels[i] == mbm)
+                       return regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, i);
+       }
+
+       return -EINVAL;
+}
+
+/* ieee802154_ops hook: toggle promiscuous reception.
+ * On enable: set the PROMISCUOUS bit in PHY_CTRL4 and widen the RX
+ * frame filter to also pass ACK and "not specified" frame types with
+ * no frame-version restriction.  On disable: clear the bit and restore
+ * the default filter (beacon/data/command, frame version checked).
+ *
+ * Fix: removed local "val", which was OR-ed while uninitialized and
+ * never read afterwards (undefined behaviour / compiler warning).
+ */
+static int
+mcr20a_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
+{
+       struct mcr20a_local *lp = hw->priv;
+       int ret;
+       u8 rx_frame_filter_reg = 0x0;
+
+       dev_dbg(printdev(lp), "%s(%d)\n", __func__, on);
+
+       if (on) {
+               /* All frame types accepted*/
+               rx_frame_filter_reg &= ~(IAR_RX_FRAME_FLT_FRM_VER);
+               rx_frame_filter_reg |= (IAR_RX_FRAME_FLT_ACK_FT |
+                                 IAR_RX_FRAME_FLT_NS_FT);
+
+               ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                        DAR_PHY_CTRL4_PROMISCUOUS,
+                                        DAR_PHY_CTRL4_PROMISCUOUS);
+               if (ret < 0)
+                       return ret;
+
+               ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+                                  rx_frame_filter_reg);
+               if (ret < 0)
+                       return ret;
+       } else {
+               ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                        DAR_PHY_CTRL4_PROMISCUOUS, 0x0);
+               if (ret < 0)
+                       return ret;
+
+               /* restore the power-up filter configuration */
+               ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+                                  IAR_RX_FRAME_FLT_FRM_VER |
+                                  IAR_RX_FRAME_FLT_BEACON_FT |
+                                  IAR_RX_FRAME_FLT_DATA_FT |
+                                  IAR_RX_FRAME_FLT_CMD_FT);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
+}
+
+/* MAC callbacks registered with the ieee802154 core */
+static const struct ieee802154_ops mcr20a_hw_ops = {
+       .owner                  = THIS_MODULE,
+       .xmit_async             = mcr20a_xmit,
+       .ed                     = mcr20a_ed,
+       .set_channel            = mcr20a_set_channel,
+       .start                  = mcr20a_start,
+       .stop                   = mcr20a_stop,
+       .set_hw_addr_filt       = mcr20a_set_hw_addr_filt,
+       .set_txpower            = mcr20a_set_txpower,
+       .set_cca_mode           = mcr20a_set_cca_mode,
+       .set_cca_ed_level       = mcr20a_set_cca_ed_level,
+       .set_promiscuous_mode   = mcr20a_set_promiscuous_mode,
+};
+
+/* Kick the transceiver into the RX sequence (asynchronous regmap write,
+ * safe to call from SPI completion context).
+ */
+static int
+mcr20a_request_rx(struct mcr20a_local *lp)
+{
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* Start the RX sequence */
+       regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+       return 0;
+}
+
+/* SPI completion: the received frame payload is now in lp->rx_buf.
+ * Build an skb, hand it to the ieee802154 core and re-arm reception.
+ *
+ * Fix: on skb allocation failure the old code returned without
+ * restarting the RX sequence, so the transceiver stayed idle and no
+ * further frames were ever received.
+ */
+static void
+mcr20a_handle_rx_read_buf_complete(void *context)
+{
+       struct mcr20a_local *lp = context;
+       u8 len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+       struct sk_buff *skb;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       dev_dbg(printdev(lp), "RX is done\n");
+
+       if (!ieee802154_is_valid_psdu_len(len)) {
+               dev_vdbg(&lp->spi->dev, "corrupted frame received\n");
+               len = IEEE802154_MTU;
+       }
+
+       len = len - 2;  /* get rid of frame check field */
+
+       skb = dev_alloc_skb(len);
+       if (!skb) {
+               /* drop the frame but keep the receiver alive */
+               mcr20a_request_rx(lp);
+               return;
+       }
+
+       memcpy(skb_put(skb, len), lp->rx_buf, len);
+       ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]);
+
+       print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1,
+                            lp->rx_buf, len, 0);
+       pr_debug("mcr20a rx: lqi: %02hhx\n", lp->rx_lqi[0]);
+
+       /* start RX sequence */
+       mcr20a_request_rx(lp);
+}
+
+/* SPI completion: the RX frame length is now in lp->reg_data[0].
+ * Chain the next async transfer, which burst-reads the packet buffer
+ * and completes in mcr20a_handle_rx_read_buf_complete().
+ *
+ * Fix: the failure message claimed the *length* read failed, but the
+ * length was already read - this spi_async() reads the rx buffer.
+ */
+static void
+mcr20a_handle_rx_read_len_complete(void *context)
+{
+       struct mcr20a_local *lp = context;
+       u8 len;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* get the length of received frame */
+       len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+       dev_dbg(printdev(lp), "frame len : %d\n", len);
+
+       /* prepare to read the rx buf */
+       lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+       lp->rx_header[0] = MCR20A_BURST_READ_PACKET_BUF;
+       lp->rx_xfer_buf.len = len;
+
+       ret = spi_async(lp->spi, &lp->rx_buf_msg);
+       if (ret)
+               dev_err(printdev(lp), "failed to read rx buffer\n");
+}
+
+/* Start the asynchronous RX read chain: read DAR_RX_FRM_LEN first; the
+ * completion (mcr20a_handle_rx_read_len_complete) then reads the buffer.
+ */
+static int
+mcr20a_handle_rx(struct mcr20a_local *lp)
+{
+       dev_dbg(printdev(lp), "%s\n", __func__);
+       lp->reg_msg.complete = mcr20a_handle_rx_read_len_complete;
+       lp->reg_cmd[0] = MCR20A_READ_REG(DAR_RX_FRM_LEN);
+       lp->reg_xfer_data.len   = 1;
+
+       return spi_async(lp->spi, &lp->reg_msg);
+}
+
+/* TX finished: report completion to the stack (no IFS handling here)
+ * and return the transceiver to the RX sequence.
+ */
+static int
+mcr20a_handle_tx_complete(struct mcr20a_local *lp)
+{
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       ieee802154_xmit_complete(lp->hw, lp->tx_skb, false);
+
+       return mcr20a_request_rx(lp);
+}
+
+/* Push the pending lp->tx_skb into the chip's packet buffer via an
+ * asynchronous burst write (header + length byte + payload message).
+ */
+static int
+mcr20a_handle_tx(struct mcr20a_local *lp)
+{
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* write tx buffer */
+       lp->tx_header[0]        = MCR20A_BURST_WRITE_PACKET_BUF;
+       /* add 2 bytes of FCS */
+       lp->tx_len[0]           = lp->tx_skb->len + 2;
+       lp->tx_xfer_buf.tx_buf  = lp->tx_skb->data;
+       /* add 1 byte psduLength */
+       lp->tx_xfer_buf.len     = lp->tx_skb->len + 1;
+
+       ret = spi_async(lp->spi, &lp->tx_buf_msg);
+       if (ret) {
+               dev_err(printdev(lp), "SPI write Failed for TX buf\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+/* SPI completion: interrupt status has been read and cleared.
+ * Re-enable the device IRQ and dispatch on the latched sequencer state
+ * (low bits of IRQSTS1) to continue the TX or RX state machine.
+ */
+static void
+mcr20a_irq_clean_complete(void *context)
+{
+       struct mcr20a_local *lp = context;
+       u8 seq_state = lp->irq_data[DAR_IRQ_STS1] & DAR_PHY_CTRL1_XCVSEQ_MASK;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       enable_irq(lp->spi->irq);
+
+       dev_dbg(printdev(lp), "IRQ STA1 (%02x) STA2 (%02x)\n",
+               lp->irq_data[DAR_IRQ_STS1], lp->irq_data[DAR_IRQ_STS2]);
+
+       if (seq_state == 0x03) {
+               /* TX IRQ, RX IRQ and SEQ IRQ */
+               if (lp->is_tx) {
+                       lp->is_tx = 0;
+                       dev_dbg(printdev(lp), "TX is done. No ACK\n");
+                       mcr20a_handle_tx_complete(lp);
+               }
+       } else if (seq_state == 0x05) {
+               /* rx is starting */
+               dev_dbg(printdev(lp), "RX is starting\n");
+               mcr20a_handle_rx(lp);
+       } else if (seq_state == 0x07) {
+               if (lp->is_tx) {
+                       /* tx is done */
+                       lp->is_tx = 0;
+                       dev_dbg(printdev(lp), "TX is done. Get ACK\n");
+                       mcr20a_handle_tx_complete(lp);
+               } else {
+                       /* rx is starting */
+                       dev_dbg(printdev(lp), "RX is starting\n");
+                       mcr20a_handle_rx(lp);
+               }
+       } else if (seq_state == 0x01) {
+               if (lp->is_tx) {
+                       dev_dbg(printdev(lp), "TX is starting\n");
+                       mcr20a_handle_tx(lp);
+               } else {
+                       dev_dbg(printdev(lp), "MCR20A is stop\n");
+               }
+       }
+}
+
+/* SPI completion: IRQSTS registers have been read into lp->irq_data.
+ * Force the sequencer to idle, then write the status bytes back
+ * (presumably write-1-to-clear - the read values are copied verbatim
+ * into the write buffer); mcr20a_irq_clean_complete() runs next.
+ */
+static void mcr20a_irq_status_complete(void *context)
+{
+       int ret;
+       struct mcr20a_local *lp = context;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+       regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+       lp->reg_msg.complete = mcr20a_irq_clean_complete;
+       lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_IRQ_STS1);
+       memcpy(lp->reg_data, lp->irq_data, MCR20A_IRQSTS_NUM);
+       lp->reg_xfer_data.len = MCR20A_IRQSTS_NUM;
+
+       ret = spi_async(lp->spi, &lp->reg_msg);
+
+       if (ret)
+               dev_err(printdev(lp), "failed to clean irq status\n");
+}
+
+/* Hard IRQ handler: mask the line (re-enabled from the completion
+ * chain once status is handled) and start an async IRQSTS read.
+ */
+static irqreturn_t mcr20a_irq_isr(int irq, void *data)
+{
+       struct mcr20a_local *lp = data;
+       int ret;
+
+       disable_irq_nosync(irq);
+
+       lp->irq_header[0] = MCR20A_READ_REG(DAR_IRQ_STS1);
+       /* read IRQSTSx */
+       ret = spi_async(lp->spi, &lp->irq_msg);
+       if (ret) {
+               /* could not queue the read: unmask and disclaim the IRQ */
+               enable_irq(irq);
+               return IRQ_NONE;
+       }
+
+       return IRQ_HANDLED;
+}
+
+/* Fill @pdata from the device-tree node (reset GPIO only).
+ * Returns -EINVAL when the SPI device has no OF node, 0 otherwise;
+ * an invalid/missing GPIO is reported through pdata->rst_gpio and
+ * checked by the caller with gpio_is_valid().
+ */
+static int mcr20a_get_platform_data(struct spi_device *spi,
+                                   struct mcr20a_platform_data *pdata)
+{
+       struct device_node *np = spi->dev.of_node;
+
+       if (!np)
+               return -EINVAL;
+
+       pdata->rst_gpio = of_get_named_gpio(np, "rst_b-gpio", 0);
+       dev_dbg(&spi->dev, "rst_b-gpio: %d\n", pdata->rst_gpio);
+
+       return 0;
+}
+
+/* Advertise hardware capabilities and defaults on the ieee802154_hw /
+ * wpan_phy structures before registration.
+ *
+ * Fix: phy->symbol_duration was assigned 16 twice; the duplicate
+ * assignment has been removed (no behaviour change).
+ */
+static void mcr20a_hw_setup(struct mcr20a_local *lp)
+{
+       u8 i;
+       struct ieee802154_hw *hw = lp->hw;
+       struct wpan_phy *phy = lp->hw->phy;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* timings in symbol periods (1 symbol = 16 us, see the
+        * IAR_TMR_PRESCALE setup in mcr20a_phy_init())
+        */
+       phy->symbol_duration = 16;
+       phy->lifs_period = 40;
+       phy->sifs_period = 12;
+
+       hw->flags = IEEE802154_HW_TX_OMIT_CKSUM |
+                       IEEE802154_HW_AFILT |
+                       IEEE802154_HW_PROMISCUOUS;
+
+       phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL |
+                       WPAN_PHY_FLAG_CCA_MODE;
+
+       phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) |
+               BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER);
+       phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) |
+               BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR);
+
+       /* initiating cca_ed_levels: index i holds -i dBm expressed in mbm */
+       for (i = MCR20A_MAX_CCA_THRESHOLD; i < MCR20A_MIN_CCA_THRESHOLD + 1;
+             ++i) {
+               mcr20a_ed_levels[i] =  -i * 100;
+       }
+
+       phy->supported.cca_ed_levels = mcr20a_ed_levels;
+       phy->supported.cca_ed_levels_size = ARRAY_SIZE(mcr20a_ed_levels);
+
+       phy->cca.mode = NL802154_CCA_ENERGY;
+
+       phy->supported.channels[0] = MCR20A_VALID_CHANNELS;
+       phy->current_page = 0;
+       /* MCR20A default reset value */
+       phy->current_channel = 20;
+       phy->supported.tx_powers = mcr20a_powers;
+       phy->supported.tx_powers_size = ARRAY_SIZE(mcr20a_powers);
+       /* -75 dBm, matching the IAR_CCA1_THRESH default (0x4B) written
+        * in mcr20a_phy_init()
+        */
+       phy->cca_ed_level = phy->supported.cca_ed_levels[75];
+       phy->transmit_power = phy->supported.tx_powers[0x0F]; /* 0 mbm */
+}
+
+/* Pre-build the three-transfer TX message: burst-write command byte,
+ * psduLength byte, then the frame payload (set per-packet in
+ * mcr20a_handle_tx()).
+ */
+static void
+mcr20a_setup_tx_spi_messages(struct mcr20a_local *lp)
+{
+       spi_message_init(&lp->tx_buf_msg);
+       lp->tx_buf_msg.context = lp;
+       lp->tx_buf_msg.complete = mcr20a_write_tx_buf_complete;
+
+       lp->tx_xfer_header.len = 1;
+       lp->tx_xfer_header.tx_buf = lp->tx_header;
+
+       lp->tx_xfer_len.len = 1;
+       lp->tx_xfer_len.tx_buf = lp->tx_len;
+
+       spi_message_add_tail(&lp->tx_xfer_header, &lp->tx_buf_msg);
+       spi_message_add_tail(&lp->tx_xfer_len, &lp->tx_buf_msg);
+       spi_message_add_tail(&lp->tx_xfer_buf, &lp->tx_buf_msg);
+}
+
+/* Pre-build the reusable RX-path messages: a generic register-access
+ * message (command + data, full duplex; completion set per use) and
+ * the packet-buffer read message (command, payload, trailing LQI byte).
+ */
+static void
+mcr20a_setup_rx_spi_messages(struct mcr20a_local *lp)
+{
+       spi_message_init(&lp->reg_msg);
+       lp->reg_msg.context = lp;
+
+       lp->reg_xfer_cmd.len = 1;
+       lp->reg_xfer_cmd.tx_buf = lp->reg_cmd;
+       lp->reg_xfer_cmd.rx_buf = lp->reg_cmd;
+
+       lp->reg_xfer_data.rx_buf = lp->reg_data;
+       lp->reg_xfer_data.tx_buf = lp->reg_data;
+
+       spi_message_add_tail(&lp->reg_xfer_cmd, &lp->reg_msg);
+       spi_message_add_tail(&lp->reg_xfer_data, &lp->reg_msg);
+
+       spi_message_init(&lp->rx_buf_msg);
+       lp->rx_buf_msg.context = lp;
+       lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+       lp->rx_xfer_header.len = 1;
+       lp->rx_xfer_header.tx_buf = lp->rx_header;
+       lp->rx_xfer_header.rx_buf = lp->rx_header;
+
+       /* payload length is set per frame in the read-len completion */
+       lp->rx_xfer_buf.rx_buf = lp->rx_buf;
+
+       lp->rx_xfer_lqi.len = 1;
+       lp->rx_xfer_lqi.rx_buf = lp->rx_lqi;
+
+       spi_message_add_tail(&lp->rx_xfer_header, &lp->rx_buf_msg);
+       spi_message_add_tail(&lp->rx_xfer_buf, &lp->rx_buf_msg);
+       spi_message_add_tail(&lp->rx_xfer_lqi, &lp->rx_buf_msg);
+}
+
+/* Pre-build the IRQ status read message (command byte + all IRQSTS
+ * registers), completing in mcr20a_irq_status_complete().
+ */
+static void
+mcr20a_setup_irq_spi_messages(struct mcr20a_local *lp)
+{
+       spi_message_init(&lp->irq_msg);
+       lp->irq_msg.context             = lp;
+       lp->irq_msg.complete    = mcr20a_irq_status_complete;
+       lp->irq_xfer_header.len = 1;
+       lp->irq_xfer_header.tx_buf = lp->irq_header;
+       lp->irq_xfer_header.rx_buf = lp->irq_header;
+
+       lp->irq_xfer_data.len   = MCR20A_IRQSTS_NUM;
+       lp->irq_xfer_data.rx_buf = lp->irq_data;
+
+       spi_message_add_tail(&lp->irq_xfer_header, &lp->irq_msg);
+       spi_message_add_tail(&lp->irq_xfer_data, &lp->irq_msg);
+}
+
+/* One-time PHY/register initialization after reset: clear pending
+ * interrupt status, mask unused interrupts, apply the vendor register
+ * overwrites, flush the indirect queue and program default CCA/timer
+ * settings.  Returns 0 or the first regmap error.
+ *
+ * Fix: the return value of the PHY_CTRL1 AUTOACK update was ignored
+ * (immediately overwritten by the next call); it is now checked like
+ * every other access in this function.
+ */
+static int
+mcr20a_phy_init(struct mcr20a_local *lp)
+{
+       u8 index;
+       unsigned int phy_reg = 0;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* Disable Tristate on COCO MISO for SPI reads */
+       ret = regmap_write(lp->regmap_iar, IAR_MISC_PAD_CTRL, 0x02);
+       if (ret)
+               goto err_ret;
+
+       /* Clear all PP IRQ bits in IRQSTS1 to avoid unexpected interrupts
+        * immediately after init
+        */
+       ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS1, 0xEF);
+       if (ret)
+               goto err_ret;
+
+       /* Clear all PP IRQ bits in IRQSTS2 */
+       ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS2,
+                          DAR_IRQSTS2_ASM_IRQ | DAR_IRQSTS2_PB_ERR_IRQ |
+                          DAR_IRQSTS2_WAKE_IRQ);
+       if (ret)
+               goto err_ret;
+
+       /* Disable all timer interrupts */
+       ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS3, 0xFF);
+       if (ret)
+               goto err_ret;
+
+       /*  PHY_CTRL1 : default HW settings + AUTOACK enabled */
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_AUTOACK, DAR_PHY_CTRL1_AUTOACK);
+       if (ret)
+               goto err_ret;
+
+       /*  PHY_CTRL2 : disable all interrupts */
+       ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL2, 0xFF);
+       if (ret)
+               goto err_ret;
+
+       /* PHY_CTRL3 : disable all timers and remaining interrupts */
+       ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL3,
+                          DAR_PHY_CTRL3_ASM_MSK | DAR_PHY_CTRL3_PB_ERR_MSK |
+                          DAR_PHY_CTRL3_WAKE_MSK);
+       if (ret)
+               goto err_ret;
+
+       /* SRC_CTRL : enable Acknowledge Frame Pending and
+        * Source Address Matching Enable
+        */
+       ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL,
+                          DAR_SRC_CTRL_ACK_FRM_PND |
+                          (DAR_SRC_CTRL_INDEX << DAR_SRC_CTRL_INDEX_SHIFT));
+       if (ret)
+               goto err_ret;
+
+       /*  RX_FRAME_FILTER */
+       /*  FRM_VER[1:0] = b11. Accept FrameVersion 0 and 1 packets */
+       ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+                          IAR_RX_FRAME_FLT_FRM_VER |
+                          IAR_RX_FRAME_FLT_BEACON_FT |
+                          IAR_RX_FRAME_FLT_DATA_FT |
+                          IAR_RX_FRAME_FLT_CMD_FT);
+       if (ret)
+               goto err_ret;
+
+       dev_info(printdev(lp), "MCR20A DAR overwrites version: 0x%02x\n",
+                MCR20A_OVERWRITE_VERSION);
+
+       /* Overwrites direct registers  */
+       ret = regmap_write(lp->regmap_dar, DAR_OVERWRITE_VER,
+                          MCR20A_OVERWRITE_VERSION);
+       if (ret)
+               goto err_ret;
+
+       /* Overwrites indirect registers  */
+       ret = regmap_multi_reg_write(lp->regmap_iar, mar20a_iar_overwrites,
+                                    ARRAY_SIZE(mar20a_iar_overwrites));
+       if (ret)
+               goto err_ret;
+
+       /* Clear HW indirect queue */
+       dev_dbg(printdev(lp), "clear HW indirect queue\n");
+       for (index = 0; index < MCR20A_PHY_INDIRECT_QUEUE_SIZE; index++) {
+               phy_reg = (u8)(((index & DAR_SRC_CTRL_INDEX) <<
+                              DAR_SRC_CTRL_INDEX_SHIFT)
+                             | (DAR_SRC_CTRL_SRCADDR_EN)
+                             | (DAR_SRC_CTRL_INDEX_DISABLE));
+               ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL, phy_reg);
+               if (ret)
+                       goto err_ret;
+               phy_reg = 0;
+       }
+
+       /* Assign HW Indirect hash table to PAN0 */
+       ret = regmap_read(lp->regmap_iar, IAR_DUAL_PAN_CTRL, &phy_reg);
+       if (ret)
+               goto err_ret;
+
+       /* Clear current lvl */
+       phy_reg &= ~IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK;
+
+       /* Set new lvl */
+       phy_reg |= MCR20A_PHY_INDIRECT_QUEUE_SIZE <<
+               IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT;
+       ret = regmap_write(lp->regmap_iar, IAR_DUAL_PAN_CTRL, phy_reg);
+       if (ret)
+               goto err_ret;
+
+       /* Set CCA threshold to -75 dBm */
+       ret = regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, 0x4B);
+       if (ret)
+               goto err_ret;
+
+       /* Set prescaller to obtain 1 symbol (16us) timebase */
+       ret = regmap_write(lp->regmap_iar, IAR_TMR_PRESCALE, 0x05);
+       if (ret)
+               goto err_ret;
+
+       /* Enable autodoze mode. */
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PWR_MODES,
+                                DAR_PWR_MODES_AUTODOZE,
+                                DAR_PWR_MODES_AUTODOZE);
+       if (ret)
+               goto err_ret;
+
+       /* Disable clk_out */
+       ret = regmap_update_bits(lp->regmap_dar, DAR_CLK_OUT_CTRL,
+                                DAR_CLK_OUT_CTRL_EN, 0x0);
+       if (ret)
+               goto err_ret;
+
+       return 0;
+
+err_ret:
+       return ret;
+}
+
+/* SPI probe: read platform data, reset the chip, allocate and register
+ * the ieee802154 device, initialize the PHY and request the IRQ.
+ *
+ * Fixes:
+ *  - pdata (kmalloc'd) leaked on every error path; freed via the new
+ *    free_pdata label (on success it stays referenced by lp->pdata and
+ *    spi->dev.platform_data).
+ *  - hw leaked when the devm_kzalloc of lp->buf failed (bare return
+ *    instead of goto free_dev).
+ *  - devm_request_irq failure returned a hard-coded -ENODEV, discarding
+ *    the real error code.
+ */
+static int
+mcr20a_probe(struct spi_device *spi)
+{
+       struct ieee802154_hw *hw;
+       struct mcr20a_local *lp;
+       struct mcr20a_platform_data *pdata;
+       int irq_type;
+       int ret = -ENOMEM;
+
+       dev_dbg(&spi->dev, "%s\n", __func__);
+
+       if (!spi->irq) {
+               dev_err(&spi->dev, "no IRQ specified\n");
+               return -EINVAL;
+       }
+
+       pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
+       if (!pdata)
+               return -ENOMEM;
+
+       /* set mcr20a platform data */
+       ret = mcr20a_get_platform_data(spi, pdata);
+       if (ret < 0) {
+               dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n");
+               goto free_pdata;
+       }
+
+       /* init reset gpio */
+       if (gpio_is_valid(pdata->rst_gpio)) {
+               ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio,
+                                           GPIOF_OUT_INIT_HIGH, "reset");
+               if (ret)
+                       goto free_pdata;
+       }
+
+       /* reset mcr20a */
+       if (gpio_is_valid(pdata->rst_gpio)) {
+               usleep_range(10, 20);
+               gpio_set_value_cansleep(pdata->rst_gpio, 0);
+               usleep_range(10, 20);
+               gpio_set_value_cansleep(pdata->rst_gpio, 1);
+               usleep_range(120, 240);
+       }
+
+       /* allocate ieee802154_hw and private data */
+       hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops);
+       if (!hw) {
+               dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n");
+               ret = -ENOMEM;
+               goto free_pdata;
+       }
+
+       /* init mcr20a local data */
+       lp = hw->priv;
+       lp->hw = hw;
+       lp->spi = spi;
+       lp->spi->dev.platform_data = pdata;
+       lp->pdata = pdata;
+
+       /* init ieee802154_hw */
+       hw->parent = &spi->dev;
+       ieee802154_random_extended_addr(&hw->phy->perm_extended_addr);
+
+       /* init buf */
+       lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL);
+       if (!lp->buf) {
+               ret = -ENOMEM;
+               goto free_dev;
+       }
+
+       mcr20a_setup_tx_spi_messages(lp);
+       mcr20a_setup_rx_spi_messages(lp);
+       mcr20a_setup_irq_spi_messages(lp);
+
+       /* setup regmap */
+       lp->regmap_dar = devm_regmap_init_spi(spi, &mcr20a_dar_regmap);
+       if (IS_ERR(lp->regmap_dar)) {
+               ret = PTR_ERR(lp->regmap_dar);
+               dev_err(&spi->dev, "Failed to allocate dar map: %d\n",
+                       ret);
+               goto free_dev;
+       }
+
+       lp->regmap_iar = devm_regmap_init_spi(spi, &mcr20a_iar_regmap);
+       if (IS_ERR(lp->regmap_iar)) {
+               ret = PTR_ERR(lp->regmap_iar);
+               dev_err(&spi->dev, "Failed to allocate iar map: %d\n", ret);
+               goto free_dev;
+       }
+
+       mcr20a_hw_setup(lp);
+
+       spi_set_drvdata(spi, lp);
+
+       ret = mcr20a_phy_init(lp);
+       if (ret < 0) {
+               dev_crit(&spi->dev, "mcr20a_phy_init failed\n");
+               goto free_dev;
+       }
+
+       irq_type = irq_get_trigger_type(spi->irq);
+       if (!irq_type)
+               irq_type = IRQF_TRIGGER_FALLING;
+
+       ret = devm_request_irq(&spi->dev, spi->irq, mcr20a_irq_isr,
+                              irq_type, dev_name(&spi->dev), lp);
+       if (ret) {
+               dev_err(&spi->dev, "could not request_irq for mcr20a\n");
+               goto free_dev;
+       }
+
+       /* disable_irq by default and wait for starting hardware */
+       disable_irq(spi->irq);
+
+       ret = ieee802154_register_hw(hw);
+       if (ret) {
+               dev_crit(&spi->dev, "ieee802154_register_hw failed\n");
+               goto free_dev;
+       }
+
+       return 0;
+
+free_dev:
+       ieee802154_free_hw(lp->hw);
+free_pdata:
+       kfree(pdata);
+
+       return ret;
+}
+
+/* SPI remove: unregister and free the ieee802154 device.
+ *
+ * Fix: the platform data kmalloc'd in probe (installed as
+ * spi->dev.platform_data there) was never freed on unbind.
+ */
+static int mcr20a_remove(struct spi_device *spi)
+{
+       struct mcr20a_local *lp = spi_get_drvdata(spi);
+
+       dev_dbg(&spi->dev, "%s\n", __func__);
+
+       ieee802154_unregister_hw(lp->hw);
+       ieee802154_free_hw(lp->hw);
+       kfree(spi->dev.platform_data);
+       spi->dev.platform_data = NULL;
+
+       return 0;
+}
+
+/* Device match tables and driver registration boilerplate. */
+static const struct of_device_id mcr20a_of_match[] = {
+       { .compatible = "nxp,mcr20a", },
+       { },
+};
+MODULE_DEVICE_TABLE(of, mcr20a_of_match);
+
+static const struct spi_device_id mcr20a_device_id[] = {
+       { .name = "mcr20a", },
+       { },
+};
+MODULE_DEVICE_TABLE(spi, mcr20a_device_id);
+
+static struct spi_driver mcr20a_driver = {
+       .id_table = mcr20a_device_id,
+       .driver = {
+               .of_match_table = of_match_ptr(mcr20a_of_match),
+               .name   = "mcr20a",
+       },
+       .probe      = mcr20a_probe,
+       .remove     = mcr20a_remove,
+};
+
+module_spi_driver(mcr20a_driver);
+
+MODULE_DESCRIPTION("MCR20A Transceiver Driver");
+MODULE_LICENSE("GPL v2");
+/* Fix: address was truncated ("@gmail"); completed to match the
+ * copyright header.
+ */
+MODULE_AUTHOR("Xue Liu <liuxuenetmail@gmail.com>");
diff --git a/drivers/net/ieee802154/mcr20a.h b/drivers/net/ieee802154/mcr20a.h
new file mode 100644 (file)
index 0000000..6da4fd0
--- /dev/null
@@ -0,0 +1,498 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef _MCR20A_H
+#define _MCR20A_H
+
+/* Direct Access Register */
+#define DAR_IRQ_STS1           0x00
+#define DAR_IRQ_STS2           0x01
+#define DAR_IRQ_STS3           0x02
+#define DAR_PHY_CTRL1          0x03
+#define DAR_PHY_CTRL2          0x04
+#define DAR_PHY_CTRL3          0x05
+#define DAR_RX_FRM_LEN         0x06
+#define DAR_PHY_CTRL4          0x07
+#define DAR_SRC_CTRL           0x08
+#define DAR_SRC_ADDRS_SUM_LSB  0x09
+#define DAR_SRC_ADDRS_SUM_MSB  0x0A
+#define DAR_CCA1_ED_FNL                0x0B
+#define DAR_EVENT_TMR_LSB      0x0C
+#define DAR_EVENT_TMR_MSB      0x0D
+#define DAR_EVENT_TMR_USB      0x0E
+#define DAR_TIMESTAMP_LSB      0x0F
+#define DAR_TIMESTAMP_MSB      0x10
+#define DAR_TIMESTAMP_USB      0x11
+#define DAR_T3CMP_LSB          0x12
+#define DAR_T3CMP_MSB          0x13
+#define DAR_T3CMP_USB          0x14
+#define DAR_T2PRIMECMP_LSB     0x15
+#define DAR_T2PRIMECMP_MSB     0x16
+#define DAR_T1CMP_LSB          0x17
+#define DAR_T1CMP_MSB          0x18
+#define DAR_T1CMP_USB          0x19
+#define DAR_T2CMP_LSB          0x1A
+#define DAR_T2CMP_MSB          0x1B
+#define DAR_T2CMP_USB          0x1C
+#define DAR_T4CMP_LSB          0x1D
+#define DAR_T4CMP_MSB          0x1E
+#define DAR_T4CMP_USB          0x1F
+#define DAR_PLL_INT0           0x20
+#define DAR_PLL_FRAC0_LSB      0x21
+#define DAR_PLL_FRAC0_MSB      0x22
+#define DAR_PA_PWR             0x23
+#define DAR_SEQ_STATE          0x24
+#define DAR_LQI_VALUE          0x25
+#define DAR_RSSI_CCA_CONT      0x26
+/*------------------            0x27 */
+#define DAR_ASM_CTRL1          0x28
+#define DAR_ASM_CTRL2          0x29
+#define DAR_ASM_DATA_0         0x2A
+#define DAR_ASM_DATA_1         0x2B
+#define DAR_ASM_DATA_2         0x2C
+#define DAR_ASM_DATA_3         0x2D
+#define DAR_ASM_DATA_4         0x2E
+#define DAR_ASM_DATA_5         0x2F
+#define DAR_ASM_DATA_6         0x30
+#define DAR_ASM_DATA_7         0x31
+#define DAR_ASM_DATA_8         0x32
+#define DAR_ASM_DATA_9         0x33
+#define DAR_ASM_DATA_A         0x34
+#define DAR_ASM_DATA_B         0x35
+#define DAR_ASM_DATA_C         0x36
+#define DAR_ASM_DATA_D         0x37
+#define DAR_ASM_DATA_E         0x38
+#define DAR_ASM_DATA_F         0x39
+/*-----------------------       0x3A */
+#define DAR_OVERWRITE_VER      0x3B
+#define DAR_CLK_OUT_CTRL       0x3C
+#define DAR_PWR_MODES          0x3D
+#define IAR_INDEX              0x3E
+#define IAR_DATA               0x3F
+
+/* Indirect Register Memory */
+#define IAR_PART_ID            0x00
+#define IAR_XTAL_TRIM          0x01
+#define IAR_PMC_LP_TRIM                0x02
+#define IAR_MACPANID0_LSB      0x03
+#define IAR_MACPANID0_MSB      0x04
+#define IAR_MACSHORTADDRS0_LSB 0x05
+#define IAR_MACSHORTADDRS0_MSB 0x06
+#define IAR_MACLONGADDRS0_0    0x07
+#define IAR_MACLONGADDRS0_8    0x08
+#define IAR_MACLONGADDRS0_16   0x09
+#define IAR_MACLONGADDRS0_24   0x0A
+#define IAR_MACLONGADDRS0_32   0x0B
+#define IAR_MACLONGADDRS0_40   0x0C
+#define IAR_MACLONGADDRS0_48   0x0D
+#define IAR_MACLONGADDRS0_56   0x0E
+#define IAR_RX_FRAME_FILTER    0x0F
+#define IAR_PLL_INT1           0x10
+#define IAR_PLL_FRAC1_LSB      0x11
+#define IAR_PLL_FRAC1_MSB      0x12
+#define IAR_MACPANID1_LSB      0x13
+#define IAR_MACPANID1_MSB      0x14
+#define IAR_MACSHORTADDRS1_LSB 0x15
+#define IAR_MACSHORTADDRS1_MSB 0x16
+#define IAR_MACLONGADDRS1_0    0x17
+#define IAR_MACLONGADDRS1_8    0x18
+#define IAR_MACLONGADDRS1_16   0x19
+#define IAR_MACLONGADDRS1_24   0x1A
+#define IAR_MACLONGADDRS1_32   0x1B
+#define IAR_MACLONGADDRS1_40   0x1C
+#define IAR_MACLONGADDRS1_48   0x1D
+#define IAR_MACLONGADDRS1_56   0x1E
+#define IAR_DUAL_PAN_CTRL      0x1F
+#define IAR_DUAL_PAN_DWELL     0x20
+#define IAR_DUAL_PAN_STS       0x21
+#define IAR_CCA1_THRESH                0x22
+#define IAR_CCA1_ED_OFFSET_COMP        0x23
+#define IAR_LQI_OFFSET_COMP    0x24
+#define IAR_CCA_CTRL           0x25
+#define IAR_CCA2_CORR_PEAKS    0x26
+#define IAR_CCA2_CORR_THRESH   0x27
+#define IAR_TMR_PRESCALE       0x28
+/*--------------------          0x29 */
+#define IAR_GPIO_DATA          0x2A
+#define IAR_GPIO_DIR           0x2B
+#define IAR_GPIO_PUL_EN                0x2C
+#define IAR_GPIO_PUL_SEL       0x2D
+#define IAR_GPIO_DS            0x2E
+/*------------------            0x2F */
+#define IAR_ANT_PAD_CTRL       0x30
+#define IAR_MISC_PAD_CTRL      0x31
+#define IAR_BSM_CTRL           0x32
+/*-------------------           0x33 */
+#define IAR_RNG                        0x34
+#define IAR_RX_BYTE_COUNT      0x35
+#define IAR_RX_WTR_MARK                0x36
+#define IAR_SOFT_RESET         0x37
+#define IAR_TXDELAY            0x38
+#define IAR_ACKDELAY           0x39
+#define IAR_SEQ_MGR_CTRL       0x3A
+#define IAR_SEQ_MGR_STS                0x3B
+#define IAR_SEQ_T_STS          0x3C
+#define IAR_ABORT_STS          0x3D
+#define IAR_CCCA_BUSY_CNT      0x3E
+#define IAR_SRC_ADDR_CHECKSUM1 0x3F
+#define IAR_SRC_ADDR_CHECKSUM2 0x40
+#define IAR_SRC_TBL_VALID1     0x41
+#define IAR_SRC_TBL_VALID2     0x42
+#define IAR_FILTERFAIL_CODE1   0x43
+#define IAR_FILTERFAIL_CODE2   0x44
+#define IAR_SLOT_PRELOAD       0x45
+/*--------------------          0x46 */
+#define IAR_CORR_VT            0x47
+#define IAR_SYNC_CTRL          0x48
+#define IAR_PN_LSB_0           0x49
+#define IAR_PN_LSB_1           0x4A
+#define IAR_PN_MSB_0           0x4B
+#define IAR_PN_MSB_1           0x4C
+#define IAR_CORR_NVAL          0x4D
+#define IAR_TX_MODE_CTRL       0x4E
+#define IAR_SNF_THR            0x4F
+#define IAR_FAD_THR            0x50
+#define IAR_ANT_AGC_CTRL       0x51
+#define IAR_AGC_THR1           0x52
+#define IAR_AGC_THR2           0x53
+#define IAR_AGC_HYS            0x54
+#define IAR_AFC                        0x55
+/*-------------------           0x56 */
+/*-------------------           0x57 */
+#define IAR_PHY_STS            0x58
+#define IAR_RX_MAX_CORR                0x59
+#define IAR_RX_MAX_PREAMBLE    0x5A
+#define IAR_RSSI               0x5B
+/*-------------------           0x5C */
+/*-------------------           0x5D */
+#define IAR_PLL_DIG_CTRL       0x5E
+#define IAR_VCO_CAL            0x5F
+#define IAR_VCO_BEST_DIFF      0x60
+#define IAR_VCO_BIAS           0x61
+#define IAR_KMOD_CTRL          0x62
+#define IAR_KMOD_CAL           0x63
+#define IAR_PA_CAL             0x64
+#define IAR_PA_PWRCAL          0x65
+#define IAR_ATT_RSSI1          0x66
+#define IAR_ATT_RSSI2          0x67
+#define IAR_RSSI_OFFSET                0x68
+#define IAR_RSSI_SLOPE         0x69
+#define IAR_RSSI_CAL1          0x6A
+#define IAR_RSSI_CAL2          0x6B
+/*-------------------           0x6C */
+/*-------------------           0x6D */
+#define IAR_XTAL_CTRL          0x6E
+#define IAR_XTAL_COMP_MIN      0x6F
+#define IAR_XTAL_COMP_MAX      0x70
+#define IAR_XTAL_GM            0x71
+/*-------------------           0x72 */
+/*-------------------           0x73 */
+#define IAR_LNA_TUNE           0x74
+#define IAR_LNA_AGCGAIN                0x75
+/*-------------------           0x76 */
+/*-------------------           0x77 */
+#define IAR_CHF_PMA_GAIN       0x78
+#define IAR_CHF_IBUF           0x79
+#define IAR_CHF_QBUF           0x7A
+#define IAR_CHF_IRIN           0x7B
+#define IAR_CHF_QRIN           0x7C
+#define IAR_CHF_IL             0x7D
+#define IAR_CHF_QL             0x7E
+#define IAR_CHF_CC1            0x7F
+#define IAR_CHF_CCL            0x80
+#define IAR_CHF_CC2            0x81
+#define IAR_CHF_IROUT          0x82
+#define IAR_CHF_QROUT          0x83
+/*-------------------           0x84 */
+/*-------------------           0x85 */
+#define IAR_RSSI_CTRL          0x86
+/*-------------------           0x87 */
+/*-------------------           0x88 */
+#define IAR_PA_BIAS            0x89
+#define IAR_PA_TUNING          0x8A
+/*-------------------           0x8B */
+/*-------------------           0x8C */
+#define IAR_PMC_HP_TRIM                0x8D
+#define IAR_VREGA_TRIM         0x8E
+/*-------------------           0x8F */
+/*-------------------           0x90 */
+#define IAR_VCO_CTRL1          0x91
+#define IAR_VCO_CTRL2          0x92
+/*-------------------           0x93 */
+/*-------------------           0x94 */
+#define IAR_ANA_SPARE_OUT1     0x95
+#define IAR_ANA_SPARE_OUT2     0x96
+#define IAR_ANA_SPARE_IN       0x97
+#define IAR_MISCELLANEOUS      0x98
+/*-------------------           0x99 */
+#define IAR_SEQ_MGR_OVRD0      0x9A
+#define IAR_SEQ_MGR_OVRD1      0x9B
+#define IAR_SEQ_MGR_OVRD2      0x9C
+#define IAR_SEQ_MGR_OVRD3      0x9D
+#define IAR_SEQ_MGR_OVRD4      0x9E
+#define IAR_SEQ_MGR_OVRD5      0x9F
+#define IAR_SEQ_MGR_OVRD6      0xA0
+#define IAR_SEQ_MGR_OVRD7      0xA1
+/*-------------------           0xA2 */
+#define IAR_TESTMODE_CTRL      0xA3
+#define IAR_DTM_CTRL1          0xA4
+#define IAR_DTM_CTRL2          0xA5
+#define IAR_ATM_CTRL1          0xA6
+#define IAR_ATM_CTRL2          0xA7
+#define IAR_ATM_CTRL3          0xA8
+/*-------------------           0xA9 */
+#define IAR_LIM_FE_TEST_CTRL   0xAA
+#define IAR_CHF_TEST_CTRL      0xAB
+#define IAR_VCO_TEST_CTRL      0xAC
+#define IAR_PLL_TEST_CTRL      0xAD
+#define IAR_PA_TEST_CTRL       0xAE
+#define IAR_PMC_TEST_CTRL      0xAF
+#define IAR_SCAN_DTM_PROTECT_1 0xFE
+#define IAR_SCAN_DTM_PROTECT_0 0xFF
+
+/* IRQSTS1 bits */
+#define DAR_IRQSTS1_RX_FRM_PEND                BIT(7)
+#define DAR_IRQSTS1_PLL_UNLOCK_IRQ     BIT(6)
+#define DAR_IRQSTS1_FILTERFAIL_IRQ     BIT(5)
+#define DAR_IRQSTS1_RXWTRMRKIRQ                BIT(4)
+#define DAR_IRQSTS1_CCAIRQ             BIT(3)
+#define DAR_IRQSTS1_RXIRQ              BIT(2)
+#define DAR_IRQSTS1_TXIRQ              BIT(1)
+#define DAR_IRQSTS1_SEQIRQ             BIT(0)
+
+/* IRQSTS2 bits */
+#define DAR_IRQSTS2_CRCVALID           BIT(7)
+#define DAR_IRQSTS2_CCA                        BIT(6)
+#define DAR_IRQSTS2_SRCADDR            BIT(5)
+#define DAR_IRQSTS2_PI                 BIT(4)
+#define DAR_IRQSTS2_TMRSTATUS          BIT(3)
+#define DAR_IRQSTS2_ASM_IRQ            BIT(2)
+#define DAR_IRQSTS2_PB_ERR_IRQ         BIT(1)
+#define DAR_IRQSTS2_WAKE_IRQ           BIT(0)
+
+/* IRQSTS3 bits */
+#define DAR_IRQSTS3_TMR4MSK            BIT(7)
+#define DAR_IRQSTS3_TMR3MSK            BIT(6)
+#define DAR_IRQSTS3_TMR2MSK            BIT(5)
+#define DAR_IRQSTS3_TMR1MSK            BIT(4)
+#define DAR_IRQSTS3_TMR4IRQ            BIT(3)
+#define DAR_IRQSTS3_TMR3IRQ            BIT(2)
+#define DAR_IRQSTS3_TMR2IRQ            BIT(1)
+#define DAR_IRQSTS3_TMR1IRQ            BIT(0)
+
+/* PHY_CTRL1 bits */
+#define DAR_PHY_CTRL1_TMRTRIGEN                BIT(7)
+#define DAR_PHY_CTRL1_SLOTTED          BIT(6)
+#define DAR_PHY_CTRL1_CCABFRTX         BIT(5)
+#define DAR_PHY_CTRL1_CCABFRTX_SHIFT   5
+#define DAR_PHY_CTRL1_RXACKRQD         BIT(4)
+#define DAR_PHY_CTRL1_AUTOACK          BIT(3)
+#define DAR_PHY_CTRL1_XCVSEQ_MASK      0x07
+
+/* PHY_CTRL2 bits */
+#define DAR_PHY_CTRL2_CRC_MSK          BIT(7)
+#define DAR_PHY_CTRL2_PLL_UNLOCK_MSK   BIT(6)
+#define DAR_PHY_CTRL2_FILTERFAIL_MSK   BIT(5)
+#define DAR_PHY_CTRL2_RX_WMRK_MSK      BIT(4)
+#define DAR_PHY_CTRL2_CCAMSK           BIT(3)
+#define DAR_PHY_CTRL2_RXMSK            BIT(2)
+#define DAR_PHY_CTRL2_TXMSK            BIT(1)
+#define DAR_PHY_CTRL2_SEQMSK           BIT(0)
+
+/* PHY_CTRL3 bits */
+#define DAR_PHY_CTRL3_TMR4CMP_EN       BIT(7)
+#define DAR_PHY_CTRL3_TMR3CMP_EN       BIT(6)
+#define DAR_PHY_CTRL3_TMR2CMP_EN       BIT(5)
+#define DAR_PHY_CTRL3_TMR1CMP_EN       BIT(4)
+#define DAR_PHY_CTRL3_ASM_MSK          BIT(2)
+#define DAR_PHY_CTRL3_PB_ERR_MSK       BIT(1)
+#define DAR_PHY_CTRL3_WAKE_MSK         BIT(0)
+
+/* RX_FRM_LEN bits */
+#define DAR_RX_FRAME_LENGTH_MASK       (0x7F)
+
+/* PHY_CTRL4 bits */
+#define DAR_PHY_CTRL4_TRCV_MSK         BIT(7)
+#define DAR_PHY_CTRL4_TC3TMOUT         BIT(6)
+#define DAR_PHY_CTRL4_PANCORDNTR0      BIT(5)
+#define DAR_PHY_CTRL4_CCATYPE          (3)
+#define DAR_PHY_CTRL4_CCATYPE_SHIFT    (3)
+#define DAR_PHY_CTRL4_CCATYPE_MASK     (0x18)
+#define DAR_PHY_CTRL4_TMRLOAD          BIT(2)
+#define DAR_PHY_CTRL4_PROMISCUOUS      BIT(1)
+#define DAR_PHY_CTRL4_TC2PRIME_EN      BIT(0)
+
+/* SRC_CTRL bits */
+#define DAR_SRC_CTRL_INDEX             (0x0F)
+#define DAR_SRC_CTRL_INDEX_SHIFT       (4)
+#define DAR_SRC_CTRL_ACK_FRM_PND       BIT(3)
+#define DAR_SRC_CTRL_SRCADDR_EN                BIT(2)
+#define DAR_SRC_CTRL_INDEX_EN          BIT(1)
+#define DAR_SRC_CTRL_INDEX_DISABLE     BIT(0)
+
+/* DAR_ASM_CTRL1 bits */
+#define DAR_ASM_CTRL1_CLEAR            BIT(7)
+#define DAR_ASM_CTRL1_START            BIT(6)
+#define DAR_ASM_CTRL1_SELFTST          BIT(5)
+#define DAR_ASM_CTRL1_CTR              BIT(4)
+#define DAR_ASM_CTRL1_CBC              BIT(3)
+#define DAR_ASM_CTRL1_AES              BIT(2)
+#define DAR_ASM_CTRL1_LOAD_MAC         BIT(1)
+
+/* DAR_ASM_CTRL2 bits */
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL                (7)
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL_SHIFT  (5)
+#define DAR_ASM_CTRL2_TSTPAS                   BIT(1)
+
+/* DAR_CLK_OUT_CTRL bits */
+#define DAR_CLK_OUT_CTRL_EXTEND                BIT(7)
+#define DAR_CLK_OUT_CTRL_HIZ           BIT(6)
+#define DAR_CLK_OUT_CTRL_SR            BIT(5)
+#define DAR_CLK_OUT_CTRL_DS            BIT(4)
+#define DAR_CLK_OUT_CTRL_EN            BIT(3)
+#define DAR_CLK_OUT_CTRL_DIV           (7)
+
+/* DAR_PWR_MODES bits */
+#define DAR_PWR_MODES_XTAL_READY       BIT(5)
+#define DAR_PWR_MODES_XTALEN           BIT(4)
+#define DAR_PWR_MODES_ASM_CLK_EN       BIT(3)
+#define DAR_PWR_MODES_AUTODOZE         BIT(1)
+#define DAR_PWR_MODES_PMC_MODE         BIT(0)
+
+/* RX_FRAME_FILTER bits */
+#define IAR_RX_FRAME_FLT_FRM_VER               (0xC0)
+#define IAR_RX_FRAME_FLT_FRM_VER_SHIFT         (6)
+#define IAR_RX_FRAME_FLT_ACTIVE_PROMISCUOUS    BIT(5)
+#define IAR_RX_FRAME_FLT_NS_FT                 BIT(4)
+#define IAR_RX_FRAME_FLT_CMD_FT                        BIT(3)
+#define IAR_RX_FRAME_FLT_ACK_FT                        BIT(2)
+#define IAR_RX_FRAME_FLT_DATA_FT               BIT(1)
+#define IAR_RX_FRAME_FLT_BEACON_FT             BIT(0)
+
+/* DUAL_PAN_CTRL bits */
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK (0xF0)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT       (4)
+#define IAR_DUAL_PAN_CTRL_CURRENT_NETWORK      BIT(3)
+#define IAR_DUAL_PAN_CTRL_PANCORDNTR1          BIT(2)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_AUTO                BIT(1)
+#define IAR_DUAL_PAN_CTRL_ACTIVE_NETWORK       BIT(0)
+
+/* DUAL_PAN_STS bits */
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN1          BIT(7)
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN0          BIT(6)
+#define IAR_DUAL_PAN_STS_DUAL_PAN_REMAIN       (0x3F)
+
+/* CCA_CTRL bits */
+#define IAR_CCA_CTRL_AGC_FRZ_EN                        BIT(6)
+#define IAR_CCA_CTRL_CONT_RSSI_EN              BIT(5)
+#define IAR_CCA_CTRL_LQI_RSSI_NOT_CORR BIT(4)
+#define IAR_CCA_CTRL_CCA3_AND_NOT_OR   BIT(3)
+#define IAR_CCA_CTRL_POWER_COMP_EN_LQI BIT(2)
+#define IAR_CCA_CTRL_POWER_COMP_EN_ED  BIT(1)
+#define IAR_CCA_CTRL_POWER_COMP_EN_CCA1        BIT(0)
+
+/* ANT_PAD_CTRL bits */
+#define IAR_ANT_PAD_CTRL_ANTX_POL      (0x0F)
+#define IAR_ANT_PAD_CTRL_ANTX_POL_SHIFT        (4)
+#define IAR_ANT_PAD_CTRL_ANTX_CTRLMODE BIT(3)
+#define IAR_ANT_PAD_CTRL_ANTX_HZ       BIT(2)
+#define IAR_ANT_PAD_CTRL_ANTX_EN       (3)
+
+/* MISC_PAD_CTRL bits */
+#define IAR_MISC_PAD_CTRL_MISO_HIZ_EN  BIT(3)
+#define IAR_MISC_PAD_CTRL_IRQ_B_OD     BIT(2)
+#define IAR_MISC_PAD_CTRL_NON_GPIO_DS  BIT(1)
+#define IAR_MISC_PAD_CTRL_ANTX_CURR    (1)
+
+/* ANT_AGC_CTRL bits */
+#define IAR_ANT_AGC_CTRL_FAD_EN_SHIFT  (0)
+#define IAR_ANT_AGC_CTRL_FAD_EN_MASK   (1)
+#define IAR_ANT_AGC_CTRL_ANTX_SHIFT    (1)
+#define IAR_ANT_AGC_CTRL_ANTX_MASK     BIT(IAR_ANT_AGC_CTRL_ANTX_SHIFT)
+
+/* BSM_CTRL bits */
+#define BSM_CTRL_BSM_EN                (1)
+
+/* SOFT_RESET bits */
+#define IAR_SOFT_RESET_SOG_RST         BIT(7)
+#define IAR_SOFT_RESET_REGS_RST                BIT(4)
+#define IAR_SOFT_RESET_PLL_RST         BIT(3)
+#define IAR_SOFT_RESET_TX_RST          BIT(2)
+#define IAR_SOFT_RESET_RX_RST          BIT(1)
+#define IAR_SOFT_RESET_SEQ_MGR_RST     BIT(0)
+
+/* SEQ_MGR_CTRL bits */
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL                (3)
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL_SHIFT  (6)
+#define IAR_SEQ_MGR_CTRL_NO_RX_RECYCLE         BIT(5)
+#define IAR_SEQ_MGR_CTRL_LATCH_PREAMBLE                BIT(4)
+#define IAR_SEQ_MGR_CTRL_EVENT_TMR_DO_NOT_LATCH        BIT(3)
+#define IAR_SEQ_MGR_CTRL_CLR_NEW_SEQ_INHIBIT   BIT(2)
+#define IAR_SEQ_MGR_CTRL_PSM_LOCK_DIS          BIT(1)
+#define IAR_SEQ_MGR_CTRL_PLL_ABORT_OVRD                BIT(0)
+
+/* SEQ_MGR_STS bits */
+#define IAR_SEQ_MGR_STS_TMR2_SEQ_TRIG_ARMED    BIT(7)
+#define IAR_SEQ_MGR_STS_RX_MODE                        BIT(6)
+#define IAR_SEQ_MGR_STS_RX_TIMEOUT_PENDING     BIT(5)
+#define IAR_SEQ_MGR_STS_NEW_SEQ_INHIBIT                BIT(4)
+#define IAR_SEQ_MGR_STS_SEQ_IDLE               BIT(3)
+#define IAR_SEQ_MGR_STS_XCVSEQ_ACTUAL          (7)
+
+/* ABORT_STS bits */
+#define IAR_ABORT_STS_PLL_ABORTED      BIT(2)
+#define IAR_ABORT_STS_TC3_ABORTED      BIT(1)
+#define IAR_ABORT_STS_SW_ABORTED       BIT(0)
+
+/* IAR_FILTERFAIL_CODE2 bits */
+#define IAR_FILTERFAIL_CODE2_PAN_SEL   BIT(7)
+#define IAR_FILTERFAIL_CODE2_9_8       (3)
+
+/* PHY_STS bits */
+#define IAR_PHY_STS_PLL_UNLOCK         BIT(7)
+#define IAR_PHY_STS_PLL_LOCK_ERR       BIT(6)
+#define IAR_PHY_STS_PLL_LOCK           BIT(5)
+#define IAR_PHY_STS_CRCVALID           BIT(3)
+#define IAR_PHY_STS_FILTERFAIL_FLAG_SEL        BIT(2)
+#define IAR_PHY_STS_SFD_DET            BIT(1)
+#define IAR_PHY_STS_PREAMBLE_DET       BIT(0)
+
+/* TESTMODE_CTRL bits */
+#define IAR_TEST_MODE_CTRL_HOT_ANT             BIT(4)
+#define IAR_TEST_MODE_CTRL_IDEAL_RSSI_EN       BIT(3)
+#define IAR_TEST_MODE_CTRL_IDEAL_PFC_EN                BIT(2)
+#define IAR_TEST_MODE_CTRL_CONTINUOUS_EN       BIT(1)
+#define IAR_TEST_MODE_CTRL_FPGA_EN             BIT(0)
+
+/* DTM_CTRL1 bits */
+#define IAR_DTM_CTRL1_ATM_LOCKED       BIT(7)
+#define IAR_DTM_CTRL1_DTM_EN           BIT(6)
+#define IAR_DTM_CTRL1_PAGE5            BIT(5)
+#define IAR_DTM_CTRL1_PAGE4            BIT(4)
+#define IAR_DTM_CTRL1_PAGE3            BIT(3)
+#define IAR_DTM_CTRL1_PAGE2            BIT(2)
+#define IAR_DTM_CTRL1_PAGE1            BIT(1)
+#define IAR_DTM_CTRL1_PAGE0            BIT(0)
+
+/* TX_MODE_CTRL */
+#define IAR_TX_MODE_CTRL_TX_INV                BIT(4)
+#define IAR_TX_MODE_CTRL_BT_EN         BIT(3)
+#define IAR_TX_MODE_CTRL_DTS2          BIT(2)
+#define IAR_TX_MODE_CTRL_DTS1          BIT(1)
+#define IAR_TX_MODE_CTRL_DTS0          BIT(0)
+
+#define TX_MODE_CTRL_DTS_MASK  (7)
+
+#endif /* _MCR20A_H */
index 5166575..a115f12 100644 (file)
@@ -74,6 +74,7 @@ struct ipvl_dev {
        DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
        netdev_features_t       sfeatures;
        u32                     msg_enable;
+       spinlock_t              addrs_lock;
 };
 
 struct ipvl_addr {
index 1b5dc20..17daebd 100644 (file)
@@ -109,25 +109,33 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
                                   const void *iaddr, bool is_v6)
 {
-       struct ipvl_addr *addr;
+       struct ipvl_addr *addr, *ret = NULL;
 
-       list_for_each_entry(addr, &ipvlan->addrs, anode)
-               if (addr_equal(is_v6, addr, iaddr))
-                       return addr;
-       return NULL;
+       rcu_read_lock();
+       list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
+               if (addr_equal(is_v6, addr, iaddr)) {
+                       ret = addr;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+       return ret;
 }
 
 bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
 {
        struct ipvl_dev *ipvlan;
+       bool ret = false;
 
-       ASSERT_RTNL();
-
-       list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
-               if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
-                       return true;
+       rcu_read_lock();
+       list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+               if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
+                       ret = true;
+                       break;
+               }
        }
-       return false;
+       rcu_read_unlock();
+       return ret;
 }
 
 static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
@@ -498,8 +506,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
 
        /* In this mode we dont care about multicast and broadcast traffic */
        if (is_multicast_ether_addr(ethh->h_dest)) {
-               pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n",
-                                   ntohs(skb->protocol));
+               pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
+                                    ntohs(skb->protocol));
                kfree_skb(skb);
                goto out;
        }
index 67c91ce..3efc1c9 100644 (file)
@@ -227,8 +227,10 @@ static int ipvlan_open(struct net_device *dev)
        else
                dev->flags &= ~IFF_NOARP;
 
-       list_for_each_entry(addr, &ipvlan->addrs, anode)
+       rcu_read_lock();
+       list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
                ipvlan_ht_addr_add(ipvlan, addr);
+       rcu_read_unlock();
 
        return dev_uc_add(phy_dev, phy_dev->dev_addr);
 }
@@ -244,8 +246,10 @@ static int ipvlan_stop(struct net_device *dev)
 
        dev_uc_del(phy_dev, phy_dev->dev_addr);
 
-       list_for_each_entry(addr, &ipvlan->addrs, anode)
+       rcu_read_lock();
+       list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
                ipvlan_ht_addr_del(addr);
+       rcu_read_unlock();
 
        return 0;
 }
@@ -588,6 +592,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
        ipvlan->sfeatures = IPVLAN_FEATURES;
        ipvlan_adjust_mtu(ipvlan, phy_dev);
        INIT_LIST_HEAD(&ipvlan->addrs);
+       spin_lock_init(&ipvlan->addrs_lock);
 
        /* TODO Probably put random address here to be presented to the
         * world but keep using the physical-dev address for the outgoing
@@ -665,11 +670,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
        struct ipvl_dev *ipvlan = netdev_priv(dev);
        struct ipvl_addr *addr, *next;
 
+       spin_lock_bh(&ipvlan->addrs_lock);
        list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
                ipvlan_ht_addr_del(addr);
-               list_del(&addr->anode);
+               list_del_rcu(&addr->anode);
                kfree_rcu(addr, rcu);
        }
+       spin_unlock_bh(&ipvlan->addrs_lock);
 
        ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
        list_del_rcu(&ipvlan->pnode);
@@ -760,8 +767,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
                if (dev->reg_state != NETREG_UNREGISTERING)
                        break;
 
-               list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
-                                        pnode)
+               list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
                        ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
                                                            &lst_kill);
                unregister_netdevice_many(&lst_kill);
@@ -793,6 +799,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
        return NOTIFY_DONE;
 }
 
+/* the caller must hold the addrs lock */
 static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
        struct ipvl_addr *addr;
@@ -811,7 +818,8 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
                addr->atype = IPVL_IPV6;
 #endif
        }
-       list_add_tail(&addr->anode, &ipvlan->addrs);
+
+       list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
 
        /* If the interface is not up, the address will be added to the hash
         * list by ipvlan_open.
@@ -826,15 +834,17 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
        struct ipvl_addr *addr;
 
+       spin_lock_bh(&ipvlan->addrs_lock);
        addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
-       if (!addr)
+       if (!addr) {
+               spin_unlock_bh(&ipvlan->addrs_lock);
                return;
+       }
 
        ipvlan_ht_addr_del(addr);
-       list_del(&addr->anode);
+       list_del_rcu(&addr->anode);
+       spin_unlock_bh(&ipvlan->addrs_lock);
        kfree_rcu(addr, rcu);
-
-       return;
 }
 
 static bool ipvlan_is_valid_dev(const struct net_device *dev)
@@ -853,14 +863,17 @@ static bool ipvlan_is_valid_dev(const struct net_device *dev)
 #if IS_ENABLED(CONFIG_IPV6)
 static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 {
-       if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
+       int ret = -EINVAL;
+
+       spin_lock_bh(&ipvlan->addrs_lock);
+       if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
                netif_err(ipvlan, ifup, ipvlan->dev,
                          "Failed to add IPv6=%pI6c addr for %s intf\n",
                          ip6_addr, ipvlan->dev->name);
-               return -EINVAL;
-       }
-
-       return ipvlan_add_addr(ipvlan, ip6_addr, true);
+       else
+               ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
+       spin_unlock_bh(&ipvlan->addrs_lock);
+       return ret;
 }
 
 static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
@@ -899,10 +912,6 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
        struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
        struct ipvl_dev *ipvlan = netdev_priv(dev);
 
-       /* FIXME IPv6 autoconf calls us from bh without RTNL */
-       if (in_softirq())
-               return NOTIFY_DONE;
-
        if (!ipvlan_is_valid_dev(dev))
                return NOTIFY_DONE;
 
@@ -922,14 +931,17 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
 
 static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
-       if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
+       int ret = -EINVAL;
+
+       spin_lock_bh(&ipvlan->addrs_lock);
+       if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
                netif_err(ipvlan, ifup, ipvlan->dev,
                          "Failed to add IPv4=%pI4 on %s intf.\n",
                          ip4_addr, ipvlan->dev->name);
-               return -EINVAL;
-       }
-
-       return ipvlan_add_addr(ipvlan, ip4_addr, false);
+       else
+               ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
+       spin_unlock_bh(&ipvlan->addrs_lock);
+       return ret;
 }
 
 static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
@@ -1024,6 +1036,7 @@ static struct pernet_operations ipvlan_net_ops = {
        .id = &ipvlan_netid,
        .size = sizeof(struct ipvlan_netns),
        .exit = ipvlan_ns_exit,
+       .async = true,
 };
 
 static int __init ipvlan_init_module(void)
index e8ae50e..319edc9 100644 (file)
@@ -38,14 +38,6 @@ static int aquantia_config_aneg(struct phy_device *phydev)
        return 0;
 }
 
-static int aquantia_aneg_done(struct phy_device *phydev)
-{
-       int reg;
-
-       reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
-       return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
-}
-
 static int aquantia_config_intr(struct phy_device *phydev)
 {
        int err;
@@ -125,7 +117,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQ1202",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -137,7 +129,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQ2104",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -149,7 +141,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQR105",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -161,7 +153,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQR106",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -173,7 +165,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQR107",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -185,7 +177,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQR405",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
index 9442db2..8022cd3 100644 (file)
@@ -30,14 +30,6 @@ static int cortina_read_reg(struct phy_device *phydev, u16 regnum)
                            MII_ADDR_C45 | regnum);
 }
 
-static int cortina_config_aneg(struct phy_device *phydev)
-{
-       phydev->supported = SUPPORTED_10000baseT_Full;
-       phydev->advertising = SUPPORTED_10000baseT_Full;
-
-       return 0;
-}
-
 static int cortina_read_status(struct phy_device *phydev)
 {
        int gpio_int_status, ret = 0;
@@ -61,11 +53,6 @@ err:
        return ret;
 }
 
-static int cortina_soft_reset(struct phy_device *phydev)
-{
-       return 0;
-}
-
 static int cortina_probe(struct phy_device *phydev)
 {
        u32 phy_id = 0;
@@ -101,9 +88,10 @@ static struct phy_driver cortina_driver[] = {
        .phy_id         = PHY_ID_CS4340,
        .phy_id_mask    = 0xffffffff,
        .name           = "Cortina CS4340",
-       .config_aneg    = cortina_config_aneg,
+       .config_init    = gen10g_config_init,
+       .config_aneg    = gen10g_config_aneg,
        .read_status    = cortina_read_status,
-       .soft_reset     = cortina_soft_reset,
+       .soft_reset     = gen10g_no_soft_reset,
        .probe          = cortina_probe,
 },
 };
index 8a0bd98..4f1efa0 100644 (file)
@@ -71,15 +71,6 @@ static int mv3310_probe(struct phy_device *phydev)
        return 0;
 }
 
-/*
- * Resetting the MV88X3310 causes it to become non-responsive.  Avoid
- * setting the reset bit(s).
- */
-static int mv3310_soft_reset(struct phy_device *phydev)
-{
-       return 0;
-}
-
 static int mv3310_config_init(struct phy_device *phydev)
 {
        __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
@@ -377,7 +368,7 @@ static struct phy_driver mv3310_drivers[] = {
                                  SUPPORTED_10000baseT_Full |
                                  SUPPORTED_Backplane,
                .probe          = mv3310_probe,
-               .soft_reset     = mv3310_soft_reset,
+               .soft_reset     = gen10g_no_soft_reset,
                .config_init    = mv3310_config_init,
                .config_aneg    = mv3310_config_aneg,
                .aneg_done      = mv3310_aneg_done,
index a457685..0017edd 100644 (file)
@@ -268,12 +268,13 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_mdix);
 
 /* The gen10g_* functions are the old Clause 45 stub */
 
-static int gen10g_config_aneg(struct phy_device *phydev)
+int gen10g_config_aneg(struct phy_device *phydev)
 {
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_config_aneg);
 
-static int gen10g_read_status(struct phy_device *phydev)
+int gen10g_read_status(struct phy_device *phydev)
 {
        u32 mmd_mask = phydev->c45_ids.devices_in_package;
        int ret;
@@ -291,14 +292,16 @@ static int gen10g_read_status(struct phy_device *phydev)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_read_status);
 
-static int gen10g_soft_reset(struct phy_device *phydev)
+int gen10g_no_soft_reset(struct phy_device *phydev)
 {
        /* Do nothing for now */
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_no_soft_reset);
 
-static int gen10g_config_init(struct phy_device *phydev)
+int gen10g_config_init(struct phy_device *phydev)
 {
        /* Temporarily just say we support everything */
        phydev->supported = SUPPORTED_10000baseT_Full;
@@ -306,22 +309,25 @@ static int gen10g_config_init(struct phy_device *phydev)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_config_init);
 
-static int gen10g_suspend(struct phy_device *phydev)
+int gen10g_suspend(struct phy_device *phydev)
 {
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_suspend);
 
-static int gen10g_resume(struct phy_device *phydev)
+int gen10g_resume(struct phy_device *phydev)
 {
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_resume);
 
 struct phy_driver genphy_10g_driver = {
        .phy_id         = 0xffffffff,
        .phy_id_mask    = 0xffffffff,
        .name           = "Generic 10G PHY",
-       .soft_reset     = gen10g_soft_reset,
+       .soft_reset     = gen10g_no_soft_reset,
        .config_init    = gen10g_config_init,
        .features       = 0,
        .config_aneg    = gen10g_config_aneg,
index 6ac8b29..7d17240 100644 (file)
@@ -679,7 +679,6 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
 
        mutex_lock(&phy->lock);
        mutex_lock(&pl->state_mutex);
-       pl->netdev->phydev = phy;
        pl->phydev = phy;
        linkmode_copy(pl->supported, supported);
        linkmode_copy(pl->link_config.advertising, config.advertising);
@@ -817,7 +816,6 @@ void phylink_disconnect_phy(struct phylink *pl)
        if (phy) {
                mutex_lock(&phy->lock);
                mutex_lock(&pl->state_mutex);
-               pl->netdev->phydev = NULL;
                pl->phydev = NULL;
                mutex_unlock(&pl->state_mutex);
                mutex_unlock(&phy->lock);
@@ -1584,25 +1582,14 @@ static int phylink_sfp_module_insert(void *upstream,
        bool changed;
        u8 port;
 
-       sfp_parse_support(pl->sfp_bus, id, support);
-       port = sfp_parse_port(pl->sfp_bus, id, support);
-       iface = sfp_parse_interface(pl->sfp_bus, id);
-
        ASSERT_RTNL();
 
-       switch (iface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-       case PHY_INTERFACE_MODE_2500BASEX:
-       case PHY_INTERFACE_MODE_10GKR:
-               break;
-       default:
-               return -EINVAL;
-       }
+       sfp_parse_support(pl->sfp_bus, id, support);
+       port = sfp_parse_port(pl->sfp_bus, id, support);
 
        memset(&config, 0, sizeof(config));
        linkmode_copy(config.advertising, support);
-       config.interface = iface;
+       config.interface = PHY_INTERFACE_MODE_NA;
        config.speed = SPEED_UNKNOWN;
        config.duplex = DUPLEX_UNKNOWN;
        config.pause = MLO_PAUSE_AN;
@@ -1610,6 +1597,22 @@ static int phylink_sfp_module_insert(void *upstream,
 
        /* Ignore errors if we're expecting a PHY to attach later */
        ret = phylink_validate(pl, support, &config);
+       if (ret) {
+               netdev_err(pl->netdev, "validation with support %*pb failed: %d\n",
+                          __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+               return ret;
+       }
+
+       iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
+       if (iface == PHY_INTERFACE_MODE_NA) {
+               netdev_err(pl->netdev,
+                          "selection of interface failed, advertisment %*pb\n",
+                          __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
+               return -EINVAL;
+       }
+
+       config.interface = iface;
+       ret = phylink_validate(pl, support, &config);
        if (ret) {
                netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
                           phylink_an_mode_str(MLO_AN_INBAND),
index 8961209..3d4ff5d 100644 (file)
@@ -105,68 +105,6 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 }
 EXPORT_SYMBOL_GPL(sfp_parse_port);
 
-/**
- * sfp_parse_interface() - Parse the phy_interface_t
- * @bus: a pointer to the &struct sfp_bus structure for the sfp module
- * @id: a pointer to the module's &struct sfp_eeprom_id
- *
- * Derive the phy_interface_t mode for the information found in the
- * module's identifying EEPROM. There is no standard or defined way
- * to derive this information, so we use some heuristics.
- *
- * If the encoding is 64b66b, then the module must be >= 10G, so
- * return %PHY_INTERFACE_MODE_10GKR.
- *
- * If it's 8b10b, then it's 1G or slower. If it's definitely a fibre
- * module, return %PHY_INTERFACE_MODE_1000BASEX mode, otherwise return
- * %PHY_INTERFACE_MODE_SGMII mode.
- *
- * If the encoding is not known, return %PHY_INTERFACE_MODE_NA.
- */
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
-                                   const struct sfp_eeprom_id *id)
-{
-       phy_interface_t iface;
-
-       /* Setting the serdes link mode is guesswork: there's no field in
-        * the EEPROM which indicates what mode should be used.
-        *
-        * If the module wants 64b66b, then it must be >= 10G.
-        *
-        * If it's a gigabit-only fiber module, it probably does not have
-        * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch
-        * to SGMII mode (which is required to support non-gigabit speeds).
-        */
-       switch (id->base.encoding) {
-       case SFP_ENCODING_8472_64B66B:
-               iface = PHY_INTERFACE_MODE_10GKR;
-               break;
-
-       case SFP_ENCODING_8B10B:
-               if (!id->base.e1000_base_t &&
-                   !id->base.e100_base_lx &&
-                   !id->base.e100_base_fx)
-                       iface = PHY_INTERFACE_MODE_1000BASEX;
-               else
-                       iface = PHY_INTERFACE_MODE_SGMII;
-               break;
-
-       default:
-               if (id->base.e1000_base_cx) {
-                       iface = PHY_INTERFACE_MODE_1000BASEX;
-                       break;
-               }
-
-               iface = PHY_INTERFACE_MODE_NA;
-               dev_err(bus->sfp_dev,
-                       "SFP module encoding does not support 8b10b nor 64b66b\n");
-               break;
-       }
-
-       return iface;
-}
-EXPORT_SYMBOL_GPL(sfp_parse_interface);
-
 /**
  * sfp_parse_support() - Parse the eeprom id for supported link modes
  * @bus: a pointer to the &struct sfp_bus structure for the sfp module
@@ -180,10 +118,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
                       unsigned long *support)
 {
        unsigned int br_min, br_nom, br_max;
-
-       phylink_set(support, Autoneg);
-       phylink_set(support, Pause);
-       phylink_set(support, Asym_Pause);
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, };
 
        /* Decode the bitrate information to MBd */
        br_min = br_nom = br_max = 0;
@@ -201,20 +136,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 
        /* Set ethtool support from the compliance fields. */
        if (id->base.e10g_base_sr)
-               phylink_set(support, 10000baseSR_Full);
+               phylink_set(modes, 10000baseSR_Full);
        if (id->base.e10g_base_lr)
-               phylink_set(support, 10000baseLR_Full);
+               phylink_set(modes, 10000baseLR_Full);
        if (id->base.e10g_base_lrm)
-               phylink_set(support, 10000baseLRM_Full);
+               phylink_set(modes, 10000baseLRM_Full);
        if (id->base.e10g_base_er)
-               phylink_set(support, 10000baseER_Full);
+               phylink_set(modes, 10000baseER_Full);
        if (id->base.e1000_base_sx ||
            id->base.e1000_base_lx ||
            id->base.e1000_base_cx)
-               phylink_set(support, 1000baseX_Full);
+               phylink_set(modes, 1000baseX_Full);
        if (id->base.e1000_base_t) {
-               phylink_set(support, 1000baseT_Half);
-               phylink_set(support, 1000baseT_Full);
+               phylink_set(modes, 1000baseT_Half);
+               phylink_set(modes, 1000baseT_Full);
        }
 
        /* 1000Base-PX or 1000Base-BX10 */
@@ -228,20 +163,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
        if ((id->base.sfp_ct_passive || id->base.sfp_ct_active) && br_nom) {
                /* This may look odd, but some manufacturers use 12000MBd */
                if (br_min <= 12000 && br_max >= 10300)
-                       phylink_set(support, 10000baseCR_Full);
+                       phylink_set(modes, 10000baseCR_Full);
                if (br_min <= 3200 && br_max >= 3100)
-                       phylink_set(support, 2500baseX_Full);
+                       phylink_set(modes, 2500baseX_Full);
                if (br_min <= 1300 && br_max >= 1200)
-                       phylink_set(support, 1000baseX_Full);
+                       phylink_set(modes, 1000baseX_Full);
        }
        if (id->base.sfp_ct_passive) {
                if (id->base.passive.sff8431_app_e)
-                       phylink_set(support, 10000baseCR_Full);
+                       phylink_set(modes, 10000baseCR_Full);
        }
        if (id->base.sfp_ct_active) {
                if (id->base.active.sff8431_app_e ||
                    id->base.active.sff8431_lim) {
-                       phylink_set(support, 10000baseCR_Full);
+                       phylink_set(modes, 10000baseCR_Full);
                }
        }
 
@@ -249,18 +184,18 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
        case 0x00: /* Unspecified */
                break;
        case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */
-               phylink_set(support, 100000baseSR4_Full);
-               phylink_set(support, 25000baseSR_Full);
+               phylink_set(modes, 100000baseSR4_Full);
+               phylink_set(modes, 25000baseSR_Full);
                break;
        case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */
        case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */
-               phylink_set(support, 100000baseLR4_ER4_Full);
+               phylink_set(modes, 100000baseLR4_ER4_Full);
                break;
        case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */
        case 0x0c: /* 25Gbase-CR CA-S */
        case 0x0d: /* 25Gbase-CR CA-N */
-               phylink_set(support, 100000baseCR4_Full);
-               phylink_set(support, 25000baseCR_Full);
+               phylink_set(modes, 100000baseCR4_Full);
+               phylink_set(modes, 25000baseCR_Full);
                break;
        default:
                dev_warn(bus->sfp_dev,
@@ -274,13 +209,70 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
            id->base.fc_speed_200 ||
            id->base.fc_speed_400) {
                if (id->base.br_nominal >= 31)
-                       phylink_set(support, 2500baseX_Full);
+                       phylink_set(modes, 2500baseX_Full);
                if (id->base.br_nominal >= 12)
-                       phylink_set(support, 1000baseX_Full);
+                       phylink_set(modes, 1000baseX_Full);
        }
+
+       /* If we haven't discovered any modes that this module supports, try
+        * the encoding and bitrate to determine supported modes. Some BiDi
+        * modules (eg, 1310nm/1550nm) are not 1000BASE-BX compliant due to
+        * the differing wavelengths, so do not set any transceiver bits.
+        */
+       if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+               /* If the encoding and bit rate allows 1000baseX */
+               if (id->base.encoding == SFP_ENCODING_8B10B && br_nom &&
+                   br_min <= 1300 && br_max >= 1200)
+                       phylink_set(modes, 1000baseX_Full);
+       }
+
+       bitmap_or(support, support, modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+       phylink_set(support, Autoneg);
+       phylink_set(support, Pause);
+       phylink_set(support, Asym_Pause);
 }
 EXPORT_SYMBOL_GPL(sfp_parse_support);
 
+/**
+ * sfp_select_interface() - Select appropriate phy_interface_t mode
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ * @link_modes: ethtool link modes mask
+ *
+ * Derive the phy_interface_t mode for the information found in the
+ * module's identifying EEPROM and the link modes mask. There is no
+ * standard or defined way to derive this information, so we decide
+ * based upon the link mode mask.
+ */
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+                                    const struct sfp_eeprom_id *id,
+                                    unsigned long *link_modes)
+{
+       if (phylink_test(link_modes, 10000baseCR_Full) ||
+           phylink_test(link_modes, 10000baseSR_Full) ||
+           phylink_test(link_modes, 10000baseLR_Full) ||
+           phylink_test(link_modes, 10000baseLRM_Full) ||
+           phylink_test(link_modes, 10000baseER_Full))
+               return PHY_INTERFACE_MODE_10GKR;
+
+       if (phylink_test(link_modes, 2500baseX_Full))
+               return PHY_INTERFACE_MODE_2500BASEX;
+
+       if (id->base.e1000_base_t ||
+           id->base.e100_base_lx ||
+           id->base.e100_base_fx)
+               return PHY_INTERFACE_MODE_SGMII;
+
+       if (phylink_test(link_modes, 1000baseX_Full))
+               return PHY_INTERFACE_MODE_1000BASEX;
+
+       dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n");
+
+       return PHY_INTERFACE_MODE_NA;
+}
+EXPORT_SYMBOL_GPL(sfp_select_interface);
+
 static LIST_HEAD(sfp_buses);
 static DEFINE_MUTEX(sfp_mutex);
 
index 6c7d928..83bf495 100644 (file)
@@ -42,6 +42,7 @@ enum {
 
        SFP_MOD_EMPTY = 0,
        SFP_MOD_PROBE,
+       SFP_MOD_HPOWER,
        SFP_MOD_PRESENT,
        SFP_MOD_ERROR,
 
@@ -86,6 +87,7 @@ static const enum gpiod_flags gpio_flags[] = {
  * access the I2C EEPROM.  However, Avago modules require 300ms.
  */
 #define T_PROBE_INIT   msecs_to_jiffies(300)
+#define T_HPOWER_LEVEL msecs_to_jiffies(300)
 #define T_PROBE_RETRY  msecs_to_jiffies(100)
 
 /* SFP modules appear to always have their PHY configured for bus address
@@ -110,10 +112,12 @@ struct sfp {
        struct sfp_bus *sfp_bus;
        struct phy_device *mod_phy;
        const struct sff_data *type;
+       u32 max_power_mW;
 
        unsigned int (*get_state)(struct sfp *);
        void (*set_state)(struct sfp *, unsigned int);
        int (*read)(struct sfp *, bool, u8, void *, size_t);
+       int (*write)(struct sfp *, bool, u8, void *, size_t);
 
        struct gpio_desc *gpio[GPIO_MAX];
 
@@ -201,10 +205,11 @@ static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state)
        }
 }
 
-static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
-                        void *buf, size_t len)
+static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+                       size_t len)
 {
        struct i2c_msg msgs[2];
+       u8 bus_addr = a2 ? 0x51 : 0x50;
        int ret;
 
        msgs[0].addr = bus_addr;
@@ -216,17 +221,38 @@ static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
        msgs[1].len = len;
        msgs[1].buf = buf;
 
-       ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+       ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
        if (ret < 0)
                return ret;
 
        return ret == ARRAY_SIZE(msgs) ? len : 0;
 }
 
-static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf,
-                       size_t len)
+static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+       size_t len)
 {
-       return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len);
+       struct i2c_msg msgs[1];
+       u8 bus_addr = a2 ? 0x51 : 0x50;
+       int ret;
+
+       msgs[0].addr = bus_addr;
+       msgs[0].flags = 0;
+       msgs[0].len = 1 + len;
+       msgs[0].buf = kmalloc(1 + len, GFP_KERNEL);
+       if (!msgs[0].buf)
+               return -ENOMEM;
+
+       msgs[0].buf[0] = dev_addr;
+       memcpy(&msgs[0].buf[1], buf, len);
+
+       ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
+
+       kfree(msgs[0].buf);
+
+       if (ret < 0)
+               return ret;
+
+       return ret == ARRAY_SIZE(msgs) ? len : 0;
 }
 
 static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
@@ -239,6 +265,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
 
        sfp->i2c = i2c;
        sfp->read = sfp_i2c_read;
+       sfp->write = sfp_i2c_write;
 
        i2c_mii = mdio_i2c_alloc(sfp->dev, i2c);
        if (IS_ERR(i2c_mii))
@@ -274,6 +301,11 @@ static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
        return sfp->read(sfp, a2, addr, buf, len);
 }
 
+static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
+{
+       return sfp->write(sfp, a2, addr, buf, len);
+}
+
 static unsigned int sfp_check(void *buf, size_t len)
 {
        u8 *p, check;
@@ -462,21 +494,83 @@ static void sfp_sm_mod_init(struct sfp *sfp)
                sfp_sm_probe_phy(sfp);
 }
 
+static int sfp_sm_mod_hpower(struct sfp *sfp)
+{
+       u32 power;
+       u8 val;
+       int err;
+
+       power = 1000;
+       if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
+               power = 1500;
+       if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
+               power = 2000;
+
+       if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE &&
+           (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) !=
+           SFP_DIAGMON_DDM) {
+               /* The module appears not to implement bus address 0xa2,
+                * or requires an address change sequence, so assume that
+                * the module powers up in the indicated power mode.
+                */
+               if (power > sfp->max_power_mW) {
+                       dev_err(sfp->dev,
+                               "Host does not support %u.%uW modules\n",
+                               power / 1000, (power / 100) % 10);
+                       return -EINVAL;
+               }
+               return 0;
+       }
+
+       if (power > sfp->max_power_mW) {
+               dev_warn(sfp->dev,
+                        "Host does not support %u.%uW modules, module left in power mode 1\n",
+                        power / 1000, (power / 100) % 10);
+               return 0;
+       }
+
+       if (power <= 1000)
+               return 0;
+
+       err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+       if (err != sizeof(val)) {
+               dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err);
+               err = -EAGAIN;
+               goto err;
+       }
+
+       val |= BIT(0);
+
+       err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+       if (err != sizeof(val)) {
+               dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err);
+               err = -EAGAIN;
+               goto err;
+       }
+
+       dev_info(sfp->dev, "Module switched to %u.%uW power level\n",
+                power / 1000, (power / 100) % 10);
+       return T_HPOWER_LEVEL;
+
+err:
+       return err;
+}
+
 static int sfp_sm_mod_probe(struct sfp *sfp)
 {
        /* SFP module inserted - read I2C data */
        struct sfp_eeprom_id id;
        u8 check;
-       int err;
+       int ret;
 
-       err = sfp_read(sfp, false, 0, &id, sizeof(id));
-       if (err < 0) {
-               dev_err(sfp->dev, "failed to read EEPROM: %d\n", err);
+       ret = sfp_read(sfp, false, 0, &id, sizeof(id));
+       if (ret < 0) {
+               dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
                return -EAGAIN;
        }
 
-       if (err != sizeof(id)) {
-               dev_err(sfp->dev, "EEPROM short read: %d\n", err);
+       if (ret != sizeof(id)) {
+               dev_err(sfp->dev, "EEPROM short read: %d\n", ret);
                return -EAGAIN;
        }
 
@@ -521,7 +615,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
                dev_warn(sfp->dev,
                         "module address swap to access page 0xA2 is not supported.\n");
 
-       return sfp_module_insert(sfp->sfp_bus, &sfp->id);
+       ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
+       if (ret < 0)
+               return ret;
+
+       return sfp_sm_mod_hpower(sfp);
 }
 
 static void sfp_sm_mod_remove(struct sfp *sfp)
@@ -560,17 +658,25 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
                if (event == SFP_E_REMOVE) {
                        sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
                } else if (event == SFP_E_TIMEOUT) {
-                       int err = sfp_sm_mod_probe(sfp);
+                       int val = sfp_sm_mod_probe(sfp);
 
-                       if (err == 0)
+                       if (val == 0)
                                sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
-                       else if (err == -EAGAIN)
-                               sfp_sm_set_timer(sfp, T_PROBE_RETRY);
-                       else
+                       else if (val > 0)
+                               sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val);
+                       else if (val != -EAGAIN)
                                sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0);
+                       else
+                               sfp_sm_set_timer(sfp, T_PROBE_RETRY);
                }
                break;
 
+       case SFP_MOD_HPOWER:
+               if (event == SFP_E_TIMEOUT) {
+                       sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
+                       break;
+               }
+               /* fallthrough */
        case SFP_MOD_PRESENT:
        case SFP_MOD_ERROR:
                if (event == SFP_E_REMOVE) {
@@ -889,6 +995,14 @@ static int sfp_probe(struct platform_device *pdev)
        if (!(sfp->gpio[GPIO_MODDEF0]))
                sfp->get_state = sff_gpio_get_state;
 
+       device_property_read_u32(&pdev->dev, "maximum-power-milliwatt",
+                                &sfp->max_power_mW);
+       if (!sfp->max_power_mW)
+               sfp->max_power_mW = 1000;
+
+       dev_info(sfp->dev, "Host maximum power %u.%uW\n",
+                sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10);
+
        sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
        if (!sfp->sfp_bus)
                return -ENOMEM;
index fb2cef7..22f3bdd 100644 (file)
@@ -34,39 +34,17 @@ MODULE_LICENSE("GPL v2");
                                MDIO_PHYXS_LNSTAT_SYNC3 | \
                                MDIO_PHYXS_LNSTAT_ALIGN)
 
-static int teranetics_config_init(struct phy_device *phydev)
-{
-       phydev->supported = SUPPORTED_10000baseT_Full;
-       phydev->advertising = SUPPORTED_10000baseT_Full;
-
-       return 0;
-}
-
-static int teranetics_soft_reset(struct phy_device *phydev)
-{
-       return 0;
-}
-
 static int teranetics_aneg_done(struct phy_device *phydev)
 {
-       int reg;
-
        /* auto negotiation state can only be checked when using copper
         * port, if using fiber port, just lie it's done.
         */
-       if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93)) {
-               reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
-               return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
-       }
+       if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93))
+               return genphy_c45_aneg_done(phydev);
 
        return 1;
 }
 
-static int teranetics_config_aneg(struct phy_device *phydev)
-{
-       return 0;
-}
-
 static int teranetics_read_status(struct phy_device *phydev)
 {
        int reg;
@@ -102,10 +80,10 @@ static struct phy_driver teranetics_driver[] = {
        .phy_id         = PHY_ID_TN2020,
        .phy_id_mask    = 0xffffffff,
        .name           = "Teranetics TN2020",
-       .soft_reset     = teranetics_soft_reset,
+       .soft_reset     = gen10g_no_soft_reset,
        .aneg_done      = teranetics_aneg_done,
-       .config_init    = teranetics_config_init,
-       .config_aneg    = teranetics_config_aneg,
+       .config_init    = gen10g_config_init,
+       .config_aneg    = gen10g_config_aneg,
        .read_status    = teranetics_read_status,
        .match_phy_device = teranetics_match_phy_device,
 },
index 255a5de..a393c1d 100644 (file)
@@ -971,6 +971,7 @@ static struct pernet_operations ppp_net_ops = {
        .exit = ppp_exit_net,
        .id   = &ppp_net_id,
        .size = sizeof(struct ppp_net),
+       .async = true,
 };
 
 static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)
index bd89d1c..c10e618 100644 (file)
@@ -1161,6 +1161,7 @@ static struct pernet_operations pppoe_net_ops = {
        .exit = pppoe_exit_net,
        .id   = &pppoe_net_id,
        .size = sizeof(struct pppoe_net),
+       .async = true,
 };
 
 static int __init pppoe_init(void)
index a468439..5dd781e 100644 (file)
@@ -1105,14 +1105,15 @@ static void team_port_disable_netpoll(struct team_port *port)
 }
 #endif
 
-static int team_upper_dev_link(struct team *team, struct team_port *port)
+static int team_upper_dev_link(struct team *team, struct team_port *port,
+                              struct netlink_ext_ack *extack)
 {
        struct netdev_lag_upper_info lag_upper_info;
        int err;
 
        lag_upper_info.tx_type = team->mode->lag_tx_type;
        err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
-                                          &lag_upper_info, NULL);
+                                          &lag_upper_info, extack);
        if (err)
                return err;
        port->dev->priv_flags |= IFF_TEAM_PORT;
@@ -1129,7 +1130,8 @@ static void __team_port_change_port_added(struct team_port *port, bool linkup);
 static int team_dev_type_check_change(struct net_device *dev,
                                      struct net_device *port_dev);
 
-static int team_port_add(struct team *team, struct net_device *port_dev)
+static int team_port_add(struct team *team, struct net_device *port_dev,
+                        struct netlink_ext_ack *extack)
 {
        struct net_device *dev = team->dev;
        struct team_port *port;
@@ -1137,12 +1139,14 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
        int err;
 
        if (port_dev->flags & IFF_LOOPBACK) {
+               NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port");
                netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n",
                           portname);
                return -EINVAL;
        }
 
        if (team_port_exists(port_dev)) {
+               NL_SET_ERR_MSG(extack, "Device is already a port of a team device");
                netdev_err(dev, "Device %s is already a port "
                                "of a team device\n", portname);
                return -EBUSY;
@@ -1150,6 +1154,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 
        if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
            vlan_uses_dev(dev)) {
+               NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
                netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n",
                           portname);
                return -EPERM;
@@ -1160,6 +1165,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
                return err;
 
        if (port_dev->flags & IFF_UP) {
+               NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port");
                netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
                           portname);
                return -EBUSY;
@@ -1227,7 +1233,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
                goto err_handler_register;
        }
 
-       err = team_upper_dev_link(team, port);
+       err = team_upper_dev_link(team, port, extack);
        if (err) {
                netdev_err(dev, "Device %s failed to set upper link\n",
                           portname);
@@ -1921,7 +1927,7 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev,
        int err;
 
        mutex_lock(&team->lock);
-       err = team_port_add(team, port_dev);
+       err = team_port_add(team, port_dev, extack);
        mutex_unlock(&team->lock);
 
        if (!err)
index 9ce0182..e459e60 100644 (file)
@@ -1434,6 +1434,7 @@ static struct pernet_operations vrf_net_ops __net_initdata = {
        .init = vrf_netns_init,
        .id   = &vrf_net_id,
        .size = sizeof(bool),
+       .async = true,
 };
 
 static int __init vrf_init_module(void)
index fab7a4d..aa5f034 100644 (file)
@@ -3752,6 +3752,7 @@ static struct pernet_operations vxlan_net_ops = {
        .exit_batch = vxlan_exit_batch_net,
        .id   = &vxlan_net_id,
        .size = sizeof(struct vxlan_net),
+       .async = true,
 };
 
 static int __init vxlan_init_module(void)
index b1cf7c6..ef58870 100644 (file)
@@ -419,7 +419,7 @@ static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
        BUG();
 }
 
-void xenvif_rx_skb(struct xenvif_queue *queue)
+static void xenvif_rx_skb(struct xenvif_queue *queue)
 {
        struct xenvif_pkt_state pkt;
 
index 9c36d61..2dee4e0 100644 (file)
@@ -709,6 +709,7 @@ static struct pernet_operations lockd_net_ops = {
        .exit = lockd_exit_net,
        .id = &lockd_net_id,
        .size = sizeof(struct lockd_net),
+       .async = true,
 };
 
 
index 7d89354..6c3083c 100644 (file)
@@ -2122,6 +2122,7 @@ static struct pernet_operations nfs_net_ops = {
        .exit = nfs_net_exit,
        .id   = &nfs_net_id,
        .size = sizeof(struct nfs_net),
+       .async = true,
 };
 
 /*
index 5be08f0..8c743a4 100644 (file)
@@ -118,6 +118,7 @@ static struct pernet_operations grace_net_ops = {
        .exit = grace_exit_net,
        .id   = &grace_net_id,
        .size = sizeof(struct list_head),
+       .async = true,
 };
 
 static int __init
index a7f16e0..8a45666 100644 (file)
@@ -96,7 +96,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)                                     \
 ({                                                                            \
        int __ret = 0;                                                         \
-       if (cgroup_bpf_enabled && sk) {                                        \
+       if (cgroup_bpf_enabled) {                                              \
                __ret = __cgroup_bpf_run_filter_sk(sk,                         \
                                                 BPF_CGROUP_INET_SOCK_CREATE); \
        }                                                                      \
index 276932d..fdb691b 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/set_memory.h>
 #include <linux/kallsyms.h>
 
-#include <net/xdp.h>
 #include <net/sch_generic.h>
 
 #include <uapi/linux/filter.h>
@@ -30,6 +29,7 @@ struct sk_buff;
 struct sock;
 struct seccomp_data;
 struct bpf_prog_aux;
+struct xdp_rxq_info;
 
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function
index bfea26a..4814cad 100644 (file)
@@ -1224,6 +1224,12 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
        return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
 }
 
+#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
+#define MLX5_VPORT_MANAGER(mdev) \
+       (MLX5_CAP_GEN(mdev, vport_group_manager) && \
+        (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
+        mlx5_core_is_pf(mdev))
+
 static inline int mlx5_get_gid_table_len(u16 param)
 {
        if (param > 4) {
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
new file mode 100644 (file)
index 0000000..d3c9db4
--- /dev/null
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _MLX5_ESWITCH_
+#define _MLX5_ESWITCH_
+
+#include <linux/mlx5/driver.h>
+
+enum {
+       SRIOV_NONE,
+       SRIOV_LEGACY,
+       SRIOV_OFFLOADS
+};
+
+enum {
+       REP_ETH,
+       REP_IB,
+       NUM_REP_TYPES,
+};
+
+struct mlx5_eswitch_rep;
+struct mlx5_eswitch_rep_if {
+       int                    (*load)(struct mlx5_core_dev *dev,
+                                      struct mlx5_eswitch_rep *rep);
+       void                   (*unload)(struct mlx5_eswitch_rep *rep);
+       void                   *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+       void                    *priv;
+       bool                   valid;
+};
+
+struct mlx5_eswitch_rep {
+       struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
+       u16                    vport;
+       u8                     hw_id[ETH_ALEN];
+       u16                    vlan;
+       u32                    vlan_refcount;
+};
+
+void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+                                    int vport_index,
+                                    struct mlx5_eswitch_rep_if *rep_if,
+                                    u8 rep_type);
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+                                      int vport_index,
+                                      u8 rep_type);
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+                                int vport,
+                                u8 rep_type);
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+                                               int vport);
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
+                                   int vport, u32 sqn);
+#endif
index 5396521..7ed82e4 100644 (file)
@@ -4,11 +4,10 @@
 
 #include <linux/in.h>
 #include <linux/pim.h>
-#include <linux/rhashtable.h>
-#include <net/sock.h>
 #include <net/fib_rules.h>
 #include <net/fib_notifier.h>
 #include <uapi/linux/mroute.h>
+#include <linux/mroute_base.h>
 
 #ifdef CONFIG_IP_MROUTE
 static inline int ip_mroute_opt(int opt)
@@ -56,18 +55,6 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule)
 }
 #endif
 
-struct vif_device {
-       struct net_device       *dev;                   /* Device we are using */
-       struct netdev_phys_item_id dev_parent_id;       /* Device parent ID    */
-       unsigned long   bytes_in,bytes_out;
-       unsigned long   pkt_in,pkt_out;         /* Statistics                   */
-       unsigned long   rate_limit;             /* Traffic shaping (NI)         */
-       unsigned char   threshold;              /* TTL threshold                */
-       unsigned short  flags;                  /* Control flags                */
-       __be32          local,remote;           /* Addresses(remote for tunnels)*/
-       int             link;                   /* Physical interface index     */
-};
-
 struct vif_entry_notifier_info {
        struct fib_notifier_info info;
        struct net_device *dev;
@@ -78,34 +65,6 @@ struct vif_entry_notifier_info {
 
 #define VIFF_STATIC 0x8000
 
-#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
-
-struct mr_table {
-       struct list_head        list;
-       possible_net_t          net;
-       u32                     id;
-       struct sock __rcu       *mroute_sk;
-       struct timer_list       ipmr_expire_timer;
-       struct list_head        mfc_unres_queue;
-       struct vif_device       vif_table[MAXVIFS];
-       struct rhltable         mfc_hash;
-       struct list_head        mfc_cache_list;
-       int                     maxvif;
-       atomic_t                cache_resolve_queue_len;
-       bool                    mroute_do_assert;
-       bool                    mroute_do_pim;
-       int                     mroute_reg_vif_num;
-};
-
-/* mfc_flags:
- * MFC_STATIC - the entry was added statically (not by a routing daemon)
- * MFC_OFFLOAD - the entry was offloaded to the hardware
- */
-enum {
-       MFC_STATIC = BIT(0),
-       MFC_OFFLOAD = BIT(1),
-};
-
 struct mfc_cache_cmp_arg {
        __be32 mfc_mcastgrp;
        __be32 mfc_origin;
@@ -113,28 +72,13 @@ struct mfc_cache_cmp_arg {
 
 /**
  * struct mfc_cache - multicast routing entries
- * @mnode: rhashtable list
+ * @_c: Common multicast routing information; has to be first [for casting]
  * @mfc_mcastgrp: destination multicast group address
  * @mfc_origin: source address
  * @cmparg: used for rhashtable comparisons
- * @mfc_parent: source interface (iif)
- * @mfc_flags: entry flags
- * @expires: unresolved entry expire time
- * @unresolved: unresolved cached skbs
- * @last_assert: time of last assert
- * @minvif: minimum VIF id
- * @maxvif: maximum VIF id
- * @bytes: bytes that have passed for this entry
- * @pkt: packets that have passed for this entry
- * @wrong_if: number of wrong source interface hits
- * @lastuse: time of last use of the group (traffic or update)
- * @ttls: OIF TTL threshold array
- * @refcount: reference count for this entry
- * @list: global entry list
- * @rcu: used for entry destruction
  */
 struct mfc_cache {
-       struct rhlist_head mnode;
+       struct mr_mfc _c;
        union {
                struct {
                        __be32 mfc_mcastgrp;
@@ -142,28 +86,6 @@ struct mfc_cache {
                };
                struct mfc_cache_cmp_arg cmparg;
        };
-       vifi_t mfc_parent;
-       int mfc_flags;
-
-       union {
-               struct {
-                       unsigned long expires;
-                       struct sk_buff_head unresolved;
-               } unres;
-               struct {
-                       unsigned long last_assert;
-                       int minvif;
-                       int maxvif;
-                       unsigned long bytes;
-                       unsigned long pkt;
-                       unsigned long wrong_if;
-                       unsigned long lastuse;
-                       unsigned char ttls[MAXVIFS];
-                       refcount_t refcount;
-               } res;
-       } mfc_un;
-       struct list_head list;
-       struct rcu_head rcu;
 };
 
 struct mfc_entry_notifier_info {
@@ -187,12 +109,12 @@ static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
 
 static inline void ipmr_cache_put(struct mfc_cache *c)
 {
-       if (refcount_dec_and_test(&c->mfc_un.res.refcount))
+       if (refcount_dec_and_test(&c->_c.mfc_un.res.refcount))
                ipmr_cache_free(c);
 }
 static inline void ipmr_cache_hold(struct mfc_cache *c)
 {
-       refcount_inc(&c->mfc_un.res.refcount);
+       refcount_inc(&c->_c.mfc_un.res.refcount);
 }
 
 #endif
index 3014c52..1ac38e6 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/skbuff.h>      /* for struct sk_buff_head */
 #include <net/net_namespace.h>
 #include <uapi/linux/mroute6.h>
+#include <linux/mroute_base.h>
 
 #ifdef CONFIG_IPV6_MROUTE
 static inline int ip6_mroute_opt(int opt)
@@ -62,57 +63,24 @@ static inline void ip6_mr_cleanup(void)
 }
 #endif
 
-struct mif_device {
-       struct net_device       *dev;                   /* Device we are using */
-       unsigned long   bytes_in,bytes_out;
-       unsigned long   pkt_in,pkt_out;         /* Statistics                   */
-       unsigned long   rate_limit;             /* Traffic shaping (NI)         */
-       unsigned char   threshold;              /* TTL threshold                */
-       unsigned short  flags;                  /* Control flags                */
-       int             link;                   /* Physical interface index     */
-};
-
 #define VIFF_STATIC 0x8000
 
-struct mfc6_cache {
-       struct list_head list;
-       struct in6_addr mf6c_mcastgrp;                  /* Group the entry belongs to   */
-       struct in6_addr mf6c_origin;                    /* Source of packet             */
-       mifi_t mf6c_parent;                     /* Source interface             */
-       int mfc_flags;                          /* Flags on line                */
+struct mfc6_cache_cmp_arg {
+       struct in6_addr mf6c_mcastgrp;
+       struct in6_addr mf6c_origin;
+};
 
+struct mfc6_cache {
+       struct mr_mfc _c;
        union {
                struct {
-                       unsigned long expires;
-                       struct sk_buff_head unresolved; /* Unresolved buffers           */
-               } unres;
-               struct {
-                       unsigned long last_assert;
-                       int minvif;
-                       int maxvif;
-                       unsigned long bytes;
-                       unsigned long pkt;
-                       unsigned long wrong_if;
-                       unsigned long lastuse;
-                       unsigned char ttls[MAXMIFS];    /* TTL thresholds               */
-               } res;
-       } mfc_un;
+                       struct in6_addr mf6c_mcastgrp;
+                       struct in6_addr mf6c_origin;
+               };
+               struct mfc6_cache_cmp_arg cmparg;
+       };
 };
 
-#define MFC_STATIC             1
-#define MFC_NOTIFY             2
-
-#define MFC6_LINES             64
-
-#define MFC6_HASH(a, g) (((__force u32)(a)->s6_addr32[0] ^ \
-                         (__force u32)(a)->s6_addr32[1] ^ \
-                         (__force u32)(a)->s6_addr32[2] ^ \
-                         (__force u32)(a)->s6_addr32[3] ^ \
-                         (__force u32)(g)->s6_addr32[0] ^ \
-                         (__force u32)(g)->s6_addr32[1] ^ \
-                         (__force u32)(g)->s6_addr32[2] ^ \
-                         (__force u32)(g)->s6_addr32[3]) % MFC6_LINES)
-
 #define MFC_ASSERT_THRESH (3*HZ)               /* Maximal freq. of asserts */
 
 struct rtmsg;
@@ -120,12 +88,12 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
                           struct rtmsg *rtm, u32 portid);
 
 #ifdef CONFIG_IPV6_MROUTE
-extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb);
 extern int ip6mr_sk_done(struct sock *sk);
 #else
-static inline struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+static inline bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
 {
-       return NULL;
+       return false;
 }
 static inline int ip6mr_sk_done(struct sock *sk)
 {
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
new file mode 100644 (file)
index 0000000..c2560cb
--- /dev/null
@@ -0,0 +1,346 @@
+#ifndef __LINUX_MROUTE_BASE_H
+#define __LINUX_MROUTE_BASE_H
+
+#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
+#include <linux/spinlock.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+/**
+ * struct vif_device - interface representor for multicast routing
+ * @dev: network device being used
+ * @bytes_in: statistic; bytes ingressing
+ * @bytes_out: statistic; bytes egresing
+ * @pkt_in: statistic; packets ingressing
+ * @pkt_out: statistic; packets egressing
+ * @rate_limit: Traffic shaping (NI)
+ * @threshold: TTL threshold
+ * @flags: Control flags
+ * @link: Physical interface index
+ * @dev_parent_id: device parent id
+ * @local: Local address
+ * @remote: Remote address for tunnels
+ */
+struct vif_device {
+       struct net_device *dev;
+       unsigned long bytes_in, bytes_out;
+       unsigned long pkt_in, pkt_out;
+       unsigned long rate_limit;
+       unsigned char threshold;
+       unsigned short flags;
+       int link;
+
+       /* Currently only used by ipmr */
+       struct netdev_phys_item_id dev_parent_id;
+       __be32 local, remote;
+};
+
+#ifndef MAXVIFS
+/* This one is nasty; value is defined in uapi using different symbols for
+ * mroute and morute6 but both map into same 32.
+ */
+#define MAXVIFS        32
+#endif
+
+#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
+
+/* mfc_flags:
+ * MFC_STATIC - the entry was added statically (not by a routing daemon)
+ * MFC_OFFLOAD - the entry was offloaded to the hardware
+ */
+enum {
+       MFC_STATIC = BIT(0),
+       MFC_OFFLOAD = BIT(1),
+};
+
+/**
+ * struct mr_mfc - common multicast routing entries
+ * @mnode: rhashtable list
+ * @mfc_parent: source interface (iif)
+ * @mfc_flags: entry flags
+ * @expires: unresolved entry expire time
+ * @unresolved: unresolved cached skbs
+ * @last_assert: time of last assert
+ * @minvif: minimum VIF id
+ * @maxvif: maximum VIF id
+ * @bytes: bytes that have passed for this entry
+ * @pkt: packets that have passed for this entry
+ * @wrong_if: number of wrong source interface hits
+ * @lastuse: time of last use of the group (traffic or update)
+ * @ttls: OIF TTL threshold array
+ * @refcount: reference count for this entry
+ * @list: global entry list
+ * @rcu: used for entry destruction
+ */
+struct mr_mfc {
+       struct rhlist_head mnode;
+       unsigned short mfc_parent;
+       int mfc_flags;
+
+       union {
+               struct {
+                       unsigned long expires;
+                       struct sk_buff_head unresolved;
+               } unres;
+               struct {
+                       unsigned long last_assert;
+                       int minvif;
+                       int maxvif;
+                       unsigned long bytes;
+                       unsigned long pkt;
+                       unsigned long wrong_if;
+                       unsigned long lastuse;
+                       unsigned char ttls[MAXVIFS];
+                       refcount_t refcount;
+               } res;
+       } mfc_un;
+       struct list_head list;
+       struct rcu_head rcu;
+};
+
+struct mr_table;
+
+/**
+ * struct mr_table_ops - callbacks and info for protocol-specific ops
+ * @rht_params: parameters for accessing the MFC hash
+ * @cmparg_any: a hash key to be used for matching on (*,*) routes
+ */
+struct mr_table_ops {
+       const struct rhashtable_params *rht_params;
+       void *cmparg_any;
+};
+
+/**
+ * struct mr_table - a multicast routing table
+ * @list: entry within a list of multicast routing tables
+ * @net: net where this table belongs
+ * @ops: protocol specific operations
+ * @id: identifier of the table
+ * @mroute_sk: socket associated with the table
+ * @ipmr_expire_timer: timer for handling unresolved routes
+ * @mfc_unres_queue: list of unresolved MFC entries
+ * @vif_table: array containing all possible vifs
+ * @mfc_hash: Hash table of all resolved routes for easy lookup
+ * @mfc_cache_list: list of resovled routes for possible traversal
+ * @maxvif: Identifier of highest value vif currently in use
+ * @cache_resolve_queue_len: current size of unresolved queue
+ * @mroute_do_assert: Whether to inform userspace on wrong ingress
+ * @mroute_do_pim: Whether to receive IGMP PIMv1
+ * @mroute_reg_vif_num: PIM-device vif index
+ */
+struct mr_table {
+       struct list_head        list;
+       possible_net_t          net;
+       struct mr_table_ops     ops;
+       u32                     id;
+       struct sock __rcu       *mroute_sk;
+       struct timer_list       ipmr_expire_timer;
+       struct list_head        mfc_unres_queue;
+       struct vif_device       vif_table[MAXVIFS];
+       struct rhltable         mfc_hash;
+       struct list_head        mfc_cache_list;
+       int                     maxvif;
+       atomic_t                cache_resolve_queue_len;
+       bool                    mroute_do_assert;
+       bool                    mroute_do_pim;
+       int                     mroute_reg_vif_num;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void vif_device_init(struct vif_device *v,
+                    struct net_device *dev,
+                    unsigned long rate_limit,
+                    unsigned char threshold,
+                    unsigned short flags,
+                    unsigned short get_iflink_mask);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+              struct mr_table_ops *ops,
+              void (*expire_func)(struct timer_list *t),
+              void (*table_set)(struct mr_table *mrt,
+                                struct net *net));
+
+/* These actually return 'struct mr_mfc *', but to avoid need for explicit
+ * castings they simply return void.
+ */
+void *mr_mfc_find_parent(struct mr_table *mrt,
+                        void *hasharg, int parent);
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi);
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                  struct mr_mfc *c, struct rtmsg *rtm);
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+                    struct mr_table *(*iter)(struct net *net,
+                                             struct mr_table *mrt),
+                    int (*fill)(struct mr_table *mrt,
+                                struct sk_buff *skb,
+                                u32 portid, u32 seq, struct mr_mfc *c,
+                                int cmd, int flags),
+                    spinlock_t *lock);
+#else
+static inline void vif_device_init(struct vif_device *v,
+                                  struct net_device *dev,
+                                  unsigned long rate_limit,
+                                  unsigned char threshold,
+                                  unsigned short flags,
+                                  unsigned short get_iflink_mask)
+{
+}
+
+static inline void *
+mr_table_alloc(struct net *net, u32 id,
+              struct mr_table_ops *ops,
+              void (*expire_func)(struct timer_list *t),
+              void (*table_set)(struct mr_table *mrt,
+                                struct net *net))
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_find_parent(struct mr_table *mrt,
+                                      void *hasharg, int parent)
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_find_any_parent(struct mr_table *mrt,
+                                          int vifi)
+{
+       return NULL;
+}
+
+static inline struct mr_mfc *mr_mfc_find_any(struct mr_table *mrt,
+                                            int vifi, void *hasharg)
+{
+       return NULL;
+}
+
+static inline int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                                struct mr_mfc *c, struct rtmsg *rtm)
+{
+       return -EINVAL;
+}
+
+static inline int
+mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+                struct mr_table *(*iter)(struct net *net,
+                                         struct mr_table *mrt),
+                int (*fill)(struct mr_table *mrt,
+                            struct sk_buff *skb,
+                            u32 portid, u32 seq, struct mr_mfc *c,
+                            int cmd, int flags),
+                spinlock_t *lock)
+{
+       return -EINVAL;
+}
+#endif
+
+static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg)
+{
+       return mr_mfc_find_parent(mrt, hasharg, -1);
+}
+
+#ifdef CONFIG_PROC_FS
+struct mr_vif_iter {
+       struct seq_net_private p;
+       struct mr_table *mrt;
+       int ct;
+};
+
+struct mr_mfc_iter {
+       struct seq_net_private p;
+       struct mr_table *mrt;
+       struct list_head *cache;
+
+       /* Lock protecting the mr_table's unresolved queue */
+       spinlock_t *lock;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos);
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       return *pos ? mr_vif_seq_idx(seq_file_net(seq),
+                                    seq->private, *pos - 1)
+                   : SEQ_START_TOKEN;
+}
+
+/* These actually return 'struct mr_mfc *', but to avoid need for explicit
+ * castings they simply return void.
+ */
+void *mr_mfc_seq_idx(struct net *net,
+                    struct mr_mfc_iter *it, loff_t pos);
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+                     loff_t *pos);
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+                                    struct mr_table *mrt, spinlock_t *lock)
+{
+       struct mr_mfc_iter *it = seq->private;
+
+       it->mrt = mrt;
+       it->cache = NULL;
+       it->lock = lock;
+
+       return *pos ? mr_mfc_seq_idx(seq_file_net(seq),
+                                    seq->private, *pos - 1)
+                   : SEQ_START_TOKEN;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+       struct mr_mfc_iter *it = seq->private;
+       struct mr_table *mrt = it->mrt;
+
+       if (it->cache == &mrt->mfc_unres_queue)
+               spin_unlock_bh(it->lock);
+       else if (it->cache == &mrt->mfc_cache_list)
+               rcu_read_unlock();
+}
+#else
+static inline void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter,
+                                  loff_t pos)
+{
+       return NULL;
+}
+
+static inline void *mr_vif_seq_next(struct seq_file *seq,
+                                   void *v, loff_t *pos)
+{
+       return NULL;
+}
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_seq_idx(struct net *net,
+                                  struct mr_mfc_iter *it, loff_t pos)
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+                                   loff_t *pos)
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+                                    struct mr_table *mrt, spinlock_t *lock)
+{
+       return NULL;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+}
+#endif
+#endif
+#endif
index 5a0c3e5..6e38c69 100644 (file)
@@ -994,6 +994,14 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev);
 int genphy_c45_an_disable_aneg(struct phy_device *phydev);
 int genphy_c45_read_mdix(struct phy_device *phydev);
 
+/* The gen10g_* functions are the old Clause 45 stub */
+int gen10g_config_aneg(struct phy_device *phydev);
+int gen10g_read_status(struct phy_device *phydev);
+int gen10g_no_soft_reset(struct phy_device *phydev);
+int gen10g_config_init(struct phy_device *phydev);
+int gen10g_suspend(struct phy_device *phydev);
+int gen10g_resume(struct phy_device *phydev);
+
 static inline int phy_read_status(struct phy_device *phydev)
 {
        if (!phydev->drv)
index e724d5a..ebce9e2 100644 (file)
@@ -422,10 +422,11 @@ struct sfp_upstream_ops {
 #if IS_ENABLED(CONFIG_SFP)
 int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
                   unsigned long *support);
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
-                                   const struct sfp_eeprom_id *id);
 void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
                       unsigned long *support);
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+                                    const struct sfp_eeprom_id *id,
+                                    unsigned long *link_modes);
 
 int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
 int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
@@ -444,18 +445,19 @@ static inline int sfp_parse_port(struct sfp_bus *bus,
        return PORT_OTHER;
 }
 
-static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
-                                               const struct sfp_eeprom_id *id)
-{
-       return PHY_INTERFACE_MODE_NA;
-}
-
 static inline void sfp_parse_support(struct sfp_bus *bus,
                                     const struct sfp_eeprom_id *id,
                                     unsigned long *support)
 {
 }
 
+static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+                                                  const struct sfp_eeprom_id *id,
+                                                  unsigned long *link_modes)
+{
+       return PHY_INTERFACE_MODE_NA;
+}
+
 static inline int sfp_get_module_info(struct sfp_bus *bus,
                                      struct ethtool_modinfo *modinfo)
 {
index 336da25..9cce0d8 100644 (file)
@@ -20,7 +20,6 @@ struct net_device *cs89x0_probe(int unit);
 struct net_device *mvme147lance_probe(int unit);
 struct net_device *tc515_probe(int unit);
 struct net_device *lance_probe(int unit);
-struct net_device *mac89x0_probe(int unit);
 struct net_device *cops_probe(int unit);
 struct net_device *ltpc_probe(void);
 
index bb7f467..29ba069 100644 (file)
@@ -21,4 +21,3 @@ struct ethoc_platform_data {
 };
 
 #endif /* !LINUX_NET_ETHOC_H */
-
index b3d2162..1c9e17c 100644 (file)
@@ -27,7 +27,7 @@ struct fib_rule {
        u8                      action;
        u8                      l3mdev;
        u8                      proto;
-       /* 1 byte hole, try to use */
+       u8                      ip_proto;
        u32                     target;
        __be64                  tun_id;
        struct fib_rule __rcu   *ctarget;
@@ -40,6 +40,8 @@ struct fib_rule {
        char                    iifname[IFNAMSIZ];
        char                    oifname[IFNAMSIZ];
        struct fib_kuid_range   uid_range;
+       struct fib_rule_port_range      sport_range;
+       struct fib_rule_port_range      dport_range;
        struct rcu_head         rcu;
 };
 
@@ -110,7 +112,11 @@ struct fib_rule_notifier_info {
        [FRA_GOTO]      = { .type = NLA_U32 }, \
        [FRA_L3MDEV]    = { .type = NLA_U8 }, \
        [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \
-       [FRA_PROTOCOL]  = { .type = NLA_U8 }
+       [FRA_PROTOCOL]  = { .type = NLA_U8 }, \
+       [FRA_IP_PROTO]  = { .type = NLA_U8 }, \
+       [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \
+       [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
+
 
 static inline void fib_rule_get(struct fib_rule *rule)
 {
@@ -144,6 +150,38 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
        return frh->table;
 }
 
+static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range)
+{
+       return range->start != 0 && range->end != 0;
+}
+
+static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
+                                        __be16 port)
+{
+       return ntohs(port) >= a->start &&
+               ntohs(port) <= a->end;
+}
+
+static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
+{
+       return a->start != 0 && a->end != 0 && a->end < 0xffff &&
+               a->start <= a->end;
+}
+
+static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
+                                              struct fib_rule_port_range *b)
+{
+       return a->start == b->start &&
+               a->end == b->end;
+}
+
+static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
+{
+       return rule->ip_proto ||
+               fib_rule_port_range_set(&rule->sport_range) ||
+               fib_rule_port_range_set(&rule->dport_range);
+}
+
 struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *,
                                         struct net *);
 void fib_rules_unregister(struct fib_rules_ops *);
index f1624fd..64e7ee9 100644 (file)
@@ -125,7 +125,7 @@ static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos,
        fl4->daddr = daddr;
        fl4->saddr = saddr;
 }
-                                     
+
 
 struct flowi6 {
        struct flowi_common     __fl_common;
index f90585d..797142e 100644 (file)
@@ -37,6 +37,9 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
                     bool *csum_err, __be16 proto, int nhs);
 
+bool is_gretap_dev(const struct net_device *dev);
+bool is_ip6gretap_dev(const struct net_device *dev);
+
 static inline int gre_calc_hlen(__be16 o_flags)
 {
        int addend = 4;
index c1a93ce..b68fea0 100644 (file)
@@ -49,9 +49,9 @@ struct inet_connection_sock_af_ops {
        u16         net_header_len;
        u16         net_frag_header_len;
        u16         sockaddr_len;
-       int         (*setsockopt)(struct sock *sk, int level, int optname, 
+       int         (*setsockopt)(struct sock *sk, int level, int optname,
                                  char __user *optval, unsigned int optlen);
-       int         (*getsockopt)(struct sock *sk, int level, int optname, 
+       int         (*getsockopt)(struct sock *sk, int level, int optname,
                                  char __user *optval, int __user *optlen);
 #ifdef CONFIG_COMPAT
        int         (*compat_setsockopt)(struct sock *sk,
@@ -67,7 +67,7 @@ struct inet_connection_sock_af_ops {
 
 /** inet_connection_sock - INET connection oriented sock
  *
- * @icsk_accept_queue:    FIFO of established children 
+ * @icsk_accept_queue:    FIFO of established children
  * @icsk_bind_hash:       Bind node
  * @icsk_timeout:         Timeout
  * @icsk_retransmit_timer: Resend (no ack)
@@ -122,7 +122,7 @@ struct inet_connection_sock {
                unsigned long     timeout;       /* Currently scheduled timeout            */
                __u32             lrcvtime;      /* timestamp of last received data packet */
                __u16             last_seg_size; /* Size of last incoming segment          */
-               __u16             rcv_mss;       /* MSS used for delayed ACK decisions     */ 
+               __u16             rcv_mss;       /* MSS used for delayed ACK decisions     */
        } icsk_ack;
        struct {
                int               enabled;
@@ -201,7 +201,7 @@ extern const char inet_csk_timer_bug_msg[];
 static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
-       
+
        if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
                icsk->icsk_pending = 0;
 #ifdef INET_CSK_CLEAR_TIMERS
index 746abff..fe63ba9 100644 (file)
@@ -186,15 +186,15 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 void ip4_datagram_release_cb(struct sock *sk);
 
 struct ip_reply_arg {
-       struct kvec iov[1];   
+       struct kvec iov[1];
        int         flags;
        __wsum      csum;
        int         csumoffset; /* u16 offset of csum in iov[0].iov_base */
-                               /* -1 if not needed */ 
+                               /* -1 if not needed */
        int         bound_dev_if;
        u8          tos;
        kuid_t      uid;
-}; 
+};
 
 #define IP_REPLY_ARG_NOSRCCHECK 1
 
@@ -577,13 +577,13 @@ int ip_frag_mem(struct net *net);
 /*
  *     Functions provided by ip_forward.c
  */
+
 int ip_forward(struct sk_buff *skb);
+
 /*
  *     Functions provided by ip_options.c
  */
+
 void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
                      __be32 daddr, struct rtable *rt, int is_frag);
 
index 34ec321..8d906a3 100644 (file)
@@ -415,6 +415,24 @@ void fib6_rules_cleanup(void);
 bool fib6_rule_default(const struct fib_rule *rule);
 int fib6_rules_dump(struct net *net, struct notifier_block *nb);
 unsigned int fib6_rules_seq_read(struct net *net);
+
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+                                                struct sk_buff *skb,
+                                                struct flowi6 *fl6,
+                                                struct flow_keys *flkeys)
+{
+       unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+       if (!net->ipv6.fib6_rules_require_fldissect)
+               return false;
+
+       skb_flow_dissect_flow_keys(skb, flkeys, flag);
+       fl6->fl6_sport = flkeys->ports.src;
+       fl6->fl6_dport = flkeys->ports.dst;
+       fl6->flowi6_proto = flkeys->basic.ip_proto;
+
+       return true;
+}
 #else
 static inline int               fib6_rules_init(void)
 {
@@ -436,5 +454,12 @@ static inline unsigned int fib6_rules_seq_read(struct net *net)
 {
        return 0;
 }
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+                                                struct sk_buff *skb,
+                                                struct flowi6 *fl6,
+                                                struct flow_keys *flkeys)
+{
+       return false;
+}
 #endif
 #endif
index 27d23a6..da2bde5 100644 (file)
@@ -127,7 +127,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
 
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
                            const struct in6_addr *saddr, int oif, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
+                      struct flow_keys *hkeys);
 
 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 
@@ -266,4 +267,5 @@ static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
               ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
               !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
 }
+
 #endif
index f805243..8812582 100644 (file)
@@ -157,7 +157,7 @@ struct fib_result_nl {
        unsigned char   nh_sel;
        unsigned char   type;
        unsigned char   scope;
-       int             err;      
+       int             err;
 };
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -293,6 +293,13 @@ static inline unsigned int fib4_rules_seq_read(struct net *net)
        return 0;
 }
 
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+                                                struct sk_buff *skb,
+                                                struct flowi4 *fl4,
+                                                struct flow_keys *flkeys)
+{
+       return false;
+}
 #else /* CONFIG_IP_MULTIPLE_TABLES */
 int __net_init fib4_rules_init(struct net *net);
 void __net_exit fib4_rules_exit(struct net *net);
@@ -341,6 +348,24 @@ bool fib4_rule_default(const struct fib_rule *rule);
 int fib4_rules_dump(struct net *net, struct notifier_block *nb);
 unsigned int fib4_rules_seq_read(struct net *net);
 
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+                                                struct sk_buff *skb,
+                                                struct flowi4 *fl4,
+                                                struct flow_keys *flkeys)
+{
+       unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+       if (!net->ipv4.fib_rules_require_fldissect)
+               return false;
+
+       skb_flow_dissect_flow_keys(skb, flkeys, flag);
+       fl4->fl4_sport = flkeys->ports.src;
+       fl4->fl4_dport = flkeys->ports.dst;
+       fl4->flowi4_proto = flkeys->basic.ip_proto;
+
+       return true;
+}
+
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
 /* Exported by fib_frontend.c */
@@ -371,7 +396,7 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
-                      const struct sk_buff *skb);
+                      const struct sk_buff *skb, struct flow_keys *flkeys);
 #endif
 void fib_select_multipath(struct fib_result *res, int hash);
 void fib_select_path(struct net *net, struct fib_result *res,
index 1f16773..cbe5add 100644 (file)
@@ -254,6 +254,22 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id)
 
 #ifdef CONFIG_INET
 
+static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
+                                      int proto,
+                                      __be32 daddr, __be32 saddr,
+                                      __be32 key, __u8 tos, int oif,
+                                      __u32 mark)
+{
+       memset(fl4, 0, sizeof(*fl4));
+       fl4->flowi4_oif = oif;
+       fl4->daddr = daddr;
+       fl4->saddr = saddr;
+       fl4->flowi4_tos = tos;
+       fl4->flowi4_proto = proto;
+       fl4->fl4_gre_key = key;
+       fl4->flowi4_mark = mark;
+}
+
 int ip_tunnel_init(struct net_device *dev);
 void ip_tunnel_uninit(struct net_device *dev);
 void  ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
index 7a98cd5..cabd3cd 100644 (file)
 
 #define IPV6_ADDR_ANY          0x0000U
 
-#define IPV6_ADDR_UNICAST              0x0001U 
-#define IPV6_ADDR_MULTICAST            0x0002U 
+#define IPV6_ADDR_UNICAST      0x0001U
+#define IPV6_ADDR_MULTICAST    0x0002U
 
 #define IPV6_ADDR_LOOPBACK     0x0010U
 #define IPV6_ADDR_LINKLOCAL    0x0020U
@@ -447,7 +447,7 @@ ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m,
 #endif
 }
 
-static inline void ipv6_addr_prefix(struct in6_addr *pfx, 
+static inline void ipv6_addr_prefix(struct in6_addr *pfx,
                                    const struct in6_addr *addr,
                                    int plen)
 {
@@ -496,7 +496,7 @@ static inline void __ipv6_addr_set_half(__be32 *addr,
        addr[1] = wl;
 }
 
-static inline void ipv6_addr_set(struct in6_addr *addr, 
+static inline void ipv6_addr_set(struct in6_addr *addr,
                                     __be32 w1, __be32 w2,
                                     __be32 w3, __be32 w4)
 {
@@ -732,7 +732,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int
        }
 
        /*
-        *      we should *never* get to this point since that 
+        *      we should *never* get to this point since that
         *      would mean the addrs are equal
         *
         *      However, we do get to it 8) And exacly, when
index 44668c2..3a970e4 100644 (file)
@@ -52,6 +52,7 @@ struct netns_ipv4 {
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        struct fib_rules_ops    *rules_ops;
        bool                    fib_has_custom_rules;
+       unsigned int            fib_rules_require_fldissect;
        struct fib_table __rcu  *fib_main;
        struct fib_table __rcu  *fib_default;
 #endif
index 987cc45..e286fda 100644 (file)
@@ -71,7 +71,8 @@ struct netns_ipv6 {
        unsigned int             ip6_rt_gc_expire;
        unsigned long            ip6_rt_last_gc;
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-       bool                     fib6_has_custom_rules;
+       unsigned int            fib6_rules_require_fldissect;
+       bool                    fib6_has_custom_rules;
        struct rt6_info         *ip6_prohibit_entry;
        struct rt6_info         *ip6_blk_hole_entry;
        struct fib6_table       *fib6_local_tbl;
@@ -84,7 +85,7 @@ struct netns_ipv6 {
        struct sock             *mc_autojoin_sk;
 #ifdef CONFIG_IPV6_MROUTE
 #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
-       struct mr6_table        *mrt6;
+       struct mr_table         *mrt6;
 #else
        struct list_head        mr6_tables;
        struct fib_rules_ops    *mr6_rules_ops;
index 8740625..e828d31 100644 (file)
@@ -806,6 +806,7 @@ enum tc_prio_command {
        TC_PRIO_REPLACE,
        TC_PRIO_DESTROY,
        TC_PRIO_STATS,
+       TC_PRIO_GRAFT,
 };
 
 struct tc_prio_qopt_offload_params {
@@ -818,6 +819,11 @@ struct tc_prio_qopt_offload_params {
        struct gnet_stats_queue *qstats;
 };
 
+struct tc_prio_qopt_offload_graft_params {
+       u8 band;
+       u32 child_handle;
+};
+
 struct tc_prio_qopt_offload {
        enum tc_prio_command command;
        u32 handle;
@@ -825,6 +831,8 @@ struct tc_prio_qopt_offload {
        union {
                struct tc_prio_qopt_offload_params replace_params;
                struct tc_qopt_offload_stats stats;
+               struct tc_prio_qopt_offload_graft_params graft_params;
        };
 };
+
 #endif
index e2ab136..d4907b5 100644 (file)
@@ -540,7 +540,7 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb)
        return false;
 }
 
-/* Reset all TX qdiscs greater then index of a device.  */
+/* Reset all TX qdiscs greater than index of a device.  */
 static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
 {
        struct Qdisc *qdisc;
index 92b06c6..9c9b376 100644 (file)
@@ -511,8 +511,6 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
 #endif
 /* tcp_output.c */
 
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
-                    int min_tso_segs);
 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
                               int nonagle);
 int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
@@ -981,8 +979,8 @@ struct tcp_congestion_ops {
        u32  (*undo_cwnd)(struct sock *sk);
        /* hook for packet ack accounting (optional) */
        void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
-       /* suggest number of segments for each skb to transmit (optional) */
-       u32 (*tso_segs_goal)(struct sock *sk);
+       /* override sysctl_tcp_min_tso_segs */
+       u32 (*min_tso_segs)(struct sock *sk);
        /* returns the multiplier used in tcp_sndbuf_expand (optional) */
        u32 (*sndbuf_expand)(struct sock *sk);
        /* call when packets are delivered to update cwnd and pacing rate,
index 7d20776..aa027ba 100644 (file)
@@ -1267,12 +1267,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
 
 static inline void xfrm_sk_free_policy(struct sock *sk) {}
 static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
-static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }  
-static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } 
+static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
 static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
-{ 
-       return 1; 
-} 
+{
+       return 1;
+}
 static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
 {
        return 1;
@@ -1356,7 +1356,7 @@ __xfrm6_state_addr_check(const struct xfrm_state *x,
 {
        if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) &&
            (ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) ||
-            ipv6_addr_any((struct in6_addr *)saddr) || 
+            ipv6_addr_any((struct in6_addr *)saddr) ||
             ipv6_addr_any((struct in6_addr *)&x->props.saddr)))
                return 1;
        return 0;
@@ -1666,7 +1666,7 @@ int xfrm_user_policy(struct sock *sk, int optname,
 static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
        return -ENOPROTOOPT;
-} 
+}
 
 static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 {
index 28812ed..dc64cfa 100644 (file)
@@ -20,13 +20,11 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_ICMP6     3
 #define SO_EE_ORIGIN_TXSTATUS  4
 #define SO_EE_ORIGIN_ZEROCOPY  5
-#define SO_EE_ORIGIN_ZCOOKIE   6
 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee)     ((struct sockaddr*)((ee)+1))
 
 #define SO_EE_CODE_ZEROCOPY_COPIED     1
-#define        SO_EE_ORIGIN_MAX_ZCOOKIES       8
 
 /**
  *     struct scm_timestamping - timestamps exposed through cmsg
index 77d90ae..232df14 100644 (file)
@@ -35,6 +35,11 @@ struct fib_rule_uid_range {
        __u32           end;
 };
 
+struct fib_rule_port_range {
+       __u16           start;
+       __u16           end;
+};
+
 enum {
        FRA_UNSPEC,
        FRA_DST,        /* destination address */
@@ -59,6 +64,9 @@ enum {
        FRA_L3MDEV,     /* iif or oif is l3mdev goto its table */
        FRA_UID_RANGE,  /* UID range */
        FRA_PROTOCOL,   /* Originator of the rule */
+       FRA_IP_PROTO,   /* ip proto */
+       FRA_SPORT_RANGE, /* sport */
+       FRA_DPORT_RANGE, /* dport */
        __FRA_MAX
 };
 
index 12e3bca..a66b213 100644 (file)
 #define RDS_CMSG_MASKED_ATOMIC_CSWP    9
 #define RDS_CMSG_RXPATH_LATENCY                11
 #define        RDS_CMSG_ZCOPY_COOKIE           12
+#define        RDS_CMSG_ZCOPY_COMPLETION       13
 
 #define RDS_INFO_FIRST                 10000
 #define RDS_INFO_COUNTERS              10000
@@ -317,6 +318,12 @@ struct rds_rdma_notify {
 #define RDS_RDMA_DROPPED       3
 #define RDS_RDMA_OTHER_ERROR   4
 
+#define        RDS_MAX_ZCOOKIES        8
+struct rds_zcopy_cookies {
+       __u32 num;
+       __u32 cookies[RDS_MAX_ZCOOKIES];
+};
+
 /*
  * Common set of flags for all RDMA related structs
  */
index 5fb69a8..3c74b16 100644 (file)
@@ -508,10 +508,6 @@ err:
 static const int caller_saved[CALLER_SAVED_REGS] = {
        BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
-#define CALLEE_SAVED_REGS 5
-static const int callee_saved[CALLEE_SAVED_REGS] = {
-       BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
-};
 
 static void __mark_reg_not_init(struct bpf_reg_state *reg);
 
index bad01b1..bd0ed39 100644 (file)
@@ -729,6 +729,7 @@ static struct pernet_operations vlan_net_ops = {
        .exit = vlan_exit_net,
        .id   = &vlan_net_id,
        .size = sizeof(struct vlan_net),
+       .async = true,
 };
 
 static int __init vlan_proto_init(void)
index 6bf06e7..7770481 100644 (file)
@@ -188,6 +188,7 @@ static void __net_exit br_net_exit(struct net *net)
 
 static struct pernet_operations br_net_ops = {
        .exit   = br_net_exit,
+       .async  = true,
 };
 
 static const struct stp_proto br_stp_proto = {
index 27f1d4f..484f541 100644 (file)
@@ -967,6 +967,7 @@ static struct pernet_operations brnf_net_ops __read_mostly = {
        .exit = brnf_exit_net,
        .id   = &brnf_net_id,
        .size = sizeof(struct brnf_net),
+       .async = true,
 };
 
 static struct notifier_block brnf_notifier __read_mostly = {
index ac5e5e3..26730d3 100644 (file)
@@ -1717,6 +1717,7 @@ static void canbcm_pernet_exit(struct net *net)
 static struct pernet_operations canbcm_pernet_ops __read_mostly = {
        .init = canbcm_pernet_init,
        .exit = canbcm_pernet_exit,
+       .async = true,
 };
 
 static int __init bcm_module_init(void)
index 5bdcc5a..40fb3ae 100644 (file)
@@ -2378,7 +2378,7 @@ EXPORT_SYMBOL(netdev_set_num_tc);
 
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
- * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
  */
 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 {
index a6aea80..f6f04fc 100644 (file)
@@ -33,6 +33,10 @@ bool fib_rule_matchall(const struct fib_rule *rule)
        if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
            !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
                return false;
+       if (fib_rule_port_range_set(&rule->sport_range))
+               return false;
+       if (fib_rule_port_range_set(&rule->dport_range))
+               return false;
        return true;
 }
 EXPORT_SYMBOL_GPL(fib_rule_matchall);
@@ -221,6 +225,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
        return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
 }
 
+static int nla_get_port_range(struct nlattr *pattr,
+                             struct fib_rule_port_range *port_range)
+{
+       const struct fib_rule_port_range *pr = nla_data(pattr);
+
+       if (!fib_rule_port_range_valid(pr))
+               return -EINVAL;
+
+       port_range->start = pr->start;
+       port_range->end = pr->end;
+
+       return 0;
+}
+
+static int nla_put_port_range(struct sk_buff *skb, int attrtype,
+                             struct fib_rule_port_range *range)
+{
+       return nla_put(skb, attrtype, sizeof(*range), range);
+}
+
 static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
                          struct flowi *fl, int flags,
                          struct fib_lookup_arg *arg)
@@ -425,6 +449,17 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
                    !uid_eq(r->uid_range.end, rule->uid_range.end))
                        continue;
 
+               if (r->ip_proto != rule->ip_proto)
+                       continue;
+
+               if (!fib_rule_port_range_compare(&r->sport_range,
+                                                &rule->sport_range))
+                       continue;
+
+               if (!fib_rule_port_range_compare(&r->dport_range,
+                                                &rule->dport_range))
+                       continue;
+
                if (!ops->compare(r, frh, tb))
                        continue;
                return 1;
@@ -569,6 +604,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
                rule->uid_range = fib_kuid_range_unset;
        }
 
+       if (tb[FRA_IP_PROTO])
+               rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+
+       if (tb[FRA_SPORT_RANGE]) {
+               err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+                                        &rule->sport_range);
+               if (err)
+                       goto errout_free;
+       }
+
+       if (tb[FRA_DPORT_RANGE]) {
+               err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+                                        &rule->dport_range);
+               if (err)
+                       goto errout_free;
+       }
+
        if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
            rule_exists(ops, frh, tb, rule)) {
                err = -EEXIST;
@@ -634,6 +686,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 {
        struct net *net = sock_net(skb->sk);
        struct fib_rule_hdr *frh = nlmsg_data(nlh);
+       struct fib_rule_port_range sprange = {0, 0};
+       struct fib_rule_port_range dprange = {0, 0};
        struct fib_rules_ops *ops = NULL;
        struct fib_rule *rule, *r;
        struct nlattr *tb[FRA_MAX+1];
@@ -667,6 +721,20 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
                range = fib_kuid_range_unset;
        }
 
+       if (tb[FRA_SPORT_RANGE]) {
+               err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+                                        &sprange);
+               if (err)
+                       goto errout;
+       }
+
+       if (tb[FRA_DPORT_RANGE]) {
+               err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+                                        &dprange);
+               if (err)
+                       goto errout;
+       }
+
        list_for_each_entry(rule, &ops->rules_list, list) {
                if (tb[FRA_PROTOCOL] &&
                    (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
@@ -712,6 +780,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
                     !uid_eq(rule->uid_range.end, range.end)))
                        continue;
 
+               if (tb[FRA_IP_PROTO] &&
+                   (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
+                       continue;
+
+               if (fib_rule_port_range_set(&sprange) &&
+                   !fib_rule_port_range_compare(&rule->sport_range, &sprange))
+                       continue;
+
+               if (fib_rule_port_range_set(&dprange) &&
+                   !fib_rule_port_range_compare(&rule->dport_range, &dprange))
+                       continue;
+
                if (!ops->compare(rule, frh, tb))
                        continue;
 
@@ -790,7 +870,10 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
                         + nla_total_size(4) /* FRA_FWMASK */
                         + nla_total_size_64bit(8) /* FRA_TUN_ID */
                         + nla_total_size(sizeof(struct fib_kuid_range))
-                        + nla_total_size(1); /* FRA_PROTOCOL */
+                        + nla_total_size(1) /* FRA_PROTOCOL */
+                        + nla_total_size(1) /* FRA_IP_PROTO */
+                        + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
+                        + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */
 
        if (ops->nlmsg_payload)
                payload += ops->nlmsg_payload(rule);
@@ -855,7 +938,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
            (rule->l3mdev &&
             nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
            (uid_range_set(&rule->uid_range) &&
-            nla_put_uid_range(skb, &rule->uid_range)))
+            nla_put_uid_range(skb, &rule->uid_range)) ||
+           (fib_rule_port_range_set(&rule->sport_range) &&
+            nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
+           (fib_rule_port_range_set(&rule->dport_range) &&
+            nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
+           (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
                goto nla_put_failure;
 
        if (rule->suppress_ifgroup != -1) {
index 27a5523..690e78c 100644 (file)
@@ -362,7 +362,7 @@ static void dec_net_namespaces(struct ucounts *ucounts)
        dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
 }
 
-static struct kmem_cache *net_cachep;
+static struct kmem_cache *net_cachep __ro_after_init;
 static struct workqueue_struct *netns_wq;
 
 static struct net *net_alloc(void)
index 1a7485a..96d36b8 100644 (file)
@@ -77,8 +77,8 @@
 #include <linux/capability.h>
 #include <linux/user_namespace.h>
 
-struct kmem_cache *skbuff_head_cache __read_mostly;
-static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+struct kmem_cache *skbuff_head_cache __ro_after_init;
+static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
 int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
 EXPORT_SYMBOL(sysctl_max_skb_frags);
 
index 974765b..e4f3053 100644 (file)
@@ -104,6 +104,7 @@ static void lowpan_setup(struct net_device *ldev)
        /* We need an ipv6hdr as minimum len when calling xmit */
        ldev->hard_header_len   = sizeof(struct ipv6hdr);
        ldev->flags             = IFF_BROADCAST | IFF_MULTICAST;
+       ldev->priv_flags        |= IFF_NO_QUEUE;
 
        ldev->netdev_ops        = &lowpan_netdev_ops;
        ldev->header_ops        = &lowpan_header_ops;
index cb7176c..9104943 100644 (file)
@@ -345,6 +345,7 @@ static void __net_exit cfg802154_pernet_exit(struct net *net)
 
 static struct pernet_operations cfg802154_pernet_ops = {
        .exit = cfg802154_pernet_exit,
+       .async = true,
 };
 
 static int __init wpan_phy_class_init(void)
index f48fe6f..80dad30 100644 (file)
@@ -212,9 +212,14 @@ config NET_IPGRE_BROADCAST
          Network), but can be distributed all over the Internet. If you want
          to do that, say Y here and to "IP multicast routing" below.
 
+config IP_MROUTE_COMMON
+       bool
+       depends on IP_MROUTE || IPV6_MROUTE
+
 config IP_MROUTE
        bool "IP: multicast routing"
        depends on IP_MULTICAST
+       select IP_MROUTE_COMMON
        help
          This is used if you want your machine to act as a router for IP
          packets that have several destination addresses. It is needed on the
index 47a0a66..a07b7dd 100644 (file)
@@ -20,6 +20,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
 obj-$(CONFIG_NET_IPIP) += ipip.o
 gre-y := gre_demux.o
 obj-$(CONFIG_NET_FOU) += fou.o
index 35d646a..737d11b 100644 (file)
@@ -182,6 +182,17 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
        if (r->tos && (r->tos != fl4->flowi4_tos))
                return 0;
 
+       if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
+               return 0;
+
+       if (fib_rule_port_range_set(&rule->sport_range) &&
+           !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
+               return 0;
+
+       if (fib_rule_port_range_set(&rule->dport_range) &&
+           !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
+               return 0;
+
        return 1;
 }
 
@@ -244,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
        }
 #endif
 
+       if (fib_rule_requires_fldissect(rule))
+               net->ipv4.fib_rules_require_fldissect++;
+
        rule4->src_len = frh->src_len;
        rule4->srcmask = inet_make_mask(rule4->src_len);
        rule4->dst_len = frh->dst_len;
@@ -272,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule)
                net->ipv4.fib_num_tclassid_users--;
 #endif
        net->ipv4.fib_has_custom_rules = true;
+
+       if (net->ipv4.fib_rules_require_fldissect &&
+           fib_rule_requires_fldissect(rule))
+               net->ipv4.fib_rules_require_fldissect--;
 errout:
        return err;
 }
@@ -389,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net)
                goto fail;
        net->ipv4.rules_ops = ops;
        net->ipv4.fib_has_custom_rules = false;
+       net->ipv4.fib_rules_require_fldissect = 0;
        return 0;
 
 fail:
index cd46d76..181b0d8 100644 (file)
@@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
                fnhe = rcu_dereference_protected(hash[i].chain, 1);
                while (fnhe) {
                        struct fib_nh_exception *next;
-                       
+
                        next = rcu_dereference_protected(fnhe->fnhe_next, 1);
 
                        rt_fibinfo_free(&fnhe->fnhe_rth_input);
@@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
        if (res->fi->fib_nhs > 1) {
-               int h = fib_multipath_hash(res->fi, fl4, skb);
+               int h = fib_multipath_hash(res->fi, fl4, skb, NULL);
 
                fib_select_multipath(res, h);
        }
index 5530cd6..62243a8 100644 (file)
@@ -50,6 +50,7 @@
 
 #define VERSION "0.409"
 
+#include <linux/cache.h>
 #include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/types.h>
@@ -191,8 +192,8 @@ static size_t tnode_free_size;
  */
 static const int sync_pages = 128;
 
-static struct kmem_cache *fn_alias_kmem __read_mostly;
-static struct kmem_cache *trie_leaf_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __ro_after_init;
+static struct kmem_cache *trie_leaf_kmem __ro_after_init;
 
 static inline struct tnode *tn_info(struct key_vector *kv)
 {
index 914d569..1f04bd9 100644 (file)
@@ -6,6 +6,7 @@
  *  Authors:   Andrey V. Savochkin <saw@msu.ru>
  */
 
+#include <linux/cache.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/slab.h>
@@ -51,7 +52,7 @@
  *             daddr: unchangeable
  */
 
-static struct kmem_cache *peer_cachep __read_mostly;
+static struct kmem_cache *peer_cachep __ro_after_init;
 
 void inet_peer_base_init(struct inet_peer_base *bp)
 {
index 45d97e9..0fe1d69 100644 (file)
@@ -1044,6 +1044,7 @@ static struct pernet_operations ipgre_net_ops = {
        .exit_batch = ipgre_exit_batch_net,
        .id   = &ipgre_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1322,6 +1323,12 @@ static void ipgre_tap_setup(struct net_device *dev)
        ip_tunnel_setup(dev, gre_tap_net_id);
 }
 
+bool is_gretap_dev(const struct net_device *dev)
+{
+       return dev->netdev_ops == &gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_gretap_dev);
+
 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
                         struct nlattr *tb[], struct nlattr *data[],
                         struct netlink_ext_ack *extack)
@@ -1623,6 +1630,7 @@ static struct pernet_operations ipgre_tap_net_ops = {
        .exit_batch = ipgre_tap_exit_batch_net,
        .id   = &gre_tap_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int __net_init erspan_init_net(struct net *net)
@@ -1641,6 +1649,7 @@ static struct pernet_operations erspan_net_ops = {
        .exit_batch = erspan_exit_batch_net,
        .id   = &erspan_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int __init ipgre_init(void)
index d786a84..b2117d8 100644 (file)
@@ -290,22 +290,6 @@ failed:
        return ERR_PTR(err);
 }
 
-static inline void init_tunnel_flow(struct flowi4 *fl4,
-                                   int proto,
-                                   __be32 daddr, __be32 saddr,
-                                   __be32 key, __u8 tos, int oif,
-                                   __u32 mark)
-{
-       memset(fl4, 0, sizeof(*fl4));
-       fl4->flowi4_oif = oif;
-       fl4->daddr = daddr;
-       fl4->saddr = saddr;
-       fl4->flowi4_tos = tos;
-       fl4->flowi4_proto = proto;
-       fl4->fl4_gre_key = key;
-       fl4->flowi4_mark = mark;
-}
-
 static int ip_tunnel_bind_dev(struct net_device *dev)
 {
        struct net_device *tdev = NULL;
@@ -322,10 +306,10 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
                struct flowi4 fl4;
                struct rtable *rt;
 
-               init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
-                                iph->saddr, tunnel->parms.o_key,
-                                RT_TOS(iph->tos), tunnel->parms.link,
-                                tunnel->fwmark);
+               ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
+                                   iph->saddr, tunnel->parms.o_key,
+                                   RT_TOS(iph->tos), tunnel->parms.link,
+                                   tunnel->fwmark);
                rt = ip_route_output_key(tunnel->net, &fl4);
 
                if (!IS_ERR(rt)) {
@@ -581,8 +565,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
                else if (skb->protocol == htons(ETH_P_IPV6))
                        tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
        }
-       init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
-                        RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
+       ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+                           RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
        if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
                goto tx_error;
        rt = ip_route_output_key(tunnel->net, &fl4);
@@ -711,14 +695,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
        }
 
        if (tunnel->fwmark) {
-               init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
-                                tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
-                                tunnel->fwmark);
+               ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+                                   tunnel->parms.o_key, RT_TOS(tos),
+                                   tunnel->parms.link, tunnel->fwmark);
        }
        else {
-               init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
-                                tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
-                                skb->mark);
+               ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+                                   tunnel->parms.o_key, RT_TOS(tos),
+                                   tunnel->parms.link, skb->mark);
        }
 
        if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
index 51b1669..b10bf56 100644 (file)
@@ -454,6 +454,7 @@ static struct pernet_operations vti_net_ops = {
        .exit_batch = vti_exit_batch_net,
        .id   = &vti_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
index c891235..9c5a4d1 100644 (file)
@@ -669,6 +669,7 @@ static struct pernet_operations ipip_net_ops = {
        .exit_batch = ipip_exit_batch_net,
        .id   = &ipip_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int __init ipip_init(void)
index 7c7ac9d..d752a70 100644 (file)
@@ -28,9 +28,9 @@
 
 #include <linux/uaccess.h>
 #include <linux/types.h>
+#include <linux/cache.h>
 #include <linux/capability.h>
 #include <linux/errno.h>
-#include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/kernel.h>
 #include <linux/fcntl.h>
@@ -52,7 +52,6 @@
 #include <net/protocol.h>
 #include <linux/skbuff.h>
 #include <net/route.h>
-#include <net/sock.h>
 #include <net/icmp.h>
 #include <net/udp.h>
 #include <net/raw.h>
@@ -96,7 +95,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
  * In this case data path is free of exclusive locks at all.
  */
 
-static struct kmem_cache *mrt_cachep __read_mostly;
+static struct kmem_cache *mrt_cachep __ro_after_init;
 
 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
 static void ipmr_free_table(struct mr_table *mrt);
@@ -106,8 +105,6 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                          struct mfc_cache *cache, int local);
 static int ipmr_cache_report(struct mr_table *mrt,
                             struct sk_buff *pkt, vifi_t vifi, int assert);
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-                             struct mfc_cache *c, struct rtmsg *rtm);
 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
                                 int cmd);
 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
@@ -118,6 +115,23 @@ static void ipmr_expire_process(struct timer_list *t);
 #define ipmr_for_each_table(mrt, net) \
        list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
 
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+                                          struct mr_table *mrt)
+{
+       struct mr_table *ret;
+
+       if (!mrt)
+               ret = list_entry_rcu(net->ipv4.mr_tables.next,
+                                    struct mr_table, list);
+       else
+               ret = list_entry_rcu(mrt->list.next,
+                                    struct mr_table, list);
+
+       if (&ret->list == &net->ipv4.mr_tables)
+               return NULL;
+       return ret;
+}
+
 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
 {
        struct mr_table *mrt;
@@ -285,6 +299,14 @@ EXPORT_SYMBOL(ipmr_rule_default);
 #define ipmr_for_each_table(mrt, net) \
        for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
 
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+                                          struct mr_table *mrt)
+{
+       if (!mrt)
+               return net->ipv4.mrt;
+       return NULL;
+}
+
 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
 {
        return net->ipv4.mrt;
@@ -344,7 +366,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
 }
 
 static const struct rhashtable_params ipmr_rht_params = {
-       .head_offset = offsetof(struct mfc_cache, mnode),
+       .head_offset = offsetof(struct mr_mfc, mnode),
        .key_offset = offsetof(struct mfc_cache, cmparg),
        .key_len = sizeof(struct mfc_cache_cmp_arg),
        .nelem_hint = 3,
@@ -353,6 +375,24 @@ static const struct rhashtable_params ipmr_rht_params = {
        .automatic_shrinking = true,
 };
 
+static void ipmr_new_table_set(struct mr_table *mrt,
+                              struct net *net)
+{
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+       list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+}
+
+static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
+       .mfc_mcastgrp = htonl(INADDR_ANY),
+       .mfc_origin = htonl(INADDR_ANY),
+};
+
+static struct mr_table_ops ipmr_mr_table_ops = {
+       .rht_params = &ipmr_rht_params,
+       .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
+};
+
 static struct mr_table *ipmr_new_table(struct net *net, u32 id)
 {
        struct mr_table *mrt;
@@ -365,23 +405,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
        if (mrt)
                return mrt;
 
-       mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
-       if (!mrt)
-               return ERR_PTR(-ENOMEM);
-       write_pnet(&mrt->net, net);
-       mrt->id = id;
-
-       rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
-       INIT_LIST_HEAD(&mrt->mfc_cache_list);
-       INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
-       timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
-
-       mrt->mroute_reg_vif_num = -1;
-#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
-       list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
-#endif
-       return mrt;
+       return mr_table_alloc(net, id, &ipmr_mr_table_ops,
+                             ipmr_expire_process, ipmr_new_table_set);
 }
 
 static void ipmr_free_table(struct mr_table *mrt)
@@ -760,14 +785,14 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
 
 static void ipmr_cache_free_rcu(struct rcu_head *head)
 {
-       struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+       struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 
-       kmem_cache_free(mrt_cachep, c);
+       kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
 }
 
 void ipmr_cache_free(struct mfc_cache *c)
 {
-       call_rcu(&c->rcu, ipmr_cache_free_rcu);
+       call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
 }
 EXPORT_SYMBOL(ipmr_cache_free);
 
@@ -782,7 +807,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 
        atomic_dec(&mrt->cache_resolve_queue_len);
 
-       while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
+       while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct iphdr));
@@ -806,9 +831,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 static void ipmr_expire_process(struct timer_list *t)
 {
        struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
-       unsigned long now;
+       struct mr_mfc *c, *next;
        unsigned long expires;
-       struct mfc_cache *c, *next;
+       unsigned long now;
 
        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
@@ -830,8 +855,8 @@ static void ipmr_expire_process(struct timer_list *t)
                }
 
                list_del(&c->list);
-               mroute_netlink_event(mrt, c, RTM_DELROUTE);
-               ipmr_destroy_unres(mrt, c);
+               mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
+               ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
        }
 
        if (!list_empty(&mrt->mfc_unres_queue))
@@ -842,7 +867,7 @@ out:
 }
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
-static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
+static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
                                   unsigned char *ttls)
 {
        int vifi;
@@ -944,6 +969,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
        ip_rt_multicast_event(in_dev);
 
        /* Fill in the VIF structures */
+       vif_device_init(v, dev, vifc->vifc_rate_limit,
+                       vifc->vifc_threshold,
+                       vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
+                       (VIFF_TUNNEL | VIFF_REGISTER));
 
        attr.orig_dev = dev;
        if (!switchdev_port_attr_get(dev, &attr)) {
@@ -952,20 +981,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
        } else {
                v->dev_parent_id.id_len = 0;
        }
-       v->rate_limit = vifc->vifc_rate_limit;
+
        v->local = vifc->vifc_lcl_addr.s_addr;
        v->remote = vifc->vifc_rmt_addr.s_addr;
-       v->flags = vifc->vifc_flags;
-       if (!mrtsock)
-               v->flags |= VIFF_STATIC;
-       v->threshold = vifc->vifc_threshold;
-       v->bytes_in = 0;
-       v->bytes_out = 0;
-       v->pkt_in = 0;
-       v->pkt_out = 0;
-       v->link = dev->ifindex;
-       if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
-               v->link = dev_get_iflink(dev);
 
        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
@@ -988,33 +1006,8 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
                        .mfc_mcastgrp = mcastgrp,
                        .mfc_origin = origin
        };
-       struct rhlist_head *tmp, *list;
-       struct mfc_cache *c;
-
-       list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-       rhl_for_each_entry_rcu(c, tmp, list, mnode)
-               return c;
-
-       return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
-                                                   int vifi)
-{
-       struct mfc_cache_cmp_arg arg = {
-                       .mfc_mcastgrp = htonl(INADDR_ANY),
-                       .mfc_origin = htonl(INADDR_ANY)
-       };
-       struct rhlist_head *tmp, *list;
-       struct mfc_cache *c;
 
-       list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-       rhl_for_each_entry_rcu(c, tmp, list, mnode)
-               if (c->mfc_un.res.ttls[vifi] < 255)
-                       return c;
-
-       return NULL;
+       return mr_mfc_find(mrt, &arg);
 }
 
 /* Look for a (*,G) entry */
@@ -1025,25 +1018,10 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
                        .mfc_mcastgrp = mcastgrp,
                        .mfc_origin = htonl(INADDR_ANY)
        };
-       struct rhlist_head *tmp, *list;
-       struct mfc_cache *c, *proxy;
 
        if (mcastgrp == htonl(INADDR_ANY))
-               goto skip;
-
-       list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-       rhl_for_each_entry_rcu(c, tmp, list, mnode) {
-               if (c->mfc_un.res.ttls[vifi] < 255)
-                       return c;
-
-               /* It's ok if the vifi is part of the static tree */
-               proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
-               if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
-                       return c;
-       }
-
-skip:
-       return ipmr_cache_find_any_parent(mrt, vifi);
+               return mr_mfc_find_any_parent(mrt, vifi);
+       return mr_mfc_find_any(mrt, vifi, &arg);
 }
 
 /* Look for a (S,G,iif) entry if parent != -1 */
@@ -1055,15 +1033,8 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
                        .mfc_mcastgrp = mcastgrp,
                        .mfc_origin = origin,
        };
-       struct rhlist_head *tmp, *list;
-       struct mfc_cache *c;
-
-       list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-       rhl_for_each_entry_rcu(c, tmp, list, mnode)
-               if (parent == -1 || parent == c->mfc_parent)
-                       return c;
 
-       return NULL;
+       return mr_mfc_find_parent(mrt, &arg, parent);
 }
 
 /* Allocate a multicast cache entry */
@@ -1072,9 +1043,9 @@ static struct mfc_cache *ipmr_cache_alloc(void)
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 
        if (c) {
-               c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
-               c->mfc_un.res.minvif = MAXVIFS;
-               refcount_set(&c->mfc_un.res.refcount, 1);
+               c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+               c->_c.mfc_un.res.minvif = MAXVIFS;
+               refcount_set(&c->_c.mfc_un.res.refcount, 1);
        }
        return c;
 }
@@ -1084,8 +1055,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 
        if (c) {
-               skb_queue_head_init(&c->mfc_un.unres.unresolved);
-               c->mfc_un.unres.expires = jiffies + 10*HZ;
+               skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+               c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
        }
        return c;
 }
@@ -1098,12 +1069,13 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
        struct nlmsgerr *e;
 
        /* Play the pending entries through our router */
-       while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+       while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct iphdr));
 
-                       if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+                       if (mr_fill_mroute(mrt, skb, &c->_c,
+                                          nlmsg_data(nlh)) > 0) {
                                nlh->nlmsg_len = skb_tail_pointer(skb) -
                                                 (u8 *)nlh;
                        } else {
@@ -1211,7 +1183,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
        int err;
 
        spin_lock_bh(&mfc_unres_lock);
-       list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
+       list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
                if (c->mfc_mcastgrp == iph->daddr &&
                    c->mfc_origin == iph->saddr) {
                        found = true;
@@ -1230,12 +1202,13 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
                }
 
                /* Fill in the new cache entry */
-               c->mfc_parent   = -1;
+               c->_c.mfc_parent = -1;
                c->mfc_origin   = iph->saddr;
                c->mfc_mcastgrp = iph->daddr;
 
                /* Reflect first query at mrouted. */
                err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
+
                if (err < 0) {
                        /* If the report failed throw the cache entry
                           out - Brad Parker
@@ -1248,15 +1221,16 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
                }
 
                atomic_inc(&mrt->cache_resolve_queue_len);
-               list_add(&c->list, &mrt->mfc_unres_queue);
+               list_add(&c->_c.list, &mrt->mfc_unres_queue);
                mroute_netlink_event(mrt, c, RTM_NEWROUTE);
 
                if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
-                       mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
+                       mod_timer(&mrt->ipmr_expire_timer,
+                                 c->_c.mfc_un.unres.expires);
        }
 
        /* See if we can append the packet */
-       if (c->mfc_un.unres.unresolved.qlen > 3) {
+       if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
@@ -1264,7 +1238,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
                        skb->dev = dev;
                        skb->skb_iif = dev->ifindex;
                }
-               skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+               skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
                err = 0;
        }
 
@@ -1286,8 +1260,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
        rcu_read_unlock();
        if (!c)
                return -ENOENT;
-       rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
-       list_del_rcu(&c->list);
+       rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
+       list_del_rcu(&c->_c.list);
        call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
        mroute_netlink_event(mrt, c, RTM_DELROUTE);
        ipmr_cache_put(c);
@@ -1299,6 +1273,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
                        struct mfcctl *mfc, int mrtsock, int parent)
 {
        struct mfc_cache *uc, *c;
+       struct mr_mfc *_uc;
        bool found;
        int ret;
 
@@ -1312,10 +1287,10 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
        rcu_read_unlock();
        if (c) {
                write_lock_bh(&mrt_lock);
-               c->mfc_parent = mfc->mfcc_parent;
-               ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+               c->_c.mfc_parent = mfc->mfcc_parent;
+               ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
                if (!mrtsock)
-                       c->mfc_flags |= MFC_STATIC;
+                       c->_c.mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
                                              mrt->id);
@@ -1333,28 +1308,29 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 
        c->mfc_origin = mfc->mfcc_origin.s_addr;
        c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
-       c->mfc_parent = mfc->mfcc_parent;
-       ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+       c->_c.mfc_parent = mfc->mfcc_parent;
+       ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
        if (!mrtsock)
-               c->mfc_flags |= MFC_STATIC;
+               c->_c.mfc_flags |= MFC_STATIC;
 
-       ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+       ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
                                  ipmr_rht_params);
        if (ret) {
                pr_err("ipmr: rhtable insert error %d\n", ret);
                ipmr_cache_free(c);
                return ret;
        }
-       list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
+       list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
        /* Check to see if we resolved a queued list. If so we
         * need to send on the frames and tidy up.
         */
        found = false;
        spin_lock_bh(&mfc_unres_lock);
-       list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
+       list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+               uc = (struct mfc_cache *)_uc;
                if (uc->mfc_origin == c->mfc_origin &&
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
-                       list_del(&uc->list);
+                       list_del(&_uc->list);
                        atomic_dec(&mrt->cache_resolve_queue_len);
                        found = true;
                        break;
@@ -1377,7 +1353,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 static void mroute_clean_tables(struct mr_table *mrt, bool all)
 {
        struct net *net = read_pnet(&mrt->net);
-       struct mfc_cache *c, *tmp;
+       struct mr_mfc *c, *tmp;
+       struct mfc_cache *cache;
        LIST_HEAD(list);
        int i;
 
@@ -1395,18 +1372,20 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
                        continue;
                rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
                list_del_rcu(&c->list);
-               call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+               cache = (struct mfc_cache *)c;
+               call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
                                              mrt->id);
-               mroute_netlink_event(mrt, c, RTM_DELROUTE);
-               ipmr_cache_put(c);
+               mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+               ipmr_cache_put(cache);
        }
 
        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
                spin_lock_bh(&mfc_unres_lock);
                list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
                        list_del(&c->list);
-                       mroute_netlink_event(mrt, c, RTM_DELROUTE);
-                       ipmr_destroy_unres(mrt, c);
+                       cache = (struct mfc_cache *)c;
+                       mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+                       ipmr_destroy_unres(mrt, cache);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
@@ -1698,9 +1677,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
                rcu_read_lock();
                c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
-                       sr.pktcnt = c->mfc_un.res.pkt;
-                       sr.bytecnt = c->mfc_un.res.bytes;
-                       sr.wrong_if = c->mfc_un.res.wrong_if;
+                       sr.pktcnt = c->_c.mfc_un.res.pkt;
+                       sr.bytecnt = c->_c.mfc_un.res.bytes;
+                       sr.wrong_if = c->_c.mfc_un.res.wrong_if;
                        rcu_read_unlock();
 
                        if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1772,9 +1751,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
                rcu_read_lock();
                c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
-                       sr.pktcnt = c->mfc_un.res.pkt;
-                       sr.bytecnt = c->mfc_un.res.bytes;
-                       sr.wrong_if = c->mfc_un.res.wrong_if;
+                       sr.pktcnt = c->_c.mfc_un.res.pkt;
+                       sr.bytecnt = c->_c.mfc_un.res.bytes;
+                       sr.wrong_if = c->_c.mfc_un.res.wrong_if;
                        rcu_read_unlock();
 
                        if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1998,26 +1977,26 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
 /* "local" means that we should preserve one skb (for local delivery) */
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                          struct net_device *dev, struct sk_buff *skb,
-                         struct mfc_cache *cache, int local)
+                         struct mfc_cache *c, int local)
 {
        int true_vifi = ipmr_find_vif(mrt, dev);
        int psend = -1;
        int vif, ct;
 
-       vif = cache->mfc_parent;
-       cache->mfc_un.res.pkt++;
-       cache->mfc_un.res.bytes += skb->len;
-       cache->mfc_un.res.lastuse = jiffies;
+       vif = c->_c.mfc_parent;
+       c->_c.mfc_un.res.pkt++;
+       c->_c.mfc_un.res.bytes += skb->len;
+       c->_c.mfc_un.res.lastuse = jiffies;
 
-       if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
+       if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
                struct mfc_cache *cache_proxy;
 
                /* For an (*,G) entry, we only check that the incomming
                 * interface is part of the static tree.
                 */
-               cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
+               cache_proxy = mr_mfc_find_any_parent(mrt, vif);
                if (cache_proxy &&
-                   cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+                   cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
                        goto forward;
        }
 
@@ -2038,7 +2017,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                        goto dont_forward;
                }
 
-               cache->mfc_un.res.wrong_if++;
+               c->_c.mfc_un.res.wrong_if++;
 
                if (true_vifi >= 0 && mrt->mroute_do_assert &&
                    /* pimsm uses asserts, when switching from RPT to SPT,
@@ -2047,10 +2026,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                     * large chunk of pimd to kernel. Ough... --ANK
                     */
                    (mrt->mroute_do_pim ||
-                    cache->mfc_un.res.ttls[true_vifi] < 255) &&
+                    c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
-                              cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
-                       cache->mfc_un.res.last_assert = jiffies;
+                              c->_c.mfc_un.res.last_assert +
+                              MFC_ASSERT_THRESH)) {
+                       c->_c.mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
@@ -2061,33 +2041,33 @@ forward:
        mrt->vif_table[vif].bytes_in += skb->len;
 
        /* Forward the frame */
-       if (cache->mfc_origin == htonl(INADDR_ANY) &&
-           cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
+       if (c->mfc_origin == htonl(INADDR_ANY) &&
+           c->mfc_mcastgrp == htonl(INADDR_ANY)) {
                if (true_vifi >= 0 &&
-                   true_vifi != cache->mfc_parent &&
+                   true_vifi != c->_c.mfc_parent &&
                    ip_hdr(skb)->ttl >
-                               cache->mfc_un.res.ttls[cache->mfc_parent]) {
+                               c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
                        /* It's an (*,*) entry and the packet is not coming from
                         * the upstream: forward the packet to the upstream
                         * only.
                         */
-                       psend = cache->mfc_parent;
+                       psend = c->_c.mfc_parent;
                        goto last_forward;
                }
                goto dont_forward;
        }
-       for (ct = cache->mfc_un.res.maxvif - 1;
-            ct >= cache->mfc_un.res.minvif; ct--) {
+       for (ct = c->_c.mfc_un.res.maxvif - 1;
+            ct >= c->_c.mfc_un.res.minvif; ct--) {
                /* For (*,G) entry, don't forward to the incoming interface */
-               if ((cache->mfc_origin != htonl(INADDR_ANY) ||
+               if ((c->mfc_origin != htonl(INADDR_ANY) ||
                     ct != true_vifi) &&
-                   ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
+                   ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
                                if (skb2)
                                        ipmr_queue_xmit(net, mrt, true_vifi,
-                                                       skb2, cache, psend);
+                                                       skb2, c, psend);
                        }
                        psend = ct;
                }
@@ -2099,9 +2079,9 @@ last_forward:
 
                        if (skb2)
                                ipmr_queue_xmit(net, mrt, true_vifi, skb2,
-                                               cache, psend);
+                                               c, psend);
                } else {
-                       ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
+                       ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
                        return;
                }
        }
@@ -2299,62 +2279,6 @@ drop:
 }
 #endif
 
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-                             struct mfc_cache *c, struct rtmsg *rtm)
-{
-       struct rta_mfc_stats mfcs;
-       struct nlattr *mp_attr;
-       struct rtnexthop *nhp;
-       unsigned long lastuse;
-       int ct;
-
-       /* If cache is unresolved, don't try to parse IIF and OIF */
-       if (c->mfc_parent >= MAXVIFS) {
-               rtm->rtm_flags |= RTNH_F_UNRESOLVED;
-               return -ENOENT;
-       }
-
-       if (VIF_EXISTS(mrt, c->mfc_parent) &&
-           nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
-               return -EMSGSIZE;
-
-       if (c->mfc_flags & MFC_OFFLOAD)
-               rtm->rtm_flags |= RTNH_F_OFFLOAD;
-
-       if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
-               return -EMSGSIZE;
-
-       for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
-               if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
-                       if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
-                               nla_nest_cancel(skb, mp_attr);
-                               return -EMSGSIZE;
-                       }
-
-                       nhp->rtnh_flags = 0;
-                       nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-                       nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
-                       nhp->rtnh_len = sizeof(*nhp);
-               }
-       }
-
-       nla_nest_end(skb, mp_attr);
-
-       lastuse = READ_ONCE(c->mfc_un.res.lastuse);
-       lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
-       mfcs.mfcs_packets = c->mfc_un.res.pkt;
-       mfcs.mfcs_bytes = c->mfc_un.res.bytes;
-       mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
-       if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
-           nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
-                             RTA_PAD))
-               return -EMSGSIZE;
-
-       rtm->rtm_type = RTN_MULTICAST;
-       return 1;
-}
-
 int ipmr_get_route(struct net *net, struct sk_buff *skb,
                   __be32 saddr, __be32 daddr,
                   struct rtmsg *rtm, u32 portid)
@@ -2412,7 +2336,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
        }
 
        read_lock(&mrt_lock);
-       err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
+       err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
        read_unlock(&mrt_lock);
        rcu_read_unlock();
        return err;
@@ -2440,7 +2364,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                goto nla_put_failure;
        rtm->rtm_type     = RTN_MULTICAST;
        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
-       if (c->mfc_flags & MFC_STATIC)
+       if (c->_c.mfc_flags & MFC_STATIC)
                rtm->rtm_protocol = RTPROT_STATIC;
        else
                rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2449,7 +2373,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
        if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
            nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
                goto nla_put_failure;
-       err = __ipmr_fill_mroute(mrt, skb, c, rtm);
+       err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
        /* do not break the dump if cache is unresolved */
        if (err < 0 && err != -ENOENT)
                goto nla_put_failure;
@@ -2462,6 +2386,14 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
+static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                            u32 portid, u32 seq, struct mr_mfc *c, int cmd,
+                            int flags)
+{
+       return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
+                               cmd, flags);
+}
+
 static size_t mroute_msgsize(bool unresolved, int maxvif)
 {
        size_t len =
@@ -2490,7 +2422,8 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
        struct sk_buff *skb;
        int err = -ENOBUFS;
 
-       skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
+       skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
+                                      mrt->maxvif),
                        GFP_ATOMIC);
        if (!skb)
                goto errout;
@@ -2634,62 +2567,8 @@ errout_free:
 
 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
-       struct net *net = sock_net(skb->sk);
-       struct mr_table *mrt;
-       struct mfc_cache *mfc;
-       unsigned int t = 0, s_t;
-       unsigned int e = 0, s_e;
-
-       s_t = cb->args[0];
-       s_e = cb->args[1];
-
-       rcu_read_lock();
-       ipmr_for_each_table(mrt, net) {
-               if (t < s_t)
-                       goto next_table;
-               list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
-                       if (e < s_e)
-                               goto next_entry;
-                       if (ipmr_fill_mroute(mrt, skb,
-                                            NETLINK_CB(cb->skb).portid,
-                                            cb->nlh->nlmsg_seq,
-                                            mfc, RTM_NEWROUTE,
-                                            NLM_F_MULTI) < 0)
-                               goto done;
-next_entry:
-                       e++;
-               }
-               e = 0;
-               s_e = 0;
-
-               spin_lock_bh(&mfc_unres_lock);
-               list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
-                       if (e < s_e)
-                               goto next_entry2;
-                       if (ipmr_fill_mroute(mrt, skb,
-                                            NETLINK_CB(cb->skb).portid,
-                                            cb->nlh->nlmsg_seq,
-                                            mfc, RTM_NEWROUTE,
-                                            NLM_F_MULTI) < 0) {
-                               spin_unlock_bh(&mfc_unres_lock);
-                               goto done;
-                       }
-next_entry2:
-                       e++;
-               }
-               spin_unlock_bh(&mfc_unres_lock);
-               e = 0;
-               s_e = 0;
-next_table:
-               t++;
-       }
-done:
-       rcu_read_unlock();
-
-       cb->args[1] = e;
-       cb->args[0] = t;
-
-       return skb->len;
+       return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
+                               _ipmr_fill_mroute, &mfc_unres_lock);
 }
 
 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
@@ -2946,31 +2825,11 @@ out:
 /* The /proc interfaces to multicast routing :
  * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
  */
-struct ipmr_vif_iter {
-       struct seq_net_private p;
-       struct mr_table *mrt;
-       int ct;
-};
-
-static struct vif_device *ipmr_vif_seq_idx(struct net *net,
-                                          struct ipmr_vif_iter *iter,
-                                          loff_t pos)
-{
-       struct mr_table *mrt = iter->mrt;
-
-       for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
-               if (!VIF_EXISTS(mrt, iter->ct))
-                       continue;
-               if (pos-- == 0)
-                       return &mrt->vif_table[iter->ct];
-       }
-       return NULL;
-}
 
 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(mrt_lock)
 {
-       struct ipmr_vif_iter *iter = seq->private;
+       struct mr_vif_iter *iter = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt;
 
@@ -2981,26 +2840,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        iter->mrt = mrt;
 
        read_lock(&mrt_lock);
-       return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
-               : SEQ_START_TOKEN;
-}
-
-static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       struct ipmr_vif_iter *iter = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct mr_table *mrt = iter->mrt;
-
-       ++*pos;
-       if (v == SEQ_START_TOKEN)
-               return ipmr_vif_seq_idx(net, iter, 0);
-
-       while (++iter->ct < mrt->maxvif) {
-               if (!VIF_EXISTS(mrt, iter->ct))
-                       continue;
-               return &mrt->vif_table[iter->ct];
-       }
-       return NULL;
+       return mr_vif_seq_start(seq, pos);
 }
 
 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -3011,7 +2851,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
 
 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 {
-       struct ipmr_vif_iter *iter = seq->private;
+       struct mr_vif_iter *iter = seq->private;
        struct mr_table *mrt = iter->mrt;
 
        if (v == SEQ_START_TOKEN) {
@@ -3019,7 +2859,8 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
        } else {
                const struct vif_device *vif = v;
-               const char *name =  vif->dev ? vif->dev->name : "none";
+               const char *name =  vif->dev ?
+                                   vif->dev->name : "none";
 
                seq_printf(seq,
                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
@@ -3033,7 +2874,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ipmr_vif_seq_ops = {
        .start = ipmr_vif_seq_start,
-       .next  = ipmr_vif_seq_next,
+       .next  = mr_vif_seq_next,
        .stop  = ipmr_vif_seq_stop,
        .show  = ipmr_vif_seq_show,
 };
@@ -3041,7 +2882,7 @@ static const struct seq_operations ipmr_vif_seq_ops = {
 static int ipmr_vif_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &ipmr_vif_seq_ops,
-                           sizeof(struct ipmr_vif_iter));
+                           sizeof(struct mr_vif_iter));
 }
 
 static const struct file_operations ipmr_vif_fops = {
@@ -3051,40 +2892,8 @@ static const struct file_operations ipmr_vif_fops = {
        .release = seq_release_net,
 };
 
-struct ipmr_mfc_iter {
-       struct seq_net_private p;
-       struct mr_table *mrt;
-       struct list_head *cache;
-};
-
-static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
-                                         struct ipmr_mfc_iter *it, loff_t pos)
-{
-       struct mr_table *mrt = it->mrt;
-       struct mfc_cache *mfc;
-
-       rcu_read_lock();
-       it->cache = &mrt->mfc_cache_list;
-       list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
-               if (pos-- == 0)
-                       return mfc;
-       rcu_read_unlock();
-
-       spin_lock_bh(&mfc_unres_lock);
-       it->cache = &mrt->mfc_unres_queue;
-       list_for_each_entry(mfc, it->cache, list)
-               if (pos-- == 0)
-                       return mfc;
-       spin_unlock_bh(&mfc_unres_lock);
-
-       it->cache = NULL;
-       return NULL;
-}
-
-
 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
-       struct ipmr_mfc_iter *it = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt;
 
@@ -3092,54 +2901,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
        if (!mrt)
                return ERR_PTR(-ENOENT);
 
-       it->mrt = mrt;
-       it->cache = NULL;
-       return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
-               : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       struct ipmr_mfc_iter *it = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct mr_table *mrt = it->mrt;
-       struct mfc_cache *mfc = v;
-
-       ++*pos;
-
-       if (v == SEQ_START_TOKEN)
-               return ipmr_mfc_seq_idx(net, seq->private, 0);
-
-       if (mfc->list.next != it->cache)
-               return list_entry(mfc->list.next, struct mfc_cache, list);
-
-       if (it->cache == &mrt->mfc_unres_queue)
-               goto end_of_list;
-
-       /* exhausted cache_array, show unresolved */
-       rcu_read_unlock();
-       it->cache = &mrt->mfc_unres_queue;
-
-       spin_lock_bh(&mfc_unres_lock);
-       if (!list_empty(it->cache))
-               return list_first_entry(it->cache, struct mfc_cache, list);
-
-end_of_list:
-       spin_unlock_bh(&mfc_unres_lock);
-       it->cache = NULL;
-
-       return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
-       struct ipmr_mfc_iter *it = seq->private;
-       struct mr_table *mrt = it->mrt;
-
-       if (it->cache == &mrt->mfc_unres_queue)
-               spin_unlock_bh(&mfc_unres_lock);
-       else if (it->cache == &mrt->mfc_cache_list)
-               rcu_read_unlock();
+       return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 }
 
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -3151,26 +2913,26 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
                 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
        } else {
                const struct mfc_cache *mfc = v;
-               const struct ipmr_mfc_iter *it = seq->private;
+               const struct mr_mfc_iter *it = seq->private;
                const struct mr_table *mrt = it->mrt;
 
                seq_printf(seq, "%08X %08X %-3hd",
                           (__force u32) mfc->mfc_mcastgrp,
                           (__force u32) mfc->mfc_origin,
-                          mfc->mfc_parent);
+                          mfc->_c.mfc_parent);
 
                if (it->cache != &mrt->mfc_unres_queue) {
                        seq_printf(seq, " %8lu %8lu %8lu",
-                                  mfc->mfc_un.res.pkt,
-                                  mfc->mfc_un.res.bytes,
-                                  mfc->mfc_un.res.wrong_if);
-                       for (n = mfc->mfc_un.res.minvif;
-                            n < mfc->mfc_un.res.maxvif; n++) {
+                                  mfc->_c.mfc_un.res.pkt,
+                                  mfc->_c.mfc_un.res.bytes,
+                                  mfc->_c.mfc_un.res.wrong_if);
+                       for (n = mfc->_c.mfc_un.res.minvif;
+                            n < mfc->_c.mfc_un.res.maxvif; n++) {
                                if (VIF_EXISTS(mrt, n) &&
-                                   mfc->mfc_un.res.ttls[n] < 255)
+                                   mfc->_c.mfc_un.res.ttls[n] < 255)
                                        seq_printf(seq,
                                           " %2d:%-3d",
-                                          n, mfc->mfc_un.res.ttls[n]);
+                                          n, mfc->_c.mfc_un.res.ttls[n]);
                        }
                } else {
                        /* unresolved mfc_caches don't contain
@@ -3185,15 +2947,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ipmr_mfc_seq_ops = {
        .start = ipmr_mfc_seq_start,
-       .next  = ipmr_mfc_seq_next,
-       .stop  = ipmr_mfc_seq_stop,
+       .next  = mr_mfc_seq_next,
+       .stop  = mr_mfc_seq_stop,
        .show  = ipmr_mfc_seq_show,
 };
 
 static int ipmr_mfc_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
-                           sizeof(struct ipmr_mfc_iter));
+                           sizeof(struct mr_mfc_iter));
 }
 
 static const struct file_operations ipmr_mfc_fops = {
@@ -3229,7 +2991,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
 
        ipmr_for_each_table(mrt, net) {
                struct vif_device *v = &mrt->vif_table[0];
-               struct mfc_cache *mfc;
+               struct mr_mfc *mfc;
                int vifi;
 
                /* Notifiy on table VIF entries */
@@ -3246,7 +3008,8 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
                /* Notify on table MFC entries */
                list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
                        call_ipmr_mfc_entry_notifier(nb, net,
-                                                    FIB_EVENT_ENTRY_ADD, mfc,
+                                                    FIB_EVENT_ENTRY_ADD,
+                                                    (struct mfc_cache *)mfc,
                                                     mrt->id);
        }
 
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
new file mode 100644 (file)
index 0000000..8ba55bf
--- /dev/null
@@ -0,0 +1,323 @@
+/* Linux multicast routing support
+ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
+ */
+
+#include <linux/mroute_base.h>
+
+/* Sets everything common except 'dev', since that is done under locking */
+void vif_device_init(struct vif_device *v,
+                    struct net_device *dev,
+                    unsigned long rate_limit,
+                    unsigned char threshold,
+                    unsigned short flags,
+                    unsigned short get_iflink_mask)
+{
+       v->dev = NULL;
+       v->bytes_in = 0;
+       v->bytes_out = 0;
+       v->pkt_in = 0;
+       v->pkt_out = 0;
+       v->rate_limit = rate_limit;
+       v->flags = flags;
+       v->threshold = threshold;
+       if (v->flags & get_iflink_mask)
+               v->link = dev_get_iflink(dev);
+       else
+               v->link = dev->ifindex;
+}
+EXPORT_SYMBOL(vif_device_init);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+              struct mr_table_ops *ops,
+              void (*expire_func)(struct timer_list *t),
+              void (*table_set)(struct mr_table *mrt,
+                                struct net *net))
+{
+       struct mr_table *mrt;
+
+       mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+       if (!mrt)
+               return NULL;
+       mrt->id = id;
+       write_pnet(&mrt->net, net);
+
+       mrt->ops = *ops;
+       rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+       INIT_LIST_HEAD(&mrt->mfc_cache_list);
+       INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+
+       timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);
+
+       mrt->mroute_reg_vif_num = -1;
+       table_set(mrt, net);
+       return mrt;
+}
+EXPORT_SYMBOL(mr_table_alloc);
+
+void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
+{
+       struct rhlist_head *tmp, *list;
+       struct mr_mfc *c;
+
+       list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+       rhl_for_each_entry_rcu(c, tmp, list, mnode)
+               if (parent == -1 || parent == c->mfc_parent)
+                       return c;
+
+       return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_parent);
+
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
+{
+       struct rhlist_head *tmp, *list;
+       struct mr_mfc *c;
+
+       list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
+                              *mrt->ops.rht_params);
+       rhl_for_each_entry_rcu(c, tmp, list, mnode)
+               if (c->mfc_un.res.ttls[vifi] < 255)
+                       return c;
+
+       return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_any_parent);
+
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
+{
+       struct rhlist_head *tmp, *list;
+       struct mr_mfc *c, *proxy;
+
+       list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+       rhl_for_each_entry_rcu(c, tmp, list, mnode) {
+               if (c->mfc_un.res.ttls[vifi] < 255)
+                       return c;
+
+               /* It's ok if the vifi is part of the static tree */
+               proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent);
+               if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
+                       return c;
+       }
+
+       return mr_mfc_find_any_parent(mrt, vifi);
+}
+EXPORT_SYMBOL(mr_mfc_find_any);
+
+#ifdef CONFIG_PROC_FS
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
+{
+       struct mr_table *mrt = iter->mrt;
+
+       for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+               if (!VIF_EXISTS(mrt, iter->ct))
+                       continue;
+               if (pos-- == 0)
+                       return &mrt->vif_table[iter->ct];
+       }
+       return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_idx);
+
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct mr_vif_iter *iter = seq->private;
+       struct net *net = seq_file_net(seq);
+       struct mr_table *mrt = iter->mrt;
+
+       ++*pos;
+       if (v == SEQ_START_TOKEN)
+               return mr_vif_seq_idx(net, iter, 0);
+
+       while (++iter->ct < mrt->maxvif) {
+               if (!VIF_EXISTS(mrt, iter->ct))
+                       continue;
+               return &mrt->vif_table[iter->ct];
+       }
+       return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_next);
+
+void *mr_mfc_seq_idx(struct net *net,
+                    struct mr_mfc_iter *it, loff_t pos)
+{
+       struct mr_table *mrt = it->mrt;
+       struct mr_mfc *mfc;
+
+       rcu_read_lock();
+       it->cache = &mrt->mfc_cache_list;
+       list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+               if (pos-- == 0)
+                       return mfc;
+       rcu_read_unlock();
+
+       spin_lock_bh(it->lock);
+       it->cache = &mrt->mfc_unres_queue;
+       list_for_each_entry(mfc, it->cache, list)
+               if (pos-- == 0)
+                       return mfc;
+       spin_unlock_bh(it->lock);
+
+       it->cache = NULL;
+       return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_idx);
+
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+                     loff_t *pos)
+{
+       struct mr_mfc_iter *it = seq->private;
+       struct net *net = seq_file_net(seq);
+       struct mr_table *mrt = it->mrt;
+       struct mr_mfc *c = v;
+
+       ++*pos;
+
+       if (v == SEQ_START_TOKEN)
+               return mr_mfc_seq_idx(net, seq->private, 0);
+
+       if (c->list.next != it->cache)
+               return list_entry(c->list.next, struct mr_mfc, list);
+
+       if (it->cache == &mrt->mfc_unres_queue)
+               goto end_of_list;
+
+       /* exhausted cache_array, show unresolved */
+       rcu_read_unlock();
+       it->cache = &mrt->mfc_unres_queue;
+
+       spin_lock_bh(it->lock);
+       if (!list_empty(it->cache))
+               return list_first_entry(it->cache, struct mr_mfc, list);
+
+end_of_list:
+       spin_unlock_bh(it->lock);
+       it->cache = NULL;
+
+       return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_next);
+#endif
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                  struct mr_mfc *c, struct rtmsg *rtm)
+{
+       struct rta_mfc_stats mfcs;
+       struct nlattr *mp_attr;
+       struct rtnexthop *nhp;
+       unsigned long lastuse;
+       int ct;
+
+       /* If cache is unresolved, don't try to parse IIF and OIF */
+       if (c->mfc_parent >= MAXVIFS) {
+               rtm->rtm_flags |= RTNH_F_UNRESOLVED;
+               return -ENOENT;
+       }
+
+       if (VIF_EXISTS(mrt, c->mfc_parent) &&
+           nla_put_u32(skb, RTA_IIF,
+                       mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
+               return -EMSGSIZE;
+
+       if (c->mfc_flags & MFC_OFFLOAD)
+               rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
+       mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+       if (!mp_attr)
+               return -EMSGSIZE;
+
+       for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+               if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+                       struct vif_device *vif;
+
+                       nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+                       if (!nhp) {
+                               nla_nest_cancel(skb, mp_attr);
+                               return -EMSGSIZE;
+                       }
+
+                       nhp->rtnh_flags = 0;
+                       nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+                       vif = &mrt->vif_table[ct];
+                       nhp->rtnh_ifindex = vif->dev->ifindex;
+                       nhp->rtnh_len = sizeof(*nhp);
+               }
+       }
+
+       nla_nest_end(skb, mp_attr);
+
+       lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+       lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
+       mfcs.mfcs_packets = c->mfc_un.res.pkt;
+       mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+       mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+       if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+           nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
+                             RTA_PAD))
+               return -EMSGSIZE;
+
+       rtm->rtm_type = RTN_MULTICAST;
+       return 1;
+}
+EXPORT_SYMBOL(mr_fill_mroute);
+
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+                    struct mr_table *(*iter)(struct net *net,
+                                             struct mr_table *mrt),
+                    int (*fill)(struct mr_table *mrt,
+                                struct sk_buff *skb,
+                                u32 portid, u32 seq, struct mr_mfc *c,
+                                int cmd, int flags),
+                    spinlock_t *lock)
+{
+       unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+       struct net *net = sock_net(skb->sk);
+       struct mr_table *mrt;
+       struct mr_mfc *mfc;
+
+       rcu_read_lock();
+       for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
+               if (t < s_t)
+                       goto next_table;
+               list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+                       if (e < s_e)
+                               goto next_entry;
+                       if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+                                cb->nlh->nlmsg_seq, mfc,
+                                RTM_NEWROUTE, NLM_F_MULTI) < 0)
+                               goto done;
+next_entry:
+                       e++;
+               }
+               e = 0;
+               s_e = 0;
+
+               spin_lock_bh(lock);
+               list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+                       if (e < s_e)
+                               goto next_entry2;
+                       if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+                                cb->nlh->nlmsg_seq, mfc,
+                                RTM_NEWROUTE, NLM_F_MULTI) < 0) {
+                               spin_unlock_bh(lock);
+                               goto done;
+                       }
+next_entry2:
+                       e++;
+               }
+               spin_unlock_bh(lock);
+               e = 0;
+               s_e = 0;
+next_table:
+               t++;
+       }
+done:
+       rcu_read_unlock();
+
+       cb->args[1] = e;
+       cb->args[0] = t;
+
+       return skb->len;
+}
+EXPORT_SYMBOL(mr_rtm_dumproute);
index 4b02ab3..08b3e48 100644 (file)
@@ -840,6 +840,7 @@ static struct pernet_operations clusterip_net_ops = {
        .exit = clusterip_net_exit,
        .id   = &clusterip_net_id,
        .size = sizeof(struct clusterip_net),
+       .async = true,
 };
 
 static int __init clusterip_tg_init(void)
index a0d3ad6..57244b6 100644 (file)
@@ -118,6 +118,7 @@ static void __net_exit defrag4_net_exit(struct net *net)
 
 static struct pernet_operations defrag4_net_ops = {
        .exit = defrag4_net_exit,
+       .async = true,
 };
 
 static int __init nf_defrag_init(void)
index fdabc70..d97e83b 100644 (file)
@@ -556,4 +556,3 @@ int __init ip_misc_proc_init(void)
 {
        return register_pernet_subsys(&ip_proc_ops);
 }
-
index 26eefa2..3bb686d 100644 (file)
@@ -1783,7 +1783,7 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
 
 /* if skb is set it will be used and fl4 can be NULL */
 int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
-                      const struct sk_buff *skb)
+                      const struct sk_buff *skb, struct flow_keys *flkeys)
 {
        struct net *net = fi->fib_net;
        struct flow_keys hash_keys;
@@ -1810,14 +1810,23 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
                        if (skb->l4_hash)
                                return skb_get_hash_raw(skb) >> 1;
                        memset(&hash_keys, 0, sizeof(hash_keys));
-                       skb_flow_dissect_flow_keys(skb, &keys, flag);
 
-                       hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-                       hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
-                       hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
-                       hash_keys.ports.src = keys.ports.src;
-                       hash_keys.ports.dst = keys.ports.dst;
-                       hash_keys.basic.ip_proto = keys.basic.ip_proto;
+                       if (flkeys) {
+                               hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+                               hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+                               hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+                               hash_keys.ports.src = flkeys->ports.src;
+                               hash_keys.ports.dst = flkeys->ports.dst;
+                               hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+                       } else {
+                               skb_flow_dissect_flow_keys(skb, &keys, flag);
+                               hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+                               hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+                               hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+                               hash_keys.ports.src = keys.ports.src;
+                               hash_keys.ports.dst = keys.ports.dst;
+                               hash_keys.basic.ip_proto = keys.basic.ip_proto;
+                       }
                } else {
                        memset(&hash_keys, 0, sizeof(hash_keys));
                        hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1838,11 +1847,12 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
 static int ip_mkroute_input(struct sk_buff *skb,
                            struct fib_result *res,
                            struct in_device *in_dev,
-                           __be32 daddr, __be32 saddr, u32 tos)
+                           __be32 daddr, __be32 saddr, u32 tos,
+                           struct flow_keys *hkeys)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
        if (res->fi && res->fi->fib_nhs > 1) {
-               int h = fib_multipath_hash(res->fi, NULL, skb);
+               int h = fib_multipath_hash(res->fi, NULL, skb, hkeys);
 
                fib_select_multipath(res, h);
        }
@@ -1868,13 +1878,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                               struct fib_result *res)
 {
        struct in_device *in_dev = __in_dev_get_rcu(dev);
+       struct flow_keys *flkeys = NULL, _flkeys;
+       struct net    *net = dev_net(dev);
        struct ip_tunnel_info *tun_info;
-       struct flowi4   fl4;
+       int             err = -EINVAL;
        unsigned int    flags = 0;
        u32             itag = 0;
        struct rtable   *rth;
-       int             err = -EINVAL;
-       struct net    *net = dev_net(dev);
+       struct flowi4   fl4;
        bool do_cache;
 
        /* IP on this device is disabled. */
@@ -1933,6 +1944,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        fl4.daddr = daddr;
        fl4.saddr = saddr;
        fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+       if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+               flkeys = &_flkeys;
+
        err = fib_lookup(net, &fl4, res, 0);
        if (err != 0) {
                if (!IN_DEV_FORWARD(in_dev))
@@ -1958,7 +1973,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        if (res->type != RTN_UNICAST)
                goto martian_destination;
 
-       err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+       err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
 out:   return err;
 
 brd_input:
index a471f69..c92014c 100644 (file)
@@ -97,10 +97,9 @@ struct bbr {
                packet_conservation:1,  /* use packet conservation? */
                restore_cwnd:1,      /* decided to revert cwnd to old value */
                round_start:1,       /* start of packet-timed tx->ack round? */
-               tso_segs_goal:7,     /* segments we want in each skb we send */
                idle_restart:1,      /* restarting after idle? */
                probe_rtt_round_done:1,  /* a BBR_PROBE_RTT round at 4 pkts? */
-               unused:5,
+               unused:12,
                lt_is_sampling:1,    /* taking long-term ("LT") samples now? */
                lt_rtt_cnt:7,        /* round trips in long-term interval */
                lt_use_bw:1;         /* use lt_bw as our bw estimate? */
@@ -261,23 +260,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
                sk->sk_pacing_rate = rate;
 }
 
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
 {
-       struct bbr *bbr = inet_csk_ca(sk);
-
-       return bbr->tso_segs_goal;
+       return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
 }
 
-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
-       struct bbr *bbr = inet_csk_ca(sk);
-       u32 min_segs;
+       u32 segs, bytes;
+
+       /* Sort of tcp_tso_autosize() but ignoring
+        * driver provided sk_gso_max_size.
+        */
+       bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+                     GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+       segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
 
-       min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
-       bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
-                                0x7FU);
+       return min(segs, 0x7FU);
 }
 
 /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -348,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
        cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
 
        /* Allow enough full-sized skbs in flight to utilize end systems. */
-       cwnd += 3 * bbr->tso_segs_goal;
+       cwnd += 3 * bbr_tso_segs_goal(sk);
 
        /* Reduce delayed ACKs by rounding up cwnd to the next even number. */
        cwnd = (cwnd + 1) & ~1U;
@@ -824,7 +825,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
 
        bw = bbr_bw(sk);
        bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
-       bbr_set_tso_segs_goal(sk);
        bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
 }
 
@@ -834,7 +834,6 @@ static void bbr_init(struct sock *sk)
        struct bbr *bbr = inet_csk_ca(sk);
 
        bbr->prior_cwnd = 0;
-       bbr->tso_segs_goal = 0;  /* default segs per skb until first ACK */
        bbr->rtt_cnt = 0;
        bbr->next_rtt_delivered = 0;
        bbr->prev_ca_state = TCP_CA_Open;
@@ -936,7 +935,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
        .undo_cwnd      = bbr_undo_cwnd,
        .cwnd_event     = bbr_cwnd_event,
        .ssthresh       = bbr_ssthresh,
-       .tso_segs_goal  = bbr_tso_segs_goal,
+       .min_tso_segs   = bbr_min_tso_segs,
        .get_info       = bbr_get_info,
        .set_state      = bbr_set_state,
 };
index 49d043d..383cac0 100644 (file)
@@ -1703,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
 /* Return how many segs we'd like on a TSO packet,
  * to send one TSO packet per ms
  */
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
-                    int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+                           int min_tso_segs)
 {
        u32 bytes, segs;
 
@@ -1720,7 +1720,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 
        return segs;
 }
-EXPORT_SYMBOL(tcp_tso_autosize);
 
 /* Return the number of segments we want in the skb we are transmitting.
  * See if congestion control module wants to decide; otherwise, autosize.
@@ -1728,11 +1727,13 @@ EXPORT_SYMBOL(tcp_tso_autosize);
 static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
 {
        const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-       u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+       u32 min_tso, tso_segs;
 
-       if (!tso_segs)
-               tso_segs = tcp_tso_autosize(sk, mss_now,
-                               sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+       min_tso = ca_ops->min_tso_segs ?
+                       ca_ops->min_tso_segs(sk) :
+                       sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+
+       tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
        return min_t(u32, tso_segs, sk->sk_gso_max_segs);
 }
 
index ec35eaa..c063001 100644 (file)
@@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister);
        for (handler = rcu_dereference(head);           \
             handler != NULL;                           \
             handler = rcu_dereference(handler->next))  \
-       
+
 static int tunnel4_rcv(struct sk_buff *skb)
 {
        struct xfrm_tunnel *handler;
index 796ac41..0c752dc 100644 (file)
@@ -379,4 +379,3 @@ void __init xfrm4_init(void)
        xfrm4_protocol_init();
        register_pernet_subsys(&xfrm4_net_ops);
 }
-
index ea71e4b..6794ddf 100644 (file)
@@ -278,6 +278,7 @@ config IPV6_SUBTREES
 config IPV6_MROUTE
        bool "IPv6: multicast routing"
        depends on IPV6
+       select IP_MROUTE_COMMON
        ---help---
          Experimental support for IPv6 multicast forwarding.
          If unsure, say N.
index 4facfe0..b5fd116 100644 (file)
@@ -1459,6 +1459,21 @@ static bool ipv6_use_optimistic_addr(struct net *net,
 #endif
 }
 
+static bool ipv6_allow_optimistic_dad(struct net *net,
+                                     struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+       if (!idev)
+               return false;
+       if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+               return false;
+
+       return true;
+#else
+       return false;
+#endif
+}
+
 static int ipv6_get_saddr_eval(struct net *net,
                               struct ipv6_saddr_score *score,
                               struct ipv6_saddr_dst *dst,
@@ -1968,6 +1983,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
                spin_lock_bh(&ifp->lock);
                addrconf_del_dad_work(ifp);
                ifp->flags |= IFA_F_TENTATIVE;
+               if (dad_failed)
+                       ifp->flags &= ~IFA_F_OPTIMISTIC;
                spin_unlock_bh(&ifp->lock);
                if (dad_failed)
                        ipv6_ifa_notify(0, ifp);
@@ -4501,6 +4518,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
            (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
                return -EINVAL;
 
+       if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
+               ifa_flags &= ~IFA_F_OPTIMISTIC;
+
        timeout = addrconf_timeout_fixup(valid_lft, HZ);
        if (addrconf_finite_timeout(timeout)) {
                expires = jiffies_to_clock_t(timeout * HZ);
@@ -4574,6 +4594,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
        struct in6_addr *pfx, *peer_pfx;
        struct inet6_ifaddr *ifa;
        struct net_device *dev;
+       struct inet6_dev *idev;
        u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
        u32 ifa_flags;
        int err;
@@ -4607,7 +4628,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        /* We ignore other flags so far. */
        ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
-                    IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
+                    IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+       idev = ipv6_find_idev(dev);
+       if (IS_ERR(idev))
+               return PTR_ERR(idev);
+
+       if (!ipv6_allow_optimistic_dad(net, idev))
+               ifa_flags &= ~IFA_F_OPTIMISTIC;
+
+       if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+               NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
+               return -EINVAL;
+       }
 
        ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
        if (!ifa) {
index 8e085cc..d7d0abc 100644 (file)
@@ -552,4 +552,3 @@ void ac6_proc_exit(struct net *net)
        remove_proc_entry("anycast6", net->proc_net);
 }
 #endif
-
index 11025f8..b643f5c 100644 (file)
@@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
        return nexthdr;
 }
 EXPORT_SYMBOL(ipv6_find_hdr);
-
index 95a2c9e..04e5f52 100644 (file)
@@ -223,6 +223,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
        if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
                return 0;
 
+       if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
+               return 0;
+
+       if (fib_rule_port_range_set(&rule->sport_range) &&
+           !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+               return 0;
+
+       if (fib_rule_port_range_set(&rule->dport_range) &&
+           !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+               return 0;
+
        return 1;
 }
 
@@ -258,12 +269,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
        rule6->dst.plen = frh->dst_len;
        rule6->tclass = frh->tos;
 
+       if (fib_rule_requires_fldissect(rule))
+               net->ipv6.fib6_rules_require_fldissect++;
+
        net->ipv6.fib6_has_custom_rules = true;
        err = 0;
 errout:
        return err;
 }
 
+static int fib6_rule_delete(struct fib_rule *rule)
+{
+       struct net *net = rule->fr_net;
+
+       if (net->ipv6.fib6_rules_require_fldissect &&
+           fib_rule_requires_fldissect(rule))
+               net->ipv6.fib6_rules_require_fldissect--;
+
+       return 0;
+}
+
 static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
                             struct nlattr **tb)
 {
@@ -323,6 +348,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
        .match                  = fib6_rule_match,
        .suppress               = fib6_rule_suppress,
        .configure              = fib6_rule_configure,
+       .delete                 = fib6_rule_delete,
        .compare                = fib6_rule_compare,
        .fill                   = fib6_rule_fill,
        .nlmsg_payload          = fib6_rule_nlmsg_payload,
@@ -350,6 +376,7 @@ static int __net_init fib6_rules_net_init(struct net *net)
                goto out_fib6_rules_ops;
 
        net->ipv6.fib6_rules_ops = ops;
+       net->ipv6.fib6_rules_require_fldissect = 0;
 out:
        return err;
 
index 4fa4f1b..b0778d3 100644 (file)
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
        fl6.fl6_icmp_type = type;
        fl6.fl6_icmp_code = code;
        fl6.flowi6_uid = sock_net_uid(net, NULL);
-       fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+       fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
        sk = icmpv6_xmit_lock(net);
index 44c39c5..e438699 100644 (file)
@@ -613,6 +613,7 @@ static struct pernet_operations ila_net_ops = {
        .exit = ila_exit_net,
        .id   = &ila_net_id,
        .size = sizeof(struct ila_net),
+       .async = true,
 };
 
 static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
index 3c35312..4f150a3 100644 (file)
@@ -1517,6 +1517,7 @@ static struct pernet_operations ip6gre_net_ops = {
        .exit_batch = ip6gre_exit_batch_net,
        .id   = &ip6gre_net_id,
        .size = sizeof(struct ip6gre_net),
+       .async = true,
 };
 
 static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1784,6 +1785,12 @@ static void ip6gre_tap_setup(struct net_device *dev)
        netif_keep_dst(dev);
 }
 
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+       return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
 static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
                                       struct ip_tunnel_encap *ipencap)
 {
index 997c7f1..a6eb0e6 100644 (file)
@@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
-                   ((mroute6_socket(net, skb) &&
+                   ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
index 4b15fe9..869e2e6 100644 (file)
@@ -2250,6 +2250,7 @@ static struct pernet_operations ip6_tnl_net_ops = {
        .exit_batch = ip6_tnl_exit_batch_net,
        .id   = &ip6_tnl_net_id,
        .size = sizeof(struct ip6_tnl_net),
+       .async = true,
 };
 
 /**
index fa3ae1c..c617ea1 100644 (file)
@@ -1148,6 +1148,7 @@ static struct pernet_operations vti6_net_ops = {
        .exit_batch = vti6_exit_batch_net,
        .id   = &vti6_net_id,
        .size = sizeof(struct vti6_net),
+       .async = true,
 };
 
 static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
index 295eb5e..2a38f9d 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/kernel.h>
 #include <linux/fcntl.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/compat.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
 #include <net/raw.h>
 #include <linux/notifier.h>
 #include <linux/if_arp.h>
 #include <net/ip6_checksum.h>
 #include <linux/netconf.h>
 
-struct mr6_table {
-       struct list_head        list;
-       possible_net_t          net;
-       u32                     id;
-       struct sock             *mroute6_sk;
-       struct timer_list       ipmr_expire_timer;
-       struct list_head        mfc6_unres_queue;
-       struct list_head        mfc6_cache_array[MFC6_LINES];
-       struct mif_device       vif6_table[MAXMIFS];
-       int                     maxvif;
-       atomic_t                cache_resolve_queue_len;
-       bool                    mroute_do_assert;
-       bool                    mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
-       int                     mroute_reg_vif_num;
-#endif
-};
-
 struct ip6mr_rule {
        struct fib_rule         common;
 };
 
 struct ip6mr_result {
-       struct mr6_table        *mrt;
+       struct mr_table *mrt;
 };
 
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
@@ -86,11 +65,7 @@ struct ip6mr_result {
 
 static DEFINE_RWLOCK(mrt_lock);
 
-/*
- *     Multicast router control variables
- */
-
-#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+/* Multicast router control variables */
 
 /* Special spinlock for queue of unresolved entries */
 static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -105,30 +80,45 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
-static void ip6mr_free_table(struct mr6_table *mrt);
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr_table *mrt);
 
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
                           struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
                              mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
-                              struct mfc6_cache *c, struct rtmsg *rtm);
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
                              int cmd);
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
                               struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
 static void ipmr_expire_process(struct timer_list *t);
 
 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 #define ip6mr_for_each_table(mrt, net) \
        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+                                           struct mr_table *mrt)
+{
+       struct mr_table *ret;
+
+       if (!mrt)
+               ret = list_entry_rcu(net->ipv6.mr6_tables.next,
+                                    struct mr_table, list);
+       else
+               ret = list_entry_rcu(mrt->list.next,
+                                    struct mr_table, list);
+
+       if (&ret->list == &net->ipv6.mr6_tables)
+               return NULL;
+       return ret;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 {
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        ip6mr_for_each_table(mrt, net) {
                if (mrt->id == id)
@@ -138,7 +128,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 }
 
 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
-                           struct mr6_table **mrt)
+                           struct mr_table **mrt)
 {
        int err;
        struct ip6mr_result res;
@@ -159,7 +149,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
                             int flags, struct fib_lookup_arg *arg)
 {
        struct ip6mr_result *res = arg->result;
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        switch (rule->action) {
        case FR_ACT_TO_TBL:
@@ -227,7 +217,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 static int __net_init ip6mr_rules_init(struct net *net)
 {
        struct fib_rules_ops *ops;
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        int err;
 
        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
@@ -258,7 +248,7 @@ err1:
 
 static void __net_exit ip6mr_rules_exit(struct net *net)
 {
-       struct mr6_table *mrt, *next;
+       struct mr_table *mrt, *next;
 
        rtnl_lock();
        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
@@ -272,13 +262,21 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 #define ip6mr_for_each_table(mrt, net) \
        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+                                           struct mr_table *mrt)
+{
+       if (!mrt)
+               return net->ipv6.mrt6;
+       return NULL;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 {
        return net->ipv6.mrt6;
 }
 
 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
-                           struct mr6_table **mrt)
+                           struct mr_table **mrt)
 {
        *mrt = net->ipv6.mrt6;
        return 0;
@@ -299,112 +297,75 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 }
 #endif
 
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
+                         const void *ptr)
 {
-       struct mr6_table *mrt;
-       unsigned int i;
+       const struct mfc6_cache_cmp_arg *cmparg = arg->key;
+       struct mfc6_cache *c = (struct mfc6_cache *)ptr;
 
-       mrt = ip6mr_get_table(net, id);
-       if (mrt)
-               return mrt;
-
-       mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
-       if (!mrt)
-               return NULL;
-       mrt->id = id;
-       write_pnet(&mrt->net, net);
-
-       /* Forwarding cache */
-       for (i = 0; i < MFC6_LINES; i++)
-               INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
-
-       INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+       return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
+              !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
+}
 
-       timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
+static const struct rhashtable_params ip6mr_rht_params = {
+       .head_offset = offsetof(struct mr_mfc, mnode),
+       .key_offset = offsetof(struct mfc6_cache, cmparg),
+       .key_len = sizeof(struct mfc6_cache_cmp_arg),
+       .nelem_hint = 3,
+       .locks_mul = 1,
+       .obj_cmpfn = ip6mr_hash_cmp,
+       .automatic_shrinking = true,
+};
 
-#ifdef CONFIG_IPV6_PIMSM_V2
-       mrt->mroute_reg_vif_num = -1;
-#endif
+static void ip6mr_new_table_set(struct mr_table *mrt,
+                               struct net *net)
+{
 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 #endif
-       return mrt;
 }
 
-static void ip6mr_free_table(struct mr6_table *mrt)
-{
-       del_timer_sync(&mrt->ipmr_expire_timer);
-       mroute_clean_tables(mrt, true);
-       kfree(mrt);
-}
-
-#ifdef CONFIG_PROC_FS
-
-struct ipmr_mfc_iter {
-       struct seq_net_private p;
-       struct mr6_table *mrt;
-       struct list_head *cache;
-       int ct;
+static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
+       .mf6c_origin = IN6ADDR_ANY_INIT,
+       .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
 };
 
+static struct mr_table_ops ip6mr_mr_table_ops = {
+       .rht_params = &ip6mr_rht_params,
+       .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
+};
 
-static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
-                                          struct ipmr_mfc_iter *it, loff_t pos)
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 {
-       struct mr6_table *mrt = it->mrt;
-       struct mfc6_cache *mfc;
-
-       read_lock(&mrt_lock);
-       for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
-               it->cache = &mrt->mfc6_cache_array[it->ct];
-               list_for_each_entry(mfc, it->cache, list)
-                       if (pos-- == 0)
-                               return mfc;
-       }
-       read_unlock(&mrt_lock);
+       struct mr_table *mrt;
 
-       spin_lock_bh(&mfc_unres_lock);
-       it->cache = &mrt->mfc6_unres_queue;
-       list_for_each_entry(mfc, it->cache, list)
-               if (pos-- == 0)
-                       return mfc;
-       spin_unlock_bh(&mfc_unres_lock);
+       mrt = ip6mr_get_table(net, id);
+       if (mrt)
+               return mrt;
 
-       it->cache = NULL;
-       return NULL;
+       return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
+                             ipmr_expire_process, ip6mr_new_table_set);
 }
 
-/*
- *     The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
- */
-
-struct ipmr_vif_iter {
-       struct seq_net_private p;
-       struct mr6_table *mrt;
-       int ct;
-};
-
-static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
-                                           struct ipmr_vif_iter *iter,
-                                           loff_t pos)
+static void ip6mr_free_table(struct mr_table *mrt)
 {
-       struct mr6_table *mrt = iter->mrt;
-
-       for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
-               if (!MIF_EXISTS(mrt, iter->ct))
-                       continue;
-               if (pos-- == 0)
-                       return &mrt->vif6_table[iter->ct];
-       }
-       return NULL;
+       del_timer_sync(&mrt->ipmr_expire_timer);
+       mroute_clean_tables(mrt, true);
+       rhltable_destroy(&mrt->mfc_hash);
+       kfree(mrt);
 }
 
+#ifdef CONFIG_PROC_FS
+/* The /proc interfaces to multicast routing
+ * /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(mrt_lock)
 {
-       struct ipmr_vif_iter *iter = seq->private;
+       struct mr_vif_iter *iter = seq->private;
        struct net *net = seq_file_net(seq);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
        if (!mrt)
@@ -413,26 +374,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        iter->mrt = mrt;
 
        read_lock(&mrt_lock);
-       return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
-               : SEQ_START_TOKEN;
-}
-
-static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       struct ipmr_vif_iter *iter = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct mr6_table *mrt = iter->mrt;
-
-       ++*pos;
-       if (v == SEQ_START_TOKEN)
-               return ip6mr_vif_seq_idx(net, iter, 0);
-
-       while (++iter->ct < mrt->maxvif) {
-               if (!MIF_EXISTS(mrt, iter->ct))
-                       continue;
-               return &mrt->vif6_table[iter->ct];
-       }
-       return NULL;
+       return mr_vif_seq_start(seq, pos);
 }
 
 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -443,19 +385,19 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 
 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 {
-       struct ipmr_vif_iter *iter = seq->private;
-       struct mr6_table *mrt = iter->mrt;
+       struct mr_vif_iter *iter = seq->private;
+       struct mr_table *mrt = iter->mrt;
 
        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
        } else {
-               const struct mif_device *vif = v;
+               const struct vif_device *vif = v;
                const char *name = vif->dev ? vif->dev->name : "none";
 
                seq_printf(seq,
                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
-                          vif - mrt->vif6_table,
+                          vif - mrt->vif_table,
                           name, vif->bytes_in, vif->pkt_in,
                           vif->bytes_out, vif->pkt_out,
                           vif->flags);
@@ -465,7 +407,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ip6mr_vif_seq_ops = {
        .start = ip6mr_vif_seq_start,
-       .next  = ip6mr_vif_seq_next,
+       .next  = mr_vif_seq_next,
        .stop  = ip6mr_vif_seq_stop,
        .show  = ip6mr_vif_seq_show,
 };
@@ -473,7 +415,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = {
 static int ip6mr_vif_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
-                           sizeof(struct ipmr_vif_iter));
+                           sizeof(struct mr_vif_iter));
 }
 
 static const struct file_operations ip6mr_vif_fops = {
@@ -485,72 +427,14 @@ static const struct file_operations ip6mr_vif_fops = {
 
 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
-       struct ipmr_mfc_iter *it = seq->private;
        struct net *net = seq_file_net(seq);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
        if (!mrt)
                return ERR_PTR(-ENOENT);
 
-       it->mrt = mrt;
-       it->cache = NULL;
-       return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
-               : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       struct mfc6_cache *mfc = v;
-       struct ipmr_mfc_iter *it = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct mr6_table *mrt = it->mrt;
-
-       ++*pos;
-
-       if (v == SEQ_START_TOKEN)
-               return ipmr_mfc_seq_idx(net, seq->private, 0);
-
-       if (mfc->list.next != it->cache)
-               return list_entry(mfc->list.next, struct mfc6_cache, list);
-
-       if (it->cache == &mrt->mfc6_unres_queue)
-               goto end_of_list;
-
-       BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
-
-       while (++it->ct < MFC6_LINES) {
-               it->cache = &mrt->mfc6_cache_array[it->ct];
-               if (list_empty(it->cache))
-                       continue;
-               return list_first_entry(it->cache, struct mfc6_cache, list);
-       }
-
-       /* exhausted cache_array, show unresolved */
-       read_unlock(&mrt_lock);
-       it->cache = &mrt->mfc6_unres_queue;
-       it->ct = 0;
-
-       spin_lock_bh(&mfc_unres_lock);
-       if (!list_empty(it->cache))
-               return list_first_entry(it->cache, struct mfc6_cache, list);
-
- end_of_list:
-       spin_unlock_bh(&mfc_unres_lock);
-       it->cache = NULL;
-
-       return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
-       struct ipmr_mfc_iter *it = seq->private;
-       struct mr6_table *mrt = it->mrt;
-
-       if (it->cache == &mrt->mfc6_unres_queue)
-               spin_unlock_bh(&mfc_unres_lock);
-       else if (it->cache == &mrt->mfc6_cache_array[it->ct])
-               read_unlock(&mrt_lock);
+       return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 }
 
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -564,25 +448,25 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
        } else {
                const struct mfc6_cache *mfc = v;
-               const struct ipmr_mfc_iter *it = seq->private;
-               struct mr6_table *mrt = it->mrt;
+               const struct mr_mfc_iter *it = seq->private;
+               struct mr_table *mrt = it->mrt;
 
                seq_printf(seq, "%pI6 %pI6 %-3hd",
                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
-                          mfc->mf6c_parent);
+                          mfc->_c.mfc_parent);
 
-               if (it->cache != &mrt->mfc6_unres_queue) {
+               if (it->cache != &mrt->mfc_unres_queue) {
                        seq_printf(seq, " %8lu %8lu %8lu",
-                                  mfc->mfc_un.res.pkt,
-                                  mfc->mfc_un.res.bytes,
-                                  mfc->mfc_un.res.wrong_if);
-                       for (n = mfc->mfc_un.res.minvif;
-                            n < mfc->mfc_un.res.maxvif; n++) {
-                               if (MIF_EXISTS(mrt, n) &&
-                                   mfc->mfc_un.res.ttls[n] < 255)
+                                  mfc->_c.mfc_un.res.pkt,
+                                  mfc->_c.mfc_un.res.bytes,
+                                  mfc->_c.mfc_un.res.wrong_if);
+                       for (n = mfc->_c.mfc_un.res.minvif;
+                            n < mfc->_c.mfc_un.res.maxvif; n++) {
+                               if (VIF_EXISTS(mrt, n) &&
+                                   mfc->_c.mfc_un.res.ttls[n] < 255)
                                        seq_printf(seq,
-                                                  " %2d:%-3d",
-                                                  n, mfc->mfc_un.res.ttls[n]);
+                                                  " %2d:%-3d", n,
+                                                  mfc->_c.mfc_un.res.ttls[n]);
                        }
                } else {
                        /* unresolved mfc_caches don't contain
@@ -597,15 +481,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ipmr_mfc_seq_ops = {
        .start = ipmr_mfc_seq_start,
-       .next  = ipmr_mfc_seq_next,
-       .stop  = ipmr_mfc_seq_stop,
+       .next  = mr_mfc_seq_next,
+       .stop  = mr_mfc_seq_stop,
        .show  = ipmr_mfc_seq_show,
 };
 
 static int ipmr_mfc_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
-                           sizeof(struct ipmr_mfc_iter));
+                           sizeof(struct mr_mfc_iter));
 }
 
 static const struct file_operations ip6mr_mfc_fops = {
@@ -624,7 +508,7 @@ static int pim6_rcv(struct sk_buff *skb)
        struct ipv6hdr   *encap;
        struct net_device  *reg_dev = NULL;
        struct net *net = dev_net(skb->dev);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct flowi6 fl6 = {
                .flowi6_iif     = skb->dev->ifindex,
                .flowi6_mark    = skb->mark,
@@ -658,7 +542,7 @@ static int pim6_rcv(struct sk_buff *skb)
 
        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
-               reg_dev = mrt->vif6_table[reg_vif_num].dev;
+               reg_dev = mrt->vif_table[reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);
        read_unlock(&mrt_lock);
@@ -693,7 +577,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
                                      struct net_device *dev)
 {
        struct net *net = dev_net(dev);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct flowi6 fl6 = {
                .flowi6_oif     = dev->ifindex,
                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
@@ -736,7 +620,7 @@ static void reg_vif_setup(struct net_device *dev)
        dev->features           |= NETIF_F_NETNS_LOCAL;
 }
 
-static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
 {
        struct net_device *dev;
        char name[IFNAMSIZ];
@@ -773,17 +657,17 @@ failure:
  *     Delete a VIF entry
  */
 
-static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
                       struct list_head *head)
 {
-       struct mif_device *v;
+       struct vif_device *v;
        struct net_device *dev;
        struct inet6_dev *in6_dev;
 
        if (vifi < 0 || vifi >= mrt->maxvif)
                return -EADDRNOTAVAIL;
 
-       v = &mrt->vif6_table[vifi];
+       v = &mrt->vif_table[vifi];
 
        write_lock_bh(&mrt_lock);
        dev = v->dev;
@@ -802,7 +686,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
        if (vifi + 1 == mrt->maxvif) {
                int tmp;
                for (tmp = vifi - 1; tmp >= 0; tmp--) {
-                       if (MIF_EXISTS(mrt, tmp))
+                       if (VIF_EXISTS(mrt, tmp))
                                break;
                }
                mrt->maxvif = tmp + 1;
@@ -827,23 +711,30 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
        return 0;
 }
 
+static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
+{
+       struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
+
+       kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
+}
+
 static inline void ip6mr_cache_free(struct mfc6_cache *c)
 {
-       kmem_cache_free(mrt_cachep, c);
+       call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
 }
 
 /* Destroy an unresolved cache entry, killing queued skbs
    and reporting error to netlink readers.
  */
 
-static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 {
        struct net *net = read_pnet(&mrt->net);
        struct sk_buff *skb;
 
        atomic_dec(&mrt->cache_resolve_queue_len);
 
-       while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+       while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
                if (ipv6_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct ipv6hdr));
@@ -862,13 +753,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 
 /* Timer process for all the unresolved queue. */
 
-static void ipmr_do_expire_process(struct mr6_table *mrt)
+static void ipmr_do_expire_process(struct mr_table *mrt)
 {
        unsigned long now = jiffies;
        unsigned long expires = 10 * HZ;
-       struct mfc6_cache *c, *next;
+       struct mr_mfc *c, *next;
 
-       list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+       list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
                if (time_after(c->mfc_un.unres.expires, now)) {
                        /* not yet... */
                        unsigned long interval = c->mfc_un.unres.expires - now;
@@ -878,24 +769,24 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
                }
 
                list_del(&c->list);
-               mr6_netlink_event(mrt, c, RTM_DELROUTE);
-               ip6mr_destroy_unres(mrt, c);
+               mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+               ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
        }
 
-       if (!list_empty(&mrt->mfc6_unres_queue))
+       if (!list_empty(&mrt->mfc_unres_queue))
                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 }
 
 static void ipmr_expire_process(struct timer_list *t)
 {
-       struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+       struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
 
        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
                return;
        }
 
-       if (!list_empty(&mrt->mfc6_unres_queue))
+       if (!list_empty(&mrt->mfc_unres_queue))
                ipmr_do_expire_process(mrt);
 
        spin_unlock(&mfc_unres_lock);
@@ -903,7 +794,8 @@ static void ipmr_expire_process(struct timer_list *t)
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
 
-static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr_table *mrt,
+                                   struct mr_mfc *cache,
                                    unsigned char *ttls)
 {
        int vifi;
@@ -913,7 +805,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 
        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
-               if (MIF_EXISTS(mrt, vifi) &&
+               if (VIF_EXISTS(mrt, vifi) &&
                    ttls[vifi] && ttls[vifi] < 255) {
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
                        if (cache->mfc_un.res.minvif > vifi)
@@ -925,17 +817,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
        cache->mfc_un.res.lastuse = jiffies;
 }
 
-static int mif6_add(struct net *net, struct mr6_table *mrt,
+static int mif6_add(struct net *net, struct mr_table *mrt,
                    struct mif6ctl *vifc, int mrtsock)
 {
        int vifi = vifc->mif6c_mifi;
-       struct mif_device *v = &mrt->vif6_table[vifi];
+       struct vif_device *v = &mrt->vif_table[vifi];
        struct net_device *dev;
        struct inet6_dev *in6_dev;
        int err;
 
        /* Is vif busy ? */
-       if (MIF_EXISTS(mrt, vifi))
+       if (VIF_EXISTS(mrt, vifi))
                return -EADDRINUSE;
 
        switch (vifc->mif6c_flags) {
@@ -980,21 +872,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
                                             dev->ifindex, &in6_dev->cnf);
        }
 
-       /*
-        *      Fill in the VIF structures
-        */
-       v->rate_limit = vifc->vifc_rate_limit;
-       v->flags = vifc->mif6c_flags;
-       if (!mrtsock)
-               v->flags |= VIFF_STATIC;
-       v->threshold = vifc->vifc_threshold;
-       v->bytes_in = 0;
-       v->bytes_out = 0;
-       v->pkt_in = 0;
-       v->pkt_out = 0;
-       v->link = dev->ifindex;
-       if (v->flags & MIFF_REGISTER)
-               v->link = dev_get_iflink(dev);
+       /* Fill in the VIF structures */
+       vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
+                       vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
+                       MIFF_REGISTER);
 
        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
@@ -1009,75 +890,56 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
        return 0;
 }
 
-static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
                                           const struct in6_addr *origin,
                                           const struct in6_addr *mcastgrp)
 {
-       int line = MFC6_HASH(mcastgrp, origin);
-       struct mfc6_cache *c;
-
-       list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
-               if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
-                   ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
-                       return c;
-       }
-       return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
-                                                     mifi_t mifi)
-{
-       int line = MFC6_HASH(&in6addr_any, &in6addr_any);
-       struct mfc6_cache *c;
-
-       list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
-               if (ipv6_addr_any(&c->mf6c_origin) &&
-                   ipv6_addr_any(&c->mf6c_mcastgrp) &&
-                   (c->mfc_un.res.ttls[mifi] < 255))
-                       return c;
+       struct mfc6_cache_cmp_arg arg = {
+               .mf6c_origin = *origin,
+               .mf6c_mcastgrp = *mcastgrp,
+       };
 
-       return NULL;
+       return mr_mfc_find(mrt, &arg);
 }
 
 /* Look for a (*,G) entry */
-static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
                                               struct in6_addr *mcastgrp,
                                               mifi_t mifi)
 {
-       int line = MFC6_HASH(mcastgrp, &in6addr_any);
-       struct mfc6_cache *c, *proxy;
+       struct mfc6_cache_cmp_arg arg = {
+               .mf6c_origin = in6addr_any,
+               .mf6c_mcastgrp = *mcastgrp,
+       };
 
        if (ipv6_addr_any(mcastgrp))
-               goto skip;
-
-       list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
-               if (ipv6_addr_any(&c->mf6c_origin) &&
-                   ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
-                       if (c->mfc_un.res.ttls[mifi] < 255)
-                               return c;
-
-                       /* It's ok if the mifi is part of the static tree */
-                       proxy = ip6mr_cache_find_any_parent(mrt,
-                                                           c->mf6c_parent);
-                       if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
-                               return c;
-               }
+               return mr_mfc_find_any_parent(mrt, mifi);
+       return mr_mfc_find_any(mrt, mifi, &arg);
+}
 
-skip:
-       return ip6mr_cache_find_any_parent(mrt, mifi);
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc6_cache *
+ip6mr_cache_find_parent(struct mr_table *mrt,
+                       const struct in6_addr *origin,
+                       const struct in6_addr *mcastgrp,
+                       int parent)
+{
+       struct mfc6_cache_cmp_arg arg = {
+               .mf6c_origin = *origin,
+               .mf6c_mcastgrp = *mcastgrp,
+       };
+
+       return mr_mfc_find_parent(mrt, &arg, parent);
 }
 
-/*
- *     Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
 static struct mfc6_cache *ip6mr_cache_alloc(void)
 {
        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
        if (!c)
                return NULL;
-       c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
-       c->mfc_un.res.minvif = MAXMIFS;
+       c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+       c->_c.mfc_un.res.minvif = MAXMIFS;
        return c;
 }
 
@@ -1086,8 +948,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
        if (!c)
                return NULL;
-       skb_queue_head_init(&c->mfc_un.unres.unresolved);
-       c->mfc_un.unres.expires = jiffies + 10 * HZ;
+       skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+       c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
        return c;
 }
 
@@ -1095,7 +957,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
  *     A cache entry has gone into a resolved state from queued
  */
 
-static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
                                struct mfc6_cache *uc, struct mfc6_cache *c)
 {
        struct sk_buff *skb;
@@ -1104,12 +966,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
         *      Play the pending entries through our router
         */
 
-       while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+       while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
                if (ipv6_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct ipv6hdr));
 
-                       if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+                       if (mr_fill_mroute(mrt, skb, &c->_c,
+                                          nlmsg_data(nlh)) > 0) {
                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
@@ -1129,9 +992,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
  *     Called under mrt_lock.
  */
 
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
                              mifi_t mifi, int assert)
 {
+       struct sock *mroute6_sk;
        struct sk_buff *skb;
        struct mrt6msg *msg;
        int ret;
@@ -1201,17 +1065,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
        skb->ip_summed = CHECKSUM_UNNECESSARY;
        }
 
-       if (!mrt->mroute6_sk) {
+       rcu_read_lock();
+       mroute6_sk = rcu_dereference(mrt->mroute_sk);
+       if (!mroute6_sk) {
+               rcu_read_unlock();
                kfree_skb(skb);
                return -EINVAL;
        }
 
        mrt6msg_netlink_event(mrt, skb);
 
-       /*
-        *      Deliver to user space multicast routing algorithms
-        */
-       ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+       /* Deliver to user space multicast routing algorithms */
+       ret = sock_queue_rcv_skb(mroute6_sk, skb);
+       rcu_read_unlock();
        if (ret < 0) {
                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
                kfree_skb(skb);
@@ -1220,19 +1086,16 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
        return ret;
 }
 
-/*
- *     Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
+                                 struct sk_buff *skb)
 {
+       struct mfc6_cache *c;
        bool found = false;
        int err;
-       struct mfc6_cache *c;
 
        spin_lock_bh(&mfc_unres_lock);
-       list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+       list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
                        found = true;
@@ -1253,10 +1116,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
                        return -ENOBUFS;
                }
 
-               /*
-                *      Fill in the new cache entry
-                */
-               c->mf6c_parent = -1;
+               /* Fill in the new cache entry */
+               c->_c.mfc_parent = -1;
                c->mf6c_origin = ipv6_hdr(skb)->saddr;
                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
 
@@ -1276,20 +1137,18 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
                }
 
                atomic_inc(&mrt->cache_resolve_queue_len);
-               list_add(&c->list, &mrt->mfc6_unres_queue);
+               list_add(&c->_c.list, &mrt->mfc_unres_queue);
                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
 
                ipmr_do_expire_process(mrt);
        }
 
-       /*
-        *      See if we can append the packet
-        */
-       if (c->mfc_un.unres.unresolved.qlen > 3) {
+       /* See if we can append the packet */
+       if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
-               skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+               skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
                err = 0;
        }
 
@@ -1301,29 +1160,24 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
  *     MFC6 cache manipulation by user space
  */
 
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
                            int parent)
 {
-       int line;
-       struct mfc6_cache *c, *next;
-
-       line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+       struct mfc6_cache *c;
 
-       list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
-               if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
-                   ipv6_addr_equal(&c->mf6c_mcastgrp,
-                                   &mfc->mf6cc_mcastgrp.sin6_addr) &&
-                   (parent == -1 || parent == c->mf6c_parent)) {
-                       write_lock_bh(&mrt_lock);
-                       list_del(&c->list);
-                       write_unlock_bh(&mrt_lock);
+       /* The entries are added/deleted only under RTNL */
+       rcu_read_lock();
+       c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+                                   &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+       rcu_read_unlock();
+       if (!c)
+               return -ENOENT;
+       rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
+       list_del_rcu(&c->_c.list);
 
-                       mr6_netlink_event(mrt, c, RTM_DELROUTE);
-                       ip6mr_cache_free(c);
-                       return 0;
-               }
-       }
-       return -ENOENT;
+       mr6_netlink_event(mrt, c, RTM_DELROUTE);
+       ip6mr_cache_free(c);
+       return 0;
 }
 
 static int ip6mr_device_event(struct notifier_block *this,
@@ -1331,15 +1185,15 @@ static int ip6mr_device_event(struct notifier_block *this,
 {
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct net *net = dev_net(dev);
-       struct mr6_table *mrt;
-       struct mif_device *v;
+       struct mr_table *mrt;
+       struct vif_device *v;
        int ct;
 
        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;
 
        ip6mr_for_each_table(mrt, net) {
-               v = &mrt->vif6_table[0];
+               v = &mrt->vif_table[0];
                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
                        if (v->dev == dev)
                                mif6_delete(mrt, ct, 1, NULL);
@@ -1453,14 +1307,14 @@ void ip6_mr_cleanup(void)
        kmem_cache_destroy(mrt_cachep);
 }
 
-static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
                         struct mf6cctl *mfc, int mrtsock, int parent)
 {
-       bool found = false;
-       int line;
-       struct mfc6_cache *uc, *c;
        unsigned char ttls[MAXMIFS];
-       int i;
+       struct mfc6_cache *uc, *c;
+       struct mr_mfc *_uc;
+       bool found;
+       int i, err;
 
        if (mfc->mf6cc_parent >= MAXMIFS)
                return -ENFILE;
@@ -1469,27 +1323,19 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
        for (i = 0; i < MAXMIFS; i++) {
                if (IF_ISSET(i, &mfc->mf6cc_ifset))
                        ttls[i] = 1;
-
-       }
-
-       line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
-
-       list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
-               if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
-                   ipv6_addr_equal(&c->mf6c_mcastgrp,
-                                   &mfc->mf6cc_mcastgrp.sin6_addr) &&
-                   (parent == -1 || parent == mfc->mf6cc_parent)) {
-                       found = true;
-                       break;
-               }
        }
 
-       if (found) {
+       /* The entries are added/deleted only under RTNL */
+       rcu_read_lock();
+       c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+                                   &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+       rcu_read_unlock();
+       if (c) {
                write_lock_bh(&mrt_lock);
-               c->mf6c_parent = mfc->mf6cc_parent;
-               ip6mr_update_thresholds(mrt, c, ttls);
+               c->_c.mfc_parent = mfc->mf6cc_parent;
+               ip6mr_update_thresholds(mrt, &c->_c, ttls);
                if (!mrtsock)
-                       c->mfc_flags |= MFC_STATIC;
+                       c->_c.mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
                return 0;
@@ -1505,31 +1351,36 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
 
        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
-       c->mf6c_parent = mfc->mf6cc_parent;
-       ip6mr_update_thresholds(mrt, c, ttls);
+       c->_c.mfc_parent = mfc->mf6cc_parent;
+       ip6mr_update_thresholds(mrt, &c->_c, ttls);
        if (!mrtsock)
-               c->mfc_flags |= MFC_STATIC;
+               c->_c.mfc_flags |= MFC_STATIC;
 
-       write_lock_bh(&mrt_lock);
-       list_add(&c->list, &mrt->mfc6_cache_array[line]);
-       write_unlock_bh(&mrt_lock);
+       err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
+                                 ip6mr_rht_params);
+       if (err) {
+               pr_err("ip6mr: rhtable insert error %d\n", err);
+               ip6mr_cache_free(c);
+               return err;
+       }
+       list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
 
-       /*
-        *      Check to see if we resolved a queued list. If so we
-        *      need to send on the frames and tidy up.
+       /* Check to see if we resolved a queued list. If so we
+        * need to send on the frames and tidy up.
         */
        found = false;
        spin_lock_bh(&mfc_unres_lock);
-       list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+       list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+               uc = (struct mfc6_cache *)_uc;
                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
-                       list_del(&uc->list);
+                       list_del(&_uc->list);
                        atomic_dec(&mrt->cache_resolve_queue_len);
                        found = true;
                        break;
                }
        }
-       if (list_empty(&mrt->mfc6_unres_queue))
+       if (list_empty(&mrt->mfc_unres_queue))
                del_timer(&mrt->ipmr_expire_timer);
        spin_unlock_bh(&mfc_unres_lock);
 
@@ -1545,61 +1396,54 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
  *     Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct mr6_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
 {
-       int i;
+       struct mr_mfc *c, *tmp;
        LIST_HEAD(list);
-       struct mfc6_cache *c, *next;
+       int i;
 
-       /*
-        *      Shut down all active vif entries
-        */
+       /* Shut down all active vif entries */
        for (i = 0; i < mrt->maxvif; i++) {
-               if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+               if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
                        continue;
                mif6_delete(mrt, i, 0, &list);
        }
        unregister_netdevice_many(&list);
 
-       /*
-        *      Wipe the cache
-        */
-       for (i = 0; i < MFC6_LINES; i++) {
-               list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
-                       if (!all && (c->mfc_flags & MFC_STATIC))
-                               continue;
-                       write_lock_bh(&mrt_lock);
-                       list_del(&c->list);
-                       write_unlock_bh(&mrt_lock);
-
-                       mr6_netlink_event(mrt, c, RTM_DELROUTE);
-                       ip6mr_cache_free(c);
-               }
+       /* Wipe the cache */
+       list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+               if (!all && (c->mfc_flags & MFC_STATIC))
+                       continue;
+               rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+               list_del_rcu(&c->list);
+               mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+               ip6mr_cache_free((struct mfc6_cache *)c);
        }
 
        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
                spin_lock_bh(&mfc_unres_lock);
-               list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+               list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
                        list_del(&c->list);
-                       mr6_netlink_event(mrt, c, RTM_DELROUTE);
-                       ip6mr_destroy_unres(mrt, c);
+                       mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+                                         RTM_DELROUTE);
+                       ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
 }
 
-static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
 {
        int err = 0;
        struct net *net = sock_net(sk);
 
        rtnl_lock();
        write_lock_bh(&mrt_lock);
-       if (likely(mrt->mroute6_sk == NULL)) {
-               mrt->mroute6_sk = sk;
-               net->ipv6.devconf_all->mc_forwarding++;
-       } else {
+       if (rtnl_dereference(mrt->mroute_sk)) {
                err = -EADDRINUSE;
+       } else {
+               rcu_assign_pointer(mrt->mroute_sk, sk);
+               net->ipv6.devconf_all->mc_forwarding++;
        }
        write_unlock_bh(&mrt_lock);
 
@@ -1617,7 +1461,7 @@ int ip6mr_sk_done(struct sock *sk)
 {
        int err = -EACCES;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1625,9 +1469,9 @@ int ip6mr_sk_done(struct sock *sk)
 
        rtnl_lock();
        ip6mr_for_each_table(mrt, net) {
-               if (sk == mrt->mroute6_sk) {
+               if (sk == rtnl_dereference(mrt->mroute_sk)) {
                        write_lock_bh(&mrt_lock);
-                       mrt->mroute6_sk = NULL;
+                       RCU_INIT_POINTER(mrt->mroute_sk, NULL);
                        net->ipv6.devconf_all->mc_forwarding--;
                        write_unlock_bh(&mrt_lock);
                        inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1641,13 +1485,14 @@ int ip6mr_sk_done(struct sock *sk)
                }
        }
        rtnl_unlock();
+       synchronize_rcu();
 
        return err;
 }
 
-struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
 {
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct flowi6 fl6 = {
                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
                .flowi6_oif     = skb->dev->ifindex,
@@ -1657,8 +1502,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
                return NULL;
 
-       return mrt->mroute6_sk;
+       return rcu_access_pointer(mrt->mroute_sk);
 }
+EXPORT_SYMBOL(mroute6_is_socket);
 
 /*
  *     Socket options and virtual interface manipulation. The whole
@@ -1674,7 +1520,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
        struct mf6cctl mfc;
        mifi_t mifi;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1685,7 +1531,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
                return -ENOENT;
 
        if (optname != MRT6_INIT) {
-               if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+               if (sk != rcu_access_pointer(mrt->mroute_sk) &&
+                   !ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EACCES;
        }
 
@@ -1707,7 +1554,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
                if (vif.mif6c_mifi >= MAXMIFS)
                        return -ENFILE;
                rtnl_lock();
-               ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+               ret = mif6_add(net, mrt, &vif,
+                              sk == rtnl_dereference(mrt->mroute_sk));
                rtnl_unlock();
                return ret;
 
@@ -1742,7 +1590,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
                else
                        ret = ip6mr_mfc_add(net, mrt, &mfc,
-                                           sk == mrt->mroute6_sk, parent);
+                                           sk ==
+                                           rtnl_dereference(mrt->mroute_sk),
+                                           parent);
                rtnl_unlock();
                return ret;
 
@@ -1794,7 +1644,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
                if (v != RT_TABLE_DEFAULT && v >= 100000000)
                        return -EINVAL;
-               if (sk == mrt->mroute6_sk)
+               if (sk == rcu_access_pointer(mrt->mroute_sk))
                        return -EBUSY;
 
                rtnl_lock();
@@ -1825,7 +1675,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
        int olr;
        int val;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1873,10 +1723,10 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 {
        struct sioc_sg_req6 sr;
        struct sioc_mif_req6 vr;
-       struct mif_device *vif;
+       struct vif_device *vif;
        struct mfc6_cache *c;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
        if (!mrt)
@@ -1889,8 +1739,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
                if (vr.mifi >= mrt->maxvif)
                        return -EINVAL;
                read_lock(&mrt_lock);
-               vif = &mrt->vif6_table[vr.mifi];
-               if (MIF_EXISTS(mrt, vr.mifi)) {
+               vif = &mrt->vif_table[vr.mifi];
+               if (VIF_EXISTS(mrt, vr.mifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
@@ -1907,19 +1757,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;
 
-               read_lock(&mrt_lock);
+               rcu_read_lock();
                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
                if (c) {
-                       sr.pktcnt = c->mfc_un.res.pkt;
-                       sr.bytecnt = c->mfc_un.res.bytes;
-                       sr.wrong_if = c->mfc_un.res.wrong_if;
-                       read_unlock(&mrt_lock);
+                       sr.pktcnt = c->_c.mfc_un.res.pkt;
+                       sr.bytecnt = c->_c.mfc_un.res.bytes;
+                       sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+                       rcu_read_unlock();
 
                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
-               read_unlock(&mrt_lock);
+               rcu_read_unlock();
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
@@ -1947,10 +1797,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 {
        struct compat_sioc_sg_req6 sr;
        struct compat_sioc_mif_req6 vr;
-       struct mif_device *vif;
+       struct vif_device *vif;
        struct mfc6_cache *c;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
        if (!mrt)
@@ -1963,8 +1813,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
                if (vr.mifi >= mrt->maxvif)
                        return -EINVAL;
                read_lock(&mrt_lock);
-               vif = &mrt->vif6_table[vr.mifi];
-               if (MIF_EXISTS(mrt, vr.mifi)) {
+               vif = &mrt->vif_table[vr.mifi];
+               if (VIF_EXISTS(mrt, vr.mifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
@@ -1981,19 +1831,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;
 
-               read_lock(&mrt_lock);
+               rcu_read_lock();
                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
                if (c) {
-                       sr.pktcnt = c->mfc_un.res.pkt;
-                       sr.bytecnt = c->mfc_un.res.bytes;
-                       sr.wrong_if = c->mfc_un.res.wrong_if;
-                       read_unlock(&mrt_lock);
+                       sr.pktcnt = c->_c.mfc_un.res.pkt;
+                       sr.bytecnt = c->_c.mfc_un.res.bytes;
+                       sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+                       rcu_read_unlock();
 
                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
-               read_unlock(&mrt_lock);
+               rcu_read_unlock();
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
@@ -2014,11 +1864,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
  *     Processing handlers for ip6mr_forward
  */
 
-static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
                          struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 {
        struct ipv6hdr *ipv6h;
-       struct mif_device *vif = &mrt->vif6_table[vifi];
+       struct vif_device *vif = &mrt->vif_table[vifi];
        struct net_device *dev;
        struct dst_entry *dst;
        struct flowi6 fl6;
@@ -2088,46 +1938,50 @@ out_free:
        return 0;
 }
 
-static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
 {
        int ct;
 
        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
-               if (mrt->vif6_table[ct].dev == dev)
+               if (mrt->vif_table[ct].dev == dev)
                        break;
        }
        return ct;
 }
 
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
-                          struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
+                          struct sk_buff *skb, struct mfc6_cache *c)
 {
        int psend = -1;
        int vif, ct;
        int true_vifi = ip6mr_find_vif(mrt, skb->dev);
 
-       vif = cache->mf6c_parent;
-       cache->mfc_un.res.pkt++;
-       cache->mfc_un.res.bytes += skb->len;
-       cache->mfc_un.res.lastuse = jiffies;
+       vif = c->_c.mfc_parent;
+       c->_c.mfc_un.res.pkt++;
+       c->_c.mfc_un.res.bytes += skb->len;
+       c->_c.mfc_un.res.lastuse = jiffies;
 
-       if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+       if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
                struct mfc6_cache *cache_proxy;
 
                /* For an (*,G) entry, we only check that the incoming
                 * interface is part of the static tree.
                 */
-               cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+               rcu_read_lock();
+               cache_proxy = mr_mfc_find_any_parent(mrt, vif);
                if (cache_proxy &&
-                   cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+                   cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
+                       rcu_read_unlock();
                        goto forward;
+               }
+               rcu_read_unlock();
        }
 
        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
-       if (mrt->vif6_table[vif].dev != skb->dev) {
-               cache->mfc_un.res.wrong_if++;
+       if (mrt->vif_table[vif].dev != skb->dev) {
+               c->_c.mfc_un.res.wrong_if++;
 
                if (true_vifi >= 0 && mrt->mroute_do_assert &&
                    /* pimsm uses asserts, when switching from RPT to SPT,
@@ -2136,52 +1990,55 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
                       large chunk of pimd to kernel. Ough... --ANK
                     */
                    (mrt->mroute_do_pim ||
-                    cache->mfc_un.res.ttls[true_vifi] < 255) &&
+                    c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
-                              cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
-                       cache->mfc_un.res.last_assert = jiffies;
+                              c->_c.mfc_un.res.last_assert +
+                              MFC_ASSERT_THRESH)) {
+                       c->_c.mfc_un.res.last_assert = jiffies;
                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
                }
                goto dont_forward;
        }
 
 forward:
-       mrt->vif6_table[vif].pkt_in++;
-       mrt->vif6_table[vif].bytes_in += skb->len;
+       mrt->vif_table[vif].pkt_in++;
+       mrt->vif_table[vif].bytes_in += skb->len;
 
        /*
         *      Forward the frame
         */
-       if (ipv6_addr_any(&cache->mf6c_origin) &&
-           ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+       if (ipv6_addr_any(&c->mf6c_origin) &&
+           ipv6_addr_any(&c->mf6c_mcastgrp)) {
                if (true_vifi >= 0 &&
-                   true_vifi != cache->mf6c_parent &&
+                   true_vifi != c->_c.mfc_parent &&
                    ipv6_hdr(skb)->hop_limit >
-                               cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+                               c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
                        /* It's an (*,*) entry and the packet is not coming from
                         * the upstream: forward the packet to the upstream
                         * only.
                         */
-                       psend = cache->mf6c_parent;
+                       psend = c->_c.mfc_parent;
                        goto last_forward;
                }
                goto dont_forward;
        }
-       for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+       for (ct = c->_c.mfc_un.res.maxvif - 1;
+            ct >= c->_c.mfc_un.res.minvif; ct--) {
                /* For (*,G) entry, don't forward to the incoming interface */
-               if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
-                   ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+               if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
+                   ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
-                                       ip6mr_forward2(net, mrt, skb2, cache, psend);
+                                       ip6mr_forward2(net, mrt, skb2,
+                                                      c, psend);
                        }
                        psend = ct;
                }
        }
 last_forward:
        if (psend != -1) {
-               ip6mr_forward2(net, mrt, skb, cache, psend);
+               ip6mr_forward2(net, mrt, skb, c, psend);
                return;
        }
 
@@ -2198,7 +2055,7 @@ int ip6_mr_input(struct sk_buff *skb)
 {
        struct mfc6_cache *cache;
        struct net *net = dev_net(skb->dev);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct flowi6 fl6 = {
                .flowi6_iif     = skb->dev->ifindex,
                .flowi6_mark    = skb->mark,
@@ -2248,66 +2105,11 @@ int ip6_mr_input(struct sk_buff *skb)
        return 0;
 }
 
-
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
-                              struct mfc6_cache *c, struct rtmsg *rtm)
-{
-       struct rta_mfc_stats mfcs;
-       struct nlattr *mp_attr;
-       struct rtnexthop *nhp;
-       unsigned long lastuse;
-       int ct;
-
-       /* If cache is unresolved, don't try to parse IIF and OIF */
-       if (c->mf6c_parent >= MAXMIFS) {
-               rtm->rtm_flags |= RTNH_F_UNRESOLVED;
-               return -ENOENT;
-       }
-
-       if (MIF_EXISTS(mrt, c->mf6c_parent) &&
-           nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
-               return -EMSGSIZE;
-       mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
-       if (!mp_attr)
-               return -EMSGSIZE;
-
-       for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
-               if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
-                       nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
-                       if (!nhp) {
-                               nla_nest_cancel(skb, mp_attr);
-                               return -EMSGSIZE;
-                       }
-
-                       nhp->rtnh_flags = 0;
-                       nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-                       nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
-                       nhp->rtnh_len = sizeof(*nhp);
-               }
-       }
-
-       nla_nest_end(skb, mp_attr);
-
-       lastuse = READ_ONCE(c->mfc_un.res.lastuse);
-       lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
-       mfcs.mfcs_packets = c->mfc_un.res.pkt;
-       mfcs.mfcs_bytes = c->mfc_un.res.bytes;
-       mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
-       if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
-           nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
-                             RTA_PAD))
-               return -EMSGSIZE;
-
-       rtm->rtm_type = RTN_MULTICAST;
-       return 1;
-}
-
 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
                    u32 portid)
 {
        int err;
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct mfc6_cache *cache;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 
@@ -2368,15 +2170,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
                return err;
        }
 
-       if (rtm->rtm_flags & RTM_F_NOTIFY)
-               cache->mfc_flags |= MFC_NOTIFY;
-
-       err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+       err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
        read_unlock(&mrt_lock);
        return err;
 }
 
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
                             int flags)
 {
@@ -2398,7 +2197,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
                goto nla_put_failure;
        rtm->rtm_type = RTN_MULTICAST;
        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
-       if (c->mfc_flags & MFC_STATIC)
+       if (c->_c.mfc_flags & MFC_STATIC)
                rtm->rtm_protocol = RTPROT_STATIC;
        else
                rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2407,7 +2206,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
        if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
            nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
                goto nla_put_failure;
-       err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+       err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
        /* do not break the dump if cache is unresolved */
        if (err < 0 && err != -ENOENT)
                goto nla_put_failure;
@@ -2420,6 +2219,14 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
+static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                             u32 portid, u32 seq, struct mr_mfc *c,
+                             int cmd, int flags)
+{
+       return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
+                                cmd, flags);
+}
+
 static int mr6_msgsize(bool unresolved, int maxvif)
 {
        size_t len =
@@ -2441,14 +2248,14 @@ static int mr6_msgsize(bool unresolved, int maxvif)
        return len;
 }
 
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
                              int cmd)
 {
        struct net *net = read_pnet(&mrt->net);
        struct sk_buff *skb;
        int err = -ENOBUFS;
 
-       skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+       skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
                        GFP_ATOMIC);
        if (!skb)
                goto errout;
@@ -2483,7 +2290,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
        return len;
 }
 
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
 {
        struct net *net = read_pnet(&mrt->net);
        struct nlmsghdr *nlh;
@@ -2533,65 +2340,6 @@ errout:
 
 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
-       struct net *net = sock_net(skb->sk);
-       struct mr6_table *mrt;
-       struct mfc6_cache *mfc;
-       unsigned int t = 0, s_t;
-       unsigned int h = 0, s_h;
-       unsigned int e = 0, s_e;
-
-       s_t = cb->args[0];
-       s_h = cb->args[1];
-       s_e = cb->args[2];
-
-       read_lock(&mrt_lock);
-       ip6mr_for_each_table(mrt, net) {
-               if (t < s_t)
-                       goto next_table;
-               if (t > s_t)
-                       s_h = 0;
-               for (h = s_h; h < MFC6_LINES; h++) {
-                       list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
-                               if (e < s_e)
-                                       goto next_entry;
-                               if (ip6mr_fill_mroute(mrt, skb,
-                                                     NETLINK_CB(cb->skb).portid,
-                                                     cb->nlh->nlmsg_seq,
-                                                     mfc, RTM_NEWROUTE,
-                                                     NLM_F_MULTI) < 0)
-                                       goto done;
-next_entry:
-                               e++;
-                       }
-                       e = s_e = 0;
-               }
-               spin_lock_bh(&mfc_unres_lock);
-               list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
-                       if (e < s_e)
-                               goto next_entry2;
-                       if (ip6mr_fill_mroute(mrt, skb,
-                                             NETLINK_CB(cb->skb).portid,
-                                             cb->nlh->nlmsg_seq,
-                                             mfc, RTM_NEWROUTE,
-                                             NLM_F_MULTI) < 0) {
-                               spin_unlock_bh(&mfc_unres_lock);
-                               goto done;
-                       }
-next_entry2:
-                       e++;
-               }
-               spin_unlock_bh(&mfc_unres_lock);
-               e = s_e = 0;
-               s_h = 0;
-next_table:
-               t++;
-       }
-done:
-       read_unlock(&mrt_lock);
-
-       cb->args[2] = e;
-       cb->args[1] = h;
-       cb->args[0] = t;
-
-       return skb->len;
+       return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
+                               _ip6mr_fill_mroute, &mfc_unres_lock);
 }
index 2453516..4d780c7 100644 (file)
@@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
 }
 EXPORT_SYMBOL(compat_ipv6_getsockopt);
 #endif
-
index c87b483..32f98bc 100644 (file)
@@ -103,6 +103,7 @@ static void __net_exit defrag6_net_exit(struct net *net)
 
 static struct pernet_operations defrag6_net_ops = {
        .exit = defrag6_net_exit,
+       .async = true,
 };
 
 static int __init nf_defrag_init(void)
index b8858c5..1678cf0 100644 (file)
@@ -355,4 +355,3 @@ void ipv6_misc_proc_exit(void)
 {
        unregister_pernet_subsys(&ipv6_proc_ops);
 }
-
index aa709b6..e2bb408 100644 (file)
@@ -460,7 +460,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
         * case it will always be non-zero. Otherwise now is the time to do it.
         */
        if (!fl6->mp_hash)
-               fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+               fl6->mp_hash = rt6_multipath_hash(fl6, NULL, NULL);
 
        if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
                return match;
@@ -1786,10 +1786,12 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
 
 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
-                                 struct flow_keys *keys)
+                                 struct flow_keys *keys,
+                                 struct flow_keys *flkeys)
 {
        const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
        const struct ipv6hdr *key_iph = outer_iph;
+       struct flow_keys *_flkeys = flkeys;
        const struct ipv6hdr *inner_iph;
        const struct icmp6hdr *icmph;
        struct ipv6hdr _inner_iph;
@@ -1811,22 +1813,31 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
                goto out;
 
        key_iph = inner_iph;
+       _flkeys = NULL;
 out:
        memset(keys, 0, sizeof(*keys));
        keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
-       keys->addrs.v6addrs.src = key_iph->saddr;
-       keys->addrs.v6addrs.dst = key_iph->daddr;
-       keys->tags.flow_label = ip6_flowinfo(key_iph);
-       keys->basic.ip_proto = key_iph->nexthdr;
+       if (_flkeys) {
+               keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
+               keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
+               keys->tags.flow_label = _flkeys->tags.flow_label;
+               keys->basic.ip_proto = _flkeys->basic.ip_proto;
+       } else {
+               keys->addrs.v6addrs.src = key_iph->saddr;
+               keys->addrs.v6addrs.dst = key_iph->daddr;
+               keys->tags.flow_label = ip6_flowinfo(key_iph);
+               keys->basic.ip_proto = key_iph->nexthdr;
+       }
 }
 
 /* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
+                      struct flow_keys *flkeys)
 {
        struct flow_keys hash_keys;
 
        if (skb) {
-               ip6_multipath_l3_keys(skb, &hash_keys);
+               ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
                return flow_hash_from_keys(&hash_keys) >> 1;
        }
 
@@ -1847,12 +1858,17 @@ void ip6_route_input(struct sk_buff *skb)
                .flowi6_mark = skb->mark,
                .flowi6_proto = iph->nexthdr,
        };
+       struct flow_keys *flkeys = NULL, _flkeys;
 
        tun_info = skb_tunnel_info(skb);
        if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
                fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+
+       if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
+               flkeys = &_flkeys;
+
        if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
-               fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+               fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys);
        skb_dst_drop(skb);
        skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
 }
index 3a1775a..182db07 100644 (file)
@@ -1878,6 +1878,7 @@ static struct pernet_operations sit_net_ops = {
        .exit_batch = sit_exit_batch_net,
        .id   = &sit_net_id,
        .size = sizeof(struct sit_net),
+       .async = true,
 };
 
 static void __exit sit_cleanup(void)
index b15075a..16f4347 100644 (file)
@@ -196,4 +196,3 @@ void xfrm6_state_fini(void)
 {
        xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
 }
-
index f85f0d7..a967361 100644 (file)
@@ -353,6 +353,7 @@ static struct pernet_operations xfrm6_tunnel_net_ops = {
        .exit   = xfrm6_tunnel_net_exit,
        .id     = &xfrm6_tunnel_net_id,
        .size   = sizeof(struct xfrm6_tunnel_net),
+       .async  = true,
 };
 
 static int __init xfrm6_tunnel_init(void)
index 9d5649e..2c1c8b3 100644 (file)
@@ -433,6 +433,7 @@ static void kcm_proc_exit_net(struct net *net)
 static struct pernet_operations kcm_net_ops = {
        .init = kcm_proc_init_net,
        .exit = kcm_proc_exit_net,
+       .async = true,
 };
 
 int __init kcm_proc_init(void)
index 4355946..a6cd071 100644 (file)
@@ -2015,6 +2015,7 @@ static struct pernet_operations kcm_net_ops = {
        .exit = kcm_exit_net,
        .id   = &kcm_net_id,
        .size = sizeof(struct kcm_net),
+       .async = true,
 };
 
 static int __init kcm_init(void)
index 7e2e718..3ac08ab 100644 (file)
@@ -3863,6 +3863,7 @@ static struct pernet_operations pfkey_net_ops = {
        .exit = pfkey_net_exit,
        .id   = &pfkey_net_id,
        .size = sizeof(struct netns_pfkey),
+       .async = true,
 };
 
 static void __exit ipsec_pfkey_exit(void)
index 99a03c7..0c4f49a 100644 (file)
@@ -1770,6 +1770,7 @@ static struct pernet_operations pppol2tp_net_ops = {
        .init = pppol2tp_init_net,
        .exit = pppol2tp_exit_net,
        .id   = &pppol2tp_net_id,
+       .async = true,
 };
 
 /*****************************************************************************
index d625179..6a340c9 100644 (file)
@@ -604,6 +604,7 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
 static struct pernet_operations ip_vs_lblc_ops = {
        .init = __ip_vs_lblc_init,
        .exit = __ip_vs_lblc_exit,
+       .async = true,
 };
 
 static int __init ip_vs_lblc_init(void)
index 84c57b6..0627881 100644 (file)
@@ -789,6 +789,7 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
 static struct pernet_operations ip_vs_lblcr_ops = {
        .init = __ip_vs_lblcr_init,
        .exit = __ip_vs_lblcr_exit,
+       .async = true,
 };
 
 static int __init ip_vs_lblcr_init(void)
index 92139a0..64b875e 100644 (file)
@@ -398,6 +398,7 @@ static struct pernet_operations synproxy_net_ops = {
        .exit           = synproxy_net_exit,
        .id             = &synproxy_net_id,
        .size           = sizeof(struct synproxy_net),
+       .async          = true,
 };
 
 static int __init synproxy_core_init(void)
index 66f5aca..db2fe09 100644 (file)
@@ -1345,6 +1345,7 @@ static struct pernet_operations hashlimit_net_ops = {
        .exit   = hashlimit_net_exit,
        .id     = &hashlimit_net_id,
        .size   = sizeof(struct hashlimit_net),
+       .async  = true,
 };
 
 static int __init hashlimit_mt_init(void)
index 6d232d1..19efdb7 100644 (file)
@@ -687,6 +687,7 @@ static struct pernet_operations recent_net_ops = {
        .exit   = recent_net_exit,
        .id     = &recent_net_id,
        .size   = sizeof(struct recent_net),
+       .async  = true,
 };
 
 static struct xt_match recent_mt_reg[] __read_mostly = {
index 7778751..9454e83 100644 (file)
@@ -342,6 +342,7 @@ static struct pernet_operations phonet_net_ops = {
        .exit = phonet_exit_net,
        .id   = &phonet_net_id,
        .size = sizeof(struct phonet_net),
+       .async = true,
 };
 
 /* Initialize Phonet devices list */
index a937f18..f712610 100644 (file)
@@ -77,6 +77,7 @@ static int rds_release(struct socket *sock)
        rds_send_drop_to(rs, NULL);
        rds_rdma_drop_keys(rs);
        rds_notify_queue_get(rs, NULL);
+       __skb_queue_purge(&rs->rs_zcookie_queue);
 
        spin_lock_bh(&rds_sock_lock);
        list_del_init(&rs->rs_item);
@@ -144,7 +145,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
  *  -  to signal that a previously congested destination may have become
  *     uncongested
  *  -  A notification has been queued to the socket (this can be a congestion
- *     update, or a RDMA completion).
+ *     update, or a RDMA completion, or a MSG_ZEROCOPY completion).
  *
  * EPOLLOUT is asserted if there is room on the send queue. This does not mean
  * however, that the next sendmsg() call will succeed. If the application tries
@@ -178,7 +179,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
                spin_unlock(&rs->rs_lock);
        }
        if (!list_empty(&rs->rs_recv_queue) ||
-           !list_empty(&rs->rs_notify_queue))
+           !list_empty(&rs->rs_notify_queue) ||
+           !skb_queue_empty(&rs->rs_zcookie_queue))
                mask |= (EPOLLIN | EPOLLRDNORM);
        if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
                mask |= (EPOLLOUT | EPOLLWRNORM);
@@ -513,6 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
        INIT_LIST_HEAD(&rs->rs_recv_queue);
        INIT_LIST_HEAD(&rs->rs_notify_queue);
        INIT_LIST_HEAD(&rs->rs_cong_list);
+       skb_queue_head_init(&rs->rs_zcookie_queue);
        spin_lock_init(&rs->rs_rdma_lock);
        rs->rs_rdma_keys = RB_ROOT;
        rs->rs_rx_traces = 0;
index 6518345..116cf87 100644 (file)
@@ -58,32 +58,26 @@ EXPORT_SYMBOL_GPL(rds_message_addref);
 
 static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie)
 {
-       struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
-       int ncookies;
-       u32 *ptr;
+       struct rds_zcopy_cookies *ck = (struct rds_zcopy_cookies *)skb->cb;
+       int ncookies = ck->num;
 
-       if (serr->ee.ee_origin != SO_EE_ORIGIN_ZCOOKIE)
+       if (ncookies == RDS_MAX_ZCOOKIES)
                return false;
-       ncookies = serr->ee.ee_data;
-       if (ncookies == SO_EE_ORIGIN_MAX_ZCOOKIES)
-               return false;
-       ptr = skb_put(skb, sizeof(u32));
-       *ptr = cookie;
-       serr->ee.ee_data = ++ncookies;
+       ck->cookies[ncookies] = cookie;
+       ck->num =  ++ncookies;
        return true;
 }
 
 static void rds_rm_zerocopy_callback(struct rds_sock *rs,
                                     struct rds_znotifier *znotif)
 {
-       struct sock *sk = rds_rs_to_sk(rs);
        struct sk_buff *skb, *tail;
-       struct sock_exterr_skb *serr;
        unsigned long flags;
        struct sk_buff_head *q;
        u32 cookie = znotif->z_cookie;
+       struct rds_zcopy_cookies *ck;
 
-       q = &sk->sk_error_queue;
+       q = &rs->rs_zcookie_queue;
        spin_lock_irqsave(&q->lock, flags);
        tail = skb_peek_tail(q);
 
@@ -91,22 +85,19 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
                spin_unlock_irqrestore(&q->lock, flags);
                mm_unaccount_pinned_pages(&znotif->z_mmp);
                consume_skb(rds_skb_from_znotifier(znotif));
-               sk->sk_error_report(sk);
+               /* caller invokes rds_wake_sk_sleep() */
                return;
        }
 
        skb = rds_skb_from_znotifier(znotif);
-       serr = SKB_EXT_ERR(skb);
-       memset(&serr->ee, 0, sizeof(serr->ee));
-       serr->ee.ee_errno = 0;
-       serr->ee.ee_origin = SO_EE_ORIGIN_ZCOOKIE;
-       serr->ee.ee_info = 0;
+       ck = (struct rds_zcopy_cookies *)skb->cb;
+       memset(ck, 0, sizeof(*ck));
        WARN_ON(!skb_zcookie_add(skb, cookie));
 
        __skb_queue_tail(q, skb);
 
        spin_unlock_irqrestore(&q->lock, flags);
-       sk->sk_error_report(sk);
+       /* caller invokes rds_wake_sk_sleep() */
 
        mm_unaccount_pinned_pages(&znotif->z_mmp);
 }
@@ -129,6 +120,7 @@ static void rds_message_purge(struct rds_message *rm)
                if (rm->data.op_mmp_znotifier) {
                        zcopy = true;
                        rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
+                       rds_wake_sk_sleep(rs);
                        rm->data.op_mmp_znotifier = NULL;
                }
                sock_put(rds_rs_to_sk(rs));
@@ -362,10 +354,12 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
                int total_copied = 0;
                struct sk_buff *skb;
 
-               skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32),
-                               GFP_KERNEL);
+               skb = alloc_skb(0, GFP_KERNEL);
                if (!skb)
                        return -ENOMEM;
+               BUILD_BUG_ON(sizeof(skb->cb) <
+                            max_t(int, sizeof(struct rds_znotifier),
+                                  sizeof(struct rds_zcopy_cookies)));
                rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
                if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
                                            length)) {
index 31cd388..33b1635 100644 (file)
@@ -603,6 +603,8 @@ struct rds_sock {
        /* Socket receive path trace points*/
        u8                      rs_rx_traces;
        u8                      rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
+
+       struct sk_buff_head     rs_zcookie_queue;
 };
 
 static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
index b080961..d507477 100644 (file)
@@ -577,6 +577,32 @@ out:
        return ret;
 }
 
+static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg)
+{
+       struct sk_buff *skb;
+       struct sk_buff_head *q = &rs->rs_zcookie_queue;
+       struct rds_zcopy_cookies *done;
+
+       if (!msg->msg_control)
+               return false;
+
+       if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) ||
+           msg->msg_controllen < CMSG_SPACE(sizeof(*done)))
+               return false;
+
+       skb = skb_dequeue(q);
+       if (!skb)
+               return false;
+       done = (struct rds_zcopy_cookies *)skb->cb;
+       if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done),
+                    done)) {
+               skb_queue_head(q, skb);
+               return false;
+       }
+       consume_skb(skb);
+       return true;
+}
+
 int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
                int msg_flags)
 {
@@ -611,7 +637,9 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
                if (!rds_next_incoming(rs, &inc)) {
                        if (nonblock) {
-                               ret = -EAGAIN;
+                               bool reaped = rds_recvmsg_zcookie(rs, msg);
+
+                               ret = reaped ?  0 : -EAGAIN;
                                break;
                        }
 
@@ -660,6 +688,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
                        ret = -EFAULT;
                        goto out;
                }
+               rds_recvmsg_zcookie(rs, msg);
 
                rds_stats_inc(s_recv_delivered);
 
index cb3c5d4..da72e0c 100644 (file)
@@ -413,6 +413,7 @@ static struct pernet_operations bpf_net_ops = {
        .exit_batch = bpf_exit_net,
        .id   = &bpf_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init bpf_init_module(void)
index e4b880f..371e5e4 100644 (file)
@@ -222,6 +222,7 @@ static struct pernet_operations connmark_net_ops = {
        .exit_batch = connmark_exit_net,
        .id   = &connmark_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init connmark_init_module(void)
index d5c2e52..1fb1f1f 100644 (file)
@@ -677,6 +677,7 @@ static struct pernet_operations csum_net_ops = {
        .exit_batch = csum_exit_net,
        .id   = &csum_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_DESCRIPTION("Checksum updating actions");
index f072bcf..7456325 100644 (file)
@@ -247,6 +247,7 @@ static struct pernet_operations gact_net_ops = {
        .exit_batch = gact_exit_net,
        .id   = &gact_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
index a5994cf..555b1ca 100644 (file)
@@ -870,6 +870,7 @@ static struct pernet_operations ife_net_ops = {
        .exit_batch = ife_exit_net,
        .id   = &ife_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init ife_init_module(void)
index 9784629..1086671 100644 (file)
@@ -349,6 +349,7 @@ static struct pernet_operations ipt_net_ops = {
        .exit_batch = ipt_exit_net,
        .id   = &ipt_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
@@ -399,6 +400,7 @@ static struct pernet_operations xt_net_ops = {
        .exit_batch = xt_exit_net,
        .id   = &xt_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
index fd34015..64c8657 100644 (file)
@@ -353,6 +353,7 @@ static struct pernet_operations mirred_net_ops = {
        .exit_batch = mirred_exit_net,
        .id   = &mirred_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002)");
index 4b5848b..b1bc757 100644 (file)
@@ -323,6 +323,7 @@ static struct pernet_operations nat_net_ops = {
        .exit_batch = nat_exit_net,
        .id   = &nat_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_DESCRIPTION("Stateless NAT actions");
index 094303c..5e8cc8f 100644 (file)
@@ -465,6 +465,7 @@ static struct pernet_operations pedit_net_ops = {
        .exit_batch = pedit_exit_net,
        .id   = &pedit_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
index ff55bd6..51fe4fe 100644 (file)
@@ -347,6 +347,7 @@ static struct pernet_operations police_net_ops = {
        .exit_batch = police_exit_net,
        .id   = &police_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init police_init_module(void)
index 9765145..238dfd2 100644 (file)
@@ -248,6 +248,7 @@ static struct pernet_operations sample_net_ops = {
        .exit_batch = sample_exit_net,
        .id   = &sample_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init sample_init_module(void)
index 8244e22..91816d7 100644 (file)
@@ -216,6 +216,7 @@ static struct pernet_operations simp_net_ops = {
        .exit_batch = simp_exit_net,
        .id   = &simp_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2005)");
index ddf69fc..7971510 100644 (file)
@@ -253,6 +253,7 @@ static struct pernet_operations skbedit_net_ops = {
        .exit_batch = skbedit_exit_net,
        .id   = &skbedit_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
index a406f19..febec75 100644 (file)
@@ -278,6 +278,7 @@ static struct pernet_operations skbmod_net_ops = {
        .exit_batch = skbmod_exit_net,
        .id   = &skbmod_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
index 41ff9d0..9169b7e 100644 (file)
@@ -337,6 +337,7 @@ static struct pernet_operations tunnel_key_net_ops = {
        .exit_batch = tunnel_key_exit_net,
        .id   = &tunnel_key_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init tunnel_key_init_module(void)
index 71411a2..c2ee7fd 100644 (file)
@@ -313,6 +313,7 @@ static struct pernet_operations vlan_net_ops = {
        .exit_batch = vlan_exit_net,
        .id   = &vlan_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init vlan_init_module(void)
index 9d1a8bb..19f9f42 100644 (file)
@@ -1618,6 +1618,7 @@ static struct pernet_operations tcf_net_ops = {
        .exit = tcf_net_exit,
        .id   = &tcf_net_id,
        .size = sizeof(struct tcf_net),
+       .async = true,
 };
 
 static int __init tc_filter_init(void)
index 27e672c..68f9d94 100644 (file)
@@ -739,6 +739,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
                               unsigned int len)
 {
+       bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
        const struct Qdisc_class_ops *cops;
        unsigned long cl;
        u32 parentid;
@@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
                 * If child was empty even before update then backlog
                 * counter is screwed and we skip notification because
                 * parent class is already passive.
+                *
+                * If the original child was offloaded then it is allowed
+                * to be seem as empty, so the parent is notified anyway.
                 */
-               notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
+               notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
+                                                      !qdisc_is_offloaded);
                /* TODO: perform the search on a per txq basis */
                sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
                if (sch == NULL) {
index efbf51f..222e53d 100644 (file)
@@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch)
        sch->q.qlen = 0;
 }
 
-static int prio_offload(struct Qdisc *sch, bool enable)
+static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
 {
-       struct prio_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_prio_qopt_offload opt = {
                .handle = sch->handle,
@@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable)
        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return -EOPNOTSUPP;
 
-       if (enable) {
+       if (qopt) {
                opt.command = TC_PRIO_REPLACE;
-               opt.replace_params.bands = q->bands;
-               memcpy(&opt.replace_params.priomap, q->prio2band,
+               opt.replace_params.bands = qopt->bands;
+               memcpy(&opt.replace_params.priomap, qopt->priomap,
                       TC_PRIO_MAX + 1);
                opt.replace_params.qstats = &sch->qstats;
        } else {
@@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch)
        struct prio_sched_data *q = qdisc_priv(sch);
 
        tcf_block_put(q->block);
-       prio_offload(sch, false);
+       prio_offload(sch, NULL);
        for (prio = 0; prio < q->bands; prio++)
                qdisc_destroy(q->queues[prio]);
 }
@@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
                }
        }
 
+       prio_offload(sch, qopt);
        sch_tree_lock(sch);
        q->bands = qopt->bands;
        memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
@@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
        }
 
        sch_tree_unlock(sch);
-       prio_offload(sch, true);
        return 0;
 }
 
@@ -309,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                      struct Qdisc **old, struct netlink_ext_ack *extack)
 {
        struct prio_sched_data *q = qdisc_priv(sch);
+       struct tc_prio_qopt_offload graft_offload;
+       struct net_device *dev = qdisc_dev(sch);
        unsigned long band = arg - 1;
+       bool any_qdisc_is_offloaded;
+       int err;
 
        if (new == NULL)
                new = &noop_qdisc;
 
        *old = qdisc_replace(sch, new, &q->queues[band]);
+
+       if (!tc_can_offload(dev))
+               return 0;
+
+       graft_offload.handle = sch->handle;
+       graft_offload.parent = sch->parent;
+       graft_offload.graft_params.band = band;
+       graft_offload.graft_params.child_handle = new->handle;
+       graft_offload.command = TC_PRIO_GRAFT;
+
+       err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
+                                           &graft_offload);
+
+       /* Don't report error if the graft is part of destroy operation. */
+       if (err && new != &noop_qdisc) {
+               /* Don't report error if the parent, the old child and the new
+                * one are not offloaded.
+                */
+               any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
+               any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED;
+               if (*old)
+                       any_qdisc_is_offloaded |= (*old)->flags &
+                                                  TCQ_F_OFFLOADED;
+
+               if (any_qdisc_is_offloaded)
+                       NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
+       }
+
        return 0;
 }
 
index 38ae22b..26684e0 100644 (file)
@@ -7,7 +7,6 @@
  *  applicable with RoCE-cards only
  *
  *  Initial restrictions:
- *    - non-blocking connect postponed
  *    - IPv6 support postponed
  *    - support for alternate links postponed
  *    - partial support for non-blocking sockets only
@@ -24,7 +23,6 @@
 
 #include <linux/module.h>
 #include <linux/socket.h>
-#include <linux/inetdevice.h>
 #include <linux/workqueue.h>
 #include <linux/in.h>
 #include <linux/sched/signal.h>
@@ -273,46 +271,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
        smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
 }
 
-/* determine subnet and mask of internal TCP socket */
-int smc_netinfo_by_tcpsk(struct socket *clcsock,
-                        __be32 *subnet, u8 *prefix_len)
-{
-       struct dst_entry *dst = sk_dst_get(clcsock->sk);
-       struct in_device *in_dev;
-       struct sockaddr_in addr;
-       int rc = -ENOENT;
-
-       if (!dst) {
-               rc = -ENOTCONN;
-               goto out;
-       }
-       if (!dst->dev) {
-               rc = -ENODEV;
-               goto out_rel;
-       }
-
-       /* get address to which the internal TCP socket is bound */
-       kernel_getsockname(clcsock, (struct sockaddr *)&addr);
-       /* analyze IPv4 specific data of net_device belonging to TCP socket */
-       rcu_read_lock();
-       in_dev = __in_dev_get_rcu(dst->dev);
-       for_ifa(in_dev) {
-               if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
-                       continue;
-               *prefix_len = inet_mask_len(ifa->ifa_mask);
-               *subnet = ifa->ifa_address & ifa->ifa_mask;
-               rc = 0;
-               break;
-       } endfor_ifa(in_dev);
-       rcu_read_unlock();
-
-out_rel:
-       dst_release(dst);
-out:
-       return rc;
-}
-
-static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
+static int smc_clnt_conf_first_link(struct smc_sock *smc)
 {
        struct smc_link_group *lgr = smc->conn.lgr;
        struct smc_link *link;
@@ -332,6 +291,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
                return rc;
        }
 
+       if (link->llc_confirm_rc)
+               return SMC_CLC_DECL_RMBE_EC;
+
        rc = smc_ib_modify_qp_rts(link);
        if (rc)
                return SMC_CLC_DECL_INTERR;
@@ -346,11 +308,33 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
        /* send CONFIRM LINK response over RoCE fabric */
        rc = smc_llc_send_confirm_link(link,
                                       link->smcibdev->mac[link->ibport - 1],
-                                      gid, SMC_LLC_RESP);
+                                      &link->smcibdev->gid[link->ibport - 1],
+                                      SMC_LLC_RESP);
        if (rc < 0)
                return SMC_CLC_DECL_TCL;
 
-       return rc;
+       /* receive ADD LINK request from server over RoCE fabric */
+       rest = wait_for_completion_interruptible_timeout(&link->llc_add,
+                                                        SMC_LLC_WAIT_TIME);
+       if (rest <= 0) {
+               struct smc_clc_msg_decline dclc;
+
+               rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+                                     SMC_CLC_DECLINE);
+               return rc;
+       }
+
+       /* send add link reject message, only one link supported for now */
+       rc = smc_llc_send_add_link(link,
+                                  link->smcibdev->mac[link->ibport - 1],
+                                  &link->smcibdev->gid[link->ibport - 1],
+                                  SMC_LLC_RESP);
+       if (rc < 0)
+               return SMC_CLC_DECL_TCL;
+
+       link->state = SMC_LNK_ACTIVE;
+
+       return 0;
 }
 
 static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -372,19 +356,9 @@ static void smc_link_save_peer_info(struct smc_link *link,
        link->peer_mtu = clc->qp_mtu;
 }
 
-static void smc_lgr_forget(struct smc_link_group *lgr)
-{
-       spin_lock_bh(&smc_lgr_list.lock);
-       /* do not use this link group for new connections */
-       if (!list_empty(&lgr->list))
-               list_del_init(&lgr->list);
-       spin_unlock_bh(&smc_lgr_list.lock);
-}
-
 /* setup for RDMA connection of client */
 static int smc_connect_rdma(struct smc_sock *smc)
 {
-       struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
        struct smc_clc_msg_accept_confirm aclc;
        int local_contact = SMC_FIRST_CONTACT;
        struct smc_ib_device *smcibdev;
@@ -438,8 +412,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
 
        srv_first_contact = aclc.hdr.flag;
        mutex_lock(&smc_create_lgr_pending);
-       local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
-                                       ibport, &aclc.lcl, srv_first_contact);
+       local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
+                                       srv_first_contact);
        if (local_contact < 0) {
                rc = local_contact;
                if (rc == -ENOMEM)
@@ -498,8 +472,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
 
        if (local_contact == SMC_FIRST_CONTACT) {
                /* QP confirmation over RoCE fabric */
-               reason_code = smc_clnt_conf_first_link(
-                       smc, &smcibdev->gid[ibport - 1]);
+               reason_code = smc_clnt_conf_first_link(smc);
                if (reason_code < 0) {
                        rc = reason_code;
                        goto out_err_unlock;
@@ -558,7 +531,6 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
                goto out_err;
        if (addr->sa_family != AF_INET)
                goto out_err;
-       smc->addr = addr;       /* needed for nonblocking connect */
 
        lock_sock(sk);
        switch (sk->sk_state) {
@@ -748,9 +720,34 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
 
                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
                                      SMC_CLC_DECLINE);
+               return rc;
        }
 
-       return rc;
+       if (link->llc_confirm_resp_rc)
+               return SMC_CLC_DECL_RMBE_EC;
+
+       /* send ADD LINK request to client over the RoCE fabric */
+       rc = smc_llc_send_add_link(link,
+                                  link->smcibdev->mac[link->ibport - 1],
+                                  &link->smcibdev->gid[link->ibport - 1],
+                                  SMC_LLC_REQ);
+       if (rc < 0)
+               return SMC_CLC_DECL_TCL;
+
+       /* receive ADD LINK response from client over the RoCE fabric */
+       rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
+                                                        SMC_LLC_WAIT_TIME);
+       if (rest <= 0) {
+               struct smc_clc_msg_decline dclc;
+
+               rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+                                     SMC_CLC_DECLINE);
+               return rc;
+       }
+
+       link->state = SMC_LNK_ACTIVE;
+
+       return 0;
 }
 
 /* setup for RDMA connection of server */
@@ -766,7 +763,6 @@ static void smc_listen_work(struct work_struct *work)
        struct sock *newsmcsk = &new_smc->sk;
        struct smc_clc_msg_proposal *pclc;
        struct smc_ib_device *smcibdev;
-       struct sockaddr_in peeraddr;
        u8 buf[SMC_CLC_MAX_LEN];
        struct smc_link *link;
        int reason_code = 0;
@@ -808,7 +804,7 @@ static void smc_listen_work(struct work_struct *work)
        }
 
        /* determine subnet and mask from internal TCP socket */
-       rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
+       rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
        if (rc) {
                reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
                goto decline_rdma;
@@ -822,13 +818,10 @@ static void smc_listen_work(struct work_struct *work)
                goto decline_rdma;
        }
 
-       /* get address of the peer connected to the internal TCP socket */
-       kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr);
-
        /* allocate connection / link group */
        mutex_lock(&smc_create_lgr_pending);
-       local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
-                                       smcibdev, ibport, &pclc->lcl, 0);
+       local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
+                                       0);
        if (local_contact < 0) {
                rc = local_contact;
                if (rc == -ENOMEM)
index 9518986..268cdf1 100644 (file)
@@ -172,7 +172,6 @@ struct smc_sock {                           /* smc sock container */
        struct sock             sk;
        struct socket           *clcsock;       /* internal tcp socket */
        struct smc_connection   conn;           /* smc connection */
-       struct sockaddr         *addr;          /* inet connect address */
        struct smc_sock         *listen_smc;    /* listen parent */
        struct work_struct      tcp_listen_work;/* handle tcp socket accepts */
        struct work_struct      smc_listen_work;/* prepare new accept socket */
@@ -263,10 +262,8 @@ static inline bool using_ipsec(struct smc_sock *smc)
 
 struct smc_clc_msg_local;
 
-int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
-                        u8 *prefix_len);
 void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
                    struct smc_ib_device *smcibdev, u8 ibport,
                    struct smc_clc_msg_local *lcl, int srv_first_contact);
 struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
index 8ac5158..874c5a7 100644 (file)
@@ -11,6 +11,7 @@
  */
 
 #include <linux/in.h>
+#include <linux/inetdevice.h>
 #include <linux/if_ether.h>
 #include <linux/sched/signal.h>
 
@@ -22,6 +23,9 @@
 #include "smc_clc.h"
 #include "smc_ib.h"
 
+/* eye catcher "SMCR" EBCDIC for CLC messages */
+static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
+
 /* check if received message has a correct header length and contains valid
  * heading and trailing eyecatchers
  */
@@ -70,6 +74,45 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
        return true;
 }
 
+/* determine subnet and mask of internal TCP socket */
+int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
+                            __be32 *subnet, u8 *prefix_len)
+{
+       struct dst_entry *dst = sk_dst_get(clcsock->sk);
+       struct in_device *in_dev;
+       struct sockaddr_in addr;
+       int rc = -ENOENT;
+
+       if (!dst) {
+               rc = -ENOTCONN;
+               goto out;
+       }
+       if (!dst->dev) {
+               rc = -ENODEV;
+               goto out_rel;
+       }
+
+       /* get address to which the internal TCP socket is bound */
+       kernel_getsockname(clcsock, (struct sockaddr *)&addr);
+       /* analyze IPv4 specific data of net_device belonging to TCP socket */
+       rcu_read_lock();
+       in_dev = __in_dev_get_rcu(dst->dev);
+       for_ifa(in_dev) {
+               if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
+                       continue;
+               *prefix_len = inet_mask_len(ifa->ifa_mask);
+               *subnet = ifa->ifa_address & ifa->ifa_mask;
+               rc = 0;
+               break;
+       } endfor_ifa(in_dev);
+       rcu_read_unlock();
+
+out_rel:
+       dst_release(dst);
+out:
+       return rc;
+}
+
 /* Wait for data on the tcp-socket, analyze received data
  * Returns:
  * 0 if success and it was not a decline that we received.
@@ -211,8 +254,8 @@ int smc_clc_send_proposal(struct smc_sock *smc,
 
        memset(&pclc_prfx, 0, sizeof(pclc_prfx));
        /* determine subnet and mask from internal TCP socket */
-       rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
-                                 &pclc_prfx.prefix_len);
+       rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
+                                     &pclc_prfx.prefix_len);
        if (rc)
                return SMC_CLC_DECL_CNFERR; /* configuration error */
        pclc_prfx.ipv6_prefixes_cnt = 0;
index c145a0f..20e048b 100644 (file)
@@ -22,9 +22,6 @@
 #define SMC_CLC_CONFIRM                0x03
 #define SMC_CLC_DECLINE                0x04
 
-/* eye catcher "SMCR" EBCDIC for CLC messages */
-static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
-
 #define SMC_CLC_V1             0x1             /* SMC version                */
 #define CLC_WAIT_TIME          (6 * HZ)        /* max. wait time on clcsock  */
 #define SMC_CLC_DECL_MEM       0x01010000  /* insufficient memory resources  */
@@ -36,6 +33,7 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
 #define SMC_CLC_DECL_INTERR    0x99990000  /* internal error                 */
 #define SMC_CLC_DECL_TCL       0x02040000  /* timeout w4 QP confirm          */
 #define SMC_CLC_DECL_SEND      0x07000000  /* sending problem                */
+#define SMC_CLC_DECL_RMBE_EC   0x08000000  /* peer has eyecatcher in RMBE    */
 
 struct smc_clc_msg_hdr {       /* header1 of clc messages */
        u8 eyecatcher[4];       /* eye catcher */
@@ -124,9 +122,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
               ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
 }
 
-struct smc_sock;
-struct smc_ib_device;
-
+int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
+                            u8 *prefix_len);
 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
                     u8 expected_type);
 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
index 2424c71..702ce5f 100644 (file)
@@ -144,7 +144,7 @@ free:
 }
 
 /* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
+static int smc_lgr_create(struct smc_sock *smc,
                          struct smc_ib_device *smcibdev, u8 ibport,
                          char *peer_systemid, unsigned short vlan_id)
 {
@@ -161,7 +161,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
        }
        lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
        lgr->sync_err = false;
-       lgr->daddr = peer_in_addr;
        memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
        lgr->vlan_id = vlan_id;
        rwlock_init(&lgr->sndbufs_lock);
@@ -177,6 +176,7 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
 
        lnk = &lgr->lnk[SMC_SINGLE_LINK];
        /* initialize link */
+       lnk->state = SMC_LNK_ACTIVATING;
        lnk->smcibdev = smcibdev;
        lnk->ibport = ibport;
        lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
@@ -198,6 +198,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
                goto destroy_qp;
        init_completion(&lnk->llc_confirm);
        init_completion(&lnk->llc_confirm_resp);
+       init_completion(&lnk->llc_add);
+       init_completion(&lnk->llc_add_resp);
 
        smc->conn.lgr = lgr;
        rwlock_init(&lgr->conns_lock);
@@ -306,6 +308,15 @@ void smc_lgr_free(struct smc_link_group *lgr)
        kfree(lgr);
 }
 
+void smc_lgr_forget(struct smc_link_group *lgr)
+{
+       spin_lock_bh(&smc_lgr_list.lock);
+       /* do not use this link group for new connections */
+       if (!list_empty(&lgr->list))
+               list_del_init(&lgr->list);
+       spin_unlock_bh(&smc_lgr_list.lock);
+}
+
 /* terminate linkgroup abnormally */
 void smc_lgr_terminate(struct smc_link_group *lgr)
 {
@@ -313,15 +324,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
        struct smc_sock *smc;
        struct rb_node *node;
 
-       spin_lock_bh(&smc_lgr_list.lock);
-       if (list_empty(&lgr->list)) {
-               /* termination already triggered */
-               spin_unlock_bh(&smc_lgr_list.lock);
-               return;
-       }
-       /* do not use this link group for new connections */
-       list_del_init(&lgr->list);
-       spin_unlock_bh(&smc_lgr_list.lock);
+       smc_lgr_forget(lgr);
 
        write_lock_bh(&lgr->conns_lock);
        node = rb_first(&lgr->conns_all);
@@ -400,7 +403,7 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
 }
 
 /* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
                    struct smc_ib_device *smcibdev, u8 ibport,
                    struct smc_clc_msg_local *lcl, int srv_first_contact)
 {
@@ -457,7 +460,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
 
 create:
        if (local_contact == SMC_FIRST_CONTACT) {
-               rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport,
+               rc = smc_lgr_create(smc, smcibdev, ibport,
                                    lcl->id_for_peer, vlan_id);
                if (rc)
                        goto out;
@@ -698,27 +701,55 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
        return -ENOSPC;
 }
 
-/* save rkey and dma_addr received from peer during clc handshake */
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
-                           struct smc_clc_msg_accept_confirm *clc)
+/* add a new rtoken from peer */
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
 {
-       u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr);
-       struct smc_link_group *lgr = conn->lgr;
-       u32 rkey = ntohl(clc->rmb_rkey);
+       u64 dma_addr = be64_to_cpu(nw_vaddr);
+       u32 rkey = ntohl(nw_rkey);
        int i;
 
        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
                    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
                    test_bit(i, lgr->rtokens_used_mask)) {
-                       conn->rtoken_idx = i;
+                       /* already in list */
+                       return i;
+               }
+       }
+       i = smc_rmb_reserve_rtoken_idx(lgr);
+       if (i < 0)
+               return i;
+       lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
+       lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+       return i;
+}
+
+/* delete an rtoken */
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+{
+       u32 rkey = ntohl(nw_rkey);
+       int i;
+
+       for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+               if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+                   test_bit(i, lgr->rtokens_used_mask)) {
+                       lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
+                       lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
+
+                       clear_bit(i, lgr->rtokens_used_mask);
                        return 0;
                }
        }
-       conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr);
+       return -ENOENT;
+}
+
+/* save rkey and dma_addr received from peer during clc handshake */
+int smc_rmb_rtoken_handling(struct smc_connection *conn,
+                           struct smc_clc_msg_accept_confirm *clc)
+{
+       conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+                                         clc->rmb_rkey);
        if (conn->rtoken_idx < 0)
                return conn->rtoken_idx;
-       lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey;
-       lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr;
        return 0;
 }
index fe691bf..07e2a39 100644 (file)
@@ -32,6 +32,12 @@ enum smc_lgr_role {          /* possible roles of a link group */
        SMC_SERV        /* server */
 };
 
+enum smc_link_state {                  /* possible states of a link */
+       SMC_LNK_INACTIVE,       /* link is inactive */
+       SMC_LNK_ACTIVATING,     /* link is being activated */
+       SMC_LNK_ACTIVE          /* link is active */
+};
+
 #define SMC_WR_BUF_SIZE                48      /* size of work request buffer */
 
 struct smc_wr_buf {
@@ -87,8 +93,14 @@ struct smc_link {
        u8                      peer_mac[ETH_ALEN];     /* = gid[8:10||13:15] */
        u8                      peer_gid[sizeof(union ib_gid)]; /* gid of peer*/
        u8                      link_id;        /* unique # within link group */
+
+       enum smc_link_state     state;          /* state of link */
        struct completion       llc_confirm;    /* wait for rx of conf link */
        struct completion       llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
+       int                     llc_confirm_rc; /* rc from confirm link msg */
+       int                     llc_confirm_resp_rc; /* rc from conf_resp msg */
+       struct completion       llc_add;        /* wait for rx of add link */
+       struct completion       llc_add_resp;   /* wait for rx of add link rsp*/
 };
 
 /* For now we just allow one parallel link per link group. The SMC protocol
@@ -124,7 +136,6 @@ struct smc_rtoken {                         /* address/key of remote RMB */
 struct smc_link_group {
        struct list_head        list;
        enum smc_lgr_role       role;           /* client or server */
-       __be32                  daddr;          /* destination ip address */
        struct smc_link         lnk[SMC_LINKS_PER_LGR_MAX];     /* smc link */
        char                    peer_systemid[SMC_SYSTEMID_LEN];
                                                /* unique system_id of peer */
@@ -186,10 +197,13 @@ struct smc_sock;
 struct smc_clc_msg_accept_confirm;
 
 void smc_lgr_free(struct smc_link_group *lgr);
+void smc_lgr_forget(struct smc_link_group *lgr);
 void smc_lgr_terminate(struct smc_link_group *lgr);
 int smc_buf_create(struct smc_sock *smc);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
                            struct smc_clc_msg_accept_confirm *clc);
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
index 92fe4cc..54e8d6d 100644 (file)
@@ -4,9 +4,6 @@
  *
  *  Link Layer Control (LLC)
  *
- *  For now, we only support the necessary "confirm link" functionality
- *  which happens for the first RoCE link after successful CLC handshake.
- *
  *  Copyright IBM Corp. 2016
  *
  *  Author(s):  Klaus Wacker <Klaus.Wacker@de.ibm.com>
 #include "smc_clc.h"
 #include "smc_llc.h"
 
+#define SMC_LLC_DATA_LEN               40
+
+struct smc_llc_hdr {
+       struct smc_wr_rx_hdr common;
+       u8 length;      /* 44 */
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u8 reserved:4,
+          add_link_rej_rsn:4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u8 add_link_rej_rsn:4,
+          reserved:4;
+#endif
+       u8 flags;
+};
+
+#define SMC_LLC_FLAG_NO_RMBE_EYEC      0x03
+
+struct smc_llc_msg_confirm_link {      /* type 0x01 */
+       struct smc_llc_hdr hd;
+       u8 sender_mac[ETH_ALEN];
+       u8 sender_gid[SMC_GID_SIZE];
+       u8 sender_qp_num[3];
+       u8 link_num;
+       u8 link_uid[SMC_LGR_ID_SIZE];
+       u8 max_links;
+       u8 reserved[9];
+};
+
+#define SMC_LLC_FLAG_ADD_LNK_REJ       0x40
+#define SMC_LLC_REJ_RSN_NO_ALT_PATH    1
+
+#define SMC_LLC_ADD_LNK_MAX_LINKS      2
+
+struct smc_llc_msg_add_link {          /* type 0x02 */
+       struct smc_llc_hdr hd;
+       u8 sender_mac[ETH_ALEN];
+       u8 reserved2[2];
+       u8 sender_gid[SMC_GID_SIZE];
+       u8 sender_qp_num[3];
+       u8 link_num;
+       u8 flags2;      /* QP mtu */
+       u8 initial_psn[3];
+       u8 reserved[8];
+};
+
+#define SMC_LLC_FLAG_DEL_LINK_ALL      0x40
+#define SMC_LLC_FLAG_DEL_LINK_ORDERLY  0x20
+
+struct smc_llc_msg_del_link {          /* type 0x04 */
+       struct smc_llc_hdr hd;
+       u8 link_num;
+       __be32 reason;
+       u8 reserved[35];
+} __packed;                    /* format defined in RFC7609 */
+
+struct smc_llc_msg_test_link {         /* type 0x07 */
+       struct smc_llc_hdr hd;
+       u8 user_data[16];
+       u8 reserved[24];
+};
+
+struct smc_rmb_rtoken {
+       union {
+               u8 num_rkeys;   /* first rtoken byte of CONFIRM LINK msg */
+                               /* is actually the num of rtokens, first */
+                               /* rtoken is always for the current link */
+               u8 link_id;     /* link id of the rtoken */
+       };
+       __be32 rmb_key;
+       __be64 rmb_vaddr;
+} __packed;                    /* format defined in RFC7609 */
+
+#define SMC_LLC_RKEYS_PER_MSG  3
+
+struct smc_llc_msg_confirm_rkey {      /* type 0x06 */
+       struct smc_llc_hdr hd;
+       struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+       u8 reserved;
+};
+
+struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */
+       struct smc_llc_hdr hd;
+       u8 num_rkeys;
+       struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+};
+
+#define SMC_LLC_DEL_RKEY_MAX   8
+#define SMC_LLC_FLAG_RKEY_NEG  0x20
+
+struct smc_llc_msg_delete_rkey {       /* type 0x09 */
+       struct smc_llc_hdr hd;
+       u8 num_rkeys;
+       u8 err_mask;
+       u8 reserved[2];
+       __be32 rkey[8];
+       u8 reserved2[4];
+};
+
+union smc_llc_msg {
+       struct smc_llc_msg_confirm_link confirm_link;
+       struct smc_llc_msg_add_link add_link;
+       struct smc_llc_msg_del_link delete_link;
+
+       struct smc_llc_msg_confirm_rkey confirm_rkey;
+       struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
+       struct smc_llc_msg_delete_rkey delete_rkey;
+
+       struct smc_llc_msg_test_link test_link;
+       struct {
+               struct smc_llc_hdr hdr;
+               u8 data[SMC_LLC_DATA_LEN];
+       } raw;
+};
+
+#define SMC_LLC_FLAG_RESP              0x80
+
 /********************************** send *************************************/
 
 struct smc_llc_tx_pend {
@@ -87,6 +200,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
        memset(confllc, 0, sizeof(*confllc));
        confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
        confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
+       confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
        if (reqresp == SMC_LLC_RESP)
                confllc->hd.flags |= SMC_LLC_FLAG_RESP;
        memcpy(confllc->sender_mac, mac, ETH_ALEN);
@@ -94,7 +208,104 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
        hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
        /* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */
        memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
-       confllc->max_links = SMC_LINKS_PER_LGR_MAX;
+       confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* send ADD LINK request or response */
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
+                         union ib_gid *gid,
+                         enum smc_llc_reqresp reqresp)
+{
+       struct smc_llc_msg_add_link *addllc;
+       struct smc_wr_tx_pend_priv *pend;
+       struct smc_wr_buf *wr_buf;
+       int rc;
+
+       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (rc)
+               return rc;
+       addllc = (struct smc_llc_msg_add_link *)wr_buf;
+       memset(addllc, 0, sizeof(*addllc));
+       addllc->hd.common.type = SMC_LLC_ADD_LINK;
+       addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+       if (reqresp == SMC_LLC_RESP) {
+               addllc->hd.flags |= SMC_LLC_FLAG_RESP;
+               /* always reject more links for now */
+               addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
+               addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+       }
+       memcpy(addllc->sender_mac, mac, ETH_ALEN);
+       memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* send DELETE LINK request or response */
+int smc_llc_send_delete_link(struct smc_link *link,
+                            enum smc_llc_reqresp reqresp)
+{
+       struct smc_llc_msg_del_link *delllc;
+       struct smc_wr_tx_pend_priv *pend;
+       struct smc_wr_buf *wr_buf;
+       int rc;
+
+       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (rc)
+               return rc;
+       delllc = (struct smc_llc_msg_del_link *)wr_buf;
+       memset(delllc, 0, sizeof(*delllc));
+       delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+       delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+       if (reqresp == SMC_LLC_RESP)
+               delllc->hd.flags |= SMC_LLC_FLAG_RESP;
+       /* DEL_LINK_ALL because only 1 link supported */
+       delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+       delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+       delllc->link_num = link->link_id;
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* send LLC test link request or response */
+int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16],
+                          enum smc_llc_reqresp reqresp)
+{
+       struct smc_llc_msg_test_link *testllc;
+       struct smc_wr_tx_pend_priv *pend;
+       struct smc_wr_buf *wr_buf;
+       int rc;
+
+       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (rc)
+               return rc;
+       testllc = (struct smc_llc_msg_test_link *)wr_buf;
+       memset(testllc, 0, sizeof(*testllc));
+       testllc->hd.common.type = SMC_LLC_TEST_LINK;
+       testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
+       if (reqresp == SMC_LLC_RESP)
+               testllc->hd.flags |= SMC_LLC_FLAG_RESP;
+       memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* send a prepared message */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
+{
+       struct smc_wr_tx_pend_priv *pend;
+       struct smc_wr_buf *wr_buf;
+       int rc;
+
+       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (rc)
+               return rc;
+       memcpy(wr_buf, llcbuf, llclen);
        /* send llc message */
        rc = smc_wr_tx_send(link, pend);
        return rc;
@@ -106,19 +317,156 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
                                    struct smc_llc_msg_confirm_link *llc)
 {
        struct smc_link_group *lgr;
+       int conf_rc;
 
        lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+       /* RMBE eyecatchers are not supported */
+       if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
+               conf_rc = 0;
+       else
+               conf_rc = ENOTSUPP;
+
        if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-               if (lgr->role == SMC_SERV)
+               if (lgr->role == SMC_SERV &&
+                   link->state == SMC_LNK_ACTIVATING) {
+                       link->llc_confirm_resp_rc = conf_rc;
                        complete(&link->llc_confirm_resp);
+               }
        } else {
-               if (lgr->role == SMC_CLNT) {
+               if (lgr->role == SMC_CLNT &&
+                   link->state == SMC_LNK_ACTIVATING) {
+                       link->llc_confirm_rc = conf_rc;
                        link->link_id = llc->link_num;
                        complete(&link->llc_confirm);
                }
        }
 }
 
+static void smc_llc_rx_add_link(struct smc_link *link,
+                               struct smc_llc_msg_add_link *llc)
+{
+       struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+                                                 lnk[SMC_SINGLE_LINK]);
+
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               if (link->state == SMC_LNK_ACTIVATING)
+                       complete(&link->llc_add_resp);
+       } else {
+               if (link->state == SMC_LNK_ACTIVATING) {
+                       complete(&link->llc_add);
+                       return;
+               }
+
+               if (lgr->role == SMC_SERV) {
+                       smc_llc_send_add_link(link,
+                                       link->smcibdev->mac[link->ibport - 1],
+                                       &link->smcibdev->gid[link->ibport - 1],
+                                       SMC_LLC_REQ);
+
+               } else {
+                       smc_llc_send_add_link(link,
+                                       link->smcibdev->mac[link->ibport - 1],
+                                       &link->smcibdev->gid[link->ibport - 1],
+                                       SMC_LLC_RESP);
+               }
+       }
+}
+
+static void smc_llc_rx_delete_link(struct smc_link *link,
+                                  struct smc_llc_msg_del_link *llc)
+{
+       struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+                                                 lnk[SMC_SINGLE_LINK]);
+
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               if (lgr->role == SMC_SERV)
+                       smc_lgr_terminate(lgr);
+       } else {
+               if (lgr->role == SMC_SERV) {
+                       smc_lgr_forget(lgr);
+                       smc_llc_send_delete_link(link, SMC_LLC_REQ);
+               } else {
+                       smc_llc_send_delete_link(link, SMC_LLC_RESP);
+                       smc_lgr_terminate(lgr);
+               }
+       }
+}
+
+static void smc_llc_rx_test_link(struct smc_link *link,
+                                struct smc_llc_msg_test_link *llc)
+{
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               /* unused as long as we don't send this type of msg */
+       } else {
+               smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP);
+       }
+}
+
+static void smc_llc_rx_confirm_rkey(struct smc_link *link,
+                                   struct smc_llc_msg_confirm_rkey *llc)
+{
+       struct smc_link_group *lgr;
+       int rc;
+
+       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               /* unused as long as we don't send this type of msg */
+       } else {
+               rc = smc_rtoken_add(lgr,
+                                   llc->rtoken[0].rmb_vaddr,
+                                   llc->rtoken[0].rmb_key);
+
+               /* ignore rtokens for other links, we have only one link */
+
+               llc->hd.flags |= SMC_LLC_FLAG_RESP;
+               if (rc < 0)
+                       llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+               smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+       }
+}
+
+static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
+                                     struct smc_llc_msg_confirm_rkey_cont *llc)
+{
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               /* unused as long as we don't send this type of msg */
+       } else {
+               /* ignore rtokens for other links, we have only one link */
+               llc->hd.flags |= SMC_LLC_FLAG_RESP;
+               smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+       }
+}
+
+static void smc_llc_rx_delete_rkey(struct smc_link *link,
+                                  struct smc_llc_msg_delete_rkey *llc)
+{
+       struct smc_link_group *lgr;
+       u8 err_mask = 0;
+       int i, max;
+
+       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               /* unused as long as we don't send this type of msg */
+       } else {
+               max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
+               for (i = 0; i < max; i++) {
+                       if (smc_rtoken_delete(lgr, llc->rkey[i]))
+                               err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
+               }
+
+               if (err_mask) {
+                       llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+                       llc->err_mask = err_mask;
+               }
+
+               llc->hd.flags |= SMC_LLC_FLAG_RESP;
+               smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+       }
+}
+
 static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
 {
        struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
@@ -128,8 +476,30 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
                return; /* short message */
        if (llc->raw.hdr.length != sizeof(*llc))
                return; /* invalid message */
-       if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK)
+
+       switch (llc->raw.hdr.common.type) {
+       case SMC_LLC_TEST_LINK:
+               smc_llc_rx_test_link(link, &llc->test_link);
+               break;
+       case SMC_LLC_CONFIRM_LINK:
                smc_llc_rx_confirm_link(link, &llc->confirm_link);
+               break;
+       case SMC_LLC_ADD_LINK:
+               smc_llc_rx_add_link(link, &llc->add_link);
+               break;
+       case SMC_LLC_DELETE_LINK:
+               smc_llc_rx_delete_link(link, &llc->delete_link);
+               break;
+       case SMC_LLC_CONFIRM_RKEY:
+               smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
+               break;
+       case SMC_LLC_CONFIRM_RKEY_CONT:
+               smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
+               break;
+       case SMC_LLC_DELETE_RKEY:
+               smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
+               break;
+       }
 }
 
 /***************************** init, exit, misc ******************************/
@@ -139,6 +509,30 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
                .handler        = smc_llc_rx_handler,
                .type           = SMC_LLC_CONFIRM_LINK
        },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_TEST_LINK
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_ADD_LINK
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_DELETE_LINK
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_CONFIRM_RKEY
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_CONFIRM_RKEY_CONT
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_DELETE_RKEY
+       },
        {
                .handler        = NULL,
        }
index 51b27ce..e4a7d5e 100644 (file)
@@ -18,6 +18,7 @@
 #define SMC_LLC_FLAG_RESP              0x80
 
 #define SMC_LLC_WAIT_FIRST_TIME                (5 * HZ)
+#define SMC_LLC_WAIT_TIME              (2 * HZ)
 
 enum smc_llc_reqresp {
        SMC_LLC_REQ,
@@ -26,39 +27,23 @@ enum smc_llc_reqresp {
 
 enum smc_llc_msg_type {
        SMC_LLC_CONFIRM_LINK            = 0x01,
-};
-
-#define SMC_LLC_DATA_LEN               40
-
-struct smc_llc_hdr {
-       struct smc_wr_rx_hdr common;
-       u8 length;      /* 44 */
-       u8 reserved;
-       u8 flags;
-};
-
-struct smc_llc_msg_confirm_link {      /* type 0x01 */
-       struct smc_llc_hdr hd;
-       u8 sender_mac[ETH_ALEN];
-       u8 sender_gid[SMC_GID_SIZE];
-       u8 sender_qp_num[3];
-       u8 link_num;
-       u8 link_uid[SMC_LGR_ID_SIZE];
-       u8 max_links;
-       u8 reserved[9];
-};
-
-union smc_llc_msg {
-       struct smc_llc_msg_confirm_link confirm_link;
-       struct {
-               struct smc_llc_hdr hdr;
-               u8 data[SMC_LLC_DATA_LEN];
-       } raw;
+       SMC_LLC_ADD_LINK                = 0x02,
+       SMC_LLC_DELETE_LINK             = 0x04,
+       SMC_LLC_CONFIRM_RKEY            = 0x06,
+       SMC_LLC_TEST_LINK               = 0x07,
+       SMC_LLC_CONFIRM_RKEY_CONT       = 0x08,
+       SMC_LLC_DELETE_RKEY             = 0x09,
 };
 
 /* transmit */
 int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
                              enum smc_llc_reqresp reqresp);
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid,
+                         enum smc_llc_reqresp reqresp);
+int smc_llc_send_delete_link(struct smc_link *link,
+                            enum smc_llc_reqresp reqresp);
+int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
+                          enum smc_llc_reqresp reqresp);
 int smc_llc_init(void) __init;
 
 #endif /* SMC_LLC_H */
index ab58e57..d9a1ac2 100644 (file)
@@ -233,7 +233,7 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
        return __put_user(klen, ulen);
 }
 
-static struct kmem_cache *sock_inode_cachep __read_mostly;
+static struct kmem_cache *sock_inode_cachep __ro_after_init;
 
 static struct inode *sock_alloc_inode(struct super_block *sb)
 {
@@ -2289,10 +2289,12 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
        if (!sock)
                return err;
 
-       err = sock_error(sock->sk);
-       if (err) {
-               datagrams = err;
-               goto out_put;
+       if (likely(!(flags & MSG_ERRQUEUE))) {
+               err = sock_error(sock->sk);
+               if (err) {
+                       datagrams = err;
+                       goto out_put;
+               }
        }
 
        entry = mmsg;
index ec3fc8d..2c2a587 100644 (file)
@@ -43,6 +43,7 @@ hostprogs-y += xdp_redirect_cpu
 hostprogs-y += xdp_monitor
 hostprogs-y += xdp_rxq_info
 hostprogs-y += syscall_tp
+hostprogs-y += cpustat
 
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
@@ -93,6 +94,7 @@ xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
 xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
 xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
 syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
+cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -144,6 +146,7 @@ always += xdp_monitor_kern.o
 always += xdp_rxq_info_kern.o
 always += xdp2skb_meta_kern.o
 always += syscall_tp_kern.o
+always += cpustat_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -188,6 +191,7 @@ HOSTLOADLIBES_xdp_redirect_cpu += -lelf
 HOSTLOADLIBES_xdp_monitor += -lelf
 HOSTLOADLIBES_xdp_rxq_info += -lelf
 HOSTLOADLIBES_syscall_tp += -lelf
+HOSTLOADLIBES_cpustat += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
new file mode 100644 (file)
index 0000000..68c84da
--- /dev/null
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/*
+ * The CPU number, cstate number and pstate number are based
+ * on 96boards Hikey with octa CA53 CPUs.
+ *
+ * Every CPU have three idle states for cstate:
+ *   WFI, CPU_OFF, CLUSTER_OFF
+ *
+ * Every CPU has 5 operating points:
+ *   208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
+ *
+ * This code is based on these assumptions; other platforms
+ * need to adjust these definitions.
+ */
+#define MAX_CPU                        8
+#define MAX_PSTATE_ENTRIES     5
+#define MAX_CSTATE_ENTRIES     3
+
+static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
+
+/*
+ * my_map structure is used to record cstate and pstate index and
+ * timestamp (Idx, Ts), when new event incoming we need to update
+ * combination for new state index and timestamp (Idx`, Ts`).
+ *
+ * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
+ * interval for the previous state: Duration(Idx) = Ts` - Ts.
+ *
+ * Every CPU has one array below for recording state index and
+ * timestamp, recorded for cstate and pstate separately:
+ *
+ * +--------------------------+
+ * | cstate timestamp         |
+ * +--------------------------+
+ * | cstate index             |
+ * +--------------------------+
+ * | pstate timestamp         |
+ * +--------------------------+
+ * | pstate index             |
+ * +--------------------------+
+ */
+#define MAP_OFF_CSTATE_TIME    0
+#define MAP_OFF_CSTATE_IDX     1
+#define MAP_OFF_PSTATE_TIME    2
+#define MAP_OFF_PSTATE_IDX     3
+#define MAP_OFF_NUM            4
+
+struct bpf_map_def SEC("maps") my_map = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(u64),
+       .max_entries = MAX_CPU * MAP_OFF_NUM,
+};
+
+/* cstate_duration records duration time for every idle state per CPU */
+struct bpf_map_def SEC("maps") cstate_duration = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(u64),
+       .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
+};
+
+/* pstate_duration records duration time for every operating point per CPU */
+struct bpf_map_def SEC("maps") pstate_duration = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(u64),
+       .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
+};
+
+/*
+ * The trace events for cpu_idle and cpu_frequency are taken from:
+ * /sys/kernel/debug/tracing/events/power/cpu_idle/format
+ * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
+ *
+ * These two events have same format, so define one common structure.
+ */
+struct cpu_args {
+       u64 pad;
+       u32 state;
+       u32 cpu_id;
+};
+
+/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
+static u32 find_cpu_pstate_idx(u32 frequency)
+{
+       u32 i;
+
+       for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
+               if (frequency == cpu_opps[i])
+                       return i;
+       }
+
+       return i;
+}
+
+SEC("tracepoint/power/cpu_idle")
+int bpf_prog1(struct cpu_args *ctx)
+{
+       u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+       u32 key, cpu, pstate_idx;
+       u64 *val;
+
+       if (ctx->cpu_id > MAX_CPU)
+               return 0;
+
+       cpu = ctx->cpu_id;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
+       cts = bpf_map_lookup_elem(&my_map, &key);
+       if (!cts)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+       cstate = bpf_map_lookup_elem(&my_map, &key);
+       if (!cstate)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+       pts = bpf_map_lookup_elem(&my_map, &key);
+       if (!pts)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+       pstate = bpf_map_lookup_elem(&my_map, &key);
+       if (!pstate)
+               return 0;
+
+       prev_state = *cstate;
+       *cstate = ctx->state;
+
+       if (!*cts) {
+               *cts = bpf_ktime_get_ns();
+               return 0;
+       }
+
+       cur_ts = bpf_ktime_get_ns();
+       delta = cur_ts - *cts;
+       *cts = cur_ts;
+
+       /*
+        * When state doesn't equal (u32)-1, the cpu will enter
+        * one idle state; for this case we need to record interval
+        * for the pstate.
+        *
+        *                 OPP2
+        *            +---------------------+
+        *     OPP1   |                     |
+        *   ---------+                     |
+        *                                  |  Idle state
+        *                                  +---------------
+        *
+        *            |<- pstate duration ->|
+        *            ^                     ^
+        *           pts                  cur_ts
+        */
+       if (ctx->state != (u32)-1) {
+
+               /* record pstate after have first cpu_frequency event */
+               if (!*pts)
+                       return 0;
+
+               delta = cur_ts - *pts;
+
+               pstate_idx = find_cpu_pstate_idx(*pstate);
+               if (pstate_idx >= MAX_PSTATE_ENTRIES)
+                       return 0;
+
+               key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+               val = bpf_map_lookup_elem(&pstate_duration, &key);
+               if (val)
+                       __sync_fetch_and_add((long *)val, delta);
+
+       /*
+        * When state equals (u32)-1, the cpu just exits from one
+        * specific idle state; for this case we need to record
+        * interval for the pstate.
+        *
+        *       OPP2
+        *   -----------+
+        *              |                          OPP1
+        *              |                     +-----------
+        *              |     Idle state      |
+        *              +---------------------+
+        *
+        *              |<- cstate duration ->|
+        *              ^                     ^
+        *             cts                  cur_ts
+        */
+       } else {
+
+               key = cpu * MAX_CSTATE_ENTRIES + prev_state;
+               val = bpf_map_lookup_elem(&cstate_duration, &key);
+               if (val)
+                       __sync_fetch_and_add((long *)val, delta);
+       }
+
+       /* Update timestamp for pstate as new start time */
+       if (*pts)
+               *pts = cur_ts;
+
+       return 0;
+}
+
+SEC("tracepoint/power/cpu_frequency")
+int bpf_prog2(struct cpu_args *ctx)
+{
+       u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+       u32 key, cpu, pstate_idx;
+       u64 *val;
+
+       cpu = ctx->cpu_id;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+       pts = bpf_map_lookup_elem(&my_map, &key);
+       if (!pts)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+       pstate = bpf_map_lookup_elem(&my_map, &key);
+       if (!pstate)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+       cstate = bpf_map_lookup_elem(&my_map, &key);
+       if (!cstate)
+               return 0;
+
+       prev_state = *pstate;
+       *pstate = ctx->state;
+
+       if (!*pts) {
+               *pts = bpf_ktime_get_ns();
+               return 0;
+       }
+
+       cur_ts = bpf_ktime_get_ns();
+       delta = cur_ts - *pts;
+       *pts = cur_ts;
+
+       /* When CPU is in idle, bail out to skip pstate statistics */
+       if (*cstate != (u32)(-1))
+               return 0;
+
+       /*
+        * The cpu changes to another different OPP (in the diagram
+        * below, from OPP3 to OPP1); we need to record the interval
+        * for previous frequency OPP3 and update timestamp as start
+        * time for new frequency OPP1.
+        *
+        *                 OPP3
+        *            +---------------------+
+        *     OPP2   |                     |
+        *   ---------+                     |
+        *                                  |    OPP1
+        *                                  +---------------
+        *
+        *            |<- pstate duration ->|
+        *            ^                     ^
+        *           pts                  cur_ts
+        */
+       pstate_idx = find_cpu_pstate_idx(*pstate);
+       if (pstate_idx >= MAX_PSTATE_ENTRIES)
+               return 0;
+
+       key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+       val = bpf_map_lookup_elem(&pstate_duration, &key);
+       if (val)
+               __sync_fetch_and_add((long *)val, delta);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
new file mode 100644 (file)
index 0000000..2b4cd1a
--- /dev/null
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sched.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_CPU                        8
+#define MAX_PSTATE_ENTRIES     5
+#define MAX_CSTATE_ENTRIES     3
+#define MAX_STARS              40
+
+#define CPUFREQ_MAX_SYSFS_PATH "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
+#define CPUFREQ_LOWEST_FREQ    "208000"
+#define CPUFREQ_HIGHEST_FREQ   "12000000"
+
+struct cpu_stat_data {
+       unsigned long cstate[MAX_CSTATE_ENTRIES];
+       unsigned long pstate[MAX_PSTATE_ENTRIES];
+};
+
+static struct cpu_stat_data stat_data[MAX_CPU];
+
+static void cpu_stat_print(void)
+{
+       int i, j;
+       char state_str[sizeof("cstate-9")];
+       struct cpu_stat_data *data;
+
+       /* Clear screen */
+       printf("\033[2J");
+
+       /* Header */
+       printf("\nCPU states statistics:\n");
+       printf("%-10s ", "state(ms)");
+
+       for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+               sprintf(state_str, "cstate-%d", i);
+               printf("%-11s ", state_str);
+       }
+
+       for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+               sprintf(state_str, "pstate-%d", i);
+               printf("%-11s ", state_str);
+       }
+
+       printf("\n");
+
+       for (j = 0; j < MAX_CPU; j++) {
+               data = &stat_data[j];
+
+               printf("CPU-%-6d ", j);
+               for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
+                       printf("%-11ld ", data->cstate[i] / 1000000);
+
+               for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
+                       printf("%-11ld ", data->pstate[i] / 1000000);
+
+               printf("\n");
+       }
+}
+
+static void cpu_stat_update(int cstate_fd, int pstate_fd)
+{
+       unsigned long key, value;
+       int c, i;
+
+       for (c = 0; c < MAX_CPU; c++) {
+               for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+                       key = c * MAX_CSTATE_ENTRIES + i;
+                       bpf_map_lookup_elem(cstate_fd, &key, &value);
+                       stat_data[c].cstate[i] = value;
+               }
+
+               for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+                       key = c * MAX_PSTATE_ENTRIES + i;
+                       bpf_map_lookup_elem(pstate_fd, &key, &value);
+                       stat_data[c].pstate[i] = value;
+               }
+       }
+}
+
+/*
+ * This function is copied from 'idlestat' tool function
+ * idlestat_wake_all() in idlestate.c.
+ *
+ * It sets the running task's CPU affinity to each CPU one by one so that
+ * every specific CPU is woken up to handle scheduling; this results in all
+ * CPUs being woken up once and producing the ftrace event 'trace_cpu_idle'.
+ */
+static int cpu_stat_inject_cpu_idle_event(void)
+{
+       int rcpu, i, ret;
+       cpu_set_t cpumask;
+       cpu_set_t original_cpumask;
+
+       ret = sysconf(_SC_NPROCESSORS_CONF);
+       if (ret < 0)
+               return -1;
+
+       rcpu = sched_getcpu();
+       if (rcpu < 0)
+               return -1;
+
+       /* Keep track of the CPUs we will run on */
+       sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask);
+
+       for (i = 0; i < ret; i++) {
+
+               /* Pointless to wake up ourself */
+               if (i == rcpu)
+                       continue;
+
+               /* Pointless to wake CPUs we will not run on */
+               if (!CPU_ISSET(i, &original_cpumask))
+                       continue;
+
+               CPU_ZERO(&cpumask);
+               CPU_SET(i, &cpumask);
+
+               sched_setaffinity(0, sizeof(cpumask), &cpumask);
+       }
+
+       /* Enable all the CPUs of the original mask */
+       sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
+       return 0;
+}
+
+/*
+ * It's possible that no frequency change happens for a long time, so no
+ * ftrace event 'trace_cpu_frequency' is seen for a long period; this
+ * introduces a big deviation in the pstate statistics.
+ *
+ * To solve this issue, the code below forces 'scaling_max_freq' to 208MHz
+ * to trigger the ftrace event 'trace_cpu_frequency', and then restores it
+ * to the maximum frequency value 1.2GHz.
+ */
+static int cpu_stat_inject_cpu_frequency_event(void)
+{
+       int len, fd;
+
+       fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY);
+       if (fd < 0) {
+               printf("failed to open scaling_max_freq, errno=%d\n", errno);
+               return fd;
+       }
+
+       len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ));
+       if (len < 0) {
+               printf("failed to open scaling_max_freq, errno=%d\n", errno);
+               goto err;
+       }
+
+       len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ));
+       if (len < 0) {
+               printf("failed to open scaling_max_freq, errno=%d\n", errno);
+               goto err;
+       }
+
+err:
+       close(fd);
+       return len;
+}
+
+static void int_exit(int sig)
+{
+       cpu_stat_inject_cpu_idle_event();
+       cpu_stat_inject_cpu_frequency_event();
+       cpu_stat_update(map_fd[1], map_fd[2]);
+       cpu_stat_print();
+       exit(0);
+}
+
+int main(int argc, char **argv)
+{
+       char filename[256];
+       int ret;
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+       if (load_bpf_file(filename)) {
+               printf("%s", bpf_log_buf);
+               return 1;
+       }
+
+       ret = cpu_stat_inject_cpu_idle_event();
+       if (ret < 0)
+               return 1;
+
+       ret = cpu_stat_inject_cpu_frequency_event();
+       if (ret < 0)
+               return 1;
+
+       signal(SIGINT, int_exit);
+       signal(SIGTERM, int_exit);
+
+       while (1) {
+               cpu_stat_update(map_fd[1], map_fd[2]);
+               cpu_stat_print();
+               sleep(5);
+       }
+
+       return 0;
+}
index d54e91e..b701b5c 100644 (file)
@@ -20,6 +20,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <libgen.h>
+#include <sys/resource.h>
 
 #include "bpf_load.h"
 #include "bpf_util.h"
@@ -75,6 +76,7 @@ static void usage(const char *prog)
 
 int main(int argc, char **argv)
 {
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        const char *optstr = "SN";
        char filename[256];
        int ret, opt, key = 0;
@@ -98,6 +100,11 @@ int main(int argc, char **argv)
                return 1;
        }
 
+       if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+               perror("setrlimit(RLIMIT_MEMLOCK)");
+               return 1;
+       }
+
        ifindex_in = strtoul(argv[optind], NULL, 0);
        ifindex_out = strtoul(argv[optind + 1], NULL, 0);
        printf("input: %d output: %d\n", ifindex_in, ifindex_out);
index 73f1da4..9bf2881 100644 (file)
@@ -2,7 +2,7 @@
 hostprogs-y := sockmap
 
 # Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o
+LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
index 7c25c0c..95a54a8 100644 (file)
@@ -566,6 +566,7 @@ run:
        else
                fprintf(stderr, "unknown test\n");
 out:
+       bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
        close(s1);
        close(s2);
        close(p1);
index 8644d86..b4d7b62 100644 (file)
@@ -6743,6 +6743,7 @@ static void __net_exit selinux_nf_unregister(struct net *net)
 static struct pernet_operations selinux_net_ops = {
        .init = selinux_nf_register,
        .exit = selinux_nf_unregister,
+       .async = true,
 };
 
 static int __init selinux_nf_ip_init(void)
index e36d178..3f29c03 100644 (file)
@@ -89,6 +89,7 @@ static void __net_exit smack_nf_unregister(struct net *net)
 static struct pernet_operations smack_net_ops = {
        .init = smack_nf_register,
        .exit = smack_nf_unregister,
+       .async = true,
 };
 
 static int __init smack_nf_ip_init(void)
index 5c43c18..8567a85 100644 (file)
@@ -35,12 +35,14 @@ TEST_GEN_PROGS_EXTENDED = test_libbpf_open
 
 include ../lib.mk
 
-BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c
+BPFOBJ := $(OUTPUT)/libbpf.a
 
 $(TEST_GEN_PROGS): $(BPFOBJ)
 
 $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
 
+$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+
 .PHONY: force
 
 # force a rebuild of BPFOBJ when its dependencies are updated
index 95a370f..5d73db4 100644 (file)
@@ -11,6 +11,8 @@
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
 #include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -42,6 +44,7 @@ static int bpf_find_map(const char *test, struct bpf_object *obj,
 
 int main(int argc, char **argv)
 {
+       struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
        const char *file = "test_tcpbpf_kern.o";
        struct tcpbpf_globals g = {0};
        int cg_fd, prog_fd, map_fd;
@@ -54,6 +57,9 @@ int main(int argc, char **argv)
        int pid;
        int rv;
 
+       if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
+               perror("Unable to lift memlock rlimit");
+
        if (argc > 1 && strcmp(argv[1], "-d") == 0)
                debug_flag = true;
 
index c73592f..2164d21 100644 (file)
@@ -57,6 +57,9 @@
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS     (1 << 0)
 #define F_LOAD_WITH_STRICT_ALIGNMENT           (1 << 1)
 
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+static bool unpriv_disabled = false;
+
 struct bpf_test {
        const char *descr;
        struct bpf_insn insns[MAX_INSNS];
@@ -11163,6 +11166,95 @@ static struct bpf_test tests[] = {
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_TRACEPOINT,
        },
+       {
+               "jit: lsh, rsh, arsh by 1",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_MOV64_IMM(BPF_REG_1, 0xff),
+                       BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1),
+                       BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1),
+                       BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+       {
+               "jit: mov32 for ldimm64, 1",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL),
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32),
+                       BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+       {
+               "jit: mov32 for ldimm64, 2",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL),
+                       BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+       {
+               "jit: various mul tests",
+               .insns = {
+                       BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+                       BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+                       BPF_LD_IMM64(BPF_REG_1, 0xefefefULL),
+                       BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+                       BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
+                       BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+                       BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+                       BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
+                       BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
+                       BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+                       BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -11317,7 +11409,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                        goto fail_log;
                }
                if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) {
-                       printf("FAIL\nUnexpected error message!\n");
+                       printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
+                             expected_err, bpf_vlog);
                        goto fail_log;
                }
        }
@@ -11401,9 +11494,20 @@ out:
        return ret;
 }
 
+static void get_unpriv_disabled()
+{
+       char buf[2];
+       FILE *fd;
+
+       fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+       if (fgets(buf, 2, fd) == buf && atoi(buf))
+               unpriv_disabled = true;
+       fclose(fd);
+}
+
 static int do_test(bool unpriv, unsigned int from, unsigned int to)
 {
-       int i, passes = 0, errors = 0;
+       int i, passes = 0, errors = 0, skips = 0;
 
        for (i = from; i < to; i++) {
                struct bpf_test *test = &tests[i];
@@ -11411,7 +11515,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
                /* Program types that are not supported by non-root we
                 * skip right away.
                 */
-               if (!test->prog_type) {
+               if (!test->prog_type && unpriv_disabled) {
+                       printf("#%d/u %s SKIP\n", i, test->descr);
+                       skips++;
+               } else if (!test->prog_type) {
                        if (!unpriv)
                                set_admin(false);
                        printf("#%d/u %s ", i, test->descr);
@@ -11420,13 +11527,17 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
                                set_admin(true);
                }
 
-               if (!unpriv) {
+               if (unpriv) {
+                       printf("#%d/p %s SKIP\n", i, test->descr);
+                       skips++;
+               } else {
                        printf("#%d/p %s ", i, test->descr);
                        do_test_single(test, false, &passes, &errors);
                }
        }
 
-       printf("Summary: %d PASSED, %d FAILED\n", passes, errors);
+       printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
+              skips, errors);
        return errors ? EXIT_FAILURE : EXIT_SUCCESS;
 }
 
@@ -11454,6 +11565,13 @@ int main(int argc, char **argv)
                }
        }
 
+       get_unpriv_disabled();
+       if (unpriv && unpriv_disabled) {
+               printf("Cannot run as unprivileged user with sysctl %s.\n",
+                      UNPRIV_SYSCTL);
+               return EXIT_FAILURE;
+       }
+
        setrlimit(RLIMIT_MEMLOCK, unpriv ? &rlim : &rinf);
        return do_test(unpriv, from, to);
 }
index d7c30d3..229a038 100644 (file)
@@ -5,7 +5,7 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
new file mode 100644 (file)
index 0000000..a793eef
--- /dev/null
@@ -0,0 +1 @@
+forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
new file mode 100644 (file)
index 0000000..4a0964c
--- /dev/null
@@ -0,0 +1,56 @@
+Motivation
+==========
+
+One of the nice things about network namespaces is that they allow one
+to easily create and test complex environments.
+
+Unfortunately, these namespaces can not be used with actual switching
+ASICs, as their ports can not be migrated to other network namespaces
+(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+L1-separation provided by namespaces.
+
+However, a similar kind of flexibility can be achieved by using VRFs and
+by looping the switch ports together. For example:
+
+                             br0
+                              +
+               vrf-h1         |           vrf-h2
+                 +        +---+----+        +
+                 |        |        |        |
+    192.0.2.1/24 +        +        +        + 192.0.2.2/24
+               swp1     swp2     swp3     swp4
+                 +        +        +        +
+                 |        |        |        |
+                 +--------+        +--------+
+
+The VRFs act as lightweight namespaces representing hosts connected to
+the switch.
+
+This approach for testing switch ASICs has several advantages over the
+traditional method that requires multiple physical machines, to name a
+few:
+
+1. Only the device under test (DUT) is being tested without noise from
+other systems.
+
+2. Ability to easily provision complex topologies. Testing bridging
+between 4-port LAGs or 8-way ECMP requires many physical links that are
+not always available. With the VRF-based approach one merely needs to
+loopback more ports.
+
+These tests are written with switch ASICs in mind, but they can be run
+on any Linux box using veth pairs to emulate physical loopbacks.
+
+Guidelines for Writing Tests
+============================
+
+o Where possible, reuse an existing topology for different tests instead
+  of recreating the same topology.
+o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
+  RFC 5737, respectively.
+o Where possible, tests shall be written so that they can be reused by
+  multiple topologies and added to lib.sh.
+o Checks shall be added to lib.sh for any external dependencies.
+o Code shall be checked using ShellCheck [1] prior to submission.
+
+1. https://www.shellcheck.net/
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
new file mode 100755 (executable)
index 0000000..75d9224
--- /dev/null
@@ -0,0 +1,88 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+       simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+       # 10 Seconds ageing time.
+       ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+               mcast_snooping 0
+
+       ip link set dev $swp1 master br0
+       ip link set dev $swp2 master br0
+
+       ip link set dev br0 up
+       ip link set dev $swp1 up
+       ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+       ip link set dev $swp2 down
+       ip link set dev $swp1 down
+
+       ip link del dev br0
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
new file mode 100644 (file)
index 0000000..5cd2aed
--- /dev/null
@@ -0,0 +1,12 @@
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_VRF=m
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
new file mode 100644 (file)
index 0000000..ab235c1
--- /dev/null
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+declare -A NETIFS
+
+NETIFS[p1]=veth0
+NETIFS[p2]=veth1
+NETIFS[p3]=veth2
+NETIFS[p4]=veth3
+NETIFS[p5]=veth4
+NETIFS[p6]=veth5
+NETIFS[p7]=veth6
+NETIFS[p8]=veth7
+
+##############################################################################
+# Defines
+
+# IPv4 ping utility name
+PING=ping
+# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
+PING6=ping6
+# Packet generator. Some distributions use 'mz'.
+MZ=mausezahn
+# Time to wait after interfaces participating in the test are all UP
+WAIT_TIME=5
+# Whether to pause on failure or not.
+PAUSE_ON_FAIL=no
+# Whether to pause on cleanup or not.
+PAUSE_ON_CLEANUP=no
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
new file mode 100644 (file)
index 0000000..d0af521
--- /dev/null
@@ -0,0 +1,540 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+# Can be overridden by the configuration file.
+PING=${PING:=ping}
+PING6=${PING6:=ping6}
+MZ=${MZ:=mausezahn}
+WAIT_TIME=${WAIT_TIME:=5}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+
+if [[ -f forwarding.config ]]; then
+       source forwarding.config
+fi
+
+##############################################################################
+# Sanity checks
+
+check_tc_version()
+{
+       tc -j &> /dev/null
+       if [[ $? -ne 0 ]]; then
+               echo "SKIP: iproute2 too old; tc is missing JSON support"
+               exit 1
+       fi
+
+       tc filter help 2>&1 | grep block &> /dev/null
+       if [[ $? -ne 0 ]]; then
+               echo "SKIP: iproute2 too old; tc is missing shared block support"
+               exit 1
+       fi
+}
+
+if [[ "$(id -u)" -ne 0 ]]; then
+       echo "SKIP: need root privileges"
+       exit 0
+fi
+
+if [[ "$CHECK_TC" = "yes" ]]; then
+       check_tc_version
+fi
+
+if [[ ! -x "$(command -v jq)" ]]; then
+       echo "SKIP: jq not installed"
+       exit 1
+fi
+
+if [[ ! -x "$(command -v $MZ)" ]]; then
+       echo "SKIP: $MZ not installed"
+       exit 0
+fi
+
+if [[ ! -v NUM_NETIFS ]]; then
+       echo "SKIP: importer does not define \"NUM_NETIFS\""
+       exit 0
+fi
+
+##############################################################################
+# Command line options handling
+
+count=0
+
+while [[ $# -gt 0 ]]; do
+       if [[ "$count" -eq "0" ]]; then
+               unset NETIFS
+               declare -A NETIFS
+       fi
+       count=$((count + 1))
+       NETIFS[p$count]="$1"
+       shift
+done
+
+##############################################################################
+# Network interfaces configuration
+
+for i in $(eval echo {1..$NUM_NETIFS}); do
+       ip link show dev ${NETIFS[p$i]} &> /dev/null
+       if [[ $? -ne 0 ]]; then
+               echo "SKIP: could not find all required interfaces"
+               exit 0
+       fi
+done
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+       local err=$1
+       local msg=$2
+
+       if [[ $RET -eq 0 && $err -ne 0 ]]; then
+               RET=$err
+               retmsg=$msg
+       fi
+}
+
+check_fail()
+{
+       local err=$1
+       local msg=$2
+
+       if [[ $RET -eq 0 && $err -eq 0 ]]; then
+               RET=1
+               retmsg=$msg
+       fi
+}
+
+log_test()
+{
+       local test_name=$1
+       local opt_str=$2
+
+       if [[ $# -eq 2 ]]; then
+               opt_str="($opt_str)"
+       fi
+
+       if [[ $RET -ne 0 ]]; then
+               EXIT_STATUS=1
+               printf "TEST: %-60s  [FAIL]\n" "$test_name $opt_str"
+               if [[ ! -z "$retmsg" ]]; then
+                       printf "\t%s\n" "$retmsg"
+               fi
+               if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+                       echo "Hit enter to continue, 'q' to quit"
+                       read a
+                       [ "$a" = "q" ] && exit 1
+               fi
+               return 1
+       fi
+
+       printf "TEST: %-60s  [PASS]\n" "$test_name $opt_str"
+       return 0
+}
+
+log_info()
+{
+       local msg=$1
+
+       echo "INFO: $msg"
+}
+
+setup_wait()
+{
+       for i in $(eval echo {1..$NUM_NETIFS}); do
+               while true; do
+                       ip link show dev ${NETIFS[p$i]} up \
+                               | grep 'state UP' &> /dev/null
+                       if [[ $? -ne 0 ]]; then
+                               sleep 1
+                       else
+                               break
+                       fi
+               done
+       done
+
+       # Make sure links are ready.
+       sleep $WAIT_TIME
+}
+
+pre_cleanup()
+{
+       if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
+               echo "Pausing before cleanup, hit any key to continue"
+               read
+       fi
+}
+
+vrf_prepare()
+{
+       ip -4 rule add pref 32765 table local
+       ip -4 rule del pref 0
+       ip -6 rule add pref 32765 table local
+       ip -6 rule del pref 0
+}
+
+vrf_cleanup()
+{
+       ip -6 rule add pref 0 table local
+       ip -6 rule del pref 32765
+       ip -4 rule add pref 0 table local
+       ip -4 rule del pref 32765
+}
+
+__last_tb_id=0
+declare -A __TB_IDS
+
+__vrf_td_id_assign()
+{
+       local vrf_name=$1
+
+       __last_tb_id=$((__last_tb_id + 1))
+       __TB_IDS[$vrf_name]=$__last_tb_id
+       return $__last_tb_id
+}
+
+__vrf_td_id_lookup()
+{
+       local vrf_name=$1
+
+       return ${__TB_IDS[$vrf_name]}
+}
+
+vrf_create()
+{
+       local vrf_name=$1
+       local tb_id
+
+       __vrf_td_id_assign $vrf_name
+       tb_id=$?
+
+       ip link add dev $vrf_name type vrf table $tb_id
+       ip -4 route add table $tb_id unreachable default metric 4278198272
+       ip -6 route add table $tb_id unreachable default metric 4278198272
+}
+
+vrf_destroy()
+{
+       local vrf_name=$1
+       local tb_id
+
+       __vrf_td_id_lookup $vrf_name
+       tb_id=$?
+
+       ip -6 route del table $tb_id unreachable default metric 4278198272
+       ip -4 route del table $tb_id unreachable default metric 4278198272
+       ip link del dev $vrf_name
+}
+
+__addr_add_del()
+{
+       local if_name=$1
+       local add_del=$2
+       local array
+
+       shift
+       shift
+       array=("${@}")
+
+       for addrstr in "${array[@]}"; do
+               ip address $add_del $addrstr dev $if_name
+       done
+}
+
+simple_if_init()
+{
+       local if_name=$1
+       local vrf_name
+       local array
+
+       shift
+       vrf_name=v$if_name
+       array=("${@}")
+
+       vrf_create $vrf_name
+       ip link set dev $if_name master $vrf_name
+       ip link set dev $vrf_name up
+       ip link set dev $if_name up
+
+       __addr_add_del $if_name add "${array[@]}"
+}
+
+simple_if_fini()
+{
+       local if_name=$1
+       local vrf_name
+       local array
+
+       shift
+       vrf_name=v$if_name
+       array=("${@}")
+
+       __addr_add_del $if_name del "${array[@]}"
+
+       ip link set dev $if_name down
+       vrf_destroy $vrf_name
+}
+
+master_name_get()
+{
+       local if_name=$1
+
+       ip -j link show dev $if_name | jq -r '.[]["master"]'
+}
+
+link_stats_tx_packets_get()
+{
+       local if_name=$1
+
+       ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+}
+
+mac_get()
+{
+       local if_name=$1
+
+       ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
+bridge_ageing_time_get()
+{
+       local bridge=$1
+       local ageing_time
+
+       # Need to divide by 100 to convert to seconds.
+       ageing_time=$(ip -j -d link show dev $bridge \
+                     | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
+       echo $((ageing_time / 100))
+}
+
+forwarding_enable()
+{
+       ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
+       ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+
+       sysctl -q -w net.ipv4.conf.all.forwarding=1
+       sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+forwarding_restore()
+{
+       sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
+       sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+}
+
+tc_offload_check()
+{
+       for i in $(eval echo {1..$NUM_NETIFS}); do
+               ethtool -k ${NETIFS[p$i]} \
+                       | grep "hw-tc-offload: on" &> /dev/null
+               if [[ $? -ne 0 ]]; then
+                       return 1
+               fi
+       done
+
+       return 0
+}
+
+##############################################################################
+# Tests
+
+ping_test()
+{
+       local if_name=$1
+       local dip=$2
+       local vrf_name
+
+       RET=0
+
+       vrf_name=$(master_name_get $if_name)
+       ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+       check_err $?
+       log_test "ping"
+}
+
+ping6_test()
+{
+       local if_name=$1
+       local dip=$2
+       local vrf_name
+
+       RET=0
+
+       vrf_name=$(master_name_get $if_name)
+       ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+       check_err $?
+       log_test "ping6"
+}
+
+# Exercise bridge FDB learning on $2 (a port of bridge $1): verify a MAC
+# is learned from traffic, forwarded once learned, aged out after the
+# ageing time, and not learned while learning is off. Traffic is injected
+# from $3/$4 and observed via a tc drop counter on $3.
+learning_test()
+{
+       local bridge=$1
+       local br_port1=$2       # Connected to `host1_if`.
+       local host1_if=$3
+       local host2_if=$4
+       local mac=de:ad:be:ef:13:37
+       local ageing_time
+
+       RET=0
+
+       # The test MAC must not be known before any traffic was sent.
+       bridge -j fdb show br $bridge brport $br_port1 \
+               | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+       check_fail $? "Found FDB record when should not"
+
+       # Disable unknown unicast flooding on `br_port1` to make sure
+       # packets are only forwarded through the port after a matching
+       # FDB entry was installed.
+       bridge link set dev $br_port1 flood off
+
+       tc qdisc add dev $host1_if ingress
+       tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+               flower dst_mac $mac action drop
+
+       $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+       sleep 1
+
+       # With flooding off and no FDB entry, the packet must not arrive.
+       tc -j -s filter show dev $host1_if ingress \
+               | jq -e ".[] | select(.options.handle == 101) \
+               | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+       check_fail $? "Packet reached second host when should not"
+
+       # Send FROM the test MAC so the bridge learns it on br_port1.
+       $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+       sleep 1
+
+       bridge -j fdb show br $bridge brport $br_port1 \
+               | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+       check_err $? "Did not find FDB record when should"
+
+       $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+       sleep 1
+
+       # Now that the entry exists the packet must be forwarded.
+       tc -j -s filter show dev $host1_if ingress \
+               | jq -e ".[] | select(.options.handle == 101) \
+               | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+       check_err $? "Packet did not reach second host when should"
+
+       # Wait for 10 seconds after the ageing time to make sure FDB
+       # record was aged-out.
+       ageing_time=$(bridge_ageing_time_get $bridge)
+       sleep $((ageing_time + 10))
+
+       bridge -j fdb show br $bridge brport $br_port1 \
+               | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+       check_fail $? "Found FDB record when should not"
+
+       # With learning disabled, sending from the MAC must not create an
+       # FDB entry.
+       bridge link set dev $br_port1 learning off
+
+       $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+       sleep 1
+
+       bridge -j fdb show br $bridge brport $br_port1 \
+               | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+       check_fail $? "Found FDB record when should not"
+
+       bridge link set dev $br_port1 learning on
+
+       tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+       tc qdisc del dev $host1_if ingress
+
+       bridge link set dev $br_port1 flood on
+
+       log_test "FDB learning"
+}
+
+flood_test_do()
+{
+       local should_flood=$1
+       local mac=$2
+       local ip=$3
+       local host1_if=$4
+       local host2_if=$5
+       local err=0
+
+       # Add an ACL on `host2_if` which will tell us whether the packet
+       # was flooded to it or not.
+       tc qdisc add dev $host2_if ingress
+       tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+               flower dst_mac $mac action drop
+
+       $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
+       sleep 1
+
+       tc -j -s filter show dev $host2_if ingress \
+               | jq -e ".[] | select(.options.handle == 101) \
+               | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+       if [[ $? -ne 0 && $should_flood == "true" || \
+             $? -eq 0 && $should_flood == "false" ]]; then
+               err=1
+       fi
+
+       tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+       tc qdisc del dev $host2_if ingress
+
+       return $err
+}
+
+# Check that unknown unicast is flooded through bridge port $1 only when
+# flooding is enabled on it.
+flood_unicast_test()
+{
+       local br_port=$1
+       local host1_if=$2
+       local host2_if=$3
+       local mac=de:ad:be:ef:13:37
+       local ip=192.0.2.100
+
+       RET=0
+
+       bridge link set dev $br_port flood off
+
+       flood_test_do false $mac $ip $host1_if $host2_if
+       check_err $? "Packet flooded when should not"
+
+       bridge link set dev $br_port flood on
+
+       flood_test_do true $mac $ip $host1_if $host2_if
+       check_err $? "Packet was not flooded when should"
+
+       log_test "Unknown unicast flood"
+}
+
+# Check that unregistered multicast is flooded through bridge port $1
+# only when mcast_flood is enabled on it.
+flood_multicast_test()
+{
+       local br_port=$1
+       local host1_if=$2
+       local host2_if=$3
+       local mac=01:00:5e:00:00:01
+       local ip=239.0.0.1
+
+       RET=0
+
+       bridge link set dev $br_port mcast_flood off
+
+       flood_test_do false $mac $ip $host1_if $host2_if
+       check_err $? "Packet flooded when should not"
+
+       bridge link set dev $br_port mcast_flood on
+
+       flood_test_do true $mac $ip $host1_if $host2_if
+       check_err $? "Packet was not flooded when should"
+
+       log_test "Unregistered multicast flood"
+}
+
+# Run both flooding tests (unknown unicast and unregistered multicast).
+flood_test()
+{
+       # `br_port` is connected to `host2_if`
+       local br_port=$1
+       local host1_if=$2
+       local host2_if=$3
+
+       flood_unicast_test $br_port $host1_if $host2_if
+       flood_multicast_test $br_port $host1_if $host2_if
+}
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
new file mode 100755 (executable)
index 0000000..cc6a14a
--- /dev/null
@@ -0,0 +1,125 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+# Host h1: enslave to vrf-h1, assign 192.0.2.2/2001:db8:1::2 and route
+# the remote subnets via the router.
+h1_create()
+{
+       vrf_create "vrf-h1"
+       ip link set dev $h1 master vrf-h1
+
+       ip link set dev vrf-h1 up
+       ip link set dev $h1 up
+
+       ip address add 192.0.2.2/24 dev $h1
+       ip address add 2001:db8:1::2/64 dev $h1
+
+       ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+       ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+# Tear down h1 in reverse order of h1_create().
+h1_destroy()
+{
+       ip route del 2001:db8:2::/64 vrf vrf-h1
+       ip route del 198.51.100.0/24 vrf vrf-h1
+
+       ip address del 2001:db8:1::2/64 dev $h1
+       ip address del 192.0.2.2/24 dev $h1
+
+       ip link set dev $h1 down
+       vrf_destroy "vrf-h1"
+}
+
+# Host h2: mirror of h1_create() on the 198.51.100.0/2001:db8:2:: side.
+h2_create()
+{
+       vrf_create "vrf-h2"
+       ip link set dev $h2 master vrf-h2
+
+       ip link set dev vrf-h2 up
+       ip link set dev $h2 up
+
+       ip address add 198.51.100.2/24 dev $h2
+       ip address add 2001:db8:2::2/64 dev $h2
+
+       ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+       ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+# Tear down h2 in reverse order of h2_create().
+h2_destroy()
+{
+       ip route del 2001:db8:1::/64 vrf vrf-h2
+       ip route del 192.0.2.0/24 vrf vrf-h2
+
+       ip address del 2001:db8:2::2/64 dev $h2
+       ip address del 198.51.100.2/24 dev $h2
+
+       ip link set dev $h2 down
+       vrf_destroy "vrf-h2"
+}
+
+# Router: bring up both router ports with the gateway addresses the hosts
+# point at. Runs in the default VRF so kernel forwarding applies.
+router_create()
+{
+       ip link set dev $rp1 up
+       ip link set dev $rp2 up
+
+       ip address add 192.0.2.1/24 dev $rp1
+       ip address add 2001:db8:1::1/64 dev $rp1
+
+       ip address add 198.51.100.1/24 dev $rp2
+       ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+# Tear down the router in reverse order of router_create().
+router_destroy()
+{
+       ip address del 2001:db8:2::1/64 dev $rp2
+       ip address del 198.51.100.1/24 dev $rp2
+
+       ip address del 2001:db8:1::1/64 dev $rp1
+       ip address del 192.0.2.1/24 dev $rp1
+
+       ip link set dev $rp2 down
+       ip link set dev $rp1 down
+}
+
+# Map the physical ports (h1-rp1, rp2-h2 pairs), build hosts and router,
+# and enable forwarding.
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       rp1=${NETIFS[p2]}
+
+       rp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+
+       router_create
+
+       forwarding_enable
+}
+
+# EXIT-trap handler: undo setup_prepare() in reverse order.
+cleanup()
+{
+       pre_cleanup
+
+       forwarding_restore
+
+       router_destroy
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+# Entry point: build the topology, then verify IPv4/IPv6 routing through
+# the router with pings between the hosts.
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
new file mode 100755 (executable)
index 0000000..5559530
--- /dev/null
@@ -0,0 +1,332 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=8
+source lib.sh
+
+# Host h1: enslave to vrf-h1, assign addresses and route remote subnets
+# via router r1.
+h1_create()
+{
+       vrf_create "vrf-h1"
+       ip link set dev $h1 master vrf-h1
+
+       ip link set dev vrf-h1 up
+       ip link set dev $h1 up
+
+       ip address add 192.0.2.2/24 dev $h1
+       ip address add 2001:db8:1::2/64 dev $h1
+
+       ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+       ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+# Tear down h1 in reverse order of h1_create().
+h1_destroy()
+{
+       ip route del 2001:db8:2::/64 vrf vrf-h1
+       ip route del 198.51.100.0/24 vrf vrf-h1
+
+       ip address del 2001:db8:1::2/64 dev $h1
+       ip address del 192.0.2.2/24 dev $h1
+
+       ip link set dev $h1 down
+       vrf_destroy "vrf-h1"
+}
+
+# Host h2: mirror of h1_create() behind router r2.
+h2_create()
+{
+       vrf_create "vrf-h2"
+       ip link set dev $h2 master vrf-h2
+
+       ip link set dev vrf-h2 up
+       ip link set dev $h2 up
+
+       ip address add 198.51.100.2/24 dev $h2
+       ip address add 2001:db8:2::2/64 dev $h2
+
+       ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+       ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+# Tear down h2 in reverse order of h2_create().
+h2_destroy()
+{
+       ip route del 2001:db8:1::/64 vrf vrf-h2
+       ip route del 192.0.2.0/24 vrf vrf-h2
+
+       ip address del 2001:db8:2::2/64 dev $h2
+       ip address del 198.51.100.2/24 dev $h2
+
+       ip link set dev $h2 down
+       vrf_destroy "vrf-h2"
+}
+
+# Router r1: three ports in vrf-r1 — rp11 faces h1, rp12/rp13 are the
+# two inter-router links. Routes towards h2's subnets are multipath over
+# both links.
+router1_create()
+{
+       vrf_create "vrf-r1"
+       ip link set dev $rp11 master vrf-r1
+       ip link set dev $rp12 master vrf-r1
+       ip link set dev $rp13 master vrf-r1
+
+       ip link set dev vrf-r1 up
+       ip link set dev $rp11 up
+       ip link set dev $rp12 up
+       ip link set dev $rp13 up
+
+       ip address add 192.0.2.1/24 dev $rp11
+       ip address add 2001:db8:1::1/64 dev $rp11
+
+       # Link-local style addressing on the inter-router links.
+       ip address add 169.254.2.12/24 dev $rp12
+       ip address add fe80:2::12/64 dev $rp12
+
+       ip address add 169.254.3.13/24 dev $rp13
+       ip address add fe80:3::13/64 dev $rp13
+
+       ip route add 198.51.100.0/24 vrf vrf-r1 \
+               nexthop via 169.254.2.22 dev $rp12 \
+               nexthop via 169.254.3.23 dev $rp13
+       ip route add 2001:db8:2::/64 vrf vrf-r1 \
+               nexthop via fe80:2::22 dev $rp12 \
+               nexthop via fe80:3::23 dev $rp13
+}
+
+# Tear down router r1 in reverse order of router1_create().
+router1_destroy()
+{
+       ip route del 2001:db8:2::/64 vrf vrf-r1
+       ip route del 198.51.100.0/24 vrf vrf-r1
+
+       ip address del fe80:3::13/64 dev $rp13
+       ip address del 169.254.3.13/24 dev $rp13
+
+       ip address del fe80:2::12/64 dev $rp12
+       ip address del 169.254.2.12/24 dev $rp12
+
+       ip address del 2001:db8:1::1/64 dev $rp11
+       ip address del 192.0.2.1/24 dev $rp11
+
+       ip link set dev $rp13 down
+       ip link set dev $rp12 down
+       ip link set dev $rp11 down
+
+       vrf_destroy "vrf-r1"
+}
+
+# Router r2: mirror of router1_create() — rp21 faces h2, rp22/rp23 are
+# the peer ends of the inter-router links, multipath back towards h1.
+router2_create()
+{
+       vrf_create "vrf-r2"
+       ip link set dev $rp21 master vrf-r2
+       ip link set dev $rp22 master vrf-r2
+       ip link set dev $rp23 master vrf-r2
+
+       ip link set dev vrf-r2 up
+       ip link set dev $rp21 up
+       ip link set dev $rp22 up
+       ip link set dev $rp23 up
+
+       ip address add 198.51.100.1/24 dev $rp21
+       ip address add 2001:db8:2::1/64 dev $rp21
+
+       ip address add 169.254.2.22/24 dev $rp22
+       ip address add fe80:2::22/64 dev $rp22
+
+       ip address add 169.254.3.23/24 dev $rp23
+       ip address add fe80:3::23/64 dev $rp23
+
+       ip route add 192.0.2.0/24 vrf vrf-r2 \
+               nexthop via 169.254.2.12 dev $rp22 \
+               nexthop via 169.254.3.13 dev $rp23
+       ip route add 2001:db8:1::/64 vrf vrf-r2 \
+               nexthop via fe80:2::12 dev $rp22 \
+               nexthop via fe80:3::13 dev $rp23
+}
+
+# Tear down router r2 in reverse order of router2_create().
+router2_destroy()
+{
+       ip route del 2001:db8:1::/64 vrf vrf-r2
+       ip route del 192.0.2.0/24 vrf vrf-r2
+
+       ip address del fe80:3::23/64 dev $rp23
+       ip address del 169.254.3.23/24 dev $rp23
+
+       ip address del fe80:2::22/64 dev $rp22
+       ip address del 169.254.2.22/24 dev $rp22
+
+       ip address del 2001:db8:2::1/64 dev $rp21
+       ip address del 198.51.100.1/24 dev $rp21
+
+       ip link set dev $rp23 down
+       ip link set dev $rp22 down
+       ip link set dev $rp21 down
+
+       vrf_destroy "vrf-r2"
+}
+
+# Check that the measured packet split between the two next hops matches
+# the configured weights, within a 10% relative tolerance.
+# $1 - test description; $2/$3 - weights of rp12/rp13;
+# $4/$5 - packets counted on rp12/rp13.
+multipath_eval()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local packets_rp12=$4
+       local packets_rp13=$5
+       local weights_ratio packets_ratio diff
+
+       RET=0
+
+       # Compute the expected ratio up front: the zero-packet error path
+       # below logs it, and previously did so while it was still unset.
+       if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+               weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+                               | bc -l)
+       else
+               weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
+                               | bc -l)
+       fi
+
+       if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+               check_err 1 "Packet difference is 0"
+               log_test "Multipath"
+               log_info "Expected ratio $weights_ratio"
+               return
+       fi
+
+       # Always put the larger side in the numerator so the ratio is >= 1.
+       if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+               packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+                               | bc -l)
+       else
+               packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
+                               | bc -l)
+       fi
+
+       # Absolute difference between expected and measured ratios.
+       diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+       diff=${diff#-}
+
+       test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+       check_err $? "Too large discrepancy between expected and measured ratios"
+       log_test "$desc"
+       log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
+
+# Measure the IPv4 multipath split for the given next-hop weights by
+# counting TX packets on each inter-router link, then restore the
+# original route and hash policy.
+multipath4_test()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+       local hash_policy
+
+       # Transmit multiple flows from h1 to h2 and make sure they are
+       # distributed between both multipath links (rp12 and rp13)
+       # according to the configured weights.
+       hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
+       sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+       ip route replace 198.51.100.0/24 vrf vrf-r1 \
+               nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+               nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       # Many UDP flows: the destination port sweep spreads the L4 hash.
+       ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+              -d 1msec -t udp "sp=1024,dp=0-32768"
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       # Restore settings.
+       ip route replace 198.51.100.0/24 vrf vrf-r1 \
+               nexthop via 169.254.2.22 dev $rp12 \
+               nexthop via 169.254.3.23 dev $rp13
+       sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+}
+
+# IPv6 counterpart of multipath4_test(); flow entropy comes from random
+# flow labels instead of a port sweep.
+multipath6_test()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+              nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+              nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       # Generate 16384 echo requests, each with a random flow label.
+       for _ in $(seq 1 16384); do
+              ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+       done
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       # Restore the unweighted multipath route.
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+              nexthop via fe80:2::22 dev $rp12 \
+              nexthop via fe80:3::23 dev $rp13
+}
+
+# Run the IPv4 and IPv6 multipath tests over several weight ratios.
+multipath_test()
+{
+       log_info "Running IPv4 multipath tests"
+       multipath4_test "ECMP" 1 1
+       multipath4_test "Weighted MP 2:1" 2 1
+       multipath4_test "Weighted MP 11:45" 11 45
+
+       log_info "Running IPv6 multipath tests"
+       multipath6_test "ECMP" 1 1
+       multipath6_test "Weighted MP 2:1" 2 1
+       multipath6_test "Weighted MP 11:45" 11 45
+}
+
+# Map the eight physical ports to the h1-r1-r2-h2 dual-link topology,
+# build all nodes and enable forwarding.
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       rp11=${NETIFS[p2]}
+
+       rp12=${NETIFS[p3]}
+       rp22=${NETIFS[p4]}
+
+       rp13=${NETIFS[p5]}
+       rp23=${NETIFS[p6]}
+
+       rp21=${NETIFS[p7]}
+       h2=${NETIFS[p8]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+
+       router1_create
+       router2_create
+
+       forwarding_enable
+}
+
+# EXIT-trap handler: undo setup_prepare() in reverse order.
+cleanup()
+{
+       pre_cleanup
+
+       forwarding_restore
+
+       router2_destroy
+       router1_destroy
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+# Entry point: build the topology, sanity-check connectivity, then run
+# the multipath distribution tests.
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+multipath_test
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
new file mode 100755 (executable)
index 0000000..8ab5cf0
--- /dev/null
@@ -0,0 +1,195 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+# Host h1: simple VRF-backed interface with a single IPv4 address.
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24
+}
+
+# Undo h1_create().
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24
+}
+
+# Host h2: VRF-backed interface plus a clsact qdisc so the tests can
+# attach observation filters on its ingress.
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24
+       tc qdisc add dev $h2 clsact
+}
+
+# Undo h2_create().
+h2_destroy()
+{
+       tc qdisc del dev $h2 clsact
+       simple_if_fini $h2 192.0.2.2/24
+}
+
+# "Switch" ports under test; clsact on swp1 hosts the tested filters.
+# NOTE(review): swp1/swp2 reuse the hosts' addresses — presumably the
+# switch pair acts as a transparent wire between h1 and h2; confirm
+# against the intended topology.
+switch_create()
+{
+       simple_if_init $swp1 192.0.2.2/24
+       tc qdisc add dev $swp1 clsact
+
+       simple_if_init $swp2 192.0.2.1/24
+}
+
+# Undo switch_create() in reverse order.
+switch_destroy()
+{
+       simple_if_fini $swp2 192.0.2.1/24
+
+       tc qdisc del dev $swp1 clsact
+       simple_if_fini $swp1 192.0.2.2/24
+}
+
+# Verify that a mirred egress redirect action on swp1 forwards matching
+# packets out of swp2 so they arrive at h2.
+mirred_egress_redirect_test()
+{
+       RET=0
+
+       # Observation filter on h2; matching packets are counted (and
+       # dropped) there.
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 192.0.2.2 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched without redirect rule inserted"
+
+       tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+               dev $swp2
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_err $? "Did not match incoming redirected packet"
+
+       tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+       log_test "mirred egress redirect ($tcflags)"
+}
+
+# Verify gact drop and ok: a drop filter counts the packet; a
+# higher-priority ok filter then passes it before the drop is reached.
+gact_drop_and_ok_test()
+{
+       RET=0
+
+       # skip_hw: the drop must be observed in software regardless of the
+       # current $tcflags pass.
+       tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+               skip_hw dst_ip 192.0.2.2 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $swp1 ingress" 102 1
+       check_err $? "Packet was not dropped"
+
+       tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 192.0.2.2 action ok
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $swp1 ingress" 101 1
+       check_err $? "Did not see trapped packet"
+
+       tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+       tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+       log_test "gact drop and ok ($tcflags)"
+}
+
+# Verify the trap action: without it the packet is redirected in hardware
+# and never reaches the software drop filter; with it the packet is
+# trapped to the CPU and both software filters see it.
+gact_trap_test()
+{
+       RET=0
+
+       tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+               skip_hw dst_ip 192.0.2.2 action drop
+       tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
+               $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+               dev $swp2
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $swp1 ingress" 101 1
+       check_fail $? "Saw packet without trap rule inserted"
+
+       tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_ip 192.0.2.2 action trap
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $swp1 ingress" 102 1
+       check_err $? "Packet was not trapped"
+
+       tc_check_packets "dev $swp1 ingress" 101 1
+       check_err $? "Did not see trapped packet"
+
+       tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower
+       tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+       tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+       log_test "trap ($tcflags)"
+}
+
+# Map ports, record host MACs, and swap the switch-port MACs so each swp
+# accepts frames addressed to the opposite host.
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       h1mac=$(mac_get $h1)
+       h2mac=$(mac_get $h2)
+
+       # Original MACs are saved so cleanup() can restore them.
+       swp1origmac=$(mac_get $swp1)
+       swp2origmac=$(mac_get $swp2)
+       ip link set $swp1 address $h2mac
+       ip link set $swp2 address $h1mac
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+       switch_create
+}
+
+# EXIT-trap handler: undo setup_prepare(), including the MAC swap.
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+
+       ip link set $swp2 address $swp2origmac
+       ip link set $swp1 address $swp1origmac
+}
+
+# Entry point: run the action tests in software (skip_hw); if every NIC
+# supports hw-tc-offload, repeat them offloaded (skip_sw).
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+gact_drop_and_ok_test
+mirred_egress_redirect_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+       log_info "Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       gact_drop_and_ok_test
+       mirred_egress_redirect_test
+       gact_trap_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
new file mode 100755 (executable)
index 0000000..2fd1522
--- /dev/null
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+# Host h1: simple VRF-backed interface (traffic source).
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24
+}
+
+# Undo h1_create().
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24
+}
+
+# Host h2: VRF-backed interface plus clsact for the chain filters.
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24
+       tc qdisc add dev $h2 clsact
+}
+
+# Undo h2_create().
+h2_destroy()
+{
+       tc qdisc del dev $h2 clsact
+       simple_if_fini $h2 192.0.2.2/24
+}
+
+# A filter in chain 1 must not match when nothing in chain 0 jumps to it.
+unreachable_chain_test()
+{
+       RET=0
+
+       tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower $tcflags dst_mac $h2mac action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 1101 1
+       check_fail $? "matched on filter in unreachable chain"
+
+       tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower
+
+       log_test "unreachable chain ($tcflags)"
+}
+
+# A "goto chain 1" action in chain 0 must divert the packet past the
+# lower-priority chain-0 drop filter into the chain-1 filter.
+gact_goto_chain_test()
+{
+       RET=0
+
+       tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower $tcflags dst_mac $h2mac action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_mac $h2mac action drop
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_mac $h2mac action goto chain 1
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       # 102 must not see the packet; 101 (goto) and 1101 (chain 1) must.
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_err $? "Did not match on correct filter with goto chain action"
+
+       tc_check_packets "dev $h2 ingress" 1101 1
+       check_err $? "Did not match on correct filter in chain 1"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+       tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower
+
+       log_test "gact goto chain ($tcflags)"
+}
+
+# Map the two ports, record host MACs, and build both hosts.
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       h2=${NETIFS[p2]}
+       h1mac=$(mac_get $h1)
+       h2mac=$(mac_get $h2)
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+}
+
+# EXIT-trap handler: undo setup_prepare() in reverse order.
+cleanup()
+{
+       pre_cleanup
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+# Entry point: run the chain tests in software (skip_hw); if every NIC
+# supports hw-tc-offload, repeat them offloaded (skip_sw).
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+unreachable_chain_test
+gact_goto_chain_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+       log_info "Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       unreachable_chain_test
+       gact_goto_chain_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
new file mode 100644 (file)
index 0000000..9d3b64a
--- /dev/null
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+CHECK_TC="yes"
+
+# Return 0 iff the filter with the given handle under "tc filter show $id"
+# has seen exactly $count packets. Non-zero when tc itself fails or the
+# count differs.
+tc_check_packets()
+{
+       local id=$1
+       local handle=$2
+       local count=$3
+       local ret
+
+       output="$(tc -j -s filter show $id)"
+       # workaround the jq bug which causes jq to return 0 in case input is ""
+       ret=$?
+       if [[ $ret -ne 0 ]]; then
+               return $ret
+       fi
+       # Quote $output: unquoted it would be word-split and glob-expanded,
+       # mangling JSON that contains runs of whitespace or "*".
+       echo "$output" | \
+               jq -e ".[] \
+               | select(.options.handle == $handle) \
+               | select(.options.actions[0].stats.packets == $count)" \
+               &> /dev/null
+       return $?
+}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
new file mode 100755 (executable)
index 0000000..032b882
--- /dev/null
@@ -0,0 +1,196 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+# Host h1: VRF-backed interface with two addresses so both the match and
+# the decoy filters have a source subnet to key on.
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+# Undo h1_create().
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+# Host h2: dual-address VRF-backed interface plus clsact for the flower
+# filters under test.
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+       tc qdisc add dev $h2 clsact
+}
+
+# Undo h2_create().
+h2_destroy()
+{
+       tc qdisc del dev $h2 clsact
+       simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+# Flower dst_mac keying: the decoy-MAC filter must not match, the
+# real-MAC filter must.
+match_dst_mac_test()
+{
+       local dummy_mac=de:ad:be:ef:aa:aa
+
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_mac $dummy_mac action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_mac $h2mac action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_err $? "Did not match on correct filter"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+       log_test "dst_mac match ($tcflags)"
+}
+
+# Flower src_mac keying: same pattern as match_dst_mac_test() but keyed
+# on the sender's MAC.
+match_src_mac_test()
+{
+       local dummy_mac=de:ad:be:ef:aa:aa
+
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags src_mac $dummy_mac action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags src_mac $h1mac action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_err $? "Did not match on correct filter"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+       log_test "src_mac match ($tcflags)"
+}
+
+# Flower dst_ip keying: exact address must beat the decoy; after the
+# exact filter is removed, the /24-masked filter must match instead.
+match_dst_ip_test()
+{
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 198.51.100.2 action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_ip 192.0.2.2 action drop
+       tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+               $tcflags dst_ip 192.0.2.0/24 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_err $? "Did not match on correct filter"
+
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 103 1
+       check_err $? "Did not match on correct filter with mask"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+       log_test "dst_ip match ($tcflags)"
+}
+
+# Flower src_ip keying: mirror of match_dst_ip_test() keyed on the
+# source address, including the masked-match phase.
+match_src_ip_test()
+{
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags src_ip 198.51.100.1 action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags src_ip 192.0.2.1 action drop
+       tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+               $tcflags src_ip 192.0.2.0/24 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_err $? "Did not match on correct filter"
+
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 103 1
+       check_err $? "Did not match on correct filter with mask"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+       log_test "src_ip match ($tcflags)"
+}
+
+# Map the two ports, record host MACs, and build both hosts.
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       h2=${NETIFS[p2]}
+       h1mac=$(mac_get $h1)
+       h2mac=$(mac_get $h2)
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+}
+
+# EXIT-trap handler: undo setup_prepare() in reverse order.
+cleanup()
+{
+       pre_cleanup
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+# Entry point: run the flower match tests in software (skip_hw); if every
+# NIC supports hw-tc-offload, repeat them offloaded (skip_sw).
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+match_dst_mac_test
+match_src_mac_test
+match_dst_ip_test
+match_src_ip_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+       log_info "Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       match_dst_mac_test
+       match_src_mac_test
+       match_dst_ip_test
+       match_src_ip_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
new file mode 100755 (executable)
index 0000000..077b980
--- /dev/null
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.1/24
+}
+
+h2_destroy()
+{
+       simple_if_fini $h2 192.0.2.1/24
+}
+
+switch_create()
+{
+       simple_if_init $swp1 192.0.2.2/24
+       tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact
+
+       simple_if_init $swp2 192.0.2.2/24
+       tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact
+}
+
+switch_destroy()
+{
+       tc qdisc del dev $swp2 clsact
+       simple_if_fini $swp2 192.0.2.2/24
+
+       tc qdisc del dev $swp1 clsact
+       simple_if_fini $swp1 192.0.2.2/24
+}
+
+shared_block_test()
+{
+       RET=0
+
+       tc filter add block 22 protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 192.0.2.2 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "block 22" 101 1
+       check_err $? "Did not match first incoming packet on a block"
+
+       $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "block 22" 101 2
+       check_err $? "Did not match second incoming packet on a block"
+
+       tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+       log_test "shared block ($tcflags)"
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       h1mac=$(mac_get $h1)
+       h2mac=$(mac_get $h2)
+
+       swmac=$(mac_get $swp1)
+       swp2origmac=$(mac_get $swp2)
+       ip link set $swp2 address $swmac
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+
+       ip link set $swp2 address $swp2origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+shared_block_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+       log_info "Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       shared_block_test
+fi
+
+exit $EXIT_STATUS
index 5cc2a53..406cc70 100644 (file)
@@ -344,27 +344,53 @@ static int do_setup_tx(int domain, int type, int protocol)
        return fd;
 }
 
-static int do_process_zerocopy_cookies(struct sock_extended_err *serr,
-                                      uint32_t *ckbuf, size_t nbytes)
+static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
 {
-       int ncookies, i;
+       int i;
 
-       if (serr->ee_errno != 0)
-               error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
-       ncookies = serr->ee_data;
-       if (ncookies > SO_EE_ORIGIN_MAX_ZCOOKIES)
+       if (ck->num > RDS_MAX_ZCOOKIES)
                error(1, 0, "Returned %d cookies, max expected %d\n",
-                     ncookies, SO_EE_ORIGIN_MAX_ZCOOKIES);
-       if (nbytes != ncookies * sizeof(uint32_t))
-               error(1, 0, "Expected %d cookies, got %ld\n",
-                     ncookies, nbytes/sizeof(uint32_t));
-       for (i = 0; i < ncookies; i++)
+                     ck->num, RDS_MAX_ZCOOKIES);
+       for (i = 0; i < ck->num; i++)
                if (cfg_verbose >= 2)
-                       fprintf(stderr, "%d\n", ckbuf[i]);
-       return ncookies;
+                       fprintf(stderr, "%d\n", ck->cookies[i]);
+       return ck->num;
 }
 
-static bool do_recv_completion(int fd)
+static bool do_recvmsg_completion(int fd)
+{
+       char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
+       struct rds_zcopy_cookies *ck;
+       struct cmsghdr *cmsg;
+       struct msghdr msg;
+       bool ret = false;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.msg_control = cmsgbuf;
+       msg.msg_controllen = sizeof(cmsgbuf);
+
+       if (recvmsg(fd, &msg, MSG_DONTWAIT))
+               return ret;
+
+       if (msg.msg_flags & MSG_CTRUNC)
+               error(1, errno, "recvmsg notification: truncated");
+
+       for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+               if (cmsg->cmsg_level == SOL_RDS &&
+                   cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
+
+                       ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
+                       completions += do_process_zerocopy_cookies(ck);
+                       ret = true;
+                       break;
+               }
+               error(0, 0, "ignoring cmsg at level %d type %d\n",
+                           cmsg->cmsg_level, cmsg->cmsg_type);
+       }
+       return ret;
+}
+
+static bool do_recv_completion(int fd, int domain)
 {
        struct sock_extended_err *serr;
        struct msghdr msg = {};
@@ -372,17 +398,13 @@ static bool do_recv_completion(int fd)
        uint32_t hi, lo, range;
        int ret, zerocopy;
        char control[100];
-       uint32_t ckbuf[SO_EE_ORIGIN_MAX_ZCOOKIES];
-       struct iovec iov;
+
+       if (domain == PF_RDS)
+               return do_recvmsg_completion(fd);
 
        msg.msg_control = control;
        msg.msg_controllen = sizeof(control);
 
-       iov.iov_base = ckbuf;
-       iov.iov_len = (SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(ckbuf[0]));
-       msg.msg_iov = &iov;
-       msg.msg_iovlen = 1;
-
        ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
        if (ret == -1 && errno == EAGAIN)
                return false;
@@ -402,10 +424,6 @@ static bool do_recv_completion(int fd)
 
        serr = (void *) CMSG_DATA(cm);
 
-       if (serr->ee_origin == SO_EE_ORIGIN_ZCOOKIE) {
-               completions += do_process_zerocopy_cookies(serr, ckbuf, ret);
-               return true;
-       }
        if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
                error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
        if (serr->ee_errno != 0)
@@ -440,20 +458,20 @@ static bool do_recv_completion(int fd)
 }
 
 /* Read all outstanding messages on the errqueue */
-static void do_recv_completions(int fd)
+static void do_recv_completions(int fd, int domain)
 {
-       while (do_recv_completion(fd)) {}
+       while (do_recv_completion(fd, domain)) {}
 }
 
 /* Wait for all remaining completions on the errqueue */
-static void do_recv_remaining_completions(int fd)
+static void do_recv_remaining_completions(int fd, int domain)
 {
        int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
 
        while (completions < expected_completions &&
               gettimeofday_ms() < tstop) {
-               if (do_poll(fd, POLLERR))
-                       do_recv_completions(fd);
+               if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
+                       do_recv_completions(fd, domain);
        }
 
        if (completions < expected_completions)
@@ -534,13 +552,13 @@ static void do_tx(int domain, int type, int protocol)
 
                while (!do_poll(fd, POLLOUT)) {
                        if (cfg_zerocopy)
-                               do_recv_completions(fd);
+                               do_recv_completions(fd, domain);
                }
 
        } while (gettimeofday_ms() < tstop);
 
        if (cfg_zerocopy)
-               do_recv_remaining_completions(fd);
+               do_recv_remaining_completions(fd, domain);
 
        if (close(fd))
                error(1, errno, "close");
@@ -631,40 +649,6 @@ static void do_flush_datagram(int fd, int type)
        bytes += cfg_payload_len;
 }
 
-
-static void do_recvmsg(int fd)
-{
-       int ret, off = 0;
-       char *buf;
-       struct iovec iov;
-       struct msghdr msg;
-       struct sockaddr_storage din;
-
-       buf = calloc(cfg_payload_len, sizeof(char));
-       iov.iov_base = buf;
-       iov.iov_len = cfg_payload_len;
-
-       memset(&msg, 0, sizeof(msg));
-       msg.msg_name = &din;
-       msg.msg_namelen = sizeof(din);
-       msg.msg_iov = &iov;
-       msg.msg_iovlen = 1;
-
-       ret = recvmsg(fd, &msg, MSG_TRUNC);
-
-       if (ret == -1)
-               error(1, errno, "recv");
-       if (ret != cfg_payload_len)
-               error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
-
-       if (memcmp(buf + off, payload, ret))
-               error(1, 0, "recv: data mismatch");
-
-       free(buf);
-       packets++;
-       bytes += cfg_payload_len;
-}
-
 static void do_rx(int domain, int type, int protocol)
 {
        uint64_t tstop;
@@ -676,8 +660,6 @@ static void do_rx(int domain, int type, int protocol)
        do {
                if (type == SOCK_STREAM)
                        do_flush_tcp(fd);
-               else if (domain == PF_RDS)
-                       do_recvmsg(fd);
                else
                        do_flush_datagram(fd, type);
 
index b3754b9..7b50775 100755 (executable)
@@ -15,6 +15,7 @@ import importlib
 import json
 import subprocess
 import time
+import traceback
 from collections import OrderedDict
 from string import Template
 
@@ -23,6 +24,13 @@ from tdc_helper import *
 
 import TdcPlugin
 
+
+class PluginMgrTestFail(Exception):
+    def __init__(self, stage, output, message):
+        self.stage = stage
+        self.output = output
+        self.message = message
+
 class PluginMgr:
     def __init__(self, argparser):
         super().__init__()
@@ -135,7 +143,7 @@ def exec_cmd(args, pm, stage, command):
     return proc, foutput
 
 
-def prepare_env(args, pm, stage, prefix, cmdlist):
+def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
     """
     Execute the setup/teardown commands for a test case.
     Optionally terminate test execution if the command fails.
@@ -164,7 +172,9 @@ def prepare_env(args, pm, stage, prefix, cmdlist):
             print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
             print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
             print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
-            raise Exception('"{}" did not complete successfully'.format(prefix))
+            raise PluginMgrTestFail(
+                stage, output,
+                '"{}" did not complete successfully'.format(prefix))
 
 def run_one_test(pm, args, index, tidx):
     result = True
@@ -194,8 +204,11 @@ def run_one_test(pm, args, index, tidx):
         match_pattern = re.compile(
             str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
         (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
-        match_index = re.findall(match_pattern, procout)
-        if len(match_index) != int(tidx["matchCount"]):
+        if procout:
+            match_index = re.findall(match_pattern, procout)
+            if len(match_index) != int(tidx["matchCount"]):
+                result = False
+        elif int(tidx["matchCount"]) != 0:
             result = False
 
     if not result:
@@ -204,9 +217,12 @@ def run_one_test(pm, args, index, tidx):
     tap += tresult
 
     if result == False:
-        tap += procout
+        if procout:
+            tap += procout
+        else:
+            tap += 'No output!\n'
 
-    prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'])
+    prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
     pm.call_post_case()
 
     index += 1
@@ -227,30 +243,70 @@ def test_runner(pm, args, filtered_tests):
     index = 1
     tap = str(index) + ".." + str(tcount) + "\n"
     badtest = None
+    stage = None
+    emergency_exit = False
+    emergency_exit_message = ''
 
-    pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
-
+    try:
+        pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
+    except Exception as ee:
+        ex_type, ex, ex_tb = sys.exc_info()
+        print('Exception {} {} (caught in pre_suite).'.
+              format(ex_type, ex))
+        # when the extra print statements are uncommented,
+        # the traceback does not appear between them
+        # (it appears way earlier in the tdc.py output)
+        # so don't bother ...
+        # print('--------------------(')
+        # print('traceback')
+        traceback.print_tb(ex_tb)
+        # print('--------------------)')
+        emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex)
+        emergency_exit = True
+        stage = 'pre-SUITE'
+
+    if emergency_exit:
+        pm.call_post_suite(index)
+        return emergency_exit_message
     if args.verbose > 1:
-        print('Run tests here')
+        print('give test rig 2 seconds to stabilize')
+    time.sleep(2)
     for tidx in testlist:
         if "flower" in tidx["category"] and args.device == None:
+            if args.verbose > 1:
+                print('Not executing test {} {} because DEV2 not defined'.
+                      format(tidx['id'], tidx['name']))
             continue
         try:
             badtest = tidx  # in case it goes bad
             tap += run_one_test(pm, args, index, tidx)
-        except Exception as ee:
-            print('Exception {} (caught in test_runner, running test {} {} {})'.
-                  format(ee, index, tidx['id'], tidx['name']))
+        except PluginMgrTestFail as pmtf:
+            ex_type, ex, ex_tb = sys.exc_info()
+            stage = pmtf.stage
+            message = pmtf.message
+            output = pmtf.output
+            print(message)
+            print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'.
+                  format(ex_type, ex, index, tidx['id'], tidx['name'], stage))
+            print('---------------')
+            print('traceback')
+            traceback.print_tb(ex_tb)
+            print('---------------')
+            if stage == 'teardown':
+                print('accumulated output for this test:')
+                if pmtf.output:
+                    print(pmtf.output)
+            print('---------------')
             break
         index += 1
 
     # if we failed in setup or teardown,
-    # fill in the remaining tests with not ok
+    # fill in the remaining tests with ok-skipped
     count = index
     tap += 'about to flush the tap output if tests need to be skipped\n'
     if tcount + 1 != index:
         for tidx in testlist[index - 1:]:
-            msg = 'skipped - previous setup or teardown failed'
+            msg = 'skipped - previous {} failed'.format(stage)
             tap += 'ok {} - {} # {} {} {}\n'.format(
                 count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
             count += 1
@@ -347,9 +403,9 @@ def check_default_settings(args, remaining, pm):
     global NAMES
 
     if args.path != None:
-         NAMES['TC'] = args.path
+        NAMES['TC'] = args.path
     if args.device != None:
-         NAMES['DEV2'] = args.device
+        NAMES['DEV2'] = args.device
     if not os.path.isfile(NAMES['TC']):
         print("The specified tc path " + NAMES['TC'] + " does not exist.")
         exit(1)
@@ -389,7 +445,7 @@ def generate_case_ids(alltests):
     for c in alltests:
         if (c["id"] == ""):
             while True:
-                newid = str('%04x' % random.randrange(16**4))
+                newid = str('{:04x}'.format(random.randrange(16**4)))
                 if (does_id_exist(alltests, newid)):
                     continue
                 else:
index 707c6bf..52fa539 100755 (executable)
@@ -49,13 +49,13 @@ index = 0
 for i in range(0x100):
     for j in range(0x100):
         for k in range(0x100):
-            mac = ("%02x:%02x:%02x" % (i, j, k))
+            mac = ("{:02x}:{:02x}:{:02x}".format(i, j, k))
             src_mac = "e4:11:00:" + mac
             dst_mac = "e4:12:00:" + mac
-            cmd = ("filter add dev %s %s protocol ip parent ffff: flower %s "
-                   "src_mac %s dst_mac %s action drop %s" %
+            cmd = ("filter add dev {} {} protocol ip parent ffff: flower {} "
+                   "src_mac {} dst_mac {} action drop {}".format
                    (device, prio, skip, src_mac, dst_mac, share_action))
-            file.write("%s\n" % cmd)
+            file.write("{}\n".format(cmd))
             index += 1
             if index >= number:
                 file.close()