Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
author     David S. Miller <davem@davemloft.net>
           Thu, 18 Aug 2016 05:17:32 +0000 (01:17 -0400)
committer  David S. Miller <davem@davemloft.net>
           Thu, 18 Aug 2016 05:17:32 +0000 (01:17 -0400)
Minor overlapping changes for both merge conflicts.

Resolution work done by Stephen Rothwell was used
as a reference.

Signed-off-by: David S. Miller <davem@davemloft.net>
232 files changed:
Documentation/devicetree/bindings/net/apm-xgene-enet.txt
Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt [new file with mode: 0644]
Documentation/networking/00-INDEX
Documentation/networking/batman-adv.txt
Documentation/networking/ena.txt [new file with mode: 0644]
Documentation/networking/strparser.txt [new file with mode: 0644]
MAINTAINERS
arch/arm64/boot/dts/apm/apm-mustang.dts
arch/arm64/boot/dts/apm/apm-storm.dtsi
drivers/net/can/usb/ems_usb.c
drivers/net/can/usb/esd_usb2.c
drivers/net/can/usb/gs_usb.c
drivers/net/can/usb/kvaser_usb.c
drivers/net/can/usb/peak_usb/pcan_usb_core.c
drivers/net/can/usb/usb_8dev.c
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/b53/b53_mdio.c
drivers/net/dsa/b53/b53_mmap.c
drivers/net/dsa/b53/b53_priv.h
drivers/net/dsa/b53/b53_spi.c
drivers/net/dsa/b53/b53_srab.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
drivers/net/ethernet/Kconfig
drivers/net/ethernet/Makefile
drivers/net/ethernet/adi/bfin_mac.c
drivers/net/ethernet/aeroflex/greth.c
drivers/net/ethernet/aeroflex/greth.h
drivers/net/ethernet/amazon/Kconfig [new file with mode: 0644]
drivers/net/ethernet/amazon/Makefile [new file with mode: 0644]
drivers/net/ethernet/amazon/ena/Makefile [new file with mode: 0644]
drivers/net/ethernet/amazon/ena/ena_admin_defs.h [new file with mode: 0644]
drivers/net/ethernet/amazon/ena/ena_com.c [new file with mode: 0644]
drivers/net/ethernet/amazon/ena/ena_com.h [new file with mode: 0644]
drivers/net/ethernet/amazon/ena/ena_common_defs.h [new file with mode: 0644]
drivers/net/ethernet/amazon/ena/ena_eth_com.c [new file with mode: 0644]
drivers/net/ethernet/amazon/ena/ena_eth_com.h [new file with mode: 0644]
drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h [new file with mode: 0644]
drivers/net/ethernet/amazon/ena/ena_ethtool.c [new file with mode: 0644]
drivers/net/ethernet/amazon/ena/ena_netdev.c [new file with mode: 0644]
drivers/net/ethernet/amazon/ena/ena_netdev.h [new file with mode: 0644]
drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h [new file with mode: 0644]
drivers/net/ethernet/amazon/ena/ena_regs_defs.h [new file with mode: 0644]
drivers/net/ethernet/apm/xgene/Kconfig
drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
drivers/net/ethernet/apm/xgene/xgene_enet_main.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.h
drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
drivers/net/ethernet/brocade/bna/bnad.c
drivers/net/ethernet/brocade/bna/bnad.h
drivers/net/ethernet/cadence/macb.c
drivers/net/ethernet/cadence/macb.h
drivers/net/ethernet/cavium/Kconfig
drivers/net/ethernet/cavium/thunder/Makefile
drivers/net/ethernet/cavium/thunder/nic.h
drivers/net/ethernet/cavium/thunder/nic_main.c
drivers/net/ethernet/cavium/thunder/nic_reg.h
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/cavium/thunder/nicvf_queues.c
drivers/net/ethernet/cavium/thunder/nicvf_queues.h
drivers/net/ethernet/cavium/thunder/thunder_bgx.c
drivers/net/ethernet/cavium/thunder/thunder_bgx.h
drivers/net/ethernet/cavium/thunder/thunder_xcv.c [new file with mode: 0644]
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
drivers/net/ethernet/dec/tulip/de4x5.c
drivers/net/ethernet/dec/tulip/de4x5.h
drivers/net/ethernet/emulex/benet/be.h
drivers/net/ethernet/emulex/benet/be_cmds.c
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/xgmac_mdio.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/qlogic/qed/qed_cxt.c
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
drivers/net/ethernet/qlogic/qed/qed_dev.c
drivers/net/ethernet/qlogic/qed/qed_hw.c
drivers/net/ethernet/qlogic/qed/qed_init_ops.c
drivers/net/ethernet/qlogic/qed/qed_int.c
drivers/net/ethernet/qlogic/qed/qed_l2.c
drivers/net/ethernet/qlogic/qed/qed_main.c
drivers/net/ethernet/qlogic/qed/qed_mcp.c
drivers/net/ethernet/qlogic/qed/qed_mcp.h
drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
drivers/net/ethernet/qlogic/qed/qed_spq.c
drivers/net/ethernet/qlogic/qed/qed_sriov.c
drivers/net/ethernet/qlogic/qede/qede_ethtool.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/renesas/sh_eth.h
drivers/net/ethernet/sfc/ef10.c
drivers/net/ethernet/sfc/efx.c
drivers/net/ethernet/sfc/efx.h
drivers/net/ethernet/sfc/falcon.c
drivers/net/ethernet/sfc/mcdi.c
drivers/net/ethernet/sfc/mcdi_pcol.h
drivers/net/ethernet/sfc/net_driver.h
drivers/net/ethernet/sfc/nic.h
drivers/net/ethernet/sfc/ptp.c
drivers/net/ethernet/sfc/siena.c
drivers/net/ethernet/sfc/workarounds.h
drivers/net/ethernet/ti/cpsw.c
drivers/net/ethernet/ti/davinci_cpdma.c
drivers/net/ethernet/ti/davinci_emac.c
drivers/net/fjes/fjes_main.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/hyperv/rndis_filter.c
drivers/net/phy/Kconfig
drivers/net/phy/Makefile
drivers/net/phy/mscc.c [new file with mode: 0644]
drivers/net/phy/xilinx_gmii2rgmii.c [new file with mode: 0644]
drivers/net/ppp/ppp_generic.c
drivers/net/ppp/pptp.c
drivers/net/usb/hso.c
drivers/net/usb/lan78xx.c
drivers/net/usb/usbnet.c
drivers/net/wimax/i2400m/usb-notif.c
drivers/net/wireless/ath/ar5523/ar5523.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
drivers/net/wireless/intersil/orinoco/orinoco_usb.c
drivers/net/wireless/marvell/libertas_tf/if_usb.c
drivers/net/wireless/marvell/mwifiex/usb.c
drivers/net/wireless/realtek/rtlwifi/usb.c
drivers/staging/octeon/ethernet-mdio.c
drivers/staging/octeon/ethernet-rgmii.c
drivers/staging/octeon/ethernet.c
drivers/staging/octeon/octeon-ethernet.h
fs/proc/generic.c
fs/proc/proc_net.c
fs/proc/proc_sysctl.c
include/linux/cgroup.h
include/linux/netdevice.h
include/linux/qed/qed_if.h
include/linux/sysctl.h
include/net/gre.h
include/net/kcm.h
include/net/netns/ipv4.h
include/net/pkt_sched.h
include/net/pptp.h [new file with mode: 0644]
include/net/sch_generic.h
include/net/strparser.h [new file with mode: 0644]
include/uapi/linux/batman_adv.h
include/uapi/linux/bpf.h
include/uapi/linux/if_tunnel.h
include/uapi/linux/mii.h
kernel/bpf/arraymap.c
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c
net/Kconfig
net/Makefile
net/batman-adv/bat_algo.c
net/batman-adv/bat_algo.h
net/batman-adv/bat_iv_ogm.c
net/batman-adv/bat_v.c
net/batman-adv/bridge_loop_avoidance.c
net/batman-adv/bridge_loop_avoidance.h
net/batman-adv/debugfs.c
net/batman-adv/gateway_client.c
net/batman-adv/gateway_client.h
net/batman-adv/gateway_common.c
net/batman-adv/hard-interface.c
net/batman-adv/main.c
net/batman-adv/main.h
net/batman-adv/multicast.c
net/batman-adv/netlink.c
net/batman-adv/netlink.h
net/batman-adv/originator.c
net/batman-adv/originator.h
net/batman-adv/packet.h
net/batman-adv/routing.c
net/batman-adv/send.c
net/batman-adv/send.h
net/batman-adv/soft-interface.c
net/batman-adv/sysfs.c
net/batman-adv/translation-table.c
net/batman-adv/translation-table.h
net/batman-adv/types.h
net/core/dev.c
net/core/flow_dissector.c
net/core/neighbour.c
net/core/net_namespace.c
net/ipv4/fib_frontend.c
net/ipv4/igmp.c
net/ipv4/ipconfig.c
net/ipv6/ila/ila_common.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_tunnel.c
net/ipv6/ip6_vti.c
net/ipv6/mcast.c
net/ipv6/sit.c
net/kcm/Kconfig
net/kcm/kcmproc.c
net/kcm/kcmsock.c
net/mac80211/driver-ops.h
net/mac80211/mesh_hwmp.c
net/mac80211/sta_info.c
net/mac80211/sta_info.h
net/mac80211/tx.c
net/rds/ib.h
net/rds/rds.h
net/sched/sch_api.c
net/sched/sch_generic.c
net/sched/sch_hfsc.c
net/sched/sch_mq.c
net/sched/sch_mqprio.c
net/strparser/Kconfig [new file with mode: 0644]
net/strparser/Makefile [new file with mode: 0644]
net/strparser/strparser.c [new file with mode: 0644]
net/switchdev/switchdev.c
net/sysctl_net.c
net/wireless/core.c
net/wireless/nl80211.c
net/wireless/nl80211.h
net/wireless/util.c
samples/bpf/Makefile
samples/bpf/bpf_helpers.h
samples/bpf/test_current_task_under_cgroup_kern.c [new file with mode: 0644]
samples/bpf/test_current_task_under_cgroup_user.c [new file with mode: 0644]
samples/bpf/test_verifier.c

index e41b2d5..f591ab7 100644 (file)
@@ -47,6 +47,9 @@ Optional properties:
            Valid values are between 0 to 7, that maps to
            273, 589, 899, 1222, 1480, 1806, 2147, 2464 ps
            Default value is 2, which corresponds to 899 ps
+- rxlos-gpios: Input gpio from SFP+ module to indicate availability of
+              incoming signal.
+
 
 Example:
        menetclk: menetclk {
diff --git a/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt b/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt
new file mode 100644 (file)
index 0000000..038dda4
--- /dev/null
@@ -0,0 +1,35 @@
+XILINX GMIITORGMII Converter Driver Device Tree Bindings
+--------------------------------------------------------
+
+The Gigabit Media Independent Interface (GMII) to Reduced Gigabit Media
+Independent Interface (RGMII) core provides the RGMII between RGMII-compliant
+Ethernet physical media devices (PHY) and the Gigabit Ethernet controller.
+This core can be used in all three modes of operation (10/100/1000 Mb/s).
+The Management Data Input/Output (MDIO) interface is used to configure the
+speed of operation. This core can switch dynamically between the three
+different speed modes by configuring the converter register via MDIO writes.
+
+This converter sits between the ethernet MAC and the external phy.
+MAC <==> GMII2RGMII <==> RGMII_PHY
+
+For more details about MDIO, please refer to phy.txt in the same directory.
+
+Required properties:
+- compatible   : Should be "xlnx,gmii-to-rgmii-1.0"
+- reg          : The ID number for the phy, usually a small integer
+- phy-handle   : Should point to the external phy device.
+                 See ethernet.txt file in the same directory.
+
+Example:
+       mdio {
+               #address-cells = <1>;
+               #size-cells = <0>;
+               phy: ethernet-phy@0 {
+                       ......
+               };
+               gmiitorgmii: gmiitorgmii@8 {
+                       compatible = "xlnx,gmii-to-rgmii-1.0";
+                       reg = <8>;
+                       phy-handle = <&phy>;
+               };
+       };
index 415154a..a769778 100644 (file)
@@ -74,6 +74,8 @@ dns_resolver.txt
        - The DNS resolver module allows kernel services to make DNS queries.
 driver.txt
        - Softnet driver issues.
+ena.txt
+       - info on Amazon's Elastic Network Adapter (ENA)
 e100.txt
        - info on Intel's EtherExpress PRO/100 line of 10/100 boards
 e1000.txt
index 1b5e7a7..8a8d3d9 100644 (file)
@@ -43,10 +43,15 @@ new interfaces to verify the compatibility. There is no  need  to
 reload the module if you plug your USB wifi adapter into your ma-
 chine after batman advanced was initially loaded.
 
-To activate a  given  interface  simply  write  "bat0"  into  its
-"mesh_iface" file inside the batman_adv subfolder:
+The batman-adv soft-interface can be created using  the  iproute2
+tool "ip"
 
-# echo bat0 > /sys/class/net/eth0/batman_adv/mesh_iface
+# ip link add name bat0 type batadv
+
+To  activate a  given  interface  simply  attach it to the "bat0"
+interface
+
+# ip link set dev eth0 master bat0
 
 Repeat  this step for all interfaces you wish to add.  Now batman
 starts using/broadcasting on this/these interface(s).
@@ -56,10 +61,10 @@ By reading the "iface_status" file you can check its status:
 # cat /sys/class/net/eth0/batman_adv/iface_status
 # active
 
-To deactivate an interface you have  to  write  "none"  into  its
-"mesh_iface" file:
+To  deactivate  an  interface  you  have   to  detach it from the
+"bat0" interface:
 
-# echo none > /sys/class/net/eth0/batman_adv/mesh_iface
+# ip link set dev eth0 nomaster
 
 
 All  mesh  wide  settings  can be found in batman's own interface
diff --git a/Documentation/networking/ena.txt b/Documentation/networking/ena.txt
new file mode 100644 (file)
index 0000000..2b4b6f5
--- /dev/null
@@ -0,0 +1,305 @@
+Linux kernel driver for Elastic Network Adapter (ENA) family:
+=============================================================
+
+Overview:
+=========
+ENA is a networking interface designed to make good use of modern CPU
+features and system architectures.
+
+The ENA device exposes a lightweight management interface with a
+minimal set of memory-mapped registers and an extendable command set
+through an Admin Queue.
+
+The driver supports a range of ENA devices, is link-speed independent
+(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has
+a negotiated and extendable feature set.
+
+Some ENA devices support SR-IOV. This driver is used for both the
+SR-IOV Physical Function (PF) and Virtual Function (VF) devices.
+
+ENA devices enable high speed and low overhead network traffic
+processing by providing multiple Tx/Rx queue pairs (the maximum number
+is advertised by the device via the Admin Queue), a dedicated MSI-X
+interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation,
+and CPU cacheline optimized data placement.
+
+The ENA driver supports industry standard TCP/IP offload features such
+as checksum offload and TCP transmit segmentation offload (TSO).
+Receive-side scaling (RSS) is supported for multi-core scaling.
+
+The ENA driver and its corresponding devices implement health
+monitoring mechanisms such as watchdog, enabling the device and driver
+to recover in a manner transparent to the application, as well as
+debug logs.
+
+Some of the ENA devices support a working mode called Low-latency
+Queue (LLQ), which saves several more microseconds.
+
+Supported PCI vendor ID/device IDs:
+===================================
+1d0f:0ec2 - ENA PF
+1d0f:1ec2 - ENA PF with LLQ support
+1d0f:ec20 - ENA VF
+1d0f:ec21 - ENA VF with LLQ support
+
+ENA Source Code Directory Structure:
+====================================
+ena_com.[ch]      - Management communication layer. This layer is
+                    responsible for handling all the management
+                    (admin) communication between the device and the
+                    driver.
+ena_eth_com.[ch]  - Tx/Rx data path.
+ena_admin_defs.h  - Definition of ENA management interface.
+ena_eth_io_defs.h - Definition of ENA data path interface.
+ena_common_defs.h - Common definitions for ena_com layer.
+ena_regs_defs.h   - Definition of ENA PCI memory-mapped (MMIO) registers.
+ena_netdev.[ch]   - Main Linux kernel driver.
+ena_sysfs.[ch]    - Sysfs files.
+ena_ethtool.c     - ethtool callbacks.
+ena_pci_id_tbl.h  - Supported device IDs.
+
+Management Interface:
+=====================
+ENA management interface is exposed by means of:
+- PCIe Configuration Space
+- Device Registers
+- Admin Queue (AQ) and Admin Completion Queue (ACQ)
+- Asynchronous Event Notification Queue (AENQ)
+
+ENA device MMIO Registers are accessed only during driver
+initialization and are not involved in further normal device
+operation.
+
+AQ is used for submitting management commands, and the
+results/responses are reported asynchronously through ACQ.
+
+ENA introduces a very small set of management commands with room for
+vendor-specific extensions. Most of the management operations are
+framed in a generic Get/Set feature command.
+
+The following admin queue commands are supported:
+- Create I/O submission queue
+- Create I/O completion queue
+- Destroy I/O submission queue
+- Destroy I/O completion queue
+- Get feature
+- Set feature
+- Configure AENQ
+- Get statistics
+
+Refer to ena_admin_defs.h for the list of supported Get/Set Feature
+properties.
+
+The Asynchronous Event Notification Queue (AENQ) is a uni-directional
+queue used by the ENA device to send to the driver events that cannot
+be reported using ACQ. AENQ events are subdivided into groups. Each
+group may have multiple syndromes, as shown below.
+
+The events are:
+       Group                   Syndrome
+       Link state change       - X -
+       Fatal error             - X -
+       Notification            Suspend traffic
+       Notification            Resume traffic
+       Keep-Alive              - X -
+
+ACQ and AENQ share the same MSI-X vector.
+
+Keep-Alive is a special mechanism that allows monitoring of the
+device's health. The driver maintains a watchdog (WD) handler which,
+if fired, logs the current state and statistics then resets and
+restarts the ENA device and driver. A Keep-Alive event is delivered by
+the device every second. The driver re-arms the WD upon reception of a
+Keep-Alive event. A missed Keep-Alive event causes the WD handler to
+fire.
+
+Data Path Interface:
+====================
+I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx
+SQ correspondingly). Each SQ has a completion queue (CQ) associated
+with it.
+
+The SQs and CQs are implemented as descriptor rings in contiguous
+physical memory.
+
+The ENA driver supports two Queue Operation modes for Tx SQs:
+- Regular mode
+  * In this mode the Tx SQs reside in the host's memory. The ENA
+    device fetches the ENA Tx descriptors and packet data from host
+    memory.
+- Low Latency Queue (LLQ) mode or "push-mode".
+  * In this mode the driver pushes the transmit descriptors and the
+    first 128 bytes of the packet directly to the ENA device memory
+    space. The rest of the packet payload is fetched by the
+    device. For this operation mode, the driver uses a dedicated PCI
+    device memory BAR, which is mapped with write-combine capability.
+
+The Rx SQs support only the regular mode.
+
+Note: Not all ENA devices support LLQ, and this feature is negotiated
+      with the device upon initialization. If the ENA device does not
+      support LLQ mode, the driver falls back to the regular mode.
+
+The driver supports multi-queue for both Tx and Rx. This has various
+benefits:
+- Reduced CPU/thread/process contention on a given Ethernet interface.
+- Cache miss rate on completion is reduced, particularly for data
+  cache lines that hold the sk_buff structures.
+- Increased process-level parallelism when handling received packets.
+- Increased data cache hit rate, by steering kernel processing of
+  packets to the CPU where the application thread consuming the
+  packet is running.
+- Interrupt re-direction performed in hardware.
+
+Interrupt Modes:
+================
+The driver assigns a single MSI-X vector per queue pair (for both Tx
+and Rx directions). The driver assigns an additional dedicated MSI-X vector
+for management (for ACQ and AENQ).
+
+Management interrupt registration is performed when the Linux kernel
+probes the adapter, and it is de-registered when the adapter is
+removed. I/O queue interrupt registration is performed when the Linux
+interface of the adapter is opened, and it is de-registered when the
+interface is closed.
+
+The management interrupt is named:
+   ena-mgmnt@pci:<PCI domain:bus:slot.function>
+and for each queue pair, an interrupt is named:
+   <interface name>-Tx-Rx-<queue index>
+
+The ENA device operates in auto-mask and auto-clear interrupt
+modes. That is, once MSI-X is delivered to the host, its Cause bit is
+automatically cleared and the interrupt is masked. The interrupt is
+unmasked by the driver after NAPI processing is complete.
+
+Interrupt Moderation:
+=====================
+ENA driver and device can operate in conventional or adaptive interrupt
+moderation mode.
+
+In conventional mode the driver instructs the device to postpone interrupt
+posting according to a static interrupt delay value. The interrupt delay
+value can be configured through ethtool(8). The following ethtool
+parameters are supported by the driver: tx-usecs, rx-usecs.
+
+In adaptive interrupt moderation mode the interrupt delay value is
+updated by the driver dynamically and adjusted every NAPI cycle
+according to the traffic nature.
+
+By default ENA driver applies adaptive coalescing on Rx traffic and
+conventional coalescing on Tx traffic.
+
+Adaptive coalescing can be switched on/off through ethtool(8)
+adaptive_rx on|off parameter.
+
+The driver chooses the interrupt delay value according to the number of
+bytes and packets received between interrupt unmasking and interrupt
+posting. The driver uses an interrupt delay table that subdivides the
+range of received bytes/packets into 5 levels and assigns an interrupt
+delay value to each level.
+
+The user can enable/disable adaptive moderation, modify the interrupt
+delay table and restore its default values through sysfs.
+
+The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK
+and can be configured by the ETHTOOL_STUNABLE command of the
+SIOCETHTOOL ioctl.
+
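+A minimal user-space sketch of such an ETHTOOL_STUNABLE request is shown
+below. It relies only on the generic ethtool_tunable layout from
+<linux/ethtool.h>; the set_rx_copybreak() name and calling convention are
+illustrative, not part of the driver.
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+
+static int set_rx_copybreak(const char *ifname, __u32 bytes)
+{
+        struct ethtool_tunable *req;
+        struct ifreq ifr;
+        int fd, err;
+
+        /* The u32 value is laid out immediately after the tunable header */
+        req = calloc(1, sizeof(*req) + sizeof(bytes));
+        if (!req)
+                return -1;
+        req->cmd = ETHTOOL_STUNABLE;
+        req->id = ETHTOOL_RX_COPYBREAK;
+        req->type_id = ETHTOOL_TUNABLE_U32;
+        req->len = sizeof(bytes);
+        memcpy(req + 1, &bytes, sizeof(bytes));
+
+        memset(&ifr, 0, sizeof(ifr));
+        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
+        ifr.ifr_data = (void *)req;
+
+        fd = socket(AF_INET, SOCK_DGRAM, 0);
+        err = (fd < 0) ? -1 : ioctl(fd, SIOCETHTOOL, &ifr);
+        if (fd >= 0)
+                close(fd);
+        free(req);
+        return err;
+}
+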
+SKB:
+The driver allocates an SKB for each frame received in the Rx NAPI
+handler. The allocation method depends on the size of the packet.
+If the frame length is larger than rx_copybreak, napi_get_frags()
+is used; otherwise netdev_alloc_skb_ip_align() is used, the buffer
+content is copied (by the CPU) to the SKB, and the buffer is recycled.
+
+Statistics:
+===========
+The user can obtain ENA device and driver statistics using ethtool.
+The driver can collect regular or extended statistics (including
+per-queue stats) from the device.
+
+In addition the driver logs the stats to syslog upon device reset.
+
+MTU:
+====
+The driver supports an arbitrarily large MTU with a maximum that is
+negotiated with the device. The driver configures MTU using the
+SetFeature command (ENA_ADMIN_MTU property). The user can change MTU
+via ip(8) and similar legacy tools.
+
+Stateless Offloads:
+===================
+The ENA driver supports:
+- TSO over IPv4/IPv6
+- TSO with ECN
+- IPv4 header checksum offload
+- TCP/UDP over IPv4/IPv6 checksum offloads
+
+RSS:
+====
+- The ENA device supports RSS that allows flexible Rx traffic
+  steering.
+- Toeplitz and CRC32 hash functions are supported.
+- Different combinations of L2/L3/L4 fields can be configured as
+  inputs for hash functions.
+- The driver configures RSS settings using the AQ SetFeature command
+  (ENA_ADMIN_RSS_HASH_FUNCTION, ENA_ADMIN_RSS_HASH_INPUT and
+  ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG properties).
+- If the NETIF_F_RXHASH flag is set, the 32-bit result of the hash
+  function delivered in the Rx CQ descriptor is set in the received
+  SKB.
+- The user can provide a hash key, hash function, and configure the
+  indirection table through ethtool(8).
+
+DATA PATH:
+==========
+Tx:
+---
+ena_start_xmit() is called by the stack. This function does the following:
+- Maps data buffers (skb->data and frags).
+- Populates ena_buf for the push buffer (if the driver and device are
+  in push mode.)
+- Prepares ENA bufs for the remaining frags.
+- Allocates a new request ID from the empty req_id ring. The request
+  ID is the index of the packet in the Tx info. This is used for
+  out-of-order TX completions.
+- Adds the packet to the proper place in the Tx ring.
+- Calls ena_com_prepare_tx(), an ENA communication layer function that
+  converts the ena_bufs to ENA descriptors (and adds meta ENA
+  descriptors as needed.)
+  * This function also copies the ENA descriptors and the push buffer
+    to the Device memory space (if in push mode.)
+- Writes doorbell to the ENA device.
+- When the ENA device finishes sending the packet, a completion
+  interrupt is raised.
+- The interrupt handler schedules NAPI.
+- The ena_clean_tx_irq() function is called. This function handles the
+  completion descriptors generated by the ENA, with a single
+  completion descriptor per completed packet.
+  * req_id is retrieved from the completion descriptor. The tx_info of
+    the packet is retrieved via the req_id. The data buffers are
+    unmapped and req_id is returned to the empty req_id ring.
+  * The function stops when the completion descriptors are completed or
+    the budget is reached.
+
+Rx:
+---
+- When a packet is received by the ENA device, a completion interrupt is
+  raised.
+- The interrupt handler schedules NAPI.
+- The ena_clean_rx_irq() function is called. This function calls
+  ena_rx_pkt(), an ENA communication layer function, which returns the
+  number of descriptors used for a new unhandled packet, and zero if
+  no new packet is found.
+- Then it calls the ena_eth_rx_skb() function.
+- ena_eth_rx_skb() checks packet length:
+  * If the packet is small (len < rx_copybreak), the driver allocates
+    a SKB for the new packet, and copies the packet payload into the
+    SKB data buffer.
+    - In this way the original data buffer is not passed to the stack
+      and is reused for future Rx packets.
+  * Otherwise the function unmaps the Rx buffer, then allocates the
+    new SKB structure and hooks the Rx buffer to the SKB frags.
+- The new SKB is updated with the necessary information (protocol,
+  checksum hw verify result, etc.), and then passed to the network
+  stack, using the NAPI interface function napi_gro_receive().
diff --git a/Documentation/networking/strparser.txt b/Documentation/networking/strparser.txt
new file mode 100644 (file)
index 0000000..a0bf573
--- /dev/null
@@ -0,0 +1,136 @@
+Stream Parser
+-------------
+
+The stream parser (strparser) is a utility that parses messages of an
+application layer protocol running over a TCP connection. The stream
+parser works in conjunction with an upper layer in the kernel to provide
+kernel support for application layer messages. For instance, Kernel
+Connection Multiplexor (KCM) uses the Stream Parser to parse messages
+using a BPF program.
+
+Interface
+---------
+
+The API includes a context structure, a set of callbacks, utility
+functions, and a data_ready function. The callbacks include
+a parse_msg function that is called to perform parsing (e.g.
+BPF parsing in case of KCM), and a rcv_msg function that is called
+when a full message has been completed.
+
+A stream parser can be instantiated for a TCP connection. This is done
+by:
+
+strp_init(struct strparser *strp, struct sock *csk,
+         struct strp_callbacks *cb)
+
+strp is a struct of type strparser that is allocated by the upper layer.
+csk is the TCP socket associated with the stream parser. Callbacks are
+called by the stream parser.
+
+Callbacks
+---------
+
+There are four callbacks:
+
+int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
+
+    parse_msg is called to determine the length of the next message
+    in the stream. The upper layer must implement this function. It
+    should parse the sk_buff as containing the headers for the
+    next application layer messages in the stream.
+
+    The skb->cb in the input skb is a struct strp_rx_msg. Only
+    the offset field is relevant in parse_msg and gives the offset
+    where the message starts in the skb.
+
+    The return values of this function are:
+
+    >0 : indicates length of successfully parsed message
+    0  : indicates more data must be received to parse the message
+    -ESTRPIPE : current message should not be processed by the
+          kernel, return control of the socket to userspace which
+          can proceed to read the messages itself
+    other < 0 : Error in parsing, give control back to userspace
+          assuming that synchronization is lost and the stream
+          is unrecoverable (application expected to close TCP socket)
+
+    In the case that an error is returned (return value is less than
+    zero) the stream parser will set the error on TCP socket and wake
+    it up. If parse_msg returned -ESTRPIPE and the stream parser had
+    previously read some bytes for the current message, then the error
+    set on the attached socket is ENODATA since the stream is
+    unrecoverable in that case.
+
+void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
+
+    rcv_msg is called when a full message has been received and
+    is queued. The callee must consume the sk_buff; it can
+    call strp_pause to prevent any further messages from being
+    received in rcv_msg (see strp_pause below). This callback
+    must be set.
+
+    The skb->cb in the input skb is a struct strp_rx_msg. This
+    struct contains two fields: offset and full_len. Offset is
+    where the message starts in the skb, and full_len is the
+    length of the message. skb->len - offset may be greater
+    than full_len since strparser does not trim the skb.
+
+int (*read_sock_done)(struct strparser *strp, int err);
+
+     read_sock_done is called when the stream parser is done reading
+     the TCP socket. The stream parser may read multiple messages
+     in a loop and this function allows cleanup to occur when exiting
+     the loop. If the callback is not set (NULL in strp_init) a
+     default function is used.
+
+void (*abort_parser)(struct strparser *strp, int err);
+
+     This function is called when the stream parser encounters an error
+     in parsing. The default function stops the stream parser for the
+     TCP socket and sets the error in the socket. The default function
+     can be changed by setting the callback to non-NULL in strp_init.
+
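+As a brief sketch, an upper layer for a hypothetical protocol whose
+messages begin with a 4-byte big-endian length header could wire the
+callbacks up as follows. The my_* names are illustrative; the
+struct strp_callbacks container and the direct skb->cb cast are assumed
+to match include/net/strparser.h and the descriptions above.
+
+static int my_parse_msg(struct strparser *strp, struct sk_buff *skb)
+{
+        struct strp_rx_msg *rxm = (struct strp_rx_msg *)skb->cb;
+        __be32 hdr;
+
+        /* Not enough bytes received yet to see the length header */
+        if (skb->len - rxm->offset < sizeof(hdr))
+                return 0;
+
+        if (skb_copy_bits(skb, rxm->offset, &hdr, sizeof(hdr)))
+                return -EINVAL;
+
+        /* Full message length: header plus payload */
+        return sizeof(hdr) + ntohl(hdr);
+}
+
+static void my_rcv_msg(struct strparser *strp, struct sk_buff *skb)
+{
+        /* The callee owns the skb; a real upper layer would queue it */
+        kfree_skb(skb);
+}
+
+static struct strp_callbacks my_cb = {
+        .parse_msg = my_parse_msg,
+        .rcv_msg   = my_rcv_msg,
+        /* read_sock_done and abort_parser left NULL to use the defaults */
+};
+
+static struct strparser my_strp;
+
+/* Attach the parser to a connected TCP socket csk */
+static int my_attach(struct sock *csk)
+{
+        return strp_init(&my_strp, csk, &my_cb);
+}
+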
+Functions
+---------
+
+The upper layer calls strp_tcp_data_ready when data is ready on the lower
+socket for strparser to process. This should be called from a data_ready
+callback that is set on the socket.
+
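+Continuing the sketch above, and assuming the upper layer keeps its
+strparser reachable through the socket's sk_user_data, the data_ready
+hook could look like this (strp_tcp_data_ready is assumed to take the
+strparser as its only argument):
+
+static void my_data_ready(struct sock *sk)
+{
+        struct strparser *strp;
+
+        read_lock_bh(&sk->sk_callback_lock);
+        strp = sk->sk_user_data;
+        if (likely(strp))
+                strp_tcp_data_ready(strp);
+        read_unlock_bh(&sk->sk_callback_lock);
+}
+
+/* Installed once, when attaching the parser to the TCP socket csk */
+static void my_install_data_ready(struct sock *csk)
+{
+        write_lock_bh(&csk->sk_callback_lock);
+        csk->sk_user_data = &my_strp;
+        csk->sk_data_ready = my_data_ready;
+        write_unlock_bh(&csk->sk_callback_lock);
+}
+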
+strp_stop is called to completely stop stream parser operations. This
+is called internally when the stream parser encounters an error, and
+it is called from the upper layer when unattaching a TCP socket.
+
+strp_done is called to unattach the stream parser from the TCP socket.
+This must be called after the stream parser has been stopped.
+
+strp_check_rcv is called to check for new messages on the socket. This
+is normally called at initialization of a stream parser instance
+or after strp_unpause.
+
+Statistics
+----------
+
+Various counters are kept for each stream parser for a TCP socket.
+These are in the strp_stats structure. strp_aggr_stats is a convenience
+structure for accumulating statistics for multiple stream parser
+instances. save_strp_stats and aggregate_strp_stats are helper functions
+to save and aggregate statistics.
+
+Message assembly limits
+-----------------------
+
+The stream parser provides mechanisms to limit the resources consumed by
+message assembly.
+
+A timer is set when assembly starts for a new message. The message
+timeout is taken from the receive timeout (rcvtimeo) of the associated
+TCP socket. If the timer fires before assembly completes, the stream
+parser is aborted and the ETIMEDOUT error is set on the TCP socket.
+
+Message length is limited to the receive buffer size of the associated
+TCP socket. If the length returned by parse_msg is greater than
+the socket buffer size then the stream parser is aborted with
+EMSGSIZE error set on the TCP socket. Note that this makes the
+maximum size of receive skbuffs for a socket with a stream parser
+to be 2*sk_rcvbuf of the TCP socket.
index a306795..e902b63 100644 (file)
@@ -636,6 +636,15 @@ F: drivers/tty/serial/altera_jtaguart.c
 F:     include/linux/altera_uart.h
 F:     include/linux/altera_jtaguart.h
 
+AMAZON ETHERNET DRIVERS
+M:     Netanel Belgazal <netanel@annapurnalabs.com>
+R:     Saeed Bishara <saeed@annapurnalabs.com>
+R:     Zorik Machulsky <zorik@annapurnalabs.com>
+L:     netdev@vger.kernel.org
+S:     Supported
+F:     Documentation/networking/ena.txt
+F:     drivers/net/ethernet/amazon/
+
 AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
 M:     Tom Lendacky <thomas.lendacky@amd.com>
 M:     Gary Hook <gary.hook@amd.com>
index b7fb5d9..32a961c 100644 (file)
@@ -74,6 +74,7 @@
 
 &xgenet {
        status = "ok";
+       rxlos-gpios = <&sbgpio 12 1>;
 };
 
 &mmc0 {
index f1c2c71..d5c3435 100644 (file)
                        /* mac address will be overwritten by the bootloader */
                        local-mac-address = [00 00 00 00 00 00];
                        phy-connection-type = "rgmii";
-                       phy-handle = <&menet0phy>,<&menetphy>;
+                       phy-handle = <&menetphy>,<&menet0phy>;
                        mdio {
                                compatible = "apm,xgene-mdio";
                                #address-cells = <1>;
index 71f0e79..b3d0275 100644 (file)
@@ -600,7 +600,6 @@ static int ems_usb_start(struct ems_usb *dev)
                /* create a URB, and a buffer for it */
                urb = usb_alloc_urb(0, GFP_KERNEL);
                if (!urb) {
-                       netdev_err(netdev, "No memory left for URBs\n");
                        err = -ENOMEM;
                        break;
                }
@@ -752,10 +751,8 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne
 
        /* create a URB, and a buffer for it, and copy the data to the URB */
        urb = usb_alloc_urb(0, GFP_ATOMIC);
-       if (!urb) {
-               netdev_err(netdev, "No memory left for URBs\n");
+       if (!urb)
                goto nomem;
-       }
 
        buf = usb_alloc_coherent(dev->udev, size, GFP_ATOMIC, &urb->transfer_dma);
        if (!buf) {
@@ -1007,10 +1004,8 @@ static int ems_usb_probe(struct usb_interface *intf,
                dev->tx_contexts[i].echo_index = MAX_TX_URBS;
 
        dev->intr_urb = usb_alloc_urb(0, GFP_KERNEL);
-       if (!dev->intr_urb) {
-               dev_err(&intf->dev, "Couldn't alloc intr URB\n");
+       if (!dev->intr_urb)
                goto cleanup_candev;
-       }
 
        dev->intr_in_buffer = kzalloc(INTR_IN_BUFFER_SIZE, GFP_KERNEL);
        if (!dev->intr_in_buffer)
index 784a900..be928ce 100644 (file)
@@ -558,8 +558,6 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev)
                /* create a URB, and a buffer for it */
                urb = usb_alloc_urb(0, GFP_KERNEL);
                if (!urb) {
-                       dev_warn(dev->udev->dev.parent,
-                                "No memory left for URBs\n");
                        err = -ENOMEM;
                        break;
                }
@@ -730,7 +728,6 @@ static netdev_tx_t esd_usb2_start_xmit(struct sk_buff *skb,
        /* create a URB, and a buffer for it, and copy the data to the URB */
        urb = usb_alloc_urb(0, GFP_ATOMIC);
        if (!urb) {
-               netdev_err(netdev, "No memory left for URBs\n");
                stats->tx_dropped++;
                dev_kfree_skb(skb);
                goto nourbmem;
index 6f0cbc3..77e3cc0 100644 (file)
@@ -493,10 +493,8 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb,
 
        /* create a URB, and a buffer for it */
        urb = usb_alloc_urb(0, GFP_ATOMIC);
-       if (!urb) {
-               netdev_err(netdev, "No memory left for URB\n");
+       if (!urb)
                goto nomem_urb;
-       }
 
        hf = usb_alloc_coherent(dev->udev, sizeof(*hf), GFP_ATOMIC,
                                &urb->transfer_dma);
@@ -600,11 +598,8 @@ static int gs_can_open(struct net_device *netdev)
 
                        /* alloc rx urb */
                        urb = usb_alloc_urb(0, GFP_KERNEL);
-                       if (!urb) {
-                               netdev_err(netdev,
-                                          "No memory left for URB\n");
+                       if (!urb)
                                return -ENOMEM;
-                       }
 
                        /* alloc rx buffer */
                        buf = usb_alloc_coherent(dev->udev,
index 6f1f3b6..d51e0c4 100644 (file)
@@ -787,10 +787,8 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv,
        int err;
 
        urb = usb_alloc_urb(0, GFP_ATOMIC);
-       if (!urb) {
-               netdev_err(netdev, "No memory left for URBs\n");
+       if (!urb)
                return -ENOMEM;
-       }
 
        buf = kmalloc(sizeof(struct kvaser_msg), GFP_ATOMIC);
        if (!buf) {
@@ -1393,8 +1391,6 @@ static int kvaser_usb_setup_rx_urbs(struct kvaser_usb *dev)
 
                urb = usb_alloc_urb(0, GFP_KERNEL);
                if (!urb) {
-                       dev_warn(dev->udev->dev.parent,
-                                "No memory left for URBs\n");
                        err = -ENOMEM;
                        break;
                }
@@ -1670,7 +1666,6 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
 
        urb = usb_alloc_urb(0, GFP_ATOMIC);
        if (!urb) {
-               netdev_err(netdev, "No memory left for URBs\n");
                stats->tx_dropped++;
                dev_kfree_skb(skb);
                return NETDEV_TX_OK;
index bfb91d8..c06382c 100644 (file)
@@ -399,7 +399,6 @@ static int peak_usb_start(struct peak_usb_device *dev)
                /* create a URB, and a buffer for it, to receive usb messages */
                urb = usb_alloc_urb(0, GFP_KERNEL);
                if (!urb) {
-                       netdev_err(netdev, "No memory left for URBs\n");
                        err = -ENOMEM;
                        break;
                }
@@ -454,7 +453,6 @@ static int peak_usb_start(struct peak_usb_device *dev)
                /* create a URB and a buffer for it, to transmit usb messages */
                urb = usb_alloc_urb(0, GFP_KERNEL);
                if (!urb) {
-                       netdev_err(netdev, "No memory left for URBs\n");
                        err = -ENOMEM;
                        break;
                }
@@ -651,10 +649,8 @@ static int peak_usb_restart(struct peak_usb_device *dev)
 
        /* first allocate a urb to handle the asynchronous steps */
        urb = usb_alloc_urb(0, GFP_ATOMIC);
-       if (!urb) {
-               netdev_err(dev->netdev, "no memory left for urb\n");
+       if (!urb)
                return -ENOMEM;
-       }
 
        /* also allocate enough space for the commands to send */
        buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_ATOMIC);
index a731720..108a30e 100644 (file)
@@ -623,10 +623,8 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb,
 
        /* create a URB, and a buffer for it, and copy the data to the URB */
        urb = usb_alloc_urb(0, GFP_ATOMIC);
-       if (!urb) {
-               netdev_err(netdev, "No memory left for URBs\n");
+       if (!urb)
                goto nomem;
-       }
 
        buf = usb_alloc_coherent(priv->udev, size, GFP_ATOMIC,
                                 &urb->transfer_dma);
@@ -748,7 +746,6 @@ static int usb_8dev_start(struct usb_8dev_priv *priv)
                /* create a URB, and a buffer for it */
                urb = usb_alloc_urb(0, GFP_KERNEL);
                if (!urb) {
-                       netdev_err(netdev, "No memory left for URBs\n");
                        err = -ENOMEM;
                        break;
                }
index bda37d3..38ee10d 100644 (file)
@@ -1681,7 +1681,8 @@ static int b53_switch_init(struct b53_device *dev)
        return 0;
 }
 
-struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops,
+struct b53_device *b53_switch_alloc(struct device *base,
+                                   const struct b53_io_ops *ops,
                                    void *priv)
 {
        struct dsa_switch *ds;
index aa87c3f..477a16b 100644 (file)
@@ -267,7 +267,7 @@ static int b53_mdio_phy_write16(struct b53_device *dev, int addr, int reg,
        return mdiobus_write_nested(bus, addr, reg, value);
 }
 
-static struct b53_io_ops b53_mdio_ops = {
+static const struct b53_io_ops b53_mdio_ops = {
        .read8 = b53_mdio_read8,
        .read16 = b53_mdio_read16,
        .read32 = b53_mdio_read32,
index 77ffc43..cc9e6bd 100644 (file)
@@ -208,7 +208,7 @@ static int b53_mmap_write64(struct b53_device *dev, u8 page, u8 reg,
        return 0;
 }
 
-static struct b53_io_ops b53_mmap_ops = {
+static const struct b53_io_ops b53_mmap_ops = {
        .read8 = b53_mmap_read8,
        .read16 = b53_mmap_read16,
        .read32 = b53_mmap_read32,
index 835a744..d268493 100644 (file)
@@ -182,7 +182,8 @@ static inline int is_cpu_port(struct b53_device *dev, int port)
        return dev->cpu_port;
 }
 
-struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops,
+struct b53_device *b53_switch_alloc(struct device *base,
+                                   const struct b53_io_ops *ops,
                                    void *priv);
 
 int b53_switch_detect(struct b53_device *dev);
index 2bda0b5..f89f530 100644 (file)
@@ -270,7 +270,7 @@ static int b53_spi_write64(struct b53_device *dev, u8 page, u8 reg, u64 value)
        return spi_write(spi, txbuf, sizeof(txbuf));
 }
 
-static struct b53_io_ops b53_spi_ops = {
+static const struct b53_io_ops b53_spi_ops = {
        .read8 = b53_spi_read8,
        .read16 = b53_spi_read16,
        .read32 = b53_spi_read32,
@@ -317,8 +317,6 @@ static int b53_spi_remove(struct spi_device *spi)
 static struct spi_driver b53_spi_driver = {
        .driver = {
                .name   = "b53-switch",
-               .bus    = &spi_bus_type,
-               .owner  = THIS_MODULE,
        },
        .probe  = b53_spi_probe,
        .remove = b53_spi_remove,
index 3e2d4a5..8a62b6a 100644 (file)
@@ -344,7 +344,7 @@ err:
        return ret;
 }
 
-static struct b53_io_ops b53_srab_ops = {
+static const struct b53_io_ops b53_srab_ops = {
        .read8 = b53_srab_read8,
        .read16 = b53_srab_read16,
        .read32 = b53_srab_read32,
index d1d9d3c..a230fcb 100644 (file)
@@ -216,6 +216,118 @@ static int mv88e6xxx_write(struct mv88e6xxx_chip *chip,
        return 0;
 }
 
+static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy,
+                             int reg, u16 *val)
+{
+       int addr = phy; /* PHY devices addresses start at 0x0 */
+
+       if (!chip->phy_ops)
+               return -EOPNOTSUPP;
+
+       return chip->phy_ops->read(chip, addr, reg, val);
+}
+
+static int mv88e6xxx_phy_write(struct mv88e6xxx_chip *chip, int phy,
+                              int reg, u16 val)
+{
+       int addr = phy; /* PHY devices addresses start at 0x0 */
+
+       if (!chip->phy_ops)
+               return -EOPNOTSUPP;
+
+       return chip->phy_ops->write(chip, addr, reg, val);
+}
+
+static int mv88e6xxx_phy_page_get(struct mv88e6xxx_chip *chip, int phy, u8 page)
+{
+       if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_PHY_PAGE))
+               return -EOPNOTSUPP;
+
+       return mv88e6xxx_phy_write(chip, phy, PHY_PAGE, page);
+}
+
+static void mv88e6xxx_phy_page_put(struct mv88e6xxx_chip *chip, int phy)
+{
+       int err;
+
+       /* Restore PHY page Copper 0x0 for access via the registered MDIO bus */
+       err = mv88e6xxx_phy_write(chip, phy, PHY_PAGE, PHY_PAGE_COPPER);
+       if (unlikely(err)) {
+               dev_err(chip->dev, "failed to restore PHY %d page Copper (%d)\n",
+                       phy, err);
+       }
+}
+
+static int mv88e6xxx_phy_page_read(struct mv88e6xxx_chip *chip, int phy,
+                                  u8 page, int reg, u16 *val)
+{
+       int err;
+
+       /* There is no paging for registers 22 */
+       if (reg == PHY_PAGE)
+               return -EINVAL;
+
+       err = mv88e6xxx_phy_page_get(chip, phy, page);
+       if (!err) {
+               err = mv88e6xxx_phy_read(chip, phy, reg, val);
+               mv88e6xxx_phy_page_put(chip, phy);
+       }
+
+       return err;
+}
+
+static int mv88e6xxx_phy_page_write(struct mv88e6xxx_chip *chip, int phy,
+                                   u8 page, int reg, u16 val)
+{
+       int err;
+
+       /* There is no paging for registers 22 */
+       if (reg == PHY_PAGE)
+               return -EINVAL;
+
+       err = mv88e6xxx_phy_page_get(chip, phy, page);
+       if (!err) {
+               err = mv88e6xxx_phy_write(chip, phy, PHY_PAGE, page);
+               mv88e6xxx_phy_page_put(chip, phy);
+       }
+
+       return err;
+}
+
+static int mv88e6xxx_serdes_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+{
+       return mv88e6xxx_phy_page_read(chip, ADDR_SERDES, SERDES_PAGE_FIBER,
+                                      reg, val);
+}
+
+static int mv88e6xxx_serdes_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+{
+       return mv88e6xxx_phy_page_write(chip, ADDR_SERDES, SERDES_PAGE_FIBER,
+                                       reg, val);
+}
+
+static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg,
+                         u16 mask)
+{
+       unsigned long timeout = jiffies + HZ / 10;
+
+       while (time_before(jiffies, timeout)) {
+               u16 val;
+               int err;
+
+               err = mv88e6xxx_read(chip, addr, reg, &val);
+               if (err)
+                       return err;
+
+               if (!(val & mask))
+                       return 0;
+
+               usleep_range(1000, 2000);
+       }
+
+       return -ETIMEDOUT;
+}
+
 /* Indirect write to single pointer-data register with an Update bit */
 static int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
                            u16 update)
@@ -260,22 +372,6 @@ static int _mv88e6xxx_reg_write(struct mv88e6xxx_chip *chip, int addr,
        return mv88e6xxx_write(chip, addr, reg, val);
 }
 
-static int mv88e6xxx_mdio_read_direct(struct mv88e6xxx_chip *chip,
-                                     int addr, int regnum)
-{
-       if (addr >= 0)
-               return _mv88e6xxx_reg_read(chip, addr, regnum);
-       return 0xffff;
-}
-
-static int mv88e6xxx_mdio_write_direct(struct mv88e6xxx_chip *chip,
-                                      int addr, int regnum, u16 val)
-{
-       if (addr >= 0)
-               return _mv88e6xxx_reg_write(chip, addr, regnum, val);
-       return 0;
-}
-
 static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip)
 {
        int ret;
@@ -400,34 +496,39 @@ static void mv88e6xxx_ppu_state_init(struct mv88e6xxx_chip *chip)
        chip->ppu_timer.function = mv88e6xxx_ppu_reenable_timer;
 }
 
-static int mv88e6xxx_mdio_read_ppu(struct mv88e6xxx_chip *chip, int addr,
-                                  int regnum)
+static int mv88e6xxx_phy_ppu_read(struct mv88e6xxx_chip *chip, int addr,
+                                 int reg, u16 *val)
 {
-       int ret;
+       int err;
 
-       ret = mv88e6xxx_ppu_access_get(chip);
-       if (ret >= 0) {
-               ret = _mv88e6xxx_reg_read(chip, addr, regnum);
+       err = mv88e6xxx_ppu_access_get(chip);
+       if (!err) {
+               err = mv88e6xxx_read(chip, addr, reg, val);
                mv88e6xxx_ppu_access_put(chip);
        }
 
-       return ret;
+       return err;
 }
 
-static int mv88e6xxx_mdio_write_ppu(struct mv88e6xxx_chip *chip, int addr,
-                                   int regnum, u16 val)
+static int mv88e6xxx_phy_ppu_write(struct mv88e6xxx_chip *chip, int addr,
+                                  int reg, u16 val)
 {
-       int ret;
+       int err;
 
-       ret = mv88e6xxx_ppu_access_get(chip);
-       if (ret >= 0) {
-               ret = _mv88e6xxx_reg_write(chip, addr, regnum, val);
+       err = mv88e6xxx_ppu_access_get(chip);
+       if (!err) {
+               err = mv88e6xxx_write(chip, addr, reg, val);
                mv88e6xxx_ppu_access_put(chip);
        }
 
-       return ret;
+       return err;
 }
 
+static const struct mv88e6xxx_ops mv88e6xxx_phy_ppu_ops = {
+       .read = mv88e6xxx_phy_ppu_read,
+       .write = mv88e6xxx_phy_ppu_write,
+};
+
 static bool mv88e6xxx_6065_family(struct mv88e6xxx_chip *chip)
 {
        return chip->info->family == MV88E6XXX_FAMILY_6065;
@@ -819,130 +920,69 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
        mutex_unlock(&chip->reg_lock);
 }
 
-static int _mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int reg, int offset,
-                          u16 mask)
-{
-       unsigned long timeout = jiffies + HZ / 10;
-
-       while (time_before(jiffies, timeout)) {
-               int ret;
-
-               ret = _mv88e6xxx_reg_read(chip, reg, offset);
-               if (ret < 0)
-                       return ret;
-               if (!(ret & mask))
-                       return 0;
-
-               usleep_range(1000, 2000);
-       }
-       return -ETIMEDOUT;
-}
-
-static int mv88e6xxx_mdio_wait(struct mv88e6xxx_chip *chip)
-{
-       return _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_OP,
-                              GLOBAL2_SMI_OP_BUSY);
-}
-
 static int _mv88e6xxx_atu_wait(struct mv88e6xxx_chip *chip)
 {
-       return _mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_ATU_OP,
-                              GLOBAL_ATU_OP_BUSY);
-}
-
-static int mv88e6xxx_mdio_read_indirect(struct mv88e6xxx_chip *chip,
-                                       int addr, int regnum)
-{
-       int ret;
-
-       ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_OP,
-                                  GLOBAL2_SMI_OP_22_READ | (addr << 5) |
-                                  regnum);
-       if (ret < 0)
-               return ret;
-
-       ret = mv88e6xxx_mdio_wait(chip);
-       if (ret < 0)
-               return ret;
-
-       ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL2, GLOBAL2_SMI_DATA);
-
-       return ret;
-}
-
-static int mv88e6xxx_mdio_write_indirect(struct mv88e6xxx_chip *chip,
-                                        int addr, int regnum, u16 val)
-{
-       int ret;
-
-       ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_DATA, val);
-       if (ret < 0)
-               return ret;
-
-       ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_OP,
-                                  GLOBAL2_SMI_OP_22_WRITE | (addr << 5) |
-                                  regnum);
-
-       return mv88e6xxx_mdio_wait(chip);
+       return mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_ATU_OP,
+                             GLOBAL_ATU_OP_BUSY);
 }
 
 static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port,
                             struct ethtool_eee *e)
 {
        struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-       int reg;
+       u16 reg;
+       int err;
 
        if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE))
                return -EOPNOTSUPP;
 
        mutex_lock(&chip->reg_lock);
 
-       reg = mv88e6xxx_mdio_read_indirect(chip, port, 16);
-       if (reg < 0)
+       err = mv88e6xxx_phy_read(chip, port, 16, &reg);
+       if (err)
                goto out;
 
        e->eee_enabled = !!(reg & 0x0200);
        e->tx_lpi_enabled = !!(reg & 0x0100);
 
-       reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS);
-       if (reg < 0)
+       err = mv88e6xxx_read(chip, REG_PORT(port), PORT_STATUS, &reg);
+       if (err)
                goto out;
 
        e->eee_active = !!(reg & PORT_STATUS_EEE);
-       reg = 0;
-
 out:
        mutex_unlock(&chip->reg_lock);
-       return reg;
+
+       return err;
 }
 
 static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
                             struct phy_device *phydev, struct ethtool_eee *e)
 {
        struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-       int reg;
-       int ret;
+       u16 reg;
+       int err;
 
        if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE))
                return -EOPNOTSUPP;
 
        mutex_lock(&chip->reg_lock);
 
-       ret = mv88e6xxx_mdio_read_indirect(chip, port, 16);
-       if (ret < 0)
+       err = mv88e6xxx_phy_read(chip, port, 16, &reg);
+       if (err)
                goto out;
 
-       reg = ret & ~0x0300;
+       reg &= ~0x0300;
        if (e->eee_enabled)
                reg |= 0x0200;
        if (e->tx_lpi_enabled)
                reg |= 0x0100;
 
-       ret = mv88e6xxx_mdio_write_indirect(chip, port, 16, reg);
+       err = mv88e6xxx_phy_write(chip, port, 16, reg);
 out:
        mutex_unlock(&chip->reg_lock);
 
-       return ret;
+       return err;
 }
 
 static int _mv88e6xxx_atu_cmd(struct mv88e6xxx_chip *chip, u16 fid, u16 cmd)
@@ -1227,8 +1267,8 @@ static int _mv88e6xxx_port_pvid_set(struct mv88e6xxx_chip *chip,
 
 static int _mv88e6xxx_vtu_wait(struct mv88e6xxx_chip *chip)
 {
-       return _mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_VTU_OP,
-                              GLOBAL_VTU_OP_BUSY);
+       return mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_VTU_OP,
+                             GLOBAL_VTU_OP_BUSY);
 }
 
 static int _mv88e6xxx_vtu_cmd(struct mv88e6xxx_chip *chip, u16 op)
@@ -2302,38 +2342,6 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port)
        mutex_unlock(&chip->reg_lock);
 }
 
-static int _mv88e6xxx_mdio_page_write(struct mv88e6xxx_chip *chip,
-                                     int port, int page, int reg, int val)
-{
-       int ret;
-
-       ret = mv88e6xxx_mdio_write_indirect(chip, port, 0x16, page);
-       if (ret < 0)
-               goto restore_page_0;
-
-       ret = mv88e6xxx_mdio_write_indirect(chip, port, reg, val);
-restore_page_0:
-       mv88e6xxx_mdio_write_indirect(chip, port, 0x16, 0x0);
-
-       return ret;
-}
-
-static int _mv88e6xxx_mdio_page_read(struct mv88e6xxx_chip *chip,
-                                    int port, int page, int reg)
-{
-       int ret;
-
-       ret = mv88e6xxx_mdio_write_indirect(chip, port, 0x16, page);
-       if (ret < 0)
-               goto restore_page_0;
-
-       ret = mv88e6xxx_mdio_read_indirect(chip, port, reg);
-restore_page_0:
-       mv88e6xxx_mdio_write_indirect(chip, port, 0x16, 0x0);
-
-       return ret;
-}
-
 static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip)
 {
        bool ppu_active = mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU_ACTIVE);
@@ -2396,23 +2404,22 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip)
        return ret;
 }
 
-static int mv88e6xxx_power_on_serdes(struct mv88e6xxx_chip *chip)
+static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip)
 {
-       int ret;
+       u16 val;
+       int err;
 
-       ret = _mv88e6xxx_mdio_page_read(chip, REG_FIBER_SERDES,
-                                       PAGE_FIBER_SERDES, MII_BMCR);
-       if (ret < 0)
-               return ret;
+       /* Clear Power Down bit */
+       err = mv88e6xxx_serdes_read(chip, MII_BMCR, &val);
+       if (err)
+               return err;
 
-       if (ret & BMCR_PDOWN) {
-               ret &= ~BMCR_PDOWN;
-               ret = _mv88e6xxx_mdio_page_write(chip, REG_FIBER_SERDES,
-                                                PAGE_FIBER_SERDES, MII_BMCR,
-                                                ret);
+       if (val & BMCR_PDOWN) {
+               val &= ~BMCR_PDOWN;
+               err = mv88e6xxx_serdes_write(chip, MII_BMCR, val);
        }
 
-       return ret;
+       return err;
 }
 
 static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port,
@@ -2535,7 +2542,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
        /* If this port is connected to a SerDes, make sure the SerDes is not
         * powered down.
         */
-       if (mv88e6xxx_6352_family(chip)) {
+       if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SERDES)) {
                ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS);
                if (ret < 0)
                        return ret;
@@ -2543,7 +2550,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
                if ((ret == PORT_STATUS_CMODE_100BASE_X) ||
                    (ret == PORT_STATUS_CMODE_1000BASE_X) ||
                    (ret == PORT_STATUS_CMODE_SGMII)) {
-                       ret = mv88e6xxx_power_on_serdes(chip);
+                       ret = mv88e6xxx_serdes_power_on(chip);
                        if (ret < 0)
                                return ret;
                }
@@ -2949,8 +2956,8 @@ static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip)
                        break;
 
                /* Wait for the operation to complete */
-               err = _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
-                                     GLOBAL2_IRL_CMD_BUSY);
+               err = mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
+                                    GLOBAL2_IRL_CMD_BUSY);
                if (err)
                        break;
        }
@@ -3004,9 +3011,9 @@ static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip)
 
 static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip)
 {
-       return _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD,
-                              GLOBAL2_EEPROM_CMD_BUSY |
-                              GLOBAL2_EEPROM_CMD_RUNNING);
+       return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD,
+                             GLOBAL2_EEPROM_CMD_BUSY |
+                             GLOBAL2_EEPROM_CMD_RUNNING);
 }
 
 static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
@@ -3054,6 +3061,62 @@ static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip,
        return mv88e6xxx_g2_eeprom_cmd(chip, cmd);
 }
 
+static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip)
+{
+       return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD,
+                             GLOBAL2_SMI_PHY_CMD_BUSY);
+}
+
+static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
+{
+       int err;
+
+       err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, cmd);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g2_smi_phy_wait(chip);
+}
+
+static int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr,
+                                    int reg, u16 *val)
+{
+       u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg;
+       int err;
+
+       err = mv88e6xxx_g2_smi_phy_wait(chip);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
+       if (err)
+               return err;
+
+       return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val);
+}
+
+static int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr,
+                                     int reg, u16 val)
+{
+       u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg;
+       int err;
+
+       err = mv88e6xxx_g2_smi_phy_wait(chip);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
+}
+
+static const struct mv88e6xxx_ops mv88e6xxx_g2_smi_phy_ops = {
+       .read = mv88e6xxx_g2_smi_phy_read,
+       .write = mv88e6xxx_g2_smi_phy_write,
+};
+
 static int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
 {
        u16 reg;
@@ -3187,84 +3250,35 @@ static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr)
        return err;
 }
 
-#ifdef CONFIG_NET_DSA_HWMON
-static int mv88e6xxx_mdio_page_read(struct dsa_switch *ds, int port, int page,
-                                   int reg)
-{
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-       int ret;
-
-       mutex_lock(&chip->reg_lock);
-       ret = _mv88e6xxx_mdio_page_read(chip, port, page, reg);
-       mutex_unlock(&chip->reg_lock);
-
-       return ret;
-}
-
-static int mv88e6xxx_mdio_page_write(struct dsa_switch *ds, int port, int page,
-                                    int reg, int val)
-{
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-       int ret;
-
-       mutex_lock(&chip->reg_lock);
-       ret = _mv88e6xxx_mdio_page_write(chip, port, page, reg, val);
-       mutex_unlock(&chip->reg_lock);
-
-       return ret;
-}
-#endif
-
-static int mv88e6xxx_port_to_mdio_addr(struct mv88e6xxx_chip *chip, int port)
-{
-       if (port >= 0 && port < chip->info->num_ports)
-               return port;
-       return -EINVAL;
-}
-
-static int mv88e6xxx_mdio_read(struct mii_bus *bus, int port, int regnum)
+static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg)
 {
        struct mv88e6xxx_chip *chip = bus->priv;
-       int addr = mv88e6xxx_port_to_mdio_addr(chip, port);
-       int ret;
+       u16 val;
+       int err;
 
-       if (addr < 0)
+       if (phy >= chip->info->num_ports)
                return 0xffff;
 
        mutex_lock(&chip->reg_lock);
-
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU))
-               ret = mv88e6xxx_mdio_read_ppu(chip, addr, regnum);
-       else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_SMI_PHY))
-               ret = mv88e6xxx_mdio_read_indirect(chip, addr, regnum);
-       else
-               ret = mv88e6xxx_mdio_read_direct(chip, addr, regnum);
-
+       err = mv88e6xxx_phy_read(chip, phy, reg, &val);
        mutex_unlock(&chip->reg_lock);
-       return ret;
+
+       return err ? err : val;
 }
 
-static int mv88e6xxx_mdio_write(struct mii_bus *bus, int port, int regnum,
-                               u16 val)
+static int mv88e6xxx_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val)
 {
        struct mv88e6xxx_chip *chip = bus->priv;
-       int addr = mv88e6xxx_port_to_mdio_addr(chip, port);
-       int ret;
+       int err;
 
-       if (addr < 0)
+       if (phy >= chip->info->num_ports)
                return 0xffff;
 
        mutex_lock(&chip->reg_lock);
-
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU))
-               ret = mv88e6xxx_mdio_write_ppu(chip, addr, regnum, val);
-       else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_SMI_PHY))
-               ret = mv88e6xxx_mdio_write_indirect(chip, addr, regnum, val);
-       else
-               ret = mv88e6xxx_mdio_write_direct(chip, addr, regnum, val);
-
+       err = mv88e6xxx_phy_write(chip, phy, reg, val);
        mutex_unlock(&chip->reg_lock);
-       return ret;
+
+       return err;
 }
 
 static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
@@ -3274,9 +3288,6 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
        struct mii_bus *bus;
        int err;
 
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU))
-               mv88e6xxx_ppu_state_init(chip);
-
        if (np)
                chip->mdio_np = of_get_child_by_name(np, "mdio");
 
@@ -3332,44 +3343,42 @@ static void mv88e6xxx_mdio_unregister(struct mv88e6xxx_chip *chip)
 static int mv88e61xx_get_temp(struct dsa_switch *ds, int *temp)
 {
        struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       u16 val;
        int ret;
-       int val;
 
        *temp = 0;
 
        mutex_lock(&chip->reg_lock);
 
-       ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x16, 0x6);
+       ret = mv88e6xxx_phy_write(chip, 0x0, 0x16, 0x6);
        if (ret < 0)
                goto error;
 
        /* Enable temperature sensor */
-       ret = mv88e6xxx_mdio_read_direct(chip, 0x0, 0x1a);
+       ret = mv88e6xxx_phy_read(chip, 0x0, 0x1a, &val);
        if (ret < 0)
                goto error;
 
-       ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x1a, ret | (1 << 5));
+       ret = mv88e6xxx_phy_write(chip, 0x0, 0x1a, val | (1 << 5));
        if (ret < 0)
                goto error;
 
        /* Wait for temperature to stabilize */
        usleep_range(10000, 12000);
 
-       val = mv88e6xxx_mdio_read_direct(chip, 0x0, 0x1a);
-       if (val < 0) {
-               ret = val;
+       ret = mv88e6xxx_phy_read(chip, 0x0, 0x1a, &val);
+       if (ret < 0)
                goto error;
-       }
 
        /* Disable temperature sensor */
-       ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x1a, ret & ~(1 << 5));
+       ret = mv88e6xxx_phy_write(chip, 0x0, 0x1a, val & ~(1 << 5));
        if (ret < 0)
                goto error;
 
        *temp = ((val & 0x1f) - 5) * 5;
 
 error:
-       mv88e6xxx_mdio_write_direct(chip, 0x0, 0x16, 0x0);
+       mv88e6xxx_phy_write(chip, 0x0, 0x16, 0x0);
        mutex_unlock(&chip->reg_lock);
        return ret;
 }
@@ -3378,15 +3387,18 @@ static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp)
 {
        struct mv88e6xxx_chip *chip = ds_to_priv(ds);
        int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
+       u16 val;
        int ret;
 
        *temp = 0;
 
-       ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 27);
+       mutex_lock(&chip->reg_lock);
+       ret = mv88e6xxx_phy_page_read(chip, phy, 6, 27, &val);
+       mutex_unlock(&chip->reg_lock);
        if (ret < 0)
                return ret;
 
-       *temp = (ret & 0xff) - 25;
+       *temp = (val & 0xff) - 25;
 
        return 0;
 }
@@ -3408,6 +3420,7 @@ static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp)
 {
        struct mv88e6xxx_chip *chip = ds_to_priv(ds);
        int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
+       u16 val;
        int ret;
 
        if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT))
@@ -3415,11 +3428,13 @@ static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp)
 
        *temp = 0;
 
-       ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26);
+       mutex_lock(&chip->reg_lock);
+       ret = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val);
+       mutex_unlock(&chip->reg_lock);
        if (ret < 0)
                return ret;
 
-       *temp = (((ret >> 8) & 0x1f) * 5) - 25;
+       *temp = (((val >> 8) & 0x1f) * 5) - 25;
 
        return 0;
 }
@@ -3428,23 +3443,30 @@ static int mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp)
 {
        struct mv88e6xxx_chip *chip = ds_to_priv(ds);
        int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
-       int ret;
+       u16 val;
+       int err;
 
        if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT))
                return -EOPNOTSUPP;
 
-       ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26);
-       if (ret < 0)
-               return ret;
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val);
+       if (err)
+               goto unlock;
        temp = clamp_val(DIV_ROUND_CLOSEST(temp, 5) + 5, 0, 0x1f);
-       return mv88e6xxx_mdio_page_write(ds, phy, 6, 26,
-                                        (ret & 0xe0ff) | (temp << 8));
+       err = mv88e6xxx_phy_page_write(chip, phy, 6, 26,
+                                      (val & 0xe0ff) | (temp << 8));
+unlock:
+       mutex_unlock(&chip->reg_lock);
+
+       return err;
 }
 
 static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm)
 {
        struct mv88e6xxx_chip *chip = ds_to_priv(ds);
        int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
+       u16 val;
        int ret;
 
        if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT))
@@ -3452,11 +3474,13 @@ static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm)
 
        *alarm = false;
 
-       ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26);
+       mutex_lock(&chip->reg_lock);
+       ret = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val);
+       mutex_unlock(&chip->reg_lock);
        if (ret < 0)
                return ret;
 
-       *alarm = !!(ret & 0x40);
+       *alarm = !!(val & 0x40);
 
        return 0;
 }
@@ -3873,6 +3897,23 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev)
        return chip;
 }
 
+static const struct mv88e6xxx_ops mv88e6xxx_phy_ops = {
+       .read = mv88e6xxx_read,
+       .write = mv88e6xxx_write,
+};
+
+static void mv88e6xxx_phy_init(struct mv88e6xxx_chip *chip)
+{
+       if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SMI_PHY)) {
+               chip->phy_ops = &mv88e6xxx_g2_smi_phy_ops;
+       } else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) {
+               chip->phy_ops = &mv88e6xxx_phy_ppu_ops;
+               mv88e6xxx_ppu_state_init(chip);
+       } else {
+               chip->phy_ops = &mv88e6xxx_phy_ops;
+       }
+}
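
The mv88e6xxx_phy_read/write helpers called throughout this file are expected to dispatch through the phy_ops pointer selected above; a hedged sketch of that dispatch, assuming the ops share the (chip, addr, reg, val) signature used by mv88e6xxx_g2_smi_phy_read():

/* Illustrative sketch only -- the real helper lives elsewhere in this
 * series. It simply forwards to whichever ops mv88e6xxx_phy_init() chose.
 */
static int example_phy_read(struct mv88e6xxx_chip *chip, int phy,
			    int reg, u16 *val)
{
	if (!chip->phy_ops)
		return -EOPNOTSUPP;

	return chip->phy_ops->read(chip, phy, reg, val);
}
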
+
 static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
                              struct mii_bus *bus, int sw_addr)
 {
@@ -3882,7 +3923,7 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
 
        if (sw_addr == 0)
                chip->smi_ops = &mv88e6xxx_smi_single_chip_ops;
-       else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_MULTI_CHIP))
+       else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_MULTI_CHIP))
                chip->smi_ops = &mv88e6xxx_smi_multi_chip_ops;
        else
                return -EINVAL;
@@ -3920,6 +3961,8 @@ static const char *mv88e6xxx_drv_probe(struct device *dsa_dev,
        if (err)
                goto free;
 
+       mv88e6xxx_phy_init(chip);
+
        err = mv88e6xxx_mdio_register(chip, NULL);
        if (err)
                goto free;
@@ -4021,6 +4064,8 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
        if (err)
                return err;
 
+       mv88e6xxx_phy_init(chip);
+
        chip->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_ASIS);
        if (IS_ERR(chip->reset))
                return PTR_ERR(chip->reset);
index 48d6ea7..1f9bab5 100644 (file)
 #define SMI_CMD_OP_45_READ_DATA_INC    ((3 << 10) | SMI_CMD_BUSY)
 #define SMI_DATA               0x01
 
-/* Fiber/SERDES Registers are located at SMI address F, page 1 */
-#define REG_FIBER_SERDES       0x0f
-#define PAGE_FIBER_SERDES      0x01
+/* PHY Registers */
+#define PHY_PAGE               0x16
+#define PHY_PAGE_COPPER                0x00
+
+#define ADDR_SERDES            0x0f
+#define SERDES_PAGE_FIBER      0x01
 
 #define REG_PORT(p)            (0x10 + (p))
 #define PORT_STATUS            0x00
 #define GLOBAL2_EEPROM_DATA    0x15
 #define GLOBAL2_PTP_AVB_OP     0x16
 #define GLOBAL2_PTP_AVB_DATA   0x17
-#define GLOBAL2_SMI_OP         0x18
-#define GLOBAL2_SMI_OP_BUSY            BIT(15)
-#define GLOBAL2_SMI_OP_CLAUSE_22       BIT(12)
-#define GLOBAL2_SMI_OP_22_WRITE                ((1 << 10) | GLOBAL2_SMI_OP_BUSY | \
-                                        GLOBAL2_SMI_OP_CLAUSE_22)
-#define GLOBAL2_SMI_OP_22_READ         ((2 << 10) | GLOBAL2_SMI_OP_BUSY | \
-                                        GLOBAL2_SMI_OP_CLAUSE_22)
-#define GLOBAL2_SMI_OP_45_WRITE_ADDR   ((0 << 10) | GLOBAL2_SMI_OP_BUSY)
-#define GLOBAL2_SMI_OP_45_WRITE_DATA   ((1 << 10) | GLOBAL2_SMI_OP_BUSY)
-#define GLOBAL2_SMI_OP_45_READ_DATA    ((2 << 10) | GLOBAL2_SMI_OP_BUSY)
-#define GLOBAL2_SMI_DATA       0x19
+#define GLOBAL2_SMI_PHY_CMD                    0x18
+#define GLOBAL2_SMI_PHY_CMD_BUSY               BIT(15)
+#define GLOBAL2_SMI_PHY_CMD_MODE_22            BIT(12)
+#define GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA   ((0x1 << 10) | \
+                                                GLOBAL2_SMI_PHY_CMD_MODE_22 | \
+                                                GLOBAL2_SMI_PHY_CMD_BUSY)
+#define GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA    ((0x2 << 10) | \
+                                                GLOBAL2_SMI_PHY_CMD_MODE_22 | \
+                                                GLOBAL2_SMI_PHY_CMD_BUSY)
+#define GLOBAL2_SMI_PHY_DATA                   0x19
 #define GLOBAL2_SCRATCH_MISC   0x1a
 #define GLOBAL2_SCRATCH_BUSY           BIT(15)
 #define GLOBAL2_SCRATCH_REGISTER_SHIFT 8
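
The new PHY_PAGE/PHY_PAGE_COPPER definitions above back the paged PHY accessors used by the hwmon and SERDES code; a minimal sketch of that pattern, assuming mv88e6xxx_phy_read/write helpers with the signatures used in chip.c:

/* Illustrative sketch only, not part of this commit: select the page,
 * access the register, then restore the copper page.
 */
static int example_phy_page_read(struct mv88e6xxx_chip *chip, int phy,
				 u8 page, int reg, u16 *val)
{
	int err;

	err = mv88e6xxx_phy_write(chip, phy, PHY_PAGE, page);
	if (err)
		return err;

	err = mv88e6xxx_phy_read(chip, phy, reg, val);

	/* Restore PHY_PAGE_COPPER even if the read failed */
	mv88e6xxx_phy_write(chip, phy, PHY_PAGE, PHY_PAGE_COPPER);

	return err;
}
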
@@ -388,6 +390,21 @@ enum mv88e6xxx_cap {
         */
        MV88E6XXX_CAP_EEE,
 
+       /* Multi-chip Addressing Mode.
+        * Some chips respond to only 2 registers at their own SMI device
+        * address when that address is non-zero, and use indirect access to
+        * internal registers.
+        */
+       MV88E6XXX_CAP_SMI_CMD,          /* (0x00) SMI Command */
+       MV88E6XXX_CAP_SMI_DATA,         /* (0x01) SMI Data */
+
+       /* PHY Registers.
+        */
+       MV88E6XXX_CAP_PHY_PAGE,         /* (0x16) Page Register */
+
+       /* Fiber/SERDES Registers (SMI address F).
+        */
+       MV88E6XXX_CAP_SERDES,
+
        /* Switch Global 2 Registers.
         * The device contains a second set of global 16-bit registers.
         */
@@ -402,12 +419,8 @@ enum mv88e6xxx_cap {
        MV88E6XXX_CAP_G2_POT,           /* (0x0f) Priority Override Table */
        MV88E6XXX_CAP_G2_EEPROM_CMD,    /* (0x14) EEPROM Command */
        MV88E6XXX_CAP_G2_EEPROM_DATA,   /* (0x15) EEPROM Data */
-
-       /* Multi-chip Addressing Mode.
-        * Some chips require an indirect SMI access when their SMI device
-        * address is not zero. See SMI_CMD and SMI_DATA.
-        */
-       MV88E6XXX_CAP_MULTI_CHIP,
+       MV88E6XXX_CAP_G2_SMI_PHY_CMD,   /* (0x18) SMI PHY Command */
+       MV88E6XXX_CAP_G2_SMI_PHY_DATA,  /* (0x19) SMI PHY Data */
 
        /* PHY Polling Unit.
         * See GLOBAL_CONTROL_PPU_ENABLE and GLOBAL_STATUS_PPU_POLLING.
@@ -415,12 +428,6 @@ enum mv88e6xxx_cap {
        MV88E6XXX_CAP_PPU,
        MV88E6XXX_CAP_PPU_ACTIVE,
 
-       /* SMI PHY Command and Data registers.
-        * This requires an indirect access to PHY registers through
-        * GLOBAL2_SMI_OP, otherwise direct access to PHY registers is done.
-        */
-       MV88E6XXX_CAP_SMI_PHY,
-
        /* Per VLAN Spanning Tree Unit (STU).
         * The Port State database, if present, is accessed through VTU
         * operations and dedicated SID registers. See GLOBAL_VTU_SID.
@@ -441,6 +448,14 @@ enum mv88e6xxx_cap {
 
 /* Bitmask of capabilities */
 #define MV88E6XXX_FLAG_EEE             BIT(MV88E6XXX_CAP_EEE)
+
+#define MV88E6XXX_FLAG_SMI_CMD         BIT(MV88E6XXX_CAP_SMI_CMD)
+#define MV88E6XXX_FLAG_SMI_DATA                BIT(MV88E6XXX_CAP_SMI_DATA)
+
+#define MV88E6XXX_FLAG_PHY_PAGE                BIT(MV88E6XXX_CAP_PHY_PAGE)
+
+#define MV88E6XXX_FLAG_SERDES          BIT(MV88E6XXX_CAP_SERDES)
+
 #define MV88E6XXX_FLAG_GLOBAL2         BIT(MV88E6XXX_CAP_GLOBAL2)
 #define MV88E6XXX_FLAG_G2_MGMT_EN_2X   BIT(MV88E6XXX_CAP_G2_MGMT_EN_2X)
 #define MV88E6XXX_FLAG_G2_MGMT_EN_0X   BIT(MV88E6XXX_CAP_G2_MGMT_EN_0X)
@@ -452,10 +467,11 @@ enum mv88e6xxx_cap {
 #define MV88E6XXX_FLAG_G2_POT          BIT(MV88E6XXX_CAP_G2_POT)
 #define MV88E6XXX_FLAG_G2_EEPROM_CMD   BIT(MV88E6XXX_CAP_G2_EEPROM_CMD)
 #define MV88E6XXX_FLAG_G2_EEPROM_DATA  BIT(MV88E6XXX_CAP_G2_EEPROM_DATA)
-#define MV88E6XXX_FLAG_MULTI_CHIP      BIT(MV88E6XXX_CAP_MULTI_CHIP)
+#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD  BIT(MV88E6XXX_CAP_G2_SMI_PHY_CMD)
+#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT(MV88E6XXX_CAP_G2_SMI_PHY_DATA)
+
 #define MV88E6XXX_FLAG_PPU             BIT(MV88E6XXX_CAP_PPU)
 #define MV88E6XXX_FLAG_PPU_ACTIVE      BIT(MV88E6XXX_CAP_PPU_ACTIVE)
-#define MV88E6XXX_FLAG_SMI_PHY         BIT(MV88E6XXX_CAP_SMI_PHY)
 #define MV88E6XXX_FLAG_STU             BIT(MV88E6XXX_CAP_STU)
 #define MV88E6XXX_FLAG_TEMP            BIT(MV88E6XXX_CAP_TEMP)
 #define MV88E6XXX_FLAG_TEMP_LIMIT      BIT(MV88E6XXX_CAP_TEMP_LIMIT)
@@ -471,28 +487,43 @@ enum mv88e6xxx_cap {
        (MV88E6XXX_FLAG_G2_IRL_CMD |    \
         MV88E6XXX_FLAG_G2_IRL_DATA)
 
+/* Multi-chip Addressing Mode */
+#define MV88E6XXX_FLAGS_MULTI_CHIP     \
+       (MV88E6XXX_FLAG_SMI_CMD |       \
+        MV88E6XXX_FLAG_SMI_DATA)
+
 /* Cross-chip Port VLAN Table */
 #define MV88E6XXX_FLAGS_PVT            \
        (MV88E6XXX_FLAG_G2_PVT_ADDR |   \
         MV88E6XXX_FLAG_G2_PVT_DATA)
 
+/* Fiber/SERDES Registers at SMI address F, page 1 */
+#define MV88E6XXX_FLAGS_SERDES         \
+       (MV88E6XXX_FLAG_PHY_PAGE |      \
+        MV88E6XXX_FLAG_SERDES)
+
+/* Indirect PHY access via Global2 SMI PHY registers */
+#define MV88E6XXX_FLAGS_SMI_PHY                \
+       (MV88E6XXX_FLAG_G2_SMI_PHY_CMD |\
+        MV88E6XXX_FLAG_G2_SMI_PHY_DATA)
+
 #define MV88E6XXX_FLAGS_FAMILY_6095    \
        (MV88E6XXX_FLAG_GLOBAL2 |       \
         MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
-        MV88E6XXX_FLAG_MULTI_CHIP |    \
         MV88E6XXX_FLAG_PPU |           \
-        MV88E6XXX_FLAG_VTU)
+        MV88E6XXX_FLAG_VTU |           \
+        MV88E6XXX_FLAGS_MULTI_CHIP)
 
 #define MV88E6XXX_FLAGS_FAMILY_6097    \
        (MV88E6XXX_FLAG_GLOBAL2 |       \
         MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
         MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
         MV88E6XXX_FLAG_G2_POT |        \
-        MV88E6XXX_FLAG_MULTI_CHIP |    \
         MV88E6XXX_FLAG_PPU |           \
         MV88E6XXX_FLAG_STU |           \
         MV88E6XXX_FLAG_VTU |           \
         MV88E6XXX_FLAGS_IRL |          \
+        MV88E6XXX_FLAGS_MULTI_CHIP |   \
         MV88E6XXX_FLAGS_PVT)
 
 #define MV88E6XXX_FLAGS_FAMILY_6165    \
@@ -501,17 +532,17 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
         MV88E6XXX_FLAG_G2_SWITCH_MAC | \
         MV88E6XXX_FLAG_G2_POT |        \
-        MV88E6XXX_FLAG_MULTI_CHIP |    \
         MV88E6XXX_FLAG_STU |           \
         MV88E6XXX_FLAG_TEMP |          \
         MV88E6XXX_FLAG_VTU |           \
         MV88E6XXX_FLAGS_IRL |          \
+        MV88E6XXX_FLAGS_MULTI_CHIP |   \
         MV88E6XXX_FLAGS_PVT)
 
 #define MV88E6XXX_FLAGS_FAMILY_6185    \
        (MV88E6XXX_FLAG_GLOBAL2 |       \
         MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
-        MV88E6XXX_FLAG_MULTI_CHIP |    \
+        MV88E6XXX_FLAGS_MULTI_CHIP |   \
         MV88E6XXX_FLAG_PPU |           \
         MV88E6XXX_FLAG_VTU)
 
@@ -522,15 +553,15 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
         MV88E6XXX_FLAG_G2_SWITCH_MAC | \
         MV88E6XXX_FLAG_G2_POT |        \
-        MV88E6XXX_FLAG_MULTI_CHIP |    \
         MV88E6XXX_FLAG_PPU_ACTIVE |    \
-        MV88E6XXX_FLAG_SMI_PHY |       \
         MV88E6XXX_FLAG_TEMP |          \
         MV88E6XXX_FLAG_TEMP_LIMIT |    \
         MV88E6XXX_FLAG_VTU |           \
         MV88E6XXX_FLAGS_EEPROM16 |     \
         MV88E6XXX_FLAGS_IRL |          \
-        MV88E6XXX_FLAGS_PVT)
+        MV88E6XXX_FLAGS_MULTI_CHIP |   \
+        MV88E6XXX_FLAGS_PVT |          \
+        MV88E6XXX_FLAGS_SMI_PHY)
 
 #define MV88E6XXX_FLAGS_FAMILY_6351    \
        (MV88E6XXX_FLAG_GLOBAL2 |       \
@@ -538,14 +569,14 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
         MV88E6XXX_FLAG_G2_SWITCH_MAC | \
         MV88E6XXX_FLAG_G2_POT |        \
-        MV88E6XXX_FLAG_MULTI_CHIP |    \
         MV88E6XXX_FLAG_PPU_ACTIVE |    \
-        MV88E6XXX_FLAG_SMI_PHY |       \
         MV88E6XXX_FLAG_STU |           \
         MV88E6XXX_FLAG_TEMP |          \
         MV88E6XXX_FLAG_VTU |           \
         MV88E6XXX_FLAGS_IRL |          \
-        MV88E6XXX_FLAGS_PVT)
+        MV88E6XXX_FLAGS_MULTI_CHIP |   \
+        MV88E6XXX_FLAGS_PVT |          \
+        MV88E6XXX_FLAGS_SMI_PHY)
 
 #define MV88E6XXX_FLAGS_FAMILY_6352    \
        (MV88E6XXX_FLAG_EEE |           \
@@ -554,16 +585,17 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
         MV88E6XXX_FLAG_G2_SWITCH_MAC | \
         MV88E6XXX_FLAG_G2_POT |        \
-        MV88E6XXX_FLAG_MULTI_CHIP |    \
         MV88E6XXX_FLAG_PPU_ACTIVE |    \
-        MV88E6XXX_FLAG_SMI_PHY |       \
         MV88E6XXX_FLAG_STU |           \
         MV88E6XXX_FLAG_TEMP |          \
         MV88E6XXX_FLAG_TEMP_LIMIT |    \
         MV88E6XXX_FLAG_VTU |           \
         MV88E6XXX_FLAGS_EEPROM16 |     \
         MV88E6XXX_FLAGS_IRL |          \
-        MV88E6XXX_FLAGS_PVT)
+        MV88E6XXX_FLAGS_MULTI_CHIP |   \
+        MV88E6XXX_FLAGS_PVT |          \
+        MV88E6XXX_FLAGS_SERDES |       \
+        MV88E6XXX_FLAGS_SMI_PHY)
 
 struct mv88e6xxx_info {
        enum mv88e6xxx_family family;
@@ -623,6 +655,7 @@ struct mv88e6xxx_chip {
        /* Handles automatic disabling and re-enabling of the PHY
         * polling unit.
         */
+       const struct mv88e6xxx_ops *phy_ops;
        struct mutex            ppu_mutex;
        int                     ppu_disabled;
        struct work_struct      ppu_work;
index 2ffd634..8cc7467 100644 (file)
@@ -24,6 +24,7 @@ source "drivers/net/ethernet/agere/Kconfig"
 source "drivers/net/ethernet/allwinner/Kconfig"
 source "drivers/net/ethernet/alteon/Kconfig"
 source "drivers/net/ethernet/altera/Kconfig"
+source "drivers/net/ethernet/amazon/Kconfig"
 source "drivers/net/ethernet/amd/Kconfig"
 source "drivers/net/ethernet/apm/Kconfig"
 source "drivers/net/ethernet/apple/Kconfig"
index 1d349e9..a09423d 100644 (file)
@@ -10,6 +10,7 @@ obj-$(CONFIG_NET_VENDOR_AGERE) += agere/
 obj-$(CONFIG_NET_VENDOR_ALLWINNER) += allwinner/
 obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/
 obj-$(CONFIG_ALTERA_TSE) += altera/
+obj-$(CONFIG_NET_VENDOR_AMAZON) += amazon/
 obj-$(CONFIG_NET_VENDOR_AMD) += amd/
 obj-$(CONFIG_NET_XGENE) += apm/
 obj-$(CONFIG_NET_VENDOR_APPLE) += apple/
index 38eaea1..00f9ee3 100644 (file)
@@ -192,8 +192,8 @@ static int desc_list_init(struct net_device *dev)
                        goto init_error;
 
                skb_reserve(new_skb, NET_IP_ALIGN);
-               /* Invidate the data cache of skb->data range when it is write back
-                * cache. It will prevent overwritting the new data from DMA
+               /* Invalidate the data cache of skb->data range when it is write back
+                * cache. It will prevent overwriting the new data from DMA
                 */
                blackfin_dcache_invalidate_range((unsigned long)new_skb->head,
                                         (unsigned long)new_skb->end);
@@ -1205,7 +1205,7 @@ static void bfin_mac_rx(struct bfin_mac_local *lp)
        }
        /* reserve 2 bytes for RXDWA padding */
        skb_reserve(new_skb, NET_IP_ALIGN);
-       /* Invidate the data cache of skb->data range when it is write back
+       /* Invalidate the data cache of skb->data range when it is write back
         * cache. It will prevent overwritting the new data from DMA
         */
        blackfin_dcache_invalidate_range((unsigned long)new_skb->head,
@@ -1599,7 +1599,7 @@ static int bfin_mac_probe(struct platform_device *pdev)
        *(__le16 *) (&(ndev->dev_addr[4])) = cpu_to_le16((u16) bfin_read_EMAC_ADDRHI());
 
        /* probe mac */
-       /*todo: how to proble? which is revision_register */
+       /*todo: how to probe? which is revision_register */
        bfin_write_EMAC_ADDRLO(0x12345678);
        if (bfin_read_EMAC_ADDRLO() != 0x12345678) {
                dev_err(&pdev->dev, "Cannot detect Blackfin on-chip ethernet MAC controller!\n");
index bca07c5..f8df824 100644 (file)
@@ -1105,27 +1105,6 @@ static void greth_set_msglevel(struct net_device *dev, u32 value)
        struct greth_private *greth = netdev_priv(dev);
        greth->msg_enable = value;
 }
-static int greth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-       struct greth_private *greth = netdev_priv(dev);
-       struct phy_device *phy = greth->phy;
-
-       if (!phy)
-               return -ENODEV;
-
-       return phy_ethtool_gset(phy, cmd);
-}
-
-static int greth_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-       struct greth_private *greth = netdev_priv(dev);
-       struct phy_device *phy = greth->phy;
-
-       if (!phy)
-               return -ENODEV;
-
-       return phy_ethtool_sset(phy, cmd);
-}
 
 static int greth_get_regs_len(struct net_device *dev)
 {
@@ -1157,12 +1136,12 @@ static void greth_get_regs(struct net_device *dev, struct ethtool_regs *regs, vo
 static const struct ethtool_ops greth_ethtool_ops = {
        .get_msglevel           = greth_get_msglevel,
        .set_msglevel           = greth_set_msglevel,
-       .get_settings           = greth_get_settings,
-       .set_settings           = greth_set_settings,
        .get_drvinfo            = greth_get_drvinfo,
        .get_regs_len           = greth_get_regs_len,
        .get_regs               = greth_get_regs,
        .get_link               = ethtool_op_get_link,
+       .get_link_ksettings     = phy_ethtool_get_link_ksettings,
+       .set_link_ksettings     = phy_ethtool_set_link_ksettings,
 };
 
 static struct net_device_ops greth_netdev_ops = {
@@ -1224,7 +1203,7 @@ static int greth_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val)
 static void greth_link_change(struct net_device *dev)
 {
        struct greth_private *greth = netdev_priv(dev);
-       struct phy_device *phydev = greth->phy;
+       struct phy_device *phydev = dev->phydev;
        unsigned long flags;
        int status_change = 0;
        u32 ctrl;
@@ -1307,7 +1286,6 @@ static int greth_mdio_probe(struct net_device *dev)
        greth->link = 0;
        greth->speed = 0;
        greth->duplex = -1;
-       greth->phy = phy;
 
        return 0;
 }
@@ -1325,6 +1303,7 @@ static int greth_mdio_init(struct greth_private *greth)
 {
        int ret;
        unsigned long timeout;
+       struct net_device *ndev = greth->netdev;
 
        greth->mdio = mdiobus_alloc();
        if (!greth->mdio) {
@@ -1349,15 +1328,16 @@ static int greth_mdio_init(struct greth_private *greth)
                goto unreg_mdio;
        }
 
-       phy_start(greth->phy);
+       phy_start(ndev->phydev);
 
        /* If Ethernet debug link is used make autoneg happen right away */
        if (greth->edcl && greth_edcl == 1) {
-               phy_start_aneg(greth->phy);
+               phy_start_aneg(ndev->phydev);
                timeout = jiffies + 6*HZ;
-               while (!phy_aneg_done(greth->phy) && time_before(jiffies, timeout)) {
+               while (!phy_aneg_done(ndev->phydev) &&
+                      time_before(jiffies, timeout)) {
                }
-               phy_read_status(greth->phy);
+               phy_read_status(ndev->phydev);
                greth_link_change(greth->netdev);
        }
 
@@ -1569,8 +1549,8 @@ static int greth_of_remove(struct platform_device *of_dev)
 
        dma_free_coherent(&of_dev->dev, 1024, greth->tx_bd_base, greth->tx_bd_base_phys);
 
-       if (greth->phy)
-               phy_stop(greth->phy);
+       if (ndev->phydev)
+               phy_stop(ndev->phydev);
        mdiobus_unregister(greth->mdio);
 
        unregister_netdev(ndev);
index 92dd918..9c07140 100644 (file)
@@ -123,7 +123,6 @@ struct greth_private {
        struct napi_struct napi;
        spinlock_t devlock;
 
-       struct phy_device *phy;
        struct mii_bus *mdio;
        unsigned int link;
        unsigned int speed;
diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig
new file mode 100644 (file)
index 0000000..99b3035
--- /dev/null
@@ -0,0 +1,27 @@
+#
+# Amazon network device configuration
+#
+
+config NET_VENDOR_AMAZON
+       bool "Amazon Devices"
+       default y
+       ---help---
+         If you have a network (Ethernet) device belonging to this class, say Y.
+
+         Note that the answer to this question doesn't directly affect the
+         kernel: saying N will just cause the configurator to skip all
+         the questions about Amazon devices. If you say Y, you will be asked
+         for your specific device in the following questions.
+
+if NET_VENDOR_AMAZON
+
+config ENA_ETHERNET
+       tristate "Elastic Network Adapter (ENA) support"
+       depends on (PCI_MSI && X86)
+       ---help---
+         This driver supports the Elastic Network Adapter (ENA).
+
+         To compile this driver as a module, choose M here.
+         The module will be called ena.
+
+endif #NET_VENDOR_AMAZON
diff --git a/drivers/net/ethernet/amazon/Makefile b/drivers/net/ethernet/amazon/Makefile
new file mode 100644 (file)
index 0000000..8e0b73f
--- /dev/null
@@ -0,0 +1,5 @@
+#
+# Makefile for the Amazon network device drivers.
+#
+
+obj-$(CONFIG_ENA_ETHERNET) += ena/
diff --git a/drivers/net/ethernet/amazon/ena/Makefile b/drivers/net/ethernet/amazon/ena/Makefile
new file mode 100644 (file)
index 0000000..eaeeae0
--- /dev/null
@@ -0,0 +1,7 @@
+#
+# Makefile for the Elastic Network Adapter (ENA) device drivers.
+#
+
+obj-$(CONFIG_ENA_ETHERNET) += ena.o
+
+ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o
diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
new file mode 100644 (file)
index 0000000..a46e749
--- /dev/null
@@ -0,0 +1,973 @@
+/*
+ * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _ENA_ADMIN_H_
+#define _ENA_ADMIN_H_
+
+enum ena_admin_aq_opcode {
+       ENA_ADMIN_CREATE_SQ     = 1,
+
+       ENA_ADMIN_DESTROY_SQ    = 2,
+
+       ENA_ADMIN_CREATE_CQ     = 3,
+
+       ENA_ADMIN_DESTROY_CQ    = 4,
+
+       ENA_ADMIN_GET_FEATURE   = 8,
+
+       ENA_ADMIN_SET_FEATURE   = 9,
+
+       ENA_ADMIN_GET_STATS     = 11,
+};
+
+enum ena_admin_aq_completion_status {
+       ENA_ADMIN_SUCCESS                       = 0,
+
+       ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE   = 1,
+
+       ENA_ADMIN_BAD_OPCODE                    = 2,
+
+       ENA_ADMIN_UNSUPPORTED_OPCODE            = 3,
+
+       ENA_ADMIN_MALFORMED_REQUEST             = 4,
+
+       /* Additional status is provided in ACQ entry extended_status */
+       ENA_ADMIN_ILLEGAL_PARAMETER             = 5,
+
+       ENA_ADMIN_UNKNOWN_ERROR                 = 6,
+};
+
+enum ena_admin_aq_feature_id {
+       ENA_ADMIN_DEVICE_ATTRIBUTES             = 1,
+
+       ENA_ADMIN_MAX_QUEUES_NUM                = 2,
+
+       ENA_ADMIN_RSS_HASH_FUNCTION             = 10,
+
+       ENA_ADMIN_STATELESS_OFFLOAD_CONFIG      = 11,
+
+       ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG  = 12,
+
+       ENA_ADMIN_MTU                           = 14,
+
+       ENA_ADMIN_RSS_HASH_INPUT                = 18,
+
+       ENA_ADMIN_INTERRUPT_MODERATION          = 20,
+
+       ENA_ADMIN_AENQ_CONFIG                   = 26,
+
+       ENA_ADMIN_LINK_CONFIG                   = 27,
+
+       ENA_ADMIN_HOST_ATTR_CONFIG              = 28,
+
+       ENA_ADMIN_FEATURES_OPCODE_NUM           = 32,
+};
+
+enum ena_admin_placement_policy_type {
+       /* descriptors and headers are in host memory */
+       ENA_ADMIN_PLACEMENT_POLICY_HOST = 1,
+
+       /* descriptors and headers are in device memory (a.k.a Low Latency
+        * Queue)
+        */
+       ENA_ADMIN_PLACEMENT_POLICY_DEV  = 3,
+};
+
+enum ena_admin_link_types {
+       ENA_ADMIN_LINK_SPEED_1G         = 0x1,
+
+       ENA_ADMIN_LINK_SPEED_2_HALF_G   = 0x2,
+
+       ENA_ADMIN_LINK_SPEED_5G         = 0x4,
+
+       ENA_ADMIN_LINK_SPEED_10G        = 0x8,
+
+       ENA_ADMIN_LINK_SPEED_25G        = 0x10,
+
+       ENA_ADMIN_LINK_SPEED_40G        = 0x20,
+
+       ENA_ADMIN_LINK_SPEED_50G        = 0x40,
+
+       ENA_ADMIN_LINK_SPEED_100G       = 0x80,
+
+       ENA_ADMIN_LINK_SPEED_200G       = 0x100,
+
+       ENA_ADMIN_LINK_SPEED_400G       = 0x200,
+};
+
+enum ena_admin_completion_policy_type {
+       /* completion queue entry for each sq descriptor */
+       ENA_ADMIN_COMPLETION_POLICY_DESC                = 0,
+
+       /* completion queue entry upon request in sq descriptor */
+       ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND      = 1,
+
+       /* current queue head pointer is updated in OS memory upon sq
+        * descriptor request
+        */
+       ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND      = 2,
+
+       /* current queue head pointer is updated in OS memory for each sq
+        * descriptor
+        */
+       ENA_ADMIN_COMPLETION_POLICY_HEAD                = 3,
+};
+
+/* basic stats return ena_admin_basic_stats while extended stats return a
+ * buffer (string format) with additional statistics per queue and per
+ * device id
+ */
+enum ena_admin_get_stats_type {
+       ENA_ADMIN_GET_STATS_TYPE_BASIC          = 0,
+
+       ENA_ADMIN_GET_STATS_TYPE_EXTENDED       = 1,
+};
+
+enum ena_admin_get_stats_scope {
+       ENA_ADMIN_SPECIFIC_QUEUE        = 0,
+
+       ENA_ADMIN_ETH_TRAFFIC           = 1,
+};
+
+struct ena_admin_aq_common_desc {
+       /* 11:0 : command_id
+        * 15:12 : reserved12
+        */
+       u16 command_id;
+
+       /* as appears in ena_admin_aq_opcode */
+       u8 opcode;
+
+       /* 0 : phase
+        * 1 : ctrl_data - control buffer address valid
+        * 2 : ctrl_data_indirect - control buffer address
+        *    points to list of pages with addresses of control
+        *    buffers
+        * 7:3 : reserved3
+        */
+       u8 flags;
+};
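
The command_id and flags sub-fields above are packed with the ENA_ADMIN_AQ_COMMON_DESC_* masks defined near the end of this header; a small illustrative helper (not part of the driver):

/* Illustrative sketch only. Packs a command id and the phase/ctrl_data
 * bits; the opcode byte is filled in separately by the caller.
 */
static inline void example_fill_aq_common(struct ena_admin_aq_common_desc *desc,
					  u16 command_id, bool phase,
					  bool has_ctrl_data)
{
	desc->command_id = command_id & ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
	desc->flags = 0;
	if (phase)
		desc->flags |= ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
	if (has_ctrl_data)
		desc->flags |= ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK;
}
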
+
+/* used in ena_admin_aq_entry. Can point directly to control data, or to a
+ * page list chunk. Used also at the end of indirect mode page list chunks,
+ * for chaining.
+ */
+struct ena_admin_ctrl_buff_info {
+       u32 length;
+
+       struct ena_common_mem_addr address;
+};
+
+struct ena_admin_sq {
+       u16 sq_idx;
+
+       /* 4:0 : reserved
+        * 7:5 : sq_direction - 0x1 - Tx; 0x2 - Rx
+        */
+       u8 sq_identity;
+
+       u8 reserved1;
+};
+
+struct ena_admin_aq_entry {
+       struct ena_admin_aq_common_desc aq_common_descriptor;
+
+       union {
+               u32 inline_data_w1[3];
+
+               struct ena_admin_ctrl_buff_info control_buffer;
+       } u;
+
+       u32 inline_data_w4[12];
+};
+
+struct ena_admin_acq_common_desc {
+       /* command identifier to associate it with the aq descriptor
+        * 11:0 : command_id
+        * 15:12 : reserved12
+        */
+       u16 command;
+
+       u8 status;
+
+       /* 0 : phase
+        * 7:1 : reserved1
+        */
+       u8 flags;
+
+       u16 extended_status;
+
+       /* serves as a hint of which AQ entries can be revoked */
+       u16 sq_head_indx;
+};
+
+struct ena_admin_acq_entry {
+       struct ena_admin_acq_common_desc acq_common_descriptor;
+
+       u32 response_specific_data[14];
+};
+
+struct ena_admin_aq_create_sq_cmd {
+       struct ena_admin_aq_common_desc aq_common_descriptor;
+
+       /* 4:0 : reserved0_w1
+        * 7:5 : sq_direction - 0x1 - Tx, 0x2 - Rx
+        */
+       u8 sq_identity;
+
+       u8 reserved8_w1;
+
+       /* 3:0 : placement_policy - Describing where the SQ
+        *    descriptor ring and the SQ packet headers reside:
+        *    0x1 - descriptors and headers are in OS memory,
+        *    0x3 - descriptors and headers in device memory
+        *    (a.k.a Low Latency Queue)
+        * 6:4 : completion_policy - Describing what policy
+        *    to use for generating a completion entry (cqe) in
+        *    the CQ associated with this SQ: 0x0 - cqe for each
+        *    sq descriptor, 0x1 - cqe upon request in sq
+        *    descriptor, 0x2 - current queue head pointer is
+        *    updated in OS memory upon sq descriptor request
+        *    0x3 - current queue head pointer is updated in OS
+        *    memory for each sq descriptor
+        * 7 : reserved15_w1
+        */
+       u8 sq_caps_2;
+
+       /* 0 : is_physically_contiguous - Describes whether the
+        *    queue ring memory is allocated in physically
+        *    contiguous pages or split.
+        * 7:1 : reserved17_w1
+        */
+       u8 sq_caps_3;
+
+       /* associated completion queue id. This CQ must be created prior to
+        *    SQ creation
+        */
+       u16 cq_idx;
+
+       /* submission queue depth in entries */
+       u16 sq_depth;
+
+       /* SQ physical base address in OS memory. This field should not be
+        * used for Low Latency queues. Has to be page aligned.
+        */
+       struct ena_common_mem_addr sq_ba;
+
+       /* specifies queue head writeback location in OS memory. Valid if
+        * completion_policy is set to completion_policy_head_on_demand or
+        * completion_policy_head. Has to be cache aligned
+        */
+       struct ena_common_mem_addr sq_head_writeback;
+
+       u32 reserved0_w7;
+
+       u32 reserved0_w8;
+};
+
+enum ena_admin_sq_direction {
+       ENA_ADMIN_SQ_DIRECTION_TX       = 1,
+
+       ENA_ADMIN_SQ_DIRECTION_RX       = 2,
+};
+
+struct ena_admin_acq_create_sq_resp_desc {
+       struct ena_admin_acq_common_desc acq_common_desc;
+
+       u16 sq_idx;
+
+       u16 reserved;
+
+       /* queue doorbell address as an offset to PCIe MMIO REG BAR */
+       u32 sq_doorbell_offset;
+
+       /* low latency queue ring base address as an offset to PCIe MMIO
+        * LLQ_MEM BAR
+        */
+       u32 llq_descriptors_offset;
+
+       /* low latency queue headers' memory as an offset to PCIe MMIO
+        * LLQ_MEM BAR
+        */
+       u32 llq_headers_offset;
+};
+
+struct ena_admin_aq_destroy_sq_cmd {
+       struct ena_admin_aq_common_desc aq_common_descriptor;
+
+       struct ena_admin_sq sq;
+};
+
+struct ena_admin_acq_destroy_sq_resp_desc {
+       struct ena_admin_acq_common_desc acq_common_desc;
+};
+
+struct ena_admin_aq_create_cq_cmd {
+       struct ena_admin_aq_common_desc aq_common_descriptor;
+
+       /* 4:0 : reserved5
+        * 5 : interrupt_mode_enabled - if set, cq operates
+        *    in interrupt mode, otherwise - polling
+        * 7:6 : reserved6
+        */
+       u8 cq_caps_1;
+
+       /* 4:0 : cq_entry_size_words - size of CQ entry in
+        *    32-bit words, valid values: 4, 8.
+        * 7:5 : reserved7
+        */
+       u8 cq_caps_2;
+
+       /* completion queue depth in # of entries. must be power of 2 */
+       u16 cq_depth;
+
+       /* msix vector assigned to this cq */
+       u32 msix_vector;
+
+       /* cq physical base address in OS memory. CQ must be physically
+        * contiguous
+        */
+       struct ena_common_mem_addr cq_ba;
+};
+
+struct ena_admin_acq_create_cq_resp_desc {
+       struct ena_admin_acq_common_desc acq_common_desc;
+
+       u16 cq_idx;
+
+       /* actual cq depth in number of entries */
+       u16 cq_actual_depth;
+
+       u32 numa_node_register_offset;
+
+       u32 cq_head_db_register_offset;
+
+       u32 cq_interrupt_unmask_register_offset;
+};
+
+struct ena_admin_aq_destroy_cq_cmd {
+       struct ena_admin_aq_common_desc aq_common_descriptor;
+
+       u16 cq_idx;
+
+       u16 reserved1;
+};
+
+struct ena_admin_acq_destroy_cq_resp_desc {
+       struct ena_admin_acq_common_desc acq_common_desc;
+};
+
+/* ENA AQ Get Statistics command. Extended statistics are placed in control
+ * buffer pointed to by the AQ entry
+ */
+struct ena_admin_aq_get_stats_cmd {
+       struct ena_admin_aq_common_desc aq_common_descriptor;
+
+       union {
+               /* command specific inline data */
+               u32 inline_data_w1[3];
+
+               struct ena_admin_ctrl_buff_info control_buffer;
+       } u;
+
+       /* stats type as defined in enum ena_admin_get_stats_type */
+       u8 type;
+
+       /* stats scope defined in enum ena_admin_get_stats_scope */
+       u8 scope;
+
+       u16 reserved3;
+
+       /* queue id. used when scope is specific_queue */
+       u16 queue_idx;
+
+       /* device id, value 0xFFFF means this device. Only a privileged
+        *    device can get stats of another device
+        */
+       u16 device_id;
+};
+
+/* Basic Statistics Command. */
+struct ena_admin_basic_stats {
+       u32 tx_bytes_low;
+
+       u32 tx_bytes_high;
+
+       u32 tx_pkts_low;
+
+       u32 tx_pkts_high;
+
+       u32 rx_bytes_low;
+
+       u32 rx_bytes_high;
+
+       u32 rx_pkts_low;
+
+       u32 rx_pkts_high;
+
+       u32 rx_drops_low;
+
+       u32 rx_drops_high;
+};
+
+struct ena_admin_acq_get_stats_resp {
+       struct ena_admin_acq_common_desc acq_common_desc;
+
+       struct ena_admin_basic_stats basic_stats;
+};
+
+struct ena_admin_get_set_feature_common_desc {
+       /* 1:0 : select - 0x1 - current value; 0x3 - default
+        *    value
+        * 7:3 : reserved3
+        */
+       u8 flags;
+
+       /* as appears in ena_admin_aq_feature_id */
+       u8 feature_id;
+
+       u16 reserved16;
+};
+
+struct ena_admin_device_attr_feature_desc {
+       u32 impl_id;
+
+       u32 device_version;
+
+       /* bitmap of ena_admin_aq_feature_id */
+       u32 supported_features;
+
+       u32 reserved3;
+
+       /* Indicates how many bits are used for physical address access. */
+       u32 phys_addr_width;
+
+       /* Indicates how many bits are used for virtual address access. */
+       u32 virt_addr_width;
+
+       /* unicast MAC address (in Network byte order) */
+       u8 mac_addr[6];
+
+       u8 reserved7[2];
+
+       u32 max_mtu;
+};
+
+struct ena_admin_queue_feature_desc {
+       /* including LLQs */
+       u32 max_sq_num;
+
+       u32 max_sq_depth;
+
+       u32 max_cq_num;
+
+       u32 max_cq_depth;
+
+       u32 max_llq_num;
+
+       u32 max_llq_depth;
+
+       u32 max_header_size;
+
+       /* Maximum number of descriptors, including the meta descriptor,
+        *    allowed for a single Tx packet
+        */
+       u16 max_packet_tx_descs;
+
+       /* Maximum number of descriptors allowed for a single Rx packet */
+       u16 max_packet_rx_descs;
+};
+
+struct ena_admin_set_feature_mtu_desc {
+       /* exclude L2 */
+       u32 mtu;
+};
+
+struct ena_admin_set_feature_host_attr_desc {
+       /* host OS info base address in OS memory. host info is 4KB of
+        * physically contiguous memory
+        */
+       struct ena_common_mem_addr os_info_ba;
+
+       /* host debug area base address in OS memory. debug area must be
+        * physically contiguous
+        */
+       struct ena_common_mem_addr debug_ba;
+
+       /* debug area size */
+       u32 debug_area_size;
+};
+
+struct ena_admin_feature_intr_moder_desc {
+       /* interrupt delay granularity in usec */
+       u16 intr_delay_resolution;
+
+       u16 reserved;
+};
+
+struct ena_admin_get_feature_link_desc {
+       /* Link speed in Mbps */
+       u32 speed;
+
+       /* bit field of enum ena_admin_link_types */
+       u32 supported;
+
+       /* 0 : autoneg
+        * 1 : duplex - Full Duplex
+        * 31:2 : reserved2
+        */
+       u32 flags;
+};
+
+struct ena_admin_feature_aenq_desc {
+       /* bitmask for AENQ groups the device can report */
+       u32 supported_groups;
+
+       /* bitmask for AENQ groups to report */
+       u32 enabled_groups;
+};
+
+struct ena_admin_feature_offload_desc {
+       /* 0 : TX_L3_csum_ipv4
+        * 1 : TX_L4_ipv4_csum_part - The checksum field
+        *    should be initialized with pseudo header checksum
+        * 2 : TX_L4_ipv4_csum_full
+        * 3 : TX_L4_ipv6_csum_part - The checksum field
+        *    should be initialized with pseudo header checksum
+        * 4 : TX_L4_ipv6_csum_full
+        * 5 : tso_ipv4
+        * 6 : tso_ipv6
+        * 7 : tso_ecn
+        */
+       u32 tx;
+
+       /* Receive side supported stateless offload
+        * 0 : RX_L3_csum_ipv4 - IPv4 checksum
+        * 1 : RX_L4_ipv4_csum - TCP/UDP/IPv4 checksum
+        * 2 : RX_L4_ipv6_csum - TCP/UDP/IPv6 checksum
+        * 3 : RX_hash - Hash calculation
+        */
+       u32 rx_supported;
+
+       u32 rx_enabled;
+};
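
The tx and rx_supported bit fields above map to the ENA_ADMIN_FEATURE_OFFLOAD_DESC_* masks defined near the end of this header; an illustrative capability check (not part of the driver):

/* Illustrative sketch only. */
static inline bool example_tso_ipv4_supported(const struct ena_admin_feature_offload_desc *desc)
{
	return !!(desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK);
}

static inline bool example_rx_hash_supported(const struct ena_admin_feature_offload_desc *desc)
{
	return !!(desc->rx_supported &
		  ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK);
}
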
+
+enum ena_admin_hash_functions {
+       ENA_ADMIN_TOEPLITZ      = 1,
+
+       ENA_ADMIN_CRC32         = 2,
+};
+
+struct ena_admin_feature_rss_flow_hash_control {
+       u32 keys_num;
+
+       u32 reserved;
+
+       u32 key[10];
+};
+
+struct ena_admin_feature_rss_flow_hash_function {
+       /* 7:0 : funcs - bitmask of ena_admin_hash_functions */
+       u32 supported_func;
+
+       /* 7:0 : selected_func - bitmask of
+        *    ena_admin_hash_functions
+        */
+       u32 selected_func;
+
+       /* initial value */
+       u32 init_val;
+};
+
+/* RSS flow hash protocols */
+enum ena_admin_flow_hash_proto {
+       ENA_ADMIN_RSS_TCP4      = 0,
+
+       ENA_ADMIN_RSS_UDP4      = 1,
+
+       ENA_ADMIN_RSS_TCP6      = 2,
+
+       ENA_ADMIN_RSS_UDP6      = 3,
+
+       ENA_ADMIN_RSS_IP4       = 4,
+
+       ENA_ADMIN_RSS_IP6       = 5,
+
+       ENA_ADMIN_RSS_IP4_FRAG  = 6,
+
+       ENA_ADMIN_RSS_NOT_IP    = 7,
+
+       ENA_ADMIN_RSS_PROTO_NUM = 16,
+};
+
+/* RSS flow hash fields */
+enum ena_admin_flow_hash_fields {
+       /* Ethernet Dest Addr */
+       ENA_ADMIN_RSS_L2_DA     = 0,
+
+       /* Ethernet Src Addr */
+       ENA_ADMIN_RSS_L2_SA     = 1,
+
+       /* ipv4/6 Dest Addr */
+       ENA_ADMIN_RSS_L3_DA     = 2,
+
+       /* ipv4/6 Src Addr */
+       ENA_ADMIN_RSS_L3_SA     = 5,
+
+       /* tcp/udp Dest Port */
+       ENA_ADMIN_RSS_L4_DP     = 6,
+
+       /* tcp/udp Src Port */
+       ENA_ADMIN_RSS_L4_SP     = 7,
+};
+
+struct ena_admin_proto_input {
+       /* flow hash fields (bitwise according to ena_admin_flow_hash_fields) */
+       u16 fields;
+
+       u16 reserved2;
+};
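
The fields member above is a bitmask indexed by enum ena_admin_flow_hash_fields; an illustrative value selecting L3 addresses and L4 ports, assuming each enum value names a bit position per the "bitwise" comment above (not part of the driver):

/* Illustrative sketch only. Hash on IPv4/6 source/destination addresses
 * and TCP/UDP source/destination ports.
 */
static inline u16 example_l3_l4_hash_fields(void)
{
	return BIT(ENA_ADMIN_RSS_L3_SA) | BIT(ENA_ADMIN_RSS_L3_DA) |
	       BIT(ENA_ADMIN_RSS_L4_SP) | BIT(ENA_ADMIN_RSS_L4_DP);
}
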
+
+struct ena_admin_feature_rss_hash_control {
+       struct ena_admin_proto_input supported_fields[ENA_ADMIN_RSS_PROTO_NUM];
+
+       struct ena_admin_proto_input selected_fields[ENA_ADMIN_RSS_PROTO_NUM];
+
+       struct ena_admin_proto_input reserved2[ENA_ADMIN_RSS_PROTO_NUM];
+
+       struct ena_admin_proto_input reserved3[ENA_ADMIN_RSS_PROTO_NUM];
+};
+
+struct ena_admin_feature_rss_flow_hash_input {
+       /* supported hash input sorting
+        * 1 : L3_sort - support swap L3 addresses if DA is
+        *    smaller than SA
+        * 2 : L4_sort - support swap L4 ports if DP is
+        *    smaller than SP
+        */
+       u16 supported_input_sort;
+
+       /* enabled hash input sorting
+        * 1 : enable_L3_sort - enable swap L3 addresses if
+        *    DA smaller than SA
+        * 2 : enable_L4_sort - enable swap L4 ports if DP
+        *    smaller than SP
+        */
+       u16 enabled_input_sort;
+};
+
+enum ena_admin_os_type {
+       ENA_ADMIN_OS_LINUX      = 1,
+
+       ENA_ADMIN_OS_WIN        = 2,
+
+       ENA_ADMIN_OS_DPDK       = 3,
+
+       ENA_ADMIN_OS_FREEBSD    = 4,
+
+       ENA_ADMIN_OS_IPXE       = 5,
+};
+
+struct ena_admin_host_info {
+       /* defined in enum ena_admin_os_type */
+       u32 os_type;
+
+       /* os distribution string format */
+       u8 os_dist_str[128];
+
+       /* OS distribution numeric format */
+       u32 os_dist;
+
+       /* kernel version string format */
+       u8 kernel_ver_str[32];
+
+       /* Kernel version numeric format */
+       u32 kernel_ver;
+
+       /* 7:0 : major
+        * 15:8 : minor
+        * 23:16 : sub_minor
+        */
+       u32 driver_version;
+
+       /* features bitmap */
+       u32 supported_network_features[4];
+};
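
driver_version above packs major/minor/sub_minor using the ENA_ADMIN_HOST_INFO_* shifts and masks defined near the end of this header; an illustrative composition (not part of the driver):

/* Illustrative sketch only. */
static inline u32 example_driver_version(u8 major, u8 minor, u8 sub_minor)
{
	return (major & ENA_ADMIN_HOST_INFO_MAJOR_MASK) |
	       ((minor << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) &
		ENA_ADMIN_HOST_INFO_MINOR_MASK) |
	       ((sub_minor << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) &
		ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK);
}
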
+
+struct ena_admin_rss_ind_table_entry {
+       u16 cq_idx;
+
+       u16 reserved;
+};
+
+struct ena_admin_feature_rss_ind_table {
+       /* min supported table size (2^min_size) */
+       u16 min_size;
+
+       /* max supported table size (2^max_size) */
+       u16 max_size;
+
+       /* table size (2^size) */
+       u16 size;
+
+       u16 reserved;
+
+       /* index of the inline entry. 0xFFFFFFFF means invalid */
+       u32 inline_index;
+
+       /* used for updating single entry, ignored when setting the entire
+        * table through the control buffer.
+        */
+       struct ena_admin_rss_ind_table_entry inline_entry;
+};
+
+struct ena_admin_get_feat_cmd {
+       struct ena_admin_aq_common_desc aq_common_descriptor;
+
+       struct ena_admin_ctrl_buff_info control_buffer;
+
+       struct ena_admin_get_set_feature_common_desc feat_common;
+
+       u32 raw[11];
+};
+
+struct ena_admin_get_feat_resp {
+       struct ena_admin_acq_common_desc acq_common_desc;
+
+       union {
+               u32 raw[14];
+
+               struct ena_admin_device_attr_feature_desc dev_attr;
+
+               struct ena_admin_queue_feature_desc max_queue;
+
+               struct ena_admin_feature_aenq_desc aenq;
+
+               struct ena_admin_get_feature_link_desc link;
+
+               struct ena_admin_feature_offload_desc offload;
+
+               struct ena_admin_feature_rss_flow_hash_function flow_hash_func;
+
+               struct ena_admin_feature_rss_flow_hash_input flow_hash_input;
+
+               struct ena_admin_feature_rss_ind_table ind_table;
+
+               struct ena_admin_feature_intr_moder_desc intr_moderation;
+       } u;
+};
+
+struct ena_admin_set_feat_cmd {
+       struct ena_admin_aq_common_desc aq_common_descriptor;
+
+       struct ena_admin_ctrl_buff_info control_buffer;
+
+       struct ena_admin_get_set_feature_common_desc feat_common;
+
+       union {
+               u32 raw[11];
+
+               /* mtu size */
+               struct ena_admin_set_feature_mtu_desc mtu;
+
+               /* host attributes */
+               struct ena_admin_set_feature_host_attr_desc host_attr;
+
+               /* AENQ configuration */
+               struct ena_admin_feature_aenq_desc aenq;
+
+               /* rss flow hash function */
+               struct ena_admin_feature_rss_flow_hash_function flow_hash_func;
+
+               /* rss flow hash input */
+               struct ena_admin_feature_rss_flow_hash_input flow_hash_input;
+
+               /* rss indirection table */
+               struct ena_admin_feature_rss_ind_table ind_table;
+       } u;
+};
+
+struct ena_admin_set_feat_resp {
+       struct ena_admin_acq_common_desc acq_common_desc;
+
+       union {
+               u32 raw[14];
+       } u;
+};
+
+struct ena_admin_aenq_common_desc {
+       u16 group;
+
+       u16 syndrom;
+
+       /* 0 : phase */
+       u8 flags;
+
+       u8 reserved1[3];
+
+       u32 timestamp_low;
+
+       u32 timestamp_high;
+};
+
+/* asynchronous event notification groups */
+enum ena_admin_aenq_group {
+       ENA_ADMIN_LINK_CHANGE           = 0,
+
+       ENA_ADMIN_FATAL_ERROR           = 1,
+
+       ENA_ADMIN_WARNING               = 2,
+
+       ENA_ADMIN_NOTIFICATION          = 3,
+
+       ENA_ADMIN_KEEP_ALIVE            = 4,
+
+       ENA_ADMIN_AENQ_GROUPS_NUM       = 5,
+};
+
+enum ena_admin_aenq_notification_syndrom {
+       ENA_ADMIN_SUSPEND       = 0,
+
+       ENA_ADMIN_RESUME        = 1,
+};
+
+struct ena_admin_aenq_entry {
+       struct ena_admin_aenq_common_desc aenq_common_desc;
+
+       /* command specific inline data */
+       u32 inline_data_w4[12];
+};
+
+struct ena_admin_aenq_link_change_desc {
+       struct ena_admin_aenq_common_desc aenq_common_desc;
+
+       /* 0 : link_status */
+       u32 flags;
+};
+
+struct ena_admin_ena_mmio_req_read_less_resp {
+       u16 req_id;
+
+       u16 reg_off;
+
+       /* value is valid when poll is cleared */
+       u32 reg_val;
+};
+
+/* aq_common_desc */
+#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0)
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1)
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2)
+
+/* sq */
+#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT 5
+#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK GENMASK(7, 5)
+
+/* acq_common_desc */
+#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0)
+
+/* aq_create_sq_cmd */
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT 5
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK GENMASK(7, 5)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK GENMASK(3, 0)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT 4
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK GENMASK(6, 4)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK BIT(0)
+
+/* aq_create_cq_cmd */
+#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5
+#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5)
+#define ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
+
+/* get_set_feature_common_desc */
+#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0)
+
+/* get_feature_link_desc */
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK BIT(0)
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT 1
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK BIT(1)
+
+/* feature_offload_desc */
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK BIT(0)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_SHIFT 1
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_SHIFT 2
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_SHIFT 3
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK BIT(3)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_SHIFT 4
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK BIT(4)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT 5
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK BIT(5)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT 6
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK BIT(6)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT 7
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK BIT(7)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK BIT(0)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_SHIFT 1
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_SHIFT 2
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT 3
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK BIT(3)
+
+/* feature_rss_flow_hash_function */
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_FUNCS_MASK GENMASK(7, 0)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_SELECTED_FUNC_MASK GENMASK(7, 0)
+
+/* feature_rss_flow_hash_input */
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_SHIFT 1
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_SHIFT 2
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_SHIFT 1
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_SHIFT 2
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_MASK BIT(2)
+
+/* host_info */
+#define ENA_ADMIN_HOST_INFO_MAJOR_MASK GENMASK(7, 0)
+#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT 8
+#define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8)
+#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16
+#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16)
+
+/* aenq_common_desc */
+#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0)
+
+/* aenq_link_change_desc */
+#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0)
+
+#endif /* _ENA_ADMIN_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
new file mode 100644 (file)
index 0000000..3066d9c
--- /dev/null
@@ -0,0 +1,2666 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ena_com.h"
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/* Timeout in micro-sec */
+#define ADMIN_CMD_TIMEOUT_US (1000000)
+
+#define ENA_ASYNC_QUEUE_DEPTH 4
+#define ENA_ADMIN_QUEUE_DEPTH 32
+
+#define MIN_ENA_VER (((ENA_COMMON_SPEC_VERSION_MAJOR) << \
+               ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) \
+               | (ENA_COMMON_SPEC_VERSION_MINOR))
+
+#define ENA_CTRL_MAJOR         0
+#define ENA_CTRL_MINOR         0
+#define ENA_CTRL_SUB_MINOR     1
+
+#define MIN_ENA_CTRL_VER \
+       (((ENA_CTRL_MAJOR) << \
+       (ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \
+       ((ENA_CTRL_MINOR) << \
+       (ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \
+       (ENA_CTRL_SUB_MINOR))
+
+#define ENA_DMA_ADDR_TO_UINT32_LOW(x)  ((u32)((u64)(x)))
+#define ENA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32))
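+
+/* For example, a 64-bit DMA address is split and programmed into a LO/HI
+ * register pair (as done for the AENQ base in ena_com_admin_init_aenq()):
+ *     writel(ENA_DMA_ADDR_TO_UINT32_LOW(addr), reg_bar + <..._LO_OFF>);
+ *     writel(ENA_DMA_ADDR_TO_UINT32_HIGH(addr), reg_bar + <..._HI_OFF>);
+ */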
+
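+/* Sentinel value returned by ena_com_reg_bar_read32() when a readless
+ * register read fails (timeout or mismatched offset).
+ */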
+#define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF
+
+/*****************************************************************************/
+/*****************************************************************************/
+/*****************************************************************************/
+
+enum ena_cmd_status {
+       ENA_CMD_SUBMITTED,
+       ENA_CMD_COMPLETED,
+       /* Abort - canceled by the driver */
+       ENA_CMD_ABORTED,
+};
+
+struct ena_comp_ctx {
+       struct completion wait_event;
+       struct ena_admin_acq_entry *user_cqe;
+       u32 comp_size;
+       enum ena_cmd_status status;
+       /* status from the device */
+       u8 comp_status;
+       u8 cmd_opcode;
+       bool occupied;
+};
+
+struct ena_com_stats_ctx {
+       struct ena_admin_aq_get_stats_cmd get_cmd;
+       struct ena_admin_acq_get_stats_resp get_resp;
+};
+
+static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev,
+                                      struct ena_common_mem_addr *ena_addr,
+                                      dma_addr_t addr)
+{
+       if ((addr & GENMASK_ULL(ena_dev->dma_addr_bits - 1, 0)) != addr) {
+               pr_err("dma address has more bits than the device supports\n");
+               return -EINVAL;
+       }
+
+       ena_addr->mem_addr_low = (u32)addr;
+       ena_addr->mem_addr_high = (u64)addr >> 32;
+
+       return 0;
+}
+
+static int ena_com_admin_init_sq(struct ena_com_admin_queue *queue)
+{
+       struct ena_com_admin_sq *sq = &queue->sq;
+       u16 size = ADMIN_SQ_SIZE(queue->q_depth);
+
+       sq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &sq->dma_addr,
+                                         GFP_KERNEL);
+
+       if (!sq->entries) {
+               pr_err("memory allocation failed\n");
+               return -ENOMEM;
+       }
+
+       sq->head = 0;
+       sq->tail = 0;
+       sq->phase = 1;
+
+       sq->db_addr = NULL;
+
+       return 0;
+}
+
+static int ena_com_admin_init_cq(struct ena_com_admin_queue *queue)
+{
+       struct ena_com_admin_cq *cq = &queue->cq;
+       u16 size = ADMIN_CQ_SIZE(queue->q_depth);
+
+       cq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &cq->dma_addr,
+                                         GFP_KERNEL);
+
+       if (!cq->entries) {
+               pr_err("memory allocation failed\n");
+               return -ENOMEM;
+       }
+
+       cq->head = 0;
+       cq->phase = 1;
+
+       return 0;
+}
+
+static int ena_com_admin_init_aenq(struct ena_com_dev *dev,
+                                  struct ena_aenq_handlers *aenq_handlers)
+{
+       struct ena_com_aenq *aenq = &dev->aenq;
+       u32 addr_low, addr_high, aenq_caps;
+       u16 size;
+
+       dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH;
+       size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH);
+       aenq->entries = dma_zalloc_coherent(dev->dmadev, size, &aenq->dma_addr,
+                                           GFP_KERNEL);
+
+       if (!aenq->entries) {
+               pr_err("memory allocation failed\n");
+               return -ENOMEM;
+       }
+
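+       /* head == q_depth marks the whole ring as initially available;
+        * ena_com_admin_aenq_enable() later writes this value to the AENQ
+        * head doorbell.
+        */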
+       aenq->head = aenq->q_depth;
+       aenq->phase = 1;
+
+       addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr);
+       addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr);
+
+       writel(addr_low, dev->reg_bar + ENA_REGS_AENQ_BASE_LO_OFF);
+       writel(addr_high, dev->reg_bar + ENA_REGS_AENQ_BASE_HI_OFF);
+
+       aenq_caps = 0;
+       aenq_caps |= dev->aenq.q_depth & ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK;
+       aenq_caps |= (sizeof(struct ena_admin_aenq_entry)
+                     << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
+                    ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
+       writel(aenq_caps, dev->reg_bar + ENA_REGS_AENQ_CAPS_OFF);
+
+       if (unlikely(!aenq_handlers)) {
+               pr_err("aenq handlers pointer is NULL\n");
+               return -EINVAL;
+       }
+
+       aenq->aenq_handlers = aenq_handlers;
+
+       return 0;
+}
+
+static inline void comp_ctxt_release(struct ena_com_admin_queue *queue,
+                                    struct ena_comp_ctx *comp_ctx)
+{
+       comp_ctx->occupied = false;
+       atomic_dec(&queue->outstanding_cmds);
+}
+
+static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue,
+                                         u16 command_id, bool capture)
+{
+       if (unlikely(command_id >= queue->q_depth)) {
+               pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n",
+                      command_id, queue->q_depth);
+               return NULL;
+       }
+
+       if (unlikely(queue->comp_ctx[command_id].occupied && capture)) {
+               pr_err("Completion context is occupied\n");
+               return NULL;
+       }
+
+       if (capture) {
+               atomic_inc(&queue->outstanding_cmds);
+               queue->comp_ctx[command_id].occupied = true;
+       }
+
+       return &queue->comp_ctx[command_id];
+}
+
+static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue,
+                                                      struct ena_admin_aq_entry *cmd,
+                                                      size_t cmd_size_in_bytes,
+                                                      struct ena_admin_acq_entry *comp,
+                                                      size_t comp_size_in_bytes)
+{
+       struct ena_comp_ctx *comp_ctx;
+       u16 tail_masked, cmd_id;
+       u16 queue_size_mask;
+       u16 cnt;
+
+       queue_size_mask = admin_queue->q_depth - 1;
+
+       tail_masked = admin_queue->sq.tail & queue_size_mask;
+
+       /* In case of queue FULL */
+       cnt = admin_queue->sq.tail - admin_queue->sq.head;
+       if (cnt >= admin_queue->q_depth) {
+               pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n",
+                        admin_queue->sq.tail, admin_queue->sq.head,
+                        admin_queue->q_depth);
+               admin_queue->stats.out_of_space++;
+               return ERR_PTR(-ENOSPC);
+       }
+
+       cmd_id = admin_queue->curr_cmd_id;
+
+       cmd->aq_common_descriptor.flags |= admin_queue->sq.phase &
+               ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
+
+       cmd->aq_common_descriptor.command_id |= cmd_id &
+               ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
+
+       comp_ctx = get_comp_ctxt(admin_queue, cmd_id, true);
+       if (unlikely(!comp_ctx))
+               return ERR_PTR(-EINVAL);
+
+       comp_ctx->status = ENA_CMD_SUBMITTED;
+       comp_ctx->comp_size = (u32)comp_size_in_bytes;
+       comp_ctx->user_cqe = comp;
+       comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
+
+       reinit_completion(&comp_ctx->wait_event);
+
+       memcpy(&admin_queue->sq.entries[tail_masked], cmd, cmd_size_in_bytes);
+
+       admin_queue->curr_cmd_id = (admin_queue->curr_cmd_id + 1) &
+               queue_size_mask;
+
+       admin_queue->sq.tail++;
+       admin_queue->stats.submitted_cmd++;
+
+       if (unlikely((admin_queue->sq.tail & queue_size_mask) == 0))
+               admin_queue->sq.phase = !admin_queue->sq.phase;
+
+       writel(admin_queue->sq.tail, admin_queue->sq.db_addr);
+
+       return comp_ctx;
+}
+
+static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue)
+{
+       size_t size = queue->q_depth * sizeof(struct ena_comp_ctx);
+       struct ena_comp_ctx *comp_ctx;
+       u16 i;
+
+       queue->comp_ctx = devm_kzalloc(queue->q_dmadev, size, GFP_KERNEL);
+       if (unlikely(!queue->comp_ctx)) {
+               pr_err("memory allocation failed\n");
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < queue->q_depth; i++) {
+               comp_ctx = get_comp_ctxt(queue, i, false);
+               if (comp_ctx)
+                       init_completion(&comp_ctx->wait_event);
+       }
+
+       return 0;
+}
+
+static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue,
+                                                    struct ena_admin_aq_entry *cmd,
+                                                    size_t cmd_size_in_bytes,
+                                                    struct ena_admin_acq_entry *comp,
+                                                    size_t comp_size_in_bytes)
+{
+       unsigned long flags;
+       struct ena_comp_ctx *comp_ctx;
+
+       spin_lock_irqsave(&admin_queue->q_lock, flags);
+       if (unlikely(!admin_queue->running_state)) {
+               spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+               return ERR_PTR(-ENODEV);
+       }
+       comp_ctx = __ena_com_submit_admin_cmd(admin_queue, cmd,
+                                             cmd_size_in_bytes,
+                                             comp,
+                                             comp_size_in_bytes);
+       if (unlikely(IS_ERR(comp_ctx)))
+               admin_queue->running_state = false;
+       spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+       return comp_ctx;
+}
+
+static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
+                             struct ena_com_create_io_ctx *ctx,
+                             struct ena_com_io_sq *io_sq)
+{
+       size_t size;
+       int dev_node = 0;
+
+       memset(&io_sq->desc_addr, 0x0, sizeof(struct ena_com_io_desc_addr));
+
+       io_sq->desc_entry_size =
+               (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
+               sizeof(struct ena_eth_io_tx_desc) :
+               sizeof(struct ena_eth_io_rx_desc);
+
+       size = io_sq->desc_entry_size * io_sq->q_depth;
+
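+       /* Try to allocate the descriptor ring on the requested NUMA node
+        * first; if that fails, retry on the device's original node.
+        */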
+       if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) {
+               dev_node = dev_to_node(ena_dev->dmadev);
+               set_dev_node(ena_dev->dmadev, ctx->numa_node);
+               io_sq->desc_addr.virt_addr =
+                       dma_zalloc_coherent(ena_dev->dmadev, size,
+                                           &io_sq->desc_addr.phys_addr,
+                                           GFP_KERNEL);
+               set_dev_node(ena_dev->dmadev, dev_node);
+               if (!io_sq->desc_addr.virt_addr) {
+                       io_sq->desc_addr.virt_addr =
+                               dma_zalloc_coherent(ena_dev->dmadev, size,
+                                                   &io_sq->desc_addr.phys_addr,
+                                                   GFP_KERNEL);
+               }
+       } else {
+               dev_node = dev_to_node(ena_dev->dmadev);
+               set_dev_node(ena_dev->dmadev, ctx->numa_node);
+               io_sq->desc_addr.virt_addr =
+                       devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+               set_dev_node(ena_dev->dmadev, dev_node);
+               if (!io_sq->desc_addr.virt_addr) {
+                       io_sq->desc_addr.virt_addr =
+                               devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+               }
+       }
+
+       if (!io_sq->desc_addr.virt_addr) {
+               pr_err("memory allocation failed\n");
+               return -ENOMEM;
+       }
+
+       io_sq->tail = 0;
+       io_sq->next_to_comp = 0;
+       io_sq->phase = 1;
+
+       return 0;
+}
+
+static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
+                             struct ena_com_create_io_ctx *ctx,
+                             struct ena_com_io_cq *io_cq)
+{
+       size_t size;
+       int prev_node = 0;
+
+       memset(&io_cq->cdesc_addr, 0x0, sizeof(struct ena_com_io_desc_addr));
+
+       /* Use the basic completion descriptor for Rx */
+       io_cq->cdesc_entry_size_in_bytes =
+               (io_cq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
+               sizeof(struct ena_eth_io_tx_cdesc) :
+               sizeof(struct ena_eth_io_rx_cdesc_base);
+
+       size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth;
+
+       prev_node = dev_to_node(ena_dev->dmadev);
+       set_dev_node(ena_dev->dmadev, ctx->numa_node);
+       io_cq->cdesc_addr.virt_addr =
+               dma_zalloc_coherent(ena_dev->dmadev, size,
+                                   &io_cq->cdesc_addr.phys_addr, GFP_KERNEL);
+       set_dev_node(ena_dev->dmadev, prev_node);
+       if (!io_cq->cdesc_addr.virt_addr) {
+               io_cq->cdesc_addr.virt_addr =
+                       dma_zalloc_coherent(ena_dev->dmadev, size,
+                                           &io_cq->cdesc_addr.phys_addr,
+                                           GFP_KERNEL);
+       }
+
+       if (!io_cq->cdesc_addr.virt_addr) {
+               pr_err("memory allocation failed\n");
+               return -ENOMEM;
+       }
+
+       io_cq->phase = 1;
+       io_cq->head = 0;
+
+       return 0;
+}
+
+static void ena_com_handle_single_admin_completion(struct ena_com_admin_queue *admin_queue,
+                                                  struct ena_admin_acq_entry *cqe)
+{
+       struct ena_comp_ctx *comp_ctx;
+       u16 cmd_id;
+
+       cmd_id = cqe->acq_common_descriptor.command &
+               ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+
+       comp_ctx = get_comp_ctxt(admin_queue, cmd_id, false);
+       if (unlikely(!comp_ctx)) {
+               pr_err("comp_ctx is NULL. Changing the admin queue running state\n");
+               admin_queue->running_state = false;
+               return;
+       }
+
+       comp_ctx->status = ENA_CMD_COMPLETED;
+       comp_ctx->comp_status = cqe->acq_common_descriptor.status;
+
+       if (comp_ctx->user_cqe)
+               memcpy(comp_ctx->user_cqe, (void *)cqe, comp_ctx->comp_size);
+
+       if (!admin_queue->polling)
+               complete(&comp_ctx->wait_event);
+}
+
+static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_queue)
+{
+       struct ena_admin_acq_entry *cqe = NULL;
+       u16 comp_num = 0;
+       u16 head_masked;
+       u8 phase;
+
+       head_masked = admin_queue->cq.head & (admin_queue->q_depth - 1);
+       phase = admin_queue->cq.phase;
+
+       cqe = &admin_queue->cq.entries[head_masked];
+
+       /* Go over all the completions */
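+       /* An entry holds a new completion when its phase bit matches the
+        * queue's current phase; the expected phase flips each time the
+        * head wraps around the ring.
+        */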
+       while ((cqe->acq_common_descriptor.flags &
+                       ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
+               /* Do not read the rest of the completion entry until the
+                * phase bit has been validated
+                */
+               rmb();
+               ena_com_handle_single_admin_completion(admin_queue, cqe);
+
+               head_masked++;
+               comp_num++;
+               if (unlikely(head_masked == admin_queue->q_depth)) {
+                       head_masked = 0;
+                       phase = !phase;
+               }
+
+               cqe = &admin_queue->cq.entries[head_masked];
+       }
+
+       admin_queue->cq.head += comp_num;
+       admin_queue->cq.phase = phase;
+       admin_queue->sq.head += comp_num;
+       admin_queue->stats.completed_cmd += comp_num;
+}
+
+static int ena_com_comp_status_to_errno(u8 comp_status)
+{
+       if (unlikely(comp_status != 0))
+               pr_err("admin command failed[%u]\n", comp_status);
+
+       if (unlikely(comp_status > ENA_ADMIN_UNKNOWN_ERROR))
+               return -EINVAL;
+
+       switch (comp_status) {
+       case ENA_ADMIN_SUCCESS:
+               return 0;
+       case ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
+               return -ENOMEM;
+       case ENA_ADMIN_UNSUPPORTED_OPCODE:
+               return -EPERM;
+       case ENA_ADMIN_BAD_OPCODE:
+       case ENA_ADMIN_MALFORMED_REQUEST:
+       case ENA_ADMIN_ILLEGAL_PARAMETER:
+       case ENA_ADMIN_UNKNOWN_ERROR:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
+                                                    struct ena_com_admin_queue *admin_queue)
+{
+       unsigned long flags;
+       u32 start_time;
+       int ret;
+
+       start_time = ((u32)jiffies_to_usecs(jiffies));
+
+       while (comp_ctx->status == ENA_CMD_SUBMITTED) {
+               if ((((u32)jiffies_to_usecs(jiffies)) - start_time) >
+                   ADMIN_CMD_TIMEOUT_US) {
+                       pr_err("Wait for completion (polling) timeout\n");
+                       /* ENA didn't have any completion */
+                       spin_lock_irqsave(&admin_queue->q_lock, flags);
+                       admin_queue->stats.no_completion++;
+                       admin_queue->running_state = false;
+                       spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+                       ret = -ETIME;
+                       goto err;
+               }
+
+               spin_lock_irqsave(&admin_queue->q_lock, flags);
+               ena_com_handle_admin_completion(admin_queue);
+               spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+               msleep(100);
+       }
+
+       if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) {
+               pr_err("Command was aborted\n");
+               spin_lock_irqsave(&admin_queue->q_lock, flags);
+               admin_queue->stats.aborted_cmd++;
+               spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+               ret = -ENODEV;
+               goto err;
+       }
+
+       WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n",
+            comp_ctx->status);
+
+       ret = ena_com_comp_status_to_errno(comp_ctx->comp_status);
+err:
+       comp_ctxt_release(admin_queue, comp_ctx);
+       return ret;
+}
+
+static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx,
+                                                       struct ena_com_admin_queue *admin_queue)
+{
+       unsigned long flags;
+       int ret;
+
+       wait_for_completion_timeout(&comp_ctx->wait_event,
+                                   usecs_to_jiffies(ADMIN_CMD_TIMEOUT_US));
+
+       /* In case the command wasn't completed, find out the root cause.
+        * There might be 2 kinds of errors:
+        * 1) No completion (timeout reached)
+        * 2) There is a completion but the driver didn't receive any MSI-X
+        *    interrupt.
+        */
+       if (unlikely(comp_ctx->status == ENA_CMD_SUBMITTED)) {
+               spin_lock_irqsave(&admin_queue->q_lock, flags);
+               ena_com_handle_admin_completion(admin_queue);
+               admin_queue->stats.no_completion++;
+               spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+               if (comp_ctx->status == ENA_CMD_COMPLETED)
+                       pr_err("The ena device has a completion but the driver didn't receive any MSI-X interrupt (cmd %d)\n",
+                              comp_ctx->cmd_opcode);
+               else
+                       pr_err("The ena device didn't send any completion for the admin cmd %d status %d\n",
+                              comp_ctx->cmd_opcode, comp_ctx->status);
+
+               admin_queue->running_state = false;
+               ret = -ETIME;
+               goto err;
+       }
+
+       ret = ena_com_comp_status_to_errno(comp_ctx->comp_status);
+err:
+       comp_ctxt_release(admin_queue, comp_ctx);
+       return ret;
+}
+
+/* This method reads a hardware device register by posting a write request
+ * and waiting for the response.
+ * On timeout the function returns ENA_MMIO_READ_TIMEOUT.
+ */
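+/* Illustrative use (this is how ena_com_admin_init() below reads the device
+ * status register):
+ *     dev_sts = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
+ *     if (unlikely(dev_sts == ENA_MMIO_READ_TIMEOUT))
+ *             return -ETIME;
+ */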
+static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
+{
+       struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+       volatile struct ena_admin_ena_mmio_req_read_less_resp *read_resp =
+               mmio_read->read_resp;
+       u32 mmio_read_reg, ret;
+       unsigned long flags;
+       int i;
+
+       might_sleep();
+
+       /* If readless is disabled, perform regular read */
+       if (!mmio_read->readless_supported)
+               return readl(ena_dev->reg_bar + offset);
+
+       spin_lock_irqsave(&mmio_read->lock, flags);
+       mmio_read->seq_num++;
+
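+       /* Pre-fill req_id with a value that cannot equal the new seq_num, so
+        * the polling loop below only exits once the device has written its
+        * response.
+        */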
+       read_resp->req_id = mmio_read->seq_num + 0xDEAD;
+       mmio_read_reg = (offset << ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) &
+                       ENA_REGS_MMIO_REG_READ_REG_OFF_MASK;
+       mmio_read_reg |= mmio_read->seq_num &
+                       ENA_REGS_MMIO_REG_READ_REQ_ID_MASK;
+
+       /* make sure read_resp->req_id gets updated before the hw can write
+        * to it
+        */
+       wmb();
+
+       writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
+
+       for (i = 0; i < ENA_REG_READ_TIMEOUT; i++) {
+               if (read_resp->req_id == mmio_read->seq_num)
+                       break;
+
+               udelay(1);
+       }
+
+       if (unlikely(i == ENA_REG_READ_TIMEOUT)) {
+               pr_err("reg read timed out. expected: req id[%hu] offset[%hu] actual: req id[%hu] offset[%hu]\n",
+                      mmio_read->seq_num, offset, read_resp->req_id,
+                      read_resp->reg_off);
+               ret = ENA_MMIO_READ_TIMEOUT;
+               goto err;
+       }
+
+       if (read_resp->reg_off != offset) {
+               pr_err("Read failure: wrong offset provided\n");
+               ret = ENA_MMIO_READ_TIMEOUT;
+       } else {
+               ret = read_resp->reg_val;
+       }
+err:
+       spin_unlock_irqrestore(&mmio_read->lock, flags);
+
+       return ret;
+}
+
+/* There are two ways to wait for a completion.
+ * Polling mode - poll the admin completion queue until the completion is
+ * available.
+ * Async mode - sleep on a wait queue until the completion is ready
+ * (or the timeout expires).
+ * In async mode the IRQ handler is expected to call
+ * ena_com_handle_admin_completion() to mark the completions.
+ */
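+/* The mode is selected per device via ena_com_set_admin_polling_mode(). */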
+static int ena_com_wait_and_process_admin_cq(struct ena_comp_ctx *comp_ctx,
+                                            struct ena_com_admin_queue *admin_queue)
+{
+       if (admin_queue->polling)
+               return ena_com_wait_and_process_admin_cq_polling(comp_ctx,
+                                                                admin_queue);
+
+       return ena_com_wait_and_process_admin_cq_interrupts(comp_ctx,
+                                                           admin_queue);
+}
+
+static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev,
+                                struct ena_com_io_sq *io_sq)
+{
+       struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+       struct ena_admin_aq_destroy_sq_cmd destroy_cmd;
+       struct ena_admin_acq_destroy_sq_resp_desc destroy_resp;
+       u8 direction;
+       int ret;
+
+       memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd));
+
+       if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
+               direction = ENA_ADMIN_SQ_DIRECTION_TX;
+       else
+               direction = ENA_ADMIN_SQ_DIRECTION_RX;
+
+       destroy_cmd.sq.sq_identity |= (direction <<
+               ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT) &
+               ENA_ADMIN_SQ_SQ_DIRECTION_MASK;
+
+       destroy_cmd.sq.sq_idx = io_sq->idx;
+       destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_SQ;
+
+       ret = ena_com_execute_admin_command(admin_queue,
+                                           (struct ena_admin_aq_entry *)&destroy_cmd,
+                                           sizeof(destroy_cmd),
+                                           (struct ena_admin_acq_entry *)&destroy_resp,
+                                           sizeof(destroy_resp));
+
+       if (unlikely(ret && (ret != -ENODEV)))
+               pr_err("failed to destroy io sq error: %d\n", ret);
+
+       return ret;
+}
+
+static void ena_com_io_queue_free(struct ena_com_dev *ena_dev,
+                                 struct ena_com_io_sq *io_sq,
+                                 struct ena_com_io_cq *io_cq)
+{
+       size_t size;
+
+       if (io_cq->cdesc_addr.virt_addr) {
+               size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth;
+
+               dma_free_coherent(ena_dev->dmadev, size,
+                                 io_cq->cdesc_addr.virt_addr,
+                                 io_cq->cdesc_addr.phys_addr);
+
+               io_cq->cdesc_addr.virt_addr = NULL;
+       }
+
+       if (io_sq->desc_addr.virt_addr) {
+               size = io_sq->desc_entry_size * io_sq->q_depth;
+
+               if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+                       dma_free_coherent(ena_dev->dmadev, size,
+                                         io_sq->desc_addr.virt_addr,
+                                         io_sq->desc_addr.phys_addr);
+               else
+                       devm_kfree(ena_dev->dmadev, io_sq->desc_addr.virt_addr);
+
+               io_sq->desc_addr.virt_addr = NULL;
+       }
+}
+
+static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
+                               u16 exp_state)
+{
+       u32 val, i;
+
+       for (i = 0; i < timeout; i++) {
+               val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
+
+               if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) {
+                       pr_err("Reg read timeout occurred\n");
+                       return -ETIME;
+               }
+
+               if ((val & ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) ==
+                       exp_state)
+                       return 0;
+
+               /* The resolution of the timeout is 100ms */
+               msleep(100);
+       }
+
+       return -ETIME;
+}
+
+static bool ena_com_check_supported_feature_id(struct ena_com_dev *ena_dev,
+                                              enum ena_admin_aq_feature_id feature_id)
+{
+       u32 feature_mask = 1 << feature_id;
+
+       /* The device attributes feature is always supported */
+       if ((feature_id != ENA_ADMIN_DEVICE_ATTRIBUTES) &&
+           !(ena_dev->supported_features & feature_mask))
+               return false;
+
+       return true;
+}
+
+static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev,
+                                 struct ena_admin_get_feat_resp *get_resp,
+                                 enum ena_admin_aq_feature_id feature_id,
+                                 dma_addr_t control_buf_dma_addr,
+                                 u32 control_buff_size)
+{
+       struct ena_com_admin_queue *admin_queue;
+       struct ena_admin_get_feat_cmd get_cmd;
+       int ret;
+
+       if (!ena_com_check_supported_feature_id(ena_dev, feature_id)) {
+               pr_info("Feature %d isn't supported\n", feature_id);
+               return -EPERM;
+       }
+
+       memset(&get_cmd, 0x0, sizeof(get_cmd));
+       admin_queue = &ena_dev->admin_queue;
+
+       get_cmd.aq_common_descriptor.opcode = ENA_ADMIN_GET_FEATURE;
+
+       if (control_buff_size)
+               get_cmd.aq_common_descriptor.flags =
+                       ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+       else
+               get_cmd.aq_common_descriptor.flags = 0;
+
+       ret = ena_com_mem_addr_set(ena_dev,
+                                  &get_cmd.control_buffer.address,
+                                  control_buf_dma_addr);
+       if (unlikely(ret)) {
+               pr_err("memory address set failed\n");
+               return ret;
+       }
+
+       get_cmd.control_buffer.length = control_buff_size;
+
+       get_cmd.feat_common.feature_id = feature_id;
+
+       ret = ena_com_execute_admin_command(admin_queue,
+                                           (struct ena_admin_aq_entry *)
+                                           &get_cmd,
+                                           sizeof(get_cmd),
+                                           (struct ena_admin_acq_entry *)
+                                           get_resp,
+                                           sizeof(*get_resp));
+
+       if (unlikely(ret))
+               pr_err("Failed to submit get_feature command %d error: %d\n",
+                      feature_id, ret);
+
+       return ret;
+}
+
+static int ena_com_get_feature(struct ena_com_dev *ena_dev,
+                              struct ena_admin_get_feat_resp *get_resp,
+                              enum ena_admin_aq_feature_id feature_id)
+{
+       return ena_com_get_feature_ex(ena_dev,
+                                     get_resp,
+                                     feature_id,
+                                     0,
+                                     0);
+}
+
+static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+
+       rss->hash_key =
+               dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
+                                   &rss->hash_key_dma_addr, GFP_KERNEL);
+
+       if (unlikely(!rss->hash_key))
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void ena_com_hash_key_destroy(struct ena_com_dev *ena_dev)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+
+       if (rss->hash_key)
+               dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
+                                 rss->hash_key, rss->hash_key_dma_addr);
+       rss->hash_key = NULL;
+}
+
+static int ena_com_hash_ctrl_init(struct ena_com_dev *ena_dev)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+
+       rss->hash_ctrl =
+               dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl),
+                                   &rss->hash_ctrl_dma_addr, GFP_KERNEL);
+
+       if (unlikely(!rss->hash_ctrl))
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void ena_com_hash_ctrl_destroy(struct ena_com_dev *ena_dev)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+
+       if (rss->hash_ctrl)
+               dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl),
+                                 rss->hash_ctrl, rss->hash_ctrl_dma_addr);
+       rss->hash_ctrl = NULL;
+}
+
+static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev,
+                                          u16 log_size)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+       struct ena_admin_get_feat_resp get_resp;
+       size_t tbl_size;
+       int ret;
+
+       ret = ena_com_get_feature(ena_dev, &get_resp,
+                                 ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG);
+       if (unlikely(ret))
+               return ret;
+
+       if ((get_resp.u.ind_table.min_size > log_size) ||
+           (get_resp.u.ind_table.max_size < log_size)) {
+               pr_err("indirect table size doesn't fit. requested size: %d while min is %d and max is %d\n",
+                      1 << log_size, 1 << get_resp.u.ind_table.min_size,
+                      1 << get_resp.u.ind_table.max_size);
+               return -EINVAL;
+       }
+
+       tbl_size = (1ULL << log_size) *
+               sizeof(struct ena_admin_rss_ind_table_entry);
+
+       rss->rss_ind_tbl =
+               dma_zalloc_coherent(ena_dev->dmadev, tbl_size,
+                                   &rss->rss_ind_tbl_dma_addr, GFP_KERNEL);
+       if (unlikely(!rss->rss_ind_tbl))
+               goto mem_err1;
+
+       tbl_size = (1ULL << log_size) * sizeof(u16);
+       rss->host_rss_ind_tbl =
+               devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL);
+       if (unlikely(!rss->host_rss_ind_tbl))
+               goto mem_err2;
+
+       rss->tbl_log_size = log_size;
+
+       return 0;
+
+mem_err2:
+       tbl_size = (1ULL << log_size) *
+               sizeof(struct ena_admin_rss_ind_table_entry);
+
+       dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl,
+                         rss->rss_ind_tbl_dma_addr);
+       rss->rss_ind_tbl = NULL;
+mem_err1:
+       rss->tbl_log_size = 0;
+       return -ENOMEM;
+}
+
+static void ena_com_indirect_table_destroy(struct ena_com_dev *ena_dev)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+       size_t tbl_size = (1ULL << rss->tbl_log_size) *
+               sizeof(struct ena_admin_rss_ind_table_entry);
+
+       if (rss->rss_ind_tbl)
+               dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl,
+                                 rss->rss_ind_tbl_dma_addr);
+       rss->rss_ind_tbl = NULL;
+
+       if (rss->host_rss_ind_tbl)
+               devm_kfree(ena_dev->dmadev, rss->host_rss_ind_tbl);
+       rss->host_rss_ind_tbl = NULL;
+}
+
+static int ena_com_create_io_sq(struct ena_com_dev *ena_dev,
+                               struct ena_com_io_sq *io_sq, u16 cq_idx)
+{
+       struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+       struct ena_admin_aq_create_sq_cmd create_cmd;
+       struct ena_admin_acq_create_sq_resp_desc cmd_completion;
+       u8 direction;
+       int ret;
+
+       memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_sq_cmd));
+
+       create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_SQ;
+
+       if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
+               direction = ENA_ADMIN_SQ_DIRECTION_TX;
+       else
+               direction = ENA_ADMIN_SQ_DIRECTION_RX;
+
+       create_cmd.sq_identity |= (direction <<
+               ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT) &
+               ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK;
+
+       create_cmd.sq_caps_2 |= io_sq->mem_queue_type &
+               ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK;
+
+       create_cmd.sq_caps_2 |= (ENA_ADMIN_COMPLETION_POLICY_DESC <<
+               ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT) &
+               ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK;
+
+       create_cmd.sq_caps_3 |=
+               ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK;
+
+       create_cmd.cq_idx = cq_idx;
+       create_cmd.sq_depth = io_sq->q_depth;
+
+       if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) {
+               ret = ena_com_mem_addr_set(ena_dev,
+                                          &create_cmd.sq_ba,
+                                          io_sq->desc_addr.phys_addr);
+               if (unlikely(ret)) {
+                       pr_err("memory address set failed\n");
+                       return ret;
+               }
+       }
+
+       ret = ena_com_execute_admin_command(admin_queue,
+                                           (struct ena_admin_aq_entry *)&create_cmd,
+                                           sizeof(create_cmd),
+                                           (struct ena_admin_acq_entry *)&cmd_completion,
+                                           sizeof(cmd_completion));
+       if (unlikely(ret)) {
+               pr_err("Failed to create IO SQ. error: %d\n", ret);
+               return ret;
+       }
+
+       io_sq->idx = cmd_completion.sq_idx;
+
+       io_sq->db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+               (uintptr_t)cmd_completion.sq_doorbell_offset);
+
+       if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+               io_sq->header_addr = (u8 __iomem *)((uintptr_t)ena_dev->mem_bar
+                               + cmd_completion.llq_headers_offset);
+
+               io_sq->desc_addr.pbuf_dev_addr =
+                       (u8 __iomem *)((uintptr_t)ena_dev->mem_bar +
+                       cmd_completion.llq_descriptors_offset);
+       }
+
+       pr_debug("created sq[%u], depth[%u]\n", io_sq->idx, io_sq->q_depth);
+
+       return ret;
+}
+
+static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+       struct ena_com_io_sq *io_sq;
+       u16 qid;
+       int i;
+
+       for (i = 0; i < 1 << rss->tbl_log_size; i++) {
+               qid = rss->host_rss_ind_tbl[i];
+               if (qid >= ENA_TOTAL_NUM_QUEUES)
+                       return -EINVAL;
+
+               io_sq = &ena_dev->io_sq_queues[qid];
+
+               if (io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX)
+                       return -EINVAL;
+
+               rss->rss_ind_tbl[i].cq_idx = io_sq->idx;
+       }
+
+       return 0;
+}
+
+static int ena_com_ind_tbl_convert_from_device(struct ena_com_dev *ena_dev)
+{
+       u16 dev_idx_to_host_tbl[ENA_TOTAL_NUM_QUEUES] = { (u16)-1 };
+       struct ena_rss *rss = &ena_dev->rss;
+       u8 idx;
+       u16 i;
+
+       for (i = 0; i < ENA_TOTAL_NUM_QUEUES; i++)
+               dev_idx_to_host_tbl[ena_dev->io_sq_queues[i].idx] = i;
+
+       for (i = 0; i < 1 << rss->tbl_log_size; i++) {
+               if (rss->rss_ind_tbl[i].cq_idx >= ENA_TOTAL_NUM_QUEUES)
+                       return -EINVAL;
+               idx = (u8)rss->rss_ind_tbl[i].cq_idx;
+
+               if (dev_idx_to_host_tbl[idx] >= ENA_TOTAL_NUM_QUEUES)
+                       return -EINVAL;
+
+               rss->host_rss_ind_tbl[i] = dev_idx_to_host_tbl[idx];
+       }
+
+       return 0;
+}
+
+static int ena_com_init_interrupt_moderation_table(struct ena_com_dev *ena_dev)
+{
+       size_t size;
+
+       size = sizeof(struct ena_intr_moder_entry) * ENA_INTR_MAX_NUM_OF_LEVELS;
+
+       ena_dev->intr_moder_tbl =
+               devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+       if (!ena_dev->intr_moder_tbl)
+               return -ENOMEM;
+
+       ena_com_config_default_interrupt_moderation_table(ena_dev);
+
+       return 0;
+}
+
+static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev,
+                                                u16 intr_delay_resolution)
+{
+       struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+       unsigned int i;
+
+       if (!intr_delay_resolution) {
+               pr_err("Illegal intr_delay_resolution provided. Going to use default 1 usec resolution\n");
+               intr_delay_resolution = 1;
+       }
+       ena_dev->intr_delay_resolution = intr_delay_resolution;
+
+       /* update Rx */
+       for (i = 0; i < ENA_INTR_MAX_NUM_OF_LEVELS; i++)
+               intr_moder_tbl[i].intr_moder_interval /= intr_delay_resolution;
+
+       /* update Tx */
+       ena_dev->intr_moder_tx_interval /= intr_delay_resolution;
+}
+
+/*****************************************************************************/
+/*******************************      API       ******************************/
+/*****************************************************************************/
+
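+/* Typical call pattern (illustrative; see e.g. ena_com_destroy_io_sq()):
+ *     ret = ena_com_execute_admin_command(admin_queue,
+ *                     (struct ena_admin_aq_entry *)&cmd, sizeof(cmd),
+ *                     (struct ena_admin_acq_entry *)&resp, sizeof(resp));
+ */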
+int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue,
+                                 struct ena_admin_aq_entry *cmd,
+                                 size_t cmd_size,
+                                 struct ena_admin_acq_entry *comp,
+                                 size_t comp_size)
+{
+       struct ena_comp_ctx *comp_ctx;
+       int ret;
+
+       comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size,
+                                           comp, comp_size);
+       if (unlikely(IS_ERR(comp_ctx))) {
+               pr_err("Failed to submit command [%ld]\n", PTR_ERR(comp_ctx));
+               return PTR_ERR(comp_ctx);
+       }
+
+       ret = ena_com_wait_and_process_admin_cq(comp_ctx, admin_queue);
+       if (unlikely(ret)) {
+               if (admin_queue->running_state)
+                       pr_err("Failed to process command. ret = %d\n", ret);
+               else
+                       pr_debug("Failed to process command. ret = %d\n", ret);
+       }
+       return ret;
+}
+
+int ena_com_create_io_cq(struct ena_com_dev *ena_dev,
+                        struct ena_com_io_cq *io_cq)
+{
+       struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+       struct ena_admin_aq_create_cq_cmd create_cmd;
+       struct ena_admin_acq_create_cq_resp_desc cmd_completion;
+       int ret;
+
+       memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_cq_cmd));
+
+       create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_CQ;
+
+       create_cmd.cq_caps_2 |= (io_cq->cdesc_entry_size_in_bytes / 4) &
+               ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK;
+       create_cmd.cq_caps_1 |=
+               ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK;
+
+       create_cmd.msix_vector = io_cq->msix_vector;
+       create_cmd.cq_depth = io_cq->q_depth;
+
+       ret = ena_com_mem_addr_set(ena_dev,
+                                  &create_cmd.cq_ba,
+                                  io_cq->cdesc_addr.phys_addr);
+       if (unlikely(ret)) {
+               pr_err("memory address set failed\n");
+               return ret;
+       }
+
+       ret = ena_com_execute_admin_command(admin_queue,
+                                           (struct ena_admin_aq_entry *)&create_cmd,
+                                           sizeof(create_cmd),
+                                           (struct ena_admin_acq_entry *)&cmd_completion,
+                                           sizeof(cmd_completion));
+       if (unlikely(ret)) {
+               pr_err("Failed to create IO CQ. error: %d\n", ret);
+               return ret;
+       }
+
+       io_cq->idx = cmd_completion.cq_idx;
+
+       io_cq->unmask_reg = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+               cmd_completion.cq_interrupt_unmask_register_offset);
+
+       if (cmd_completion.cq_head_db_register_offset)
+               io_cq->cq_head_db_reg =
+                       (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+                       cmd_completion.cq_head_db_register_offset);
+
+       if (cmd_completion.numa_node_register_offset)
+               io_cq->numa_node_cfg_reg =
+                       (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+                       cmd_completion.numa_node_register_offset);
+
+       pr_debug("created cq[%u], depth[%u]\n", io_cq->idx, io_cq->q_depth);
+
+       return ret;
+}
+
+int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid,
+                           struct ena_com_io_sq **io_sq,
+                           struct ena_com_io_cq **io_cq)
+{
+       if (qid >= ENA_TOTAL_NUM_QUEUES) {
+               pr_err("Invalid queue number %d, the max is %d\n", qid,
+                      ENA_TOTAL_NUM_QUEUES);
+               return -EINVAL;
+       }
+
+       *io_sq = &ena_dev->io_sq_queues[qid];
+       *io_cq = &ena_dev->io_cq_queues[qid];
+
+       return 0;
+}
+
+void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev)
+{
+       struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+       struct ena_comp_ctx *comp_ctx;
+       u16 i;
+
+       if (!admin_queue->comp_ctx)
+               return;
+
+       for (i = 0; i < admin_queue->q_depth; i++) {
+               comp_ctx = get_comp_ctxt(admin_queue, i, false);
+               if (unlikely(!comp_ctx))
+                       break;
+
+               comp_ctx->status = ENA_CMD_ABORTED;
+
+               complete(&comp_ctx->wait_event);
+       }
+}
+
+void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev)
+{
+       struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+       unsigned long flags;
+
+       spin_lock_irqsave(&admin_queue->q_lock, flags);
+       while (atomic_read(&admin_queue->outstanding_cmds) != 0) {
+               spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+               msleep(20);
+               spin_lock_irqsave(&admin_queue->q_lock, flags);
+       }
+       spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+}
+
+int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev,
+                         struct ena_com_io_cq *io_cq)
+{
+       struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+       struct ena_admin_aq_destroy_cq_cmd destroy_cmd;
+       struct ena_admin_acq_destroy_cq_resp_desc destroy_resp;
+       int ret;
+
+       memset(&destroy_cmd, 0x0, sizeof(destroy_cmd));
+
+       destroy_cmd.cq_idx = io_cq->idx;
+       destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_CQ;
+
+       ret = ena_com_execute_admin_command(admin_queue,
+                                           (struct ena_admin_aq_entry *)&destroy_cmd,
+                                           sizeof(destroy_cmd),
+                                           (struct ena_admin_acq_entry *)&destroy_resp,
+                                           sizeof(destroy_resp));
+
+       if (unlikely(ret && (ret != -ENODEV)))
+               pr_err("Failed to destroy IO CQ. error: %d\n", ret);
+
+       return ret;
+}
+
+bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev)
+{
+       return ena_dev->admin_queue.running_state;
+}
+
+void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state)
+{
+       struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+       unsigned long flags;
+
+       spin_lock_irqsave(&admin_queue->q_lock, flags);
+       ena_dev->admin_queue.running_state = state;
+       spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+}
+
+void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev)
+{
+       u16 depth = ena_dev->aenq.q_depth;
+
+       WARN(ena_dev->aenq.head != depth, "Invalid AENQ state\n");
+
+       /* Init head_db to mark that all entries in the queue
+        * are initially available
+        */
+       writel(depth, ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+}
+
+int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag)
+{
+       struct ena_com_admin_queue *admin_queue;
+       struct ena_admin_set_feat_cmd cmd;
+       struct ena_admin_set_feat_resp resp;
+       struct ena_admin_get_feat_resp get_resp;
+       int ret;
+
+       ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG);
+       if (ret) {
+               pr_info("Can't get aenq configuration\n");
+               return ret;
+       }
+
+       if ((get_resp.u.aenq.supported_groups & groups_flag) != groups_flag) {
+               pr_warn("Trying to set unsupported aenq events. supported flags: %x requested flags: %x\n",
+                       get_resp.u.aenq.supported_groups, groups_flag);
+               return -EPERM;
+       }
+
+       memset(&cmd, 0x0, sizeof(cmd));
+       admin_queue = &ena_dev->admin_queue;
+
+       cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+       cmd.aq_common_descriptor.flags = 0;
+       cmd.feat_common.feature_id = ENA_ADMIN_AENQ_CONFIG;
+       cmd.u.aenq.enabled_groups = groups_flag;
+
+       ret = ena_com_execute_admin_command(admin_queue,
+                                           (struct ena_admin_aq_entry *)&cmd,
+                                           sizeof(cmd),
+                                           (struct ena_admin_acq_entry *)&resp,
+                                           sizeof(resp));
+
+       if (unlikely(ret))
+               pr_err("Failed to config AENQ ret: %d\n", ret);
+
+       return ret;
+}
+
+int ena_com_get_dma_width(struct ena_com_dev *ena_dev)
+{
+       u32 caps = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF);
+       int width;
+
+       if (unlikely(caps == ENA_MMIO_READ_TIMEOUT)) {
+               pr_err("Reg read timeout occurred\n");
+               return -ETIME;
+       }
+
+       width = (caps & ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >>
+               ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT;
+
+       pr_debug("ENA dma width: %d\n", width);
+
+       if ((width < 32) || (width > ENA_MAX_PHYS_ADDR_SIZE_BITS)) {
+               pr_err("DMA width illegal value: %d\n", width);
+               return -EINVAL;
+       }
+
+       ena_dev->dma_addr_bits = width;
+
+       return width;
+}
+
+int ena_com_validate_version(struct ena_com_dev *ena_dev)
+{
+       u32 ver;
+       u32 ctrl_ver;
+       u32 ctrl_ver_masked;
+
+       /* Make sure the ENA version and the controller version are at least
+        * as recent as the driver expects
+        */
+       ver = ena_com_reg_bar_read32(ena_dev, ENA_REGS_VERSION_OFF);
+       ctrl_ver = ena_com_reg_bar_read32(ena_dev,
+                                         ENA_REGS_CONTROLLER_VERSION_OFF);
+
+       if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) ||
+                    (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) {
+               pr_err("Reg read timeout occurred\n");
+               return -ETIME;
+       }
+
+       pr_info("ena device version: %d.%d\n",
+               (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >>
+                       ENA_REGS_VERSION_MAJOR_VERSION_SHIFT,
+               ver & ENA_REGS_VERSION_MINOR_VERSION_MASK);
+
+       if (ver < MIN_ENA_VER) {
+               pr_err("ENA version is lower than the minimal version the driver supports\n");
+               return -1;
+       }
+
+       pr_info("ena controller version: %d.%d.%d implementation version %d\n",
+               (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
+                       ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
+               (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >>
+                       ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT,
+               (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK),
+               (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >>
+                       ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT);
+
+       ctrl_ver_masked =
+               (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) |
+               (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) |
+               (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK);
+
+       /* Validate the ctrl version without the implementation ID */
+       if (ctrl_ver_masked < MIN_ENA_CTRL_VER) {
+               pr_err("ENA ctrl version is lower than the minimal ctrl version the driver supports\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+void ena_com_admin_destroy(struct ena_com_dev *ena_dev)
+{
+       struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+       struct ena_com_admin_cq *cq = &admin_queue->cq;
+       struct ena_com_admin_sq *sq = &admin_queue->sq;
+       struct ena_com_aenq *aenq = &ena_dev->aenq;
+       u16 size;
+
+       if (admin_queue->comp_ctx)
+               devm_kfree(ena_dev->dmadev, admin_queue->comp_ctx);
+       admin_queue->comp_ctx = NULL;
+       size = ADMIN_SQ_SIZE(admin_queue->q_depth);
+       if (sq->entries)
+               dma_free_coherent(ena_dev->dmadev, size, sq->entries,
+                                 sq->dma_addr);
+       sq->entries = NULL;
+
+       size = ADMIN_CQ_SIZE(admin_queue->q_depth);
+       if (cq->entries)
+               dma_free_coherent(ena_dev->dmadev, size, cq->entries,
+                                 cq->dma_addr);
+       cq->entries = NULL;
+
+       size = ADMIN_AENQ_SIZE(aenq->q_depth);
+       if (ena_dev->aenq.entries)
+               dma_free_coherent(ena_dev->dmadev, size, aenq->entries,
+                                 aenq->dma_addr);
+       aenq->entries = NULL;
+}
+
+void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling)
+{
+       ena_dev->admin_queue.polling = polling;
+}
+
+int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev)
+{
+       struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+
+       spin_lock_init(&mmio_read->lock);
+       mmio_read->read_resp =
+               dma_zalloc_coherent(ena_dev->dmadev,
+                                   sizeof(*mmio_read->read_resp),
+                                   &mmio_read->read_resp_dma_addr, GFP_KERNEL);
+       if (unlikely(!mmio_read->read_resp))
+               return -ENOMEM;
+
+       ena_com_mmio_reg_read_request_write_dev_addr(ena_dev);
+
+       mmio_read->read_resp->req_id = 0x0;
+       mmio_read->seq_num = 0x0;
+       mmio_read->readless_supported = true;
+
+       return 0;
+}
+
+void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, bool readless_supported)
+{
+       struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+
+       mmio_read->readless_supported = readless_supported;
+}
+
+void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev)
+{
+       struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+
+       writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF);
+       writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF);
+
+       dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp),
+                         mmio_read->read_resp, mmio_read->read_resp_dma_addr);
+
+       mmio_read->read_resp = NULL;
+}
+
+void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev)
+{
+       struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+       u32 addr_low, addr_high;
+
+       addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(mmio_read->read_resp_dma_addr);
+       addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(mmio_read->read_resp_dma_addr);
+
+       writel(addr_low, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF);
+       writel(addr_high, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF);
+}
+
+int ena_com_admin_init(struct ena_com_dev *ena_dev,
+                      struct ena_aenq_handlers *aenq_handlers,
+                      bool init_spinlock)
+{
+       struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+       u32 aq_caps, acq_caps, dev_sts, addr_low, addr_high;
+       int ret;
+
+       dev_sts = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
+
+       if (unlikely(dev_sts == ENA_MMIO_READ_TIMEOUT)) {
+               pr_err("Reg read timeout occurred\n");
+               return -ETIME;
+       }
+
+       if (!(dev_sts & ENA_REGS_DEV_STS_READY_MASK)) {
+               pr_err("Device isn't ready, abort com init\n");
+               return -ENODEV;
+       }
+
+       admin_queue->q_depth = ENA_ADMIN_QUEUE_DEPTH;
+
+       admin_queue->q_dmadev = ena_dev->dmadev;
+       admin_queue->polling = false;
+       admin_queue->curr_cmd_id = 0;
+
+       atomic_set(&admin_queue->outstanding_cmds, 0);
+
+       if (init_spinlock)
+               spin_lock_init(&admin_queue->q_lock);
+
+       ret = ena_com_init_comp_ctxt(admin_queue);
+       if (ret)
+               goto error;
+
+       ret = ena_com_admin_init_sq(admin_queue);
+       if (ret)
+               goto error;
+
+       ret = ena_com_admin_init_cq(admin_queue);
+       if (ret)
+               goto error;
+
+       admin_queue->sq.db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+               ENA_REGS_AQ_DB_OFF);
+
+       addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->sq.dma_addr);
+       addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->sq.dma_addr);
+
+       writel(addr_low, ena_dev->reg_bar + ENA_REGS_AQ_BASE_LO_OFF);
+       writel(addr_high, ena_dev->reg_bar + ENA_REGS_AQ_BASE_HI_OFF);
+
+       addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->cq.dma_addr);
+       addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->cq.dma_addr);
+
+       writel(addr_low, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_LO_OFF);
+       writel(addr_high, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_HI_OFF);
+
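+       /* Report the admin queue geometry to the device: the queue depth and
+        * the entry size are packed into the AQ/ACQ capability registers.
+        */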
+       aq_caps = 0;
+       aq_caps |= admin_queue->q_depth & ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK;
+       aq_caps |= (sizeof(struct ena_admin_aq_entry) <<
+                       ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) &
+                       ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK;
+
+       acq_caps = 0;
+       acq_caps |= admin_queue->q_depth & ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK;
+       acq_caps |= (sizeof(struct ena_admin_acq_entry) <<
+               ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) &
+               ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK;
+
+       writel(aq_caps, ena_dev->reg_bar + ENA_REGS_AQ_CAPS_OFF);
+       writel(acq_caps, ena_dev->reg_bar + ENA_REGS_ACQ_CAPS_OFF);
+       ret = ena_com_admin_init_aenq(ena_dev, aenq_handlers);
+       if (ret)
+               goto error;
+
+       admin_queue->running_state = true;
+
+       return 0;
+error:
+       ena_com_admin_destroy(ena_dev);
+
+       return ret;
+}
+
+int ena_com_create_io_queue(struct ena_com_dev *ena_dev,
+                           struct ena_com_create_io_ctx *ctx)
+{
+       struct ena_com_io_sq *io_sq;
+       struct ena_com_io_cq *io_cq;
+       int ret;
+
+       if (ctx->qid >= ENA_TOTAL_NUM_QUEUES) {
+               pr_err("Qid (%d) is bigger than max num of queues (%d)\n",
+                      ctx->qid, ENA_TOTAL_NUM_QUEUES);
+               return -EINVAL;
+       }
+
+       io_sq = &ena_dev->io_sq_queues[ctx->qid];
+       io_cq = &ena_dev->io_cq_queues[ctx->qid];
+
+       memset(io_sq, 0x0, sizeof(struct ena_com_io_sq));
+       memset(io_cq, 0x0, sizeof(struct ena_com_io_cq));
+
+       /* Init CQ */
+       io_cq->q_depth = ctx->queue_size;
+       io_cq->direction = ctx->direction;
+       io_cq->qid = ctx->qid;
+
+       io_cq->msix_vector = ctx->msix_vector;
+
+       io_sq->q_depth = ctx->queue_size;
+       io_sq->direction = ctx->direction;
+       io_sq->qid = ctx->qid;
+
+       io_sq->mem_queue_type = ctx->mem_queue_type;
+
+       if (ctx->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
+               /* header length is limited to 8 bits */
+               io_sq->tx_max_header_size =
+                       min_t(u32, ena_dev->tx_max_header_size, SZ_256);
+
+       ret = ena_com_init_io_sq(ena_dev, ctx, io_sq);
+       if (ret)
+               goto error;
+       ret = ena_com_init_io_cq(ena_dev, ctx, io_cq);
+       if (ret)
+               goto error;
+
+       ret = ena_com_create_io_cq(ena_dev, io_cq);
+       if (ret)
+               goto error;
+
+       ret = ena_com_create_io_sq(ena_dev, io_sq, io_cq->idx);
+       if (ret)
+               goto destroy_io_cq;
+
+       return 0;
+
+destroy_io_cq:
+       ena_com_destroy_io_cq(ena_dev, io_cq);
+error:
+       ena_com_io_queue_free(ena_dev, io_sq, io_cq);
+       return ret;
+}
+
+void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid)
+{
+       struct ena_com_io_sq *io_sq;
+       struct ena_com_io_cq *io_cq;
+
+       if (qid >= ENA_TOTAL_NUM_QUEUES) {
+               pr_err("Qid (%d) is bigger than max num of queues (%d)\n", qid,
+                      ENA_TOTAL_NUM_QUEUES);
+               return;
+       }
+
+       io_sq = &ena_dev->io_sq_queues[qid];
+       io_cq = &ena_dev->io_cq_queues[qid];
+
+       ena_com_destroy_io_sq(ena_dev, io_sq);
+       ena_com_destroy_io_cq(ena_dev, io_cq);
+
+       ena_com_io_queue_free(ena_dev, io_sq, io_cq);
+}
+
+int ena_com_get_link_params(struct ena_com_dev *ena_dev,
+                           struct ena_admin_get_feat_resp *resp)
+{
+       return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG);
+}
+
+int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
+                             struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+       struct ena_admin_get_feat_resp get_resp;
+       int rc;
+
+       rc = ena_com_get_feature(ena_dev, &get_resp,
+                                ENA_ADMIN_DEVICE_ATTRIBUTES);
+       if (rc)
+               return rc;
+
+       memcpy(&get_feat_ctx->dev_attr, &get_resp.u.dev_attr,
+              sizeof(get_resp.u.dev_attr));
+       ena_dev->supported_features = get_resp.u.dev_attr.supported_features;
+
+       rc = ena_com_get_feature(ena_dev, &get_resp,
+                                ENA_ADMIN_MAX_QUEUES_NUM);
+       if (rc)
+               return rc;
+
+       memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue,
+              sizeof(get_resp.u.max_queue));
+       ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size;
+
+       rc = ena_com_get_feature(ena_dev, &get_resp,
+                                ENA_ADMIN_AENQ_CONFIG);
+       if (rc)
+               return rc;
+
+       memcpy(&get_feat_ctx->aenq, &get_resp.u.aenq,
+              sizeof(get_resp.u.aenq));
+
+       rc = ena_com_get_feature(ena_dev, &get_resp,
+                                ENA_ADMIN_STATELESS_OFFLOAD_CONFIG);
+       if (rc)
+               return rc;
+
+       memcpy(&get_feat_ctx->offload, &get_resp.u.offload,
+              sizeof(get_resp.u.offload));
+
+       return 0;
+}
+
+void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev)
+{
+       ena_com_handle_admin_completion(&ena_dev->admin_queue);
+}
+
+/* ena_com_get_specific_aenq_cb:
+ * return the handler that is relevant to the specific event group
+ */
+static ena_aenq_handler ena_com_get_specific_aenq_cb(struct ena_com_dev *dev,
+                                                    u16 group)
+{
+       struct ena_aenq_handlers *aenq_handlers = dev->aenq.aenq_handlers;
+
+       if ((group < ENA_MAX_HANDLERS) && aenq_handlers->handlers[group])
+               return aenq_handlers->handlers[group];
+
+       return aenq_handlers->unimplemented_handler;
+}
+
+/* ena_com_aenq_intr_handler:
+ * handles the incoming AENQ events:
+ * pop events from the queue and apply the matching handler
+ */
+void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
+{
+       struct ena_admin_aenq_entry *aenq_e;
+       struct ena_admin_aenq_common_desc *aenq_common;
+       struct ena_com_aenq *aenq = &dev->aenq;
+       ena_aenq_handler handler_cb;
+       u16 masked_head, processed = 0;
+       u8 phase;
+
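+       /* Each AENQ entry carries a phase bit that alternates on every pass
+        * over the ring; an entry is new only while its phase matches the
+        * phase the driver expects, so no head pointer from the device is
+        * needed to detect new events.
+        */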
+       masked_head = aenq->head & (aenq->q_depth - 1);
+       phase = aenq->phase;
+       aenq_e = &aenq->entries[masked_head]; /* Get first entry */
+       aenq_common = &aenq_e->aenq_common_desc;
+
+       /* Go over all the events */
+       while ((aenq_common->flags & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) ==
+              phase) {
+               pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n",
+                        aenq_common->group, aenq_common->syndrom,
+                        (u64)aenq_common->timestamp_low +
+                                ((u64)aenq_common->timestamp_high << 32));
+
+               /* Handle specific event*/
+               handler_cb = ena_com_get_specific_aenq_cb(dev,
+                                                         aenq_common->group);
+               handler_cb(data, aenq_e); /* call the actual event handler*/
+
+               /* Get next event entry */
+               masked_head++;
+               processed++;
+
+               if (unlikely(masked_head == aenq->q_depth)) {
+                       masked_head = 0;
+                       phase = !phase;
+               }
+               aenq_e = &aenq->entries[masked_head];
+               aenq_common = &aenq_e->aenq_common_desc;
+       }
+
+       aenq->head += processed;
+       aenq->phase = phase;
+
+       /* Don't update aenq doorbell if there weren't any processed events */
+       if (!processed)
+               return;
+
+       /* write the aenq doorbell after all AENQ descriptors were read */
+       mb();
+       writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+}
+
+int ena_com_dev_reset(struct ena_com_dev *ena_dev)
+{
+       u32 stat, timeout, cap, reset_val;
+       int rc;
+
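+       /* Reset handshake: assert the reset bit in the device control
+        * register, wait for the reset-in-progress indication to turn on,
+        * then clear the bit and wait for the indication to turn off. The
+        * timeout granularity is taken from the capabilities register.
+        */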
+       stat = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
+       cap = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF);
+
+       if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) ||
+                    (cap == ENA_MMIO_READ_TIMEOUT))) {
+               pr_err("Reg read32 timeout occurred\n");
+               return -ETIME;
+       }
+
+       if ((stat & ENA_REGS_DEV_STS_READY_MASK) == 0) {
+               pr_err("Device isn't ready, can't reset device\n");
+               return -EINVAL;
+       }
+
+       timeout = (cap & ENA_REGS_CAPS_RESET_TIMEOUT_MASK) >>
+                       ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT;
+       if (timeout == 0) {
+               pr_err("Invalid timeout value\n");
+               return -EINVAL;
+       }
+
+       /* start reset */
+       reset_val = ENA_REGS_DEV_CTL_DEV_RESET_MASK;
+       writel(reset_val, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF);
+
+       /* Write again the MMIO read request address */
+       ena_com_mmio_reg_read_request_write_dev_addr(ena_dev);
+
+       rc = wait_for_reset_state(ena_dev, timeout,
+                                 ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK);
+       if (rc != 0) {
+               pr_err("Reset indication didn't turn on\n");
+               return rc;
+       }
+
+       /* reset done */
+       writel(0, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF);
+       rc = wait_for_reset_state(ena_dev, timeout, 0);
+       if (rc != 0) {
+               pr_err("Reset indication didn't turn off\n");
+               return rc;
+       }
+
+       return 0;
+}
+
+static int ena_get_dev_stats(struct ena_com_dev *ena_dev,
+                            struct ena_com_stats_ctx *ctx,
+                            enum ena_admin_get_stats_type type)
+{
+       struct ena_admin_aq_get_stats_cmd *get_cmd = &ctx->get_cmd;
+       struct ena_admin_acq_get_stats_resp *get_resp = &ctx->get_resp;
+       struct ena_com_admin_queue *admin_queue;
+       int ret;
+
+       admin_queue = &ena_dev->admin_queue;
+
+       get_cmd->aq_common_descriptor.opcode = ENA_ADMIN_GET_STATS;
+       get_cmd->aq_common_descriptor.flags = 0;
+       get_cmd->type = type;
+
+       ret =  ena_com_execute_admin_command(admin_queue,
+                                            (struct ena_admin_aq_entry *)get_cmd,
+                                            sizeof(*get_cmd),
+                                            (struct ena_admin_acq_entry *)get_resp,
+                                            sizeof(*get_resp));
+
+       if (unlikely(ret))
+               pr_err("Failed to get stats. error: %d\n", ret);
+
+       return ret;
+}
+
+int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev,
+                               struct ena_admin_basic_stats *stats)
+{
+       struct ena_com_stats_ctx ctx;
+       int ret;
+
+       memset(&ctx, 0x0, sizeof(ctx));
+       ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_BASIC);
+       if (likely(ret == 0))
+               memcpy(stats, &ctx.get_resp.basic_stats,
+                      sizeof(ctx.get_resp.basic_stats));
+
+       return ret;
+}
+
+int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu)
+{
+       struct ena_com_admin_queue *admin_queue;
+       struct ena_admin_set_feat_cmd cmd;
+       struct ena_admin_set_feat_resp resp;
+       int ret;
+
+       if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_MTU)) {
+               pr_info("Feature %d isn't supported\n", ENA_ADMIN_MTU);
+               return -EPERM;
+       }
+
+       memset(&cmd, 0x0, sizeof(cmd));
+       admin_queue = &ena_dev->admin_queue;
+
+       cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+       cmd.aq_common_descriptor.flags = 0;
+       cmd.feat_common.feature_id = ENA_ADMIN_MTU;
+       cmd.u.mtu.mtu = mtu;
+
+       ret = ena_com_execute_admin_command(admin_queue,
+                                           (struct ena_admin_aq_entry *)&cmd,
+                                           sizeof(cmd),
+                                           (struct ena_admin_acq_entry *)&resp,
+                                           sizeof(resp));
+
+       if (unlikely(ret))
+               pr_err("Failed to set mtu %d. error: %d\n", mtu, ret);
+
+       return ret;
+}
+
+int ena_com_get_offload_settings(struct ena_com_dev *ena_dev,
+                                struct ena_admin_feature_offload_desc *offload)
+{
+       int ret;
+       struct ena_admin_get_feat_resp resp;
+
+       ret = ena_com_get_feature(ena_dev, &resp,
+                                 ENA_ADMIN_STATELESS_OFFLOAD_CONFIG);
+       if (unlikely(ret)) {
+               pr_err("Failed to get offload capabilities %d\n", ret);
+               return ret;
+       }
+
+       memcpy(offload, &resp.u.offload, sizeof(resp.u.offload));
+
+       return 0;
+}
+
+int ena_com_set_hash_function(struct ena_com_dev *ena_dev)
+{
+       struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+       struct ena_rss *rss = &ena_dev->rss;
+       struct ena_admin_set_feat_cmd cmd;
+       struct ena_admin_set_feat_resp resp;
+       struct ena_admin_get_feat_resp get_resp;
+       int ret;
+
+       if (!ena_com_check_supported_feature_id(ena_dev,
+                                               ENA_ADMIN_RSS_HASH_FUNCTION)) {
+               pr_info("Feature %d isn't supported\n",
+                       ENA_ADMIN_RSS_HASH_FUNCTION);
+               return -EPERM;
+       }
+
+       /* Validate hash function is supported */
+       ret = ena_com_get_feature(ena_dev, &get_resp,
+                                 ENA_ADMIN_RSS_HASH_FUNCTION);
+       if (unlikely(ret))
+               return ret;
+
+       if (!(get_resp.u.flow_hash_func.supported_func &
+             (1 << rss->hash_func))) {
+               pr_err("Func hash %d isn't supported by device, abort\n",
+                      rss->hash_func);
+               return -EPERM;
+       }
+
+       memset(&cmd, 0x0, sizeof(cmd));
+
+       cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+       cmd.aq_common_descriptor.flags =
+               ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+       cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_FUNCTION;
+       cmd.u.flow_hash_func.init_val = rss->hash_init_val;
+       cmd.u.flow_hash_func.selected_func = 1 << rss->hash_func;
+
+       ret = ena_com_mem_addr_set(ena_dev,
+                                  &cmd.control_buffer.address,
+                                  rss->hash_key_dma_addr);
+       if (unlikely(ret)) {
+               pr_err("memory address set failed\n");
+               return ret;
+       }
+
+       cmd.control_buffer.length = sizeof(*rss->hash_key);
+
+       ret = ena_com_execute_admin_command(admin_queue,
+                                           (struct ena_admin_aq_entry *)&cmd,
+                                           sizeof(cmd),
+                                           (struct ena_admin_acq_entry *)&resp,
+                                           sizeof(resp));
+       if (unlikely(ret)) {
+               pr_err("Failed to set hash function %d. error: %d\n",
+                      rss->hash_func, ret);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
+                              enum ena_admin_hash_functions func,
+                              const u8 *key, u16 key_len, u32 init_val)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+       struct ena_admin_get_feat_resp get_resp;
+       struct ena_admin_feature_rss_flow_hash_control *hash_key =
+               rss->hash_key;
+       int rc;
+
+       /* Make sure the key size is a multiple of DWORDs */
+       if (unlikely(key_len & 0x3))
+               return -EINVAL;
+
+       rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+                                   ENA_ADMIN_RSS_HASH_FUNCTION,
+                                   rss->hash_key_dma_addr,
+                                   sizeof(*rss->hash_key));
+       if (unlikely(rc))
+               return rc;
+
+       if (!((1 << func) & get_resp.u.flow_hash_func.supported_func)) {
+               pr_err("Flow hash function %d isn't supported\n", func);
+               return -EPERM;
+       }
+
+       switch (func) {
+       case ENA_ADMIN_TOEPLITZ:
+               if (key_len > sizeof(hash_key->key)) {
+                       pr_err("key len (%hu) is bigger than the max supported (%zu)\n",
+                              key_len, sizeof(hash_key->key));
+                       return -EINVAL;
+               }
+
+               memcpy(hash_key->key, key, key_len);
+               rss->hash_init_val = init_val;
+               hash_key->keys_num = key_len >> 2;
+               break;
+       case ENA_ADMIN_CRC32:
+               rss->hash_init_val = init_val;
+               break;
+       default:
+               pr_err("Invalid hash function (%d)\n", func);
+               return -EINVAL;
+       }
+
+       rc = ena_com_set_hash_function(ena_dev);
+
+       /* Restore the old function */
+       if (unlikely(rc))
+               ena_com_get_hash_function(ena_dev, NULL, NULL);
+
+       return rc;
+}
+
+int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
+                             enum ena_admin_hash_functions *func,
+                             u8 *key)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+       struct ena_admin_get_feat_resp get_resp;
+       struct ena_admin_feature_rss_flow_hash_control *hash_key =
+               rss->hash_key;
+       int rc;
+
+       rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+                                   ENA_ADMIN_RSS_HASH_FUNCTION,
+                                   rss->hash_key_dma_addr,
+                                   sizeof(*rss->hash_key));
+       if (unlikely(rc))
+               return rc;
+
+       rss->hash_func = get_resp.u.flow_hash_func.selected_func;
+       if (func)
+               *func = rss->hash_func;
+
+       if (key)
+               memcpy(key, hash_key->key, (size_t)(hash_key->keys_num) << 2);
+
+       return 0;
+}
+
+int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev,
+                         enum ena_admin_flow_hash_proto proto,
+                         u16 *fields)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+       struct ena_admin_get_feat_resp get_resp;
+       int rc;
+
+       rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+                                   ENA_ADMIN_RSS_HASH_INPUT,
+                                   rss->hash_ctrl_dma_addr,
+                                   sizeof(*rss->hash_ctrl));
+       if (unlikely(rc))
+               return rc;
+
+       if (fields)
+               *fields = rss->hash_ctrl->selected_fields[proto].fields;
+
+       return 0;
+}
+
+int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev)
+{
+       struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+       struct ena_rss *rss = &ena_dev->rss;
+       struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl;
+       struct ena_admin_set_feat_cmd cmd;
+       struct ena_admin_set_feat_resp resp;
+       int ret;
+
+       if (!ena_com_check_supported_feature_id(ena_dev,
+                                               ENA_ADMIN_RSS_HASH_INPUT)) {
+               pr_info("Feature %d isn't supported\n", ENA_ADMIN_RSS_HASH_INPUT);
+               return -EPERM;
+       }
+
+       memset(&cmd, 0x0, sizeof(cmd));
+
+       cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+       cmd.aq_common_descriptor.flags =
+               ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+       cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_INPUT;
+       cmd.u.flow_hash_input.enabled_input_sort =
+               ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK |
+               ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK;
+
+       ret = ena_com_mem_addr_set(ena_dev,
+                                  &cmd.control_buffer.address,
+                                  rss->hash_ctrl_dma_addr);
+       if (unlikely(ret)) {
+               pr_err("memory address set failed\n");
+               return ret;
+       }
+       cmd.control_buffer.length = sizeof(*hash_ctrl);
+
+       ret = ena_com_execute_admin_command(admin_queue,
+                                           (struct ena_admin_aq_entry *)&cmd,
+                                           sizeof(cmd),
+                                           (struct ena_admin_acq_entry *)&resp,
+                                           sizeof(resp));
+       if (unlikely(ret))
+               pr_err("Failed to set hash input. error: %d\n", ret);
+
+       return ret;
+}
+
+int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+       struct ena_admin_feature_rss_hash_control *hash_ctrl =
+               rss->hash_ctrl;
+       u16 available_fields = 0;
+       int rc, i;
+
+       /* Get the supported hash input */
+       rc = ena_com_get_hash_ctrl(ena_dev, 0, NULL);
+       if (unlikely(rc))
+               return rc;
+
+       hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP4].fields =
+               ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+               ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+       hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP4].fields =
+               ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+               ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+       hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP6].fields =
+               ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+               ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+       hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP6].fields =
+               ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+               ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+       hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4].fields =
+               ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA;
+
+       hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP6].fields =
+               ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA;
+
+       hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4_FRAG].fields =
+               ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA;
+
+       hash_ctrl->selected_fields[ENA_ADMIN_RSS_NOT_IP].fields =
+               ENA_ADMIN_RSS_L2_DA | ENA_ADMIN_RSS_L2_SA;
+
+       for (i = 0; i < ENA_ADMIN_RSS_PROTO_NUM; i++) {
+               available_fields = hash_ctrl->selected_fields[i].fields &
+                               hash_ctrl->supported_fields[i].fields;
+               if (available_fields != hash_ctrl->selected_fields[i].fields) {
+                       pr_err("hash control doesn't support all the desire configuration. proto %x supported %x selected %x\n",
+                              i, hash_ctrl->supported_fields[i].fields,
+                              hash_ctrl->selected_fields[i].fields);
+                       return -EPERM;
+               }
+       }
+
+       rc = ena_com_set_hash_ctrl(ena_dev);
+
+       /* In case of failure, restore the old hash ctrl */
+       if (unlikely(rc))
+               ena_com_get_hash_ctrl(ena_dev, 0, NULL);
+
+       return rc;
+}
+
+int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev,
+                          enum ena_admin_flow_hash_proto proto,
+                          u16 hash_fields)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+       struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl;
+       u16 supported_fields;
+       int rc;
+
+       if (proto >= ENA_ADMIN_RSS_PROTO_NUM) {
+               pr_err("Invalid proto num (%u)\n", proto);
+               return -EINVAL;
+       }
+
+       /* Get the ctrl table */
+       rc = ena_com_get_hash_ctrl(ena_dev, proto, NULL);
+       if (unlikely(rc))
+               return rc;
+
+       /* Make sure all the fields are supported */
+       supported_fields = hash_ctrl->supported_fields[proto].fields;
+       if ((hash_fields & supported_fields) != hash_fields) {
+               pr_err("proto %d doesn't support the required fields %x. supports only: %x\n",
+                      proto, hash_fields, supported_fields);
+       }
+
+       hash_ctrl->selected_fields[proto].fields = hash_fields;
+
+       rc = ena_com_set_hash_ctrl(ena_dev);
+
+       /* In case of failure, restore the old hash ctrl */
+       if (unlikely(rc))
+               ena_com_get_hash_ctrl(ena_dev, 0, NULL);
+
+       return rc;
+}
+
+int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev,
+                                     u16 entry_idx, u16 entry_value)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+
+       if (unlikely(entry_idx >= (1 << rss->tbl_log_size)))
+               return -EINVAL;
+
+       if (unlikely(entry_value > ENA_TOTAL_NUM_QUEUES))
+               return -EINVAL;
+
+       rss->host_rss_ind_tbl[entry_idx] = entry_value;
+
+       return 0;
+}
+
+int ena_com_indirect_table_set(struct ena_com_dev *ena_dev)
+{
+       struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+       struct ena_rss *rss = &ena_dev->rss;
+       struct ena_admin_set_feat_cmd cmd;
+       struct ena_admin_set_feat_resp resp;
+       int ret;
+
+       if (!ena_com_check_supported_feature_id(
+                   ena_dev, ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG)) {
+               pr_info("Feature %d isn't supported\n",
+                       ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG);
+               return -EPERM;
+       }
+
+       ret = ena_com_ind_tbl_convert_to_device(ena_dev);
+       if (ret) {
+               pr_err("Failed to convert host indirection table to device table\n");
+               return ret;
+       }
+
+       memset(&cmd, 0x0, sizeof(cmd));
+
+       cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+       cmd.aq_common_descriptor.flags =
+               ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+       cmd.feat_common.feature_id = ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG;
+       cmd.u.ind_table.size = rss->tbl_log_size;
+       cmd.u.ind_table.inline_index = 0xFFFFFFFF;
+
+       ret = ena_com_mem_addr_set(ena_dev,
+                                  &cmd.control_buffer.address,
+                                  rss->rss_ind_tbl_dma_addr);
+       if (unlikely(ret)) {
+               pr_err("memory address set failed\n");
+               return ret;
+       }
+
+       cmd.control_buffer.length = (1ULL << rss->tbl_log_size) *
+               sizeof(struct ena_admin_rss_ind_table_entry);
+
+       ret = ena_com_execute_admin_command(admin_queue,
+                                           (struct ena_admin_aq_entry *)&cmd,
+                                           sizeof(cmd),
+                                           (struct ena_admin_acq_entry *)&resp,
+                                           sizeof(resp));
+
+       if (unlikely(ret))
+               pr_err("Failed to set indirect table. error: %d\n", ret);
+
+       return ret;
+}
+
+int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl)
+{
+       struct ena_rss *rss = &ena_dev->rss;
+       struct ena_admin_get_feat_resp get_resp;
+       u32 tbl_size;
+       int i, rc;
+
+       tbl_size = (1ULL << rss->tbl_log_size) *
+               sizeof(struct ena_admin_rss_ind_table_entry);
+
+       rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+                                   ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG,
+                                   rss->rss_ind_tbl_dma_addr,
+                                   tbl_size);
+       if (unlikely(rc))
+               return rc;
+
+       if (!ind_tbl)
+               return 0;
+
+       rc = ena_com_ind_tbl_convert_from_device(ena_dev);
+       if (unlikely(rc))
+               return rc;
+
+       for (i = 0; i < (1 << rss->tbl_log_size); i++)
+               ind_tbl[i] = rss->host_rss_ind_tbl[i];
+
+       return 0;
+}
+
+int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size)
+{
+       int rc;
+
+       memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss));
+
+       rc = ena_com_indirect_table_allocate(ena_dev, indr_tbl_log_size);
+       if (unlikely(rc))
+               goto err_indr_tbl;
+
+       rc = ena_com_hash_key_allocate(ena_dev);
+       if (unlikely(rc))
+               goto err_hash_key;
+
+       rc = ena_com_hash_ctrl_init(ena_dev);
+       if (unlikely(rc))
+               goto err_hash_ctrl;
+
+       return 0;
+
+err_hash_ctrl:
+       ena_com_hash_key_destroy(ena_dev);
+err_hash_key:
+       ena_com_indirect_table_destroy(ena_dev);
+err_indr_tbl:
+
+       return rc;
+}
+
+void ena_com_rss_destroy(struct ena_com_dev *ena_dev)
+{
+       ena_com_indirect_table_destroy(ena_dev);
+       ena_com_hash_key_destroy(ena_dev);
+       ena_com_hash_ctrl_destroy(ena_dev);
+
+       memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss));
+}
+
+int ena_com_allocate_host_info(struct ena_com_dev *ena_dev)
+{
+       struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+       host_attr->host_info =
+               dma_zalloc_coherent(ena_dev->dmadev, SZ_4K,
+                                   &host_attr->host_info_dma_addr, GFP_KERNEL);
+       if (unlikely(!host_attr->host_info))
+               return -ENOMEM;
+
+       return 0;
+}
+
+int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev,
+                               u32 debug_area_size)
+{
+       struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+       host_attr->debug_area_virt_addr =
+               dma_zalloc_coherent(ena_dev->dmadev, debug_area_size,
+                                   &host_attr->debug_area_dma_addr, GFP_KERNEL);
+       if (unlikely(!host_attr->debug_area_virt_addr)) {
+               host_attr->debug_area_size = 0;
+               return -ENOMEM;
+       }
+
+       host_attr->debug_area_size = debug_area_size;
+
+       return 0;
+}
+
+void ena_com_delete_host_info(struct ena_com_dev *ena_dev)
+{
+       struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+       if (host_attr->host_info) {
+               dma_free_coherent(ena_dev->dmadev, SZ_4K, host_attr->host_info,
+                                 host_attr->host_info_dma_addr);
+               host_attr->host_info = NULL;
+       }
+}
+
+void ena_com_delete_debug_area(struct ena_com_dev *ena_dev)
+{
+       struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+       if (host_attr->debug_area_virt_addr) {
+               dma_free_coherent(ena_dev->dmadev, host_attr->debug_area_size,
+                                 host_attr->debug_area_virt_addr,
+                                 host_attr->debug_area_dma_addr);
+               host_attr->debug_area_virt_addr = NULL;
+       }
+}
+
+int ena_com_set_host_attributes(struct ena_com_dev *ena_dev)
+{
+       struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+       struct ena_com_admin_queue *admin_queue;
+       struct ena_admin_set_feat_cmd cmd;
+       struct ena_admin_set_feat_resp resp;
+
+       int ret;
+
+       if (!ena_com_check_supported_feature_id(ena_dev,
+                                               ENA_ADMIN_HOST_ATTR_CONFIG)) {
+               pr_warn("Set host attribute isn't supported\n");
+               return -EPERM;
+       }
+
+       memset(&cmd, 0x0, sizeof(cmd));
+       admin_queue = &ena_dev->admin_queue;
+
+       cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+       cmd.feat_common.feature_id = ENA_ADMIN_HOST_ATTR_CONFIG;
+
+       ret = ena_com_mem_addr_set(ena_dev,
+                                  &cmd.u.host_attr.debug_ba,
+                                  host_attr->debug_area_dma_addr);
+       if (unlikely(ret)) {
+               pr_err("memory address set failed\n");
+               return ret;
+       }
+
+       ret = ena_com_mem_addr_set(ena_dev,
+                                  &cmd.u.host_attr.os_info_ba,
+                                  host_attr->host_info_dma_addr);
+       if (unlikely(ret)) {
+               pr_err("memory address set failed\n");
+               return ret;
+       }
+
+       cmd.u.host_attr.debug_area_size = host_attr->debug_area_size;
+
+       ret = ena_com_execute_admin_command(admin_queue,
+                                           (struct ena_admin_aq_entry *)&cmd,
+                                           sizeof(cmd),
+                                           (struct ena_admin_acq_entry *)&resp,
+                                           sizeof(resp));
+
+       if (unlikely(ret))
+               pr_err("Failed to set host attributes: %d\n", ret);
+
+       return ret;
+}
+
+/* Interrupt moderation */
+bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev)
+{
+       return ena_com_check_supported_feature_id(ena_dev,
+                                                 ENA_ADMIN_INTERRUPT_MODERATION);
+}
+
+int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev,
+                                                     u32 tx_coalesce_usecs)
+{
+       if (!ena_dev->intr_delay_resolution) {
+               pr_err("Illegal interrupt delay granularity value\n");
+               return -EFAULT;
+       }
+
+       ena_dev->intr_moder_tx_interval = tx_coalesce_usecs /
+               ena_dev->intr_delay_resolution;
+
+       return 0;
+}
+
+int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev,
+                                                     u32 rx_coalesce_usecs)
+{
+       if (!ena_dev->intr_delay_resolution) {
+               pr_err("Illegal interrupt delay granularity value\n");
+               return -EFAULT;
+       }
+
+       /* We use LOWEST entry of moderation table for storing
+        * nonadaptive interrupt coalescing values
+        */
+       ena_dev->intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval =
+               rx_coalesce_usecs / ena_dev->intr_delay_resolution;
+
+       return 0;
+}
+
+void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev)
+{
+       if (ena_dev->intr_moder_tbl)
+               devm_kfree(ena_dev->dmadev, ena_dev->intr_moder_tbl);
+       ena_dev->intr_moder_tbl = NULL;
+}
+
+int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev)
+{
+       struct ena_admin_get_feat_resp get_resp;
+       u16 delay_resolution;
+       int rc;
+
+       rc = ena_com_get_feature(ena_dev, &get_resp,
+                                ENA_ADMIN_INTERRUPT_MODERATION);
+
+       if (rc) {
+               if (rc == -EPERM) {
+                       pr_info("Feature %d isn't supported\n",
+                               ENA_ADMIN_INTERRUPT_MODERATION);
+                       rc = 0;
+               } else {
+                       pr_err("Failed to get interrupt moderation admin cmd. rc: %d\n",
+                              rc);
+               }
+
+               /* no moderation supported, disable adaptive support */
+               ena_com_disable_adaptive_moderation(ena_dev);
+               return rc;
+       }
+
+       rc = ena_com_init_interrupt_moderation_table(ena_dev);
+       if (rc)
+               goto err;
+
+       /* if moderation is supported by device we set adaptive moderation */
+       delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution;
+       ena_com_update_intr_delay_resolution(ena_dev, delay_resolution);
+       ena_com_enable_adaptive_moderation(ena_dev);
+
+       return 0;
+err:
+       ena_com_destroy_interrupt_moderation(ena_dev);
+       return rc;
+}
+
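+/* Fill the moderation table with the default per-level values. Each level
+ * holds an interrupt interval together with the packet and byte counts that
+ * characterize that traffic level.
+ */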
+void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev)
+{
+       struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+
+       if (!intr_moder_tbl)
+               return;
+
+       intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval =
+               ENA_INTR_LOWEST_USECS;
+       intr_moder_tbl[ENA_INTR_MODER_LOWEST].pkts_per_interval =
+               ENA_INTR_LOWEST_PKTS;
+       intr_moder_tbl[ENA_INTR_MODER_LOWEST].bytes_per_interval =
+               ENA_INTR_LOWEST_BYTES;
+
+       intr_moder_tbl[ENA_INTR_MODER_LOW].intr_moder_interval =
+               ENA_INTR_LOW_USECS;
+       intr_moder_tbl[ENA_INTR_MODER_LOW].pkts_per_interval =
+               ENA_INTR_LOW_PKTS;
+       intr_moder_tbl[ENA_INTR_MODER_LOW].bytes_per_interval =
+               ENA_INTR_LOW_BYTES;
+
+       intr_moder_tbl[ENA_INTR_MODER_MID].intr_moder_interval =
+               ENA_INTR_MID_USECS;
+       intr_moder_tbl[ENA_INTR_MODER_MID].pkts_per_interval =
+               ENA_INTR_MID_PKTS;
+       intr_moder_tbl[ENA_INTR_MODER_MID].bytes_per_interval =
+               ENA_INTR_MID_BYTES;
+
+       intr_moder_tbl[ENA_INTR_MODER_HIGH].intr_moder_interval =
+               ENA_INTR_HIGH_USECS;
+       intr_moder_tbl[ENA_INTR_MODER_HIGH].pkts_per_interval =
+               ENA_INTR_HIGH_PKTS;
+       intr_moder_tbl[ENA_INTR_MODER_HIGH].bytes_per_interval =
+               ENA_INTR_HIGH_BYTES;
+
+       intr_moder_tbl[ENA_INTR_MODER_HIGHEST].intr_moder_interval =
+               ENA_INTR_HIGHEST_USECS;
+       intr_moder_tbl[ENA_INTR_MODER_HIGHEST].pkts_per_interval =
+               ENA_INTR_HIGHEST_PKTS;
+       intr_moder_tbl[ENA_INTR_MODER_HIGHEST].bytes_per_interval =
+               ENA_INTR_HIGHEST_BYTES;
+}
+
+unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev)
+{
+       return ena_dev->intr_moder_tx_interval;
+}
+
+unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev)
+{
+       struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+
+       if (intr_moder_tbl)
+               return intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval;
+
+       return 0;
+}
+
+void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev,
+                                       enum ena_intr_moder_level level,
+                                       struct ena_intr_moder_entry *entry)
+{
+       struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+
+       if (level >= ENA_INTR_MAX_NUM_OF_LEVELS)
+               return;
+
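+       /* The table stores intervals in device units; convert the caller's
+        * microsecond value using the advertised delay resolution.
+        */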
+       intr_moder_tbl[level].intr_moder_interval = entry->intr_moder_interval;
+       if (ena_dev->intr_delay_resolution)
+               intr_moder_tbl[level].intr_moder_interval /=
+                       ena_dev->intr_delay_resolution;
+       intr_moder_tbl[level].pkts_per_interval = entry->pkts_per_interval;
+
+       /* use hardcoded value until ethtool supports bytecount parameter */
+       if (entry->bytes_per_interval != ENA_INTR_BYTE_COUNT_NOT_SUPPORTED)
+               intr_moder_tbl[level].bytes_per_interval = entry->bytes_per_interval;
+}
+
+void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev,
+                                      enum ena_intr_moder_level level,
+                                      struct ena_intr_moder_entry *entry)
+{
+       struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+
+       if (level >= ENA_INTR_MAX_NUM_OF_LEVELS)
+               return;
+
+       entry->intr_moder_interval = intr_moder_tbl[level].intr_moder_interval;
+       if (ena_dev->intr_delay_resolution)
+               entry->intr_moder_interval *= ena_dev->intr_delay_resolution;
+       entry->pkts_per_interval = intr_moder_tbl[level].pkts_per_interval;
+       entry->bytes_per_interval = intr_moder_tbl[level].bytes_per_interval;
+}
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
new file mode 100644 (file)
index 0000000..509d7b8
--- /dev/null
@@ -0,0 +1,1038 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ENA_COM
+#define ENA_COM
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+#include "ena_common_defs.h"
+#include "ena_admin_defs.h"
+#include "ena_eth_io_defs.h"
+#include "ena_regs_defs.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define ENA_MAX_NUM_IO_QUEUES          128U
+/* We need two queues for each IO (one for Tx and one for Rx) */
+#define ENA_TOTAL_NUM_QUEUES           (2 * (ENA_MAX_NUM_IO_QUEUES))
+
+#define ENA_MAX_HANDLERS 256
+
+#define ENA_MAX_PHYS_ADDR_SIZE_BITS 48
+
+/* Unit in usec */
+#define ENA_REG_READ_TIMEOUT 200000
+
+#define ADMIN_SQ_SIZE(depth)   ((depth) * sizeof(struct ena_admin_aq_entry))
+#define ADMIN_CQ_SIZE(depth)   ((depth) * sizeof(struct ena_admin_acq_entry))
+#define ADMIN_AENQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_aenq_entry))
+
+/*****************************************************************************/
+/*****************************************************************************/
+/* ENA adaptive interrupt moderation settings */
+
+#define ENA_INTR_LOWEST_USECS           (0)
+#define ENA_INTR_LOWEST_PKTS            (3)
+#define ENA_INTR_LOWEST_BYTES           (2 * 1524)
+
+#define ENA_INTR_LOW_USECS              (32)
+#define ENA_INTR_LOW_PKTS               (12)
+#define ENA_INTR_LOW_BYTES              (16 * 1024)
+
+#define ENA_INTR_MID_USECS              (80)
+#define ENA_INTR_MID_PKTS               (48)
+#define ENA_INTR_MID_BYTES              (64 * 1024)
+
+#define ENA_INTR_HIGH_USECS             (128)
+#define ENA_INTR_HIGH_PKTS              (96)
+#define ENA_INTR_HIGH_BYTES             (128 * 1024)
+
+#define ENA_INTR_HIGHEST_USECS          (192)
+#define ENA_INTR_HIGHEST_PKTS           (128)
+#define ENA_INTR_HIGHEST_BYTES          (192 * 1024)
+
+#define ENA_INTR_INITIAL_TX_INTERVAL_USECS             196
+#define ENA_INTR_INITIAL_RX_INTERVAL_USECS             4
+#define ENA_INTR_DELAY_OLD_VALUE_WEIGHT                        6
+#define ENA_INTR_DELAY_NEW_VALUE_WEIGHT                        4
+#define ENA_INTR_MODER_LEVEL_STRIDE                    2
+#define ENA_INTR_BYTE_COUNT_NOT_SUPPORTED              0xFFFFFF
+
+enum ena_intr_moder_level {
+       ENA_INTR_MODER_LOWEST = 0,
+       ENA_INTR_MODER_LOW,
+       ENA_INTR_MODER_MID,
+       ENA_INTR_MODER_HIGH,
+       ENA_INTR_MODER_HIGHEST,
+       ENA_INTR_MAX_NUM_OF_LEVELS,
+};
+
+struct ena_intr_moder_entry {
+       unsigned int intr_moder_interval;
+       unsigned int pkts_per_interval;
+       unsigned int bytes_per_interval;
+};
+
+enum queue_direction {
+       ENA_COM_IO_QUEUE_DIRECTION_TX,
+       ENA_COM_IO_QUEUE_DIRECTION_RX
+};
+
+struct ena_com_buf {
+       dma_addr_t paddr; /**< Buffer physical address */
+       u16 len; /**< Buffer length in bytes */
+};
+
+struct ena_com_rx_buf_info {
+       u16 len;
+       u16 req_id;
+};
+
+struct ena_com_io_desc_addr {
+       u8 __iomem *pbuf_dev_addr; /* LLQ address */
+       u8 *virt_addr;
+       dma_addr_t phys_addr;
+};
+
+struct ena_com_tx_meta {
+       u16 mss;
+       u16 l3_hdr_len;
+       u16 l3_hdr_offset;
+       u16 l4_hdr_len; /* In words */
+};
+
+struct ena_com_io_cq {
+       struct ena_com_io_desc_addr cdesc_addr;
+
+       /* Interrupt unmask register */
+       u32 __iomem *unmask_reg;
+
+       /* The completion queue head doorbell register */
+       u32 __iomem *cq_head_db_reg;
+
+       /* numa configuration register (for TPH) */
+       u32 __iomem *numa_node_cfg_reg;
+
+       /* The value to write to the above register to unmask
+        * the interrupt of this queue
+        */
+       u32 msix_vector;
+
+       enum queue_direction direction;
+
+       /* holds the number of cdesc of the current packet */
+       u16 cur_rx_pkt_cdesc_count;
+       /* save the first cdesc idx of the current packet */
+       u16 cur_rx_pkt_cdesc_start_idx;
+
+       u16 q_depth;
+       /* Caller qid */
+       u16 qid;
+
+       /* Device queue index */
+       u16 idx;
+       u16 head;
+       u16 last_head_update;
+       u8 phase;
+       u8 cdesc_entry_size_in_bytes;
+
+} ____cacheline_aligned;
+
+struct ena_com_io_sq {
+       struct ena_com_io_desc_addr desc_addr;
+
+       u32 __iomem *db_addr;
+       u8 __iomem *header_addr;
+
+       enum queue_direction direction;
+       enum ena_admin_placement_policy_type mem_queue_type;
+
+       u32 msix_vector;
+       struct ena_com_tx_meta cached_tx_meta;
+
+       u16 q_depth;
+       u16 qid;
+
+       u16 idx;
+       u16 tail;
+       u16 next_to_comp;
+       u32 tx_max_header_size;
+       u8 phase;
+       u8 desc_entry_size;
+       u8 dma_addr_bits;
+} ____cacheline_aligned;
+
+struct ena_com_admin_cq {
+       struct ena_admin_acq_entry *entries;
+       dma_addr_t dma_addr;
+
+       u16 head;
+       u8 phase;
+};
+
+struct ena_com_admin_sq {
+       struct ena_admin_aq_entry *entries;
+       dma_addr_t dma_addr;
+
+       u32 __iomem *db_addr;
+
+       u16 head;
+       u16 tail;
+       u8 phase;
+
+};
+
+struct ena_com_stats_admin {
+       u32 aborted_cmd;
+       u32 submitted_cmd;
+       u32 completed_cmd;
+       u32 out_of_space;
+       u32 no_completion;
+};
+
+struct ena_com_admin_queue {
+       void *q_dmadev;
+       spinlock_t q_lock; /* spinlock for the admin queue */
+       struct ena_comp_ctx *comp_ctx;
+       u16 q_depth;
+       struct ena_com_admin_cq cq;
+       struct ena_com_admin_sq sq;
+
+       /* Indicate if the admin queue should poll for completion */
+       bool polling;
+
+       u16 curr_cmd_id;
+
+       /* Indicate that the ena was initialized and can
+        * process new admin commands
+        */
+       bool running_state;
+
+       /* Count the number of outstanding admin commands */
+       atomic_t outstanding_cmds;
+
+       struct ena_com_stats_admin stats;
+};
+
+struct ena_aenq_handlers;
+
+struct ena_com_aenq {
+       u16 head;
+       u8 phase;
+       struct ena_admin_aenq_entry *entries;
+       dma_addr_t dma_addr;
+       u16 q_depth;
+       struct ena_aenq_handlers *aenq_handlers;
+};
+
+struct ena_com_mmio_read {
+       struct ena_admin_ena_mmio_req_read_less_resp *read_resp;
+       dma_addr_t read_resp_dma_addr;
+       u16 seq_num;
+       bool readless_supported;
+       /* spin lock to ensure a single outstanding read */
+       spinlock_t lock;
+};
+
+struct ena_rss {
+       /* Indirect table */
+       u16 *host_rss_ind_tbl;
+       struct ena_admin_rss_ind_table_entry *rss_ind_tbl;
+       dma_addr_t rss_ind_tbl_dma_addr;
+       u16 tbl_log_size;
+
+       /* Hash key */
+       enum ena_admin_hash_functions hash_func;
+       struct ena_admin_feature_rss_flow_hash_control *hash_key;
+       dma_addr_t hash_key_dma_addr;
+       u32 hash_init_val;
+
+       /* Flow hash control */
+       struct ena_admin_feature_rss_hash_control *hash_ctrl;
+       dma_addr_t hash_ctrl_dma_addr;
+
+};
+
+struct ena_host_attribute {
+       /* Debug area */
+       u8 *debug_area_virt_addr;
+       dma_addr_t debug_area_dma_addr;
+       u32 debug_area_size;
+
+       /* Host information */
+       struct ena_admin_host_info *host_info;
+       dma_addr_t host_info_dma_addr;
+};
+
+/* Each ena_dev is a PCI function. */
+struct ena_com_dev {
+       struct ena_com_admin_queue admin_queue;
+       struct ena_com_aenq aenq;
+       struct ena_com_io_cq io_cq_queues[ENA_TOTAL_NUM_QUEUES];
+       struct ena_com_io_sq io_sq_queues[ENA_TOTAL_NUM_QUEUES];
+       u8 __iomem *reg_bar;
+       void __iomem *mem_bar;
+       void *dmadev;
+
+       enum ena_admin_placement_policy_type tx_mem_queue_type;
+       u32 tx_max_header_size;
+       u16 stats_func; /* Selected function for extended statistic dump */
+       u16 stats_queue; /* Selected queue for extended statistic dump */
+
+       struct ena_com_mmio_read mmio_read;
+
+       struct ena_rss rss;
+       u32 supported_features;
+       u32 dma_addr_bits;
+
+       struct ena_host_attribute host_attr;
+       bool adaptive_coalescing;
+       u16 intr_delay_resolution;
+       u32 intr_moder_tx_interval;
+       struct ena_intr_moder_entry *intr_moder_tbl;
+};
+
+struct ena_com_dev_get_features_ctx {
+       struct ena_admin_queue_feature_desc max_queues;
+       struct ena_admin_device_attr_feature_desc dev_attr;
+       struct ena_admin_feature_aenq_desc aenq;
+       struct ena_admin_feature_offload_desc offload;
+};
+
+struct ena_com_create_io_ctx {
+       enum ena_admin_placement_policy_type mem_queue_type;
+       enum queue_direction direction;
+       int numa_node;
+       u32 msix_vector;
+       u16 queue_size;
+       u16 qid;
+};
+
+typedef void (*ena_aenq_handler)(void *data,
+       struct ena_admin_aenq_entry *aenq_e);
+
+/* Holds aenq handlers. Indexed by AENQ event group */
+struct ena_aenq_handlers {
+       ena_aenq_handler handlers[ENA_MAX_HANDLERS];
+       ena_aenq_handler unimplemented_handler;
+};
+
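+/* Illustrative sketch (handler names are examples, not part of this API) of
+ * a handler table with one implemented group and a catch-all for the rest:
+ *
+ *     static struct ena_aenq_handlers aenq_handlers = {
+ *             .handlers = {
+ *                     [ENA_ADMIN_LINK_CHANGE] = link_change_handler,
+ *             },
+ *             .unimplemented_handler = unimplemented_aenq_handler,
+ *     };
+ */
+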
+/*****************************************************************************/
+/*****************************************************************************/
+
+/* ena_com_mmio_reg_read_request_init - Init the mmio reg read mechanism
+ * @ena_dev: ENA communication layer struct
+ *
+ * Initialize the register read mechanism.
+ *
+ * @note: This method must be the first stage in the initialization sequence.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_mmio_read_mode - Enable/disable the mmio reg read mechanism
+ * @ena_dev: ENA communication layer struct
+ * @readless_supported: readless mode (enable/disable)
+ */
+void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev,
+                               bool readless_supported);
+
+/* ena_com_mmio_reg_read_request_write_dev_addr - Write the mmio reg read return
+ * value physical address.
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev);
+
+/* ena_com_mmio_reg_read_request_destroy - Destroy the mmio reg read mechanism
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev);
+
+/* ena_com_admin_init - Init the admin and the async queues
+ * @ena_dev: ENA communication layer struct
+ * @aenq_handlers: The handlers to be called upon AENQ events.
+ * @init_spinlock: Indicate whether this method should init the admin spinlock
+ * or whether the spinlock was already initialized (for example, after FLR).
+ *
+ * Initialize the admin submission and completion queues.
+ * Initialize the asynchronous events notification queues.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_admin_init(struct ena_com_dev *ena_dev,
+                      struct ena_aenq_handlers *aenq_handlers,
+                      bool init_spinlock);
+
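+/* A minimal bring-up sketch using calls from this header (illustrative only;
+ * error handling, device reset and teardown are omitted):
+ *
+ *     rc = ena_com_mmio_reg_read_request_init(ena_dev);
+ *     rc = ena_com_admin_init(ena_dev, &aenq_handlers, true);
+ *     rc = ena_com_get_dev_attr_feat(ena_dev, &get_feat_ctx);
+ *     rc = ena_com_create_io_queue(ena_dev, &create_ctx);
+ */
+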
+/* ena_com_admin_destroy - Destroy the admin and the async events queues.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @note: Before calling this method, the caller must validate that the device
+ * won't send any additional admin completions/aenq.
+ * To achieve that, a FLR is recommended.
+ */
+void ena_com_admin_destroy(struct ena_com_dev *ena_dev);
+
+/* ena_com_dev_reset - Perform device FLR to the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_dev_reset(struct ena_com_dev *ena_dev);
+
+/* ena_com_create_io_queue - Create io queue.
+ * @ena_dev: ENA communication layer struct
+ * @ctx - create context structure
+ *
+ * Create the submission and the completion queues.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_create_io_queue(struct ena_com_dev *ena_dev,
+                           struct ena_com_create_io_ctx *ctx);
+
+/* ena_com_destroy_io_queue - Destroy IO queue with the queue id - qid.
+ * @ena_dev: ENA communication layer struct
+ * @qid - the caller virtual queue id.
+ */
+void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid);
+
+/* ena_com_get_io_handlers - Return the io queue handlers
+ * @ena_dev: ENA communication layer struct
+ * @qid - the caller virtual queue id.
+ * @io_sq - IO submission queue handler
+ * @io_cq - IO completion queue handler.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid,
+                           struct ena_com_io_sq **io_sq,
+                           struct ena_com_io_cq **io_cq);
+
+/* ena_com_admin_aenq_enable - Enable asynchronous event notifications
+ * @ena_dev: ENA communication layer struct
+ *
+ * After this method is called, AENQ events can be received.
+ */
+void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_admin_running_state - Set the state of the admin queue
+ * @ena_dev: ENA communication layer struct
+ *
+ * Change the state of the admin queue (enable/disable)
+ */
+void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state);
+
+/* ena_com_get_admin_running_state - Get the admin queue state
+ * @ena_dev: ENA communication layer struct
+ *
+ * Retrieve the state of the admin queue (enable/disable)
+ *
+ * @return - current admin queue running state (enable/disable)
+ */
+bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_admin_polling_mode - Set the admin completion queue polling mode
+ * @ena_dev: ENA communication layer struct
+ * @polling: Enable/Disable polling mode
+ *
+ * Set the admin completion mode.
+ */
+void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling);
+
+/* ena_com_get_ena_admin_polling_mode - Get the admin completion queue polling mode
+ * @ena_dev: ENA communication layer struct
+ *
+ * Get the admin completion mode.
+ * If polling mode is on, ena_com_execute_admin_command will poll the admin
+ * completion queue for command completions; otherwise it will wait on a
+ * wait event.
+ *
+ * @return state
+ */
+bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev);
+
+/* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method goes over the admin completion queue and wakes up all the
+ * pending threads that wait on the commands' wait event.
+ *
+ * @note: Should be called after MSI-X interrupt.
+ */
+void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev);
+
+/* ena_com_aenq_intr_handler - AENQ interrupt handler
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method goes over the async event notification queue and calls the
+ * proper AENQ handler.
+ */
+void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data);
+
+/* ena_com_abort_admin_commands - Abort all the outstanding admin commands.
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method aborts all the outstanding admin commands.
+ * The caller should then call ena_com_wait_for_abort_completion to make sure
+ * all the commands were completed.
+ */
+void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev);
+
+/* ena_com_wait_for_abort_completion - Wait for admin commands abort.
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method waits until all outstanding admin commands have completed.
+ */
+void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev);
+
+/* ena_com_validate_version - Validate the device parameters
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method validates that the device parameters are the same as the
+ * parameters saved in ena_dev.
+ * It is useful after a device reset, to verify that the device MAC address
+ * and the device offloads are the same as before the reset.
+ *
+ * @return - 0 on success, negative value otherwise.
+ */
+int ena_com_validate_version(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_link_params - Retrieve physical link parameters.
+ * @ena_dev: ENA communication layer struct
+ * @resp: Link parameters
+ *
+ * Retrieve the physical link parameters,
+ * like speed, auto-negotiation and full duplex support.
+ *
+ * @return - 0 on Success, negative value otherwise.
+ */
+int ena_com_get_link_params(struct ena_com_dev *ena_dev,
+                           struct ena_admin_get_feat_resp *resp);
+
+/* ena_com_get_dma_width - Retrieve physical dma address width the device
+ * supports.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Retrieve the maximum physical address bits the device can handle.
+ *
+ * @return: > 0 on Success and negative value otherwise.
+ */
+int ena_com_get_dma_width(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_aenq_config - Set aenq groups configurations
+ * @ena_dev: ENA communication layer struct
+ * @groups_flag: bit field of enum ena_admin_aenq_group flags.
+ *
+ * Configure which aenq event group the driver would like to receive.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag);
+
+/* ena_com_get_dev_attr_feat - Get device features
+ * @ena_dev: ENA communication layer struct
+ * @get_feat_ctx: returned context that contains the device features.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
+                             struct ena_com_dev_get_features_ctx *get_feat_ctx);
+
+/* ena_com_get_dev_basic_stats - Get device basic statistics
+ * @ena_dev: ENA communication layer struct
+ * @stats: stats return value
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev,
+                               struct ena_admin_basic_stats *stats);
+
+/* ena_com_set_dev_mtu - Configure the device mtu.
+ * @ena_dev: ENA communication layer struct
+ * @mtu: mtu value
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu);
+
+/* ena_com_get_offload_settings - Retrieve the device offloads capabilities
+ * @ena_dev: ENA communication layer struct
+ * @offload: offload return value
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_offload_settings(struct ena_com_dev *ena_dev,
+                                struct ena_admin_feature_offload_desc *offload);
+
+/* ena_com_rss_init - Init RSS
+ * @ena_dev: ENA communication layer struct
+ * @log_size: indirection log size
+ *
+ * Allocate RSS/RFS resources.
+ * The caller can then configure RSS using ena_com_set_hash_function,
+ * ena_com_set_hash_ctrl and ena_com_indirect_table_set.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size);
+
+/* ena_com_rss_destroy - Destroy rss
+ * @ena_dev: ENA communication layer struct
+ *
+ * Free all the RSS/RFS resources.
+ */
+void ena_com_rss_destroy(struct ena_com_dev *ena_dev);
+
+/* ena_com_fill_hash_function - Fill RSS hash function
+ * @ena_dev: ENA communication layer struct
+ * @func: The hash function (Toeplitz or crc)
+ * @key: Hash key (for toeplitz hash)
+ * @key_len: key length (max length 10 DW)
+ * @init_val: initial value for the hash function
+ *
+ * Fill the ena_dev resources with the desired hash function, hash key, key_len
+ * and key initial value (if needed by the hash function).
+ * To flush the key into the device the caller should call
+ * ena_com_set_hash_function.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
+                              enum ena_admin_hash_functions func,
+                              const u8 *key, u16 key_len, u32 init_val);
+
+/* ena_com_set_hash_function - Flush the hash function and its dependencies to
+ * the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Flush the hash function and its dependencies (key, key length and
+ * initial value) if needed.
+ *
+ * @note: Prior to this method the caller should call ena_com_fill_hash_function
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_hash_function(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_hash_function - Retrieve the hash function and the hash key
+ * from the device.
+ * @ena_dev: ENA communication layer struct
+ * @func: hash function
+ * @key: hash key
+ *
+ * Retrieve the hash function and the hash key from the device.
+ *
+ * @note: If the caller called ena_com_fill_hash_function but didn't flush
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
+                             enum ena_admin_hash_functions *func,
+                             u8 *key);
+
+/* ena_com_fill_hash_ctrl - Fill RSS hash control
+ * @ena_dev: ENA communication layer struct.
+ * @proto: The protocol to configure.
+ * @hash_fields: bit mask of ena_admin_flow_hash_fields
+ *
+ * Fill the ena_dev resources with the desired hash control (the Ethernet
+ * fields that take part in the hash) for a specific protocol.
+ * To flush the hash control to the device, the caller should call
+ * ena_com_set_hash_ctrl.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev,
+                          enum ena_admin_flow_hash_proto proto,
+                          u16 hash_fields);
+
+/* ena_com_set_hash_ctrl - Flush the hash control resources to the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Flush the hash control (the Ethernet fields that take part in the hash)
+ *
+ * @note: Prior to this method the caller should call ena_com_fill_hash_ctrl.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_hash_ctrl - Retrieve the hash control from the device.
+ * @ena_dev: ENA communication layer struct
+ * @proto: The protocol to retrieve.
+ * @fields: bit mask of ena_admin_flow_hash_fields.
+ *
+ * Retrieve the hash control from the device.
+ *
+ * @note: If the caller called ena_com_fill_hash_ctrl but didn't flush
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev,
+                         enum ena_admin_flow_hash_proto proto,
+                         u16 *fields);
+
+/* ena_com_set_default_hash_ctrl - Set the hash control to a default
+ * configuration.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Fill the ena_dev resources with the default hash control configuration.
+ * To flush the hash control to the device, the caller should call
+ * ena_com_set_hash_ctrl.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev);
+
+/* ena_com_indirect_table_fill_entry - Fill a single entry in the RSS
+ * indirection table
+ * @ena_dev: ENA communication layer struct.
+ * @entry_idx - indirection table entry.
+ * @entry_value - redirection value
+ *
+ * Fill a single entry of the RSS indirection table in the ena_dev resources.
+ * To flush the indirection table to the device, the caller should call
+ * ena_com_indirect_table_set.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev,
+                                     u16 entry_idx, u16 entry_value);
+
+/* ena_com_indirect_table_set - Flush the indirection table to the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Flush the indirection table to the device.
+ * Prior to this method the caller should call ena_com_indirect_table_fill_entry
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_indirect_table_set(struct ena_com_dev *ena_dev);
+
+/* ena_com_indirect_table_get - Retrieve the indirection table from the device.
+ * @ena_dev: ENA communication layer struct
+ * @ind_tbl: indirection table
+ *
+ * Retrieve the RSS indirection table from the device.
+ *
+ * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flush
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl);
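The RSS helpers above follow a fill-then-flush pattern: ena_com_fill_hash_function, ena_com_fill_hash_ctrl and ena_com_indirect_table_fill_entry only stage state in ena_dev, and nothing reaches the device until the matching ena_com_set_hash_function, ena_com_set_hash_ctrl and ena_com_indirect_table_set calls. A minimal sketch of that flow follows; the function name, the 40-byte Toeplitz key, the table log size of 7 and the round-robin queue mapping are illustrative, and ENA_ADMIN_TOEPLITZ is assumed to come from ena_admin_defs.h.

static int my_ena_rss_configure(struct ena_com_dev *ena_dev, u16 num_rx_queues)
{
	static const u8 toeplitz_key[40] = { /* illustrative key material */ };
	const u16 log_size = 7;		/* 2^7 = 128 entries, illustrative */
	int rc;
	u16 i;

	rc = ena_com_rss_init(ena_dev, log_size);
	if (rc)
		return rc;

	/* Stage the hash function and key, then flush them to the device */
	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ,
					toeplitz_key, sizeof(toeplitz_key), 0);
	if (!rc)
		rc = ena_com_set_hash_function(ena_dev);
	if (rc)
		goto err;

	/* Stage the default hash control, then flush it */
	rc = ena_com_set_default_hash_ctrl(ena_dev);
	if (!rc)
		rc = ena_com_set_hash_ctrl(ena_dev);
	if (rc)
		goto err;

	/* Stage a round-robin indirection table, then flush it */
	for (i = 0; i < (1 << log_size); i++) {
		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
						       i % num_rx_queues);
		if (rc)
			goto err;
	}
	rc = ena_com_indirect_table_set(ena_dev);
	if (rc)
		goto err;

	return 0;

err:
	ena_com_rss_destroy(ena_dev);
	return rc;
}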
+
+/* ena_com_allocate_host_info - Allocate host info resources.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_allocate_host_info(struct ena_com_dev *ena_dev);
+
+/* ena_com_allocate_debug_area - Allocate debug area.
+ * @ena_dev: ENA communication layer struct
+ * @debug_area_size - debug area size.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev,
+                               u32 debug_area_size);
+
+/* ena_com_delete_debug_area - Free the debug area resources.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Free the allocated debug area.
+ */
+void ena_com_delete_debug_area(struct ena_com_dev *ena_dev);
+
+/* ena_com_delete_host_info - Free the host info resources.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Free the allocated host info.
+ */
+void ena_com_delete_host_info(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_host_attributes - Update the device with the host
+ * attributes (debug area and host info) base address.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_host_attributes(struct ena_com_dev *ena_dev);
+
+/* ena_com_create_io_cq - Create io completion queue.
+ * @ena_dev: ENA communication layer struct
+ * @io_cq - io completion queue handler
+ *
+ * Create IO completion queue.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_create_io_cq(struct ena_com_dev *ena_dev,
+                        struct ena_com_io_cq *io_cq);
+
+/* ena_com_destroy_io_cq - Destroy io completion queue.
+ * @ena_dev: ENA communication layer struct
+ * @io_cq - io completion queue handler
+ *
+ * Destroy IO completion queue.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev,
+                         struct ena_com_io_cq *io_cq);
+
+/* ena_com_execute_admin_command - Execute admin command
+ * @admin_queue: admin queue.
+ * @cmd: the admin command to execute.
+ * @cmd_size: the command size.
+ * @cmd_comp: command completion return value.
+ * @cmd_comp_size: command completion size.
+ *
+ * Submit an admin command and then wait until the device returns a
+ * completion.
+ * The completion will be copied into cmd_comp.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue,
+                                 struct ena_admin_aq_entry *cmd,
+                                 size_t cmd_size,
+                                 struct ena_admin_acq_entry *cmd_comp,
+                                 size_t cmd_comp_size);
+
+/* ena_com_init_interrupt_moderation - Init interrupt moderation
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev);
+
+/* ena_com_destroy_interrupt_moderation - Destroy interrupt moderation resources
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev);
+
+/* ena_com_interrupt_moderation_supported - Return if interrupt moderation
+ * capability is supported by the device.
+ *
+ * @return - supported or not.
+ */
+bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev);
+
+/* ena_com_config_default_interrupt_moderation_table - Restore the interrupt
+ * moderation table back to the default parameters.
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev);
+
+/* ena_com_update_nonadaptive_moderation_interval_tx - Update the
+ * non-adaptive interval in Tx direction.
+ * @ena_dev: ENA communication layer struct
+ * @tx_coalesce_usecs: Interval in usec.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev,
+                                                     u32 tx_coalesce_usecs);
+
+/* ena_com_update_nonadaptive_moderation_interval_rx - Update the
+ * non-adaptive interval in Rx direction.
+ * @ena_dev: ENA communication layer struct
+ * @rx_coalesce_usecs: Interval in usec.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev,
+                                                     u32 rx_coalesce_usecs);
+
+/* ena_com_get_nonadaptive_moderation_interval_tx - Retrieve the
+ * non-adaptive interval in Tx direction.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - interval in usec
+ */
+unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_nonadaptive_moderation_interval_rx - Retrieve the
+ * non-adaptive interval in Rx direction.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - interval in usec
+ */
+unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev);
+
+/* ena_com_init_intr_moderation_entry - Update a single entry in the interrupt
+ * moderation table.
+ * @ena_dev: ENA communication layer struct
+ * @level: Interrupt moderation table level
+ * @entry: Entry value
+ *
+ * Update a single entry in the interrupt moderation table.
+ */
+void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev,
+                                       enum ena_intr_moder_level level,
+                                       struct ena_intr_moder_entry *entry);
+
+/* ena_com_get_intr_moderation_entry - Init ena_intr_moder_entry.
+ * @ena_dev: ENA communication layer struct
+ * @level: Interrupt moderation table level
+ * @entry: Entry to fill.
+ *
+ * Initialize the entry according to the adaptive interrupt moderation table.
+ */
+void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev,
+                                      enum ena_intr_moder_level level,
+                                      struct ena_intr_moder_entry *entry);
+
+static inline bool ena_com_get_adaptive_moderation_enabled(struct ena_com_dev *ena_dev)
+{
+       return ena_dev->adaptive_coalescing;
+}
+
+static inline void ena_com_enable_adaptive_moderation(struct ena_com_dev *ena_dev)
+{
+       ena_dev->adaptive_coalescing = true;
+}
+
+static inline void ena_com_disable_adaptive_moderation(struct ena_com_dev *ena_dev)
+{
+       ena_dev->adaptive_coalescing = false;
+}
+
+/* ena_com_calculate_interrupt_delay - Calculate new interrupt delay
+ * @ena_dev: ENA communication layer struct
+ * @pkts: Number of packets since the last update
+ * @bytes: Number of bytes received since the last update.
+ * @smoothed_interval: Returned interval
+ * @moder_tbl_idx: Current table level as input; updated to the new level on
+ * return.
+ */
+static inline void ena_com_calculate_interrupt_delay(struct ena_com_dev *ena_dev,
+                                                    unsigned int pkts,
+                                                    unsigned int bytes,
+                                                    unsigned int *smoothed_interval,
+                                                    unsigned int *moder_tbl_idx)
+{
+       enum ena_intr_moder_level curr_moder_idx, new_moder_idx;
+       struct ena_intr_moder_entry *curr_moder_entry;
+       struct ena_intr_moder_entry *pred_moder_entry;
+       struct ena_intr_moder_entry *new_moder_entry;
+       struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+       unsigned int interval;
+
+       /* We apply adaptive moderation on Rx path only.
+        * Tx uses static interrupt moderation.
+        */
+       if (!pkts || !bytes)
+               /* Tx interrupt, or spurious interrupt,
+                * in both cases we just use the same delay values
+                */
+               return;
+
+       curr_moder_idx = (enum ena_intr_moder_level)(*moder_tbl_idx);
+       if (unlikely(curr_moder_idx >= ENA_INTR_MAX_NUM_OF_LEVELS)) {
+               pr_err("Wrong moderation index %u\n", curr_moder_idx);
+               return;
+       }
+
+       curr_moder_entry = &intr_moder_tbl[curr_moder_idx];
+       new_moder_idx = curr_moder_idx;
+
+       if (curr_moder_idx == ENA_INTR_MODER_LOWEST) {
+               if ((pkts > curr_moder_entry->pkts_per_interval) ||
+                   (bytes > curr_moder_entry->bytes_per_interval))
+                       new_moder_idx =
+                               (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE);
+       } else {
+               pred_moder_entry = &intr_moder_tbl[curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE];
+
+               if ((pkts <= pred_moder_entry->pkts_per_interval) ||
+                   (bytes <= pred_moder_entry->bytes_per_interval))
+                       new_moder_idx =
+                               (enum ena_intr_moder_level)(curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE);
+               else if ((pkts > curr_moder_entry->pkts_per_interval) ||
+                        (bytes > curr_moder_entry->bytes_per_interval)) {
+                       if (curr_moder_idx != ENA_INTR_MODER_HIGHEST)
+                               new_moder_idx =
+                                       (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE);
+               }
+       }
+       new_moder_entry = &intr_moder_tbl[new_moder_idx];
+
+       interval = new_moder_entry->intr_moder_interval;
+       *smoothed_interval = (
+               (interval * ENA_INTR_DELAY_NEW_VALUE_WEIGHT +
+               ENA_INTR_DELAY_OLD_VALUE_WEIGHT * (*smoothed_interval)) + 5) /
+               10;
+
+       *moder_tbl_idx = new_moder_idx;
+}
+
+/* ena_com_update_intr_reg - Prepare interrupt register
+ * @intr_reg: interrupt register to update.
+ * @rx_delay_interval: Rx interval in usecs
+ * @tx_delay_interval: Tx interval in usecs
+ * @unmask: unmask enable/disable
+ *
+ * Prepare interrupt update register with the supplied parameters.
+ */
+static inline void ena_com_update_intr_reg(struct ena_eth_io_intr_reg *intr_reg,
+                                          u32 rx_delay_interval,
+                                          u32 tx_delay_interval,
+                                          bool unmask)
+{
+       intr_reg->intr_control = 0;
+       intr_reg->intr_control |= rx_delay_interval &
+               ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK;
+
+       intr_reg->intr_control |=
+               (tx_delay_interval << ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT)
+               & ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK;
+
+       if (unmask)
+               intr_reg->intr_control |= ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK;
+}
+
+#endif /* !(ENA_COM) */
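The interrupt moderation helpers above are intended to be called from the driver's per-queue interrupt path: ena_com_calculate_interrupt_delay updates the smoothed Rx interval and the table level from the traffic seen since the last interrupt, and ena_com_update_intr_reg packs the chosen Rx/Tx delays plus the unmask bit into the register value that re-arms the interrupt. A sketch of how these could be combined is below; the wrapper name and the per-ring state passed in are hypothetical, and ena_com_unmask_intr comes from ena_eth_com.h, added later in this patch.

#include "ena_eth_com.h"	/* pulls in ena_com.h as well */

static void my_update_intr_moderation(struct ena_com_dev *ena_dev,
				      struct ena_com_io_cq *io_cq,
				      unsigned int rx_pkts, unsigned int rx_bytes,
				      unsigned int *smoothed_rx_usecs,
				      unsigned int *moder_tbl_idx)
{
	struct ena_eth_io_intr_reg intr_reg;
	unsigned int tx_usecs =
		ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);

	if (ena_com_get_adaptive_moderation_enabled(ena_dev))
		ena_com_calculate_interrupt_delay(ena_dev, rx_pkts, rx_bytes,
						  smoothed_rx_usecs,
						  moder_tbl_idx);
	else
		*smoothed_rx_usecs =
			ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);

	/* Write the chosen delays and re-arm (unmask) the interrupt */
	ena_com_update_intr_reg(&intr_reg, *smoothed_rx_usecs, tx_usecs, true);
	ena_com_unmask_intr(io_cq, &intr_reg);
}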
diff --git a/drivers/net/ethernet/amazon/ena/ena_common_defs.h b/drivers/net/ethernet/amazon/ena/ena_common_defs.h
new file mode 100644 (file)
index 0000000..bb8d736
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _ENA_COMMON_H_
+#define _ENA_COMMON_H_
+
+#define ENA_COMMON_SPEC_VERSION_MAJOR  0 /*  */
+#define ENA_COMMON_SPEC_VERSION_MINOR  10 /*  */
+
+/* ENA operates with 48-bit memory addresses. ena_mem_addr_t */
+struct ena_common_mem_addr {
+       u32 mem_addr_low;
+
+       u16 mem_addr_high;
+
+       /* MBZ */
+       u16 reserved16;
+};
+
+#endif /*_ENA_COMMON_H_ */
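Every host buffer handed to the device is described with this 48-bit address layout: the low 32 bits, a 16-bit high part, and a reserved u16 that must be zero. A small sketch of how a 64-bit dma_addr_t could be packed into it is below; the helper name and error convention are illustrative, and a real caller would also check the address against the DMA width reported by the device.

#include <linux/kernel.h>
#include <linux/types.h>

static int my_set_mem_addr(struct ena_common_mem_addr *mem_addr, dma_addr_t addr)
{
	/* Bits 48..63 must be zero for a 48-bit capable device */
	if (addr & GENMASK_ULL(63, 48))
		return -EINVAL;

	mem_addr->mem_addr_low  = lower_32_bits(addr);
	mem_addr->mem_addr_high = (u16)(addr >> 32);
	mem_addr->reserved16    = 0;	/* MBZ */

	return 0;
}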
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
new file mode 100644 (file)
index 0000000..539c536
--- /dev/null
@@ -0,0 +1,501 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ena_eth_com.h"
+
+static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc(
+       struct ena_com_io_cq *io_cq)
+{
+       struct ena_eth_io_rx_cdesc_base *cdesc;
+       u16 expected_phase, head_masked;
+       u16 desc_phase;
+
+       head_masked = io_cq->head & (io_cq->q_depth - 1);
+       expected_phase = io_cq->phase;
+
+       cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr
+                       + (head_masked * io_cq->cdesc_entry_size_in_bytes));
+
+       desc_phase = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >>
+                       ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT;
+
+       if (desc_phase != expected_phase)
+               return NULL;
+
+       return cdesc;
+}
+
+static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq)
+{
+       io_cq->head++;
+
+       /* Switch phase bit in case of wrap around */
+       if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0))
+               io_cq->phase ^= 1;
+}
+
+static inline void *get_sq_desc(struct ena_com_io_sq *io_sq)
+{
+       u16 tail_masked;
+       u32 offset;
+
+       tail_masked = io_sq->tail & (io_sq->q_depth - 1);
+
+       offset = tail_masked * io_sq->desc_entry_size;
+
+       return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset);
+}
+
+static inline void ena_com_copy_curr_sq_desc_to_dev(struct ena_com_io_sq *io_sq)
+{
+       u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1);
+       u32 offset = tail_masked * io_sq->desc_entry_size;
+
+       /* If this queue isn't an LLQ, there is nothing to copy to device memory */
+       if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+               return;
+
+       memcpy_toio(io_sq->desc_addr.pbuf_dev_addr + offset,
+                   io_sq->desc_addr.virt_addr + offset,
+                   io_sq->desc_entry_size);
+}
+
+static inline void ena_com_sq_update_tail(struct ena_com_io_sq *io_sq)
+{
+       io_sq->tail++;
+
+       /* Switch phase bit in case of wrap around */
+       if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0))
+               io_sq->phase ^= 1;
+}
+
+static inline int ena_com_write_header(struct ena_com_io_sq *io_sq,
+                                      u8 *head_src, u16 header_len)
+{
+       u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1);
+       u8 __iomem *dev_head_addr =
+               io_sq->header_addr + (tail_masked * io_sq->tx_max_header_size);
+
+       if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+               return 0;
+
+       if (unlikely(!io_sq->header_addr)) {
+               pr_err("Push buffer header ptr is NULL\n");
+               return -EINVAL;
+       }
+
+       memcpy_toio(dev_head_addr, head_src, header_len);
+
+       return 0;
+}
+
+static inline struct ena_eth_io_rx_cdesc_base *
+       ena_com_rx_cdesc_idx_to_ptr(struct ena_com_io_cq *io_cq, u16 idx)
+{
+       idx &= (io_cq->q_depth - 1);
+       return (struct ena_eth_io_rx_cdesc_base *)
+               ((uintptr_t)io_cq->cdesc_addr.virt_addr +
+               idx * io_cq->cdesc_entry_size_in_bytes);
+}
+
+static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
+                                          u16 *first_cdesc_idx)
+{
+       struct ena_eth_io_rx_cdesc_base *cdesc;
+       u16 count = 0, head_masked;
+       u32 last = 0;
+
+       do {
+               cdesc = ena_com_get_next_rx_cdesc(io_cq);
+               if (!cdesc)
+                       break;
+
+               ena_com_cq_inc_head(io_cq);
+               count++;
+               last = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >>
+                       ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT;
+       } while (!last);
+
+       if (last) {
+               *first_cdesc_idx = io_cq->cur_rx_pkt_cdesc_start_idx;
+               count += io_cq->cur_rx_pkt_cdesc_count;
+
+               head_masked = io_cq->head & (io_cq->q_depth - 1);
+
+               io_cq->cur_rx_pkt_cdesc_count = 0;
+               io_cq->cur_rx_pkt_cdesc_start_idx = head_masked;
+
+               pr_debug("ena q_id: %d packets were completed. first desc idx %u descs# %d\n",
+                        io_cq->qid, *first_cdesc_idx, count);
+       } else {
+               io_cq->cur_rx_pkt_cdesc_count += count;
+               count = 0;
+       }
+
+       return count;
+}
+
+static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq,
+                                            struct ena_com_tx_ctx *ena_tx_ctx)
+{
+       int rc;
+
+       if (ena_tx_ctx->meta_valid) {
+               rc = memcmp(&io_sq->cached_tx_meta,
+                           &ena_tx_ctx->ena_meta,
+                           sizeof(struct ena_com_tx_meta));
+
+               if (unlikely(rc != 0))
+                       return true;
+       }
+
+       return false;
+}
+
+static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
+                                                        struct ena_com_tx_ctx *ena_tx_ctx)
+{
+       struct ena_eth_io_tx_meta_desc *meta_desc = NULL;
+       struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
+
+       meta_desc = get_sq_desc(io_sq);
+       memset(meta_desc, 0x0, sizeof(struct ena_eth_io_tx_meta_desc));
+
+       meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_DESC_MASK;
+
+       meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK;
+
+       /* bits 0-9 of the mss */
+       meta_desc->word2 |= (ena_meta->mss <<
+               ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT) &
+               ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK;
+       /* bits 10-13 of the mss */
+       meta_desc->len_ctrl |= ((ena_meta->mss >> 10) <<
+               ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT) &
+               ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK;
+
+       /* Extended meta desc */
+       meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK;
+       meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK;
+       meta_desc->len_ctrl |= (io_sq->phase <<
+               ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT) &
+               ENA_ETH_IO_TX_META_DESC_PHASE_MASK;
+
+       meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_FIRST_MASK;
+       meta_desc->word2 |= ena_meta->l3_hdr_len &
+               ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK;
+       meta_desc->word2 |= (ena_meta->l3_hdr_offset <<
+               ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT) &
+               ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK;
+
+       meta_desc->word2 |= (ena_meta->l4_hdr_len <<
+               ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT) &
+               ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK;
+
+       meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK;
+
+       /* Cache the meta desc */
+       memcpy(&io_sq->cached_tx_meta, ena_meta,
+              sizeof(struct ena_com_tx_meta));
+
+       ena_com_copy_curr_sq_desc_to_dev(io_sq);
+       ena_com_sq_update_tail(io_sq);
+}
+
+static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx,
+                                       struct ena_eth_io_rx_cdesc_base *cdesc)
+{
+       ena_rx_ctx->l3_proto = cdesc->status &
+               ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK;
+       ena_rx_ctx->l4_proto =
+               (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >>
+               ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT;
+       ena_rx_ctx->l3_csum_err =
+               (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >>
+               ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT;
+       ena_rx_ctx->l4_csum_err =
+               (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >>
+               ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT;
+       ena_rx_ctx->hash = cdesc->hash;
+       ena_rx_ctx->frag =
+               (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >>
+               ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT;
+
+       pr_debug("ena_rx_ctx->l3_proto %d ena_rx_ctx->l4_proto %d\nena_rx_ctx->l3_csum_err %d ena_rx_ctx->l4_csum_err %d\nhash frag %d frag: %d cdesc_status: %x\n",
+                ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto,
+                ena_rx_ctx->l3_csum_err, ena_rx_ctx->l4_csum_err,
+                ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status);
+}
+
+/*****************************************************************************/
+/*****************************     API      **********************************/
+/*****************************************************************************/
+
+int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
+                      struct ena_com_tx_ctx *ena_tx_ctx,
+                      int *nb_hw_desc)
+{
+       struct ena_eth_io_tx_desc *desc = NULL;
+       struct ena_com_buf *ena_bufs = ena_tx_ctx->ena_bufs;
+       void *push_header = ena_tx_ctx->push_header;
+       u16 header_len = ena_tx_ctx->header_len;
+       u16 num_bufs = ena_tx_ctx->num_bufs;
+       int total_desc, i, rc;
+       bool have_meta;
+       u64 addr_hi;
+
+       WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_TX, "wrong Q type");
+
+       /* num_bufs +1 for potential meta desc */
+       if (ena_com_sq_empty_space(io_sq) < (num_bufs + 1)) {
+               pr_err("Not enough space in the tx queue\n");
+               return -ENOMEM;
+       }
+
+       if (unlikely(header_len > io_sq->tx_max_header_size)) {
+               pr_err("header size is too large %d max header: %d\n",
+                      header_len, io_sq->tx_max_header_size);
+               return -EINVAL;
+       }
+
+       /* start with pushing the header (if needed) */
+       rc = ena_com_write_header(io_sq, push_header, header_len);
+       if (unlikely(rc))
+               return rc;
+
+       have_meta = ena_tx_ctx->meta_valid && ena_com_meta_desc_changed(io_sq,
+                       ena_tx_ctx);
+       if (have_meta)
+               ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx);
+
+       /* If the caller doesn't want to send packets */
+       if (unlikely(!num_bufs && !header_len)) {
+               *nb_hw_desc = have_meta ? 0 : 1;
+               return 0;
+       }
+
+       desc = get_sq_desc(io_sq);
+       memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc));
+
+       /* Set first desc when we don't have meta descriptor */
+       if (!have_meta)
+               desc->len_ctrl |= ENA_ETH_IO_TX_DESC_FIRST_MASK;
+
+       desc->buff_addr_hi_hdr_sz |= (header_len <<
+               ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT) &
+               ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK;
+       desc->len_ctrl |= (io_sq->phase << ENA_ETH_IO_TX_DESC_PHASE_SHIFT) &
+               ENA_ETH_IO_TX_DESC_PHASE_MASK;
+
+       desc->len_ctrl |= ENA_ETH_IO_TX_DESC_COMP_REQ_MASK;
+
+       /* Bits 0-9 */
+       desc->meta_ctrl |= (ena_tx_ctx->req_id <<
+               ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT) &
+               ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK;
+
+       desc->meta_ctrl |= (ena_tx_ctx->df <<
+               ENA_ETH_IO_TX_DESC_DF_SHIFT) &
+               ENA_ETH_IO_TX_DESC_DF_MASK;
+
+       /* Bits 10-15 */
+       desc->len_ctrl |= ((ena_tx_ctx->req_id >> 10) <<
+               ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT) &
+               ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK;
+
+       if (ena_tx_ctx->meta_valid) {
+               desc->meta_ctrl |= (ena_tx_ctx->tso_enable <<
+                       ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT) &
+                       ENA_ETH_IO_TX_DESC_TSO_EN_MASK;
+               desc->meta_ctrl |= ena_tx_ctx->l3_proto &
+                       ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK;
+               desc->meta_ctrl |= (ena_tx_ctx->l4_proto <<
+                       ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT) &
+                       ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK;
+               desc->meta_ctrl |= (ena_tx_ctx->l3_csum_enable <<
+                       ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT) &
+                       ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK;
+               desc->meta_ctrl |= (ena_tx_ctx->l4_csum_enable <<
+                       ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT) &
+                       ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK;
+               desc->meta_ctrl |= (ena_tx_ctx->l4_csum_partial <<
+                       ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT) &
+                       ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK;
+       }
+
+       for (i = 0; i < num_bufs; i++) {
+               /* The first buffer shares its descriptor with the header */
+               if (likely(i != 0)) {
+                       ena_com_copy_curr_sq_desc_to_dev(io_sq);
+                       ena_com_sq_update_tail(io_sq);
+
+                       desc = get_sq_desc(io_sq);
+                       memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc));
+
+                       desc->len_ctrl |= (io_sq->phase <<
+                               ENA_ETH_IO_TX_DESC_PHASE_SHIFT) &
+                               ENA_ETH_IO_TX_DESC_PHASE_MASK;
+               }
+
+               desc->len_ctrl |= ena_bufs->len &
+                       ENA_ETH_IO_TX_DESC_LENGTH_MASK;
+
+               addr_hi = ((ena_bufs->paddr &
+                       GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32);
+
+               desc->buff_addr_lo = (u32)ena_bufs->paddr;
+               desc->buff_addr_hi_hdr_sz |= addr_hi &
+                       ENA_ETH_IO_TX_DESC_ADDR_HI_MASK;
+               ena_bufs++;
+       }
+
+       /* set the last desc indicator */
+       desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK;
+
+       ena_com_copy_curr_sq_desc_to_dev(io_sq);
+
+       ena_com_sq_update_tail(io_sq);
+
+       total_desc = max_t(u16, num_bufs, 1);
+       total_desc += have_meta ? 1 : 0;
+
+       *nb_hw_desc = total_desc;
+       return 0;
+}
+
+int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
+                  struct ena_com_io_sq *io_sq,
+                  struct ena_com_rx_ctx *ena_rx_ctx)
+{
+       struct ena_com_rx_buf_info *ena_buf = &ena_rx_ctx->ena_bufs[0];
+       struct ena_eth_io_rx_cdesc_base *cdesc = NULL;
+       u16 cdesc_idx = 0;
+       u16 nb_hw_desc;
+       u16 i;
+
+       WARN(io_cq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type");
+
+       nb_hw_desc = ena_com_cdesc_rx_pkt_get(io_cq, &cdesc_idx);
+       if (nb_hw_desc == 0) {
+               ena_rx_ctx->descs = nb_hw_desc;
+               return 0;
+       }
+
+       pr_debug("fetch rx packet: queue %d completed desc: %d\n", io_cq->qid,
+                nb_hw_desc);
+
+       if (unlikely(nb_hw_desc > ena_rx_ctx->max_bufs)) {
+               pr_err("Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc,
+                      ena_rx_ctx->max_bufs);
+               return -ENOSPC;
+       }
+
+       for (i = 0; i < nb_hw_desc; i++) {
+               cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx + i);
+
+               ena_buf->len = cdesc->length;
+               ena_buf->req_id = cdesc->req_id;
+               ena_buf++;
+       }
+
+       /* Update SQ head ptr */
+       io_sq->next_to_comp += nb_hw_desc;
+
+       pr_debug("[%s][QID#%d] Updating SQ head to: %d\n", __func__, io_sq->qid,
+                io_sq->next_to_comp);
+
+       /* Get rx flags from the last pkt */
+       ena_com_rx_set_flags(ena_rx_ctx, cdesc);
+
+       ena_rx_ctx->descs = nb_hw_desc;
+       return 0;
+}
+
+int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
+                              struct ena_com_buf *ena_buf,
+                              u16 req_id)
+{
+       struct ena_eth_io_rx_desc *desc;
+
+       WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type");
+
+       if (unlikely(ena_com_sq_empty_space(io_sq) == 0))
+               return -ENOSPC;
+
+       desc = get_sq_desc(io_sq);
+       memset(desc, 0x0, sizeof(struct ena_eth_io_rx_desc));
+
+       desc->length = ena_buf->len;
+
+       desc->ctrl |= ENA_ETH_IO_RX_DESC_FIRST_MASK;
+       desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK;
+       desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK;
+       desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK;
+
+       desc->req_id = req_id;
+
+       desc->buff_addr_lo = (u32)ena_buf->paddr;
+       desc->buff_addr_hi =
+               ((ena_buf->paddr & GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32);
+
+       ena_com_sq_update_tail(io_sq);
+
+       return 0;
+}
+
+int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id)
+{
+       u8 expected_phase, cdesc_phase;
+       struct ena_eth_io_tx_cdesc *cdesc;
+       u16 masked_head;
+
+       masked_head = io_cq->head & (io_cq->q_depth - 1);
+       expected_phase = io_cq->phase;
+
+       cdesc = (struct ena_eth_io_tx_cdesc *)
+               ((uintptr_t)io_cq->cdesc_addr.virt_addr +
+               (masked_head * io_cq->cdesc_entry_size_in_bytes));
+
+       /* When the current completion descriptor phase isn't the same as the
+        * expected one, it means that the device hasn't updated
+        * this completion yet.
+        */
+       cdesc_phase = cdesc->flags & ENA_ETH_IO_TX_CDESC_PHASE_MASK;
+       if (cdesc_phase != expected_phase)
+               return -EAGAIN;
+
+       ena_com_cq_inc_head(io_cq);
+
+       *req_id = cdesc->req_id;
+
+       return 0;
+}
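The two Rx-side entry points above are meant to be used as a refill/reap pair: ena_com_add_single_rx_desc posts one receive buffer per descriptor, and ena_com_rx_pkt later returns the buffers that make up a completed packet. A minimal sketch of that pairing follows; the function names and ring bookkeeping are illustrative, and ena_com_write_sq_doorbell comes from ena_eth_com.h, added next in this patch.

static void my_rx_refill(struct ena_com_io_sq *rx_sq, struct ena_com_buf *bufs,
			 u16 first_req_id, u16 budget)
{
	u16 i;

	for (i = 0; i < budget; i++) {
		/* Stops early once the submission queue has no free slots */
		if (ena_com_add_single_rx_desc(rx_sq, &bufs[i],
					       first_req_id + i))
			break;
	}

	if (i)
		ena_com_write_sq_doorbell(rx_sq);
}

static int my_rx_poll_one(struct ena_com_io_cq *rx_cq, struct ena_com_io_sq *rx_sq,
			  struct ena_com_rx_ctx *rx_ctx)
{
	int rc;

	rc = ena_com_rx_pkt(rx_cq, rx_sq, rx_ctx);
	if (rc || rx_ctx->descs == 0)
		return rc;	/* error, or no complete packet yet */

	/* rx_ctx->ena_bufs[0..descs-1] now hold the lengths and req_ids of
	 * the buffers making up one packet; the skb would be built from
	 * them here.
	 */
	return rx_ctx->descs;
}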
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
new file mode 100644 (file)
index 0000000..bb53c3a
--- /dev/null
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ENA_ETH_COM_H_
+#define ENA_ETH_COM_H_
+
+#include "ena_com.h"
+
+/* head update threshold in units of (queue size / ENA_COMP_HEAD_THRESH) */
+#define ENA_COMP_HEAD_THRESH 4
+
+struct ena_com_tx_ctx {
+       struct ena_com_tx_meta ena_meta;
+       struct ena_com_buf *ena_bufs;
+       /* For LLQ, header buffer - pushed to the device mem space */
+       void *push_header;
+
+       enum ena_eth_io_l3_proto_index l3_proto;
+       enum ena_eth_io_l4_proto_index l4_proto;
+       u16 num_bufs;
+       u16 req_id;
+       /* For regular queue, indicate the size of the header
+        * For LLQ, indicate the size of the pushed buffer
+        */
+       u16 header_len;
+
+       u8 meta_valid;
+       u8 tso_enable;
+       u8 l3_csum_enable;
+       u8 l4_csum_enable;
+       u8 l4_csum_partial;
+       u8 df; /* Don't fragment */
+};
+
+struct ena_com_rx_ctx {
+       struct ena_com_rx_buf_info *ena_bufs;
+       enum ena_eth_io_l3_proto_index l3_proto;
+       enum ena_eth_io_l4_proto_index l4_proto;
+       bool l3_csum_err;
+       bool l4_csum_err;
+       /* fragmented packet */
+       bool frag;
+       u32 hash;
+       u16 descs;
+       int max_bufs;
+};
+
+int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
+                      struct ena_com_tx_ctx *ena_tx_ctx,
+                      int *nb_hw_desc);
+
+int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
+                  struct ena_com_io_sq *io_sq,
+                  struct ena_com_rx_ctx *ena_rx_ctx);
+
+int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
+                              struct ena_com_buf *ena_buf,
+                              u16 req_id);
+
+int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id);
+
+static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq,
+                                      struct ena_eth_io_intr_reg *intr_reg)
+{
+       writel(intr_reg->intr_control, io_cq->unmask_reg);
+}
+
+static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq)
+{
+       u16 tail, next_to_comp, cnt;
+
+       next_to_comp = io_sq->next_to_comp;
+       tail = io_sq->tail;
+       cnt = tail - next_to_comp;
+
+       return io_sq->q_depth - 1 - cnt;
+}
+
+static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
+{
+       u16 tail;
+
+       tail = io_sq->tail;
+
+       pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
+                io_sq->qid, tail);
+
+       writel(tail, io_sq->db_addr);
+
+       return 0;
+}
+
+static inline int ena_com_update_dev_comp_head(struct ena_com_io_cq *io_cq)
+{
+       u16 unreported_comp, head;
+       bool need_update;
+
+       head = io_cq->head;
+       unreported_comp = head - io_cq->last_head_update;
+       need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH);
+
+       if (io_cq->cq_head_db_reg && need_update) {
+               pr_debug("Write completion queue doorbell for queue %d: head: %d\n",
+                        io_cq->qid, head);
+               writel(head, io_cq->cq_head_db_reg);
+               io_cq->last_head_update = head;
+       }
+
+       return 0;
+}
+
+static inline void ena_com_update_numa_node(struct ena_com_io_cq *io_cq,
+                                           u8 numa_node)
+{
+       struct ena_eth_io_numa_node_cfg_reg numa_cfg;
+
+       if (!io_cq->numa_node_cfg_reg)
+               return;
+
+       numa_cfg.numa_cfg = (numa_node & ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK)
+               | ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK;
+
+       writel(numa_cfg.numa_cfg, io_cq->numa_node_cfg_reg);
+}
+
+static inline void ena_com_comp_ack(struct ena_com_io_sq *io_sq, u16 elem)
+{
+       io_sq->next_to_comp += elem;
+}
+
+#endif /* ENA_ETH_COM_H_ */
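A short sketch of how the Tx-side helpers in this header might be driven follows: submission via ena_com_prepare_tx and the doorbell, then completion reaping with ena_com_tx_comp_req_id_get, ena_com_comp_ack and ena_com_update_dev_comp_head. The wrapper names and per-request bookkeeping are hypothetical, and the single-descriptor ack is a simplifying assumption (a real driver acks the per-request descriptor count).

static int my_xmit(struct ena_com_io_sq *io_sq, struct ena_com_tx_ctx *tx_ctx)
{
	int nb_hw_desc, rc;

	rc = ena_com_prepare_tx(io_sq, tx_ctx, &nb_hw_desc);
	if (rc)
		return rc;		/* e.g. -ENOMEM when the SQ is full */

	/* Make the new descriptors visible to the device */
	ena_com_write_sq_doorbell(io_sq);
	return nb_hw_desc;
}

static void my_tx_cleanup(struct ena_com_io_sq *io_sq, struct ena_com_io_cq *io_cq)
{
	u16 req_id;

	/* Reap completed requests until the device stops producing them */
	while (ena_com_tx_comp_req_id_get(io_cq, &req_id) == 0) {
		/* ... unmap and free the buffer tracked under req_id ... */
		ena_com_comp_ack(io_sq, 1);	/* assume one desc per request */
	}

	/* Report the consumed completions back to the device if needed */
	ena_com_update_dev_comp_head(io_cq);
}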
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
new file mode 100644 (file)
index 0000000..f320c58
--- /dev/null
@@ -0,0 +1,416 @@
+/*
+ * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _ENA_ETH_IO_H_
+#define _ENA_ETH_IO_H_
+
+enum ena_eth_io_l3_proto_index {
+       ENA_ETH_IO_L3_PROTO_UNKNOWN     = 0,
+
+       ENA_ETH_IO_L3_PROTO_IPV4        = 8,
+
+       ENA_ETH_IO_L3_PROTO_IPV6        = 11,
+
+       ENA_ETH_IO_L3_PROTO_FCOE        = 21,
+
+       ENA_ETH_IO_L3_PROTO_ROCE        = 22,
+};
+
+enum ena_eth_io_l4_proto_index {
+       ENA_ETH_IO_L4_PROTO_UNKNOWN             = 0,
+
+       ENA_ETH_IO_L4_PROTO_TCP                 = 12,
+
+       ENA_ETH_IO_L4_PROTO_UDP                 = 13,
+
+       ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE      = 23,
+};
+
+struct ena_eth_io_tx_desc {
+       /* 15:0 : length - Buffer length in bytes, must
+       /* 15:0 : length - Buffer length in bytes, must
+        *    include any packet trailers that the ENA is supposed
+        *    to update, like End-to-End CRC, Authentication GMAC,
+        *    'Push_Buffer' length. This length must not include
+        *    the 4-byte added in the end for 802.3 Ethernet FCS
+        * 21:16 : req_id_hi - Request ID[15:10]
+        * 22 : reserved22 - MBZ
+        * 23 : meta_desc - MBZ
+        * 24 : phase
+        * 25 : reserved1 - MBZ
+        * 26 : first - Indicates first descriptor in
+        *    transaction
+        * 27 : last - Indicates last descriptor in
+        *    transaction
+        * 28 : comp_req - Indicates whether completion
+        *    should be posted, after packet is transmitted.
+        *    Valid only for first descriptor
+        * 30:29 : reserved29 - MBZ
+        * 31 : reserved31 - MBZ
+        */
+       u32 len_ctrl;
+
+       /* 3:0 : l3_proto_idx - L3 protocol. This field is
+        *    required when l3_csum_en, l3_csum or tso_en are set.
+        * 4 : DF - IPv4 DF, must be 0 if packet is IPv4 and
+        *    DF flags of the IPv4 header is 0. Otherwise must
+        *    be set to 1
+        * 6:5 : reserved5
+        * 7 : tso_en - Enable TSO, For TCP only.
+        * 12:8 : l4_proto_idx - L4 protocol. This field needs
+        *    to be set when l4_csum_en or tso_en are set.
+        * 13 : l3_csum_en - enable IPv4 header checksum.
+        * 14 : l4_csum_en - enable TCP/UDP checksum.
+        * 15 : ethernet_fcs_dis - when set, the controller
+        *    will not append the 802.3 Ethernet Frame Check
+        *    Sequence to the packet
+        * 16 : reserved16
+        * 17 : l4_csum_partial - L4 partial checksum. When
+        *    set to 0, the ENA calculates the L4 checksum,
+        *    where the Destination Address required for the
+        *    TCP/UDP pseudo-header is taken from the actual
+        *    packet L3 header. When set to 1, the ENA doesn't
+        *    calculate the pseudo-header sum; the checksum
+        *    field of the L4 header is used instead. When TSO
+        *    is enabled, the checksum of the pseudo-header
+        *    must not include the TCP length field. L4 partial
+        *    checksum should be used for IPv6 packets that
+        *    contain Routing Headers.
+        * 20:18 : reserved18 - MBZ
+        * 21 : reserved21 - MBZ
+        * 31:22 : req_id_lo - Request ID[9:0]
+        */
+       u32 meta_ctrl;
+
+       u32 buff_addr_lo;
+
+       /* address high and header size
+        * 15:0 : addr_hi - Buffer Pointer[47:32]
+        * 23:16 : reserved16_w2
+        * 31:24 : header_length - Header length. For Low
+        *    Latency Queues, this field indicates the number
+        *    of bytes written to the headers' memory. For
+        *    normal queues, if packet is TCP or UDP, and longer
+        *    than max_header_size, then this field should be
+        *    set to the sum of L4 header offset and L4 header
+        *    size(without options), otherwise, this field
+        *    should be set to 0. For both modes, this field
+        *    must not exceed the max_header_size.
+        *    max_header_size value is reported by the Max
+        *    Queues Feature descriptor
+        */
+       u32 buff_addr_hi_hdr_sz;
+};
+
+struct ena_eth_io_tx_meta_desc {
+       /* 9:0 : req_id_lo - Request ID[9:0]
+        * 11:10 : reserved10 - MBZ
+        * 12 : reserved12 - MBZ
+        * 13 : reserved13 - MBZ
+        * 14 : ext_valid - if set, offset fields in Word2
+        *    are valid Also MSS High in Word 0 and bits [31:24]
+        *    in Word 3
+        * 15 : reserved15
+        * 19:16 : mss_hi
+        * 20 : eth_meta_type - 0: Tx Metadata Descriptor, 1:
+        *    Extended Metadata Descriptor
+        * 21 : meta_store - Store extended metadata in queue
+        *    cache
+        * 22 : reserved22 - MBZ
+        * 23 : meta_desc - MBO
+        * 24 : phase
+        * 25 : reserved25 - MBZ
+        * 26 : first - Indicates first descriptor in
+        *    transaction
+        * 27 : last - Indicates last descriptor in
+        *    transaction
+        * 28 : comp_req - Indicates whether completion
+        *    should be posted, after packet is transmitted.
+        *    Valid only for first descriptor
+        * 30:29 : reserved29 - MBZ
+        * 31 : reserved31 - MBZ
+        */
+       u32 len_ctrl;
+
+       /* 5:0 : req_id_hi
+        * 31:6 : reserved6 - MBZ
+        */
+       u32 word1;
+
+       /* 7:0 : l3_hdr_len
+        * 15:8 : l3_hdr_off
+        * 21:16 : l4_hdr_len_in_words - counts the L4 header
+        *    length in words. there is an explicit assumption
+        *    that L4 header appears right after L3 header and
+        *    L4 offset is based on l3_hdr_off+l3_hdr_len
+        * 31:22 : mss_lo
+        */
+       u32 word2;
+
+       u32 reserved;
+};
+
+struct ena_eth_io_tx_cdesc {
+       /* Request ID[15:0] */
+       u16 req_id;
+
+       u8 status;
+
+       /* flags
+        * 0 : phase
+        * 7:1 : reserved1
+        */
+       u8 flags;
+
+       u16 sub_qid;
+
+       u16 sq_head_idx;
+};
+
+struct ena_eth_io_rx_desc {
+       /* In bytes. 0 means 64KB */
+       u16 length;
+
+       /* MBZ */
+       u8 reserved2;
+
+       /* 0 : phase
+        * 1 : reserved1 - MBZ
+        * 2 : first - Indicates first descriptor in
+        *    transaction
+        * 3 : last - Indicates last descriptor in transaction
+        * 4 : comp_req
+        * 5 : reserved5 - MBO
+        * 7:6 : reserved6 - MBZ
+        */
+       u8 ctrl;
+
+       u16 req_id;
+
+       /* MBZ */
+       u16 reserved6;
+
+       u32 buff_addr_lo;
+
+       u16 buff_addr_hi;
+
+       /* MBZ */
+       u16 reserved16_w3;
+};
+
+/* 4-word format. Note: all Ethernet parsing information is valid only when
+ * last=1
+ */
+struct ena_eth_io_rx_cdesc_base {
+       /* 4:0 : l3_proto_idx
+        * 6:5 : src_vlan_cnt
+        * 7 : reserved7 - MBZ
+        * 12:8 : l4_proto_idx
+        * 13 : l3_csum_err - when set, either an L3 checksum
+        *    error was detected or the controller didn't
+        *    validate the checksum. This bit is valid only when
+        *    l3_proto_idx indicates an IPv4 packet
+        * 14 : l4_csum_err - when set, either an L4 checksum
+        *    error was detected or the controller didn't
+        *    validate the checksum. This bit is valid only when
+        *    l4_proto_idx indicates a TCP/UDP packet and
+        *    ipv4_frag is not set
+        * 15 : ipv4_frag - Indicates IPv4 fragmented packet
+        * 23:16 : reserved16
+        * 24 : phase
+        * 25 : l3_csum2 - second checksum engine result
+        * 26 : first - Indicates first descriptor in
+        *    transaction
+        * 27 : last - Indicates last descriptor in
+        *    transaction
+        * 29:28 : reserved28
+        * 30 : buffer - 0: Metadata descriptor. 1: Buffer
+        *    Descriptor was used
+        * 31 : reserved31
+        */
+       u32 status;
+
+       u16 length;
+
+       u16 req_id;
+
+       /* 32-bit hash result */
+       u32 hash;
+
+       u16 sub_qid;
+
+       u16 reserved;
+};
+
+/* 8-word format */
+struct ena_eth_io_rx_cdesc_ext {
+       struct ena_eth_io_rx_cdesc_base base;
+
+       u32 buff_addr_lo;
+
+       u16 buff_addr_hi;
+
+       u16 reserved16;
+
+       u32 reserved_w6;
+
+       u32 reserved_w7;
+};
+
+struct ena_eth_io_intr_reg {
+       /* 14:0 : rx_intr_delay
+        * 29:15 : tx_intr_delay
+        * 30 : intr_unmask
+        * 31 : reserved
+        */
+       u32 intr_control;
+};
+
+struct ena_eth_io_numa_node_cfg_reg {
+       /* 7:0 : numa
+        * 30:8 : reserved
+        * 31 : enabled
+        */
+       u32 numa_cfg;
+};
+
+/* tx_desc */
+#define ENA_ETH_IO_TX_DESC_LENGTH_MASK GENMASK(15, 0)
+#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT 16
+#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16)
+#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT 23
+#define ENA_ETH_IO_TX_DESC_META_DESC_MASK BIT(23)
+#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT 24
+#define ENA_ETH_IO_TX_DESC_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT 26
+#define ENA_ETH_IO_TX_DESC_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_TX_DESC_LAST_SHIFT 27
+#define ENA_ETH_IO_TX_DESC_LAST_MASK BIT(27)
+#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT 28
+#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK BIT(28)
+#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0)
+#define ENA_ETH_IO_TX_DESC_DF_SHIFT 4
+#define ENA_ETH_IO_TX_DESC_DF_MASK BIT(4)
+#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT 7
+#define ENA_ETH_IO_TX_DESC_TSO_EN_MASK BIT(7)
+#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT 8
+#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8)
+#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT 13
+#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK BIT(13)
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT 14
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK BIT(14)
+#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15
+#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15)
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17)
+#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT 22
+#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22)
+#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK GENMASK(15, 0)
+#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT 24
+#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24)
+
+/* tx_meta_desc */
+#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK GENMASK(9, 0)
+#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT 14
+#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK BIT(14)
+#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT 16
+#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK GENMASK(19, 16)
+#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT 20
+#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK BIT(20)
+#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT 21
+#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK BIT(21)
+#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT 23
+#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK BIT(23)
+#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT 24
+#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT 26
+#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT 27
+#define ENA_ETH_IO_TX_META_DESC_LAST_MASK BIT(27)
+#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT 28
+#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK BIT(28)
+#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK GENMASK(5, 0)
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK GENMASK(7, 0)
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT 8
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK GENMASK(15, 8)
+#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT 16
+#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK GENMASK(21, 16)
+#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT 22
+#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK GENMASK(31, 22)
+
+/* tx_cdesc */
+#define ENA_ETH_IO_TX_CDESC_PHASE_MASK BIT(0)
+
+/* rx_desc */
+#define ENA_ETH_IO_RX_DESC_PHASE_MASK BIT(0)
+#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT 2
+#define ENA_ETH_IO_RX_DESC_FIRST_MASK BIT(2)
+#define ENA_ETH_IO_RX_DESC_LAST_SHIFT 3
+#define ENA_ETH_IO_RX_DESC_LAST_MASK BIT(3)
+#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT 4
+#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK BIT(4)
+
+/* rx_cdesc_base */
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK GENMASK(4, 0)
+#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT 5
+#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK GENMASK(6, 5)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT 8
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK GENMASK(12, 8)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT 13
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK BIT(13)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT 14
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14)
+#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15
+#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15)
+#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24
+#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK BIT(25)
+#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT 26
+#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT 27
+#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK BIT(27)
+#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT 30
+#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK BIT(30)
+
+/* intr_reg */
+#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK GENMASK(14, 0)
+#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT 15
+#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK GENMASK(29, 15)
+#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT 30
+#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK BIT(30)
+
+/* numa_node_cfg_reg */
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK GENMASK(7, 0)
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31)
+
+#endif /* _ENA_ETH_IO_H_ */
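
A minimal sketch of how the SHIFT/MASK pairs above are combined when one of
the TX descriptor control words is built (not part of the patch; the helper
name and the choice of fields are hypothetical, shown only to illustrate the
macro usage):

static u32 ena_pack_tx_ctrl_word_sketch(u8 l3_proto_idx, u8 l4_proto_idx,
					bool tso_en, bool l3_csum_en,
					bool l4_csum_en)
{
	u32 ctrl_word = 0;

	/* L3 protocol index lives in the low bits, no shift needed */
	ctrl_word |= l3_proto_idx & ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK;
	/* L4 protocol index occupies bits 12:8 */
	ctrl_word |= (l4_proto_idx << ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT) &
		     ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK;
	if (tso_en)
		ctrl_word |= ENA_ETH_IO_TX_DESC_TSO_EN_MASK;
	if (l3_csum_en)
		ctrl_word |= ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK;
	if (l4_csum_en)
		ctrl_word |= ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK;

	return ctrl_word;
}
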
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
new file mode 100644 (file)
index 0000000..67b2338
--- /dev/null
@@ -0,0 +1,895 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+
+#include "ena_netdev.h"
+
+struct ena_stats {
+       char name[ETH_GSTRING_LEN];
+       int stat_offset;
+};
+
+#define ENA_STAT_ENA_COM_ENTRY(stat) { \
+       .name = #stat, \
+       .stat_offset = offsetof(struct ena_com_stats_admin, stat) \
+}
+
+#define ENA_STAT_ENTRY(stat, stat_type) { \
+       .name = #stat, \
+       .stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
+}
+
+#define ENA_STAT_RX_ENTRY(stat) \
+       ENA_STAT_ENTRY(stat, rx)
+
+#define ENA_STAT_TX_ENTRY(stat) \
+       ENA_STAT_ENTRY(stat, tx)
+
+#define ENA_STAT_GLOBAL_ENTRY(stat) \
+       ENA_STAT_ENTRY(stat, dev)
+
+static const struct ena_stats ena_stats_global_strings[] = {
+       ENA_STAT_GLOBAL_ENTRY(tx_timeout),
+       ENA_STAT_GLOBAL_ENTRY(io_suspend),
+       ENA_STAT_GLOBAL_ENTRY(io_resume),
+       ENA_STAT_GLOBAL_ENTRY(wd_expired),
+       ENA_STAT_GLOBAL_ENTRY(interface_up),
+       ENA_STAT_GLOBAL_ENTRY(interface_down),
+       ENA_STAT_GLOBAL_ENTRY(admin_q_pause),
+};
+
+static const struct ena_stats ena_stats_tx_strings[] = {
+       ENA_STAT_TX_ENTRY(cnt),
+       ENA_STAT_TX_ENTRY(bytes),
+       ENA_STAT_TX_ENTRY(queue_stop),
+       ENA_STAT_TX_ENTRY(queue_wakeup),
+       ENA_STAT_TX_ENTRY(dma_mapping_err),
+       ENA_STAT_TX_ENTRY(linearize),
+       ENA_STAT_TX_ENTRY(linearize_failed),
+       ENA_STAT_TX_ENTRY(napi_comp),
+       ENA_STAT_TX_ENTRY(tx_poll),
+       ENA_STAT_TX_ENTRY(doorbells),
+       ENA_STAT_TX_ENTRY(prepare_ctx_err),
+       ENA_STAT_TX_ENTRY(missing_tx_comp),
+       ENA_STAT_TX_ENTRY(bad_req_id),
+};
+
+static const struct ena_stats ena_stats_rx_strings[] = {
+       ENA_STAT_RX_ENTRY(cnt),
+       ENA_STAT_RX_ENTRY(bytes),
+       ENA_STAT_RX_ENTRY(refil_partial),
+       ENA_STAT_RX_ENTRY(bad_csum),
+       ENA_STAT_RX_ENTRY(page_alloc_fail),
+       ENA_STAT_RX_ENTRY(skb_alloc_fail),
+       ENA_STAT_RX_ENTRY(dma_mapping_err),
+       ENA_STAT_RX_ENTRY(bad_desc_num),
+       ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
+};
+
+static const struct ena_stats ena_stats_ena_com_strings[] = {
+       ENA_STAT_ENA_COM_ENTRY(aborted_cmd),
+       ENA_STAT_ENA_COM_ENTRY(submitted_cmd),
+       ENA_STAT_ENA_COM_ENTRY(completed_cmd),
+       ENA_STAT_ENA_COM_ENTRY(out_of_space),
+       ENA_STAT_ENA_COM_ENTRY(no_completion),
+};
+
+#define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings)
+#define ENA_STATS_ARRAY_TX     ARRAY_SIZE(ena_stats_tx_strings)
+#define ENA_STATS_ARRAY_RX     ARRAY_SIZE(ena_stats_rx_strings)
+#define ENA_STATS_ARRAY_ENA_COM        ARRAY_SIZE(ena_stats_ena_com_strings)
+
+static void ena_safe_update_stat(u64 *src, u64 *dst,
+                                struct u64_stats_sync *syncp)
+{
+       unsigned int start;
+
+       do {
+               start = u64_stats_fetch_begin_irq(syncp);
+               *(dst) = *src;
+       } while (u64_stats_fetch_retry_irq(syncp, start));
+}
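/* Sketch, not part of this patch: ena_safe_update_stat() above is the reader
 * side of the u64_stats_sync protocol. The writer side, used in this driver
 * wherever a counter is bumped (see e.g. ena_tx_timeout() in ena_netdev.c),
 * brackets the update so 32-bit readers never observe a torn 64-bit value:
 *
 *	u64_stats_update_begin(&ring->syncp);
 *	ring->tx_stats.cnt++;
 *	u64_stats_update_end(&ring->syncp);
 */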
+
+static void ena_queue_stats(struct ena_adapter *adapter, u64 **data)
+{
+       const struct ena_stats *ena_stats;
+       struct ena_ring *ring;
+
+       u64 *ptr;
+       int i, j;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               /* Tx stats */
+               ring = &adapter->tx_ring[i];
+
+               for (j = 0; j < ENA_STATS_ARRAY_TX; j++) {
+                       ena_stats = &ena_stats_tx_strings[j];
+
+                       ptr = (u64 *)((uintptr_t)&ring->tx_stats +
+                               (uintptr_t)ena_stats->stat_offset);
+
+                       ena_safe_update_stat(ptr, (*data)++, &ring->syncp);
+               }
+
+               /* Rx stats */
+               ring = &adapter->rx_ring[i];
+
+               for (j = 0; j < ENA_STATS_ARRAY_RX; j++) {
+                       ena_stats = &ena_stats_rx_strings[j];
+
+                       ptr = (u64 *)((uintptr_t)&ring->rx_stats +
+                               (uintptr_t)ena_stats->stat_offset);
+
+                       ena_safe_update_stat(ptr, (*data)++, &ring->syncp);
+               }
+       }
+}
+
+static void ena_dev_admin_queue_stats(struct ena_adapter *adapter, u64 **data)
+{
+       const struct ena_stats *ena_stats;
+       u32 *ptr;
+       int i;
+
+       for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) {
+               ena_stats = &ena_stats_ena_com_strings[i];
+
+               ptr = (u32 *)((uintptr_t)&adapter->ena_dev->admin_queue.stats +
+                       (uintptr_t)ena_stats->stat_offset);
+
+               *(*data)++ = *ptr;
+       }
+}
+
+static void ena_get_ethtool_stats(struct net_device *netdev,
+                                 struct ethtool_stats *stats,
+                                 u64 *data)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       const struct ena_stats *ena_stats;
+       u64 *ptr;
+       int i;
+
+       for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) {
+               ena_stats = &ena_stats_global_strings[i];
+
+               ptr = (u64 *)((uintptr_t)&adapter->dev_stats +
+                       (uintptr_t)ena_stats->stat_offset);
+
+               ena_safe_update_stat(ptr, data++, &adapter->syncp);
+       }
+
+       ena_queue_stats(adapter, &data);
+       ena_dev_admin_queue_stats(adapter, &data);
+}
+
+int ena_get_sset_count(struct net_device *netdev, int sset)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+
+       if (sset != ETH_SS_STATS)
+               return -EOPNOTSUPP;
+
+       return adapter->num_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX)
+               + ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM;
+}
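/* Note, not part of this patch: the ethtool core pairs names and values by
 * position, so the count returned here, the string order produced by
 * ena_get_strings() and the value order produced by ena_get_ethtool_stats()
 * must all agree: global stats first, then per-queue Tx/Rx stats, then
 * admin-queue stats.
 */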
+
+static void ena_queue_strings(struct ena_adapter *adapter, u8 **data)
+{
+       const struct ena_stats *ena_stats;
+       int i, j;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               /* Tx stats */
+               for (j = 0; j < ENA_STATS_ARRAY_TX; j++) {
+                       ena_stats = &ena_stats_tx_strings[j];
+
+                       snprintf(*data, ETH_GSTRING_LEN,
+                                "queue_%u_tx_%s", i, ena_stats->name);
+                       (*data) += ETH_GSTRING_LEN;
+               }
+               /* Rx stats */
+               for (j = 0; j < ENA_STATS_ARRAY_RX; j++) {
+                       ena_stats = &ena_stats_rx_strings[j];
+
+                       snprintf(*data, ETH_GSTRING_LEN,
+                                "queue_%u_rx_%s", i, ena_stats->name);
+                       (*data) += ETH_GSTRING_LEN;
+               }
+       }
+}
+
+static void ena_com_dev_strings(u8 **data)
+{
+       const struct ena_stats *ena_stats;
+       int i;
+
+       for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) {
+               ena_stats = &ena_stats_ena_com_strings[i];
+
+               snprintf(*data, ETH_GSTRING_LEN,
+                        "ena_admin_q_%s", ena_stats->name);
+               (*data) += ETH_GSTRING_LEN;
+       }
+}
+
+static void ena_get_strings(struct net_device *netdev, u32 sset, u8 *data)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       const struct ena_stats *ena_stats;
+       int i;
+
+       if (sset != ETH_SS_STATS)
+               return;
+
+       for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) {
+               ena_stats = &ena_stats_global_strings[i];
+
+               memcpy(data, ena_stats->name, ETH_GSTRING_LEN);
+               data += ETH_GSTRING_LEN;
+       }
+
+       ena_queue_strings(adapter, &data);
+       ena_com_dev_strings(&data);
+}
+
+static int ena_get_link_ksettings(struct net_device *netdev,
+                                 struct ethtool_link_ksettings *link_ksettings)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       struct ena_com_dev *ena_dev = adapter->ena_dev;
+       struct ena_admin_get_feature_link_desc *link;
+       struct ena_admin_get_feat_resp feat_resp;
+       int rc;
+
+       rc = ena_com_get_link_params(ena_dev, &feat_resp);
+       if (rc)
+               return rc;
+
+       link = &feat_resp.u.link;
+       link_ksettings->base.speed = link->speed;
+
+       if (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) {
+               ethtool_link_ksettings_add_link_mode(link_ksettings,
+                                                    supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(link_ksettings,
+                                                    advertising, Autoneg);
+       }
+
+       link_ksettings->base.autoneg =
+               (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) ?
+               AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+       link_ksettings->base.duplex = DUPLEX_FULL;
+
+       return 0;
+}
+
+static int ena_get_coalesce(struct net_device *net_dev,
+                           struct ethtool_coalesce *coalesce)
+{
+       struct ena_adapter *adapter = netdev_priv(net_dev);
+       struct ena_com_dev *ena_dev = adapter->ena_dev;
+       struct ena_intr_moder_entry intr_moder_entry;
+
+       if (!ena_com_interrupt_moderation_supported(ena_dev)) {
+               /* the device doesn't support interrupt moderation */
+               return -EOPNOTSUPP;
+       }
+       coalesce->tx_coalesce_usecs =
+               ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) /
+                       ena_dev->intr_delay_resolution;
+       if (!ena_com_get_adaptive_moderation_enabled(ena_dev)) {
+               coalesce->rx_coalesce_usecs =
+                       ena_com_get_nonadaptive_moderation_interval_rx(ena_dev)
+                       / ena_dev->intr_delay_resolution;
+       } else {
+               ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry);
+               coalesce->rx_coalesce_usecs_low = intr_moder_entry.intr_moder_interval;
+               coalesce->rx_max_coalesced_frames_low = intr_moder_entry.pkts_per_interval;
+
+               ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry);
+               coalesce->rx_coalesce_usecs = intr_moder_entry.intr_moder_interval;
+               coalesce->rx_max_coalesced_frames = intr_moder_entry.pkts_per_interval;
+
+               ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry);
+               coalesce->rx_coalesce_usecs_high = intr_moder_entry.intr_moder_interval;
+               coalesce->rx_max_coalesced_frames_high = intr_moder_entry.pkts_per_interval;
+       }
+       coalesce->use_adaptive_rx_coalesce =
+               ena_com_get_adaptive_moderation_enabled(ena_dev);
+
+       return 0;
+}
+
+static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter)
+{
+       unsigned int val;
+       int i;
+
+       val = ena_com_get_nonadaptive_moderation_interval_tx(adapter->ena_dev);
+
+       for (i = 0; i < adapter->num_queues; i++)
+               adapter->tx_ring[i].smoothed_interval = val;
+}
+
+static int ena_set_coalesce(struct net_device *net_dev,
+                           struct ethtool_coalesce *coalesce)
+{
+       struct ena_adapter *adapter = netdev_priv(net_dev);
+       struct ena_com_dev *ena_dev = adapter->ena_dev;
+       struct ena_intr_moder_entry intr_moder_entry;
+       int rc;
+
+       if (!ena_com_interrupt_moderation_supported(ena_dev)) {
+               /* the device doesn't support interrupt moderation */
+               return -EOPNOTSUPP;
+       }
+
+       if (coalesce->rx_coalesce_usecs_irq ||
+           coalesce->rx_max_coalesced_frames_irq ||
+           coalesce->tx_coalesce_usecs_irq ||
+           coalesce->tx_max_coalesced_frames ||
+           coalesce->tx_max_coalesced_frames_irq ||
+           coalesce->stats_block_coalesce_usecs ||
+           coalesce->use_adaptive_tx_coalesce ||
+           coalesce->pkt_rate_low ||
+           coalesce->tx_coalesce_usecs_low ||
+           coalesce->tx_max_coalesced_frames_low ||
+           coalesce->pkt_rate_high ||
+           coalesce->tx_coalesce_usecs_high ||
+           coalesce->tx_max_coalesced_frames_high ||
+           coalesce->rate_sample_interval)
+               return -EINVAL;
+
+       rc = ena_com_update_nonadaptive_moderation_interval_tx(ena_dev,
+                                                              coalesce->tx_coalesce_usecs);
+       if (rc)
+               return rc;
+
+       ena_update_tx_rings_intr_moderation(adapter);
+
+       if (ena_com_get_adaptive_moderation_enabled(ena_dev)) {
+               if (!coalesce->use_adaptive_rx_coalesce) {
+                       ena_com_disable_adaptive_moderation(ena_dev);
+                       rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
+                                                                              coalesce->rx_coalesce_usecs);
+                       return rc;
+               }
+       } else { /* was in non-adaptive mode */
+               if (coalesce->use_adaptive_rx_coalesce) {
+                       ena_com_enable_adaptive_moderation(ena_dev);
+               } else {
+                       rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
+                                                                              coalesce->rx_coalesce_usecs);
+                       return rc;
+               }
+       }
+
+       intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_low;
+       intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_low;
+       intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED;
+       ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry);
+
+       intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs;
+       intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames;
+       intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED;
+       ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry);
+
+       intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_high;
+       intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_high;
+       intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED;
+       ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry);
+
+       return 0;
+}
+
+static u32 ena_get_msglevel(struct net_device *netdev)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+
+       return adapter->msg_enable;
+}
+
+static void ena_set_msglevel(struct net_device *netdev, u32 value)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+
+       adapter->msg_enable = value;
+}
+
+static void ena_get_drvinfo(struct net_device *dev,
+                           struct ethtool_drvinfo *info)
+{
+       struct ena_adapter *adapter = netdev_priv(dev);
+
+       strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
+       strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
+       strlcpy(info->bus_info, pci_name(adapter->pdev),
+               sizeof(info->bus_info));
+}
+
+static void ena_get_ringparam(struct net_device *netdev,
+                             struct ethtool_ringparam *ring)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       struct ena_ring *tx_ring = &adapter->tx_ring[0];
+       struct ena_ring *rx_ring = &adapter->rx_ring[0];
+
+       ring->rx_max_pending = rx_ring->ring_size;
+       ring->tx_max_pending = tx_ring->ring_size;
+       ring->rx_pending = rx_ring->ring_size;
+       ring->tx_pending = tx_ring->ring_size;
+}
+
+static u32 ena_flow_hash_to_flow_type(u16 hash_fields)
+{
+       u32 data = 0;
+
+       if (hash_fields & ENA_ADMIN_RSS_L2_DA)
+               data |= RXH_L2DA;
+
+       if (hash_fields & ENA_ADMIN_RSS_L3_DA)
+               data |= RXH_IP_DST;
+
+       if (hash_fields & ENA_ADMIN_RSS_L3_SA)
+               data |= RXH_IP_SRC;
+
+       if (hash_fields & ENA_ADMIN_RSS_L4_DP)
+               data |= RXH_L4_B_2_3;
+
+       if (hash_fields & ENA_ADMIN_RSS_L4_SP)
+               data |= RXH_L4_B_0_1;
+
+       return data;
+}
+
+static u16 ena_flow_data_to_flow_hash(u32 hash_fields)
+{
+       u16 data = 0;
+
+       if (hash_fields & RXH_L2DA)
+               data |= ENA_ADMIN_RSS_L2_DA;
+
+       if (hash_fields & RXH_IP_DST)
+               data |= ENA_ADMIN_RSS_L3_DA;
+
+       if (hash_fields & RXH_IP_SRC)
+               data |= ENA_ADMIN_RSS_L3_SA;
+
+       if (hash_fields & RXH_L4_B_2_3)
+               data |= ENA_ADMIN_RSS_L4_DP;
+
+       if (hash_fields & RXH_L4_B_0_1)
+               data |= ENA_ADMIN_RSS_L4_SP;
+
+       return data;
+}
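/* Sketch, not part of this patch: for a 4-tuple hash request such as
 * "ethtool -N <iface> rx-flow-hash tcp4 sdfn", the ethtool core passes
 * cmd->data = RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3;
 * ena_flow_data_to_flow_hash() folds that into ENA_ADMIN_RSS_L3_SA |
 * ENA_ADMIN_RSS_L3_DA | ENA_ADMIN_RSS_L4_SP | ENA_ADMIN_RSS_L4_DP for the
 * device, and ena_flow_hash_to_flow_type() performs the inverse mapping on
 * the ETHTOOL_GRXFH (get) path.
 */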
+
+static int ena_get_rss_hash(struct ena_com_dev *ena_dev,
+                           struct ethtool_rxnfc *cmd)
+{
+       enum ena_admin_flow_hash_proto proto;
+       u16 hash_fields;
+       int rc;
+
+       cmd->data = 0;
+
+       switch (cmd->flow_type) {
+       case TCP_V4_FLOW:
+               proto = ENA_ADMIN_RSS_TCP4;
+               break;
+       case UDP_V4_FLOW:
+               proto = ENA_ADMIN_RSS_UDP4;
+               break;
+       case TCP_V6_FLOW:
+               proto = ENA_ADMIN_RSS_TCP6;
+               break;
+       case UDP_V6_FLOW:
+               proto = ENA_ADMIN_RSS_UDP6;
+               break;
+       case IPV4_FLOW:
+               proto = ENA_ADMIN_RSS_IP4;
+               break;
+       case IPV6_FLOW:
+               proto = ENA_ADMIN_RSS_IP6;
+               break;
+       case ETHER_FLOW:
+               proto = ENA_ADMIN_RSS_NOT_IP;
+               break;
+       case AH_V4_FLOW:
+       case ESP_V4_FLOW:
+       case AH_V6_FLOW:
+       case ESP_V6_FLOW:
+       case SCTP_V4_FLOW:
+       case AH_ESP_V4_FLOW:
+               return -EOPNOTSUPP;
+       default:
+               return -EINVAL;
+       }
+
+       rc = ena_com_get_hash_ctrl(ena_dev, proto, &hash_fields);
+       if (rc) {
+               /* If the device doesn't have permission, return unsupported */
+               if (rc == -EPERM)
+                       rc = -EOPNOTSUPP;
+               return rc;
+       }
+
+       cmd->data = ena_flow_hash_to_flow_type(hash_fields);
+
+       return 0;
+}
+
+static int ena_set_rss_hash(struct ena_com_dev *ena_dev,
+                           struct ethtool_rxnfc *cmd)
+{
+       enum ena_admin_flow_hash_proto proto;
+       u16 hash_fields;
+
+       switch (cmd->flow_type) {
+       case TCP_V4_FLOW:
+               proto = ENA_ADMIN_RSS_TCP4;
+               break;
+       case UDP_V4_FLOW:
+               proto = ENA_ADMIN_RSS_UDP4;
+               break;
+       case TCP_V6_FLOW:
+               proto = ENA_ADMIN_RSS_TCP6;
+               break;
+       case UDP_V6_FLOW:
+               proto = ENA_ADMIN_RSS_UDP6;
+               break;
+       case IPV4_FLOW:
+               proto = ENA_ADMIN_RSS_IP4;
+               break;
+       case IPV6_FLOW:
+               proto = ENA_ADMIN_RSS_IP6;
+               break;
+       case ETHER_FLOW:
+               proto = ENA_ADMIN_RSS_NOT_IP;
+               break;
+       case AH_V4_FLOW:
+       case ESP_V4_FLOW:
+       case AH_V6_FLOW:
+       case ESP_V6_FLOW:
+       case SCTP_V4_FLOW:
+       case AH_ESP_V4_FLOW:
+               return -EOPNOTSUPP;
+       default:
+               return -EINVAL;
+       }
+
+       hash_fields = ena_flow_data_to_flow_hash(cmd->data);
+
+       return ena_com_fill_hash_ctrl(ena_dev, proto, hash_fields);
+}
+
+static int ena_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       int rc = 0;
+
+       switch (info->cmd) {
+       case ETHTOOL_SRXFH:
+               rc = ena_set_rss_hash(adapter->ena_dev, info);
+               break;
+       case ETHTOOL_SRXCLSRLDEL:
+       case ETHTOOL_SRXCLSRLINS:
+       default:
+               netif_err(adapter, drv, netdev,
+                         "Command parameter %d is not supported\n", info->cmd);
+               rc = -EOPNOTSUPP;
+       }
+
+       return (rc == -EPERM) ? -EOPNOTSUPP : rc;
+}
+
+static int ena_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info,
+                        u32 *rules)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       int rc = 0;
+
+       switch (info->cmd) {
+       case ETHTOOL_GRXRINGS:
+               info->data = adapter->num_queues;
+               rc = 0;
+               break;
+       case ETHTOOL_GRXFH:
+               rc = ena_get_rss_hash(adapter->ena_dev, info);
+               break;
+       case ETHTOOL_GRXCLSRLCNT:
+       case ETHTOOL_GRXCLSRULE:
+       case ETHTOOL_GRXCLSRLALL:
+       default:
+               netif_err(adapter, drv, netdev,
+                         "Command parameter %d is not supported\n", info->cmd);
+               rc = -EOPNOTSUPP;
+       }
+
+       return (rc == -EPERM) ? -EOPNOTSUPP : rc;
+}
+
+static u32 ena_get_rxfh_indir_size(struct net_device *netdev)
+{
+       return ENA_RX_RSS_TABLE_SIZE;
+}
+
+static u32 ena_get_rxfh_key_size(struct net_device *netdev)
+{
+       return ENA_HASH_KEY_SIZE;
+}
+
+static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+                       u8 *hfunc)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       enum ena_admin_hash_functions ena_func;
+       u8 func;
+       int rc;
+
+       rc = ena_com_indirect_table_get(adapter->ena_dev, indir);
+       if (rc)
+               return rc;
+
+       rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key);
+       if (rc)
+               return rc;
+
+       switch (ena_func) {
+       case ENA_ADMIN_TOEPLITZ:
+               func = ETH_RSS_HASH_TOP;
+               break;
+       case ENA_ADMIN_CRC32:
+               func = ETH_RSS_HASH_XOR;
+               break;
+       default:
+               netif_err(adapter, drv, netdev,
+                         "Command parameter is not supported\n");
+               return -EOPNOTSUPP;
+       }
+
+       if (hfunc)
+               *hfunc = func;
+
+       return rc;
+}
+
+static int ena_set_rxfh(struct net_device *netdev, const u32 *indir,
+                       const u8 *key, const u8 hfunc)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       struct ena_com_dev *ena_dev = adapter->ena_dev;
+       enum ena_admin_hash_functions func;
+       int rc, i;
+
+       if (indir) {
+               for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
+                       rc = ena_com_indirect_table_fill_entry(ena_dev,
+                                                              ENA_IO_RXQ_IDX(indir[i]),
+                                                              i);
+                       if (unlikely(rc)) {
+                               netif_err(adapter, drv, netdev,
+                                         "Cannot fill indirect table (index is too large)\n");
+                               return rc;
+                       }
+               }
+
+               rc = ena_com_indirect_table_set(ena_dev);
+               if (rc) {
+                       netif_err(adapter, drv, netdev,
+                                 "Cannot set indirect table\n");
+                       return rc == -EPERM ? -EOPNOTSUPP : rc;
+               }
+       }
+
+       switch (hfunc) {
+       case ETH_RSS_HASH_TOP:
+               func = ENA_ADMIN_TOEPLITZ;
+               break;
+       case ETH_RSS_HASH_XOR:
+               func = ENA_ADMIN_CRC32;
+               break;
+       default:
+               netif_err(adapter, drv, netdev, "Unsupported hfunc %d\n",
+                         hfunc);
+               return -EOPNOTSUPP;
+       }
+
+       if (key) {
+               rc = ena_com_fill_hash_function(ena_dev, func, key,
+                                               ENA_HASH_KEY_SIZE,
+                                               0xFFFFFFFF);
+               if (unlikely(rc)) {
+                       netif_err(adapter, drv, netdev, "Cannot fill key\n");
+                       return rc == -EPERM ? -EOPNOTSUPP : rc;
+               }
+       }
+
+       return 0;
+}
+
+static void ena_get_channels(struct net_device *netdev,
+                            struct ethtool_channels *channels)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+
+       channels->max_rx = ENA_MAX_NUM_IO_QUEUES;
+       channels->max_tx = ENA_MAX_NUM_IO_QUEUES;
+       channels->max_other = 0;
+       channels->max_combined = 0;
+       channels->rx_count = adapter->num_queues;
+       channels->tx_count = adapter->num_queues;
+       channels->other_count = 0;
+       channels->combined_count = 0;
+}
+
+static int ena_get_tunable(struct net_device *netdev,
+                          const struct ethtool_tunable *tuna, void *data)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       int ret = 0;
+
+       switch (tuna->id) {
+       case ETHTOOL_RX_COPYBREAK:
+               *(u32 *)data = adapter->rx_copybreak;
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
+static int ena_set_tunable(struct net_device *netdev,
+                          const struct ethtool_tunable *tuna,
+                          const void *data)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       int ret = 0;
+       u32 len;
+
+       switch (tuna->id) {
+       case ETHTOOL_RX_COPYBREAK:
+               len = *(u32 *)data;
+               if (len > adapter->netdev->mtu) {
+                       ret = -EINVAL;
+                       break;
+               }
+               adapter->rx_copybreak = len;
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
+static const struct ethtool_ops ena_ethtool_ops = {
+       .get_link_ksettings     = ena_get_link_ksettings,
+       .get_drvinfo            = ena_get_drvinfo,
+       .get_msglevel           = ena_get_msglevel,
+       .set_msglevel           = ena_set_msglevel,
+       .get_link               = ethtool_op_get_link,
+       .get_coalesce           = ena_get_coalesce,
+       .set_coalesce           = ena_set_coalesce,
+       .get_ringparam          = ena_get_ringparam,
+       .get_sset_count         = ena_get_sset_count,
+       .get_strings            = ena_get_strings,
+       .get_ethtool_stats      = ena_get_ethtool_stats,
+       .get_rxnfc              = ena_get_rxnfc,
+       .set_rxnfc              = ena_set_rxnfc,
+       .get_rxfh_indir_size    = ena_get_rxfh_indir_size,
+       .get_rxfh_key_size      = ena_get_rxfh_key_size,
+       .get_rxfh               = ena_get_rxfh,
+       .set_rxfh               = ena_set_rxfh,
+       .get_channels           = ena_get_channels,
+       .get_tunable            = ena_get_tunable,
+       .set_tunable            = ena_set_tunable,
+};
+
+void ena_set_ethtool_ops(struct net_device *netdev)
+{
+       netdev->ethtool_ops = &ena_ethtool_ops;
+}
+
+static void ena_dump_stats_ex(struct ena_adapter *adapter, u8 *buf)
+{
+       struct net_device *netdev = adapter->netdev;
+       u8 *strings_buf;
+       u64 *data_buf;
+       int strings_num;
+       int i, rc;
+
+       strings_num = ena_get_sset_count(netdev, ETH_SS_STATS);
+       if (strings_num <= 0) {
+               netif_err(adapter, drv, netdev, "Can't get stats num\n");
+               return;
+       }
+
+       strings_buf = devm_kzalloc(&adapter->pdev->dev,
+                                  strings_num * ETH_GSTRING_LEN,
+                                  GFP_ATOMIC);
+       if (!strings_buf) {
+               netif_err(adapter, drv, netdev,
+                         "failed to alloc strings_buf\n");
+               return;
+       }
+
+       data_buf = devm_kzalloc(&adapter->pdev->dev,
+                               strings_num * sizeof(u64),
+                               GFP_ATOMIC);
+       if (!data_buf) {
+               netif_err(adapter, drv, netdev,
+                         "failed to allocate data buf\n");
+               devm_kfree(&adapter->pdev->dev, strings_buf);
+               return;
+       }
+
+       ena_get_strings(netdev, ETH_SS_STATS, strings_buf);
+       ena_get_ethtool_stats(netdev, NULL, data_buf);
+
+       /* If there is a buffer, dump stats, otherwise print them to dmesg */
+       if (buf)
+               for (i = 0; i < strings_num; i++) {
+                       rc = snprintf(buf, ETH_GSTRING_LEN + sizeof(u64),
+                                     "%s %llu\n",
+                                     strings_buf + i * ETH_GSTRING_LEN,
+                                     data_buf[i]);
+                       buf += rc;
+               }
+       else
+               for (i = 0; i < strings_num; i++)
+                       netif_err(adapter, drv, netdev, "%s: %llu\n",
+                                 strings_buf + i * ETH_GSTRING_LEN,
+                                 data_buf[i]);
+
+       devm_kfree(&adapter->pdev->dev, strings_buf);
+       devm_kfree(&adapter->pdev->dev, data_buf);
+}
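/* Note, not part of this patch: ena_dump_stats_ex() writes up to
 * ETH_GSTRING_LEN + sizeof(u64) bytes per statistic into 'buf' and advances
 * the pointer by the amount written, so a caller of ena_dump_stats_to_buf()
 * is expected to provide a buffer of roughly:
 *
 *	ena_get_sset_count(netdev, ETH_SS_STATS) *
 *		(ETH_GSTRING_LEN + sizeof(u64))
 *
 * bytes.
 */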
+
+void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf)
+{
+       if (!buf)
+               return;
+
+       ena_dump_stats_ex(adapter, buf);
+}
+
+void ena_dump_stats_to_dmesg(struct ena_adapter *adapter)
+{
+       ena_dump_stats_ex(adapter, NULL);
+}
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
new file mode 100644 (file)
index 0000000..5c536b8
--- /dev/null
@@ -0,0 +1,3278 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif /* CONFIG_RFS_ACCEL */
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/numa.h>
+#include <linux/pci.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+#include <linux/vmalloc.h>
+#include <net/ip.h>
+
+#include "ena_netdev.h"
+#include "ena_pci_id_tbl.h"
+
+static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n";
+
+MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
+MODULE_DESCRIPTION(DEVICE_NAME);
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT  (5 * HZ)
+
+#define ENA_NAPI_BUDGET 64
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
+               NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+static struct ena_aenq_handlers aenq_handlers;
+
+static struct workqueue_struct *ena_wq;
+
+MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
+
+static int ena_rss_init_default(struct ena_adapter *adapter);
+
+static void ena_tx_timeout(struct net_device *dev)
+{
+       struct ena_adapter *adapter = netdev_priv(dev);
+
+       u64_stats_update_begin(&adapter->syncp);
+       adapter->dev_stats.tx_timeout++;
+       u64_stats_update_end(&adapter->syncp);
+
+       netif_err(adapter, tx_err, dev, "Transmit timed out\n");
+
+       /* Change the state of the device to trigger reset */
+       set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+}
+
+static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
+{
+       int i;
+
+       for (i = 0; i < adapter->num_queues; i++)
+               adapter->rx_ring[i].mtu = mtu;
+}
+
+static int ena_change_mtu(struct net_device *dev, int new_mtu)
+{
+       struct ena_adapter *adapter = netdev_priv(dev);
+       int ret;
+
+       if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
+               netif_err(adapter, drv, dev,
+                         "Invalid MTU setting. new_mtu: %d\n", new_mtu);
+
+               return -EINVAL;
+       }
+
+       ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
+       if (!ret) {
+               netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
+               update_rx_ring_mtu(adapter, new_mtu);
+               dev->mtu = new_mtu;
+       } else {
+               netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
+                         new_mtu);
+       }
+
+       return ret;
+}
+
+static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
+{
+#ifdef CONFIG_RFS_ACCEL
+       u32 i;
+       int rc;
+
+       adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues);
+       if (!adapter->netdev->rx_cpu_rmap)
+               return -ENOMEM;
+       for (i = 0; i < adapter->num_queues; i++) {
+               int irq_idx = ENA_IO_IRQ_IDX(i);
+
+               rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
+                                     adapter->msix_entries[irq_idx].vector);
+               if (rc) {
+                       free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
+                       adapter->netdev->rx_cpu_rmap = NULL;
+                       return rc;
+               }
+       }
+#endif /* CONFIG_RFS_ACCEL */
+       return 0;
+}
+
+static void ena_init_io_rings_common(struct ena_adapter *adapter,
+                                    struct ena_ring *ring, u16 qid)
+{
+       ring->qid = qid;
+       ring->pdev = adapter->pdev;
+       ring->dev = &adapter->pdev->dev;
+       ring->netdev = adapter->netdev;
+       ring->napi = &adapter->ena_napi[qid].napi;
+       ring->adapter = adapter;
+       ring->ena_dev = adapter->ena_dev;
+       ring->per_napi_packets = 0;
+       ring->per_napi_bytes = 0;
+       ring->cpu = 0;
+       u64_stats_init(&ring->syncp);
+}
+
+static void ena_init_io_rings(struct ena_adapter *adapter)
+{
+       struct ena_com_dev *ena_dev;
+       struct ena_ring *txr, *rxr;
+       int i;
+
+       ena_dev = adapter->ena_dev;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               txr = &adapter->tx_ring[i];
+               rxr = &adapter->rx_ring[i];
+
+               /* TX/RX common ring state */
+               ena_init_io_rings_common(adapter, txr, i);
+               ena_init_io_rings_common(adapter, rxr, i);
+
+               /* TX specific ring state */
+               txr->ring_size = adapter->tx_ring_size;
+               txr->tx_max_header_size = ena_dev->tx_max_header_size;
+               txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
+               txr->sgl_size = adapter->max_tx_sgl_size;
+               txr->smoothed_interval =
+                       ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
+
+               /* RX specific ring state */
+               rxr->ring_size = adapter->rx_ring_size;
+               rxr->rx_copybreak = adapter->rx_copybreak;
+               rxr->sgl_size = adapter->max_rx_sgl_size;
+               rxr->smoothed_interval =
+                       ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+       }
+}
+
+/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
+{
+       struct ena_ring *tx_ring = &adapter->tx_ring[qid];
+       struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
+       int size, i, node;
+
+       if (tx_ring->tx_buffer_info) {
+               netif_err(adapter, ifup,
+                         adapter->netdev, "tx_buffer_info is not NULL");
+               return -EEXIST;
+       }
+
+       size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
+       node = cpu_to_node(ena_irq->cpu);
+
+       tx_ring->tx_buffer_info = vzalloc_node(size, node);
+       if (!tx_ring->tx_buffer_info) {
+               tx_ring->tx_buffer_info = vzalloc(size);
+               if (!tx_ring->tx_buffer_info)
+                       return -ENOMEM;
+       }
+
+       size = sizeof(u16) * tx_ring->ring_size;
+       tx_ring->free_tx_ids = vzalloc_node(size, node);
+       if (!tx_ring->free_tx_ids) {
+               tx_ring->free_tx_ids = vzalloc(size);
+               if (!tx_ring->free_tx_ids) {
+                       vfree(tx_ring->tx_buffer_info);
+                       return -ENOMEM;
+               }
+       }
+
+       /* Req id ring for TX out of order completions */
+       for (i = 0; i < tx_ring->ring_size; i++)
+               tx_ring->free_tx_ids[i] = i;
+
+       /* Reset tx statistics */
+       memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
+
+       tx_ring->next_to_use = 0;
+       tx_ring->next_to_clean = 0;
+       tx_ring->cpu = ena_irq->cpu;
+       return 0;
+}
+
+/* ena_free_tx_resources - Free I/O Tx Resources per Queue
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Free all transmit software resources
+ */
+static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
+{
+       struct ena_ring *tx_ring = &adapter->tx_ring[qid];
+
+       vfree(tx_ring->tx_buffer_info);
+       tx_ring->tx_buffer_info = NULL;
+
+       vfree(tx_ring->free_tx_ids);
+       tx_ring->free_tx_ids = NULL;
+}
+
+/* ena_setup_all_tx_resources - allocate I/O Tx queue resources for all queues
+ * @adapter: private structure
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ena_setup_all_tx_resources(struct ena_adapter *adapter)
+{
+       int i, rc = 0;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               rc = ena_setup_tx_resources(adapter, i);
+               if (rc)
+                       goto err_setup_tx;
+       }
+
+       return 0;
+
+err_setup_tx:
+
+       netif_err(adapter, ifup, adapter->netdev,
+                 "Tx queue %d: allocation failed\n", i);
+
+       /* rewind the index freeing the rings as we go */
+       while (i--)
+               ena_free_tx_resources(adapter, i);
+       return rc;
+}
+
+/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ */
+static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
+{
+       int i;
+
+       for (i = 0; i < adapter->num_queues; i++)
+               ena_free_tx_resources(adapter, i);
+}
+
+/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ena_setup_rx_resources(struct ena_adapter *adapter,
+                                 u32 qid)
+{
+       struct ena_ring *rx_ring = &adapter->rx_ring[qid];
+       struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
+       int size, node;
+
+       if (rx_ring->rx_buffer_info) {
+               netif_err(adapter, ifup, adapter->netdev,
+                         "rx_buffer_info is not NULL");
+               return -EEXIST;
+       }
+
+       /* alloc an extra element so that in the rx path
+        * we can always prefetch rx_info + 1
+        */
+       size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
+       node = cpu_to_node(ena_irq->cpu);
+
+       rx_ring->rx_buffer_info = vzalloc_node(size, node);
+       if (!rx_ring->rx_buffer_info) {
+               rx_ring->rx_buffer_info = vzalloc(size);
+               if (!rx_ring->rx_buffer_info)
+                       return -ENOMEM;
+       }
+
+       /* Reset rx statistics */
+       memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
+
+       rx_ring->next_to_clean = 0;
+       rx_ring->next_to_use = 0;
+       rx_ring->cpu = ena_irq->cpu;
+
+       return 0;
+}
+
+/* ena_free_rx_resources - Free I/O Rx Resources
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Free all receive software resources
+ */
+static void ena_free_rx_resources(struct ena_adapter *adapter,
+                                 u32 qid)
+{
+       struct ena_ring *rx_ring = &adapter->rx_ring[qid];
+
+       vfree(rx_ring->rx_buffer_info);
+       rx_ring->rx_buffer_info = NULL;
+}
+
+/* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
+{
+       int i, rc = 0;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               rc = ena_setup_rx_resources(adapter, i);
+               if (rc)
+                       goto err_setup_rx;
+       }
+
+       return 0;
+
+err_setup_rx:
+
+       netif_err(adapter, ifup, adapter->netdev,
+                 "Rx queue %d: allocation failed\n", i);
+
+       /* rewind the index freeing the rings as we go */
+       while (i--)
+               ena_free_rx_resources(adapter, i);
+       return rc;
+}
+
+/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ */
+static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
+{
+       int i;
+
+       for (i = 0; i < adapter->num_queues; i++)
+               ena_free_rx_resources(adapter, i);
+}
+
+static inline int ena_alloc_rx_page(struct ena_ring *rx_ring,
+                                   struct ena_rx_buffer *rx_info, gfp_t gfp)
+{
+       struct ena_com_buf *ena_buf;
+       struct page *page;
+       dma_addr_t dma;
+
+       /* if the previously allocated page is not used yet */
+       if (unlikely(rx_info->page))
+               return 0;
+
+       page = alloc_page(gfp);
+       if (unlikely(!page)) {
+               u64_stats_update_begin(&rx_ring->syncp);
+               rx_ring->rx_stats.page_alloc_fail++;
+               u64_stats_update_end(&rx_ring->syncp);
+               return -ENOMEM;
+       }
+
+       dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE,
+                          DMA_FROM_DEVICE);
+       if (unlikely(dma_mapping_error(rx_ring->dev, dma))) {
+               u64_stats_update_begin(&rx_ring->syncp);
+               rx_ring->rx_stats.dma_mapping_err++;
+               u64_stats_update_end(&rx_ring->syncp);
+
+               __free_page(page);
+               return -EIO;
+       }
+       netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+                 "alloc page %p, rx_info %p\n", page, rx_info);
+
+       rx_info->page = page;
+       rx_info->page_offset = 0;
+       ena_buf = &rx_info->ena_buf;
+       ena_buf->paddr = dma;
+       ena_buf->len = PAGE_SIZE;
+
+       return 0;
+}
+
+static void ena_free_rx_page(struct ena_ring *rx_ring,
+                            struct ena_rx_buffer *rx_info)
+{
+       struct page *page = rx_info->page;
+       struct ena_com_buf *ena_buf = &rx_info->ena_buf;
+
+       if (unlikely(!page)) {
+               netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
+                          "Trying to free unallocated buffer\n");
+               return;
+       }
+
+       dma_unmap_page(rx_ring->dev, ena_buf->paddr, PAGE_SIZE,
+                      DMA_FROM_DEVICE);
+
+       __free_page(page);
+       rx_info->page = NULL;
+}
+
+static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
+{
+       u16 next_to_use;
+       u32 i;
+       int rc;
+
+       next_to_use = rx_ring->next_to_use;
+
+       for (i = 0; i < num; i++) {
+               struct ena_rx_buffer *rx_info =
+                       &rx_ring->rx_buffer_info[next_to_use];
+
+               rc = ena_alloc_rx_page(rx_ring, rx_info,
+                                      __GFP_COLD | GFP_ATOMIC | __GFP_COMP);
+               if (unlikely(rc < 0)) {
+                       netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
+                                  "failed to alloc buffer for rx queue %d\n",
+                                  rx_ring->qid);
+                       break;
+               }
+               rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
+                                               &rx_info->ena_buf,
+                                               next_to_use);
+               if (unlikely(rc)) {
+                       netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
+                                  "failed to add buffer for rx queue %d\n",
+                                  rx_ring->qid);
+                       break;
+               }
+               next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
+                                                  rx_ring->ring_size);
+       }
+
+       if (unlikely(i < num)) {
+               u64_stats_update_begin(&rx_ring->syncp);
+               rx_ring->rx_stats.refil_partial++;
+               u64_stats_update_end(&rx_ring->syncp);
+               netdev_warn(rx_ring->netdev,
+                           "refilled rx qid %d with only %d buffers (from %d)\n",
+                           rx_ring->qid, i, num);
+       }
+
+       if (likely(i)) {
+               /* Add a memory barrier to make sure the descriptors were
+                * written before issuing the doorbell
+                */
+               wmb();
+               ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
+       }
+
+       rx_ring->next_to_use = next_to_use;
+
+       return i;
+}
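/* Note, not part of this patch: the wmb() in ena_refill_rx_bufs() orders the
 * descriptor writes against the doorbell that follows, so the device can
 * never observe the doorbell before the descriptors it announces. The same
 * pattern, sketched for a single posted buffer:
 *
 *	rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
 *					&rx_info->ena_buf, next_to_use);
 *	if (!rc) {
 *		wmb();	// make the descriptor visible before the doorbell
 *		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
 *	}
 */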
+
+static void ena_free_rx_bufs(struct ena_adapter *adapter,
+                            u32 qid)
+{
+       struct ena_ring *rx_ring = &adapter->rx_ring[qid];
+       u32 i;
+
+       for (i = 0; i < rx_ring->ring_size; i++) {
+               struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
+
+               if (rx_info->page)
+                       ena_free_rx_page(rx_ring, rx_info);
+       }
+}
+
+/* ena_refill_all_rx_bufs - allocate Rx buffers for all queues
+ * @adapter: board private structure
+ */
+static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
+{
+       struct ena_ring *rx_ring;
+       int i, rc, bufs_num;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               rx_ring = &adapter->rx_ring[i];
+               bufs_num = rx_ring->ring_size - 1;
+               rc = ena_refill_rx_bufs(rx_ring, bufs_num);
+
+               if (unlikely(rc != bufs_num))
+                       netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
+                                  "refilling Queue %d failed. allocated %d buffers from: %d\n",
+                                  i, rc, bufs_num);
+       }
+}
+
+static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
+{
+       int i;
+
+       for (i = 0; i < adapter->num_queues; i++)
+               ena_free_rx_bufs(adapter, i);
+}
+
+/* ena_free_tx_bufs - Free Tx Buffers per Queue
+ * @tx_ring: TX ring for which buffers are to be freed
+ */
+static void ena_free_tx_bufs(struct ena_ring *tx_ring)
+{
+       u32 i;
+
+       for (i = 0; i < tx_ring->ring_size; i++) {
+               struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
+               struct ena_com_buf *ena_buf;
+               int nr_frags;
+               int j;
+
+               if (!tx_info->skb)
+                       continue;
+
+               netdev_notice(tx_ring->netdev,
+                             "free uncompleted tx skb qid %d idx 0x%x\n",
+                             tx_ring->qid, i);
+
+               ena_buf = tx_info->bufs;
+               dma_unmap_single(tx_ring->dev,
+                                ena_buf->paddr,
+                                ena_buf->len,
+                                DMA_TO_DEVICE);
+
+               /* unmap remaining mapped pages */
+               nr_frags = tx_info->num_of_bufs - 1;
+               for (j = 0; j < nr_frags; j++) {
+                       ena_buf++;
+                       dma_unmap_page(tx_ring->dev,
+                                      ena_buf->paddr,
+                                      ena_buf->len,
+                                      DMA_TO_DEVICE);
+               }
+
+               dev_kfree_skb_any(tx_info->skb);
+       }
+       netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
+                                                 tx_ring->qid));
+}
+
+static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
+{
+       struct ena_ring *tx_ring;
+       int i;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               tx_ring = &adapter->tx_ring[i];
+               ena_free_tx_bufs(tx_ring);
+       }
+}
+
+static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
+{
+       u16 ena_qid;
+       int i;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               ena_qid = ENA_IO_TXQ_IDX(i);
+               ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
+       }
+}
+
+static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
+{
+       u16 ena_qid;
+       int i;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               ena_qid = ENA_IO_RXQ_IDX(i);
+               ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
+       }
+}
+
+static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
+{
+       ena_destroy_all_tx_queues(adapter);
+       ena_destroy_all_rx_queues(adapter);
+}
+
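+/* validate_tx_req_id - Sanity check a Tx completion request id
+ * @tx_ring: Tx ring the completion arrived on
+ * @req_id: request id reported by the device
+ *
+ * Returns 0 if the id maps to an in-flight skb; otherwise counts a
+ * bad_req_id stat and triggers a device reset.
+ */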
+static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
+{
+       struct ena_tx_buffer *tx_info = NULL;
+
+       if (likely(req_id < tx_ring->ring_size)) {
+               tx_info = &tx_ring->tx_buffer_info[req_id];
+               if (likely(tx_info->skb))
+                       return 0;
+       }
+
+       if (tx_info)
+               netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
+                         "tx_info doesn't have valid skb\n");
+       else
+               netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
+                         "Invalid req_id: %hu\n", req_id);
+
+       u64_stats_update_begin(&tx_ring->syncp);
+       tx_ring->tx_stats.bad_req_id++;
+       u64_stats_update_end(&tx_ring->syncp);
+
+       /* Trigger device reset */
+       set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags);
+       return -EFAULT;
+}
+
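+/* ena_clean_tx_irq - Reclaim completed Tx descriptors
+ * @tx_ring: Tx ring to clean
+ * @budget: maximum number of completed packets to process
+ *
+ * Unmaps and frees the skbs the device has finished with, acknowledges the
+ * completions and wakes the Tx queue if enough space was freed.
+ */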
+static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
+{
+       struct netdev_queue *txq;
+       bool above_thresh;
+       u32 tx_bytes = 0;
+       u32 total_done = 0;
+       u16 next_to_clean;
+       u16 req_id;
+       int tx_pkts = 0;
+       int rc;
+
+       next_to_clean = tx_ring->next_to_clean;
+       txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
+
+       while (tx_pkts < budget) {
+               struct ena_tx_buffer *tx_info;
+               struct sk_buff *skb;
+               struct ena_com_buf *ena_buf;
+               int i, nr_frags;
+
+               rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
+                                               &req_id);
+               if (rc)
+                       break;
+
+               rc = validate_tx_req_id(tx_ring, req_id);
+               if (rc)
+                       break;
+
+               tx_info = &tx_ring->tx_buffer_info[req_id];
+               skb = tx_info->skb;
+
+               /* prefetch skb_end_pointer() to speed up skb_shinfo(skb) */
+               prefetch(&skb->end);
+
+               tx_info->skb = NULL;
+               tx_info->last_jiffies = 0;
+
+               if (likely(tx_info->num_of_bufs != 0)) {
+                       ena_buf = tx_info->bufs;
+
+                       dma_unmap_single(tx_ring->dev,
+                                        dma_unmap_addr(ena_buf, paddr),
+                                        dma_unmap_len(ena_buf, len),
+                                        DMA_TO_DEVICE);
+
+                       /* unmap remaining mapped pages */
+                       nr_frags = tx_info->num_of_bufs - 1;
+                       for (i = 0; i < nr_frags; i++) {
+                               ena_buf++;
+                               dma_unmap_page(tx_ring->dev,
+                                              dma_unmap_addr(ena_buf, paddr),
+                                              dma_unmap_len(ena_buf, len),
+                                              DMA_TO_DEVICE);
+                       }
+               }
+
+               netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
+                         "tx_poll: q %d skb %p completed\n", tx_ring->qid,
+                         skb);
+
+               tx_bytes += skb->len;
+               dev_kfree_skb(skb);
+               tx_pkts++;
+               total_done += tx_info->tx_descs;
+
+               tx_ring->free_tx_ids[next_to_clean] = req_id;
+               next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
+                                                    tx_ring->ring_size);
+       }
+
+       tx_ring->next_to_clean = next_to_clean;
+       ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
+       ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
+
+       netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
+
+       netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
+                 "tx_poll: q %d done. total pkts: %d\n",
+                 tx_ring->qid, tx_pkts);
+
+       /* need to make the ring's circular update visible to
+        * ena_start_xmit() before checking for netif_queue_stopped().
+        */
+       smp_mb();
+
+       above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
+               ENA_TX_WAKEUP_THRESH;
+       if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
+               __netif_tx_lock(txq, smp_processor_id());
+               above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
+                       ENA_TX_WAKEUP_THRESH;
+               if (netif_tx_queue_stopped(txq) && above_thresh) {
+                       netif_tx_wake_queue(txq);
+                       u64_stats_update_begin(&tx_ring->syncp);
+                       tx_ring->tx_stats.queue_wakeup++;
+                       u64_stats_update_end(&tx_ring->syncp);
+               }
+               __netif_tx_unlock(txq);
+       }
+
+       tx_ring->per_napi_bytes += tx_bytes;
+       tx_ring->per_napi_packets += tx_pkts;
+
+       return tx_pkts;
+}
+
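+/* ena_rx_skb - Build an skb for a received packet
+ * @rx_ring: Rx ring the packet arrived on
+ * @ena_bufs: descriptors of the buffers holding the packet
+ * @descs: number of descriptors used by the packet
+ * @next_to_clean: ring index to advance past the consumed buffers
+ *
+ * Small packets (up to rx_copybreak) are copied into a freshly allocated
+ * linear skb; larger packets are assembled as page fragments.
+ */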
+static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
+                                 struct ena_com_rx_buf_info *ena_bufs,
+                                 u32 descs,
+                                 u16 *next_to_clean)
+{
+       struct sk_buff *skb;
+       struct ena_rx_buffer *rx_info =
+               &rx_ring->rx_buffer_info[*next_to_clean];
+       u32 len;
+       u32 buf = 0;
+       void *va;
+
+       len = ena_bufs[0].len;
+       if (unlikely(!rx_info->page)) {
+               netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+                         "Page is NULL\n");
+               return NULL;
+       }
+
+       netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+                 "rx_info %p page %p\n",
+                 rx_info, rx_info->page);
+
+       /* save virt address of first buffer */
+       va = page_address(rx_info->page) + rx_info->page_offset;
+       prefetch(va + NET_IP_ALIGN);
+
+       if (len <= rx_ring->rx_copybreak) {
+               skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+                                               rx_ring->rx_copybreak);
+               if (unlikely(!skb)) {
+                       u64_stats_update_begin(&rx_ring->syncp);
+                       rx_ring->rx_stats.skb_alloc_fail++;
+                       u64_stats_update_end(&rx_ring->syncp);
+                       netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+                                 "Failed to allocate skb\n");
+                       return NULL;
+               }
+
+               netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+                         "rx allocated small packet. len %d. data_len %d\n",
+                         skb->len, skb->data_len);
+
+               /* sync this buffer for CPU use */
+               dma_sync_single_for_cpu(rx_ring->dev,
+                                       dma_unmap_addr(&rx_info->ena_buf, paddr),
+                                       len,
+                                       DMA_FROM_DEVICE);
+               skb_copy_to_linear_data(skb, va, len);
+               dma_sync_single_for_device(rx_ring->dev,
+                                          dma_unmap_addr(&rx_info->ena_buf, paddr),
+                                          len,
+                                          DMA_FROM_DEVICE);
+
+               skb_put(skb, len);
+               skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+               *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
+                                                    rx_ring->ring_size);
+               return skb;
+       }
+
+       skb = napi_get_frags(rx_ring->napi);
+       if (unlikely(!skb)) {
+               netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+                         "Failed allocating skb\n");
+               u64_stats_update_begin(&rx_ring->syncp);
+               rx_ring->rx_stats.skb_alloc_fail++;
+               u64_stats_update_end(&rx_ring->syncp);
+               return NULL;
+       }
+
+       do {
+               dma_unmap_page(rx_ring->dev,
+                              dma_unmap_addr(&rx_info->ena_buf, paddr),
+                              PAGE_SIZE, DMA_FROM_DEVICE);
+
+               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
+                               rx_info->page_offset, len, PAGE_SIZE);
+
+               netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+                         "rx skb updated. len %d. data_len %d\n",
+                         skb->len, skb->data_len);
+
+               rx_info->page = NULL;
+               *next_to_clean =
+                       ENA_RX_RING_IDX_NEXT(*next_to_clean,
+                                            rx_ring->ring_size);
+               if (likely(--descs == 0))
+                       break;
+               rx_info = &rx_ring->rx_buffer_info[*next_to_clean];
+               len = ena_bufs[++buf].len;
+       } while (1);
+
+       return skb;
+}
+
+/* ena_rx_checksum - indicate in the skb whether the hw reported a good checksum
+ * @rx_ring: Rx ring the packet arrived on
+ * @ena_rx_ctx: received packet context/metadata
+ * @skb: skb currently being received and modified
+ */
+static inline void ena_rx_checksum(struct ena_ring *rx_ring,
+                                  struct ena_com_rx_ctx *ena_rx_ctx,
+                                  struct sk_buff *skb)
+{
+       /* Rx csum disabled */
+       if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
+               skb->ip_summed = CHECKSUM_NONE;
+               return;
+       }
+
+       /* For fragmented packets the checksum isn't valid */
+       if (ena_rx_ctx->frag) {
+               skb->ip_summed = CHECKSUM_NONE;
+               return;
+       }
+
+       /* if IP and error */
+       if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
+                    (ena_rx_ctx->l3_csum_err))) {
+               /* ipv4 checksum error */
+               skb->ip_summed = CHECKSUM_NONE;
+               u64_stats_update_begin(&rx_ring->syncp);
+               rx_ring->rx_stats.bad_csum++;
+               u64_stats_update_end(&rx_ring->syncp);
+               netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+                         "RX IPv4 header checksum error\n");
+               return;
+       }
+
+       /* if TCP/UDP */
+       if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
+                  (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
+               if (unlikely(ena_rx_ctx->l4_csum_err)) {
+                       /* TCP/UDP checksum error */
+                       u64_stats_update_begin(&rx_ring->syncp);
+                       rx_ring->rx_stats.bad_csum++;
+                       u64_stats_update_end(&rx_ring->syncp);
+                       netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+                                 "RX L4 checksum error\n");
+                       skb->ip_summed = CHECKSUM_NONE;
+                       return;
+               }
+
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+       }
+}
+
+static void ena_set_rx_hash(struct ena_ring *rx_ring,
+                           struct ena_com_rx_ctx *ena_rx_ctx,
+                           struct sk_buff *skb)
+{
+       enum pkt_hash_types hash_type;
+
+       if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
+               if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
+                          (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
+                       hash_type = PKT_HASH_TYPE_L4;
+               else
+                       hash_type = PKT_HASH_TYPE_NONE;
+
+               /* Override hash type if the packet is fragmented */
+               if (ena_rx_ctx->frag)
+                       hash_type = PKT_HASH_TYPE_NONE;
+
+               skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
+       }
+}
+
+/* ena_clean_rx_irq - Cleanup RX irq
+ * @rx_ring: RX ring to clean
+ * @napi: napi handler
+ * @budget: how many packets driver is allowed to clean
+ *
+ * Returns the number of cleaned buffers.
+ */
+static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+                           u32 budget)
+{
+       u16 next_to_clean = rx_ring->next_to_clean;
+       u32 res_budget, work_done;
+
+       struct ena_com_rx_ctx ena_rx_ctx;
+       struct ena_adapter *adapter;
+       struct sk_buff *skb;
+       int refill_required;
+       int refill_threshold;
+       int rc = 0;
+       int total_len = 0;
+       int rx_copybreak_pkt = 0;
+
+       netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+                 "%s qid %d\n", __func__, rx_ring->qid);
+       res_budget = budget;
+
+       do {
+               ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
+               ena_rx_ctx.max_bufs = rx_ring->sgl_size;
+               ena_rx_ctx.descs = 0;
+               rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
+                                   rx_ring->ena_com_io_sq,
+                                   &ena_rx_ctx);
+               if (unlikely(rc))
+                       goto error;
+
+               if (unlikely(ena_rx_ctx.descs == 0))
+                       break;
+
+               netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+                         "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
+                         rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
+                         ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
+
+               /* allocate skb and fill it */
+               skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs,
+                                &next_to_clean);
+
+               /* exit if we failed to retrieve a buffer */
+               if (unlikely(!skb)) {
+                       next_to_clean = ENA_RX_RING_IDX_ADD(next_to_clean,
+                                                           ena_rx_ctx.descs,
+                                                           rx_ring->ring_size);
+                       break;
+               }
+
+               ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
+
+               ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
+
+               skb_record_rx_queue(skb, rx_ring->qid);
+
+               if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) {
+                       total_len += rx_ring->ena_bufs[0].len;
+                       rx_copybreak_pkt++;
+                       napi_gro_receive(napi, skb);
+               } else {
+                       total_len += skb->len;
+                       napi_gro_frags(napi);
+               }
+
+               res_budget--;
+       } while (likely(res_budget));
+
+       work_done = budget - res_budget;
+       rx_ring->per_napi_bytes += total_len;
+       rx_ring->per_napi_packets += work_done;
+       u64_stats_update_begin(&rx_ring->syncp);
+       rx_ring->rx_stats.bytes += total_len;
+       rx_ring->rx_stats.cnt += work_done;
+       rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
+       u64_stats_update_end(&rx_ring->syncp);
+
+       rx_ring->next_to_clean = next_to_clean;
+
+       refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
+       refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER;
+
+       /* Optimization, try to batch new rx buffers */
+       if (refill_required > refill_threshold) {
+               ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
+               ena_refill_rx_bufs(rx_ring, refill_required);
+       }
+
+       return work_done;
+
+error:
+       adapter = netdev_priv(rx_ring->netdev);
+
+       u64_stats_update_begin(&rx_ring->syncp);
+       rx_ring->rx_stats.bad_desc_num++;
+       u64_stats_update_end(&rx_ring->syncp);
+
+       /* Too many desc from the device. Trigger reset */
+       set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+
+       return 0;
+}
+
+inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring,
+                                      struct ena_ring *tx_ring)
+{
+       /* We apply adaptive moderation on Rx path only.
+        * Tx uses static interrupt moderation.
+        */
+       ena_com_calculate_interrupt_delay(rx_ring->ena_dev,
+                                         rx_ring->per_napi_packets,
+                                         rx_ring->per_napi_bytes,
+                                         &rx_ring->smoothed_interval,
+                                         &rx_ring->moder_tbl_idx);
+
+       /* Reset per napi packets/bytes */
+       tx_ring->per_napi_packets = 0;
+       tx_ring->per_napi_bytes = 0;
+       rx_ring->per_napi_packets = 0;
+       rx_ring->per_napi_bytes = 0;
+}
+
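+/* If NAPI migrated to a different CPU, update the NUMA hint of the Tx and Rx
+ * completion queues to the node of the CPU now running this queue pair.
+ */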
+static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+                                            struct ena_ring *rx_ring)
+{
+       int cpu = get_cpu();
+       int numa_node;
+
+       /* Check only one ring since the 2 rings are running on the same cpu */
+       if (likely(tx_ring->cpu == cpu))
+               goto out;
+
+       numa_node = cpu_to_node(cpu);
+       put_cpu();
+
+       if (numa_node != NUMA_NO_NODE) {
+               ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
+               ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node);
+       }
+
+       tx_ring->cpu = cpu;
+       rx_ring->cpu = cpu;
+
+       return;
+out:
+       put_cpu();
+}
+
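+/* ena_io_poll - NAPI poll handler shared by the Tx and Rx rings of a queue
+ * @napi: napi instance of the queue pair
+ * @budget: Rx packet budget granted by the networking stack
+ *
+ * Cleans Tx completions and Rx packets; when both rings finish below their
+ * budgets, completes NAPI, updates interrupt moderation and unmasks the
+ * shared MSI-X vector.
+ */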
+static int ena_io_poll(struct napi_struct *napi, int budget)
+{
+       struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+       struct ena_ring *tx_ring, *rx_ring;
+       struct ena_eth_io_intr_reg intr_reg;
+
+       u32 tx_work_done;
+       u32 rx_work_done;
+       int tx_budget;
+       int napi_comp_call = 0;
+       int ret;
+
+       tx_ring = ena_napi->tx_ring;
+       rx_ring = ena_napi->rx_ring;
+
+       tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
+
+       if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
+               napi_complete_done(napi, 0);
+               return 0;
+       }
+
+       tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
+       rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
+
+       if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
+               napi_complete_done(napi, rx_work_done);
+
+               napi_comp_call = 1;
+               /* Tx and Rx share the same interrupt vector */
+               if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
+                       ena_adjust_intr_moderation(rx_ring, tx_ring);
+
+               /* Update intr register: rx intr delay, tx intr delay and
+                * interrupt unmask
+                */
+               ena_com_update_intr_reg(&intr_reg,
+                                       rx_ring->smoothed_interval,
+                                       tx_ring->smoothed_interval,
+                                       true);
+
+               /* It is a shared MSI-X. Tx and Rx CQ have pointer to it.
+                * So we use one of them to reach the intr reg
+                */
+               ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+
+               ena_update_ring_numa_node(tx_ring, rx_ring);
+
+               ret = rx_work_done;
+       } else {
+               ret = budget;
+       }
+
+       u64_stats_update_begin(&tx_ring->syncp);
+       tx_ring->tx_stats.napi_comp += napi_comp_call;
+       tx_ring->tx_stats.tx_poll++;
+       u64_stats_update_end(&tx_ring->syncp);
+
+       return ret;
+}
+
+static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
+{
+       struct ena_adapter *adapter = (struct ena_adapter *)data;
+
+       ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
+
+       /* Don't call the aenq handler before probe is done */
+       if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
+               ena_com_aenq_intr_handler(adapter->ena_dev, data);
+
+       return IRQ_HANDLED;
+}
+
+/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
+ * @irq: interrupt number
+ * @data: pointer to the ena_napi structure of the queue pair
+ */
+static irqreturn_t ena_intr_msix_io(int irq, void *data)
+{
+       struct ena_napi *ena_napi = data;
+
+       napi_schedule(&ena_napi->napi);
+
+       return IRQ_HANDLED;
+}
+
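+/* Reserve and enable the MSI-X vectors needed for the management interrupt
+ * and one I/O interrupt per queue.
+ */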
+static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
+{
+       int i, msix_vecs, rc;
+
+       if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
+               netif_err(adapter, probe, adapter->netdev,
+                         "Error, MSI-X is already enabled\n");
+               return -EPERM;
+       }
+
+       /* Reserve the max msix vectors we might need */
+       msix_vecs = ENA_MAX_MSIX_VEC(num_queues);
+
+       netif_dbg(adapter, probe, adapter->netdev,
+                 "trying to enable MSI-X, vectors %d\n", msix_vecs);
+
+       adapter->msix_entries = vzalloc(msix_vecs * sizeof(struct msix_entry));
+
+       if (!adapter->msix_entries)
+               return -ENOMEM;
+
+       for (i = 0; i < msix_vecs; i++)
+               adapter->msix_entries[i].entry = i;
+
+       rc = pci_enable_msix(adapter->pdev, adapter->msix_entries, msix_vecs);
+       if (rc != 0) {
+               netif_err(adapter, probe, adapter->netdev,
+                         "Failed to enable MSI-X, vectors %d rc %d\n",
+                         msix_vecs, rc);
+               return -ENOSPC;
+       }
+
+       netif_dbg(adapter, probe, adapter->netdev, "enable MSI-X, vectors %d\n",
+                 msix_vecs);
+
+       if (msix_vecs >= 1) {
+               if (ena_init_rx_cpu_rmap(adapter))
+                       netif_warn(adapter, probe, adapter->netdev,
+                                  "Failed to map IRQs to CPUs\n");
+       }
+
+       adapter->msix_vecs = msix_vecs;
+       set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
+
+       return 0;
+}
+
+static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
+{
+       u32 cpu;
+
+       snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
+                ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
+                pci_name(adapter->pdev));
+       adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
+               ena_intr_msix_mgmnt;
+       adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
+       adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
+               adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
+       cpu = cpumask_first(cpu_online_mask);
+       adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
+       cpumask_set_cpu(cpu,
+                       &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
+}
+
+static void ena_setup_io_intr(struct ena_adapter *adapter)
+{
+       struct net_device *netdev;
+       int irq_idx, i, cpu;
+
+       netdev = adapter->netdev;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               irq_idx = ENA_IO_IRQ_IDX(i);
+               cpu = i % num_online_cpus();
+
+               snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
+                        "%s-Tx-Rx-%d", netdev->name, i);
+               adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
+               adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
+               adapter->irq_tbl[irq_idx].vector =
+                       adapter->msix_entries[irq_idx].vector;
+               adapter->irq_tbl[irq_idx].cpu = cpu;
+
+               cpumask_set_cpu(cpu,
+                               &adapter->irq_tbl[irq_idx].affinity_hint_mask);
+       }
+}
+
+static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
+{
+       unsigned long flags = 0;
+       struct ena_irq *irq;
+       int rc;
+
+       irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
+       rc = request_irq(irq->vector, irq->handler, flags, irq->name,
+                        irq->data);
+       if (rc) {
+               netif_err(adapter, probe, adapter->netdev,
+                         "failed to request admin irq\n");
+               return rc;
+       }
+
+       netif_dbg(adapter, probe, adapter->netdev,
+                 "set affinity hint of mgmnt irq to 0x%lx (irq vector: %d)\n",
+                 irq->affinity_hint_mask.bits[0], irq->vector);
+
+       irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
+
+       return rc;
+}
+
+static int ena_request_io_irq(struct ena_adapter *adapter)
+{
+       unsigned long flags = 0;
+       struct ena_irq *irq;
+       int rc = 0, i, k;
+
+       if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
+               netif_err(adapter, ifup, adapter->netdev,
+                         "Failed to request I/O IRQ: MSI-X is not enabled\n");
+               return -EINVAL;
+       }
+
+       for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
+               irq = &adapter->irq_tbl[i];
+               rc = request_irq(irq->vector, irq->handler, flags, irq->name,
+                                irq->data);
+               if (rc) {
+                       netif_err(adapter, ifup, adapter->netdev,
+                                 "Failed to request I/O IRQ. index %d rc %d\n",
+                                  i, rc);
+                       goto err;
+               }
+
+               netif_dbg(adapter, ifup, adapter->netdev,
+                         "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
+                         i, irq->affinity_hint_mask.bits[0], irq->vector);
+
+               irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
+       }
+
+       return rc;
+
+err:
+       for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
+               irq = &adapter->irq_tbl[k];
+               free_irq(irq->vector, irq->data);
+       }
+
+       return rc;
+}
+
+static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
+{
+       struct ena_irq *irq;
+
+       irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
+       synchronize_irq(irq->vector);
+       irq_set_affinity_hint(irq->vector, NULL);
+       free_irq(irq->vector, irq->data);
+}
+
+static void ena_free_io_irq(struct ena_adapter *adapter)
+{
+       struct ena_irq *irq;
+       int i;
+
+#ifdef CONFIG_RFS_ACCEL
+       if (adapter->msix_vecs >= 1) {
+               free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
+               adapter->netdev->rx_cpu_rmap = NULL;
+       }
+#endif /* CONFIG_RFS_ACCEL */
+
+       for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
+               irq = &adapter->irq_tbl[i];
+               irq_set_affinity_hint(irq->vector, NULL);
+               free_irq(irq->vector, irq->data);
+       }
+}
+
+static void ena_disable_msix(struct ena_adapter *adapter)
+{
+       if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
+               pci_disable_msix(adapter->pdev);
+
+       if (adapter->msix_entries)
+               vfree(adapter->msix_entries);
+       adapter->msix_entries = NULL;
+}
+
+static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
+{
+       int i;
+
+       if (!netif_running(adapter->netdev))
+               return;
+
+       for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++)
+               synchronize_irq(adapter->irq_tbl[i].vector);
+}
+
+static void ena_del_napi(struct ena_adapter *adapter)
+{
+       int i;
+
+       for (i = 0; i < adapter->num_queues; i++)
+               netif_napi_del(&adapter->ena_napi[i].napi);
+}
+
+static void ena_init_napi(struct ena_adapter *adapter)
+{
+       struct ena_napi *napi;
+       int i;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               napi = &adapter->ena_napi[i];
+
+               netif_napi_add(adapter->netdev,
+                              &adapter->ena_napi[i].napi,
+                              ena_io_poll,
+                              ENA_NAPI_BUDGET);
+               napi->rx_ring = &adapter->rx_ring[i];
+               napi->tx_ring = &adapter->tx_ring[i];
+               napi->qid = i;
+       }
+}
+
+static void ena_napi_disable_all(struct ena_adapter *adapter)
+{
+       int i;
+
+       for (i = 0; i < adapter->num_queues; i++)
+               napi_disable(&adapter->ena_napi[i].napi);
+}
+
+static void ena_napi_enable_all(struct ena_adapter *adapter)
+{
+       int i;
+
+       for (i = 0; i < adapter->num_queues; i++)
+               napi_enable(&adapter->ena_napi[i].napi);
+}
+
+static void ena_restore_ethtool_params(struct ena_adapter *adapter)
+{
+       adapter->tx_usecs = 0;
+       adapter->rx_usecs = 0;
+       adapter->tx_frames = 1;
+       adapter->rx_frames = 1;
+}
+
+/* Configure the Rx RSS: indirection table, hash function and hash inputs */
+static int ena_rss_configure(struct ena_adapter *adapter)
+{
+       struct ena_com_dev *ena_dev = adapter->ena_dev;
+       int rc;
+
+       /* In case the RSS table wasn't initialized by probe */
+       if (!ena_dev->rss.tbl_log_size) {
+               rc = ena_rss_init_default(adapter);
+               if (rc && (rc != -EPERM)) {
+                       netif_err(adapter, ifup, adapter->netdev,
+                                 "Failed to init RSS rc: %d\n", rc);
+                       return rc;
+               }
+       }
+
+       /* Set indirect table */
+       rc = ena_com_indirect_table_set(ena_dev);
+       if (unlikely(rc && rc != -EPERM))
+               return rc;
+
+       /* Configure hash function (if supported) */
+       rc = ena_com_set_hash_function(ena_dev);
+       if (unlikely(rc && (rc != -EPERM)))
+               return rc;
+
+       /* Configure hash inputs (if supported) */
+       rc = ena_com_set_hash_ctrl(ena_dev);
+       if (unlikely(rc && (rc != -EPERM)))
+               return rc;
+
+       return 0;
+}
+
+static int ena_up_complete(struct ena_adapter *adapter)
+{
+       int rc, i;
+
+       rc = ena_rss_configure(adapter);
+       if (rc)
+               return rc;
+
+       ena_init_napi(adapter);
+
+       ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
+
+       ena_refill_all_rx_bufs(adapter);
+
+       /* enable transmits */
+       netif_tx_start_all_queues(adapter->netdev);
+
+       ena_restore_ethtool_params(adapter);
+
+       ena_napi_enable_all(adapter);
+
+       /* schedule napi in case we had pending packets
+        * from the last time napi was disabled
+        */
+       for (i = 0; i < adapter->num_queues; i++)
+               napi_schedule(&adapter->ena_napi[i].napi);
+
+       return 0;
+}
+
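+/* Create a single Tx I/O queue on the device, fetch its SQ/CQ handles and
+ * set its NUMA hint.
+ */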
+static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
+{
+       struct ena_com_create_io_ctx ctx = { 0 };
+       struct ena_com_dev *ena_dev;
+       struct ena_ring *tx_ring;
+       u32 msix_vector;
+       u16 ena_qid;
+       int rc;
+
+       ena_dev = adapter->ena_dev;
+
+       tx_ring = &adapter->tx_ring[qid];
+       msix_vector = ENA_IO_IRQ_IDX(qid);
+       ena_qid = ENA_IO_TXQ_IDX(qid);
+
+       ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
+       ctx.qid = ena_qid;
+       ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
+       ctx.msix_vector = msix_vector;
+       ctx.queue_size = adapter->tx_ring_size;
+       ctx.numa_node = cpu_to_node(tx_ring->cpu);
+
+       rc = ena_com_create_io_queue(ena_dev, &ctx);
+       if (rc) {
+               netif_err(adapter, ifup, adapter->netdev,
+                         "Failed to create I/O TX queue num %d rc: %d\n",
+                         qid, rc);
+               return rc;
+       }
+
+       rc = ena_com_get_io_handlers(ena_dev, ena_qid,
+                                    &tx_ring->ena_com_io_sq,
+                                    &tx_ring->ena_com_io_cq);
+       if (rc) {
+               netif_err(adapter, ifup, adapter->netdev,
+                         "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
+                         qid, rc);
+               ena_com_destroy_io_queue(ena_dev, ena_qid);
+       }
+
+       ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
+       return rc;
+}
+
+static int ena_create_all_io_tx_queues(struct ena_adapter *adapter)
+{
+       struct ena_com_dev *ena_dev = adapter->ena_dev;
+       int rc, i;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               rc = ena_create_io_tx_queue(adapter, i);
+               if (rc)
+                       goto create_err;
+       }
+
+       return 0;
+
+create_err:
+       while (i--)
+               ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
+
+       return rc;
+}
+
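+/* Create a single Rx I/O queue on the device, fetch its SQ/CQ handles and
+ * set its NUMA hint.
+ */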
+static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
+{
+       struct ena_com_dev *ena_dev;
+       struct ena_com_create_io_ctx ctx = { 0 };
+       struct ena_ring *rx_ring;
+       u32 msix_vector;
+       u16 ena_qid;
+       int rc;
+
+       ena_dev = adapter->ena_dev;
+
+       rx_ring = &adapter->rx_ring[qid];
+       msix_vector = ENA_IO_IRQ_IDX(qid);
+       ena_qid = ENA_IO_RXQ_IDX(qid);
+
+       ctx.qid = ena_qid;
+       ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
+       ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+       ctx.msix_vector = msix_vector;
+       ctx.queue_size = adapter->rx_ring_size;
+       ctx.numa_node = cpu_to_node(rx_ring->cpu);
+
+       rc = ena_com_create_io_queue(ena_dev, &ctx);
+       if (rc) {
+               netif_err(adapter, ifup, adapter->netdev,
+                         "Failed to create I/O RX queue num %d rc: %d\n",
+                         qid, rc);
+               return rc;
+       }
+
+       rc = ena_com_get_io_handlers(ena_dev, ena_qid,
+                                    &rx_ring->ena_com_io_sq,
+                                    &rx_ring->ena_com_io_cq);
+       if (rc) {
+               netif_err(adapter, ifup, adapter->netdev,
+                         "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
+                         qid, rc);
+               ena_com_destroy_io_queue(ena_dev, ena_qid);
+       }
+
+       ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
+
+       return rc;
+}
+
+static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
+{
+       struct ena_com_dev *ena_dev = adapter->ena_dev;
+       int rc, i;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               rc = ena_create_io_rx_queue(adapter, i);
+               if (rc)
+                       goto create_err;
+       }
+
+       return 0;
+
+create_err:
+       while (i--)
+               ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
+
+       return rc;
+}
+
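+/* ena_up - Bring the data path up
+ * @adapter: board private structure
+ *
+ * Requests the I/O IRQs, allocates the Tx/Rx ring resources, creates the
+ * device I/O queues and completes the bring-up (RSS, NAPI, Tx queues).
+ */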
+static int ena_up(struct ena_adapter *adapter)
+{
+       int rc;
+
+       netdev_dbg(adapter->netdev, "%s\n", __func__);
+
+       ena_setup_io_intr(adapter);
+
+       rc = ena_request_io_irq(adapter);
+       if (rc)
+               goto err_req_irq;
+
+       /* allocate transmit descriptors */
+       rc = ena_setup_all_tx_resources(adapter);
+       if (rc)
+               goto err_setup_tx;
+
+       /* allocate receive descriptors */
+       rc = ena_setup_all_rx_resources(adapter);
+       if (rc)
+               goto err_setup_rx;
+
+       /* Create TX queues */
+       rc = ena_create_all_io_tx_queues(adapter);
+       if (rc)
+               goto err_create_tx_queues;
+
+       /* Create RX queues */
+       rc = ena_create_all_io_rx_queues(adapter);
+       if (rc)
+               goto err_create_rx_queues;
+
+       rc = ena_up_complete(adapter);
+       if (rc)
+               goto err_up;
+
+       if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
+               netif_carrier_on(adapter->netdev);
+
+       u64_stats_update_begin(&adapter->syncp);
+       adapter->dev_stats.interface_up++;
+       u64_stats_update_end(&adapter->syncp);
+
+       set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+
+       return rc;
+
+err_up:
+       ena_destroy_all_rx_queues(adapter);
+err_create_rx_queues:
+       ena_destroy_all_tx_queues(adapter);
+err_create_tx_queues:
+       ena_free_all_io_rx_resources(adapter);
+err_setup_rx:
+       ena_free_all_io_tx_resources(adapter);
+err_setup_tx:
+       ena_free_io_irq(adapter);
+err_req_irq:
+
+       return rc;
+}
+
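+/* ena_down - Tear down the data path
+ * @adapter: board private structure
+ *
+ * Disables NAPI and the Tx queues, destroys the device I/O queues, frees the
+ * I/O IRQs and releases all ring buffers and resources.
+ */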
+static void ena_down(struct ena_adapter *adapter)
+{
+       netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
+
+       clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+
+       u64_stats_update_begin(&adapter->syncp);
+       adapter->dev_stats.interface_down++;
+       u64_stats_update_end(&adapter->syncp);
+
+       /* After this point the napi handler won't enable the tx queue */
+       ena_napi_disable_all(adapter);
+       netif_carrier_off(adapter->netdev);
+       netif_tx_disable(adapter->netdev);
+
+       /* After destroy the queue there won't be any new interrupts */
+       ena_destroy_all_io_queues(adapter);
+
+       ena_disable_io_intr_sync(adapter);
+       ena_free_io_irq(adapter);
+       ena_del_napi(adapter);
+
+       ena_free_all_tx_bufs(adapter);
+       ena_free_all_rx_bufs(adapter);
+       ena_free_all_io_tx_resources(adapter);
+       ena_free_all_io_rx_resources(adapter);
+}
+
+/* ena_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ */
+static int ena_open(struct net_device *netdev)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       int rc;
+
+       /* Notify the stack of the actual queue counts. */
+       rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues);
+       if (rc) {
+               netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
+               return rc;
+       }
+
+       rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues);
+       if (rc) {
+               netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
+               return rc;
+       }
+
+       rc = ena_up(adapter);
+       if (rc)
+               return rc;
+
+       return rc;
+}
+
+/* ena_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the drivers control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ */
+static int ena_close(struct net_device *netdev)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+
+       netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
+
+       if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+               ena_down(adapter);
+
+       return 0;
+}
+
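+/* ena_tx_csum - Fill the Tx context checksum/TSO metadata from the skb
+ * @ena_tx_ctx: Tx context later passed to ena_com_prepare_tx()
+ * @skb: skb being transmitted
+ */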
+static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb)
+{
+       u32 mss = skb_shinfo(skb)->gso_size;
+       struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
+       u8 l4_protocol = 0;
+
+       if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
+               ena_tx_ctx->l4_csum_enable = 1;
+               if (mss) {
+                       ena_tx_ctx->tso_enable = 1;
+                       ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
+                       ena_tx_ctx->l4_csum_partial = 0;
+               } else {
+                       ena_tx_ctx->tso_enable = 0;
+                       ena_meta->l4_hdr_len = 0;
+                       ena_tx_ctx->l4_csum_partial = 1;
+               }
+
+               switch (ip_hdr(skb)->version) {
+               case IPVERSION:
+                       ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
+                       if (ip_hdr(skb)->frag_off & htons(IP_DF))
+                               ena_tx_ctx->df = 1;
+                       if (mss)
+                               ena_tx_ctx->l3_csum_enable = 1;
+                       l4_protocol = ip_hdr(skb)->protocol;
+                       break;
+               case 6:
+                       ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
+                       l4_protocol = ipv6_hdr(skb)->nexthdr;
+                       break;
+               default:
+                       break;
+               }
+
+               if (l4_protocol == IPPROTO_TCP)
+                       ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
+               else
+                       ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
+
+               ena_meta->mss = mss;
+               ena_meta->l3_hdr_len = skb_network_header_len(skb);
+               ena_meta->l3_hdr_offset = skb_network_offset(skb);
+               ena_tx_ctx->meta_valid = 1;
+
+       } else {
+               ena_tx_ctx->meta_valid = 0;
+       }
+}
+
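+/* Linearize the skb when it needs more fragments than the Tx SGL can describe. */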
+static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
+                                      struct sk_buff *skb)
+{
+       int num_frags, header_len, rc;
+
+       num_frags = skb_shinfo(skb)->nr_frags;
+       header_len = skb_headlen(skb);
+
+       if (num_frags < tx_ring->sgl_size)
+               return 0;
+
+       if ((num_frags == tx_ring->sgl_size) &&
+           (header_len < tx_ring->tx_max_header_size))
+               return 0;
+
+       u64_stats_update_begin(&tx_ring->syncp);
+       tx_ring->tx_stats.linearize++;
+       u64_stats_update_end(&tx_ring->syncp);
+
+       rc = skb_linearize(skb);
+       if (unlikely(rc)) {
+               u64_stats_update_begin(&tx_ring->syncp);
+               tx_ring->tx_stats.linearize_failed++;
+               u64_stats_update_end(&tx_ring->syncp);
+       }
+
+       return rc;
+}
+
+/* Called with netif_tx_lock. */
+static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct ena_adapter *adapter = netdev_priv(dev);
+       struct ena_tx_buffer *tx_info;
+       struct ena_com_tx_ctx ena_tx_ctx;
+       struct ena_ring *tx_ring;
+       struct netdev_queue *txq;
+       struct ena_com_buf *ena_buf;
+       void *push_hdr;
+       u32 len, last_frag;
+       u16 next_to_use;
+       u16 req_id;
+       u16 push_len;
+       u16 header_len;
+       dma_addr_t dma;
+       int qid, rc, nb_hw_desc;
+       int i = -1;
+
+       netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
+       /* Determine which tx ring we will be placed on */
+       qid = skb_get_queue_mapping(skb);
+       tx_ring = &adapter->tx_ring[qid];
+       txq = netdev_get_tx_queue(dev, qid);
+
+       rc = ena_check_and_linearize_skb(tx_ring, skb);
+       if (unlikely(rc))
+               goto error_drop_packet;
+
+       skb_tx_timestamp(skb);
+       len = skb_headlen(skb);
+
+       next_to_use = tx_ring->next_to_use;
+       req_id = tx_ring->free_tx_ids[next_to_use];
+       tx_info = &tx_ring->tx_buffer_info[req_id];
+       tx_info->num_of_bufs = 0;
+
+       WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
+       ena_buf = tx_info->bufs;
+       tx_info->skb = skb;
+
+       if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+               /* prepare the push buffer */
+               push_len = min_t(u32, len, tx_ring->tx_max_header_size);
+               header_len = push_len;
+               push_hdr = skb->data;
+       } else {
+               push_len = 0;
+               header_len = min_t(u32, len, tx_ring->tx_max_header_size);
+               push_hdr = NULL;
+       }
+
+       netif_dbg(adapter, tx_queued, dev,
+                 "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
+                 push_hdr, push_len);
+
+       if (len > push_len) {
+               dma = dma_map_single(tx_ring->dev, skb->data + push_len,
+                                    len - push_len, DMA_TO_DEVICE);
+               if (dma_mapping_error(tx_ring->dev, dma))
+                       goto error_report_dma_error;
+
+               ena_buf->paddr = dma;
+               ena_buf->len = len - push_len;
+
+               ena_buf++;
+               tx_info->num_of_bufs++;
+       }
+
+       last_frag = skb_shinfo(skb)->nr_frags;
+
+       for (i = 0; i < last_frag; i++) {
+               const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+               len = skb_frag_size(frag);
+               dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len,
+                                      DMA_TO_DEVICE);
+               if (dma_mapping_error(tx_ring->dev, dma))
+                       goto error_report_dma_error;
+
+               ena_buf->paddr = dma;
+               ena_buf->len = len;
+               ena_buf++;
+       }
+
+       tx_info->num_of_bufs += last_frag;
+
+       memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
+       ena_tx_ctx.ena_bufs = tx_info->bufs;
+       ena_tx_ctx.push_header = push_hdr;
+       ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
+       ena_tx_ctx.req_id = req_id;
+       ena_tx_ctx.header_len = header_len;
+
+       /* set flags and meta data */
+       ena_tx_csum(&ena_tx_ctx, skb);
+
+       /* prepare the packet's descriptors to dma engine */
+       rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx,
+                               &nb_hw_desc);
+
+       if (unlikely(rc)) {
+               netif_err(adapter, tx_queued, dev,
+                         "failed to prepare tx bufs\n");
+               u64_stats_update_begin(&tx_ring->syncp);
+               tx_ring->tx_stats.queue_stop++;
+               tx_ring->tx_stats.prepare_ctx_err++;
+               u64_stats_update_end(&tx_ring->syncp);
+               netif_tx_stop_queue(txq);
+               goto error_unmap_dma;
+       }
+
+       netdev_tx_sent_queue(txq, skb->len);
+
+       u64_stats_update_begin(&tx_ring->syncp);
+       tx_ring->tx_stats.cnt++;
+       tx_ring->tx_stats.bytes += skb->len;
+       u64_stats_update_end(&tx_ring->syncp);
+
+       tx_info->tx_descs = nb_hw_desc;
+       tx_info->last_jiffies = jiffies;
+
+       tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
+               tx_ring->ring_size);
+
+       /* This WMB is aimed to:
+        * 1 - perform an smp barrier before reading next_to_completion
+        * 2 - make sure the descriptors were written before triggering the doorbell
+        */
+       wmb();
+
+       /* stop the queue when no more space is available; a packet can need
+        * up to sgl_size + 2 descriptors: one for the meta descriptor and one
+        * for the header (if the header is larger than tx_max_header_size).
+        */
+       if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) <
+                    (tx_ring->sgl_size + 2))) {
+               netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
+                         __func__, qid);
+
+               netif_tx_stop_queue(txq);
+               u64_stats_update_begin(&tx_ring->syncp);
+               tx_ring->tx_stats.queue_stop++;
+               u64_stats_update_end(&tx_ring->syncp);
+
+               /* There is a rare condition where this function decides to
+                * stop the queue but meanwhile clean_tx_irq updates
+                * next_to_completion and terminates.
+                * The queue would remain stopped forever.
+                * To solve this issue, this function performs an rmb, checks
+                * the wakeup condition and wakes up the queue if needed.
+                */
+               smp_rmb();
+
+               if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq)
+                               > ENA_TX_WAKEUP_THRESH) {
+                       netif_tx_wake_queue(txq);
+                       u64_stats_update_begin(&tx_ring->syncp);
+                       tx_ring->tx_stats.queue_wakeup++;
+                       u64_stats_update_end(&tx_ring->syncp);
+               }
+       }
+
+       if (netif_xmit_stopped(txq) || !skb->xmit_more) {
+               /* trigger the dma engine */
+               ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+               u64_stats_update_begin(&tx_ring->syncp);
+               tx_ring->tx_stats.doorbells++;
+               u64_stats_update_end(&tx_ring->syncp);
+       }
+
+       return NETDEV_TX_OK;
+
+error_report_dma_error:
+       u64_stats_update_begin(&tx_ring->syncp);
+       tx_ring->tx_stats.dma_mapping_err++;
+       u64_stats_update_end(&tx_ring->syncp);
+       netdev_warn(adapter->netdev, "failed to map skb\n");
+
+       tx_info->skb = NULL;
+
+error_unmap_dma:
+       if (i >= 0) {
+               /* save value of frag that failed */
+               last_frag = i;
+
+               /* start back at beginning and unmap skb */
+               tx_info->skb = NULL;
+               ena_buf = tx_info->bufs;
+               dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
+                                dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
+
+               /* unmap remaining mapped pages */
+               for (i = 0; i < last_frag; i++) {
+                       ena_buf++;
+                       dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
+                                      dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
+               }
+       }
+
+error_drop_packet:
+
+       dev_kfree_skb(skb);
+       return NETDEV_TX_OK;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void ena_netpoll(struct net_device *netdev)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       int i;
+
+       for (i = 0; i < adapter->num_queues; i++)
+               napi_schedule(&adapter->ena_napi[i].napi);
+}
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+
+static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
+                           void *accel_priv, select_queue_fallback_t fallback)
+{
+       u16 qid;
+       /* we suspect that this is good for in-kernel network services that
+        * want to loop incoming skb rx to tx; with normal user-generated
+        * traffic we will most probably not get here
+        */
+       if (skb_rx_queue_recorded(skb))
+               qid = skb_get_rx_queue(skb);
+       else
+               qid = fallback(dev, skb);
+
+       return qid;
+}
+
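+/* Allocate the host info structure and report host OS and driver version
+ * information to the device.
+ */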
+static void ena_config_host_info(struct ena_com_dev *ena_dev)
+{
+       struct ena_admin_host_info *host_info;
+       int rc;
+
+       /* Allocate only the host info */
+       rc = ena_com_allocate_host_info(ena_dev);
+       if (rc) {
+               pr_err("Cannot allocate host info\n");
+               return;
+       }
+
+       host_info = ena_dev->host_attr.host_info;
+
+       host_info->os_type = ENA_ADMIN_OS_LINUX;
+       host_info->kernel_ver = LINUX_VERSION_CODE;
+       strncpy(host_info->kernel_ver_str, utsname()->version,
+               sizeof(host_info->kernel_ver_str) - 1);
+       host_info->os_dist = 0;
+       strncpy(host_info->os_dist_str, utsname()->release,
+               sizeof(host_info->os_dist_str) - 1);
+       host_info->driver_version =
+               (DRV_MODULE_VER_MAJOR) |
+               (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
+               (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
+
+       rc = ena_com_set_host_attributes(ena_dev);
+       if (rc) {
+               if (rc == -EPERM)
+                       pr_warn("Cannot set host attributes\n");
+               else
+                       pr_err("Cannot set host attributes\n");
+
+               goto err;
+       }
+
+       return;
+
+err:
+       ena_com_delete_host_info(ena_dev);
+}
+
+static void ena_config_debug_area(struct ena_adapter *adapter)
+{
+       u32 debug_area_size;
+       int rc, ss_count;
+
+       ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
+       if (ss_count <= 0) {
+               netif_err(adapter, drv, adapter->netdev,
+                         "SS count is negative\n");
+               return;
+       }
+
+       /* allocate 32 bytes for each string and 64bit for the value */
+       debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
+
+       rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
+       if (rc) {
+               pr_err("Cannot allocate debug area\n");
+               return;
+       }
+
+       rc = ena_com_set_host_attributes(adapter->ena_dev);
+       if (rc) {
+               if (rc == -EPERM)
+                       netif_warn(adapter, drv, adapter->netdev,
+                                  "Cannot set host attributes\n");
+               else
+                       netif_err(adapter, drv, adapter->netdev,
+                                 "Cannot set host attributes\n");
+               goto err;
+       }
+
+       return;
+err:
+       ena_com_delete_debug_area(adapter->ena_dev);
+}
+
+static struct rtnl_link_stats64 *ena_get_stats64(struct net_device *netdev,
+                                                struct rtnl_link_stats64 *stats)
+{
+       struct ena_adapter *adapter = netdev_priv(netdev);
+       struct ena_admin_basic_stats ena_stats;
+       int rc;
+
+       if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+               return NULL;
+
+       rc = ena_com_get_dev_basic_stats(adapter->ena_dev, &ena_stats);
+       if (rc)
+               return NULL;
+
+       stats->tx_bytes = ((u64)ena_stats.tx_bytes_high << 32) |
+               ena_stats.tx_bytes_low;
+       stats->rx_bytes = ((u64)ena_stats.rx_bytes_high << 32) |
+               ena_stats.rx_bytes_low;
+
+       stats->rx_packets = ((u64)ena_stats.rx_pkts_high << 32) |
+               ena_stats.rx_pkts_low;
+       stats->tx_packets = ((u64)ena_stats.tx_pkts_high << 32) |
+               ena_stats.tx_pkts_low;
+
+       stats->rx_dropped = ((u64)ena_stats.rx_drops_high << 32) |
+               ena_stats.rx_drops_low;
+
+       stats->multicast = 0;
+       stats->collisions = 0;
+
+       stats->rx_length_errors = 0;
+       stats->rx_crc_errors = 0;
+       stats->rx_frame_errors = 0;
+       stats->rx_fifo_errors = 0;
+       stats->rx_missed_errors = 0;
+       stats->tx_window_errors = 0;
+
+       stats->rx_errors = 0;
+       stats->tx_errors = 0;
+
+       return stats;
+}
+
+static const struct net_device_ops ena_netdev_ops = {
+       .ndo_open               = ena_open,
+       .ndo_stop               = ena_close,
+       .ndo_start_xmit         = ena_start_xmit,
+       .ndo_select_queue       = ena_select_queue,
+       .ndo_get_stats64        = ena_get_stats64,
+       .ndo_tx_timeout         = ena_tx_timeout,
+       .ndo_change_mtu         = ena_change_mtu,
+       .ndo_set_mac_address    = NULL,
+       .ndo_validate_addr      = eth_validate_addr,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = ena_netpoll,
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+};
+
+static void ena_device_io_suspend(struct work_struct *work)
+{
+       struct ena_adapter *adapter =
+               container_of(work, struct ena_adapter, suspend_io_task);
+       struct net_device *netdev = adapter->netdev;
+
+       /* ena_napi_disable_all disables only the IO handling.
+        * We are still subject to AENQ keep alive watchdog.
+        */
+       u64_stats_update_begin(&adapter->syncp);
+       adapter->dev_stats.io_suspend++;
+       u64_stats_update_end(&adapter->syncp);
+       ena_napi_disable_all(adapter);
+       netif_tx_lock(netdev);
+       netif_device_detach(netdev);
+       netif_tx_unlock(netdev);
+}
+
+static void ena_device_io_resume(struct work_struct *work)
+{
+       struct ena_adapter *adapter =
+               container_of(work, struct ena_adapter, resume_io_task);
+       struct net_device *netdev = adapter->netdev;
+
+       u64_stats_update_begin(&adapter->syncp);
+       adapter->dev_stats.io_resume++;
+       u64_stats_update_end(&adapter->syncp);
+
+       netif_device_attach(netdev);
+       ena_napi_enable_all(adapter);
+}
+
+static int ena_device_validate_params(struct ena_adapter *adapter,
+                                     struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+       struct net_device *netdev = adapter->netdev;
+       int rc;
+
+       rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
+                             adapter->mac_addr);
+       if (!rc) {
+               netif_err(adapter, drv, netdev,
+                         "Error, mac addresses are different\n");
+               return -EINVAL;
+       }
+
+       if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) ||
+           (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) {
+               netif_err(adapter, drv, netdev,
+                         "Error, device doesn't support enough queues\n");
+               return -EINVAL;
+       }
+
+       if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
+               netif_err(adapter, drv, netdev,
+                         "Error, device max mtu is smaller than netdev MTU\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
+                          struct ena_com_dev_get_features_ctx *get_feat_ctx,
+                          bool *wd_state)
+{
+       struct device *dev = &pdev->dev;
+       bool readless_supported;
+       u32 aenq_groups;
+       int dma_width;
+       int rc;
+
+       rc = ena_com_mmio_reg_read_request_init(ena_dev);
+       if (rc) {
+               dev_err(dev, "failed to init mmio read less\n");
+               return rc;
+       }
+
+       /* The PCIe configuration space revision id indicates whether mmio reg
+        * read is disabled
+        */
+       readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
+       ena_com_set_mmio_read_mode(ena_dev, readless_supported);
+
+       rc = ena_com_dev_reset(ena_dev);
+       if (rc) {
+               dev_err(dev, "Can not reset device\n");
+               goto err_mmio_read_less;
+       }
+
+       rc = ena_com_validate_version(ena_dev);
+       if (rc) {
+               dev_err(dev, "device version is too low\n");
+               goto err_mmio_read_less;
+       }
+
+       dma_width = ena_com_get_dma_width(ena_dev);
+       if (dma_width < 0) {
+               dev_err(dev, "Invalid dma width value %d\n", dma_width);
+               rc = dma_width;
+               goto err_mmio_read_less;
+       }
+
+       rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width));
+       if (rc) {
+               dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc);
+               goto err_mmio_read_less;
+       }
+
+       rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width));
+       if (rc) {
+               dev_err(dev, "pci_set_consistent_dma_mask failed 0x%x\n",
+                       rc);
+               goto err_mmio_read_less;
+       }
+
+       /* ENA admin level init */
+       rc = ena_com_admin_init(ena_dev, &aenq_handlers, true);
+       if (rc) {
+               dev_err(dev,
+                       "Can not initialize ena admin queue with device\n");
+               goto err_mmio_read_less;
+       }
+
+       /* To enable the msix interrupts the driver needs to know the number
+        * of queues. So the driver uses polling mode to retrieve this
+        * information
+        */
+       ena_com_set_admin_polling_mode(ena_dev, true);
+
+       /* Get Device Attributes */
+       rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
+       if (rc) {
+               dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
+               goto err_admin_init;
+       }
+
+       /* Try to turn on all the available aenq groups */
+       aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
+               BIT(ENA_ADMIN_FATAL_ERROR) |
+               BIT(ENA_ADMIN_WARNING) |
+               BIT(ENA_ADMIN_NOTIFICATION) |
+               BIT(ENA_ADMIN_KEEP_ALIVE);
+
+       aenq_groups &= get_feat_ctx->aenq.supported_groups;
+
+       rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
+       if (rc) {
+               dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
+               goto err_admin_init;
+       }
+
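+       /* The keep-alive watchdog is only armed when the device agreed to
+        * send KEEP_ALIVE AENQ events.
+        */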
+       *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
+
+       ena_config_host_info(ena_dev);
+
+       return 0;
+
+err_admin_init:
+       ena_com_admin_destroy(ena_dev);
+err_mmio_read_less:
+       ena_com_mmio_reg_read_request_destroy(ena_dev);
+
+       return rc;
+}
+
+static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
+                                                   int io_vectors)
+{
+       struct ena_com_dev *ena_dev = adapter->ena_dev;
+       struct device *dev = &adapter->pdev->dev;
+       int rc;
+
+       rc = ena_enable_msix(adapter, io_vectors);
+       if (rc) {
+               dev_err(dev, "Can not reserve msix vectors\n");
+               return rc;
+       }
+
+       ena_setup_mgmnt_intr(adapter);
+
+       rc = ena_request_mgmnt_irq(adapter);
+       if (rc) {
+               dev_err(dev, "Can not setup management interrupts\n");
+               goto err_disable_msix;
+       }
+
+       ena_com_set_admin_polling_mode(ena_dev, false);
+
+       ena_com_admin_aenq_enable(ena_dev);
+
+       return 0;
+
+err_disable_msix:
+       ena_disable_msix(adapter);
+
+       return rc;
+}
+
+static void ena_fw_reset_device(struct work_struct *work)
+{
+       struct ena_com_dev_get_features_ctx get_feat_ctx;
+       struct ena_adapter *adapter =
+               container_of(work, struct ena_adapter, reset_task);
+       struct net_device *netdev = adapter->netdev;
+       struct ena_com_dev *ena_dev = adapter->ena_dev;
+       struct pci_dev *pdev = adapter->pdev;
+       bool dev_up, wd_state;
+       int rc;
+
+       del_timer_sync(&adapter->timer_service);
+
+       rtnl_lock();
+
+       dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+       ena_com_set_admin_running_state(ena_dev, false);
+
+       /* After calling ena_close the tx queues and the napi
+        * are disabled so no one can interfere or touch the
+        * data structures
+        */
+       ena_close(netdev);
+
+       rc = ena_com_dev_reset(ena_dev);
+       if (rc) {
+               dev_err(&pdev->dev, "Device reset failed\n");
+               goto err;
+       }
+
+       ena_free_mgmnt_irq(adapter);
+
+       ena_disable_msix(adapter);
+
+       ena_com_abort_admin_commands(ena_dev);
+
+       ena_com_wait_for_abort_completion(ena_dev);
+
+       ena_com_admin_destroy(ena_dev);
+
+       ena_com_mmio_reg_read_request_destroy(ena_dev);
+
+       /* Finish with the destroy part. Start the init part */
+
+       rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
+       if (rc) {
+               dev_err(&pdev->dev, "Can not initialize device\n");
+               goto err;
+       }
+       adapter->wd_state = wd_state;
+
+       rc = ena_device_validate_params(adapter, &get_feat_ctx);
+       if (rc) {
+               dev_err(&pdev->dev, "Validation of device parameters failed\n");
+               goto err_device_destroy;
+       }
+
+       rc = ena_enable_msix_and_set_admin_interrupts(adapter,
+                                                     adapter->num_queues);
+       if (rc) {
+               dev_err(&pdev->dev, "Enable MSI-X failed\n");
+               goto err_device_destroy;
+       }
+       /* If the interface was up before the reset bring it up */
+       if (dev_up) {
+               rc = ena_up(adapter);
+               if (rc) {
+                       dev_err(&pdev->dev, "Failed to create I/O queues\n");
+                       goto err_disable_msix;
+               }
+       }
+
+       mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
+
+       rtnl_unlock();
+
+       dev_info(&pdev->dev, "Device reset completed successfully\n");
+
+       return;
+err_disable_msix:
+       ena_free_mgmnt_irq(adapter);
+       ena_disable_msix(adapter);
+err_device_destroy:
+       ena_com_admin_destroy(ena_dev);
+err:
+       rtnl_unlock();
+
+       dev_err(&pdev->dev,
+               "Reset attempt failed. Can not reset the device\n");
+}
+
+static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+{
+       struct ena_tx_buffer *tx_buf;
+       unsigned long last_jiffies;
+       struct ena_ring *tx_ring;
+       int i, j, budget;
+       u32 missed_tx;
+
+       /* Make sure we don't race with another context turning the device
+        * up or down.
+        */
+       smp_rmb();
+
+       if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+               return;
+
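+       /* Scan at most ENA_MONITORED_TX_QUEUES rings per timer tick,
+        * resuming from where the previous scan stopped.
+        */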
+       budget = ENA_MONITORED_TX_QUEUES;
+
+       for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
+               tx_ring = &adapter->tx_ring[i];
+
+               for (j = 0; j < tx_ring->ring_size; j++) {
+                       tx_buf = &tx_ring->tx_buffer_info[j];
+                       last_jiffies = tx_buf->last_jiffies;
+                       if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
+                               netif_notice(adapter, tx_err, adapter->netdev,
+                                            "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
+                                            tx_ring->qid, j);
+
+                               u64_stats_update_begin(&tx_ring->syncp);
+                               missed_tx = tx_ring->tx_stats.missing_tx_comp++;
+                               u64_stats_update_end(&tx_ring->syncp);
+
+                               /* Clear last jiffies so the lost buffer won't
+                                * be counted twice.
+                                */
+                               tx_buf->last_jiffies = 0;
+
+                               if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
+                                       netif_err(adapter, tx_err, adapter->netdev,
+                                                 "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
+                                                 missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
+                                       set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+                               }
+                       }
+               }
+
+               budget--;
+               if (!budget)
+                       break;
+       }
+
+       adapter->last_monitored_tx_qid = i % adapter->num_queues;
+}
+
+/* Check for keep alive expiration */
+static void check_for_missing_keep_alive(struct ena_adapter *adapter)
+{
+       unsigned long keep_alive_expired;
+
+       if (!adapter->wd_state)
+               return;
+
+       keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies
+                                          + ENA_DEVICE_KALIVE_TIMEOUT);
+       if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
+               netif_err(adapter, drv, adapter->netdev,
+                         "Keep alive watchdog timeout.\n");
+               u64_stats_update_begin(&adapter->syncp);
+               adapter->dev_stats.wd_expired++;
+               u64_stats_update_end(&adapter->syncp);
+               set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+       }
+}
+
+static void check_for_admin_com_state(struct ena_adapter *adapter)
+{
+       if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
+               netif_err(adapter, drv, adapter->netdev,
+                         "ENA admin queue is not in running state!\n");
+               u64_stats_update_begin(&adapter->syncp);
+               adapter->dev_stats.admin_q_pause++;
+               u64_stats_update_end(&adapter->syncp);
+               set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+       }
+}
+
+static void ena_update_host_info(struct ena_admin_host_info *host_info,
+                                struct net_device *netdev)
+{
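+       /* Report the 64-bit netdev feature mask to the device as two
+        * 32-bit words, low word first.
+        */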
+       host_info->supported_network_features[0] =
+               netdev->features & GENMASK_ULL(31, 0);
+       host_info->supported_network_features[1] =
+               (netdev->features & GENMASK_ULL(63, 32)) >> 32;
+}
+
+static void ena_timer_service(unsigned long data)
+{
+       struct ena_adapter *adapter = (struct ena_adapter *)data;
+       u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
+       struct ena_admin_host_info *host_info =
+               adapter->ena_dev->host_attr.host_info;
+
+       check_for_missing_keep_alive(adapter);
+
+       check_for_admin_com_state(adapter);
+
+       check_for_missing_tx_completions(adapter);
+
+       if (debug_area)
+               ena_dump_stats_to_buf(adapter, debug_area);
+
+       if (host_info)
+               ena_update_host_info(host_info, adapter->netdev);
+
+       if (unlikely(test_and_clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+               netif_err(adapter, drv, adapter->netdev,
+                         "Trigger reset is on\n");
+               ena_dump_stats_to_dmesg(adapter);
+               queue_work(ena_wq, &adapter->reset_task);
+               return;
+       }
+
+       /* Reset the timer */
+       mod_timer(&adapter->timer_service, jiffies + HZ);
+}
+
+static int ena_calc_io_queue_num(struct pci_dev *pdev,
+                                struct ena_com_dev *ena_dev,
+                                struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+       int io_sq_num, io_queue_num;
+
+       /* In case of LLQ use the llq number in the get feature cmd */
+       if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+               io_sq_num = get_feat_ctx->max_queues.max_llq_num;
+
+               if (io_sq_num == 0) {
+                       dev_err(&pdev->dev,
+                               "Trying to use LLQ but llq_num is 0. Falling back to regular queues\n");
+
+                       ena_dev->tx_mem_queue_type =
+                               ENA_ADMIN_PLACEMENT_POLICY_HOST;
+                       io_sq_num = get_feat_ctx->max_queues.max_sq_num;
+               }
+       } else {
+               io_sq_num = get_feat_ctx->max_queues.max_sq_num;
+       }
+
+       io_queue_num = min_t(int, num_possible_cpus(), ENA_MAX_NUM_IO_QUEUES);
+       io_queue_num = min_t(int, io_queue_num, io_sq_num);
+       io_queue_num = min_t(int, io_queue_num,
+                            get_feat_ctx->max_queues.max_cq_num);
+       /* 1 IRQ for mgmnt and 1 IRQ for each IO queue */
+       io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1);
+       if (unlikely(!io_queue_num)) {
+               dev_err(&pdev->dev, "The device doesn't have io queues\n");
+               return -EFAULT;
+       }
+
+       return io_queue_num;
+}
+
+static int ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
+                            struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+       bool has_mem_bar;
+
+       has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR);
+
+       /* Enable push mode if device supports LLQ */
+       if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0))
+               ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV;
+       else
+               ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+
+       return 0;
+}
+
+static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
+                                struct net_device *netdev)
+{
+       netdev_features_t dev_features = 0;
+
+       /* Set offload features */
+       if (feat->offload.tx &
+               ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
+               dev_features |= NETIF_F_IP_CSUM;
+
+       if (feat->offload.tx &
+               ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
+               dev_features |= NETIF_F_IPV6_CSUM;
+
+       if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
+               dev_features |= NETIF_F_TSO;
+
+       if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
+               dev_features |= NETIF_F_TSO6;
+
+       if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
+               dev_features |= NETIF_F_TSO_ECN;
+
+       if (feat->offload.rx_supported &
+               ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
+               dev_features |= NETIF_F_RXCSUM;
+
+       if (feat->offload.rx_supported &
+               ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
+               dev_features |= NETIF_F_RXCSUM;
+
+       netdev->features =
+               dev_features |
+               NETIF_F_SG |
+               NETIF_F_NTUPLE |
+               NETIF_F_RXHASH |
+               NETIF_F_HIGHDMA;
+
+       netdev->hw_features |= netdev->features;
+       netdev->vlan_features |= netdev->features;
+}
+
+static void ena_set_conf_feat_params(struct ena_adapter *adapter,
+                                    struct ena_com_dev_get_features_ctx *feat)
+{
+       struct net_device *netdev = adapter->netdev;
+
+       /* Copy mac address */
+       if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
+               eth_hw_addr_random(netdev);
+               ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
+       } else {
+               ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
+               ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
+       }
+
+       /* Set offload features */
+       ena_set_dev_offloads(feat, netdev);
+
+       adapter->max_mtu = feat->dev_attr.max_mtu;
+}
+
+static int ena_rss_init_default(struct ena_adapter *adapter)
+{
+       struct ena_com_dev *ena_dev = adapter->ena_dev;
+       struct device *dev = &adapter->pdev->dev;
+       int rc, i;
+       u32 val;
+
+       rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
+       if (unlikely(rc)) {
+               dev_err(dev, "Cannot init indirect table\n");
+               goto err_rss_init;
+       }
+
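+       /* -EPERM from the configuration calls below just means the device
+        * doesn't support that part of RSS, so it is not treated as fatal.
+        */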
+       for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
+               val = ethtool_rxfh_indir_default(i, adapter->num_queues);
+               rc = ena_com_indirect_table_fill_entry(ena_dev, i,
+                                                      ENA_IO_RXQ_IDX(val));
+               if (unlikely(rc && (rc != -EPERM))) {
+                       dev_err(dev, "Cannot fill indirect table\n");
+                       goto err_fill_indir;
+               }
+       }
+
+       rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
+                                       ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
+       if (unlikely(rc && (rc != -EPERM))) {
+               dev_err(dev, "Cannot fill hash function\n");
+               goto err_fill_indir;
+       }
+
+       rc = ena_com_set_default_hash_ctrl(ena_dev);
+       if (unlikely(rc && (rc != -EPERM))) {
+               dev_err(dev, "Cannot fill hash control\n");
+               goto err_fill_indir;
+       }
+
+       return 0;
+
+err_fill_indir:
+       ena_com_rss_destroy(ena_dev);
+err_rss_init:
+
+       return rc;
+}
+
+static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
+{
+       int release_bars;
+
+       release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
+       pci_release_selected_regions(pdev, release_bars);
+}
+
+static int ena_calc_queue_size(struct pci_dev *pdev,
+                              struct ena_com_dev *ena_dev,
+                              u16 *max_tx_sgl_size,
+                              u16 *max_rx_sgl_size,
+                              struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+       u32 queue_size = ENA_DEFAULT_RING_SIZE;
+
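+       /* Clamp the default ring size to what the device's CQ/SQ (and LLQ,
+        * when used) can actually support.
+        */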
+       queue_size = min_t(u32, queue_size,
+                          get_feat_ctx->max_queues.max_cq_depth);
+       queue_size = min_t(u32, queue_size,
+                          get_feat_ctx->max_queues.max_sq_depth);
+
+       if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+               queue_size = min_t(u32, queue_size,
+                                  get_feat_ctx->max_queues.max_llq_depth);
+
+       queue_size = rounddown_pow_of_two(queue_size);
+
+       if (unlikely(!queue_size)) {
+               dev_err(&pdev->dev, "Invalid queue size\n");
+               return -EFAULT;
+       }
+
+       *max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+                                get_feat_ctx->max_queues.max_packet_tx_descs);
+       *max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+                                get_feat_ctx->max_queues.max_packet_rx_descs);
+
+       return queue_size;
+}
+
+/* ena_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ena_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * ena_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ */
+static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+       struct ena_com_dev_get_features_ctx get_feat_ctx;
+       static int version_printed;
+       struct net_device *netdev;
+       struct ena_adapter *adapter;
+       struct ena_com_dev *ena_dev = NULL;
+       static int adapters_found;
+       int io_queue_num, bars, rc;
+       int queue_size;
+       u16 tx_sgl_size = 0;
+       u16 rx_sgl_size = 0;
+       bool wd_state;
+
+       dev_dbg(&pdev->dev, "%s\n", __func__);
+
+       if (version_printed++ == 0)
+               dev_info(&pdev->dev, "%s", version);
+
+       rc = pci_enable_device_mem(pdev);
+       if (rc) {
+               dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
+               return rc;
+       }
+
+       pci_set_master(pdev);
+
+       ena_dev = vzalloc(sizeof(*ena_dev));
+       if (!ena_dev) {
+               rc = -ENOMEM;
+               goto err_disable_device;
+       }
+
+       bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
+       rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+       if (rc) {
+               dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
+                       rc);
+               goto err_free_ena_dev;
+       }
+
+       ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR),
+                                  pci_resource_len(pdev, ENA_REG_BAR));
+       if (!ena_dev->reg_bar) {
+               dev_err(&pdev->dev, "failed to remap regs bar\n");
+               rc = -EFAULT;
+               goto err_free_region;
+       }
+
+       ena_dev->dmadev = &pdev->dev;
+
+       rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
+       if (rc) {
+               dev_err(&pdev->dev, "ena device init failed\n");
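+               /* A timeout here usually means the device isn't ready yet,
+                * so ask the PCI core to retry the probe later.
+                */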
+               if (rc == -ETIME)
+                       rc = -EPROBE_DEFER;
+               goto err_free_region;
+       }
+
+       rc = ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
+       if (rc) {
+               dev_err(&pdev->dev, "Invalid module param(push_mode)\n");
+               goto err_device_destroy;
+       }
+
+       if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+               ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR),
+                                             pci_resource_len(pdev, ENA_MEM_BAR));
+               if (!ena_dev->mem_bar) {
+                       rc = -EFAULT;
+                       goto err_device_destroy;
+               }
+       }
+
+       /* Initial Tx interrupt delay, assumes 1 usec granularity.
+        * Updated during device initialization with the real granularity.
+        */
+       ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
+       io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx);
+       queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size,
+                                        &rx_sgl_size, &get_feat_ctx);
+       if ((queue_size <= 0) || (io_queue_num <= 0)) {
+               rc = -EFAULT;
+               goto err_device_destroy;
+       }
+
+       dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n",
+                io_queue_num, queue_size);
+
+       /* dev zeroed in alloc_etherdev_mq */
+       netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num);
+       if (!netdev) {
+               dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
+               rc = -ENOMEM;
+               goto err_device_destroy;
+       }
+
+       SET_NETDEV_DEV(netdev, &pdev->dev);
+
+       adapter = netdev_priv(netdev);
+       pci_set_drvdata(pdev, adapter);
+
+       adapter->ena_dev = ena_dev;
+       adapter->netdev = netdev;
+       adapter->pdev = pdev;
+
+       ena_set_conf_feat_params(adapter, &get_feat_ctx);
+
+       adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+
+       adapter->tx_ring_size = queue_size;
+       adapter->rx_ring_size = queue_size;
+
+       adapter->max_tx_sgl_size = tx_sgl_size;
+       adapter->max_rx_sgl_size = rx_sgl_size;
+
+       adapter->num_queues = io_queue_num;
+       adapter->last_monitored_tx_qid = 0;
+
+       adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
+       adapter->wd_state = wd_state;
+
+       snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
+
+       rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
+       if (rc) {
+               dev_err(&pdev->dev,
+                       "Failed to query interrupt moderation feature\n");
+               goto err_netdev_destroy;
+       }
+       ena_init_io_rings(adapter);
+
+       netdev->netdev_ops = &ena_netdev_ops;
+       netdev->watchdog_timeo = TX_TIMEOUT;
+       ena_set_ethtool_ops(netdev);
+
+       netdev->priv_flags |= IFF_UNICAST_FLT;
+
+       u64_stats_init(&adapter->syncp);
+
+       rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
+       if (rc) {
+               dev_err(&pdev->dev,
+                       "Failed to enable and set the admin interrupts\n");
+               goto err_worker_destroy;
+       }
+       rc = ena_rss_init_default(adapter);
+       if (rc && (rc != -EPERM)) {
+               dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
+               goto err_free_msix;
+       }
+
+       ena_config_debug_area(adapter);
+
+       memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
+
+       netif_carrier_off(netdev);
+
+       rc = register_netdev(netdev);
+       if (rc) {
+               dev_err(&pdev->dev, "Cannot register net device\n");
+               goto err_rss;
+       }
+
+       INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend);
+       INIT_WORK(&adapter->resume_io_task, ena_device_io_resume);
+       INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
+
+       adapter->last_keep_alive_jiffies = jiffies;
+
+       init_timer(&adapter->timer_service);
+       adapter->timer_service.expires = round_jiffies(jiffies + HZ);
+       adapter->timer_service.function = ena_timer_service;
+       adapter->timer_service.data = (unsigned long)adapter;
+
+       add_timer(&adapter->timer_service);
+
+       dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n",
+                DEVICE_NAME, (long)pci_resource_start(pdev, 0),
+                netdev->dev_addr, io_queue_num);
+
+       set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
+
+       adapters_found++;
+
+       return 0;
+
+err_rss:
+       ena_com_delete_debug_area(ena_dev);
+       ena_com_rss_destroy(ena_dev);
+err_free_msix:
+       ena_com_dev_reset(ena_dev);
+       ena_free_mgmnt_irq(adapter);
+       ena_disable_msix(adapter);
+err_worker_destroy:
+       ena_com_destroy_interrupt_moderation(ena_dev);
+       del_timer(&adapter->timer_service);
+       cancel_work_sync(&adapter->suspend_io_task);
+       cancel_work_sync(&adapter->resume_io_task);
+err_netdev_destroy:
+       free_netdev(netdev);
+err_device_destroy:
+       ena_com_delete_host_info(ena_dev);
+       ena_com_admin_destroy(ena_dev);
+err_free_region:
+       ena_release_bars(ena_dev, pdev);
+err_free_ena_dev:
+       vfree(ena_dev);
+err_disable_device:
+       pci_disable_device(pdev);
+       return rc;
+}
+
+/*****************************************************************************/
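+/* sriov_configure callback: a positive numvfs enables that many VFs,
+ * zero disables SR-IOV, and anything else is rejected with -EINVAL.
+ */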
+static int ena_sriov_configure(struct pci_dev *dev, int numvfs)
+{
+       int rc;
+
+       if (numvfs > 0) {
+               rc = pci_enable_sriov(dev, numvfs);
+               if (rc != 0) {
+                       dev_err(&dev->dev,
+                               "pci_enable_sriov failed to enable: %d vfs with the error: %d\n",
+                               numvfs, rc);
+                       return rc;
+               }
+
+               return numvfs;
+       }
+
+       if (numvfs == 0) {
+               pci_disable_sriov(dev);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/* ena_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * ena_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.
+ */
+static void ena_remove(struct pci_dev *pdev)
+{
+       struct ena_adapter *adapter = pci_get_drvdata(pdev);
+       struct ena_com_dev *ena_dev;
+       struct net_device *netdev;
+
+       if (!adapter)
+               /* This device didn't load properly and its resources were
+                * already released, nothing to do
+                */
+               return;
+
+       ena_dev = adapter->ena_dev;
+       netdev = adapter->netdev;
+
+#ifdef CONFIG_RFS_ACCEL
+       if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
+               free_irq_cpu_rmap(netdev->rx_cpu_rmap);
+               netdev->rx_cpu_rmap = NULL;
+       }
+#endif /* CONFIG_RFS_ACCEL */
+
+       unregister_netdev(netdev);
+       del_timer_sync(&adapter->timer_service);
+
+       cancel_work_sync(&adapter->reset_task);
+
+       cancel_work_sync(&adapter->suspend_io_task);
+
+       cancel_work_sync(&adapter->resume_io_task);
+
+       ena_com_dev_reset(ena_dev);
+
+       ena_free_mgmnt_irq(adapter);
+
+       ena_disable_msix(adapter);
+
+       free_netdev(netdev);
+
+       ena_com_mmio_reg_read_request_destroy(ena_dev);
+
+       ena_com_abort_admin_commands(ena_dev);
+
+       ena_com_wait_for_abort_completion(ena_dev);
+
+       ena_com_admin_destroy(ena_dev);
+
+       ena_com_rss_destroy(ena_dev);
+
+       ena_com_delete_debug_area(ena_dev);
+
+       ena_com_delete_host_info(ena_dev);
+
+       ena_release_bars(ena_dev, pdev);
+
+       pci_disable_device(pdev);
+
+       ena_com_destroy_interrupt_moderation(ena_dev);
+
+       vfree(ena_dev);
+}
+
+static struct pci_driver ena_pci_driver = {
+       .name           = DRV_MODULE_NAME,
+       .id_table       = ena_pci_tbl,
+       .probe          = ena_probe,
+       .remove         = ena_remove,
+       .sriov_configure = ena_sriov_configure,
+};
+
+static int __init ena_init(void)
+{
+       pr_info("%s", version);
+
+       ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
+       if (!ena_wq) {
+               pr_err("Failed to create workqueue\n");
+               return -ENOMEM;
+       }
+
+       return pci_register_driver(&ena_pci_driver);
+}
+
+static void __exit ena_cleanup(void)
+{
+       pci_unregister_driver(&ena_pci_driver);
+
+       if (ena_wq) {
+               destroy_workqueue(ena_wq);
+               ena_wq = NULL;
+       }
+}
+
+/******************************************************************************
+ ******************************** AENQ Handlers *******************************
+ *****************************************************************************/
+/* ena_update_on_link_change:
+ * Notify the network interface about the change in link status
+ */
+static void ena_update_on_link_change(void *adapter_data,
+                                     struct ena_admin_aenq_entry *aenq_e)
+{
+       struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+       struct ena_admin_aenq_link_change_desc *aenq_desc =
+               (struct ena_admin_aenq_link_change_desc *)aenq_e;
+       int status = aenq_desc->flags &
+               ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
+
+       if (status) {
+               netdev_dbg(adapter->netdev, "%s\n", __func__);
+               set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
+               netif_carrier_on(adapter->netdev);
+       } else {
+               clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
+               netif_carrier_off(adapter->netdev);
+       }
+}
+
+static void ena_keep_alive_wd(void *adapter_data,
+                             struct ena_admin_aenq_entry *aenq_e)
+{
+       struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+
+       adapter->last_keep_alive_jiffies = jiffies;
+}
+
+static void ena_notification(void *adapter_data,
+                            struct ena_admin_aenq_entry *aenq_e)
+{
+       struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+
+       WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
+            "Invalid group(%x) expected %x\n",
+            aenq_e->aenq_common_desc.group,
+            ENA_ADMIN_NOTIFICATION);
+
+       switch (aenq_e->aenq_common_desc.syndrom) {
+       case ENA_ADMIN_SUSPEND:
+               /* Suspend just the IO queues.
+                * We deliberately don't suspend admin so the timer and
+                * the keep_alive events should remain.
+                */
+               queue_work(ena_wq, &adapter->suspend_io_task);
+               break;
+       case ENA_ADMIN_RESUME:
+               queue_work(ena_wq, &adapter->resume_io_task);
+               break;
+       default:
+               netif_err(adapter, drv, adapter->netdev,
+                         "Invalid aenq notification syndrome %d\n",
+                         aenq_e->aenq_common_desc.syndrom);
+       }
+}
+
+/* This handler will be called for unknown event groups or unimplemented handlers */
+static void unimplemented_aenq_handler(void *data,
+                                      struct ena_admin_aenq_entry *aenq_e)
+{
+       struct ena_adapter *adapter = (struct ena_adapter *)data;
+
+       netif_err(adapter, drv, adapter->netdev,
+                 "Unknown event was received or event with unimplemented handler\n");
+}
+
+static struct ena_aenq_handlers aenq_handlers = {
+       .handlers = {
+               [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
+               [ENA_ADMIN_NOTIFICATION] = ena_notification,
+               [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
+       },
+       .unimplemented_handler = unimplemented_aenq_handler
+};
+
+module_init(ena_init);
+module_exit(ena_cleanup);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
new file mode 100644 (file)
index 0000000..69d7e9e
--- /dev/null
@@ -0,0 +1,324 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ENA_H
+#define ENA_H
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include "ena_com.h"
+#include "ena_eth_com.h"
+
+#define DRV_MODULE_VER_MAJOR   1
+#define DRV_MODULE_VER_MINOR   0
+#define DRV_MODULE_VER_SUBMINOR 2
+
+#define DRV_MODULE_NAME                "ena"
+#ifndef DRV_MODULE_VERSION
+#define DRV_MODULE_VERSION \
+       __stringify(DRV_MODULE_VER_MAJOR) "."   \
+       __stringify(DRV_MODULE_VER_MINOR) "."   \
+       __stringify(DRV_MODULE_VER_SUBMINOR)
+#endif
+
+#define DEVICE_NAME    "Elastic Network Adapter (ENA)"
+
+/* 1 for AENQ + ADMIN */
+#define ENA_MAX_MSIX_VEC(io_queues)    (1 + (io_queues))
+
+#define ENA_REG_BAR                    0
+#define ENA_MEM_BAR                    2
+#define ENA_BAR_MASK (BIT(ENA_REG_BAR) | BIT(ENA_MEM_BAR))
+
+#define ENA_DEFAULT_RING_SIZE  (1024)
+
+#define ENA_TX_WAKEUP_THRESH           (MAX_SKB_FRAGS + 2)
+#define ENA_DEFAULT_RX_COPYBREAK       (128 - NET_IP_ALIGN)
+
+/* limit the buffer size to 600 bytes to handle MTU changes from very
+ * small to very large, in which case the number of buffers per packet
+ * could exceed ENA_PKT_MAX_BUFS
+ */
+#define ENA_DEFAULT_MIN_RX_BUFF_ALLOC_SIZE 600
+
+#define ENA_MIN_MTU            128
+
+#define ENA_NAME_MAX_LEN       20
+#define ENA_IRQNAME_SIZE       40
+
+#define ENA_PKT_MAX_BUFS       19
+
+#define ENA_RX_RSS_TABLE_LOG_SIZE  7
+#define ENA_RX_RSS_TABLE_SIZE  (1 << ENA_RX_RSS_TABLE_LOG_SIZE)
+
+#define ENA_HASH_KEY_SIZE      40
+
+/* The number of tx packet completions that will be handled each NAPI poll
+ * cycle is ring_size / ENA_TX_POLL_BUDGET_DIVIDER.
+ */
+#define ENA_TX_POLL_BUDGET_DIVIDER     4
+
+/* Refill Rx queue when number of available descriptors is below
+ * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER
+ */
+#define ENA_RX_REFILL_THRESH_DIVIDER   8
+
+/* Number of queues to check for missing queues per timer service */
+#define ENA_MONITORED_TX_QUEUES        4
+/* Max timeout packets before device reset */
+#define MAX_NUM_OF_TIMEOUTED_PACKETS 32
+
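+/* Ring indices wrap with a mask, so ring_size must be a power of two
+ * (ena_calc_queue_size() rounds it down accordingly).
+ */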
+#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
+
+#define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
+#define ENA_RX_RING_IDX_ADD(idx, n, ring_size) \
+       (((idx) + (n)) & ((ring_size) - 1))
+
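+/* IO queues are interleaved on the device: TX queue q uses index 2 * q
+ * and its RX counterpart uses 2 * q + 1.
+ */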
+#define ENA_IO_TXQ_IDX(q)      (2 * (q))
+#define ENA_IO_RXQ_IDX(q)      (2 * (q) + 1)
+
+#define ENA_MGMNT_IRQ_IDX              0
+#define ENA_IO_IRQ_FIRST_IDX           1
+#define ENA_IO_IRQ_IDX(q)              (ENA_IO_IRQ_FIRST_IDX + (q))
+
+/* ENA device should send keep alive msg every 1 sec.
+ * We wait for 3 sec just to be on the safe side.
+ */
+#define ENA_DEVICE_KALIVE_TIMEOUT      (3 * HZ)
+
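+/* Set in the PCI revision id when the device does not support readless
+ * register reads.
+ */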
+#define ENA_MMIO_DISABLE_REG_READ      BIT(0)
+
+struct ena_irq {
+       irq_handler_t handler;
+       void *data;
+       int cpu;
+       u32 vector;
+       cpumask_t affinity_hint_mask;
+       char name[ENA_IRQNAME_SIZE];
+};
+
+struct ena_napi {
+       struct napi_struct napi ____cacheline_aligned;
+       struct ena_ring *tx_ring;
+       struct ena_ring *rx_ring;
+       u32 qid;
+};
+
+struct ena_tx_buffer {
+       struct sk_buff *skb;
+       /* num of ena desc for this specific skb
+        * (includes data desc and metadata desc)
+        */
+       u32 tx_descs;
+       /* num of buffers used by this skb */
+       u32 num_of_bufs;
+       /* Save the last jiffies to detect missing tx packets */
+       unsigned long last_jiffies;
+       struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
+} ____cacheline_aligned;
+
+struct ena_rx_buffer {
+       struct sk_buff *skb;
+       struct page *page;
+       u32 page_offset;
+       struct ena_com_buf ena_buf;
+} ____cacheline_aligned;
+
+struct ena_stats_tx {
+       u64 cnt;
+       u64 bytes;
+       u64 queue_stop;
+       u64 prepare_ctx_err;
+       u64 queue_wakeup;
+       u64 dma_mapping_err;
+       u64 linearize;
+       u64 linearize_failed;
+       u64 napi_comp;
+       u64 tx_poll;
+       u64 doorbells;
+       u64 missing_tx_comp;
+       u64 bad_req_id;
+};
+
+struct ena_stats_rx {
+       u64 cnt;
+       u64 bytes;
+       u64 refil_partial;
+       u64 bad_csum;
+       u64 page_alloc_fail;
+       u64 skb_alloc_fail;
+       u64 dma_mapping_err;
+       u64 bad_desc_num;
+       u64 rx_copybreak_pkt;
+};
+
+struct ena_ring {
+       /* Holds the empty requests for TX out of order completions */
+       u16 *free_tx_ids;
+       union {
+               struct ena_tx_buffer *tx_buffer_info;
+               struct ena_rx_buffer *rx_buffer_info;
+       };
+
+       /* cache ptr to avoid using the adapter */
+       struct device *dev;
+       struct pci_dev *pdev;
+       struct napi_struct *napi;
+       struct net_device *netdev;
+       struct ena_com_dev *ena_dev;
+       struct ena_adapter *adapter;
+       struct ena_com_io_cq *ena_com_io_cq;
+       struct ena_com_io_sq *ena_com_io_sq;
+
+       u16 next_to_use;
+       u16 next_to_clean;
+       u16 rx_copybreak;
+       u16 qid;
+       u16 mtu;
+       u16 sgl_size;
+
+       /* The maximum header length the device can handle */
+       u8 tx_max_header_size;
+
+       /* cpu for TPH */
+       int cpu;
+       /* number of tx/rx_buffer_info's entries */
+       int ring_size;
+
+       enum ena_admin_placement_policy_type tx_mem_queue_type;
+
+       struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];
+       u32  smoothed_interval;
+       u32  per_napi_packets;
+       u32  per_napi_bytes;
+       enum ena_intr_moder_level moder_tbl_idx;
+       struct u64_stats_sync syncp;
+       union {
+               struct ena_stats_tx tx_stats;
+               struct ena_stats_rx rx_stats;
+       };
+} ____cacheline_aligned;
+
+struct ena_stats_dev {
+       u64 tx_timeout;
+       u64 io_suspend;
+       u64 io_resume;
+       u64 wd_expired;
+       u64 interface_up;
+       u64 interface_down;
+       u64 admin_q_pause;
+};
+
+enum ena_flags_t {
+       ENA_FLAG_DEVICE_RUNNING,
+       ENA_FLAG_DEV_UP,
+       ENA_FLAG_LINK_UP,
+       ENA_FLAG_MSIX_ENABLED,
+       ENA_FLAG_TRIGGER_RESET
+};
+
+/* adapter specific private data structure */
+struct ena_adapter {
+       struct ena_com_dev *ena_dev;
+       /* OS defined structs */
+       struct net_device *netdev;
+       struct pci_dev *pdev;
+
+       /* rx packets shorter than this len will be copied to the skb
+        * header
+        */
+       u32 rx_copybreak;
+       u32 max_mtu;
+
+       int num_queues;
+
+       struct msix_entry *msix_entries;
+       int msix_vecs;
+
+       u32 tx_usecs, rx_usecs; /* interrupt moderation */
+       u32 tx_frames, rx_frames; /* interrupt moderation */
+
+       u32 tx_ring_size;
+       u32 rx_ring_size;
+
+       u32 msg_enable;
+
+       u16 max_tx_sgl_size;
+       u16 max_rx_sgl_size;
+
+       u8 mac_addr[ETH_ALEN];
+
+       char name[ENA_NAME_MAX_LEN];
+
+       unsigned long flags;
+       /* TX */
+       struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES]
+               ____cacheline_aligned_in_smp;
+
+       /* RX */
+       struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES]
+               ____cacheline_aligned_in_smp;
+
+       struct ena_napi ena_napi[ENA_MAX_NUM_IO_QUEUES];
+
+       struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)];
+
+       /* timer service */
+       struct work_struct reset_task;
+       struct work_struct suspend_io_task;
+       struct work_struct resume_io_task;
+       struct timer_list timer_service;
+
+       bool wd_state;
+       unsigned long last_keep_alive_jiffies;
+
+       struct u64_stats_sync syncp;
+       struct ena_stats_dev dev_stats;
+
+       /* last queue index that was checked for uncompleted tx packets */
+       u32 last_monitored_tx_qid;
+};
+
+void ena_set_ethtool_ops(struct net_device *netdev);
+
+void ena_dump_stats_to_dmesg(struct ena_adapter *adapter);
+
+void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf);
+
+int ena_get_sset_count(struct net_device *netdev, int sset);
+
+#endif /* !(ENA_H) */
diff --git a/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h
new file mode 100644 (file)
index 0000000..f80d2a4
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ENA_PCI_ID_TBL_H_
+#define ENA_PCI_ID_TBL_H_
+
+#ifndef PCI_VENDOR_ID_AMAZON
+#define PCI_VENDOR_ID_AMAZON 0x1d0f
+#endif
+
+#ifndef PCI_DEV_ID_ENA_PF
+#define PCI_DEV_ID_ENA_PF      0x0ec2
+#endif
+
+#ifndef PCI_DEV_ID_ENA_LLQ_PF
+#define PCI_DEV_ID_ENA_LLQ_PF  0x1ec2
+#endif
+
+#ifndef PCI_DEV_ID_ENA_VF
+#define PCI_DEV_ID_ENA_VF      0xec20
+#endif
+
+#ifndef PCI_DEV_ID_ENA_LLQ_VF
+#define PCI_DEV_ID_ENA_LLQ_VF  0xec21
+#endif
+
+#define ENA_PCI_ID_TABLE_ENTRY(devid) \
+       {PCI_DEVICE(PCI_VENDOR_ID_AMAZON, devid)},
+
+static const struct pci_device_id ena_pci_tbl[] = {
+       ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_PF)
+       ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_PF)
+       ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_VF)
+       ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_VF)
+       { }
+};
+
+#endif /* ENA_PCI_ID_TBL_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
new file mode 100644 (file)
index 0000000..26097a2
--- /dev/null
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _ENA_REGS_H_
+#define _ENA_REGS_H_
+
+/* ena_registers offsets */
+#define ENA_REGS_VERSION_OFF           0x0
+#define ENA_REGS_CONTROLLER_VERSION_OFF                0x4
+#define ENA_REGS_CAPS_OFF              0x8
+#define ENA_REGS_CAPS_EXT_OFF          0xc
+#define ENA_REGS_AQ_BASE_LO_OFF                0x10
+#define ENA_REGS_AQ_BASE_HI_OFF                0x14
+#define ENA_REGS_AQ_CAPS_OFF           0x18
+#define ENA_REGS_ACQ_BASE_LO_OFF               0x20
+#define ENA_REGS_ACQ_BASE_HI_OFF               0x24
+#define ENA_REGS_ACQ_CAPS_OFF          0x28
+#define ENA_REGS_AQ_DB_OFF             0x2c
+#define ENA_REGS_ACQ_TAIL_OFF          0x30
+#define ENA_REGS_AENQ_CAPS_OFF         0x34
+#define ENA_REGS_AENQ_BASE_LO_OFF              0x38
+#define ENA_REGS_AENQ_BASE_HI_OFF              0x3c
+#define ENA_REGS_AENQ_HEAD_DB_OFF              0x40
+#define ENA_REGS_AENQ_TAIL_OFF         0x44
+#define ENA_REGS_INTR_MASK_OFF         0x4c
+#define ENA_REGS_DEV_CTL_OFF           0x54
+#define ENA_REGS_DEV_STS_OFF           0x58
+#define ENA_REGS_MMIO_REG_READ_OFF             0x5c
+#define ENA_REGS_MMIO_RESP_LO_OFF              0x60
+#define ENA_REGS_MMIO_RESP_HI_OFF              0x64
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF              0x68
+
+/* version register */
+#define ENA_REGS_VERSION_MINOR_VERSION_MASK            0xff
+#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT           8
+#define ENA_REGS_VERSION_MAJOR_VERSION_MASK            0xff00
+
+/* controller_version register */
+#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK              0xff
+#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT                8
+#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK         0xff00
+#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT                16
+#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK         0xff0000
+#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT              24
+#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK               0xff000000
+
+/* caps register */
+#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK           0x1
+#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT              1
+#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK               0x3e
+#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT             8
+#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK              0xff00
+
+/* aq_caps register */
+#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK         0xffff
+#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT           16
+#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK            0xffff0000
+
+/* acq_caps register */
+#define ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK               0xffff
+#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT         16
+#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK          0xffff0000
+
+/* aenq_caps register */
+#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK             0xffff
+#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT               16
+#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK                0xffff0000
+
+/* dev_ctl register */
+#define ENA_REGS_DEV_CTL_DEV_RESET_MASK                0x1
+#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT              1
+#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK               0x2
+#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT               2
+#define ENA_REGS_DEV_CTL_QUIESCENT_MASK                0x4
+#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT               3
+#define ENA_REGS_DEV_CTL_IO_RESUME_MASK                0x8
+
+/* dev_sts register */
+#define ENA_REGS_DEV_STS_READY_MASK            0x1
+#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT          1
+#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK           0x2
+#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT             2
+#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK              0x4
+#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT               3
+#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK                0x8
+#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT          4
+#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK           0x10
+#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT             5
+#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK              0x20
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT             6
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK              0x40
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT                7
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK         0x80
+
+/* mmio_reg_read register */
+#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK             0xffff
+#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT           16
+#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK            0xffff0000
+
+/* rss_ind_entry_update register */
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK               0xffff
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT             16
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK              0xffff0000
+
+#endif /*_ENA_REGS_H_ */
index 300e3b5..afccb03 100644 (file)
@@ -4,6 +4,7 @@ config NET_XGENE
        depends on ARCH_XGENE || COMPILE_TEST
        select PHYLIB
        select MDIO_XGENE
+       select GPIOLIB
        help
          This is the Ethernet driver for the on-chip ethernet interface on the
          APM X-Gene SoC.
index 472c0fb..23d72af 100644 (file)
@@ -32,12 +32,19 @@ static void xgene_cle_sband_to_hw(u8 frag, enum xgene_cle_prot_version ver,
                SET_VAL(SB_HDRLEN, len);
 }
 
-static void xgene_cle_idt_to_hw(u32 dstqid, u32 fpsel,
+static void xgene_cle_idt_to_hw(struct xgene_enet_pdata *pdata,
+                               u32 dstqid, u32 fpsel,
                                u32 nfpsel, u32 *idt_reg)
 {
-       *idt_reg =  SET_VAL(IDT_DSTQID, dstqid) |
-                   SET_VAL(IDT_FPSEL, fpsel) |
-                   SET_VAL(IDT_NFPSEL, nfpsel);
+       if (pdata->enet_id == XGENE_ENET1) {
+               *idt_reg = SET_VAL(IDT_DSTQID, dstqid) |
+                          SET_VAL(IDT_FPSEL1, fpsel)  |
+                          SET_VAL(IDT_NFPSEL1, nfpsel);
+       } else {
+               *idt_reg = SET_VAL(IDT_DSTQID, dstqid) |
+                          SET_VAL(IDT_FPSEL, fpsel)   |
+                          SET_VAL(IDT_NFPSEL, nfpsel);
+       }
 }
 
 static void xgene_cle_dbptr_to_hw(struct xgene_enet_pdata *pdata,
@@ -344,7 +351,7 @@ static int xgene_cle_set_rss_idt(struct xgene_enet_pdata *pdata)
                nfpsel = 0;
                idt_reg = 0;
 
-               xgene_cle_idt_to_hw(dstqid, fpsel, nfpsel, &idt_reg);
+               xgene_cle_idt_to_hw(pdata, dstqid, fpsel, nfpsel, &idt_reg);
                ret = xgene_cle_dram_wr(&pdata->cle, &idt_reg, 1, i,
                                        RSS_IDT, CLE_CMD_WR);
                if (ret)
index 33c5f6b..9ac9f8e 100644 (file)
@@ -196,9 +196,13 @@ enum xgene_cle_ptree_dbptrs {
 #define IDT_DSTQID_POS         0
 #define IDT_DSTQID_LEN         12
 #define IDT_FPSEL_POS          12
-#define IDT_FPSEL_LEN          4
-#define IDT_NFPSEL_POS         16
-#define IDT_NFPSEL_LEN         4
+#define IDT_FPSEL_LEN          5
+#define IDT_NFPSEL_POS         17
+#define IDT_NFPSEL_LEN         5
+#define IDT_FPSEL1_POS         12
+#define IDT_FPSEL1_LEN         4
+#define IDT_NFPSEL1_POS                16
+#define IDT_NFPSEL1_LEN                4
 
 struct xgene_cle_ptree_branch {
        bool valid;
index 18bb955..321fb19 100644 (file)
@@ -761,18 +761,18 @@ int xgene_enet_phy_connect(struct net_device *ndev)
        if (dev->of_node) {
                for (i = 0 ; i < 2; i++) {
                        np = of_parse_phandle(dev->of_node, "phy-handle", i);
-                       if (np)
-                               break;
-               }
 
-               if (!np) {
-                       netdev_dbg(ndev, "No phy-handle found in DT\n");
-                       return -ENODEV;
+                       if (!np)
+                               continue;
+
+                       phy_dev = of_phy_connect(ndev, np,
+                                                &xgene_enet_adjust_link,
+                                                0, pdata->phy_mode);
+                       of_node_put(np);
+                       if (phy_dev)
+                               break;
                }
 
-               phy_dev = of_phy_connect(ndev, np, &xgene_enet_adjust_link,
-                                        0, pdata->phy_mode);
-               of_node_put(np);
                if (!phy_dev) {
                        netdev_err(ndev, "Could not connect to PHY\n");
                        return -ENODEV;
index 179a44d..8a8d055 100644 (file)
@@ -124,6 +124,12 @@ enum xgene_enet_rm {
 #define MAC_READ_REG_OFFSET            0x0c
 #define MAC_COMMAND_DONE_REG_OFFSET    0x10
 
+#define PCS_ADDR_REG_OFFSET            0x00
+#define PCS_COMMAND_REG_OFFSET         0x04
+#define PCS_WRITE_REG_OFFSET           0x08
+#define PCS_READ_REG_OFFSET            0x0c
+#define PCS_COMMAND_DONE_REG_OFFSET    0x10
+
 #define MII_MGMT_CONFIG_ADDR           0x20
 #define MII_MGMT_COMMAND_ADDR          0x24
 #define MII_MGMT_ADDRESS_ADDR          0x28
index d1d6b5e..b8b9495 100644 (file)
@@ -19,6 +19,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/gpio.h>
 #include "xgene_enet_main.h"
 #include "xgene_enet_hw.h"
 #include "xgene_enet_sgmac.h"
@@ -72,7 +73,6 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool,
                skb = netdev_alloc_skb_ip_align(ndev, len);
                if (unlikely(!skb))
                        return -ENOMEM;
-               buf_pool->rx_skb[tail] = skb;
 
                dma_addr = dma_map_single(dev, skb->data, len, DMA_FROM_DEVICE);
                if (dma_mapping_error(dev, dma_addr)) {
@@ -81,6 +81,8 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool,
                        return -EINVAL;
                }
 
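+               /* Record the skb in the pool only after the DMA mapping succeeds */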
+               buf_pool->rx_skb[tail] = skb;
+
                raw_desc->m1 = cpu_to_le64(SET_VAL(DATAADDR, dma_addr) |
                                           SET_VAL(BUFDATALEN, bufdatalen) |
                                           SET_BIT(COHERENT));
@@ -102,12 +104,21 @@ static u8 xgene_enet_hdr_len(const void *data)
 
 static void xgene_enet_delete_bufpool(struct xgene_enet_desc_ring *buf_pool)
 {
+       struct device *dev = ndev_to_dev(buf_pool->ndev);
+       struct xgene_enet_raw_desc16 *raw_desc;
+       dma_addr_t dma_addr;
        int i;
 
        /* Free up the buffers held by hardware */
        for (i = 0; i < buf_pool->slots; i++) {
-               if (buf_pool->rx_skb[i])
+               if (buf_pool->rx_skb[i]) {
                        dev_kfree_skb_any(buf_pool->rx_skb[i]);
+
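+                       /* Recover the buffer's DMA address from the descriptor and unmap it */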
+                       raw_desc = &buf_pool->raw_desc16[i];
+                       dma_addr = GET_VAL(DATAADDR, le64_to_cpu(raw_desc->m1));
+                       dma_unmap_single(dev, dma_addr, XGENE_ENET_MAX_MTU,
+                                        DMA_FROM_DEVICE);
+               }
        }
 }
 
@@ -452,7 +463,6 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
                               struct xgene_enet_raw_desc *raw_desc)
 {
        struct net_device *ndev;
-       struct xgene_enet_pdata *pdata;
        struct device *dev;
        struct xgene_enet_desc_ring *buf_pool;
        u32 datalen, skb_index;
@@ -461,7 +471,6 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
        int ret = 0;
 
        ndev = rx_ring->ndev;
-       pdata = netdev_priv(ndev);
        dev = ndev_to_dev(rx_ring->ndev);
        buf_pool = rx_ring->buf_pool;
 
@@ -1312,6 +1321,18 @@ static int xgene_enet_check_phy_handle(struct xgene_enet_pdata *pdata)
        return 0;
 }
 
+static void xgene_enet_gpiod_get(struct xgene_enet_pdata *pdata)
+{
+       struct device *dev = &pdata->pdev->dev;
+
+       if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII)
+               return;
+
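+       /* Prefer the "rxlos" GPIO; fall back to the "sfp" name if it is absent */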
+       pdata->sfp_rdy = gpiod_get(dev, "rxlos", GPIOD_IN);
+       if (IS_ERR(pdata->sfp_rdy))
+               pdata->sfp_rdy = gpiod_get(dev, "sfp", GPIOD_IN);
+}
+
 static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
 {
        struct platform_device *pdev;
@@ -1401,6 +1422,8 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
        if (ret)
                return ret;
 
+       xgene_enet_gpiod_get(pdata);
+
        pdata->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(pdata->clk)) {
                /* Firmware may have set up the clock already. */
@@ -1425,6 +1448,7 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
        } else {
                pdata->mcx_mac_addr = base_addr + BLOCK_AXG_MAC_OFFSET;
                pdata->mcx_mac_csr_addr = base_addr + BLOCK_AXG_MAC_CSR_OFFSET;
+               pdata->pcs_addr = base_addr + BLOCK_PCS_OFFSET;
        }
        pdata->rx_buff_cnt = NUM_PKT_BUF;
 
@@ -1454,10 +1478,8 @@ static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata)
                buf_pool = pdata->rx_ring[i]->buf_pool;
                xgene_enet_init_bufpool(buf_pool);
                ret = xgene_enet_refill_bufpool(buf_pool, pdata->rx_buff_cnt);
-               if (ret) {
-                       xgene_enet_delete_desc_rings(pdata);
-                       return ret;
-               }
+               if (ret)
+                       goto err;
        }
 
        dst_ring_num = xgene_enet_dst_ring_num(pdata->rx_ring[0]);
@@ -1474,7 +1496,7 @@ static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata)
                ret = pdata->cle_ops->cle_init(pdata);
                if (ret) {
                        netdev_err(ndev, "Preclass Tree init error\n");
-                       return ret;
+                       goto err;
                }
        } else {
                pdata->port_ops->cle_bypass(pdata, dst_ring_num, buf_pool->id);
@@ -1484,6 +1506,10 @@ static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata)
        pdata->mac_ops->init(pdata);
 
        return ret;
+
+err:
+       xgene_enet_delete_desc_rings(pdata);
+       return ret;
 }
 
 static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata)
@@ -1631,8 +1657,8 @@ static int xgene_enet_probe(struct platform_device *pdev)
        }
 #endif
        if (!pdata->enet_id) {
-               free_netdev(ndev);
-               return -ENODEV;
+               ret = -ENODEV;
+               goto err;
        }
 
        ret = xgene_enet_get_resources(pdata);
@@ -1655,7 +1681,7 @@ static int xgene_enet_probe(struct platform_device *pdev)
 
        ret = xgene_enet_init_hw(pdata);
        if (ret)
-               goto err_netdev;
+               goto err;
 
        link_state = pdata->mac_ops->link_state;
        if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) {
@@ -1665,21 +1691,32 @@ static int xgene_enet_probe(struct platform_device *pdev)
                        ret = xgene_enet_mdio_config(pdata);
                else
                        INIT_DELAYED_WORK(&pdata->link_work, link_state);
+
+               if (ret)
+                       goto err1;
        }
-       if (ret)
-               goto err;
 
        xgene_enet_napi_add(pdata);
        ret = register_netdev(ndev);
        if (ret) {
                netdev_err(ndev, "Failed to register netdev\n");
-               goto err;
+               goto err2;
        }
 
        return 0;
 
-err_netdev:
-       unregister_netdev(ndev);
+err2:
+       /*
+        * If necessary, free_netdev() will call netif_napi_del() and undo
+        * the effects of xgene_enet_napi_add()'s calls to netif_napi_add().
+        */
+
+       if (pdata->mdio_driver)
+               xgene_enet_phy_disconnect(pdata);
+       else if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
+               xgene_enet_mdio_remove(pdata);
+err1:
+       xgene_enet_delete_desc_rings(pdata);
 err:
        free_netdev(ndev);
        return ret;
@@ -1688,11 +1725,9 @@ err:
 static int xgene_enet_remove(struct platform_device *pdev)
 {
        struct xgene_enet_pdata *pdata;
-       const struct xgene_mac_ops *mac_ops;
        struct net_device *ndev;
 
        pdata = platform_get_drvdata(pdev);
-       mac_ops = pdata->mac_ops;
        ndev = pdata->ndev;
 
        rtnl_lock();
index 217546e..b339fc1 100644 (file)
@@ -196,6 +196,7 @@ struct xgene_enet_pdata {
        void __iomem *mcx_mac_addr;
        void __iomem *mcx_mac_csr_addr;
        void __iomem *base_addr;
+       void __iomem *pcs_addr;
        void __iomem *ring_csr_addr;
        void __iomem *ring_cmd_addr;
        int phy_mode;
@@ -216,6 +217,7 @@ struct xgene_enet_pdata {
        u8 tx_delay;
        u8 rx_delay;
        bool mdio_driver;
+       struct gpio_desc *sfp_rdy;
 };
 
 struct xgene_indirect_ctl {
index 9c6ad0d..d672e71 100644 (file)
@@ -18,6 +18,8 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/of_gpio.h>
+#include <linux/gpio.h>
 #include "xgene_enet_main.h"
 #include "xgene_enet_hw.h"
 #include "xgene_enet_xgmac.h"
@@ -84,6 +86,21 @@ static void xgene_enet_wr_mac(struct xgene_enet_pdata *pdata,
                           wr_addr);
 }
 
+static void xgene_enet_wr_pcs(struct xgene_enet_pdata *pdata,
+                             u32 wr_addr, u32 wr_data)
+{
+       void __iomem *addr, *wr, *cmd, *cmd_done;
+
+       addr = pdata->pcs_addr + PCS_ADDR_REG_OFFSET;
+       wr = pdata->pcs_addr + PCS_WRITE_REG_OFFSET;
+       cmd = pdata->pcs_addr + PCS_COMMAND_REG_OFFSET;
+       cmd_done = pdata->pcs_addr + PCS_COMMAND_DONE_REG_OFFSET;
+
+       if (!xgene_enet_wr_indirect(addr, wr, cmd, cmd_done, wr_addr, wr_data))
+               netdev_err(pdata->ndev, "PCS write failed, addr: %04x\n",
+                          wr_addr);
+}
+
 static void xgene_enet_rd_csr(struct xgene_enet_pdata *pdata,
                              u32 offset, u32 *val)
 {
@@ -122,6 +139,7 @@ static bool xgene_enet_rd_indirect(void __iomem *addr, void __iomem *rd,
 
        return true;
 }
+
 static void xgene_enet_rd_mac(struct xgene_enet_pdata *pdata,
                              u32 rd_addr, u32 *rd_data)
 {
@@ -137,6 +155,21 @@ static void xgene_enet_rd_mac(struct xgene_enet_pdata *pdata,
                           rd_addr);
 }
 
+static void xgene_enet_rd_pcs(struct xgene_enet_pdata *pdata,
+                             u32 rd_addr, u32 *rd_data)
+{
+       void __iomem *addr, *rd, *cmd, *cmd_done;
+
+       addr = pdata->pcs_addr + PCS_ADDR_REG_OFFSET;
+       rd = pdata->pcs_addr + PCS_READ_REG_OFFSET;
+       cmd = pdata->pcs_addr + PCS_COMMAND_REG_OFFSET;
+       cmd_done = pdata->pcs_addr + PCS_COMMAND_DONE_REG_OFFSET;
+
+       if (!xgene_enet_rd_indirect(addr, rd, cmd, cmd_done, rd_addr, rd_data))
+               netdev_err(pdata->ndev, "PCS read failed, addr: %04x\n",
+                          rd_addr);
+}
+
 static int xgene_enet_ecc_init(struct xgene_enet_pdata *pdata)
 {
        struct net_device *ndev = pdata->ndev;
@@ -171,6 +204,15 @@ static void xgene_xgmac_reset(struct xgene_enet_pdata *pdata)
        xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_0, 0);
 }
 
+static void xgene_pcs_reset(struct xgene_enet_pdata *pdata)
+{
+       u32 data;
+
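+       /* Pulse the PCS reset: read control, set PCS_CTRL_PCS_RST, then clear it */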
+       xgene_enet_rd_pcs(pdata, PCS_CONTROL_1, &data);
+       xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data | PCS_CTRL_PCS_RST);
+       xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data & ~PCS_CTRL_PCS_RST);
+}
+
 static void xgene_xgmac_set_mac_addr(struct xgene_enet_pdata *pdata)
 {
        u32 addr0, addr1;
@@ -216,12 +258,12 @@ static void xgene_xgmac_init(struct xgene_enet_pdata *pdata)
        data |= CFG_RSIF_FPBUFF_TIMEOUT_EN;
        xgene_enet_wr_csr(pdata, XG_RSIF_CONFIG_REG_ADDR, data);
 
-       xgene_enet_wr_csr(pdata, XG_CFG_BYPASS_ADDR, RESUME_TX);
-       xgene_enet_wr_csr(pdata, XGENET_RX_DV_GATE_REG_0_ADDR, 0);
        xgene_enet_rd_csr(pdata, XG_ENET_SPARE_CFG_REG_ADDR, &data);
        data |= BIT(12);
        xgene_enet_wr_csr(pdata, XG_ENET_SPARE_CFG_REG_ADDR, data);
        xgene_enet_wr_csr(pdata, XG_ENET_SPARE_CFG_REG_1_ADDR, 0x82);
+       xgene_enet_wr_csr(pdata, XGENET_RX_DV_GATE_REG_0_ADDR, 0);
+       xgene_enet_wr_csr(pdata, XG_CFG_BYPASS_ADDR, RESUME_TX);
 }
 
 static void xgene_xgmac_rx_enable(struct xgene_enet_pdata *pdata)
@@ -359,14 +401,17 @@ static void xgene_enet_link_state(struct work_struct *work)
 {
        struct xgene_enet_pdata *pdata = container_of(to_delayed_work(work),
                                         struct xgene_enet_pdata, link_work);
+       struct gpio_desc *sfp_rdy = pdata->sfp_rdy;
        struct net_device *ndev = pdata->ndev;
        u32 link_status, poll_interval;
 
        link_status = xgene_enet_link_status(pdata);
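+       /* Force the link down when the SFP ready (rx-los) GPIO reads low */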
+       if (link_status && !IS_ERR(sfp_rdy) && !gpiod_get_value(sfp_rdy))
+               link_status = 0;
+
        if (link_status) {
                if (!netif_carrier_ok(ndev)) {
                        netif_carrier_on(ndev);
-                       xgene_xgmac_init(pdata);
                        xgene_xgmac_rx_enable(pdata);
                        xgene_xgmac_tx_enable(pdata);
                        netdev_info(ndev, "Link is Up - 10Gbps\n");
@@ -380,6 +425,8 @@ static void xgene_enet_link_state(struct work_struct *work)
                        netdev_info(ndev, "Link is Down\n");
                }
                poll_interval = PHY_POLL_LINK_OFF;
+
+               xgene_pcs_reset(pdata);
        }
 
        schedule_delayed_work(&pdata->link_work, poll_interval);
index f1ea485..360ccbd 100644 (file)
@@ -24,6 +24,7 @@
 #define X2_BLOCK_ETH_MAC_CSR_OFFSET    0x3000
 #define BLOCK_AXG_MAC_OFFSET           0x0800
 #define BLOCK_AXG_MAC_CSR_OFFSET       0x2000
+#define BLOCK_PCS_OFFSET               0x3800
 
 #define XGENET_CONFIG_REG_ADDR         0x20
 #define XGENET_SRST_ADDR               0x00
@@ -72,6 +73,9 @@
 #define XG_MCX_ICM_CONFIG0_REG_0_ADDR  0x00e0
 #define XG_MCX_ICM_CONFIG2_REG_0_ADDR  0x00e8
 
+#define PCS_CONTROL_1                  0x0000
+#define PCS_CTRL_PCS_RST               BIT(15)
+
 extern const struct xgene_mac_ops xgene_xgmac_ops;
 extern const struct xgene_port_ops xgene_xgport_ops;
 
index 771cc26..f9df4b5 100644 (file)
@@ -54,9 +54,7 @@ MODULE_PARM_DESC(bna_debugfs_enable, "Enables debugfs feature, default=1,"
  * Global variables
  */
 static u32 bnad_rxqs_per_cq = 2;
-static u32 bna_id;
-static struct mutex bnad_list_mutex;
-static LIST_HEAD(bnad_list);
+static atomic_t bna_id;
 static const u8 bnad_bcast_addr[] __aligned(2) =
        { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
@@ -76,23 +74,6 @@ do {                                                         \
        (_res_info)->res_u.mem_info.len = (_size);              \
 } while (0)
 
-static void
-bnad_add_to_list(struct bnad *bnad)
-{
-       mutex_lock(&bnad_list_mutex);
-       list_add_tail(&bnad->list_entry, &bnad_list);
-       bnad->id = bna_id++;
-       mutex_unlock(&bnad_list_mutex);
-}
-
-static void
-bnad_remove_from_list(struct bnad *bnad)
-{
-       mutex_lock(&bnad_list_mutex);
-       list_del(&bnad->list_entry);
-       mutex_unlock(&bnad_list_mutex);
-}
-
 /*
  * Reinitialize completions in CQ, once Rx is taken down
  */
@@ -3573,14 +3554,12 @@ bnad_lock_init(struct bnad *bnad)
 {
        spin_lock_init(&bnad->bna_lock);
        mutex_init(&bnad->conf_mutex);
-       mutex_init(&bnad_list_mutex);
 }
 
 static void
 bnad_lock_uninit(struct bnad *bnad)
 {
        mutex_destroy(&bnad->conf_mutex);
-       mutex_destroy(&bnad_list_mutex);
 }
 
 /* PCI Initialization */
@@ -3653,7 +3632,7 @@ bnad_pci_probe(struct pci_dev *pdev,
        }
        bnad = netdev_priv(netdev);
        bnad_lock_init(bnad);
-       bnad_add_to_list(bnad);
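+       /* Derive a unique zero-based id from an atomic counter; no global bnad list needed */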
+       bnad->id = atomic_inc_return(&bna_id) - 1;
 
        mutex_lock(&bnad->conf_mutex);
        /*
@@ -3807,7 +3786,6 @@ pci_uninit:
        bnad_pci_uninit(pdev);
 unlock_mutex:
        mutex_unlock(&bnad->conf_mutex);
-       bnad_remove_from_list(bnad);
        bnad_lock_uninit(bnad);
        free_netdev(netdev);
        return err;
@@ -3845,7 +3823,6 @@ bnad_pci_remove(struct pci_dev *pdev)
        bnad_disable_msix(bnad);
        bnad_pci_uninit(pdev);
        mutex_unlock(&bnad->conf_mutex);
-       bnad_remove_from_list(bnad);
        bnad_lock_uninit(bnad);
        /* Remove the debugfs node for this bnad */
        kfree(bnad->regdata);
index f4ed816..46f7b84 100644 (file)
@@ -288,7 +288,6 @@ struct bnad_rx_unmap_q {
 struct bnad {
        struct net_device       *netdev;
        u32                     id;
-       struct list_head        list_entry;
 
        /* Data path */
        struct bnad_tx_info tx_info[BNAD_MAX_TX];
index 89c0cfa..dbce938 100644 (file)
@@ -541,6 +541,14 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb)
        }
 }
 
+static inline void macb_set_addr(struct macb_dma_desc *desc, dma_addr_t addr)
+{
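+       /* Low 32 bits always go into addr; addrh takes the upper bits on 64-bit DMA configs */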
+       desc->addr = (u32)addr;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+       desc->addrh = (u32)(addr >> 32);
+#endif
+}
+
 static void macb_tx_error_task(struct work_struct *work)
 {
        struct macb_queue       *queue = container_of(work, struct macb_queue,
@@ -621,14 +629,17 @@ static void macb_tx_error_task(struct work_struct *work)
 
        /* Set end of TX queue */
        desc = macb_tx_desc(queue, 0);
-       desc->addr = 0;
+       macb_set_addr(desc, 0);
        desc->ctrl = MACB_BIT(TX_USED);
 
        /* Make descriptor updates visible to hardware */
        wmb();
 
        /* Reinitialize the TX desc queue */
-       queue_writel(queue, TBQP, queue->tx_ring_dma);
+       queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+       queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32));
+#endif
        /* Make TX ring reflect state of hardware */
        queue->tx_head = 0;
        queue->tx_tail = 0;
@@ -750,7 +761,7 @@ static void gem_rx_refill(struct macb *bp)
 
                        if (entry == RX_RING_SIZE - 1)
                                paddr |= MACB_BIT(RX_WRAP);
-                       bp->rx_ring[entry].addr = paddr;
+                       macb_set_addr(&(bp->rx_ring[entry]), paddr);
                        bp->rx_ring[entry].ctrl = 0;
 
                        /* properly align Ethernet header */
@@ -798,7 +809,9 @@ static int gem_rx(struct macb *bp, int budget)
        int                     count = 0;
 
        while (count < budget) {
-               u32 addr, ctrl;
+               u32 ctrl;
+               dma_addr_t addr;
+               bool rxused;
 
                entry = macb_rx_ring_wrap(bp->rx_tail);
                desc = &bp->rx_ring[entry];
@@ -806,10 +819,14 @@ static int gem_rx(struct macb *bp, int budget)
                /* Make hw descriptor updates visible to CPU */
                rmb();
 
-               addr = desc->addr;
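+               /* Used bit is in the low word; rebuild the DMA address from addr/addrh */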
+               rxused = (desc->addr & MACB_BIT(RX_USED)) ? true : false;
+               addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+               addr |= ((u64)(desc->addrh) << 32);
+#endif
                ctrl = desc->ctrl;
 
-               if (!(addr & MACB_BIT(RX_USED)))
+               if (!rxused)
                        break;
 
                bp->rx_tail++;
@@ -835,7 +852,6 @@ static int gem_rx(struct macb *bp, int budget)
                netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len);
 
                skb_put(skb, len);
-               addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, addr));
                dma_unmap_single(&bp->pdev->dev, addr,
                                 bp->rx_buffer_size, DMA_FROM_DEVICE);
 
@@ -1299,7 +1315,7 @@ static unsigned int macb_tx_map(struct macb *bp,
                        ctrl |= MACB_BIT(TX_WRAP);
 
                /* Set TX buffer descriptor */
-               desc->addr = tx_skb->mapping;
+               macb_set_addr(desc, tx_skb->mapping);
                /* desc->addr must be visible to hardware before clearing
                 * 'TX_USED' bit in desc->ctrl.
                 */
@@ -1422,6 +1438,9 @@ static void gem_free_rx_buffers(struct macb *bp)
 
                desc = &bp->rx_ring[i];
                addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+               addr |= ((u64)(desc->addrh) << 32);
+#endif
                dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size,
                                 DMA_FROM_DEVICE);
                dev_kfree_skb_any(skb);
@@ -1547,7 +1566,7 @@ static void gem_init_rings(struct macb *bp)
 
        for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
                for (i = 0; i < TX_RING_SIZE; i++) {
-                       queue->tx_ring[i].addr = 0;
+                       macb_set_addr(&(queue->tx_ring[i]), 0);
                        queue->tx_ring[i].ctrl = MACB_BIT(TX_USED);
                }
                queue->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP);
@@ -1694,6 +1713,10 @@ static void macb_configure_dma(struct macb *bp)
                        dmacfg |= GEM_BIT(TXCOEN);
                else
                        dmacfg &= ~GEM_BIT(TXCOEN);
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+               dmacfg |= GEM_BIT(ADDR64);
+#endif
                netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n",
                           dmacfg);
                gem_writel(bp, DMACFG, dmacfg);
@@ -1739,9 +1762,15 @@ static void macb_init_hw(struct macb *bp)
        macb_configure_dma(bp);
 
        /* Initialize TX and RX buffers */
-       macb_writel(bp, RBQP, bp->rx_ring_dma);
+       macb_writel(bp, RBQP, (u32)(bp->rx_ring_dma));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+       macb_writel(bp, RBQPH, (u32)(bp->rx_ring_dma >> 32));
+#endif
        for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-               queue_writel(queue, TBQP, queue->tx_ring_dma);
+               queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+               queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32));
+#endif
 
                /* Enable interrupts */
                queue_writel(queue, IER,
@@ -2379,6 +2408,9 @@ static int macb_init(struct platform_device *pdev)
                        queue->IDR  = GEM_IDR(hw_q - 1);
                        queue->IMR  = GEM_IMR(hw_q - 1);
                        queue->TBQP = GEM_TBQP(hw_q - 1);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+                       queue->TBQPH = GEM_TBQPH(hw_q - 1);
+#endif
                } else {
                        /* queue0 uses legacy registers */
                        queue->ISR  = MACB_ISR;
@@ -2386,6 +2418,9 @@ static int macb_init(struct platform_device *pdev)
                        queue->IDR  = MACB_IDR;
                        queue->IMR  = MACB_IMR;
                        queue->TBQP = MACB_TBQP;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+                       queue->TBQPH = MACB_TBQPH;
+#endif
                }
 
                /* get irq: here we use the linux queue index, not the hardware
@@ -2935,6 +2970,11 @@ static int macb_probe(struct platform_device *pdev)
                bp->wol |= MACB_WOL_HAS_MAGIC_PACKET;
        device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET);
 
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
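+       /* A GEM data bus wider than 32 bits implies support for 44-bit DMA addressing */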
+       if (GEM_BFEXT(DBWDEF, gem_readl(bp, DCFG1)) > GEM_DBW32)
+               dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+#endif
+
        spin_lock_init(&bp->lock);
 
        /* setup capabilities */
@@ -2945,7 +2985,7 @@ static int macb_probe(struct platform_device *pdev)
        dev->irq = platform_get_irq(pdev, 0);
        if (dev->irq < 0) {
                err = dev->irq;
-               goto err_disable_clocks;
+               goto err_out_free_netdev;
        }
 
        mac = of_get_mac_address(np);
index b6fcf10..aa3aeec 100644 (file)
@@ -66,6 +66,8 @@
 #define MACB_USRIO             0x00c0
 #define MACB_WOL               0x00c4
 #define MACB_MID               0x00fc
+#define MACB_TBQPH             0x04C8
+#define MACB_RBQPH             0x04D4
 
 /* GEM register offsets. */
 #define GEM_NCFGR              0x0004 /* Network Config */
 
 #define GEM_ISR(hw_q)          (0x0400 + ((hw_q) << 2))
 #define GEM_TBQP(hw_q)         (0x0440 + ((hw_q) << 2))
+#define GEM_TBQPH(hw_q)                (0x04C8)
 #define GEM_RBQP(hw_q)         (0x0480 + ((hw_q) << 2))
 #define GEM_IER(hw_q)          (0x0600 + ((hw_q) << 2))
 #define GEM_IDR(hw_q)          (0x0620 + ((hw_q) << 2))
 #define GEM_RXBS_SIZE          8
 #define GEM_DDRP_OFFSET                24 /* disc_when_no_ahb */
 #define GEM_DDRP_SIZE          1
+#define GEM_ADDR64_OFFSET      30 /* Address bus width - 64b or 32b */
+#define GEM_ADDR64_SIZE                1
 
 
 /* Bitfields in NSR */
 struct macb_dma_desc {
        u32     addr;
        u32     ctrl;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+       u32     addrh;
+       u32     resvd;
+#endif
 };
 
 /* DMA descriptor bitfields */
@@ -777,6 +786,7 @@ struct macb_queue {
        unsigned int            IDR;
        unsigned int            IMR;
        unsigned int            TBQP;
+       unsigned int            TBQPH;
 
        unsigned int            tx_head, tx_tail;
        struct macb_dma_desc    *tx_ring;
index 0ef232d..e1b78b5 100644 (file)
@@ -36,10 +36,20 @@ config      THUNDER_NIC_BGX
        depends on 64BIT
        select PHYLIB
        select MDIO_THUNDER
+       select THUNDER_NIC_RGX
        ---help---
          This driver supports programming and controlling of MAC
          interface from NIC physical function driver.
 
+config THUNDER_NIC_RGX
+       tristate "Thunder MAC interface driver (RGX)"
+       depends on 64BIT
+       select PHYLIB
+       select MDIO_THUNDER
+       ---help---
+         This driver supports configuring the XCV block of the RGX
+         interface present on the CN81XX chip.
+
 config LIQUIDIO
        tristate "Cavium LiquidIO support"
        depends on 64BIT
index 5c4615c..6b4d4ad 100644 (file)
@@ -2,6 +2,7 @@
 # Makefile for Cavium's Thunder ethernet device
 #
 
+obj-$(CONFIG_THUNDER_NIC_RGX) += thunder_xcv.o
 obj-$(CONFIG_THUNDER_NIC_BGX) += thunder_bgx.o
 obj-$(CONFIG_THUNDER_NIC_PF) += nicpf.o
 obj-$(CONFIG_THUNDER_NIC_VF) += nicvf.o
index 83025bb..dd63f96 100644 (file)
 #define        PCI_DEVICE_ID_THUNDER_NIC_VF            0xA034
 #define        PCI_DEVICE_ID_THUNDER_BGX               0xA026
 
+/* Subsystem device IDs */
+#define PCI_SUBSYS_DEVID_88XX_NIC_PF           0xA11E
+#define PCI_SUBSYS_DEVID_81XX_NIC_PF           0xA21E
+#define PCI_SUBSYS_DEVID_83XX_NIC_PF           0xA31E
+
+#define PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF     0xA11E
+#define PCI_SUBSYS_DEVID_88XX_NIC_VF           0xA134
+#define PCI_SUBSYS_DEVID_81XX_NIC_VF           0xA234
+#define PCI_SUBSYS_DEVID_83XX_NIC_VF           0xA334
+
+
 /* PCI BAR nos */
 #define        PCI_CFG_REG_BAR_NUM             0
 #define        PCI_MSIX_REG_BAR_NUM            4
 /* Max pkinds */
 #define        NIC_MAX_PKIND                   16
 
-/* Rx Channels */
-/* Receive channel configuration in TNS bypass mode
- * Below is configuration in TNS bypass mode
- * BGX0-LMAC0-CHAN0 - VNIC CHAN0
- * BGX0-LMAC1-CHAN0 - VNIC CHAN16
- * ...
- * BGX1-LMAC0-CHAN0 - VNIC CHAN128
- * ...
- * BGX1-LMAC3-CHAN0 - VNIC CHAN174
- */
-#define        NIC_INTF_COUNT                  2  /* Interfaces btw VNIC and TNS/BGX */
-#define        NIC_CHANS_PER_INF               128
-#define        NIC_MAX_CHANS                   (NIC_INTF_COUNT * NIC_CHANS_PER_INF)
-#define        NIC_CPI_COUNT                   2048 /* No of channel parse indices */
-
-/* TNS bypass mode: 1-1 mapping between VNIC and BGX:LMAC */
-#define NIC_MAX_BGX                    MAX_BGX_PER_CN88XX
-#define        NIC_CPI_PER_BGX                 (NIC_CPI_COUNT / NIC_MAX_BGX)
-#define        NIC_MAX_CPI_PER_LMAC            64 /* Max when CPI_ALG is IP diffserv */
-#define        NIC_RSSI_PER_BGX                (NIC_RSSI_COUNT / NIC_MAX_BGX)
-
-/* Tx scheduling */
-#define        NIC_MAX_TL4                     1024
-#define        NIC_MAX_TL4_SHAPERS             256 /* 1 shaper for 4 TL4s */
-#define        NIC_MAX_TL3                     256
-#define        NIC_MAX_TL3_SHAPERS             64  /* 1 shaper for 4 TL3s */
-#define        NIC_MAX_TL2                     64
-#define        NIC_MAX_TL2_SHAPERS             2  /* 1 shaper for 32 TL2s */
-#define        NIC_MAX_TL1                     2
-
-/* TNS bypass mode */
-#define        NIC_TL2_PER_BGX                 32
-#define        NIC_TL4_PER_BGX                 (NIC_MAX_TL4 / NIC_MAX_BGX)
-#define        NIC_TL4_PER_LMAC                (NIC_MAX_TL4 / NIC_CHANS_PER_INF)
+/* Max when CPI_ALG is IP diffserv */
+#define        NIC_MAX_CPI_PER_LMAC            64
 
 /* NIC VF Interrupts */
 #define        NICVF_INTR_CQ                   0
@@ -148,7 +127,6 @@ struct nicvf_cq_poll {
        struct  napi_struct napi;
 };
 
-#define        NIC_RSSI_COUNT                  4096 /* Total no of RSS indices */
 #define NIC_MAX_RSS_HASH_BITS          8
 #define NIC_MAX_RSS_IDR_TBL_SIZE       (1 << NIC_MAX_RSS_HASH_BITS)
 #define RSS_HASH_KEY_SIZE              5 /* 320 bit key */
@@ -273,6 +251,7 @@ struct nicvf {
        struct net_device       *netdev;
        struct pci_dev          *pdev;
        void __iomem            *reg_base;
+#define        MAX_QUEUES_PER_QSET                     8
        struct queue_set        *qs;
        struct nicvf_cq_poll    *napi[8];
        u8                      vf_id;
@@ -368,6 +347,7 @@ struct nicvf {
 #define        NIC_MBOX_MSG_PNICVF_PTR         0x14    /* Get primary qset nicvf ptr */
 #define        NIC_MBOX_MSG_SNICVF_PTR         0x15    /* Send sqet nicvf ptr to PVF */
 #define        NIC_MBOX_MSG_LOOPBACK           0x16    /* Set interface in loopback */
+#define        NIC_MBOX_MSG_RESET_STAT_COUNTER 0x17    /* Reset statistics counters */
 #define        NIC_MBOX_MSG_CFG_DONE           0xF0    /* VF configuration done */
 #define        NIC_MBOX_MSG_SHUTDOWN           0xF1    /* VF is being shutdown */
 
@@ -484,6 +464,31 @@ struct set_loopback {
        bool  enable;
 };
 
+/* Reset statistics counters */
+struct reset_stat_cfg {
+       u8    msg;
+       /* Bitmap to select NIC_PF_VNIC(vf_id)_RX_STAT(0..13) */
+       u16   rx_stat_mask;
+       /* Bitmap to select NIC_PF_VNIC(vf_id)_TX_STAT(0..4) */
+       u8    tx_stat_mask;
+       /* Bitmap to select NIC_PF_QS(0..127)_RQ(0..7)_STAT(0..1)
+        * bit14, bit15 NIC_PF_QS(vf_id)_RQ7_STAT(0..1)
+        * bit12, bit13 NIC_PF_QS(vf_id)_RQ6_STAT(0..1)
+        * ..
+        * bit2, bit3 NIC_PF_QS(vf_id)_RQ1_STAT(0..1)
+        * bit0, bit1 NIC_PF_QS(vf_id)_RQ0_STAT(0..1)
+        */
+       u16   rq_stat_mask;
+       /* Bitmap to select NIC_PF_QS(0..127)_SQ(0..7)_STAT(0..1)
+        * bit14, bit15 NIC_PF_QS(vf_id)_SQ7_STAT(0..1)
+        * bit12, bit13 NIC_PF_QS(vf_id)_SQ6_STAT(0..1)
+        * ..
+        * bit2, bit3 NIC_PF_QS(vf_id)_SQ1_STAT(0..1)
+        * bit0, bit1 NIC_PF_QS(vf_id)_SQ0_STAT(0..1)
+        */
+       u16   sq_stat_mask;
+};
+
 /* 128 bit shared memory between PF and each VF */
 union nic_mbx {
        struct { u8 msg; }      msg;
@@ -501,6 +506,7 @@ union nic_mbx {
        struct sqs_alloc        sqs_alloc;
        struct nicvf_ptr        nicvf;
        struct set_loopback     lbk;
+       struct reset_stat_cfg   reset_stat;
 };
 
 #define NIC_NODE_ID_MASK       0x03
@@ -514,7 +520,14 @@ static inline int nic_get_node_id(struct pci_dev *pdev)
 
 static inline bool pass1_silicon(struct pci_dev *pdev)
 {
-       return pdev->revision < 8;
+       return (pdev->revision < 8) &&
+               (pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF);
+}
+
+static inline bool pass2_silicon(struct pci_dev *pdev)
+{
+       return (pdev->revision >= 8) &&
+               (pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF);
 }
 
 int nicvf_set_real_num_queues(struct net_device *netdev,
index 16ed203..25618d2 100644 (file)
 #define DRV_NAME       "thunder-nic"
 #define DRV_VERSION    "1.0"
 
+struct hw_info {
+       u8              bgx_cnt;
+       u8              chans_per_lmac;
+       u8              chans_per_bgx; /* Rx/Tx chans */
+       u8              chans_per_rgx;
+       u8              chans_per_lbk;
+       u16             cpi_cnt;
+       u16             rssi_cnt;
+       u16             rss_ind_tbl_size;
+       u16             tl4_cnt;
+       u16             tl3_cnt;
+       u8              tl2_cnt;
+       u8              tl1_cnt;
+       bool            tl1_per_bgx; /* TL1 per BGX or per LMAC */
+};
+
 struct nicpf {
        struct pci_dev          *pdev;
+       struct hw_info          *hw;
        u8                      node;
        unsigned int            flags;
        u8                      num_vf_en;      /* No of VF enabled */
@@ -36,22 +53,22 @@ struct nicpf {
 #define        NIC_SET_VF_LMAC_MAP(bgx, lmac)  (((bgx & 0xF) << 4) | (lmac & 0xF))
 #define        NIC_GET_BGX_FROM_VF_LMAC_MAP(map)       ((map >> 4) & 0xF)
 #define        NIC_GET_LMAC_FROM_VF_LMAC_MAP(map)      (map & 0xF)
-       u8                      vf_lmac_map[MAX_LMAC];
+       u8                      *vf_lmac_map;
        struct delayed_work     dwork;
        struct workqueue_struct *check_link;
-       u8                      link[MAX_LMAC];
-       u8                      duplex[MAX_LMAC];
-       u32                     speed[MAX_LMAC];
+       u8                      *link;
+       u8                      *duplex;
+       u32                     *speed;
        u16                     cpi_base[MAX_NUM_VFS_SUPPORTED];
        u16                     rssi_base[MAX_NUM_VFS_SUPPORTED];
-       u16                     rss_ind_tbl_size;
        bool                    mbx_lock[MAX_NUM_VFS_SUPPORTED];
 
        /* MSI-X */
        bool                    msix_enabled;
        u8                      num_vec;
-       struct msix_entry       msix_entries[NIC_PF_MSIX_VECTORS];
+       struct msix_entry       *msix_entries;
        bool                    irq_allocated[NIC_PF_MSIX_VECTORS];
+       char                    irq_name[NIC_PF_MSIX_VECTORS][20];
 };
 
 /* Supported devices */
@@ -89,9 +106,22 @@ static u64 nic_reg_read(struct nicpf *nic, u64 offset)
 /* PF -> VF mailbox communication APIs */
 static void nic_enable_mbx_intr(struct nicpf *nic)
 {
-       /* Enable mailbox interrupt for all 128 VFs */
-       nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, ~0ull);
-       nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64), ~0ull);
+       int vf_cnt = pci_sriov_get_totalvfs(nic->pdev);
+
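+/* Mask with the low 'vfs' bits set; each enable register covers up to 64 VFs */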
+#define INTR_MASK(vfs) ((vfs < 64) ? (BIT_ULL(vfs) - 1) : (~0ull))
+
+       /* Clear it, to avoid spurious interrupts (if any) */
+       nic_reg_write(nic, NIC_PF_MAILBOX_INT, INTR_MASK(vf_cnt));
+
+       /* Enable mailbox interrupt for all VFs */
+       nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, INTR_MASK(vf_cnt));
+       /* One mailbox intr enable reg per 64 VFs */
+       if (vf_cnt > 64) {
+               nic_reg_write(nic, NIC_PF_MAILBOX_INT + sizeof(u64),
+                             INTR_MASK(vf_cnt - 64));
+               nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64),
+                             INTR_MASK(vf_cnt - 64));
+       }
 }
 
 static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg)
@@ -144,7 +174,7 @@ static void nic_mbx_send_ready(struct nicpf *nic, int vf)
 
        mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE;
 
-       if (vf < MAX_LMAC) {
+       if (vf < nic->num_vf_en) {
                bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
                lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
 
@@ -155,7 +185,7 @@ static void nic_mbx_send_ready(struct nicpf *nic, int vf)
        mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en) ? true : false;
        mbx.nic_cfg.node_id = nic->node;
 
-       mbx.nic_cfg.loopback_supported = vf < MAX_LMAC;
+       mbx.nic_cfg.loopback_supported = vf < nic->num_vf_en;
 
        nic_send_msg_to_vf(nic, vf, &mbx);
 }
@@ -248,14 +278,22 @@ static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf)
 /* Set minimum transmit packet size */
 static void nic_set_tx_pkt_pad(struct nicpf *nic, int size)
 {
-       int lmac;
+       int lmac, max_lmac;
+       u16 sdevid;
        u64 lmac_cfg;
 
        /* Max value that can be set is 60 */
        if (size > 60)
                size = 60;
 
-       for (lmac = 0; lmac < (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX); lmac++) {
+       pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
+       /* 81xx's RGX has only one LMAC */
+       if (sdevid == PCI_SUBSYS_DEVID_81XX_NIC_PF)
+               max_lmac = ((nic->hw->bgx_cnt - 1) * MAX_LMAC_PER_BGX) + 1;
+       else
+               max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX;
+
+       for (lmac = 0; lmac < max_lmac; lmac++) {
                lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3));
                lmac_cfg &= ~(0xF << 2);
                lmac_cfg |= ((size / 4) << 2);
@@ -275,7 +313,7 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic)
 
        nic->num_vf_en = 0;
 
-       for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) {
+       for (bgx = 0; bgx < nic->hw->bgx_cnt; bgx++) {
                if (!(bgx_map & (1 << bgx)))
                        continue;
                lmac_cnt = bgx_get_lmac_count(nic->node, bgx);
@@ -295,28 +333,125 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic)
                        nic_reg_write(nic,
                                      NIC_PF_LMAC_0_7_CREDIT + (lmac * 8),
                                      lmac_credit);
+
+               /* On CN81XX there are only 8 VFs, but the maximum
+                * possible number of interfaces is 9.
+                */
+               if (nic->num_vf_en >= pci_sriov_get_totalvfs(nic->pdev)) {
+                       nic->num_vf_en = pci_sriov_get_totalvfs(nic->pdev);
+                       break;
+               }
        }
 }
 
+static void nic_free_lmacmem(struct nicpf *nic)
+{
+       kfree(nic->vf_lmac_map);
+       kfree(nic->link);
+       kfree(nic->duplex);
+       kfree(nic->speed);
+}
+
+static int nic_get_hw_info(struct nicpf *nic)
+{
+       u8 max_lmac;
+       u16 sdevid;
+       struct hw_info *hw = nic->hw;
+
+       pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
+
+       switch (sdevid) {
+       case PCI_SUBSYS_DEVID_88XX_NIC_PF:
+               hw->bgx_cnt = MAX_BGX_PER_CN88XX;
+               hw->chans_per_lmac = 16;
+               hw->chans_per_bgx = 128;
+               hw->cpi_cnt = 2048;
+               hw->rssi_cnt = 4096;
+               hw->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE;
+               hw->tl3_cnt = 256;
+               hw->tl2_cnt = 64;
+               hw->tl1_cnt = 2;
+               hw->tl1_per_bgx = true;
+               break;
+       case PCI_SUBSYS_DEVID_81XX_NIC_PF:
+               hw->bgx_cnt = MAX_BGX_PER_CN81XX;
+               hw->chans_per_lmac = 8;
+               hw->chans_per_bgx = 32;
+               hw->chans_per_rgx = 8;
+               hw->chans_per_lbk = 24;
+               hw->cpi_cnt = 512;
+               hw->rssi_cnt = 256;
+               hw->rss_ind_tbl_size = 32; /* Max RSSI / Max interfaces */
+               hw->tl3_cnt = 64;
+               hw->tl2_cnt = 16;
+               hw->tl1_cnt = 10;
+               hw->tl1_per_bgx = false;
+               break;
+       case PCI_SUBSYS_DEVID_83XX_NIC_PF:
+               hw->bgx_cnt = MAX_BGX_PER_CN83XX;
+               hw->chans_per_lmac = 8;
+               hw->chans_per_bgx = 32;
+               hw->chans_per_lbk = 64;
+               hw->cpi_cnt = 2048;
+               hw->rssi_cnt = 1024;
+               hw->rss_ind_tbl_size = 64; /* Max RSSI / Max interfaces */
+               hw->tl3_cnt = 256;
+               hw->tl2_cnt = 64;
+               hw->tl1_cnt = 18;
+               hw->tl1_per_bgx = false;
+               break;
+       }
+       hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev);
+
+       /* Allocate memory for LMAC tracking elements */
+       max_lmac = hw->bgx_cnt * MAX_LMAC_PER_BGX;
+       nic->vf_lmac_map = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
+       if (!nic->vf_lmac_map)
+               goto error;
+       nic->link = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
+       if (!nic->link)
+               goto error;
+       nic->duplex = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
+       if (!nic->duplex)
+               goto error;
+       nic->speed = kmalloc_array(max_lmac, sizeof(u32), GFP_KERNEL);
+       if (!nic->speed)
+               goto error;
+       return 0;
+
+error:
+       nic_free_lmacmem(nic);
+       return -ENOMEM;
+}
+
 #define BGX0_BLOCK 8
 #define BGX1_BLOCK 9
 
-static void nic_init_hw(struct nicpf *nic)
+static int nic_init_hw(struct nicpf *nic)
 {
-       int i;
+       int i, err;
        u64 cqm_cfg;
 
+       /* Get HW capability info */
+       err = nic_get_hw_info(nic);
+       if (err)
+               return err;
+
        /* Enable NIC HW block */
        nic_reg_write(nic, NIC_PF_CFG, 0x3);
 
        /* Enable backpressure */
        nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03);
 
-       /* Disable TNS mode on both interfaces */
-       nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
-                     (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK);
-       nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
-                     (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK);
+       /* TNS and TNS bypass modes are present only on 88xx */
+       if (nic->pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF) {
+               /* Disable TNS mode on both interfaces */
+               nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
+                             (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK);
+               nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
+                             (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK);
+       }
+
        nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG,
                      (1ULL << 63) | BGX0_BLOCK);
        nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8),
@@ -346,11 +481,14 @@ static void nic_init_hw(struct nicpf *nic)
        cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG);
        if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL)
                nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL);
+
+       return 0;
 }
 
 /* Channel parse index configuration */
 static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg)
 {
+       struct hw_info *hw = nic->hw;
        u32 vnic, bgx, lmac, chan;
        u32 padd, cpi_count = 0;
        u64 cpi_base, cpi, rssi_base, rssi;
@@ -360,9 +498,9 @@ static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg)
        bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
        lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
 
-       chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF);
-       cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + (bgx * NIC_CPI_PER_BGX);
-       rssi_base = (lmac * nic->rss_ind_tbl_size) + (bgx * NIC_RSSI_PER_BGX);
+       chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx);
+       cpi_base = vnic * NIC_MAX_CPI_PER_LMAC;
+       rssi_base = vnic * hw->rss_ind_tbl_size;
 
        /* Rx channel configuration */
        nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3),
@@ -434,7 +572,7 @@ static void nic_send_rss_size(struct nicpf *nic, int vf)
        msg = (u64 *)&mbx;
 
        mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
-       mbx.rss_size.ind_tbl_size = nic->rss_ind_tbl_size;
+       mbx.rss_size.ind_tbl_size = nic->hw->rss_ind_tbl_size;
        nic_send_msg_to_vf(nic, vf, &mbx);
 }
 
@@ -481,7 +619,7 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg)
 /* 4 level transmit side scheduler configuration
  * for TNS bypass mode
  *
- * Sample configuration for SQ0
+ * Sample configuration for SQ0 on 88xx
  * VNIC0-SQ0 -> TL4(0)   -> TL3[0]   -> TL2[0]  -> TL1[0] -> BGX0
  * VNIC1-SQ0 -> TL4(8)   -> TL3[2]   -> TL2[0]  -> TL1[0] -> BGX0
  * VNIC2-SQ0 -> TL4(16)  -> TL3[4]   -> TL2[1]  -> TL1[0] -> BGX0
@@ -494,6 +632,7 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg)
 static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
                               struct sq_cfg_msg *sq)
 {
+       struct hw_info *hw = nic->hw;
        u32 bgx, lmac, chan;
        u32 tl2, tl3, tl4;
        u32 rr_quantum;
@@ -512,21 +651,28 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
        /* 24 bytes for FCS, IPG and preamble */
        rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4);
 
-       if (!sq->sqs_mode) {
-               tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
-       } else {
-               for (svf = 0; svf < MAX_SQS_PER_VF; svf++) {
-                       if (nic->vf_sqs[pqs_vnic][svf] == vnic)
-                               break;
+       /* On 88xx, TL4s 0-511 transmit via BGX0 and
+        * TL4s 512-1023 transmit via BGX1.
+        */
+       if (hw->tl1_per_bgx) {
+               tl4 = bgx * (hw->tl4_cnt / hw->bgx_cnt);
+               if (!sq->sqs_mode) {
+                       tl4 += (lmac * MAX_QUEUES_PER_QSET);
+               } else {
+                       for (svf = 0; svf < MAX_SQS_PER_VF; svf++) {
+                               if (nic->vf_sqs[pqs_vnic][svf] == vnic)
+                                       break;
+                       }
+                       tl4 += (MAX_LMAC_PER_BGX * MAX_QUEUES_PER_QSET);
+                       tl4 += (lmac * MAX_QUEUES_PER_QSET * MAX_SQS_PER_VF);
+                       tl4 += (svf * MAX_QUEUES_PER_QSET);
                }
-               tl4 = (MAX_LMAC_PER_BGX * NIC_TL4_PER_LMAC);
-               tl4 += (lmac * NIC_TL4_PER_LMAC * MAX_SQS_PER_VF);
-               tl4 += (svf * NIC_TL4_PER_LMAC);
-               tl4 += (bgx * NIC_TL4_PER_BGX);
+       } else {
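+               /* TL1 is per LMAC on 81xx/83xx, so each VNIC Qset gets its own TL4 block */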
+               tl4 = (vnic * MAX_QUEUES_PER_QSET);
        }
        tl4 += sq_idx;
 
-       tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3);
+       tl3 = tl4 / (hw->tl4_cnt / hw->tl3_cnt);
        nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 |
                      ((u64)vnic << NIC_QS_ID_SHIFT) |
                      ((u32)sq_idx << NIC_Q_NUM_SHIFT), tl4);
@@ -534,8 +680,19 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
                      ((u64)vnic << 27) | ((u32)sq_idx << 24) | rr_quantum);
 
        nic_reg_write(nic, NIC_PF_TL3_0_255_CFG | (tl3 << 3), rr_quantum);
-       chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF);
-       nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan);
+
+       /* On 88xx channels 0-127 belong to BGX0 and
+        * channels 128-255 to BGX1.
+        *
+        * On 81xx/83xx the TL3_CHAN reg should be configured with the
+        * channel within the LMAC, i.e. 0-7, and not the absolute channel
+        * number as on 88xx.
+        */
+       chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx);
+       if (hw->tl1_per_bgx)
+               nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan);
+       else
+               nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), 0);
+
        /* Enable backpressure on the channel */
        nic_reg_write(nic, NIC_PF_CHAN_0_255_TX_CFG | (chan << 3), 1);
 
@@ -544,6 +701,16 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
        nic_reg_write(nic, NIC_PF_TL2_0_63_CFG | (tl2 << 3), rr_quantum);
        /* No priorities as of now */
        nic_reg_write(nic, NIC_PF_TL2_0_63_PRI | (tl2 << 3), 0x00);
+
+       /* Unlike 88xx, where TL2s 0-31 transmit to TL1 '0' and the rest to
+        * TL1 '1', on 81xx/83xx each TL2 needs to be configured to transmit
+        * to one of the possible LMACs.
+        *
+        * This register doesn't exist on 88xx.
+        */
+       if (!hw->tl1_per_bgx)
+               nic_reg_write(nic, NIC_PF_TL2_LMAC | (tl2 << 3),
+                             lmac + (bgx * MAX_LMAC_PER_BGX));
 }
 
 /* Send primary nicvf pointer to secondary QS's VF */
@@ -615,7 +782,7 @@ static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk)
 {
        int bgx_idx, lmac_idx;
 
-       if (lbk->vf_id > MAX_LMAC)
+       if (lbk->vf_id >= nic->num_vf_en)
                return -1;
 
        bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]);
@@ -626,6 +793,67 @@ static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk)
        return 0;
 }
 
+/* Reset statistics counters */
+static int nic_reset_stat_counters(struct nicpf *nic,
+                                  int vf, struct reset_stat_cfg *cfg)
+{
+       int i, stat, qnum;
+       u64 reg_addr;
+
+       for (i = 0; i < RX_STATS_ENUM_LAST; i++) {
+               if (cfg->rx_stat_mask & BIT(i)) {
+                       reg_addr = NIC_PF_VNIC_0_127_RX_STAT_0_13 |
+                                  (vf << NIC_QS_ID_SHIFT) |
+                                  (i << 3);
+                       nic_reg_write(nic, reg_addr, 0);
+               }
+       }
+
+       for (i = 0; i < TX_STATS_ENUM_LAST; i++) {
+               if (cfg->tx_stat_mask & BIT(i)) {
+                       reg_addr = NIC_PF_VNIC_0_127_TX_STAT_0_4 |
+                                  (vf << NIC_QS_ID_SHIFT) |
+                                  (i << 3);
+                       nic_reg_write(nic, reg_addr, 0);
+               }
+       }
+
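+       /* RQ/SQ stat bits: each pair selects the queue (i >> 1) and its STAT0/STAT1 register (i & 1) */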
+       for (i = 0; i <= 15; i++) {
+               qnum = i >> 1;
+               stat = i & 1 ? 1 : 0;
+               reg_addr = (vf << NIC_QS_ID_SHIFT) |
+                          (qnum << NIC_Q_NUM_SHIFT) | (stat << 3);
+               if (cfg->rq_stat_mask & BIT(i)) {
+                       reg_addr |= NIC_PF_QSET_0_127_RQ_0_7_STAT_0_1;
+                       nic_reg_write(nic, reg_addr, 0);
+               }
+               if (cfg->sq_stat_mask & BIT(i)) {
+                       reg_addr |= NIC_PF_QSET_0_127_SQ_0_7_STAT_0_1;
+                       nic_reg_write(nic, reg_addr, 0);
+               }
+       }
+       return 0;
+}
+
+static void nic_enable_tunnel_parsing(struct nicpf *nic, int vf)
+{
+       u64 prot_def = (IPV6_PROT << 32) | (IPV4_PROT << 16) | ET_PROT;
+       u64 vxlan_prot_def = (IPV6_PROT_DEF << 32) |
+                             (IPV4_PROT_DEF) << 16 | ET_PROT_DEF;
+
+       /* Configure tunnel parsing parameters */
+       nic_reg_write(nic, NIC_PF_RX_GENEVE_DEF,
+                     (1ULL << 63 | UDP_GENEVE_PORT_NUM));
+       nic_reg_write(nic, NIC_PF_RX_GENEVE_PROT_DEF,
+                     ((7ULL << 61) | prot_def));
+       nic_reg_write(nic, NIC_PF_RX_NVGRE_PROT_DEF,
+                     ((7ULL << 61) | prot_def));
+       nic_reg_write(nic, NIC_PF_RX_VXLAN_DEF_0_1,
+                     ((1ULL << 63) | UDP_VXLAN_PORT_NUM));
+       nic_reg_write(nic, NIC_PF_RX_VXLAN_PROT_DEF,
+                     ((0xfULL << 60) | vxlan_prot_def));
+}
+
 static void nic_enable_vf(struct nicpf *nic, int vf, bool enable)
 {
        int bgx, lmac;
@@ -664,18 +892,17 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
                mbx_addr += sizeof(u64);
        }
 
-       dev_dbg(&nic->pdev->dev, "%s: Mailbox msg %d from VF%d\n",
+       dev_dbg(&nic->pdev->dev, "%s: Mailbox msg 0x%02x from VF%d\n",
                __func__, mbx.msg.msg, vf);
        switch (mbx.msg.msg) {
        case NIC_MBOX_MSG_READY:
                nic_mbx_send_ready(nic, vf);
-               if (vf < MAX_LMAC) {
+               if (vf < nic->num_vf_en) {
                        nic->link[vf] = 0;
                        nic->duplex[vf] = 0;
                        nic->speed[vf] = 0;
                }
-               ret = 1;
-               break;
+               goto unlock;
        case NIC_MBOX_MSG_QS_CFG:
                reg_addr = NIC_PF_QSET_0_127_CFG |
                           (mbx.qs.num << NIC_QS_ID_SHIFT);
@@ -693,6 +920,15 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
                           (mbx.rq.qs_num << NIC_QS_ID_SHIFT) |
                           (mbx.rq.rq_num << NIC_Q_NUM_SHIFT);
                nic_reg_write(nic, reg_addr, mbx.rq.cfg);
+               /* Enable CQE_RX2_S extension in the CQE_RX descriptor.
+                * It is appended by default on 81xx/83xx chips; for
+                * consistency, enable the same on 88xx pass2, where it
+                * was introduced.
+                */
+               if (pass2_silicon(nic->pdev))
+                       nic_reg_write(nic, NIC_PF_RX_CFG, 0x01);
+               if (!pass1_silicon(nic->pdev))
+                       nic_enable_tunnel_parsing(nic, vf);
                break;
        case NIC_MBOX_MSG_RQ_BP_CFG:
                reg_addr = NIC_PF_QSET_0_127_RQ_0_7_BP_CFG |
@@ -717,8 +953,10 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
                nic_tx_channel_cfg(nic, mbx.qs.num, &mbx.sq);
                break;
        case NIC_MBOX_MSG_SET_MAC:
-               if (vf >= nic->num_vf_en)
+               if (vf >= nic->num_vf_en) {
+                       ret = -1; /* NACK */
                        break;
+               }
                lmac = mbx.mac.vf_id;
                bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
                lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
@@ -767,25 +1005,38 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
        case NIC_MBOX_MSG_LOOPBACK:
                ret = nic_config_loopback(nic, &mbx.lbk);
                break;
+       case NIC_MBOX_MSG_RESET_STAT_COUNTER:
+               ret = nic_reset_stat_counters(nic, vf, &mbx.reset_stat);
+               break;
        default:
                dev_err(&nic->pdev->dev,
                        "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
                break;
        }
 
-       if (!ret)
+       if (!ret) {
                nic_mbx_send_ack(nic, vf);
-       else if (mbx.msg.msg != NIC_MBOX_MSG_READY)
+       } else if (mbx.msg.msg != NIC_MBOX_MSG_READY) {
+               dev_err(&nic->pdev->dev, "NACK for MBOX 0x%02x from VF %d\n",
+                       mbx.msg.msg, vf);
                nic_mbx_send_nack(nic, vf);
+       }
 unlock:
        nic->mbx_lock[vf] = false;
 }
 
-static void nic_mbx_intr_handler (struct nicpf *nic, int mbx)
+static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
 {
+       struct nicpf *nic = (struct nicpf *)nic_irq;
+       int mbx;
        u64 intr;
        u8  vf, vf_per_mbx_reg = 64;
 
+       if (irq == nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector)
+               mbx = 0;
+       else
+               mbx = 1;
+
        intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3));
        dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr);
        for (vf = 0; vf < vf_per_mbx_reg; vf++) {
@@ -797,23 +1048,6 @@ static void nic_mbx_intr_handler (struct nicpf *nic, int mbx)
                        nic_clear_mbx_intr(nic, vf, mbx);
                }
        }
-}
-
-static irqreturn_t nic_mbx0_intr_handler (int irq, void *nic_irq)
-{
-       struct nicpf *nic = (struct nicpf *)nic_irq;
-
-       nic_mbx_intr_handler(nic, 0);
-
-       return IRQ_HANDLED;
-}
-
-static irqreturn_t nic_mbx1_intr_handler (int irq, void *nic_irq)
-{
-       struct nicpf *nic = (struct nicpf *)nic_irq;
-
-       nic_mbx_intr_handler(nic, 1);
-
        return IRQ_HANDLED;
 }
 
@@ -821,7 +1055,13 @@ static int nic_enable_msix(struct nicpf *nic)
 {
        int i, ret;
 
-       nic->num_vec = NIC_PF_MSIX_VECTORS;
+       nic->num_vec = pci_msix_vec_count(nic->pdev);
+
+       nic->msix_entries = kmalloc_array(nic->num_vec,
+                                         sizeof(struct msix_entry),
+                                         GFP_KERNEL);
+       if (!nic->msix_entries)
+               return -ENOMEM;
 
        for (i = 0; i < nic->num_vec; i++)
                nic->msix_entries[i].entry = i;
@@ -829,8 +1069,9 @@ static int nic_enable_msix(struct nicpf *nic)
        ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec);
        if (ret) {
                dev_err(&nic->pdev->dev,
-                       "Request for #%d msix vectors failed\n",
-                          nic->num_vec);
+                       "Request for #%d msix vectors failed, returned %d\n",
+                          nic->num_vec, ret);
+               kfree(nic->msix_entries);
                return ret;
        }
 
@@ -842,6 +1083,7 @@ static void nic_disable_msix(struct nicpf *nic)
 {
        if (nic->msix_enabled) {
                pci_disable_msix(nic->pdev);
+               kfree(nic->msix_entries);
                nic->msix_enabled = 0;
                nic->num_vec = 0;
        }
@@ -860,27 +1102,26 @@ static void nic_free_all_interrupts(struct nicpf *nic)
 
 static int nic_register_interrupts(struct nicpf *nic)
 {
-       int ret;
+       int i, ret;
 
        /* Enable MSI-X */
        ret = nic_enable_msix(nic);
        if (ret)
                return ret;
 
-       /* Register mailbox interrupt handlers */
-       ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector,
-                         nic_mbx0_intr_handler, 0, "NIC Mbox0", nic);
-       if (ret)
-               goto fail;
-
-       nic->irq_allocated[NIC_PF_INTR_ID_MBOX0] = true;
+       /* Register mailbox interrupt handler */
+       for (i = NIC_PF_INTR_ID_MBOX0; i < nic->num_vec; i++) {
+               sprintf(nic->irq_name[i],
+                       "NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0));
 
-       ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX1].vector,
-                         nic_mbx1_intr_handler, 0, "NIC Mbox1", nic);
-       if (ret)
-               goto fail;
+               ret = request_irq(nic->msix_entries[i].vector,
+                                 nic_mbx_intr_handler, 0,
+                                 nic->irq_name[i], nic);
+               if (ret)
+                       goto fail;
 
-       nic->irq_allocated[NIC_PF_INTR_ID_MBOX1] = true;
+               nic->irq_allocated[i] = true;
+       }
 
        /* Enable mailbox interrupt */
        nic_enable_mbx_intr(nic);
@@ -889,6 +1130,7 @@ static int nic_register_interrupts(struct nicpf *nic)
 fail:
        dev_err(&nic->pdev->dev, "Request irq failed\n");
        nic_free_all_interrupts(nic);
+       nic_disable_msix(nic);
        return ret;
 }
 
@@ -903,6 +1145,12 @@ static int nic_num_sqs_en(struct nicpf *nic, int vf_en)
        int pos, sqs_per_vf = MAX_SQS_PER_VF_SINGLE_NODE;
        u16 total_vf;
 
+       /* Secondary Qsets are needed only if CPU count is
+        * more than MAX_QUEUES_PER_QSET.
+        */
+       if (num_online_cpus() <= MAX_QUEUES_PER_QSET)
+               return 0;
+
        /* Check if it's a multi-node environment */
        if (nr_node_ids > 1)
                sqs_per_vf = MAX_SQS_PER_VF;
@@ -1008,6 +1256,12 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (!nic)
                return -ENOMEM;
 
+       nic->hw = devm_kzalloc(dev, sizeof(struct hw_info), GFP_KERNEL);
+       if (!nic->hw) {
+               devm_kfree(dev, nic);
+               return -ENOMEM;
+       }
+
        pci_set_drvdata(pdev, nic);
 
        nic->pdev = pdev;
@@ -1047,13 +1301,12 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        nic->node = nic_get_node_id(pdev);
 
-       nic_set_lmac_vf_mapping(nic);
-
        /* Initialize hardware */
-       nic_init_hw(nic);
+       err = nic_init_hw(nic);
+       if (err)
+               goto err_release_regions;
 
-       /* Set RSS TBL size for each VF */
-       nic->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE;
+       nic_set_lmac_vf_mapping(nic);
 
        /* Register interrupts */
        err = nic_register_interrupts(nic);
@@ -1086,6 +1339,9 @@ err_unregister_interrupts:
 err_release_regions:
        pci_release_regions(pdev);
 err_disable_device:
+       nic_free_lmacmem(nic);
+       devm_kfree(dev, nic->hw);
+       devm_kfree(dev, nic);
        pci_disable_device(pdev);
        pci_set_drvdata(pdev, NULL);
        return err;
@@ -1106,6 +1362,11 @@ static void nic_remove(struct pci_dev *pdev)
 
        nic_unregister_interrupts(nic);
        pci_release_regions(pdev);
+
+       nic_free_lmacmem(nic);
+       devm_kfree(&pdev->dev, nic->hw);
+       devm_kfree(&pdev->dev, nic);
+
        pci_disable_device(pdev);
        pci_set_drvdata(pdev, NULL);
 }
index afb10e3..db9c632 100644 (file)
 #define   NIC_PF_MAILBOX_ENA_W1C               (0x0450)
 #define   NIC_PF_MAILBOX_ENA_W1S               (0x0470)
 #define   NIC_PF_RX_ETYPE_0_7                  (0x0500)
+#define   NIC_PF_RX_GENEVE_DEF                 (0x0580)
+#define    UDP_GENEVE_PORT_NUM                         0x17C1ULL
+#define   NIC_PF_RX_GENEVE_PROT_DEF            (0x0588)
+#define    IPV6_PROT                                   0x86DDULL
+#define    IPV4_PROT                                   0x800ULL
+#define    ET_PROT                                     0x6558ULL
+#define   NIC_PF_RX_NVGRE_PROT_DEF             (0x0598)
+#define   NIC_PF_RX_VXLAN_DEF_0_1              (0x05A0)
+#define    UDP_VXLAN_PORT_NUM                          0x12B5
+#define   NIC_PF_RX_VXLAN_PROT_DEF             (0x05B0)
+#define    IPV6_PROT_DEF                               0x2ULL
+#define    IPV4_PROT_DEF                               0x1ULL
+#define    ET_PROT_DEF                                 0x3ULL
+#define   NIC_PF_RX_CFG                                (0x05D0)
 #define   NIC_PF_PKIND_0_15_CFG                        (0x0600)
 #define   NIC_PF_ECC0_FLIP0                    (0x1000)
 #define   NIC_PF_ECC1_FLIP0                    (0x1008)
 #define   NIC_PF_SW_SYNC_RX_DONE               (0x490008)
 #define   NIC_PF_TL2_0_63_CFG                  (0x500000)
 #define   NIC_PF_TL2_0_63_PRI                  (0x520000)
+#define   NIC_PF_TL2_LMAC                      (0x540000)
 #define   NIC_PF_TL2_0_63_SH_STATUS            (0x580000)
 #define   NIC_PF_TL3A_0_63_CFG                 (0x5F0000)
 #define   NIC_PF_TL3_0_255_CFG                 (0x600000)
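For reference (not part of the patch): the new tunnel-offload defaults decode to well-known protocol numbers — 0x17C1 is UDP port 6081 (GENEVE), 0x12B5 is UDP port 4789 (VXLAN), and 0x86DD/0x800/0x6558 are the IPv6, IPv4 and Transparent-Ethernet-Bridging EtherTypes used as inner-protocol selectors. A minimal standalone C sketch that only sanity-checks those constants:

#include <stdio.h>

/* Constants copied from the nic_reg.h hunk above; the decimal values are the
 * IANA-assigned UDP ports and IEEE EtherTypes they are expected to match. */
#define UDP_GENEVE_PORT_NUM 0x17C1u  /* 6081 */
#define UDP_VXLAN_PORT_NUM  0x12B5u  /* 4789 */
#define IPV6_PROT           0x86DDu
#define IPV4_PROT           0x800u
#define ET_PROT             0x6558u  /* Transparent Ethernet Bridging (NVGRE inner type) */

int main(void)
{
	printf("GENEVE UDP port: %u\n", UDP_GENEVE_PORT_NUM); /* prints 6081 */
	printf("VXLAN  UDP port: %u\n", UDP_VXLAN_PORT_NUM);  /* prints 4789 */
	printf("EtherTypes: IPv6 0x%04X, IPv4 0x%04X, TEB 0x%04X\n",
	       IPV6_PROT, IPV4_PROT, ET_PROT);
	return 0;
}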
index a19e73f..06c014e 100644 (file)
 static const struct pci_device_id nicvf_id_table[] = {
        { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
                         PCI_DEVICE_ID_THUNDER_NIC_VF,
-                        PCI_VENDOR_ID_CAVIUM, 0xA134) },
+                        PCI_VENDOR_ID_CAVIUM,
+                        PCI_SUBSYS_DEVID_88XX_NIC_VF) },
        { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
                         PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF,
-                        PCI_VENDOR_ID_CAVIUM, 0xA11E) },
+                        PCI_VENDOR_ID_CAVIUM,
+                        PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) },
+       { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
+                        PCI_DEVICE_ID_THUNDER_NIC_VF,
+                        PCI_VENDOR_ID_CAVIUM,
+                        PCI_SUBSYS_DEVID_81XX_NIC_VF) },
+       { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
+                        PCI_DEVICE_ID_THUNDER_NIC_VF,
+                        PCI_VENDOR_ID_CAVIUM,
+                        PCI_SUBSYS_DEVID_83XX_NIC_VF) },
        { 0, }  /* end of table */
 };
 
@@ -134,15 +144,19 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
 
        /* Wait for previous message to be acked, timeout 2sec */
        while (!nic->pf_acked) {
-               if (nic->pf_nacked)
+               if (nic->pf_nacked) {
+                       netdev_err(nic->netdev,
+                                  "PF NACK to mbox msg 0x%02x from VF%d\n",
+                                  (mbx->msg.msg & 0xFF), nic->vf_id);
                        return -EINVAL;
+               }
                msleep(sleep);
                if (nic->pf_acked)
                        break;
                timeout -= sleep;
                if (!timeout) {
                        netdev_err(nic->netdev,
-                                  "PF didn't ack to mbox msg %d from VF%d\n",
+                                  "PF didn't ACK to mbox msg 0x%02x from VF%d\n",
                                   (mbx->msg.msg & 0xFF), nic->vf_id);
                        return -EBUSY;
                }
@@ -352,13 +366,7 @@ static int nicvf_rss_init(struct nicvf *nic)
 
        rss->enable = true;
 
-       /* Using the HW reset value for now */
-       rss->key[0] = 0xFEED0BADFEED0BADULL;
-       rss->key[1] = 0xFEED0BADFEED0BADULL;
-       rss->key[2] = 0xFEED0BADFEED0BADULL;
-       rss->key[3] = 0xFEED0BADFEED0BADULL;
-       rss->key[4] = 0xFEED0BADFEED0BADULL;
-
+       netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64));
        nicvf_set_rss_key(nic);
 
        rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA;
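The change above replaces the fixed 0xFEED0BAD reset key with netdev_rss_key_fill(), which hands every caller the same boot-time random RSS key so hashing stays consistent across queues. A rough user-space analogue of that behaviour, sketched under the assumption that RSS_HASH_KEY_SIZE is the five u64 words visible in the removed lines (rand() stands in for the kernel's proper RNG and is for illustration only):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define RSS_HASH_KEY_SIZE 5  /* five 64-bit key words, as in the removed lines */
#define RSS_KEY_BYTES (RSS_HASH_KEY_SIZE * sizeof(unsigned long long))

/* Analogue of netdev_rss_key_fill(): generate one random key lazily and
 * return the same key to every caller, so all consumers hash identically. */
static void rss_key_fill(void *buf, size_t len)
{
	static unsigned char key[RSS_KEY_BYTES];
	static int initialized;

	if (!initialized) {
		srand((unsigned)time(NULL));
		for (size_t i = 0; i < sizeof(key); i++)
			key[i] = (unsigned char)rand();
		initialized = 1;
	}
	memcpy(buf, key, len < sizeof(key) ? len : sizeof(key));
}

int main(void)
{
	unsigned char rss_key[RSS_KEY_BYTES];

	rss_key_fill(rss_key, sizeof(rss_key));
	for (size_t i = 0; i < sizeof(rss_key); i++)
		printf("%02x", rss_key[i]);
	printf("\n");
	return 0;
}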
@@ -507,7 +515,8 @@ static int nicvf_init_resources(struct nicvf *nic)
 
 static void nicvf_snd_pkt_handler(struct net_device *netdev,
                                  struct cmp_queue *cq,
-                                 struct cqe_send_t *cqe_tx, int cqe_type)
+                                 struct cqe_send_t *cqe_tx,
+                                 int cqe_type, int budget)
 {
        struct sk_buff *skb = NULL;
        struct nicvf *nic = netdev_priv(netdev);
@@ -531,7 +540,7 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev,
        if (skb) {
                nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
                prefetch(skb);
-               dev_consume_skb_any(skb);
+               napi_consume_skb(skb, budget);
                sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
        } else {
                /* In case of HW TSO, HW sends a CQE for each segment of a TSO
@@ -686,7 +695,8 @@ loop:
                break;
                case CQE_TYPE_SEND:
                        nicvf_snd_pkt_handler(netdev, cq,
-                                             (void *)cq_desc, CQE_TYPE_SEND);
+                                             (void *)cq_desc, CQE_TYPE_SEND,
+                                             budget);
                        tx_done++;
                break;
                case CQE_TYPE_INVALID:
@@ -928,16 +938,19 @@ static int nicvf_register_interrupts(struct nicvf *nic)
        int vector;
 
        for_each_cq_irq(irq)
-               sprintf(nic->irq_name[irq], "NICVF%d CQ%d",
-                       nic->vf_id, irq);
+               sprintf(nic->irq_name[irq], "%s-rxtx-%d",
+                       nic->pnicvf->netdev->name,
+                       nicvf_netdev_qidx(nic, irq));
 
        for_each_sq_irq(irq)
-               sprintf(nic->irq_name[irq], "NICVF%d SQ%d",
-                       nic->vf_id, irq - NICVF_INTR_ID_SQ);
+               sprintf(nic->irq_name[irq], "%s-sq-%d",
+                       nic->pnicvf->netdev->name,
+                       nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ));
 
        for_each_rbdr_irq(irq)
-               sprintf(nic->irq_name[irq], "NICVF%d RBDR%d",
-                       nic->vf_id, irq - NICVF_INTR_ID_RBDR);
+               sprintf(nic->irq_name[irq], "%s-rbdr-%d",
+                       nic->pnicvf->netdev->name,
+                       nic->sqs_mode ? (nic->sqs_id + 1) : 0);
 
        /* Register CQ interrupts */
        for (irq = 0; irq < nic->qs->cq_cnt; irq++) {
@@ -961,8 +974,9 @@ static int nicvf_register_interrupts(struct nicvf *nic)
        }
 
        /* Register QS error interrupt */
-       sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR],
-               "NICVF%d Qset error", nic->vf_id);
+       sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d",
+               nic->pnicvf->netdev->name,
+               nic->sqs_mode ? (nic->sqs_id + 1) : 0);
        irq = NICVF_INTR_ID_QS_ERR;
        ret = request_irq(nic->msix_entries[irq].vector,
                          nicvf_qs_err_intr_handler,
@@ -1191,7 +1205,7 @@ int nicvf_open(struct net_device *netdev)
        }
 
        /* Check if we got MAC address from PF or else generate a random MAC */
-       if (is_zero_ether_addr(netdev->dev_addr)) {
+       if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) {
                eth_hw_addr_random(netdev);
                nicvf_hw_set_mac_addr(nic, netdev);
        }
@@ -1527,14 +1541,13 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_release_regions;
        }
 
-       qcount = MAX_CMP_QUEUES_PER_QS;
+       qcount = netif_get_num_default_rss_queues();
 
        /* Restrict multiqset support only for host bound VFs */
        if (pdev->is_virtfn) {
                /* Set max number of queues per VF */
-               qcount = roundup(num_online_cpus(), MAX_CMP_QUEUES_PER_QS);
-               qcount = min(qcount,
-                            (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
+               qcount = min_t(int, num_online_cpus(),
+                              (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
        }
 
        netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount);
index 0ff8e60..7d90856 100644 (file)
@@ -479,6 +479,16 @@ void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features)
                                              NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
 }
 
+static void nicvf_reset_rcv_queue_stats(struct nicvf *nic)
+{
+       union nic_mbx mbx = {};
+
+       /* Reset all RXQ's stats */
+       mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER;
+       mbx.reset_stat.rq_stat_mask = 0xFFFF;
+       nicvf_send_msg_to_pf(nic, &mbx);
+}
+
 /* Configures receive queue */
 static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
                                   int qidx, bool enable)
@@ -762,10 +772,10 @@ int nicvf_set_qset_resources(struct nicvf *nic)
        nic->qs = qs;
 
        /* Set count of each queue */
-       qs->rbdr_cnt = RBDR_CNT;
-       qs->rq_cnt = RCV_QUEUE_CNT;
-       qs->sq_cnt = SND_QUEUE_CNT;
-       qs->cq_cnt = CMP_QUEUE_CNT;
+       qs->rbdr_cnt = DEFAULT_RBDR_CNT;
+       qs->rq_cnt = min_t(u8, MAX_RCV_QUEUES_PER_QS, num_online_cpus());
+       qs->sq_cnt = min_t(u8, MAX_SND_QUEUES_PER_QS, num_online_cpus());
+       qs->cq_cnt = max_t(u8, qs->rq_cnt, qs->sq_cnt);
 
        /* Set queue lengths */
        qs->rbdr_len = RCV_BUF_COUNT;
@@ -812,6 +822,11 @@ int nicvf_config_data_transfer(struct nicvf *nic, bool enable)
                nicvf_free_resources(nic);
        }
 
+       /* Reset RXQ's stats.
+        * SQ's stats will get reset automatically once SQ is reset.
+        */
+       nicvf_reset_rcv_queue_stats(nic);
+
        return 0;
 }
 
@@ -1184,13 +1199,23 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
        int frag;
        int payload_len = 0;
        struct sk_buff *skb = NULL;
-       struct sk_buff *skb_frag = NULL;
-       struct sk_buff *prev_frag = NULL;
+       struct page *page;
+       int offset;
        u16 *rb_lens = NULL;
        u64 *rb_ptrs = NULL;
 
        rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
-       rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
+       /* On all chips except 88xx pass1, CQE_RX2_S is added to
+        * CQE_RX at word6, hence the buffer pointers move by one word.
+        *
+        * Use the existing 'hw_tso' flag, which is set for all chips
+        * except 88xx pass1, instead of taking an additional cache line
+        * access (or miss) by reading the PCI dev's revision.
+        */
+       if (!nic->hw_tso)
+               rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
+       else
+               rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));
 
        netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n",
                   __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);
@@ -1208,22 +1233,10 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
                        skb_put(skb, payload_len);
                } else {
                        /* Add fragments */
-                       skb_frag = nicvf_rb_ptr_to_skb(nic, *rb_ptrs,
-                                                      payload_len);
-                       if (!skb_frag) {
-                               dev_kfree_skb(skb);
-                               return NULL;
-                       }
-
-                       if (!skb_shinfo(skb)->frag_list)
-                               skb_shinfo(skb)->frag_list = skb_frag;
-                       else
-                               prev_frag->next = skb_frag;
-
-                       prev_frag = skb_frag;
-                       skb->len += payload_len;
-                       skb->data_len += payload_len;
-                       skb_frag->len = payload_len;
+                       page = virt_to_page(phys_to_virt(*rb_ptrs));
+                       offset = phys_to_virt(*rb_ptrs) - page_address(page);
+                       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+                                       offset, payload_len, RCV_FRAG_LEN);
                }
                /* Next buffer pointer */
                rb_ptrs++;
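In the hunk above, the receive path stops chaining per-fragment skbs and instead attaches the receive-buffer page directly via skb_add_rx_frag(); the offset passed in is just the buffer's position within its page. A standalone sketch of that arithmetic, assuming a 4 KiB page and a non-compound page so virt_to_page()/page_address() reduce to masking the address down to its page boundary:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096u
#define PAGE_MASK (~(uintptr_t)(PAGE_SIZE - 1))

/* For a non-compound page, the fragment offset used with skb_add_rx_frag()
 * is simply the distance from the page-aligned base to the buffer address. */
static unsigned int frag_offset(uintptr_t buf_addr)
{
	uintptr_t page_start = buf_addr & PAGE_MASK;

	return (unsigned int)(buf_addr - page_start);
}

int main(void)
{
	uintptr_t rb_ptr = 0x12345a80u; /* hypothetical receive-buffer address */

	printf("offset within page: %u\n", frag_offset(rb_ptr)); /* 0xa80 = 2688 */
	return 0;
}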
index 6673e11..869f338 100644 (file)
 #define CMP_QUEUE_SIZE6                6ULL /* 64K entries */
 
 /* Default queue count per QS, its lengths and threshold values */
-#define RBDR_CNT               1
-#define RCV_QUEUE_CNT          8
-#define SND_QUEUE_CNT          8
-#define CMP_QUEUE_CNT          8 /* Max of RCV and SND qcount */
+#define DEFAULT_RBDR_CNT       1
 
 #define SND_QSIZE              SND_QUEUE_SIZE2
 #define SND_QUEUE_LEN          (1ULL << (SND_QSIZE + 10))
index 63a39ac..8bbaedb 100644 (file)
@@ -28,6 +28,9 @@ struct lmac {
        struct bgx              *bgx;
        int                     dmac;
        u8                      mac[ETH_ALEN];
+       u8                      lmac_type;
+       u8                      lane_to_sds;
+       bool                    use_training;
        bool                    link_up;
        int                     lmacid; /* ID within BGX */
        int                     lmacid_bd; /* ID on board */
@@ -43,14 +46,13 @@ struct lmac {
 
 struct bgx {
        u8                      bgx_id;
-       u8                      qlm_mode;
        struct  lmac            lmac[MAX_LMAC_PER_BGX];
        int                     lmac_count;
-       int                     lmac_type;
-       int                     lane_to_sds;
-       int                     use_training;
+       u8                      max_lmac;
        void __iomem            *reg_base;
        struct pci_dev          *pdev;
+       bool                    is_dlm;
+       bool                    is_rgx;
 };
 
 static struct bgx *bgx_vnic[MAX_BGX_THUNDER];
@@ -61,6 +63,7 @@ static int bgx_xaui_check_link(struct lmac *lmac);
 /* Supported devices */
 static const struct pci_device_id bgx_id_table[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_BGX) },
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_RGX) },
        { 0, }  /* end of table */
 };
 
@@ -124,8 +127,8 @@ unsigned bgx_get_map(int node)
        int i;
        unsigned map = 0;
 
-       for (i = 0; i < MAX_BGX_PER_CN88XX; i++) {
-               if (bgx_vnic[(node * MAX_BGX_PER_CN88XX) + i])
+       for (i = 0; i < MAX_BGX_PER_NODE; i++) {
+               if (bgx_vnic[(node * MAX_BGX_PER_NODE) + i])
                        map |= (1 << i);
        }
 
@@ -138,7 +141,7 @@ int bgx_get_lmac_count(int node, int bgx_idx)
 {
        struct bgx *bgx;
 
-       bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
        if (bgx)
                return bgx->lmac_count;
 
@@ -153,7 +156,7 @@ void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status)
        struct bgx *bgx;
        struct lmac *lmac;
 
-       bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
        if (!bgx)
                return;
 
@@ -166,7 +169,7 @@ EXPORT_SYMBOL(bgx_get_lmac_link_state);
 
 const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid)
 {
-       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 
        if (bgx)
                return bgx->lmac[lmacid].mac;
@@ -177,7 +180,7 @@ EXPORT_SYMBOL(bgx_get_lmac_mac);
 
 void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
 {
-       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 
        if (!bgx)
                return;
@@ -188,11 +191,13 @@ EXPORT_SYMBOL(bgx_set_lmac_mac);
 
 void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
 {
-       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       struct lmac *lmac;
        u64 cfg;
 
        if (!bgx)
                return;
+       lmac = &bgx->lmac[lmacid];
 
        cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
        if (enable)
@@ -200,6 +205,9 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
        else
                cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN);
        bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+       if (bgx->is_rgx)
+               xcv_setup_link(enable ? lmac->link_up : 0, lmac->last_speed);
 }
 EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
 
@@ -266,9 +274,12 @@ static void bgx_sgmii_change_link_state(struct lmac *lmac)
 
        port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG);
 
-       /* renable lmac */
+       /* Re-enable lmac */
        cmr_cfg |= CMR_EN;
        bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg);
+
+       if (bgx->is_rgx && (cmr_cfg & (CMR_PKT_RX_EN | CMR_PKT_TX_EN)))
+               xcv_setup_link(lmac->link_up, lmac->last_speed);
 }
 
 static void bgx_lmac_handler(struct net_device *netdev)
@@ -314,7 +325,7 @@ u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx)
 {
        struct bgx *bgx;
 
-       bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
        if (!bgx)
                return 0;
 
@@ -328,7 +339,7 @@ u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx)
 {
        struct bgx *bgx;
 
-       bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
        if (!bgx)
                return 0;
 
@@ -356,7 +367,7 @@ void bgx_lmac_internal_loopback(int node, int bgx_idx,
        struct lmac *lmac;
        u64    cfg;
 
-       bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
        if (!bgx)
                return;
 
@@ -379,8 +390,9 @@ void bgx_lmac_internal_loopback(int node, int bgx_idx,
 }
 EXPORT_SYMBOL(bgx_lmac_internal_loopback);
 
-static int bgx_lmac_sgmii_init(struct bgx *bgx, int lmacid)
+static int bgx_lmac_sgmii_init(struct bgx *bgx, struct lmac *lmac)
 {
+       int lmacid = lmac->lmacid;
        u64 cfg;
 
        bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_THRESH, 0x30);
@@ -409,18 +421,29 @@ static int bgx_lmac_sgmii_init(struct bgx *bgx, int lmacid)
        cfg |= (PCS_MRX_CTL_RST_AN | PCS_MRX_CTL_AN_EN);
        bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, cfg);
 
-       if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS,
-                        PCS_MRX_STATUS_AN_CPT, false)) {
-               dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n");
-               return -1;
+       if (lmac->lmac_type == BGX_MODE_QSGMII) {
+               /* Disable disparity check for QSGMII */
+               cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_PCS_MISCX_CTL);
+               cfg &= ~PCS_MISC_CTL_DISP_EN;
+               bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MISCX_CTL, cfg);
+               return 0;
+       }
+
+       if (lmac->lmac_type == BGX_MODE_SGMII) {
+               if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS,
+                                PCS_MRX_STATUS_AN_CPT, false)) {
+                       dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n");
+                       return -1;
+               }
        }
 
        return 0;
 }
 
-static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type)
+static int bgx_lmac_xaui_init(struct bgx *bgx, struct lmac *lmac)
 {
        u64 cfg;
+       int lmacid = lmac->lmacid;
 
        /* Reset SPU */
        bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET);
@@ -436,12 +459,14 @@ static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type)
 
        bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER);
        /* Set interleaved running disparity for RXAUI */
-       if (bgx->lmac_type != BGX_MODE_RXAUI)
-               bgx_reg_modify(bgx, lmacid,
-                              BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS);
-       else
+       if (lmac->lmac_type == BGX_MODE_RXAUI)
                bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL,
-                              SPU_MISC_CTL_RX_DIS | SPU_MISC_CTL_INTLV_RDISP);
+                              SPU_MISC_CTL_INTLV_RDISP);
+
+       /* Clear receive packet disable */
+       cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL);
+       cfg &= ~SPU_MISC_CTL_RX_DIS;
+       bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg);
 
        /* clear all interrupts */
        cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_INT);
@@ -451,7 +476,7 @@ static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type)
        cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
        bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg);
 
-       if (bgx->use_training) {
+       if (lmac->use_training) {
                bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LP_CUP, 0x00);
                bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_CUP, 0x00);
                bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_REP, 0x00);
@@ -474,9 +499,9 @@ static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type)
        bgx_reg_write(bgx, lmacid, BGX_SPUX_AN_CONTROL, cfg);
 
        cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_AN_ADV);
-       if (bgx->lmac_type == BGX_MODE_10G_KR)
+       if (lmac->lmac_type == BGX_MODE_10G_KR)
                cfg |= (1 << 23);
-       else if (bgx->lmac_type == BGX_MODE_40G_KR)
+       else if (lmac->lmac_type == BGX_MODE_40G_KR)
                cfg |= (1 << 24);
        else
                cfg &= ~((1 << 23) | (1 << 24));
@@ -511,11 +536,10 @@ static int bgx_xaui_check_link(struct lmac *lmac)
 {
        struct bgx *bgx = lmac->bgx;
        int lmacid = lmac->lmacid;
-       int lmac_type = bgx->lmac_type;
+       int lmac_type = lmac->lmac_type;
        u64 cfg;
 
-       bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS);
-       if (bgx->use_training) {
+       if (lmac->use_training) {
                cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
                if (!(cfg & (1ull << 13))) {
                        cfg = (1ull << 13) | (1ull << 14);
@@ -556,7 +580,7 @@ static int bgx_xaui_check_link(struct lmac *lmac)
                               BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT);
        if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) {
                dev_err(&bgx->pdev->dev, "Receive fault, retry training\n");
-               if (bgx->use_training) {
+               if (lmac->use_training) {
                        cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
                        if (!(cfg & (1ull << 13))) {
                                cfg = (1ull << 13) | (1ull << 14);
@@ -584,11 +608,6 @@ static int bgx_xaui_check_link(struct lmac *lmac)
                return -1;
        }
 
-       /* Clear receive packet disable */
-       cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL);
-       cfg &= ~SPU_MISC_CTL_RX_DIS;
-       bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg);
-
        /* Check for MAC RX faults */
        cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_CTL);
        /* 0 - Link is okay, 1 - Local fault, 2 - Remote fault */
@@ -599,7 +618,7 @@ static int bgx_xaui_check_link(struct lmac *lmac)
        /* Rx local/remote fault seen.
         * Do lmac reinit to see if condition recovers
         */
-       bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type);
+       bgx_lmac_xaui_init(bgx, lmac);
 
        return -1;
 }
@@ -623,7 +642,7 @@ static void bgx_poll_for_link(struct work_struct *work)
        if ((spu_link & SPU_STATUS1_RCV_LNK) &&
            !(smu_link & SMU_RX_CTL_STATUS)) {
                lmac->link_up = 1;
-               if (lmac->bgx->lmac_type == BGX_MODE_XLAUI)
+               if (lmac->lmac_type == BGX_MODE_XLAUI)
                        lmac->last_speed = 40000;
                else
                        lmac->last_speed = 10000;
@@ -649,6 +668,16 @@ static void bgx_poll_for_link(struct work_struct *work)
        queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2);
 }
 
+static int phy_interface_mode(u8 lmac_type)
+{
+       if (lmac_type == BGX_MODE_QSGMII)
+               return PHY_INTERFACE_MODE_QSGMII;
+       if (lmac_type == BGX_MODE_RGMII)
+               return PHY_INTERFACE_MODE_RGMII;
+
+       return PHY_INTERFACE_MODE_SGMII;
+}
+
 static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
 {
        struct lmac *lmac;
@@ -657,13 +686,15 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
        lmac = &bgx->lmac[lmacid];
        lmac->bgx = bgx;
 
-       if (bgx->lmac_type == BGX_MODE_SGMII) {
+       if ((lmac->lmac_type == BGX_MODE_SGMII) ||
+           (lmac->lmac_type == BGX_MODE_QSGMII) ||
+           (lmac->lmac_type == BGX_MODE_RGMII)) {
                lmac->is_sgmii = 1;
-               if (bgx_lmac_sgmii_init(bgx, lmacid))
+               if (bgx_lmac_sgmii_init(bgx, lmac))
                        return -1;
        } else {
                lmac->is_sgmii = 0;
-               if (bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type))
+               if (bgx_lmac_xaui_init(bgx, lmac))
                        return -1;
        }
 
@@ -685,10 +716,10 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
        /* Restore default cfg, in case low level firmware changed it */
        bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03);
 
-       if ((bgx->lmac_type != BGX_MODE_XFI) &&
-           (bgx->lmac_type != BGX_MODE_XLAUI) &&
-           (bgx->lmac_type != BGX_MODE_40G_KR) &&
-           (bgx->lmac_type != BGX_MODE_10G_KR)) {
+       if ((lmac->lmac_type != BGX_MODE_XFI) &&
+           (lmac->lmac_type != BGX_MODE_XLAUI) &&
+           (lmac->lmac_type != BGX_MODE_40G_KR) &&
+           (lmac->lmac_type != BGX_MODE_10G_KR)) {
                if (!lmac->phydev)
                        return -ENODEV;
 
@@ -696,7 +727,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
 
                if (phy_connect_direct(&lmac->netdev, lmac->phydev,
                                       bgx_lmac_handler,
-                                      PHY_INTERFACE_MODE_SGMII))
+                                      phy_interface_mode(lmac->lmac_type)))
                        return -ENODEV;
 
                phy_start_aneg(lmac->phydev);
@@ -753,76 +784,19 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
 
        bgx_flush_dmac_addrs(bgx, lmacid);
 
-       if ((bgx->lmac_type != BGX_MODE_XFI) &&
-           (bgx->lmac_type != BGX_MODE_XLAUI) &&
-           (bgx->lmac_type != BGX_MODE_40G_KR) &&
-           (bgx->lmac_type != BGX_MODE_10G_KR) && lmac->phydev)
+       if ((lmac->lmac_type != BGX_MODE_XFI) &&
+           (lmac->lmac_type != BGX_MODE_XLAUI) &&
+           (lmac->lmac_type != BGX_MODE_40G_KR) &&
+           (lmac->lmac_type != BGX_MODE_10G_KR) && lmac->phydev)
                phy_disconnect(lmac->phydev);
 
        lmac->phydev = NULL;
 }
 
-static void bgx_set_num_ports(struct bgx *bgx)
-{
-       u64 lmac_count;
-
-       switch (bgx->qlm_mode) {
-       case QLM_MODE_SGMII:
-               bgx->lmac_count = 4;
-               bgx->lmac_type = BGX_MODE_SGMII;
-               bgx->lane_to_sds = 0;
-               break;
-       case QLM_MODE_XAUI_1X4:
-               bgx->lmac_count = 1;
-               bgx->lmac_type = BGX_MODE_XAUI;
-               bgx->lane_to_sds = 0xE4;
-                       break;
-       case QLM_MODE_RXAUI_2X2:
-               bgx->lmac_count = 2;
-               bgx->lmac_type = BGX_MODE_RXAUI;
-               bgx->lane_to_sds = 0xE4;
-                       break;
-       case QLM_MODE_XFI_4X1:
-               bgx->lmac_count = 4;
-               bgx->lmac_type = BGX_MODE_XFI;
-               bgx->lane_to_sds = 0;
-               break;
-       case QLM_MODE_XLAUI_1X4:
-               bgx->lmac_count = 1;
-               bgx->lmac_type = BGX_MODE_XLAUI;
-               bgx->lane_to_sds = 0xE4;
-               break;
-       case QLM_MODE_10G_KR_4X1:
-               bgx->lmac_count = 4;
-               bgx->lmac_type = BGX_MODE_10G_KR;
-               bgx->lane_to_sds = 0;
-               bgx->use_training = 1;
-               break;
-       case QLM_MODE_40G_KR4_1X4:
-               bgx->lmac_count = 1;
-               bgx->lmac_type = BGX_MODE_40G_KR;
-               bgx->lane_to_sds = 0xE4;
-               bgx->use_training = 1;
-               break;
-       default:
-               bgx->lmac_count = 0;
-               break;
-       }
-
-       /* Check if low level firmware has programmed LMAC count
-        * based on board type, if yes consider that otherwise
-        * the default static values
-        */
-       lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7;
-       if (lmac_count != 4)
-               bgx->lmac_count = lmac_count;
-}
-
 static void bgx_init_hw(struct bgx *bgx)
 {
        int i;
-
-       bgx_set_num_ports(bgx);
+       struct lmac *lmac;
 
        bgx_reg_modify(bgx, 0, BGX_CMR_GLOBAL_CFG, CMR_GLOBAL_CFG_FCS_STRIP);
        if (bgx_reg_read(bgx, 0, BGX_CMR_BIST_STATUS))
@@ -830,17 +804,9 @@ static void bgx_init_hw(struct bgx *bgx)
 
        /* Set lmac type and lane2serdes mapping */
        for (i = 0; i < bgx->lmac_count; i++) {
-               if (bgx->lmac_type == BGX_MODE_RXAUI) {
-                       if (i)
-                               bgx->lane_to_sds = 0x0e;
-                       else
-                               bgx->lane_to_sds = 0x04;
-                       bgx_reg_write(bgx, i, BGX_CMRX_CFG,
-                                     (bgx->lmac_type << 8) | bgx->lane_to_sds);
-                       continue;
-               }
+               lmac = &bgx->lmac[i];
                bgx_reg_write(bgx, i, BGX_CMRX_CFG,
-                             (bgx->lmac_type << 8) | (bgx->lane_to_sds + i));
+                             (lmac->lmac_type << 8) | lmac->lane_to_sds);
                bgx->lmac[i].lmacid_bd = lmac_count;
                lmac_count++;
        }
@@ -863,55 +829,212 @@ static void bgx_init_hw(struct bgx *bgx)
                bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00);
 }
 
-static void bgx_get_qlm_mode(struct bgx *bgx)
+static u8 bgx_get_lane2sds_cfg(struct bgx *bgx, struct lmac *lmac)
+{
+       return (u8)(bgx_reg_read(bgx, lmac->lmacid, BGX_CMRX_CFG) & 0xFF);
+}
+
+static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid)
 {
        struct device *dev = &bgx->pdev->dev;
-       int lmac_type;
-       int train_en;
+       struct lmac *lmac;
+       char str[20];
+       u8 dlm;
 
-       /* Read LMAC0 type to figure out QLM mode
-        * This is configured by low level firmware
-        */
-       lmac_type = bgx_reg_read(bgx, 0, BGX_CMRX_CFG);
-       lmac_type = (lmac_type >> 8) & 0x07;
+       if (lmacid > bgx->max_lmac)
+               return;
 
-       train_en = bgx_reg_read(bgx, 0, BGX_SPUX_BR_PMD_CRTL) &
-                               SPU_PMD_CRTL_TRAIN_EN;
+       lmac = &bgx->lmac[lmacid];
+       dlm = (lmacid / 2) + (bgx->bgx_id * 2);
+       if (!bgx->is_dlm)
+               sprintf(str, "BGX%d QLM mode", bgx->bgx_id);
+       else
+               sprintf(str, "BGX%d DLM%d mode", bgx->bgx_id, dlm);
 
-       switch (lmac_type) {
+       switch (lmac->lmac_type) {
        case BGX_MODE_SGMII:
-               bgx->qlm_mode = QLM_MODE_SGMII;
-               dev_info(dev, "BGX%d QLM mode: SGMII\n", bgx->bgx_id);
+               dev_info(dev, "%s: SGMII\n", (char *)str);
                break;
        case BGX_MODE_XAUI:
-               bgx->qlm_mode = QLM_MODE_XAUI_1X4;
-               dev_info(dev, "BGX%d QLM mode: XAUI\n", bgx->bgx_id);
+               dev_info(dev, "%s: XAUI\n", (char *)str);
                break;
        case BGX_MODE_RXAUI:
-               bgx->qlm_mode = QLM_MODE_RXAUI_2X2;
-               dev_info(dev, "BGX%d QLM mode: RXAUI\n", bgx->bgx_id);
+               dev_info(dev, "%s: RXAUI\n", (char *)str);
                break;
        case BGX_MODE_XFI:
-               if (!train_en) {
-                       bgx->qlm_mode = QLM_MODE_XFI_4X1;
-                       dev_info(dev, "BGX%d QLM mode: XFI\n", bgx->bgx_id);
-               } else {
-                       bgx->qlm_mode = QLM_MODE_10G_KR_4X1;
-                       dev_info(dev, "BGX%d QLM mode: 10G_KR\n", bgx->bgx_id);
-               }
+               if (!lmac->use_training)
+                       dev_info(dev, "%s: XFI\n", (char *)str);
+               else
+                       dev_info(dev, "%s: 10G_KR\n", (char *)str);
                break;
        case BGX_MODE_XLAUI:
-               if (!train_en) {
-                       bgx->qlm_mode = QLM_MODE_XLAUI_1X4;
-                       dev_info(dev, "BGX%d QLM mode: XLAUI\n", bgx->bgx_id);
-               } else {
-                       bgx->qlm_mode = QLM_MODE_40G_KR4_1X4;
-                       dev_info(dev, "BGX%d QLM mode: 40G_KR4\n", bgx->bgx_id);
-               }
+               if (!lmac->use_training)
+                       dev_info(dev, "%s: XLAUI\n", (char *)str);
+               else
+                       dev_info(dev, "%s: 40G_KR4\n", (char *)str);
+               break;
+       case BGX_MODE_QSGMII:
+               if ((lmacid == 0) &&
+                   (bgx_get_lane2sds_cfg(bgx, lmac) != lmacid))
+                       return;
+               if ((lmacid == 2) &&
+                   (bgx_get_lane2sds_cfg(bgx, lmac) == lmacid))
+                       return;
+               dev_info(dev, "%s: QSGMII\n", (char *)str);
+               break;
+       case BGX_MODE_RGMII:
+               dev_info(dev, "%s: RGMII\n", (char *)str);
+               break;
+       case BGX_MODE_INVALID:
+               /* Nothing to do */
+               break;
+       }
+}
+
+static void lmac_set_lane2sds(struct bgx *bgx, struct lmac *lmac)
+{
+       switch (lmac->lmac_type) {
+       case BGX_MODE_SGMII:
+       case BGX_MODE_XFI:
+               lmac->lane_to_sds = lmac->lmacid;
+               break;
+       case BGX_MODE_XAUI:
+       case BGX_MODE_XLAUI:
+       case BGX_MODE_RGMII:
+               lmac->lane_to_sds = 0xE4;
+               break;
+       case BGX_MODE_RXAUI:
+               lmac->lane_to_sds = (lmac->lmacid) ? 0xE : 0x4;
+               break;
+       case BGX_MODE_QSGMII:
+               /* There is no way to determine whether DLM0/2 or DLM1/3
+                * is configured as QSGMII, since the bootloader will
+                * configure all LMACs, so take whatever is configured
+                * by the low level firmware.
+                */
+               lmac->lane_to_sds = bgx_get_lane2sds_cfg(bgx, lmac);
                break;
        default:
-               bgx->qlm_mode = QLM_MODE_SGMII;
-               dev_info(dev, "BGX%d QLM default mode: SGMII\n", bgx->bgx_id);
+               lmac->lane_to_sds = 0;
+               break;
+       }
+}
+
+static void lmac_set_training(struct bgx *bgx, struct lmac *lmac, int lmacid)
+{
+       if ((lmac->lmac_type != BGX_MODE_10G_KR) &&
+           (lmac->lmac_type != BGX_MODE_40G_KR)) {
+               lmac->use_training = 0;
+               return;
+       }
+
+       lmac->use_training = bgx_reg_read(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL) &
+                                                       SPU_PMD_CRTL_TRAIN_EN;
+}
+
+static void bgx_set_lmac_config(struct bgx *bgx, u8 idx)
+{
+       struct lmac *lmac;
+       struct lmac *olmac;
+       u64 cmr_cfg;
+       u8 lmac_type;
+       u8 lane_to_sds;
+
+       lmac = &bgx->lmac[idx];
+
+       if (!bgx->is_dlm || bgx->is_rgx) {
+               /* Read LMAC0 type to figure out QLM mode
+                * This is configured by low level firmware
+                */
+               cmr_cfg = bgx_reg_read(bgx, 0, BGX_CMRX_CFG);
+               lmac->lmac_type = (cmr_cfg >> 8) & 0x07;
+               if (bgx->is_rgx)
+                       lmac->lmac_type = BGX_MODE_RGMII;
+               lmac_set_training(bgx, lmac, 0);
+               lmac_set_lane2sds(bgx, lmac);
+               return;
+       }
+
+       /* On 81xx a BGX can be split across 2 DLMs;
+        * firmware programs the lmac_type of LMAC0 and LMAC2
+        */
+       if ((idx == 0) || (idx == 2)) {
+               cmr_cfg = bgx_reg_read(bgx, idx, BGX_CMRX_CFG);
+               lmac_type = (u8)((cmr_cfg >> 8) & 0x07);
+               lane_to_sds = (u8)(cmr_cfg & 0xFF);
+               /* Check if config is not reset value */
+               if ((lmac_type == 0) && (lane_to_sds == 0xE4))
+                       lmac->lmac_type = BGX_MODE_INVALID;
+               else
+                       lmac->lmac_type = lmac_type;
+               lmac_set_training(bgx, lmac, lmac->lmacid);
+               lmac_set_lane2sds(bgx, lmac);
+
+               /* Set LMAC type of other lmac on same DLM i.e LMAC 1/3 */
+               olmac = &bgx->lmac[idx + 1];
+               olmac->lmac_type = lmac->lmac_type;
+               lmac_set_training(bgx, olmac, olmac->lmacid);
+               lmac_set_lane2sds(bgx, olmac);
+       }
+}
+
+static bool is_dlm0_in_bgx_mode(struct bgx *bgx)
+{
+       struct lmac *lmac;
+
+       if (!bgx->is_dlm)
+               return true;
+
+       lmac = &bgx->lmac[0];
+       if (lmac->lmac_type == BGX_MODE_INVALID)
+               return false;
+
+       return true;
+}
+
+static void bgx_get_qlm_mode(struct bgx *bgx)
+{
+       struct lmac *lmac;
+       struct lmac *lmac01;
+       struct lmac *lmac23;
+       u8  idx;
+
+       /* Init all LMAC's type to invalid */
+       for (idx = 0; idx < bgx->max_lmac; idx++) {
+               lmac = &bgx->lmac[idx];
+               lmac->lmacid = idx;
+               lmac->lmac_type = BGX_MODE_INVALID;
+               lmac->use_training = false;
+       }
+
+       /* It is assumed that low level firmware sets this value */
+       bgx->lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7;
+       if (bgx->lmac_count > bgx->max_lmac)
+               bgx->lmac_count = bgx->max_lmac;
+
+       for (idx = 0; idx < bgx->max_lmac; idx++)
+               bgx_set_lmac_config(bgx, idx);
+
+       if (!bgx->is_dlm || bgx->is_rgx) {
+               bgx_print_qlm_mode(bgx, 0);
+               return;
+       }
+
+       if (bgx->lmac_count) {
+               bgx_print_qlm_mode(bgx, 0);
+               bgx_print_qlm_mode(bgx, 2);
+       }
+
+       /* If DLM0 is not in BGX mode then LMAC0/1 have
+        * to be configured with serdes lanes of DLM1
+        */
+       if (is_dlm0_in_bgx_mode(bgx) || (bgx->lmac_count > 2))
+               return;
+       for (idx = 0; idx < bgx->lmac_count; idx++) {
+               lmac01 = &bgx->lmac[idx];
+               lmac23 = &bgx->lmac[idx + 2];
+               lmac01->lmac_type = lmac23->lmac_type;
+               lmac01->lane_to_sds = lmac23->lane_to_sds;
        }
 }
 
@@ -1042,7 +1165,7 @@ static int bgx_init_of_phy(struct bgx *bgx)
                }
 
                lmac++;
-               if (lmac == MAX_LMAC_PER_BGX) {
+               if (lmac == bgx->max_lmac) {
                        of_node_put(node);
                        break;
                }
@@ -1087,6 +1210,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct device *dev = &pdev->dev;
        struct bgx *bgx = NULL;
        u8 lmac;
+       u16 sdevid;
 
        bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL);
        if (!bgx)
@@ -1115,10 +1239,30 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                err = -ENOMEM;
                goto err_release_regions;
        }
-       bgx->bgx_id = (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1;
-       bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_CN88XX;
 
-       bgx_vnic[bgx->bgx_id] = bgx;
+       pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid);
+       if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) {
+               bgx->bgx_id =
+                   (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1;
+               bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE;
+               bgx->max_lmac = MAX_LMAC_PER_BGX;
+               bgx_vnic[bgx->bgx_id] = bgx;
+       } else {
+               bgx->is_rgx = true;
+               bgx->max_lmac = 1;
+               bgx->bgx_id = MAX_BGX_PER_CN81XX - 1;
+               bgx_vnic[bgx->bgx_id] = bgx;
+               xcv_init_hw();
+       }
+
+       /* On 81xx all are DLMs, and on 83xx there are 3 BGX QLMs and one
+        * BGX, i.e. BGX2, can be split across 2 DLMs.
+        */
+       pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
+       if ((sdevid == PCI_SUBSYS_DEVID_81XX_BGX) ||
+           ((sdevid == PCI_SUBSYS_DEVID_83XX_BGX) && (bgx->bgx_id == 2)))
+               bgx->is_dlm = true;
+
        bgx_get_qlm_mode(bgx);
 
        err = bgx_init_phy(bgx);
@@ -1133,6 +1277,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                if (err) {
                        dev_err(dev, "BGX%d failed to enable lmac%d\n",
                                bgx->bgx_id, lmac);
+                       while (lmac)
+                               bgx_lmac_disable(bgx, --lmac);
                        goto err_enable;
                }
        }
index 42010d2..d59c71e 100644 (file)
@@ -9,8 +9,20 @@
 #ifndef THUNDER_BGX_H
 #define THUNDER_BGX_H
 
-#define    MAX_BGX_THUNDER                     8 /* Max 4 nodes, 2 per node */
+/* PCI device ID */
+#define        PCI_DEVICE_ID_THUNDER_BGX               0xA026
+#define        PCI_DEVICE_ID_THUNDER_RGX               0xA054
+
+/* Subsystem device IDs */
+#define PCI_SUBSYS_DEVID_88XX_BGX              0xA126
+#define PCI_SUBSYS_DEVID_81XX_BGX              0xA226
+#define PCI_SUBSYS_DEVID_83XX_BGX              0xA326
+
+#define    MAX_BGX_THUNDER                     8 /* Max 2 nodes, 4 per node */
 #define    MAX_BGX_PER_CN88XX                  2
+#define    MAX_BGX_PER_CN81XX                  3 /* 2 BGXs + 1 RGX */
+#define    MAX_BGX_PER_CN83XX                  4
+#define    MAX_BGX_PER_NODE                    4
 #define    MAX_LMAC_PER_BGX                    4
 #define    MAX_BGX_CHANS_PER_LMAC              16
 #define    MAX_DMAC_PER_LMAC                   8
@@ -18,8 +30,6 @@
 
 #define    MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE   2
 
-#define    MAX_LMAC    (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX)
-
 /* Registers */
 #define BGX_CMRX_CFG                   0x00
 #define  CMR_PKT_TX_EN                         BIT_ULL(13)
 #define BGX_GMP_PCS_ANX_AN_RESULTS     0x30020
 #define BGX_GMP_PCS_SGM_AN_ADV         0x30068
 #define BGX_GMP_PCS_MISCX_CTL          0x30078
+#define  PCS_MISC_CTL_DISP_EN                  BIT_ULL(13)
 #define  PCS_MISC_CTL_GMX_ENO                  BIT_ULL(11)
 #define  PCS_MISC_CTL_SAMP_PT_MASK     0x7Full
 #define BGX_GMP_GMI_PRTX_CFG           0x38020
@@ -194,6 +205,9 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac);
 void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status);
 void bgx_lmac_internal_loopback(int node, int bgx_idx,
                                int lmac_idx, bool enable);
+void xcv_init_hw(void);
+void xcv_setup_link(bool link_up, int link_speed);
+
 u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx);
 u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx);
 #define BGX_RX_STATS_COUNT 11
@@ -213,16 +227,9 @@ enum LMAC_TYPE {
        BGX_MODE_XLAUI = 4, /* 4 lanes, 10.3125 Gbaud */
        BGX_MODE_10G_KR = 3,/* 1 lane, 10.3125 Gbaud */
        BGX_MODE_40G_KR = 4,/* 4 lanes, 10.3125 Gbaud */
-};
-
-enum qlm_mode {
-       QLM_MODE_SGMII,         /* SGMII, each lane independent */
-       QLM_MODE_XAUI_1X4,      /* 1 XAUI or DXAUI, 4 lanes */
-       QLM_MODE_RXAUI_2X2,     /* 2 RXAUI, 2 lanes each */
-       QLM_MODE_XFI_4X1,       /* 4 XFI, 1 lane each */
-       QLM_MODE_XLAUI_1X4,     /* 1 XLAUI, 4 lanes each */
-       QLM_MODE_10G_KR_4X1,    /* 4 10GBASE-KR, 1 lane each */
-       QLM_MODE_40G_KR4_1X4,   /* 1 40GBASE-KR4, 4 lanes each */
+       BGX_MODE_RGMII = 5,
+       BGX_MODE_QSGMII = 6,
+       BGX_MODE_INVALID = 7,
 };
 
 #endif /* THUNDER_BGX_H */
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
new file mode 100644 (file)
index 0000000..67befed
--- /dev/null
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2016 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/phy.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+
+#include "nic.h"
+#include "thunder_bgx.h"
+
+#define DRV_NAME       "thunder-xcv"
+#define DRV_VERSION    "1.0"
+
+/* Register offsets */
+#define XCV_RESET              0x00
+#define   PORT_EN              BIT_ULL(63)
+#define   CLK_RESET            BIT_ULL(15)
+#define   DLL_RESET            BIT_ULL(11)
+#define   COMP_EN              BIT_ULL(7)
+#define   TX_PKT_RESET         BIT_ULL(3)
+#define   TX_DATA_RESET                BIT_ULL(2)
+#define   RX_PKT_RESET         BIT_ULL(1)
+#define   RX_DATA_RESET                BIT_ULL(0)
+#define XCV_DLL_CTL            0x10
+#define   CLKRX_BYP            BIT_ULL(23)
+#define   CLKTX_BYP            BIT_ULL(15)
+#define XCV_COMP_CTL           0x20
+#define   DRV_BYP              BIT_ULL(63)
+#define XCV_CTL                        0x30
+#define XCV_INT                        0x40
+#define XCV_INT_W1S            0x48
+#define XCV_INT_ENA_W1C                0x50
+#define XCV_INT_ENA_W1S                0x58
+#define XCV_INBND_STATUS       0x80
+#define XCV_BATCH_CRD_RET      0x100
+
+struct xcv {
+       void __iomem            *reg_base;
+       struct pci_dev          *pdev;
+};
+
+static struct xcv *xcv;
+
+/* Supported devices */
+static const struct pci_device_id xcv_id_table[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xA056) },
+       { 0, }  /* end of table */
+};
+
+MODULE_AUTHOR("Cavium Inc");
+MODULE_DESCRIPTION("Cavium Thunder RGX/XCV Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, xcv_id_table);
+
+void xcv_init_hw(void)
+{
+       u64  cfg;
+
+       /* Take DLL out of reset */
+       cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+       cfg &= ~DLL_RESET;
+       writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+       /* Take clock tree out of reset */
+       cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+       cfg &= ~CLK_RESET;
+       writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+       /* Wait for DLL to lock */
+       msleep(1);
+
+       /* Configure DLL - enable or bypass
+        * TX no bypass, RX bypass
+        */
+       cfg = readq_relaxed(xcv->reg_base + XCV_DLL_CTL);
+       cfg &= ~0xFF03;
+       cfg |= CLKRX_BYP;
+       writeq_relaxed(cfg, xcv->reg_base + XCV_DLL_CTL);
+
+       /* Enable compensation controller and force the
+        * write to be visible to HW by reading back.
+        */
+       cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+       cfg |= COMP_EN;
+       writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+       readq_relaxed(xcv->reg_base + XCV_RESET);
+       /* Wait for compensation state machine to lock */
+       msleep(10);
+
+       /* enable the XCV block */
+       cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+       cfg |= PORT_EN;
+       writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+       cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+       cfg |= CLK_RESET;
+       writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+}
+EXPORT_SYMBOL(xcv_init_hw);
+
+void xcv_setup_link(bool link_up, int link_speed)
+{
+       u64  cfg;
+       int speed = 2;
+
+       if (!xcv) {
+               pr_err("XCV: init not done, probe may have failed\n");
+               return;
+       }
+
+       if (link_speed == 100)
+               speed = 1;
+       else if (link_speed == 10)
+               speed = 0;
+
+       if (link_up) {
+               /* set operating speed */
+               cfg = readq_relaxed(xcv->reg_base + XCV_CTL);
+               cfg &= ~0x03;
+               cfg |= speed;
+               writeq_relaxed(cfg, xcv->reg_base + XCV_CTL);
+
+               /* Reset datapaths */
+               cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+               cfg |= TX_DATA_RESET | RX_DATA_RESET;
+               writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+               /* Enable the packet flow */
+               cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+               cfg |= TX_PKT_RESET | RX_PKT_RESET;
+               writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+               /* Return credits to RGX */
+               writeq_relaxed(0x01, xcv->reg_base + XCV_BATCH_CRD_RET);
+       } else {
+               /* Disable packet flow */
+               cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+               cfg &= ~(TX_PKT_RESET | RX_PKT_RESET);
+               writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+               readq_relaxed(xcv->reg_base + XCV_RESET);
+       }
+}
+EXPORT_SYMBOL(xcv_setup_link);
+
+static int xcv_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+       int err;
+       struct device *dev = &pdev->dev;
+
+       xcv = devm_kzalloc(dev, sizeof(struct xcv), GFP_KERNEL);
+       if (!xcv)
+               return -ENOMEM;
+       xcv->pdev = pdev;
+
+       pci_set_drvdata(pdev, xcv);
+
+       err = pci_enable_device(pdev);
+       if (err) {
+               dev_err(dev, "Failed to enable PCI device\n");
+               goto err_kfree;
+       }
+
+       err = pci_request_regions(pdev, DRV_NAME);
+       if (err) {
+               dev_err(dev, "PCI request regions failed 0x%x\n", err);
+               goto err_disable_device;
+       }
+
+       /* MAP configuration registers */
+       xcv->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
+       if (!xcv->reg_base) {
+               dev_err(dev, "XCV: Cannot map CSR memory space, aborting\n");
+               err = -ENOMEM;
+               goto err_release_regions;
+       }
+
+       return 0;
+
+err_release_regions:
+       pci_release_regions(pdev);
+err_disable_device:
+       pci_disable_device(pdev);
+err_kfree:
+       devm_kfree(dev, xcv);
+       xcv = NULL;
+       return err;
+}
+
+static void xcv_remove(struct pci_dev *pdev)
+{
+       struct device *dev = &pdev->dev;
+
+       if (xcv) {
+               devm_kfree(dev, xcv);
+               xcv = NULL;
+       }
+
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
+}
+
+static struct pci_driver xcv_driver = {
+       .name = DRV_NAME,
+       .id_table = xcv_id_table,
+       .probe = xcv_probe,
+       .remove = xcv_remove,
+};
+
+static int __init xcv_init_module(void)
+{
+       pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
+
+       return pci_register_driver(&xcv_driver);
+}
+
+static void __exit xcv_cleanup_module(void)
+{
+       pci_unregister_driver(&xcv_driver);
+}
+
+module_init(xcv_init_module);
+module_exit(xcv_cleanup_module);
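From xcv_setup_link() above, the low two bits of XCV_CTL appear to select the RGMII operating speed: 0 for 10 Mbps, 1 for 100 Mbps, and 2 (the default) for 1000 Mbps. A small standalone sketch mirroring that if/else chain:

#include <stdio.h>

/* Mirrors the speed selection in xcv_setup_link(): anything other than
 * 10 or 100 Mbps falls back to the gigabit encoding (2). */
static int xcv_speed_bits(int link_speed_mbps)
{
	if (link_speed_mbps == 10)
		return 0;
	if (link_speed_mbps == 100)
		return 1;
	return 2;
}

int main(void)
{
	int speeds[] = { 10, 100, 1000 };

	for (int i = 0; i < 3; i++)
		printf("%4d Mbps -> XCV_CTL[1:0] = %d\n",
		       speeds[i], xcv_speed_bits(speeds[i]));
	return 0;
}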
index 2e2aa9f..bcfa512 100644 (file)
@@ -1521,4 +1521,7 @@ void t4_idma_monitor_init(struct adapter *adapter,
 void t4_idma_monitor(struct adapter *adapter,
                     struct sge_idma_monitor_state *idma,
                     int hz, int ticks);
+int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
+                     unsigned int naddr, u8 *addr);
+
 #endif /* __CXGB4_H__ */
index c45de49..2bb804c 100644 (file)
@@ -3078,6 +3078,26 @@ static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
        return ret;
 }
 
+#ifdef CONFIG_PCI_IOV
+static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+       struct port_info *pi = netdev_priv(dev);
+       struct adapter *adap = pi->adapter;
+
+       /* verify MAC addr is valid */
+       if (!is_valid_ether_addr(mac)) {
+               dev_err(pi->adapter->pdev_dev,
+                       "Invalid Ethernet address %pM for VF %d\n",
+                       mac, vf);
+               return -EINVAL;
+       }
+
+       dev_info(pi->adapter->pdev_dev,
+                "Setting MAC %pM on VF %d\n", mac, vf);
+       return t4_set_vf_mac_acl(adap, vf + 1, 1, mac);
+}
+#endif
+
 static int cxgb_set_mac_addr(struct net_device *dev, void *p)
 {
        int ret;
@@ -3136,7 +3156,27 @@ static const struct net_device_ops cxgb4_netdev_ops = {
 #ifdef CONFIG_NET_RX_BUSY_POLL
        .ndo_busy_poll        = cxgb_busy_poll,
 #endif
+};
 
+static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
+#ifdef CONFIG_PCI_IOV
+       .ndo_set_vf_mac       = cxgb_set_vf_mac,
+#endif
+};
+
+static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+       struct adapter *adapter = netdev2adap(dev);
+
+       strlcpy(info->driver, cxgb4_driver_name, sizeof(info->driver));
+       strlcpy(info->version, cxgb4_driver_version,
+               sizeof(info->version));
+       strlcpy(info->bus_info, pci_name(adapter->pdev),
+               sizeof(info->bus_info));
+}
+
+static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = {
+       .get_drvinfo       = get_drvinfo,
 };
 
 void t4_fatal_err(struct adapter *adap)
@@ -4836,19 +4876,12 @@ static int get_chip_type(struct pci_dev *pdev, u32 pl_rev)
 #ifdef CONFIG_PCI_IOV
 static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
 {
+       struct adapter *adap = pci_get_drvdata(pdev);
        int err = 0;
        int current_vfs = pci_num_vf(pdev);
        u32 pcie_fw;
-       void __iomem *regs;
 
-       regs = pci_ioremap_bar(pdev, 0);
-       if (!regs) {
-               dev_err(&pdev->dev, "cannot map device registers\n");
-               return -ENOMEM;
-       }
-
-       pcie_fw = readl(regs + PCIE_FW_A);
-       iounmap(regs);
+       pcie_fw = readl(adap->regs + PCIE_FW_A);
        /* Check if cxgb4 is the MASTER and fw is initialized */
        if (!(pcie_fw & PCIE_FW_INIT_F) ||
            !(pcie_fw & PCIE_FW_MASTER_VLD_F) ||
@@ -4875,6 +4908,8 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
         */
        if (!num_vfs) {
                pci_disable_sriov(pdev);
+               if (adap->port[0]->reg_state == NETREG_REGISTERED)
+                       unregister_netdev(adap->port[0]);
                return num_vfs;
        }
 
@@ -4882,6 +4917,12 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
                err = pci_enable_sriov(pdev, num_vfs);
                if (err)
                        return err;
+
+               if (adap->port[0]->reg_state == NETREG_UNINITIALIZED) {
+                       err = register_netdev(adap->port[0]);
+                       if (err < 0)
+                               pr_info("Unable to register VF mgmt netdev\n");
+               }
        }
        return num_vfs;
 }
@@ -4893,9 +4934,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct port_info *pi;
        bool highdma = false;
        struct adapter *adapter = NULL;
+       struct net_device *netdev;
+#ifdef CONFIG_PCI_IOV
+       char name[IFNAMSIZ];
+#endif
        void __iomem *regs;
        u32 whoami, pl_rev;
        enum chip_type chip;
+       static int adap_idx = 1;
 
        printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
 
@@ -4930,7 +4976,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        func = CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5 ?
                SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
        if (func != ent->driver_data) {
+#ifndef CONFIG_PCI_IOV
                iounmap(regs);
+#endif
                pci_disable_device(pdev);
                pci_save_state(pdev);        /* to restore SR-IOV later */
                goto sriov;
@@ -4962,6 +5010,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                err = -ENOMEM;
                goto out_unmap_bar0;
        }
+       adap_idx++;
 
        adapter->workq = create_singlethread_workqueue("cxgb4");
        if (!adapter->workq) {
@@ -5048,8 +5097,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                              T6_STATMODE_V(0)));
 
        for_each_port(adapter, i) {
-               struct net_device *netdev;
-
                netdev = alloc_etherdev_mq(sizeof(struct port_info),
                                           MAX_ETH_QSETS);
                if (!netdev) {
@@ -5217,6 +5264,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                attach_ulds(adapter);
 
        print_adapter_info(adapter);
+       return 0;
 
 sriov:
 #ifdef CONFIG_PCI_IOV
@@ -5230,8 +5278,57 @@ sriov:
                                 "instantiated %u virtual functions\n",
                                 num_vf[func]);
        }
-#endif
+
+       adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+       if (!adapter) {
+               err = -ENOMEM;
+               goto free_pci_region;
+       }
+
+       snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap_idx, func);
+       netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, ether_setup);
+       if (!netdev) {
+               err = -ENOMEM;
+               goto free_adapter;
+       }
+
+       adapter->pdev = pdev;
+       adapter->pdev_dev = &pdev->dev;
+       adapter->name = pci_name(pdev);
+       adapter->mbox = func;
+       adapter->pf = func;
+       adapter->regs = regs;
+       adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
+                                   (sizeof(struct mbox_cmd) *
+                                    T4_OS_LOG_MBOX_CMDS),
+                                   GFP_KERNEL);
+       if (!adapter->mbox_log) {
+               err = -ENOMEM;
+               goto free_netdevice;
+       }
+       pi = netdev_priv(netdev);
+       pi->adapter = adapter;
+       SET_NETDEV_DEV(netdev, &pdev->dev);
+       pci_set_drvdata(pdev, adapter);
+
+       adapter->port[0] = netdev;
+       netdev->netdev_ops = &cxgb4_mgmt_netdev_ops;
+       netdev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
+
+       return 0;
+
+ free_netdevice:
+       free_netdev(adapter->port[0]);
+ free_adapter:
+       kfree(adapter);
+ free_pci_region:
+       iounmap(regs);
+       pci_disable_sriov(pdev);
+       pci_release_regions(pdev);
+       return err;
+#else
        return 0;
+#endif
 
  out_free_dev:
        free_some_resources(adapter);
@@ -5258,12 +5355,12 @@ static void remove_one(struct pci_dev *pdev)
 {
        struct adapter *adapter = pci_get_drvdata(pdev);
 
-#ifdef CONFIG_PCI_IOV
-       pci_disable_sriov(pdev);
-
-#endif
+       if (!adapter) {
+               pci_release_regions(pdev);
+               return;
+       }
 
-       if (adapter) {
+       if (adapter->pf == 4) {
                int i;
 
                /* Tear down per-adapter Work Queue first since it can contain
@@ -5312,8 +5409,18 @@ static void remove_one(struct pci_dev *pdev)
                kfree(adapter->mbox_log);
                synchronize_rcu();
                kfree(adapter);
-       } else
+       }
+#ifdef CONFIG_PCI_IOV
+       else {
+               if (adapter->port[0]->reg_state == NETREG_REGISTERED)
+                       unregister_netdev(adapter->port[0]);
+               free_netdev(adapter->port[0]);
+               iounmap(adapter->regs);
+               kfree(adapter);
+               pci_disable_sriov(pdev);
                pci_release_regions(pdev);
+       }
+#endif
 }
 
 static struct pci_driver cxgb4_driver = {
index dc92c80..2a476cc 100644 (file)
@@ -8264,3 +8264,44 @@ void t4_idma_monitor(struct adapter *adapter,
                t4_sge_decode_idma_state(adapter, idma->idma_state[i]);
        }
 }
+
+/**
+ *     t4_set_vf_mac_acl - Set MAC address for the specified VF
+ *     @adapter: The adapter
+ *     @vf: one of the VFs instantiated by the specified PF
+ *     @naddr: the number of MAC addresses
+ *     @addr: the MAC address(es) to be set to the specified VF
+ */
+int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
+                     unsigned int naddr, u8 *addr)
+{
+       struct fw_acl_mac_cmd cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) |
+                                   FW_CMD_REQUEST_F |
+                                   FW_CMD_WRITE_F |
+                                   FW_ACL_MAC_CMD_PFN_V(adapter->pf) |
+                                   FW_ACL_MAC_CMD_VFN_V(vf));
+
+       /* Note: Do not enable the ACL */
+       cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd));
+       cmd.nmac = naddr;
+
+       switch (adapter->pf) {
+       case 3:
+               memcpy(cmd.macaddr3, addr, sizeof(cmd.macaddr3));
+               break;
+       case 2:
+               memcpy(cmd.macaddr2, addr, sizeof(cmd.macaddr2));
+               break;
+       case 1:
+               memcpy(cmd.macaddr1, addr, sizeof(cmd.macaddr1));
+               break;
+       case 0:
+               memcpy(cmd.macaddr0, addr, sizeof(cmd.macaddr0));
+               break;
+       }
+
+       return t4_wr_mbox(adapter, adapter->mbox, &cmd, sizeof(cmd), &cmd);
+}
index e116bb8..f2951bf 100644 (file)
@@ -2777,6 +2777,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
        struct adapter *adapter;
        struct port_info *pi;
        struct net_device *netdev;
+       unsigned int pf;
 
        /*
         * Print our driver banner the first time we're called to initialize a
@@ -2903,8 +2904,11 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
         * Allocate our "adapter ports" and stitch everything together.
         */
        pmask = adapter->params.vfres.pmask;
+       pf = t4vf_get_pf_from_vf(adapter);
        for_each_port(adapter, pidx) {
                int port_id, viid;
+               u8 mac[ETH_ALEN];
+               unsigned int naddr = 1;
 
                /*
                 * We simplistically allocate our virtual interfaces
@@ -2975,6 +2979,26 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
                                pidx);
                        goto err_free_dev;
                }
+
+               err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac);
+               if (err) {
+                       dev_err(&pdev->dev,
+                               "unable to determine MAC ACL address, "
+                               "continuing anyway.. (status %d)\n", err);
+               } else if (naddr && adapter->params.vfres.nvi == 1) {
+                       struct sockaddr addr;
+
+                       ether_addr_copy(addr.sa_data, mac);
+                       err = cxgb4vf_set_mac_addr(netdev, &addr);
+                       if (err) {
+                               dev_err(&pdev->dev,
+                                       "unable to set MAC address %pM\n",
+                                       mac);
+                               goto err_free_dev;
+                       }
+                       dev_info(&pdev->dev,
+                                "Using assigned MAC ACL: %pM\n", mac);
+               }
        }
 
        /* See what interrupts we'll be using.  If we've been configured to
index 8ee5414..8067424 100644 (file)
@@ -347,6 +347,7 @@ int t4vf_bar2_sge_qregs(struct adapter *adapter,
                        u64 *pbar2_qoffset,
                        unsigned int *pbar2_qid);
 
+unsigned int t4vf_get_pf_from_vf(struct adapter *);
 int t4vf_get_sge_params(struct adapter *);
 int t4vf_get_vpd_params(struct adapter *);
 int t4vf_get_dev_params(struct adapter *);
@@ -381,5 +382,7 @@ int t4vf_eth_eq_free(struct adapter *, unsigned int);
 
 int t4vf_handle_fw_rpl(struct adapter *, const __be64 *);
 int t4vf_prep_adapter(struct adapter *);
+int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
+                       unsigned int *naddr, u8 *addr);
 
 #endif /* __T4VF_COMMON_H__ */
index 427bfa7..879f4c5 100644 (file)
@@ -639,6 +639,15 @@ int t4vf_bar2_sge_qregs(struct adapter *adapter,
        return 0;
 }
 
+unsigned int t4vf_get_pf_from_vf(struct adapter *adapter)
+{
+       u32 whoami;
+
+       whoami = t4_read_reg(adapter, T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A);
+       return (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
+                       SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami));
+}
+
 /**
  *     t4vf_get_sge_params - retrieve adapter Scatter gather Engine parameters
  *     @adapter: the adapter
@@ -716,7 +725,6 @@ int t4vf_get_sge_params(struct adapter *adapter)
         * read.
         */
        if (!is_t4(adapter->params.chip)) {
-               u32 whoami;
                unsigned int pf, s_hps, s_qpp;
 
                params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_REG) |
@@ -740,11 +748,7 @@ int t4vf_get_sge_params(struct adapter *adapter)
                 * register we just read. Do it once here so other code in
                 * the driver can just use it.
                 */
-               whoami = t4_read_reg(adapter,
-                                    T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A);
-               pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
-                       SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
-
+               pf = t4vf_get_pf_from_vf(adapter);
                s_hps = (HOSTPAGESIZEPF0_S +
                         (HOSTPAGESIZEPF1_S - HOSTPAGESIZEPF0_S) * pf);
                sge_params->sge_vf_hps =
@@ -1807,3 +1811,50 @@ int t4vf_prep_adapter(struct adapter *adapter)
 
        return 0;
 }
+
+/**
+ *     t4vf_get_vf_mac_acl - Get the MAC address to be set to
+ *                           the VI of this VF.
+ *     @adapter: The adapter
+ *     @pf: The pf associated with vf
+ *     @naddr: the number of ACL MAC addresses returned in addr
+ *     @addr: Placeholder for MAC addresses
+ *
+ *     Find the MAC address to be set on this VF's VI. The MAC address is
+ *     provided by the host OS through the PF driver's ndo_set_vf_mac callback.
+ */
+int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
+                       unsigned int *naddr, u8 *addr)
+{
+       struct fw_acl_mac_cmd cmd;
+       int ret;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) |
+                                   FW_CMD_REQUEST_F |
+                                   FW_CMD_READ_F);
+       cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd));
+       ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &cmd);
+       if (ret)
+               return ret;
+
+       if (cmd.nmac < *naddr)
+               *naddr = cmd.nmac;
+
+       switch (pf) {
+       case 3:
+               memcpy(addr, cmd.macaddr3, sizeof(cmd.macaddr3));
+               break;
+       case 2:
+               memcpy(addr, cmd.macaddr2, sizeof(cmd.macaddr2));
+               break;
+       case 1:
+               memcpy(addr, cmd.macaddr1, sizeof(cmd.macaddr1));
+               break;
+       case 0:
+               memcpy(addr, cmd.macaddr0, sizeof(cmd.macaddr0));
+               break;
+       }
+
+       return ret;
+}
index f0e9e2e..6620fc8 100644 (file)
@@ -1966,7 +1966,7 @@ SetMulticastFilter(struct net_device *dev)
     } else if (lp->setup_f == HASH_PERF) {   /* Hash Filtering */
        netdev_for_each_mc_addr(ha, dev) {
                crc = ether_crc_le(ETH_ALEN, ha->addr);
-               hashcode = crc & HASH_BITS;  /* hashcode is 9 LSb of CRC */
+               hashcode = crc & DE4X5_HASH_BITS;  /* hashcode is 9 LSb of CRC */
 
                byte = hashcode >> 3;        /* bit[3-8] -> byte in filter */
                bit = 1 << (hashcode & 0x07);/* bit[0-2] -> bit in byte */
@@ -5043,7 +5043,7 @@ build_setup_frame(struct net_device *dev, int mode)
            *(pa + i) = dev->dev_addr[i];                 /* Host address */
            if (i & 0x01) pa += 2;
        }
-       *(lp->setup_frame + (HASH_TABLE_LEN >> 3) - 3) = 0x80;
+       *(lp->setup_frame + (DE4X5_HASH_TABLE_LEN >> 3) - 3) = 0x80;
     } else {
        for (i=0; i<ETH_ALEN; i++) { /* Host address */
            *(pa + (i&1)) = dev->dev_addr[i];
index ec756eb..1bfdc9b 100644 (file)
 #define PCI  0
 #define EISA 1
 
-#define HASH_TABLE_LEN   512       /* Bits */
-#define HASH_BITS        0x01ff    /* 9 LS bits */
+#define DE4X5_HASH_TABLE_LEN   512       /* Bits */
+#define DE4X5_HASH_BITS        0x01ff    /* 9 LS bits */
 
 #define SETUP_FRAME_LEN  192       /* Bytes */
 #define IMPERF_PA_OFFSET 156       /* Bytes */
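Editor's sketch (not part of the patch; helper name is made up): the renamed DE4X5_HASH_* constants above feed the same imperfect-filter indexing as the SetMulticastFilter() hunk earlier — the low 9 bits of the little-endian CRC of a multicast address pick one bit in the 512-bit setup-frame table.

#include <linux/types.h>
#include <linux/crc32.h>
#include <linux/if_ether.h>

/* Hypothetical helper mirroring the hash computation used above. */
static void de4x5_hash_position(const u8 *addr, int *byte, u8 *bit)
{
        u32 crc = ether_crc_le(ETH_ALEN, addr);
        u16 hashcode = crc & DE4X5_HASH_BITS;   /* 9 LS bits of CRC (0x01ff) */

        *byte = hashcode >> 3;                  /* bits [3:8] -> byte in filter */
        *bit = 1 << (hashcode & 0x07);          /* bits [0:2] -> bit in byte */
}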
index 4555e04..86780b5 100644 (file)
@@ -508,6 +508,10 @@ struct be_wrb_params {
        u16 lso_mss;    /* MSS for LSO */
 };
 
+struct be_eth_addr {
+       unsigned char mac[ETH_ALEN];
+};
+
 struct be_adapter {
        struct pci_dev *pdev;
        struct net_device *netdev;
@@ -523,7 +527,7 @@ struct be_adapter {
        struct be_dma_mem mbox_mem_alloced;
 
        struct be_mcc_obj mcc_obj;
-       spinlock_t mcc_lock;    /* For serializing mcc cmds to BE card */
+       struct mutex mcc_lock;  /* For serializing mcc cmds to BE card */
        spinlock_t mcc_cq_lock;
 
        u16 cfg_num_rx_irqs;            /* configured via set-channels */
@@ -570,9 +574,15 @@ struct be_adapter {
        int if_handle;          /* Used to configure filtering */
        u32 if_flags;           /* Interface filtering flags */
        u32 *pmac_id;           /* MAC addr handle used by BE card */
+       struct be_eth_addr *uc_list;/* list of uc-addrs programmed (not perm) */
        u32 uc_macs;            /* Count of secondary UC MAC programmed */
+       struct be_eth_addr *mc_list;/* list of mcast addrs programmed */
+       u32 mc_count;
        unsigned long vids[BITS_TO_LONGS(VLAN_N_VID)];
        u16 vlans_added;
+       bool update_uc_list;
+       bool update_mc_list;
+       struct mutex rx_filter_lock;/* For protecting vids[] & mc/uc_list[] */
 
        u32 beacon_state;       /* for set_phys_id */
 
@@ -626,6 +636,15 @@ struct be_adapter {
        u8 phy_state; /* state of sfp optics (functional, faulted, etc.,) */
 };
 
+/* Used for deferred FW config cmds. Add fields to this struct as reqd */
+struct be_cmd_work {
+       struct work_struct work;
+       struct be_adapter *adapter;
+       union {
+               __be16 vxlan_port;
+       } info;
+};
+
 #define be_physfn(adapter)             (!adapter->virtfn)
 #define be_virtfn(adapter)             (adapter->virtfn)
 #define sriov_enabled(adapter)         (adapter->flags &       \
index 2cc1175..fa11a5a 100644 (file)
@@ -571,7 +571,7 @@ int be_process_mcc(struct be_adapter *adapter)
 /* Wait till no more pending mcc requests are present */
 static int be_mcc_wait_compl(struct be_adapter *adapter)
 {
-#define mcc_timeout            120000 /* 12s timeout */
+#define mcc_timeout            12000 /* 12s timeout */
        int i, status = 0;
        struct be_mcc_obj *mcc_obj = &adapter->mcc_obj;
 
@@ -585,7 +585,7 @@ static int be_mcc_wait_compl(struct be_adapter *adapter)
 
                if (atomic_read(&mcc_obj->q.used) == 0)
                        break;
-               udelay(100);
+               usleep_range(500, 1000);
        }
        if (i == mcc_timeout) {
                dev_err(&adapter->pdev->dev, "FW not responding\n");
@@ -863,7 +863,7 @@ static bool use_mcc(struct be_adapter *adapter)
 static int be_cmd_lock(struct be_adapter *adapter)
 {
        if (use_mcc(adapter)) {
-               spin_lock_bh(&adapter->mcc_lock);
+               mutex_lock(&adapter->mcc_lock);
                return 0;
        } else {
                return mutex_lock_interruptible(&adapter->mbox_lock);
@@ -874,7 +874,7 @@ static int be_cmd_lock(struct be_adapter *adapter)
 static void be_cmd_unlock(struct be_adapter *adapter)
 {
        if (use_mcc(adapter))
-               spin_unlock_bh(&adapter->mcc_lock);
+               return mutex_unlock(&adapter->mcc_lock);
        else
                return mutex_unlock(&adapter->mbox_lock);
 }
@@ -1044,7 +1044,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
        struct be_cmd_req_mac_query *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -1073,7 +1073,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
        }
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -1085,7 +1085,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr,
        struct be_cmd_req_pmac_add *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -1110,7 +1110,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr,
        }
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
 
         if (status == MCC_STATUS_UNAUTHORIZED_REQUEST)
                status = -EPERM;
@@ -1128,7 +1128,7 @@ int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id, u32 dom)
        if (pmac_id == -1)
                return 0;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -1148,7 +1148,7 @@ int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id, u32 dom)
        status = be_mcc_notify_wait(adapter);
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -1411,7 +1411,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter,
        struct be_dma_mem *q_mem = &rxq->dma_mem;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -1441,7 +1441,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter,
        }
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -1505,7 +1505,7 @@ int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q)
        struct be_cmd_req_q_destroy *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -1522,7 +1522,7 @@ int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q)
        q->created = false;
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -1590,7 +1590,7 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd)
        struct be_cmd_req_hdr *hdr;
        int status = 0;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -1618,7 +1618,7 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd)
        adapter->stats_cmd_sent = true;
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -1634,7 +1634,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter,
                            CMD_SUBSYSTEM_ETH))
                return -EPERM;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -1657,7 +1657,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter,
        adapter->stats_cmd_sent = true;
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -1694,7 +1694,7 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed,
        struct be_cmd_req_link_status *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        if (link_status)
                *link_status = LINK_DOWN;
@@ -1733,7 +1733,7 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed,
        }
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -1744,7 +1744,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter)
        struct be_cmd_req_get_cntl_addnl_attribs *req;
        int status = 0;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -1759,7 +1759,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter)
 
        status = be_mcc_notify(adapter);
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -1808,7 +1808,7 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf)
        if (!get_fat_cmd.va)
                return -ENOMEM;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        while (total_size) {
                buf_size = min(total_size, (u32)60*1024);
@@ -1848,7 +1848,7 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf)
 err:
        dma_free_coherent(&adapter->pdev->dev, get_fat_cmd.size,
                          get_fat_cmd.va, get_fat_cmd.dma);
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -1859,7 +1859,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter)
        struct be_cmd_req_get_fw_version *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -1882,7 +1882,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter)
                        sizeof(adapter->fw_on_flash));
        }
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -1896,7 +1896,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter,
        struct be_cmd_req_modify_eq_delay *req;
        int status = 0, i;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -1919,7 +1919,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter,
 
        status = be_mcc_notify(adapter);
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -1946,7 +1946,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,
        struct be_cmd_req_vlan_config *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -1968,7 +1968,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,
 
        status = be_mcc_notify_wait(adapter);
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -1979,7 +1979,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value)
        struct be_cmd_req_rx_filter *req = mem->va;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -1996,8 +1996,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value)
        req->if_flags = (value == ON) ? req->if_flags_mask : 0;
 
        if (flags & BE_IF_FLAGS_MULTICAST) {
-               struct netdev_hw_addr *ha;
-               int i = 0;
+               int i;
 
                /* Reset mcast promisc mode if already set by setting mask
                 * and not setting flags field
@@ -2005,14 +2004,15 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value)
                req->if_flags_mask |=
                        cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS &
                                    be_if_cap_flags(adapter));
-               req->mcast_num = cpu_to_le32(netdev_mc_count(adapter->netdev));
-               netdev_for_each_mc_addr(ha, adapter->netdev)
-                       memcpy(req->mcast_mac[i++].byte, ha->addr, ETH_ALEN);
+               req->mcast_num = cpu_to_le32(adapter->mc_count);
+               for (i = 0; i < adapter->mc_count; i++)
+                       ether_addr_copy(req->mcast_mac[i].byte,
+                                       adapter->mc_list[i].mac);
        }
 
        status = be_mcc_notify_wait(adapter);
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -2043,7 +2043,7 @@ int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc)
                            CMD_SUBSYSTEM_COMMON))
                return -EPERM;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -2063,7 +2063,7 @@ int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc)
        status = be_mcc_notify_wait(adapter);
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
 
        if (base_status(status) == MCC_STATUS_FEATURE_NOT_SUPPORTED)
                return  -EOPNOTSUPP;
@@ -2082,7 +2082,7 @@ int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc)
                            CMD_SUBSYSTEM_COMMON))
                return -EPERM;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -2105,7 +2105,7 @@ int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc)
        }
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -2186,7 +2186,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable,
        if (!(be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
                return 0;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -2211,7 +2211,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable,
 
        status = be_mcc_notify_wait(adapter);
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -2223,7 +2223,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num,
        struct be_cmd_req_enable_disable_beacon *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -2244,7 +2244,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num,
        status = be_mcc_notify_wait(adapter);
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -2255,7 +2255,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state)
        struct be_cmd_req_get_beacon_state *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -2279,7 +2279,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state)
        }
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -2303,7 +2303,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
                return -ENOMEM;
        }
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -2325,7 +2325,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
                memcpy(data, resp->page_data, PAGE_DATA_LEN);
        }
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
        return status;
 }
@@ -2342,7 +2342,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter,
        void *ctxt = NULL;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
        adapter->flash_status = 0;
 
        wrb = wrb_from_mccq(adapter);
@@ -2384,7 +2384,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter,
        if (status)
                goto err_unlock;
 
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
 
        if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
                                         msecs_to_jiffies(60000)))
@@ -2403,7 +2403,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter,
        return status;
 
 err_unlock:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -2457,7 +2457,7 @@ static int lancer_cmd_delete_object(struct be_adapter *adapter,
        struct be_mcc_wrb *wrb;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -2475,7 +2475,7 @@ static int lancer_cmd_delete_object(struct be_adapter *adapter,
 
        status = be_mcc_notify_wait(adapter);
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -2488,7 +2488,7 @@ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
        struct lancer_cmd_resp_read_object *resp;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -2522,7 +2522,7 @@ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
        }
 
 err_unlock:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -2534,7 +2534,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter,
        struct be_cmd_write_flashrom *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
        adapter->flash_status = 0;
 
        wrb = wrb_from_mccq(adapter);
@@ -2559,7 +2559,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter,
        if (status)
                goto err_unlock;
 
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
 
        if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
                                         msecs_to_jiffies(40000)))
@@ -2570,7 +2570,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter,
        return status;
 
 err_unlock:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -2581,7 +2581,7 @@ static int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc,
        struct be_mcc_wrb *wrb;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -2608,7 +2608,7 @@ static int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc,
                memcpy(flashed_crc, req->crc, 4);
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -3192,7 +3192,7 @@ int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac,
        struct be_cmd_req_acpi_wol_magic_config *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -3209,7 +3209,7 @@ int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac,
        status = be_mcc_notify_wait(adapter);
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -3224,7 +3224,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num,
                            CMD_SUBSYSTEM_LOWLEVEL))
                return -EPERM;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -3247,7 +3247,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num,
        if (status)
                goto err_unlock;
 
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
 
        if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
                                         msecs_to_jiffies(SET_LB_MODE_TIMEOUT)))
@@ -3256,7 +3256,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num,
        return status;
 
 err_unlock:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -3273,7 +3273,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num,
                            CMD_SUBSYSTEM_LOWLEVEL))
                return -EPERM;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -3299,7 +3299,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num,
        if (status)
                goto err;
 
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
 
        wait_for_completion(&adapter->et_cmd_compl);
        resp = embedded_payload(wrb);
@@ -3307,7 +3307,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num,
 
        return status;
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -3323,7 +3323,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern,
                            CMD_SUBSYSTEM_LOWLEVEL))
                return -EPERM;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -3357,7 +3357,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern,
        }
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -3368,7 +3368,7 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter,
        struct be_cmd_req_seeprom_read *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -3384,7 +3384,7 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter,
        status = be_mcc_notify_wait(adapter);
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -3399,7 +3399,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter)
                            CMD_SUBSYSTEM_COMMON))
                return -EPERM;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -3444,7 +3444,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter)
        }
        dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -3454,7 +3454,7 @@ static int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain)
        struct be_cmd_req_set_qos *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -3474,7 +3474,7 @@ static int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain)
        status = be_mcc_notify_wait(adapter);
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -3581,7 +3581,7 @@ int be_cmd_get_fn_privileges(struct be_adapter *adapter, u32 *privilege,
        struct be_cmd_req_get_fn_privileges *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -3613,7 +3613,7 @@ int be_cmd_get_fn_privileges(struct be_adapter *adapter, u32 *privilege,
        }
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -3625,7 +3625,7 @@ int be_cmd_set_fn_privileges(struct be_adapter *adapter, u32 privileges,
        struct be_cmd_req_set_fn_privileges *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -3645,7 +3645,7 @@ int be_cmd_set_fn_privileges(struct be_adapter *adapter, u32 privileges,
 
        status = be_mcc_notify_wait(adapter);
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -3677,7 +3677,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac,
                return -ENOMEM;
        }
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -3741,7 +3741,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac,
        }
 
 out:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        dma_free_coherent(&adapter->pdev->dev, get_mac_list_cmd.size,
                          get_mac_list_cmd.va, get_mac_list_cmd.dma);
        return status;
@@ -3801,7 +3801,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array,
        if (!cmd.va)
                return -ENOMEM;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -3823,7 +3823,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array,
 
 err:
        dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -3859,7 +3859,7 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid,
                            CMD_SUBSYSTEM_COMMON))
                return -EPERM;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -3900,7 +3900,7 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid,
        status = be_mcc_notify_wait(adapter);
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -3914,7 +3914,7 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid,
        int status;
        u16 vid;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -3961,7 +3961,7 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid,
        }
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -4156,7 +4156,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter,
        struct be_cmd_req_set_ext_fat_caps *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -4172,7 +4172,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter,
 
        status = be_mcc_notify_wait(adapter);
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -4650,7 +4650,7 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op)
        if (iface == 0xFFFFFFFF)
                return -1;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -4667,7 +4667,7 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op)
 
        status = be_mcc_notify_wait(adapter);
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -4701,7 +4701,7 @@ int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg,
        struct be_cmd_resp_get_iface_list *resp;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -4722,7 +4722,7 @@ int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg,
        }
 
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -4816,7 +4816,7 @@ int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain)
        if (BEx_chip(adapter))
                return 0;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -4834,7 +4834,7 @@ int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain)
        req->enable = 1;
        status = be_mcc_notify_wait(adapter);
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -4905,7 +4905,7 @@ int __be_cmd_set_logical_link_config(struct be_adapter *adapter,
        struct be_cmd_req_set_ll_link *req;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -4931,7 +4931,7 @@ int __be_cmd_set_logical_link_config(struct be_adapter *adapter,
 
        status = be_mcc_notify_wait(adapter);
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 
@@ -4964,7 +4964,7 @@ int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload,
        struct be_cmd_resp_hdr *resp;
        int status;
 
-       spin_lock_bh(&adapter->mcc_lock);
+       mutex_lock(&adapter->mcc_lock);
 
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
@@ -4987,7 +4987,7 @@ int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload,
        memcpy(wrb_payload, resp, sizeof(*resp) + resp->response_length);
        be_dws_le_to_cpu(wrb_payload, sizeof(*resp) + resp->response_length);
 err:
-       spin_unlock_bh(&adapter->mcc_lock);
+       mutex_unlock(&adapter->mcc_lock);
        return status;
 }
 EXPORT_SYMBOL(be_roce_mcc_cmd);
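Editor's note (not part of the patch): the spin_lock_bh() to mutex_lock() conversion throughout be_cmds.c above means every MCC command path may now sleep, so it has to run in process context (workqueues, ndo ops, ethtool paths). A minimal self-contained sketch of the resulting locking pattern, with made-up names:

#include <linux/mutex.h>

struct example_ctx {
        struct mutex mcc_lock;          /* mirrors the converted adapter->mcc_lock */
};

static int example_issue_mcc_cmd(struct example_ctx *ctx)
{
        int status;

        /* mutex_lock() may sleep, unlike the spin_lock_bh() it replaces,
         * so this must never be reached from atomic or softirq context.
         */
        mutex_lock(&ctx->mcc_lock);
        status = 0;                     /* build the WRB and notify/wait here */
        mutex_unlock(&ctx->mcc_lock);

        return status;
}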
index 874c753..f7584d4 100644 (file)
@@ -53,6 +53,10 @@ static const struct pci_device_id be_dev_ids[] = {
        { 0 }
 };
 MODULE_DEVICE_TABLE(pci, be_dev_ids);
+
+/* Workqueue used by all functions for deferring cmd calls to the adapter */
+struct workqueue_struct *be_wq;
+
 /* UE Status Low CSR */
 static const char * const ue_status_low_desc[] = {
        "CEV",
@@ -1420,13 +1424,18 @@ static int be_vid_config(struct be_adapter *adapter)
        u16 num = 0, i = 0;
        int status = 0;
 
-       /* No need to further configure vids if in promiscuous mode */
-       if (be_in_all_promisc(adapter))
+       /* No need to change the VLAN state if the I/F is in promiscuous */
+       if (adapter->netdev->flags & IFF_PROMISC)
                return 0;
 
        if (adapter->vlans_added > be_max_vlans(adapter))
                return be_set_vlan_promisc(adapter);
 
+       if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
+               status = be_clear_vlan_promisc(adapter);
+               if (status)
+                       return status;
+       }
        /* Construct VLAN Table to give to HW */
        for_each_set_bit(i, adapter->vids, VLAN_N_VID)
                vids[num++] = cpu_to_le16(i);
@@ -1439,8 +1448,6 @@ static int be_vid_config(struct be_adapter *adapter)
                    addl_status(status) ==
                                MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
                        return be_set_vlan_promisc(adapter);
-       } else if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
-               status = be_clear_vlan_promisc(adapter);
        }
        return status;
 }
@@ -1450,46 +1457,45 @@ static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
        struct be_adapter *adapter = netdev_priv(netdev);
        int status = 0;
 
+       mutex_lock(&adapter->rx_filter_lock);
+
        /* Packets with VID 0 are always received by Lancer by default */
        if (lancer_chip(adapter) && vid == 0)
-               return status;
+               goto done;
 
        if (test_bit(vid, adapter->vids))
-               return status;
+               goto done;
 
        set_bit(vid, adapter->vids);
        adapter->vlans_added++;
 
        status = be_vid_config(adapter);
-       if (status) {
-               adapter->vlans_added--;
-               clear_bit(vid, adapter->vids);
-       }
-
+done:
+       mutex_unlock(&adapter->rx_filter_lock);
        return status;
 }
 
 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
 {
        struct be_adapter *adapter = netdev_priv(netdev);
+       int status = 0;
+
+       mutex_lock(&adapter->rx_filter_lock);
 
        /* Packets with VID 0 are always received by Lancer by default */
        if (lancer_chip(adapter) && vid == 0)
-               return 0;
+               goto done;
 
        if (!test_bit(vid, adapter->vids))
-               return 0;
+               goto done;
 
        clear_bit(vid, adapter->vids);
        adapter->vlans_added--;
 
-       return be_vid_config(adapter);
-}
-
-static void be_clear_all_promisc(struct be_adapter *adapter)
-{
-       be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, OFF);
-       adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
+       status = be_vid_config(adapter);
+done:
+       mutex_unlock(&adapter->rx_filter_lock);
+       return status;
 }
 
 static void be_set_all_promisc(struct be_adapter *adapter)
@@ -1510,75 +1516,207 @@ static void be_set_mc_promisc(struct be_adapter *adapter)
                adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
 }
 
-static void be_set_mc_list(struct be_adapter *adapter)
+static void be_set_uc_promisc(struct be_adapter *adapter)
 {
        int status;
 
-       status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
+       if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
+               return;
+
+       status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
        if (!status)
-               adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
-       else
+               adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
+}
+
+static void be_clear_uc_promisc(struct be_adapter *adapter)
+{
+       int status;
+
+       if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
+               return;
+
+       status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
+       if (!status)
+               adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
+}
+
+/* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
+ * We use a single callback function for both sync and unsync. We really don't
+ * add/remove addresses through this callback. But, we use it to detect changes
+ * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
+ */
+static int be_uc_list_update(struct net_device *netdev,
+                            const unsigned char *addr)
+{
+       struct be_adapter *adapter = netdev_priv(netdev);
+
+       adapter->update_uc_list = true;
+       return 0;
+}
+
+static int be_mc_list_update(struct net_device *netdev,
+                            const unsigned char *addr)
+{
+       struct be_adapter *adapter = netdev_priv(netdev);
+
+       adapter->update_mc_list = true;
+       return 0;
+}
+
+static void be_set_mc_list(struct be_adapter *adapter)
+{
+       struct net_device *netdev = adapter->netdev;
+       struct netdev_hw_addr *ha;
+       bool mc_promisc = false;
+       int status;
+
+       netif_addr_lock_bh(netdev);
+       __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
+
+       if (netdev->flags & IFF_PROMISC) {
+               adapter->update_mc_list = false;
+       } else if (netdev->flags & IFF_ALLMULTI ||
+                  netdev_mc_count(netdev) > be_max_mc(adapter)) {
+               /* Enable multicast promisc if num configured exceeds
+                * what we support
+                */
+               mc_promisc = true;
+               adapter->update_mc_list = false;
+       } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
+               /* Update mc-list unconditionally if the iface was previously
+                * in mc-promisc mode and now is out of that mode.
+                */
+               adapter->update_mc_list = true;
+       }
+
+       if (adapter->update_mc_list) {
+               int i = 0;
+
+               /* cache the mc-list in adapter */
+               netdev_for_each_mc_addr(ha, netdev) {
+                       ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
+                       i++;
+               }
+               adapter->mc_count = netdev_mc_count(netdev);
+       }
+       netif_addr_unlock_bh(netdev);
+
+       if (mc_promisc) {
                be_set_mc_promisc(adapter);
+       } else if (adapter->update_mc_list) {
+               status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
+               if (!status)
+                       adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
+               else
+                       be_set_mc_promisc(adapter);
+
+               adapter->update_mc_list = false;
+       }
+}
+
+static void be_clear_mc_list(struct be_adapter *adapter)
+{
+       struct net_device *netdev = adapter->netdev;
+
+       __dev_mc_unsync(netdev, NULL);
+       be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
+       adapter->mc_count = 0;
 }
 
 static void be_set_uc_list(struct be_adapter *adapter)
 {
+       struct net_device *netdev = adapter->netdev;
        struct netdev_hw_addr *ha;
-       int i = 1; /* First slot is claimed by the Primary MAC */
+       bool uc_promisc = false;
+       int curr_uc_macs = 0, i;
 
-       for (; adapter->uc_macs > 0; adapter->uc_macs--, i++)
-               be_cmd_pmac_del(adapter, adapter->if_handle,
-                               adapter->pmac_id[i], 0);
+       netif_addr_lock_bh(netdev);
+       __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
 
-       if (netdev_uc_count(adapter->netdev) > be_max_uc(adapter)) {
-               be_set_all_promisc(adapter);
-               return;
+       if (netdev->flags & IFF_PROMISC) {
+               adapter->update_uc_list = false;
+       } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
+               uc_promisc = true;
+               adapter->update_uc_list = false;
+       }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
+               /* Update uc-list unconditionally if the iface was previously
+                * in uc-promisc mode and now is out of that mode.
+                */
+               adapter->update_uc_list = true;
        }
 
-       netdev_for_each_uc_addr(ha, adapter->netdev) {
-               adapter->uc_macs++; /* First slot is for Primary MAC */
-               be_cmd_pmac_add(adapter, (u8 *)ha->addr, adapter->if_handle,
-                               &adapter->pmac_id[adapter->uc_macs], 0);
+       if (adapter->update_uc_list) {
+               i = 1; /* First slot is claimed by the Primary MAC */
+
+               /* cache the uc-list in adapter array */
+               netdev_for_each_uc_addr(ha, netdev) {
+                       ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
+                       i++;
+               }
+               curr_uc_macs = netdev_uc_count(netdev);
+       }
+       netif_addr_unlock_bh(netdev);
+
+       if (uc_promisc) {
+               be_set_uc_promisc(adapter);
+       } else if (adapter->update_uc_list) {
+               be_clear_uc_promisc(adapter);
+
+               for (i = 0; i < adapter->uc_macs; i++)
+                       be_cmd_pmac_del(adapter, adapter->if_handle,
+                                       adapter->pmac_id[i + 1], 0);
+
+               for (i = 0; i < curr_uc_macs; i++)
+                       be_cmd_pmac_add(adapter, adapter->uc_list[i].mac,
+                                       adapter->if_handle,
+                                       &adapter->pmac_id[i + 1], 0);
+               adapter->uc_macs = curr_uc_macs;
+               adapter->update_uc_list = false;
        }
 }
 
 static void be_clear_uc_list(struct be_adapter *adapter)
 {
+       struct net_device *netdev = adapter->netdev;
        int i;
 
-       for (i = 1; i < (adapter->uc_macs + 1); i++)
+       __dev_uc_unsync(netdev, NULL);
+       for (i = 0; i < adapter->uc_macs; i++)
                be_cmd_pmac_del(adapter, adapter->if_handle,
-                               adapter->pmac_id[i], 0);
+                               adapter->pmac_id[i + 1], 0);
        adapter->uc_macs = 0;
 }
 
-static void be_set_rx_mode(struct net_device *netdev)
+static void __be_set_rx_mode(struct be_adapter *adapter)
 {
-       struct be_adapter *adapter = netdev_priv(netdev);
+       struct net_device *netdev = adapter->netdev;
+
+       mutex_lock(&adapter->rx_filter_lock);
 
        if (netdev->flags & IFF_PROMISC) {
-               be_set_all_promisc(adapter);
-               return;
+               if (!be_in_all_promisc(adapter))
+                       be_set_all_promisc(adapter);
+       } else if (be_in_all_promisc(adapter)) {
+               /* We need to re-program the vlan-list or clear
+                * vlan-promisc mode (if needed) when the interface
+                * comes out of promisc mode.
+                */
+               be_vid_config(adapter);
        }
 
-       /* Interface was previously in promiscuous mode; disable it */
-       if (be_in_all_promisc(adapter)) {
-               be_clear_all_promisc(adapter);
-               if (adapter->vlans_added)
-                       be_vid_config(adapter);
-       }
+       be_set_uc_list(adapter);
+       be_set_mc_list(adapter);
 
-       /* Enable multicast promisc if num configured exceeds what we support */
-       if (netdev->flags & IFF_ALLMULTI ||
-           netdev_mc_count(netdev) > be_max_mc(adapter)) {
-               be_set_mc_promisc(adapter);
-               return;
-       }
+       mutex_unlock(&adapter->rx_filter_lock);
+}
 
-       if (netdev_uc_count(netdev) != adapter->uc_macs)
-               be_set_uc_list(adapter);
+static void be_work_set_rx_mode(struct work_struct *work)
+{
+       struct be_cmd_work *cmd_work =
+                               container_of(work, struct be_cmd_work, work);
 
-       be_set_mc_list(adapter);
+       __be_set_rx_mode(cmd_work->adapter);
+       kfree(cmd_work);
 }
 
 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
@@ -3429,6 +3567,7 @@ static void be_disable_if_filters(struct be_adapter *adapter)
                        adapter->pmac_id[0], 0);
 
        be_clear_uc_list(adapter);
+       be_clear_mc_list(adapter);
 
        /* The IFACE flags are enabled in the open path and cleared
         * in the close path. When a VF gets detached from the host and
@@ -3462,6 +3601,11 @@ static int be_close(struct net_device *netdev)
        if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
                return 0;
 
+       /* Before attempting cleanup ensure all the pending cmds queued on
+        * be_wq have finished execution
+        */
+       flush_workqueue(be_wq);
+
        be_disable_if_filters(adapter);
 
        if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
@@ -3586,7 +3730,7 @@ static int be_enable_if_filters(struct be_adapter *adapter)
        if (adapter->vlans_added)
                be_vid_config(adapter);
 
-       be_set_rx_mode(adapter->netdev);
+       __be_set_rx_mode(adapter);
 
        return 0;
 }
@@ -3860,6 +4004,20 @@ static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
                vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
 }
 
+static void be_if_destroy(struct be_adapter *adapter)
+{
+       be_cmd_if_destroy(adapter, adapter->if_handle, 0);
+
+       kfree(adapter->pmac_id);
+       adapter->pmac_id = NULL;
+
+       kfree(adapter->mc_list);
+       adapter->mc_list = NULL;
+
+       kfree(adapter->uc_list);
+       adapter->uc_list = NULL;
+}
+
 static int be_clear(struct be_adapter *adapter)
 {
        struct pci_dev *pdev = adapter->pdev;
@@ -3867,6 +4025,8 @@ static int be_clear(struct be_adapter *adapter)
 
        be_cancel_worker(adapter);
 
+       flush_workqueue(be_wq);
+
        if (sriov_enabled(adapter))
                be_vf_clear(adapter);
 
@@ -3884,10 +4044,8 @@ static int be_clear(struct be_adapter *adapter)
        }
 
        be_disable_vxlan_offloads(adapter);
-       kfree(adapter->pmac_id);
-       adapter->pmac_id = NULL;
 
-       be_cmd_if_destroy(adapter, adapter->if_handle,  0);
+       be_if_destroy(adapter);
 
        be_clear_queues(adapter);
 
@@ -4341,7 +4499,7 @@ static int be_mac_setup(struct be_adapter *adapter)
 
 static void be_schedule_worker(struct be_adapter *adapter)
 {
-       schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
+       queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
        adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
 }
 
@@ -4393,6 +4551,22 @@ static int be_if_create(struct be_adapter *adapter)
        u32 cap_flags = be_if_cap_flags(adapter);
        int status;
 
+       /* alloc required memory for other filtering fields */
+       adapter->pmac_id = kcalloc(be_max_uc(adapter),
+                                  sizeof(*adapter->pmac_id), GFP_KERNEL);
+       if (!adapter->pmac_id)
+               return -ENOMEM;
+
+       adapter->mc_list = kcalloc(be_max_mc(adapter),
+                                  sizeof(*adapter->mc_list), GFP_KERNEL);
+       if (!adapter->mc_list)
+               return -ENOMEM;
+
+       adapter->uc_list = kcalloc(be_max_uc(adapter),
+                                  sizeof(*adapter->uc_list), GFP_KERNEL);
+       if (!adapter->uc_list)
+               return -ENOMEM;
+
        if (adapter->cfg_num_rx_irqs == 1)
                cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
 
@@ -4401,7 +4575,10 @@ static int be_if_create(struct be_adapter *adapter)
        status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
                                  &adapter->if_handle, 0);
 
-       return status;
+       if (status)
+               return status;
+
+       return 0;
 }
 
 int be_update_queues(struct be_adapter *adapter)
@@ -4530,11 +4707,6 @@ static int be_setup(struct be_adapter *adapter)
        if (status)
                goto err;
 
-       adapter->pmac_id = kcalloc(be_max_uc(adapter),
-                                  sizeof(*adapter->pmac_id), GFP_KERNEL);
-       if (!adapter->pmac_id)
-               return -ENOMEM;
-
        status = be_msix_enable(adapter);
        if (status)
                goto err;
@@ -4728,6 +4900,23 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
                                       0, 0, nlflags, filter_mask, NULL);
 }
 
+static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
+                                        void (*func)(struct work_struct *))
+{
+       struct be_cmd_work *work;
+
+       work = kzalloc(sizeof(*work), GFP_ATOMIC);
+       if (!work) {
+               dev_err(&adapter->pdev->dev,
+                       "be_work memory allocation failed\n");
+               return NULL;
+       }
+
+       INIT_WORK(&work->work, func);
+       work->adapter = adapter;
+       return work;
+}
+
 /* VxLAN offload Notes:
  *
  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
@@ -4742,23 +4931,19 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
  * adds more than one port, disable offloads and don't re-enable them again
  * until after all the tunnels are removed.
  */
-static void be_add_vxlan_port(struct net_device *netdev,
-                             struct udp_tunnel_info *ti)
+static void be_work_add_vxlan_port(struct work_struct *work)
 {
-       struct be_adapter *adapter = netdev_priv(netdev);
+       struct be_cmd_work *cmd_work =
+                               container_of(work, struct be_cmd_work, work);
+       struct be_adapter *adapter = cmd_work->adapter;
+       struct net_device *netdev = adapter->netdev;
        struct device *dev = &adapter->pdev->dev;
-       __be16 port = ti->port;
+       __be16 port = cmd_work->info.vxlan_port;
        int status;
 
-       if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
-               return;
-
-       if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
-               return;
-
        if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
                adapter->vxlan_port_aliases++;
-               return;
+               goto done;
        }
 
        if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
@@ -4770,7 +4955,7 @@ static void be_add_vxlan_port(struct net_device *netdev,
        }
 
        if (adapter->vxlan_port_count++ >= 1)
-               return;
+               goto done;
 
        status = be_cmd_manage_iface(adapter, adapter->if_handle,
                                     OP_CONVERT_NORMAL_TO_TUNNEL);
@@ -4795,29 +4980,26 @@ static void be_add_vxlan_port(struct net_device *netdev,
 
        dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
                 be16_to_cpu(port));
-       return;
+       goto done;
 err:
        be_disable_vxlan_offloads(adapter);
+done:
+       kfree(cmd_work);
 }
 
-static void be_del_vxlan_port(struct net_device *netdev,
-                             struct udp_tunnel_info *ti)
+static void be_work_del_vxlan_port(struct work_struct *work)
 {
-       struct be_adapter *adapter = netdev_priv(netdev);
-       __be16 port = ti->port;
-
-       if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
-               return;
-
-       if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
-               return;
+       struct be_cmd_work *cmd_work =
+                               container_of(work, struct be_cmd_work, work);
+       struct be_adapter *adapter = cmd_work->adapter;
+       __be16 port = cmd_work->info.vxlan_port;
 
        if (adapter->vxlan_port != port)
                goto done;
 
        if (adapter->vxlan_port_aliases) {
                adapter->vxlan_port_aliases--;
-               return;
+               goto out;
        }
 
        be_disable_vxlan_offloads(adapter);
@@ -4827,6 +5009,40 @@ static void be_del_vxlan_port(struct net_device *netdev,
                 be16_to_cpu(port));
 done:
        adapter->vxlan_port_count--;
+out:
+       kfree(cmd_work);
+}
+
+static void be_cfg_vxlan_port(struct net_device *netdev,
+                             struct udp_tunnel_info *ti,
+                             void (*func)(struct work_struct *))
+{
+       struct be_adapter *adapter = netdev_priv(netdev);
+       struct be_cmd_work *cmd_work;
+
+       if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+               return;
+
+       if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
+               return;
+
+       cmd_work = be_alloc_work(adapter, func);
+       if (cmd_work) {
+               cmd_work->info.vxlan_port = ti->port;
+               queue_work(be_wq, &cmd_work->work);
+       }
+}
+
+static void be_del_vxlan_port(struct net_device *netdev,
+                             struct udp_tunnel_info *ti)
+{
+       be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
+}
+
+static void be_add_vxlan_port(struct net_device *netdev,
+                             struct udp_tunnel_info *ti)
+{
+       be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
 }
 
 static netdev_features_t be_features_check(struct sk_buff *skb,
@@ -4891,6 +5107,16 @@ static int be_get_phys_port_id(struct net_device *dev,
        return 0;
 }
 
+static void be_set_rx_mode(struct net_device *dev)
+{
+       struct be_adapter *adapter = netdev_priv(dev);
+       struct be_cmd_work *work;
+
+       work = be_alloc_work(adapter, be_work_set_rx_mode);
+       if (work)
+               queue_work(be_wq, &work->work);
+}
+
 static const struct net_device_ops be_netdev_ops = {
        .ndo_open               = be_open,
        .ndo_stop               = be_close,
@@ -5116,7 +5342,7 @@ static void be_worker(struct work_struct *work)
 
 reschedule:
        adapter->work_counter++;
-       schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
+       queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
 }
 
 static void be_unmap_pci_bars(struct be_adapter *adapter)
@@ -5256,7 +5482,8 @@ static int be_drv_init(struct be_adapter *adapter)
        }
 
        mutex_init(&adapter->mbox_lock);
-       spin_lock_init(&adapter->mcc_lock);
+       mutex_init(&adapter->mcc_lock);
+       mutex_init(&adapter->rx_filter_lock);
        spin_lock_init(&adapter->mcc_cq_lock);
        init_completion(&adapter->et_cmd_compl);
 
@@ -5712,6 +5939,12 @@ static int __init be_init_module(void)
                pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
        }
 
+       be_wq = create_singlethread_workqueue("be_wq");
+       if (!be_wq) {
+               pr_warn(DRV_NAME " : workqueue creation failed\n");
+               return -1;
+       }
+
        return pci_register_driver(&be_driver);
 }
 module_init(be_init_module);
@@ -5719,5 +5952,8 @@ module_init(be_init_module);
 static void __exit be_exit_module(void)
 {
        pci_unregister_driver(&be_driver);
+
+       if (be_wq)
+               destroy_workqueue(be_wq);
 }
 module_exit(be_exit_module);
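
For reference, a minimal sketch (not part of the patch) of the defer-to-workqueue pattern the be2net changes above adopt for rx-filter and VxLAN programming: package the context in a small work item allocated with GFP_ATOMIC, queue it on a driver-private workqueue, and free the item at the end of the handler. All names below (my_wq, my_adapter, my_cmd_work, my_defer) are illustrative placeholders, not identifiers from the driver.

#include <linux/workqueue.h>
#include <linux/slab.h>

/* Created once at init, e.g. my_wq = create_singlethread_workqueue("my_wq"); */
static struct workqueue_struct *my_wq;

struct my_adapter;			/* driver private state */

struct my_cmd_work {
	struct work_struct work;
	struct my_adapter *adapter;
};

static void my_work_fn(struct work_struct *work)
{
	struct my_cmd_work *cmd = container_of(work, struct my_cmd_work, work);

	/* issue the (possibly sleeping) device commands here via cmd->adapter */

	kfree(cmd);			/* the handler owns and frees the work item */
}

/* Callable from contexts that must not sleep, e.g. ndo_set_rx_mode */
static void my_defer(struct my_adapter *adapter)
{
	struct my_cmd_work *cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);

	if (!cmd)
		return;

	INIT_WORK(&cmd->work, my_work_fn);
	cmd->adapter = adapter;
	queue_work(my_wq, &cmd->work);
}

Deferring the commands this way lets the atomic ndo callback return immediately while the firmware commands, which may now sleep, run later in process context.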
index 01f7e81..fb5c638 100644 (file)
@@ -2887,7 +2887,7 @@ fec_enet_close(struct net_device *ndev)
  * this kind of feature?).
  */
 
-#define HASH_BITS      6               /* #bits in hash */
+#define FEC_HASH_BITS  6               /* #bits in hash */
 #define CRC32_POLY     0xEDB88320
 
 static void set_multicast_list(struct net_device *ndev)
@@ -2935,10 +2935,10 @@ static void set_multicast_list(struct net_device *ndev)
                        }
                }
 
-               /* only upper 6 bits (HASH_BITS) are used
+               /* only upper 6 bits (FEC_HASH_BITS) are used
                 * which point to a specific bit in the hash registers
                 */
-               hash = (crc >> (32 - HASH_BITS)) & 0x3f;
+               hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f;
 
                if (hash > 31) {
                        tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
index 7b8fe86..e03b30c 100644 (file)
@@ -271,11 +271,8 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
                goto err_ioremap;
        }
 
-       if (of_get_property(pdev->dev.of_node,
-                           "little-endian", NULL))
-               priv->is_little_endian = true;
-       else
-               priv->is_little_endian = false;
+       priv->is_little_endian = of_property_read_bool(pdev->dev.of_node,
+                                                      "little-endian");
 
        ret = of_mdiobus_register(bus, np);
        if (ret) {
index 5c8afe1..a834774 100644 (file)
@@ -684,8 +684,7 @@ hns_mac_register_phydev(struct mii_bus *mdio, struct hns_mac_cb *mac_cb,
        if (!phy || IS_ERR(phy))
                return -EIO;
 
-       if (mdio->irq)
-               phy->irq = mdio->irq[addr];
+       phy->irq = mdio->irq[addr];
 
        /* All data is now stored in the phy struct;
         * register it
index d41c28d..8e4252d 100644 (file)
@@ -399,7 +399,6 @@ struct mvneta_port {
        u16 rx_ring_size;
 
        struct mii_bus *mii_bus;
-       struct phy_device *phy_dev;
        phy_interface_t phy_interface;
        struct device_node *phy_node;
        unsigned int link;
@@ -2651,6 +2650,7 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
        u32 cause_rx_tx;
        int rx_queue;
        struct mvneta_port *pp = netdev_priv(napi->dev);
+       struct net_device *ndev = pp->dev;
        struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
 
        if (!netif_running(pp->dev)) {
@@ -2668,7 +2668,7 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
                                (MVNETA_CAUSE_PHY_STATUS_CHANGE |
                                 MVNETA_CAUSE_LINK_CHANGE |
                                 MVNETA_CAUSE_PSC_SYNC_CHANGE))) {
-                       mvneta_fixed_link_update(pp, pp->phy_dev);
+                       mvneta_fixed_link_update(pp, ndev->phydev);
                }
        }
 
@@ -2963,6 +2963,7 @@ static int mvneta_setup_txqs(struct mvneta_port *pp)
 static void mvneta_start_dev(struct mvneta_port *pp)
 {
        int cpu;
+       struct net_device *ndev = pp->dev;
 
        mvneta_max_rx_size_set(pp, pp->pkt_size);
        mvneta_txq_max_tx_size_set(pp, pp->pkt_size);
@@ -2985,15 +2986,16 @@ static void mvneta_start_dev(struct mvneta_port *pp)
                    MVNETA_CAUSE_LINK_CHANGE |
                    MVNETA_CAUSE_PSC_SYNC_CHANGE);
 
-       phy_start(pp->phy_dev);
+       phy_start(ndev->phydev);
        netif_tx_start_all_queues(pp->dev);
 }
 
 static void mvneta_stop_dev(struct mvneta_port *pp)
 {
        unsigned int cpu;
+       struct net_device *ndev = pp->dev;
 
-       phy_stop(pp->phy_dev);
+       phy_stop(ndev->phydev);
 
        for_each_online_cpu(cpu) {
                struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
@@ -3166,7 +3168,7 @@ static int mvneta_set_mac_addr(struct net_device *dev, void *addr)
 static void mvneta_adjust_link(struct net_device *ndev)
 {
        struct mvneta_port *pp = netdev_priv(ndev);
-       struct phy_device *phydev = pp->phy_dev;
+       struct phy_device *phydev = ndev->phydev;
        int status_change = 0;
 
        if (phydev->link) {
@@ -3244,7 +3246,6 @@ static int mvneta_mdio_probe(struct mvneta_port *pp)
        phy_dev->supported &= PHY_GBIT_FEATURES;
        phy_dev->advertising = phy_dev->supported;
 
-       pp->phy_dev = phy_dev;
        pp->link    = 0;
        pp->duplex  = 0;
        pp->speed   = 0;
@@ -3254,8 +3255,9 @@ static int mvneta_mdio_probe(struct mvneta_port *pp)
 
 static void mvneta_mdio_remove(struct mvneta_port *pp)
 {
-       phy_disconnect(pp->phy_dev);
-       pp->phy_dev = NULL;
+       struct net_device *ndev = pp->dev;
+
+       phy_disconnect(ndev->phydev);
 }
 
 /* Electing a CPU must be done in an atomic way: it should be done
@@ -3495,42 +3497,30 @@ static int mvneta_stop(struct net_device *dev)
 
 static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-       struct mvneta_port *pp = netdev_priv(dev);
-
-       if (!pp->phy_dev)
+       if (!dev->phydev)
                return -ENOTSUPP;
 
-       return phy_mii_ioctl(pp->phy_dev, ifr, cmd);
+       return phy_mii_ioctl(dev->phydev, ifr, cmd);
 }
 
 /* Ethtool methods */
 
-/* Get settings (phy address, speed) for ethtools */
-int mvneta_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+/* Set link ksettings (phy address, speed) for ethtools */
+int mvneta_ethtool_set_link_ksettings(struct net_device *ndev,
+                                     const struct ethtool_link_ksettings *cmd)
 {
-       struct mvneta_port *pp = netdev_priv(dev);
-
-       if (!pp->phy_dev)
-               return -ENODEV;
-
-       return phy_ethtool_gset(pp->phy_dev, cmd);
-}
-
-/* Set settings (phy address, speed) for ethtools */
-int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-       struct mvneta_port *pp = netdev_priv(dev);
-       struct phy_device *phydev = pp->phy_dev;
+       struct mvneta_port *pp = netdev_priv(ndev);
+       struct phy_device *phydev = ndev->phydev;
 
        if (!phydev)
                return -ENODEV;
 
-       if ((cmd->autoneg == AUTONEG_ENABLE) != pp->use_inband_status) {
+       if ((cmd->base.autoneg == AUTONEG_ENABLE) != pp->use_inband_status) {
                u32 val;
 
-               mvneta_set_autoneg(pp, cmd->autoneg == AUTONEG_ENABLE);
+               mvneta_set_autoneg(pp, cmd->base.autoneg == AUTONEG_ENABLE);
 
-               if (cmd->autoneg == AUTONEG_DISABLE) {
+               if (cmd->base.autoneg == AUTONEG_DISABLE) {
                        val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
                        val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED |
                                 MVNETA_GMAC_CONFIG_GMII_SPEED |
@@ -3547,17 +3537,17 @@ int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
                        mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
                }
 
-               pp->use_inband_status = (cmd->autoneg == AUTONEG_ENABLE);
+               pp->use_inband_status = (cmd->base.autoneg == AUTONEG_ENABLE);
                netdev_info(pp->dev, "autoneg status set to %i\n",
                            pp->use_inband_status);
 
-               if (netif_running(dev)) {
+               if (netif_running(ndev)) {
                        mvneta_port_down(pp);
                        mvneta_port_up(pp);
                }
        }
 
-       return phy_ethtool_sset(pp->phy_dev, cmd);
+       return phy_ethtool_ksettings_set(ndev->phydev, cmd);
 }
 
 /* Set interrupt coalescing for ethtools */
@@ -3821,8 +3811,6 @@ static const struct net_device_ops mvneta_netdev_ops = {
 
 const struct ethtool_ops mvneta_eth_tool_ops = {
        .get_link       = ethtool_op_get_link,
-       .get_settings   = mvneta_ethtool_get_settings,
-       .set_settings   = mvneta_ethtool_set_settings,
        .set_coalesce   = mvneta_ethtool_set_coalesce,
        .get_coalesce   = mvneta_ethtool_get_coalesce,
        .get_drvinfo    = mvneta_ethtool_get_drvinfo,
@@ -3835,6 +3823,8 @@ const struct ethtool_ops mvneta_eth_tool_ops = {
        .get_rxnfc      = mvneta_ethtool_get_rxnfc,
        .get_rxfh       = mvneta_ethtool_get_rxfh,
        .set_rxfh       = mvneta_ethtool_set_rxfh,
+       .get_link_ksettings = phy_ethtool_get_link_ksettings,
+       .set_link_ksettings = mvneta_ethtool_set_link_ksettings,
 };
 
 /* Initialize hw */
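
A small sketch (illustrative only, not from the patch) of the ethtool API switch made in the mvneta hunks above: once the attached PHY is tracked through ndev->phydev, the generic phylib helper can back get_link_ksettings directly, and the driver's set hook only needs its own handling before delegating to phy_ethtool_ksettings_set(). The foo_* names are placeholders.

#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <linux/phy.h>

static int foo_set_link_ksettings(struct net_device *ndev,
				  const struct ethtool_link_ksettings *cmd)
{
	if (!ndev->phydev)
		return -ENODEV;

	/* driver-specific reaction to cmd->base.autoneg, speed, etc. goes here */

	return phy_ethtool_ksettings_set(ndev->phydev, cmd);
}

static const struct ethtool_ops foo_ethtool_ops = {
	.get_link		= ethtool_op_get_link,
	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
	.set_link_ksettings	= foo_set_link_ksettings,
};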
index f160954..1801fd8 100644 (file)
@@ -369,18 +369,17 @@ static int mtk_set_mac_address(struct net_device *dev, void *p)
        int ret = eth_mac_addr(dev, p);
        struct mtk_mac *mac = netdev_priv(dev);
        const char *macaddr = dev->dev_addr;
-       unsigned long flags;
 
        if (ret)
                return ret;
 
-       spin_lock_irqsave(&mac->hw->page_lock, flags);
+       spin_lock_bh(&mac->hw->page_lock);
        mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
                MTK_GDMA_MAC_ADRH(mac->id));
        mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
                (macaddr[4] << 8) | macaddr[5],
                MTK_GDMA_MAC_ADRL(mac->id));
-       spin_unlock_irqrestore(&mac->hw->page_lock, flags);
+       spin_unlock_bh(&mac->hw->page_lock);
 
        return 0;
 }
@@ -764,7 +763,6 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
        struct mtk_eth *eth = mac->hw;
        struct mtk_tx_ring *ring = &eth->tx_ring;
        struct net_device_stats *stats = &dev->stats;
-       unsigned long flags;
        bool gso = false;
        int tx_num;
 
@@ -772,14 +770,14 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
         * however we have 2 queues running on the same ring so we need to lock
         * the ring access
         */
-       spin_lock_irqsave(&eth->page_lock, flags);
+       spin_lock(&eth->page_lock);
 
        tx_num = mtk_cal_txd_req(skb);
        if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
                mtk_stop_queue(eth);
                netif_err(eth, tx_queued, dev,
                          "Tx Ring full when queue awake!\n");
-               spin_unlock_irqrestore(&eth->page_lock, flags);
+               spin_unlock(&eth->page_lock);
                return NETDEV_TX_BUSY;
        }
 
@@ -804,12 +802,12 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
        if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
                mtk_stop_queue(eth);
 
-       spin_unlock_irqrestore(&eth->page_lock, flags);
+       spin_unlock(&eth->page_lock);
 
        return NETDEV_TX_OK;
 
 drop:
-       spin_unlock_irqrestore(&eth->page_lock, flags);
+       spin_unlock(&eth->page_lock);
        stats->tx_dropped++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
@@ -1363,16 +1361,15 @@ static int mtk_open(struct net_device *dev)
 
 static void mtk_stop_dma(struct mtk_eth *eth, u32 glo_cfg)
 {
-       unsigned long flags;
        u32 val;
        int i;
 
        /* stop the dma engine */
-       spin_lock_irqsave(&eth->page_lock, flags);
+       spin_lock_bh(&eth->page_lock);
        val = mtk_r32(eth, glo_cfg);
        mtk_w32(eth, val & ~(MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN),
                glo_cfg);
-       spin_unlock_irqrestore(&eth->page_lock, flags);
+       spin_unlock_bh(&eth->page_lock);
 
        /* wait for dma stop */
        for (i = 0; i < 10; i++) {
@@ -1912,7 +1909,6 @@ static int mtk_remove(struct platform_device *pdev)
        netif_napi_del(&eth->tx_napi);
        netif_napi_del(&eth->rx_napi);
        mtk_cleanup(eth);
-       platform_set_drvdata(pdev, NULL);
 
        return 0;
 }
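
A brief sketch (illustrative, not part of the patch) of the locking simplification in the mtk_eth_soc hunks above: when a spinlock is only contended between process context and the softirq TX path, disabling bottom halves is sufficient in process context, and ndo_start_xmit can take the plain lock because it already runs with BHs disabled.

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(page_lock);

/* Process context (e.g. ndo_set_mac_address): block softirqs on this CPU */
static void cfg_from_process_context(void)
{
	spin_lock_bh(&page_lock);
	/* ... program MAC/DMA registers ... */
	spin_unlock_bh(&page_lock);
}

/* TX path (ndo_start_xmit): bottom halves are already disabled here */
static void tx_path(void)
{
	spin_lock(&page_lock);
	/* ... fill and kick TX descriptors ... */
	spin_unlock(&page_lock);
}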
index 1c35f37..5476927 100644 (file)
@@ -377,9 +377,8 @@ static void qed_cxt_set_proto_cid_count(struct qed_hwfn *p_hwfn,
        }
 }
 
-u32 qed_cxt_get_proto_cid_count(struct qed_hwfn                *p_hwfn,
-                               enum protocol_type      type,
-                               u32                     *vf_cid)
+u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn,
+                               enum protocol_type type, u32 *vf_cid)
 {
        if (vf_cid)
                *vf_cid = p_hwfn->p_cxt_mngr->conn_cfg[type].cids_per_vf;
@@ -405,10 +404,10 @@ u32 qed_cxt_get_proto_tid_count(struct qed_hwfn *p_hwfn,
        return cnt;
 }
 
-static void
-qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn,
-                           enum protocol_type proto,
-                           u8 seg, u8 seg_type, u32 count, bool has_fl)
+static void qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn,
+                                       enum protocol_type proto,
+                                       u8 seg,
+                                       u8 seg_type, u32 count, bool has_fl)
 {
        struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
        struct qed_tid_seg *p_seg = &p_mngr->conn_cfg[proto].tid_seg[seg];
@@ -420,8 +419,7 @@ qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn,
 
 static void qed_ilt_cli_blk_fill(struct qed_ilt_client_cfg *p_cli,
                                 struct qed_ilt_cli_blk *p_blk,
-                                u32 start_line, u32 total_size,
-                                u32 elem_size)
+                                u32 start_line, u32 total_size, u32 elem_size)
 {
        u32 ilt_size = ILT_PAGE_IN_BYTES(p_cli->p_size.val);
 
@@ -448,8 +446,7 @@ static void qed_ilt_cli_adv_line(struct qed_hwfn *p_hwfn,
                p_cli->first.val = *p_line;
 
        p_cli->active = true;
-       *p_line += DIV_ROUND_UP(p_blk->total_size,
-                               p_blk->real_size_in_page);
+       *p_line += DIV_ROUND_UP(p_blk->total_size, p_blk->real_size_in_page);
        p_cli->last.val = *p_line - 1;
 
        DP_VERBOSE(p_hwfn, QED_MSG_ILT,
@@ -926,12 +923,9 @@ static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn,
                void *p_virt;
                u32 size;
 
-               size = min_t(u32, sz_left,
-                            p_blk->real_size_in_page);
+               size = min_t(u32, sz_left, p_blk->real_size_in_page);
                p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
-                                           size,
-                                           &p_phys,
-                                           GFP_KERNEL);
+                                           size, &p_phys, GFP_KERNEL);
                if (!p_virt)
                        return -ENOMEM;
                memset(p_virt, 0, size);
@@ -976,7 +970,7 @@ static int qed_ilt_shadow_alloc(struct qed_hwfn *p_hwfn)
                for (j = 0; j < ILT_CLI_PF_BLOCKS; j++) {
                        p_blk = &clients[i].pf_blks[j];
                        rc = qed_ilt_blk_alloc(p_hwfn, p_blk, i, 0);
-                       if (rc != 0)
+                       if (rc)
                                goto ilt_shadow_fail;
                }
                for (k = 0; k < p_mngr->vf_count; k++) {
@@ -985,7 +979,7 @@ static int qed_ilt_shadow_alloc(struct qed_hwfn *p_hwfn)
 
                                p_blk = &clients[i].vf_blks[j];
                                rc = qed_ilt_blk_alloc(p_hwfn, p_blk, i, lines);
-                               if (rc != 0)
+                               if (rc)
                                        goto ilt_shadow_fail;
                        }
                }
@@ -1672,7 +1666,7 @@ static void qed_tm_init_pf(struct qed_hwfn *p_hwfn)
                     p_hwfn->rel_pf_id * NUM_TASK_PF_SEGMENTS + i);
 
                STORE_RT_REG_AGG(p_hwfn, rt_reg, cfg_word);
-               active_seg_mask |= (tm_iids.pf_tids[i] ? (1 << i) : 0);
+               active_seg_mask |= (tm_iids.pf_tids[i] ? BIT(i) : 0);
 
                tm_offset += tm_iids.pf_tids[i];
        }
@@ -1702,8 +1696,7 @@ void qed_cxt_hw_init_pf(struct qed_hwfn *p_hwfn)
 }
 
 int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn,
-                       enum protocol_type type,
-                       u32 *p_cid)
+                       enum protocol_type type, u32 *p_cid)
 {
        struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
        u32 rel_cid;
@@ -1717,8 +1710,7 @@ int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn,
                                      p_mngr->acquired[type].max_count);
 
        if (rel_cid >= p_mngr->acquired[type].max_count) {
-               DP_NOTICE(p_hwfn, "no CID available for protocol %d\n",
-                         type);
+               DP_NOTICE(p_hwfn, "no CID available for protocol %d\n", type);
                return -EINVAL;
        }
 
@@ -1730,8 +1722,7 @@ int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn,
 }
 
 static bool qed_cxt_test_cid_acquired(struct qed_hwfn *p_hwfn,
-                                     u32 cid,
-                                     enum protocol_type *p_type)
+                                     u32 cid, enum protocol_type *p_type)
 {
        struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
        struct qed_cid_acquired_map *p_map;
@@ -1763,8 +1754,7 @@ static bool qed_cxt_test_cid_acquired(struct qed_hwfn *p_hwfn,
        return true;
 }
 
-void qed_cxt_release_cid(struct qed_hwfn *p_hwfn,
-                        u32 cid)
+void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, u32 cid)
 {
        struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
        enum protocol_type type;
@@ -1781,8 +1771,7 @@ void qed_cxt_release_cid(struct qed_hwfn *p_hwfn,
        __clear_bit(rel_cid, p_mngr->acquired[type].cid_map);
 }
 
-int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn,
-                        struct qed_cxt_info *p_info)
+int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, struct qed_cxt_info *p_info)
 {
        struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
        u32 conn_cxt_size, hw_p_size, cxts_per_p, line;
index 226cb08..b900dfb 100644 (file)
@@ -1968,6 +1968,7 @@ static int qed_dcbnl_get_ieee_pfc(struct qed_dev *cdev,
 
        if (!dcbx_info->operational.ieee) {
                DP_INFO(hwfn, "DCBX is not enabled/operational in IEEE mode\n");
+               kfree(dcbx_info);
                return -EINVAL;
        }
 
index 0e4f4a9..8117ddf 100644 (file)
@@ -35,8 +35,7 @@
 #include "qed_sriov.h"
 #include "qed_vf.h"
 
-static spinlock_t qm_lock;
-static bool qm_lock_init = false;
+static DEFINE_SPINLOCK(qm_lock);
 
 /* API common to all protocols */
 enum BAR_ID {
@@ -44,8 +43,7 @@ enum BAR_ID {
        BAR_ID_1        /* Used for doorbells */
 };
 
-static u32 qed_hw_bar_size(struct qed_hwfn     *p_hwfn,
-                          enum BAR_ID          bar_id)
+static u32 qed_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id)
 {
        u32 bar_reg = (bar_id == BAR_ID_0 ?
                       PGLUE_B_REG_PF_BAR0_SIZE : PGLUE_B_REG_PF_BAR1_SIZE);
@@ -70,8 +68,7 @@ static u32 qed_hw_bar_size(struct qed_hwfn    *p_hwfn,
        }
 }
 
-void qed_init_dp(struct qed_dev *cdev,
-                u32 dp_module, u8 dp_level)
+void qed_init_dp(struct qed_dev *cdev, u32 dp_module, u8 dp_level)
 {
        u32 i;
 
@@ -543,8 +540,7 @@ int qed_resc_alloc(struct qed_dev *cdev)
        cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL);
        if (!cdev->reset_stats) {
                DP_NOTICE(cdev, "Failed to allocate reset statistics\n");
-               rc = -ENOMEM;
-               goto alloc_err;
+               goto alloc_no_mem;
        }
 
        return 0;
@@ -605,9 +601,8 @@ int qed_final_cleanup(struct qed_hwfn *p_hwfn,
 
        /* Make sure notification is not set before initiating final cleanup */
        if (REG_RD(p_hwfn, addr)) {
-               DP_NOTICE(
-                       p_hwfn,
-                       "Unexpected; Found final cleanup notification before initiating final cleanup\n");
+               DP_NOTICE(p_hwfn,
+                         "Unexpected; Found final cleanup notification before initiating final cleanup\n");
                REG_WR(p_hwfn, addr, 0);
        }
 
@@ -701,17 +696,14 @@ static void qed_init_cau_rt_data(struct qed_dev *cdev)
                                continue;
 
                        qed_init_cau_sb_entry(p_hwfn, &sb_entry,
-                                             p_block->function_id,
-                                             0, 0);
-                       STORE_RT_REG_AGG(p_hwfn, offset + sb_id * 2,
-                                        sb_entry);
+                                             p_block->function_id, 0, 0);
+                       STORE_RT_REG_AGG(p_hwfn, offset + sb_id * 2, sb_entry);
                }
        }
 }
 
 static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
-                             struct qed_ptt *p_ptt,
-                             int hw_mode)
+                             struct qed_ptt *p_ptt, int hw_mode)
 {
        struct qed_qm_info *qm_info = &p_hwfn->qm_info;
        struct qed_qm_common_rt_init_params params;
@@ -759,7 +751,7 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
        qed_port_unpretend(p_hwfn, p_ptt);
 
        rc = qed_init_run(p_hwfn, p_ptt, PHASE_ENGINE, ANY_PHASE_ID, hw_mode);
-       if (rc != 0)
+       if (rc)
                return rc;
 
        qed_wr(p_hwfn, p_ptt, PSWRQ2_REG_L2P_VALIDATE_VFID, 0);
@@ -788,13 +780,12 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
 }
 
 static int qed_hw_init_port(struct qed_hwfn *p_hwfn,
-                           struct qed_ptt *p_ptt,
-                           int hw_mode)
+                           struct qed_ptt *p_ptt, int hw_mode)
 {
        int rc = 0;
 
        rc = qed_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id, hw_mode);
-       if (rc != 0)
+       if (rc)
                return rc;
 
        if (hw_mode & (1 << MODE_MF_SI)) {
@@ -848,7 +839,7 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
        qed_int_igu_init_rt(p_hwfn);
 
        /* Set VLAN in NIG if needed */
-       if (hw_mode & (1 << MODE_MF_SD)) {
+       if (hw_mode & BIT(MODE_MF_SD)) {
                DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "Configuring LLH_FUNC_TAG\n");
                STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET, 1);
                STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET,
@@ -856,7 +847,7 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
        }
 
        /* Enable classification by MAC if needed */
-       if (hw_mode & (1 << MODE_MF_SI)) {
+       if (hw_mode & BIT(MODE_MF_SI)) {
                DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
                           "Configuring TAGMAC_CLS_TYPE\n");
                STORE_RT_REG(p_hwfn,
@@ -871,7 +862,7 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
 
        /* Cleanup chip from previous driver if such remains exist */
        rc = qed_final_cleanup(p_hwfn, p_ptt, rel_pf_id, false);
-       if (rc != 0)
+       if (rc)
                return rc;
 
        /* PF Init sequence */
@@ -950,8 +941,7 @@ static void qed_reset_mb_shadow(struct qed_hwfn *p_hwfn,
        /* Read shadow of current MFW mailbox */
        qed_mcp_read_mb(p_hwfn, p_main_ptt);
        memcpy(p_hwfn->mcp_info->mfw_mb_shadow,
-              p_hwfn->mcp_info->mfw_mb_cur,
-              p_hwfn->mcp_info->mfw_mb_length);
+              p_hwfn->mcp_info->mfw_mb_cur, p_hwfn->mcp_info->mfw_mb_length);
 }
 
 int qed_hw_init(struct qed_dev *cdev,
@@ -971,7 +961,7 @@ int qed_hw_init(struct qed_dev *cdev,
 
        if (IS_PF(cdev)) {
                rc = qed_init_fw_data(cdev, bin_fw_data);
-               if (rc != 0)
+               if (rc)
                        return rc;
        }
 
@@ -988,8 +978,7 @@ int qed_hw_init(struct qed_dev *cdev,
 
                qed_calc_hw_mode(p_hwfn);
 
-               rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt,
-                                     &load_code);
+               rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt, &load_code);
                if (rc) {
                        DP_NOTICE(p_hwfn, "Failed sending LOAD_REQ command\n");
                        return rc;
@@ -1004,11 +993,6 @@ int qed_hw_init(struct qed_dev *cdev,
                p_hwfn->first_on_engine = (load_code ==
                                           FW_MSG_CODE_DRV_LOAD_ENGINE);
 
-               if (!qm_lock_init) {
-                       spin_lock_init(&qm_lock);
-                       qm_lock_init = true;
-               }
-
                switch (load_code) {
                case FW_MSG_CODE_DRV_LOAD_ENGINE:
                        rc = qed_hw_init_common(p_hwfn, p_hwfn->p_main_ptt,
@@ -1071,9 +1055,8 @@ int qed_hw_init(struct qed_dev *cdev,
 }
 
 #define QED_HW_STOP_RETRY_LIMIT (10)
-static inline void qed_hw_timers_stop(struct qed_dev *cdev,
-                                     struct qed_hwfn *p_hwfn,
-                                     struct qed_ptt *p_ptt)
+static void qed_hw_timers_stop(struct qed_dev *cdev,
+                              struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        int i;
 
@@ -1084,8 +1067,7 @@ static inline void qed_hw_timers_stop(struct qed_dev *cdev,
        for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) {
                if ((!qed_rd(p_hwfn, p_ptt,
                             TM_REG_PF_SCAN_ACTIVE_CONN)) &&
-                   (!qed_rd(p_hwfn, p_ptt,
-                            TM_REG_PF_SCAN_ACTIVE_TASK)))
+                   (!qed_rd(p_hwfn, p_ptt, TM_REG_PF_SCAN_ACTIVE_TASK)))
                        break;
 
                /* Dependent on number of connection/tasks, possibly
@@ -1190,8 +1172,7 @@ void qed_hw_stop_fastpath(struct qed_dev *cdev)
                }
 
                DP_VERBOSE(p_hwfn,
-                          NETIF_MSG_IFDOWN,
-                          "Shutting down the fastpath\n");
+                          NETIF_MSG_IFDOWN, "Shutting down the fastpath\n");
 
                qed_wr(p_hwfn, p_ptt,
                       NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x1);
@@ -1219,14 +1200,13 @@ void qed_hw_start_fastpath(struct qed_hwfn *p_hwfn)
               NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x0);
 }
 
-static int qed_reg_assert(struct qed_hwfn *hwfn,
-                         struct qed_ptt *ptt, u32 reg,
-                         bool expected)
+static int qed_reg_assert(struct qed_hwfn *p_hwfn,
+                         struct qed_ptt *p_ptt, u32 reg, bool expected)
 {
-       u32 assert_val = qed_rd(hwfn, ptt, reg);
+       u32 assert_val = qed_rd(p_hwfn, p_ptt, reg);
 
        if (assert_val != expected) {
-               DP_NOTICE(hwfn, "Value at address 0x%x != 0x%08x\n",
+               DP_NOTICE(p_hwfn, "Value at address 0x%08x != 0x%08x\n",
                          reg, expected);
                return -EINVAL;
        }
@@ -1306,8 +1286,7 @@ static void qed_hw_hwfn_prepare(struct qed_hwfn *p_hwfn)
 
        /* Clean Previous errors if such exist */
        qed_wr(p_hwfn, p_hwfn->p_main_ptt,
-              PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR,
-              1 << p_hwfn->abs_pf_id);
+              PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, 1 << p_hwfn->abs_pf_id);
 
        /* enable internal target-read */
        qed_wr(p_hwfn, p_hwfn->p_main_ptt,
@@ -1317,7 +1296,8 @@ static void qed_hw_hwfn_prepare(struct qed_hwfn *p_hwfn)
 static void get_function_id(struct qed_hwfn *p_hwfn)
 {
        /* ME Register */
-       p_hwfn->hw_info.opaque_fid = (u16)REG_RD(p_hwfn, PXP_PF_ME_OPAQUE_ADDR);
+       p_hwfn->hw_info.opaque_fid = (u16) REG_RD(p_hwfn,
+                                                 PXP_PF_ME_OPAQUE_ADDR);
 
        p_hwfn->hw_info.concrete_fid = REG_RD(p_hwfn, PXP_PF_ME_CONCRETE_ADDR);
 
@@ -1326,6 +1306,10 @@ static void get_function_id(struct qed_hwfn *p_hwfn)
                                      PXP_CONCRETE_FID_PFID);
        p_hwfn->port_id = GET_FIELD(p_hwfn->hw_info.concrete_fid,
                                    PXP_CONCRETE_FID_PORT);
+
+       DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE,
+                  "Read ME register: Concrete 0x%08x Opaque 0x%04x\n",
+                  p_hwfn->hw_info.concrete_fid, p_hwfn->hw_info.opaque_fid);
 }
 
 static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
@@ -1417,8 +1401,7 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
        return 0;
 }
 
-static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn,
-                              struct qed_ptt *p_ptt)
+static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, core_cfg;
        u32 port_cfg_addr, link_temp, nvm_cfg_addr, device_capabilities;
@@ -1472,8 +1455,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn,
                p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_1X25G;
                break;
        default:
-               DP_NOTICE(p_hwfn, "Unknown port mode in 0x%08x\n",
-                         core_cfg);
+               DP_NOTICE(p_hwfn, "Unknown port mode in 0x%08x\n", core_cfg);
                break;
        }
 
@@ -1484,11 +1466,11 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn,
        link_temp = qed_rd(p_hwfn, p_ptt,
                           port_cfg_addr +
                           offsetof(struct nvm_cfg1_port, speed_cap_mask));
-       link->speed.advertised_speeds =
-               link_temp & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_MASK;
+       link_temp &= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_MASK;
+       link->speed.advertised_speeds = link_temp;
 
-       p_hwfn->mcp_info->link_capabilities.speed_capabilities =
-                                               link->speed.advertised_speeds;
+       link_temp = link->speed.advertised_speeds;
+       p_hwfn->mcp_info->link_capabilities.speed_capabilities = link_temp;
 
        link_temp = qed_rd(p_hwfn, p_ptt,
                           port_cfg_addr +
@@ -1517,8 +1499,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn,
                link->speed.forced_speed = 100000;
                break;
        default:
-               DP_NOTICE(p_hwfn, "Unknown Speed in 0x%08x\n",
-                         link_temp);
+               DP_NOTICE(p_hwfn, "Unknown Speed in 0x%08x\n", link_temp);
        }
 
        link_temp &= NVM_CFG1_PORT_DRV_FLOW_CONTROL_MASK;
@@ -1628,10 +1609,10 @@ static void qed_get_num_funcs(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 
        DP_VERBOSE(p_hwfn,
                   NETIF_MSG_PROBE,
-                  "PF [rel_id %d, abs_id %d] within the %d enabled functions on the engine\n",
+                  "PF [rel_id %d, abs_id %d] occupies index %d within the %d enabled functions on the engine\n",
                   p_hwfn->rel_pf_id,
                   p_hwfn->abs_pf_id,
-                  p_hwfn->num_funcs_on_engine);
+                  p_hwfn->enabled_func_idx, p_hwfn->num_funcs_on_engine);
 }
 
 static int
@@ -1703,10 +1684,9 @@ static int qed_get_dev_info(struct qed_dev *cdev)
        u32 tmp;
 
        /* Read Vendor Id / Device Id */
-       pci_read_config_word(cdev->pdev, PCI_VENDOR_ID,
-                            &cdev->vendor_id);
-       pci_read_config_word(cdev->pdev, PCI_DEVICE_ID,
-                            &cdev->device_id);
+       pci_read_config_word(cdev->pdev, PCI_VENDOR_ID, &cdev->vendor_id);
+       pci_read_config_word(cdev->pdev, PCI_DEVICE_ID, &cdev->device_id);
+
        cdev->chip_num = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt,
                                     MISCS_REG_CHIP_NUM);
        cdev->chip_rev = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt,
@@ -1782,7 +1762,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
        /* First hwfn learns basic information, e.g., number of hwfns */
        if (!p_hwfn->my_id) {
                rc = qed_get_dev_info(p_hwfn->cdev);
-               if (rc != 0)
+               if (rc)
                        goto err1;
        }
 
@@ -2183,8 +2163,7 @@ int qed_fw_l2_queue(struct qed_hwfn *p_hwfn, u16 src_id, u16 *dst_id)
        return 0;
 }
 
-int qed_fw_vport(struct qed_hwfn *p_hwfn,
-                u8 src_id, u8 *dst_id)
+int qed_fw_vport(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id)
 {
        if (src_id >= RESC_NUM(p_hwfn, QED_VPORT)) {
                u8 min, max;
@@ -2203,8 +2182,7 @@ int qed_fw_vport(struct qed_hwfn *p_hwfn,
        return 0;
 }
 
-int qed_fw_rss_eng(struct qed_hwfn *p_hwfn,
-                  u8 src_id, u8 *dst_id)
+int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id)
 {
        if (src_id >= RESC_NUM(p_hwfn, QED_RSS_ENG)) {
                u8 min, max;
@@ -2386,8 +2364,7 @@ static void qed_disable_wfq_for_all_vports(struct qed_hwfn *p_hwfn,
  * 3. total_req_min_rate [all vports min rate sum] shouldn't exceed min_pf_rate.
  */
 static int qed_init_wfq_param(struct qed_hwfn *p_hwfn,
-                             u16 vport_id, u32 req_rate,
-                             u32 min_pf_rate)
+                             u16 vport_id, u32 req_rate, u32 min_pf_rate)
 {
        u32 total_req_min_rate = 0, total_left_rate = 0, left_rate_per_vp = 0;
        int non_requested_count = 0, req_count = 0, i, num_vports;
@@ -2471,7 +2448,7 @@ static int __qed_configure_vport_wfq(struct qed_hwfn *p_hwfn,
 
        rc = qed_init_wfq_param(p_hwfn, vp_id, rate, p_link->min_pf_rate);
 
-       if (rc == 0)
+       if (!rc)
                qed_configure_wfq_for_all_vports(p_hwfn, p_ptt,
                                                 p_link->min_pf_rate);
        else
index e178853..8ebdc79 100644 (file)
@@ -44,8 +44,7 @@ struct qed_ptt_pool {
 
 int qed_ptt_pool_alloc(struct qed_hwfn *p_hwfn)
 {
-       struct qed_ptt_pool *p_pool = kmalloc(sizeof(*p_pool),
-                                             GFP_KERNEL);
+       struct qed_ptt_pool *p_pool = kmalloc(sizeof(*p_pool), GFP_KERNEL);
        int i;
 
        if (!p_pool)
@@ -113,16 +112,14 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
        return NULL;
 }
 
-void qed_ptt_release(struct qed_hwfn *p_hwfn,
-                    struct qed_ptt *p_ptt)
+void qed_ptt_release(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        spin_lock_bh(&p_hwfn->p_ptt_pool->lock);
        list_add(&p_ptt->list_entry, &p_hwfn->p_ptt_pool->free_list);
        spin_unlock_bh(&p_hwfn->p_ptt_pool->lock);
 }
 
-u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn,
-                       struct qed_ptt *p_ptt)
+u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        /* The HW is using DWORDS and we need to translate it to Bytes */
        return le32_to_cpu(p_ptt->pxp.offset) << 2;
@@ -141,8 +138,7 @@ u32 qed_ptt_get_bar_addr(struct qed_ptt *p_ptt)
 }
 
 void qed_ptt_set_win(struct qed_hwfn *p_hwfn,
-                    struct qed_ptt *p_ptt,
-                    u32 new_hw_addr)
+                    struct qed_ptt *p_ptt, u32 new_hw_addr)
 {
        u32 prev_hw_addr;
 
@@ -166,8 +162,7 @@ void qed_ptt_set_win(struct qed_hwfn *p_hwfn,
 }
 
 static u32 qed_set_ptt(struct qed_hwfn *p_hwfn,
-                      struct qed_ptt *p_ptt,
-                      u32 hw_addr)
+                      struct qed_ptt *p_ptt, u32 hw_addr)
 {
        u32 win_hw_addr = qed_ptt_get_hw_addr(p_hwfn, p_ptt);
        u32 offset;
@@ -224,10 +219,7 @@ u32 qed_rd(struct qed_hwfn *p_hwfn,
 
 static void qed_memcpy_hw(struct qed_hwfn *p_hwfn,
                          struct qed_ptt *p_ptt,
-                         void *addr,
-                         u32 hw_addr,
-                         size_t n,
-                         bool to_device)
+                         void *addr, u32 hw_addr, size_t n, bool to_device)
 {
        u32 dw_count, *host_addr, hw_offset;
        size_t quota, done = 0;
@@ -259,8 +251,7 @@ static void qed_memcpy_hw(struct qed_hwfn *p_hwfn,
 }
 
 void qed_memcpy_from(struct qed_hwfn *p_hwfn,
-                    struct qed_ptt *p_ptt,
-                    void *dest, u32 hw_addr, size_t n)
+                    struct qed_ptt *p_ptt, void *dest, u32 hw_addr, size_t n)
 {
        DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
                   "hw_addr 0x%x, dest %p hw_addr 0x%x, size %lu\n",
@@ -270,8 +261,7 @@ void qed_memcpy_from(struct qed_hwfn *p_hwfn,
 }
 
 void qed_memcpy_to(struct qed_hwfn *p_hwfn,
-                  struct qed_ptt *p_ptt,
-                  u32 hw_addr, void *src, size_t n)
+                  struct qed_ptt *p_ptt, u32 hw_addr, void *src, size_t n)
 {
        DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
                   "hw_addr 0x%x, hw_addr 0x%x, src %p size %lu\n",
@@ -280,9 +270,7 @@ void qed_memcpy_to(struct qed_hwfn *p_hwfn,
        qed_memcpy_hw(p_hwfn, p_ptt, src, hw_addr, n, true);
 }
 
-void qed_fid_pretend(struct qed_hwfn *p_hwfn,
-                    struct qed_ptt *p_ptt,
-                    u16 fid)
+void qed_fid_pretend(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u16 fid)
 {
        u16 control = 0;
 
@@ -309,8 +297,7 @@ void qed_fid_pretend(struct qed_hwfn *p_hwfn,
 }
 
 void qed_port_pretend(struct qed_hwfn *p_hwfn,
-                     struct qed_ptt *p_ptt,
-                     u8 port_id)
+                     struct qed_ptt *p_ptt, u8 port_id)
 {
        u16 control = 0;
 
@@ -326,8 +313,7 @@ void qed_port_pretend(struct qed_hwfn *p_hwfn,
               *(u32 *)&p_ptt->pxp.pretend);
 }
 
-void qed_port_unpretend(struct qed_hwfn *p_hwfn,
-                       struct qed_ptt *p_ptt)
+void qed_port_unpretend(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        u16 control = 0;
 
@@ -429,28 +415,27 @@ u32 qed_dmae_idx_to_go_cmd(u8 idx)
        return DMAE_REG_GO_C0 + (idx << 2);
 }
 
-static int
-qed_dmae_post_command(struct qed_hwfn *p_hwfn,
-                     struct qed_ptt *p_ptt)
+static int qed_dmae_post_command(struct qed_hwfn *p_hwfn,
+                                struct qed_ptt *p_ptt)
 {
-       struct dmae_cmd *command = p_hwfn->dmae_info.p_dmae_cmd;
+       struct dmae_cmd *p_command = p_hwfn->dmae_info.p_dmae_cmd;
        u8 idx_cmd = p_hwfn->dmae_info.channel, i;
        int qed_status = 0;
 
        /* verify address is not NULL */
-       if ((((command->dst_addr_lo == 0) && (command->dst_addr_hi == 0)) ||
-            ((command->src_addr_lo == 0) && (command->src_addr_hi == 0)))) {
+       if ((((!p_command->dst_addr_lo) && (!p_command->dst_addr_hi)) ||
+            ((!p_command->src_addr_lo) && (!p_command->src_addr_hi)))) {
                DP_NOTICE(p_hwfn,
                          "source or destination address 0 idx_cmd=%d\n"
                          "opcode = [0x%08x,0x%04x] len=0x%x src=0x%x:%x dst=0x%x:%x\n",
-                          idx_cmd,
-                          le32_to_cpu(command->opcode),
-                          le16_to_cpu(command->opcode_b),
-                          le16_to_cpu(command->length_dw),
-                          le32_to_cpu(command->src_addr_hi),
-                          le32_to_cpu(command->src_addr_lo),
-                          le32_to_cpu(command->dst_addr_hi),
-                          le32_to_cpu(command->dst_addr_lo));
+                         idx_cmd,
+                         le32_to_cpu(p_command->opcode),
+                         le16_to_cpu(p_command->opcode_b),
+                         le16_to_cpu(p_command->length_dw),
+                         le32_to_cpu(p_command->src_addr_hi),
+                         le32_to_cpu(p_command->src_addr_lo),
+                         le32_to_cpu(p_command->dst_addr_hi),
+                         le32_to_cpu(p_command->dst_addr_lo));
 
                return -EINVAL;
        }
@@ -459,13 +444,13 @@ qed_dmae_post_command(struct qed_hwfn *p_hwfn,
                   NETIF_MSG_HW,
                   "Posting DMAE command [idx %d]: opcode = [0x%08x,0x%04x] len=0x%x src=0x%x:%x dst=0x%x:%x\n",
                   idx_cmd,
-                  le32_to_cpu(command->opcode),
-                  le16_to_cpu(command->opcode_b),
-                  le16_to_cpu(command->length_dw),
-                  le32_to_cpu(command->src_addr_hi),
-                  le32_to_cpu(command->src_addr_lo),
-                  le32_to_cpu(command->dst_addr_hi),
-                  le32_to_cpu(command->dst_addr_lo));
+                  le32_to_cpu(p_command->opcode),
+                  le16_to_cpu(p_command->opcode_b),
+                  le16_to_cpu(p_command->length_dw),
+                  le32_to_cpu(p_command->src_addr_hi),
+                  le32_to_cpu(p_command->src_addr_lo),
+                  le32_to_cpu(p_command->dst_addr_hi),
+                  le32_to_cpu(p_command->dst_addr_lo));
 
        /* Copy the command to DMAE - need to do it before every call
         * for source/dest address no reset.
@@ -475,7 +460,7 @@ qed_dmae_post_command(struct qed_hwfn *p_hwfn,
         */
        for (i = 0; i < DMAE_CMD_SIZE; i++) {
                u32 data = (i < DMAE_CMD_SIZE_TO_FILL) ?
-                          *(((u32 *)command) + i) : 0;
+                          *(((u32 *)p_command) + i) : 0;
 
                qed_wr(p_hwfn, p_ptt,
                       DMAE_REG_CMD_MEM +
@@ -483,9 +468,7 @@ qed_dmae_post_command(struct qed_hwfn *p_hwfn,
                       (i * sizeof(u32)), data);
        }
 
-       qed_wr(p_hwfn, p_ptt,
-              qed_dmae_idx_to_go_cmd(idx_cmd),
-              DMAE_GO_VALUE);
+       qed_wr(p_hwfn, p_ptt, qed_dmae_idx_to_go_cmd(idx_cmd), DMAE_GO_VALUE);
 
        return qed_status;
 }
@@ -498,9 +481,7 @@ int qed_dmae_info_alloc(struct qed_hwfn *p_hwfn)
        u32 **p_comp = &p_hwfn->dmae_info.p_completion_word;
 
        *p_comp = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
-                                    sizeof(u32),
-                                    p_addr,
-                                    GFP_KERNEL);
+                                    sizeof(u32), p_addr, GFP_KERNEL);
        if (!*p_comp) {
                DP_NOTICE(p_hwfn, "Failed to allocate `p_completion_word'\n");
                goto err;
@@ -543,8 +524,7 @@ void qed_dmae_info_free(struct qed_hwfn *p_hwfn)
                p_phys = p_hwfn->dmae_info.completion_word_phys_addr;
                dma_free_coherent(&p_hwfn->cdev->pdev->dev,
                                  sizeof(u32),
-                                 p_hwfn->dmae_info.p_completion_word,
-                                 p_phys);
+                                 p_hwfn->dmae_info.p_completion_word, p_phys);
                p_hwfn->dmae_info.p_completion_word = NULL;
        }
 
@@ -552,8 +532,7 @@ void qed_dmae_info_free(struct qed_hwfn *p_hwfn)
                p_phys = p_hwfn->dmae_info.dmae_cmd_phys_addr;
                dma_free_coherent(&p_hwfn->cdev->pdev->dev,
                                  sizeof(struct dmae_cmd),
-                                 p_hwfn->dmae_info.p_dmae_cmd,
-                                 p_phys);
+                                 p_hwfn->dmae_info.p_dmae_cmd, p_phys);
                p_hwfn->dmae_info.p_dmae_cmd = NULL;
        }
 
@@ -571,9 +550,7 @@ void qed_dmae_info_free(struct qed_hwfn *p_hwfn)
 
 static int qed_dmae_operation_wait(struct qed_hwfn *p_hwfn)
 {
-       u32 wait_cnt = 0;
-       u32 wait_cnt_limit = 10000;
-
+       u32 wait_cnt_limit = 10000, wait_cnt = 0;
        int qed_status = 0;
 
        barrier();
@@ -606,7 +583,7 @@ static int qed_dmae_execute_sub_operation(struct qed_hwfn *p_hwfn,
                                          u64 dst_addr,
                                          u8 src_type,
                                          u8 dst_type,
-                                         u32 length)
+                                         u32 length_dw)
 {
        dma_addr_t phys = p_hwfn->dmae_info.intermediate_buffer_phys_addr;
        struct dmae_cmd *cmd = p_hwfn->dmae_info.p_dmae_cmd;
@@ -624,7 +601,7 @@ static int qed_dmae_execute_sub_operation(struct qed_hwfn *p_hwfn,
                cmd->src_addr_lo = cpu_to_le32(lower_32_bits(phys));
                memcpy(&p_hwfn->dmae_info.p_intermediate_buffer[0],
                       (void *)(uintptr_t)src_addr,
-                      length * sizeof(u32));
+                      length_dw * sizeof(u32));
                break;
        default:
                return -EINVAL;
@@ -645,7 +622,7 @@ static int qed_dmae_execute_sub_operation(struct qed_hwfn *p_hwfn,
                return -EINVAL;
        }
 
-       cmd->length_dw = cpu_to_le16((u16)length);
+       cmd->length_dw = cpu_to_le16((u16)length_dw);
 
        qed_dmae_post_command(p_hwfn, p_ptt);
 
@@ -654,16 +631,14 @@ static int qed_dmae_execute_sub_operation(struct qed_hwfn *p_hwfn,
        if (qed_status) {
                DP_NOTICE(p_hwfn,
                          "qed_dmae_host2grc: Wait Failed. source_addr 0x%llx, grc_addr 0x%llx, size_in_dwords 0x%x\n",
-                         src_addr,
-                         dst_addr,
-                         length);
+                         src_addr, dst_addr, length_dw);
                return qed_status;
        }
 
        if (dst_type == QED_DMAE_ADDRESS_HOST_VIRT)
                memcpy((void *)(uintptr_t)(dst_addr),
                       &p_hwfn->dmae_info.p_intermediate_buffer[0],
-                      length * sizeof(u32));
+                      length_dw * sizeof(u32));
 
        return 0;
 }
@@ -730,10 +705,7 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
                if (qed_status) {
                        DP_NOTICE(p_hwfn,
                                  "qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n",
-                                 qed_status,
-                                 src_addr,
-                                 dst_addr,
-                                 length_cur);
+                                 qed_status, src_addr, dst_addr, length_cur);
                        break;
                }
        }
@@ -743,10 +715,7 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
 
 int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
                      struct qed_ptt *p_ptt,
-                     u64 source_addr,
-                     u32 grc_addr,
-                     u32 size_in_dwords,
-                     u32 flags)
+                 u64 source_addr, u32 grc_addr, u32 size_in_dwords, u32 flags)
 {
        u32 grc_addr_in_dw = grc_addr / sizeof(u32);
        struct qed_dmae_params params;
@@ -768,9 +737,10 @@ int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
-int
-qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 grc_addr,
-                 dma_addr_t dest_addr, u32 size_in_dwords, u32 flags)
+int qed_dmae_grc2host(struct qed_hwfn *p_hwfn,
+                     struct qed_ptt *p_ptt,
+                     u32 grc_addr,
+                     dma_addr_t dest_addr, u32 size_in_dwords, u32 flags)
 {
        u32 grc_addr_in_dw = grc_addr / sizeof(u32);
        struct qed_dmae_params params;
@@ -791,12 +761,11 @@ qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 grc_addr,
        return rc;
 }
 
-int
-qed_dmae_host2host(struct qed_hwfn *p_hwfn,
-                  struct qed_ptt *p_ptt,
-                  dma_addr_t source_addr,
-                  dma_addr_t dest_addr,
-                  u32 size_in_dwords, struct qed_dmae_params *p_params)
+int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
+                      struct qed_ptt *p_ptt,
+                      dma_addr_t source_addr,
+                      dma_addr_t dest_addr,
+                      u32 size_in_dwords, struct qed_dmae_params *p_params)
 {
        int rc;
 
index 9866a20..b7a4b27 100644
@@ -59,17 +59,14 @@ void qed_init_clear_rt_data(struct qed_hwfn *p_hwfn)
                p_hwfn->rt_data.b_valid[i] = false;
 }
 
-void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn,
-                          u32 rt_offset,
-                          u32 val)
+void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn, u32 rt_offset, u32 val)
 {
        p_hwfn->rt_data.init_val[rt_offset] = val;
        p_hwfn->rt_data.b_valid[rt_offset] = true;
 }
 
 void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn,
-                          u32 rt_offset, u32 *p_val,
-                          size_t size)
+                          u32 rt_offset, u32 *p_val, size_t size)
 {
        size_t i;
 
@@ -81,10 +78,7 @@ void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn,
 
 static int qed_init_rt(struct qed_hwfn *p_hwfn,
                       struct qed_ptt *p_ptt,
-                      u32 addr,
-                      u16 rt_offset,
-                      u16 size,
-                      bool b_must_dmae)
+                      u32 addr, u16 rt_offset, u16 size, bool b_must_dmae)
 {
        u32 *p_init_val = &p_hwfn->rt_data.init_val[rt_offset];
        bool *p_valid = &p_hwfn->rt_data.b_valid[rt_offset];
@@ -102,8 +96,7 @@ static int qed_init_rt(struct qed_hwfn       *p_hwfn,
                 * simply write the data instead of using dmae.
                 */
                if (!b_must_dmae) {
-                       qed_wr(p_hwfn, p_ptt, addr + (i << 2),
-                              p_init_val[i]);
+                       qed_wr(p_hwfn, p_ptt, addr + (i << 2), p_init_val[i]);
                        continue;
                }
 
@@ -115,7 +108,7 @@ static int qed_init_rt(struct qed_hwfn      *p_hwfn,
                rc = qed_dmae_host2grc(p_hwfn, p_ptt,
                                       (uintptr_t)(p_init_val + i),
                                       addr + (i << 2), segment, 0);
-               if (rc != 0)
+               if (rc)
                        return rc;
 
                /* Jump over the entire segment, including invalid entry */
@@ -182,9 +175,7 @@ static int qed_init_array_dmae(struct qed_hwfn *p_hwfn,
 
 static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn,
                              struct qed_ptt *p_ptt,
-                             u32 addr,
-                             u32 fill,
-                             u32 fill_count)
+                             u32 addr, u32 fill, u32 fill_count)
 {
        static u32 zero_buffer[DMAE_MAX_RW_SIZE];
 
@@ -199,15 +190,12 @@ static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn,
 
        return qed_dmae_host2grc(p_hwfn, p_ptt,
                                 (uintptr_t)(&zero_buffer[0]),
-                                addr, fill_count,
-                                QED_DMAE_FLAG_RW_REPL_SRC);
+                                addr, fill_count, QED_DMAE_FLAG_RW_REPL_SRC);
 }
 
 static void qed_init_fill(struct qed_hwfn *p_hwfn,
                          struct qed_ptt *p_ptt,
-                         u32 addr,
-                         u32 fill,
-                         u32 fill_count)
+                         u32 addr, u32 fill, u32 fill_count)
 {
        u32 i;
 
@@ -218,12 +206,12 @@ static void qed_init_fill(struct qed_hwfn *p_hwfn,
 static int qed_init_cmd_array(struct qed_hwfn *p_hwfn,
                              struct qed_ptt *p_ptt,
                              struct init_write_op *cmd,
-                             bool b_must_dmae,
-                             bool b_can_dmae)
+                             bool b_must_dmae, bool b_can_dmae)
 {
+       u32 dmae_array_offset = le32_to_cpu(cmd->args.array_offset);
        u32 data = le32_to_cpu(cmd->data);
        u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2;
-       u32 dmae_array_offset = le32_to_cpu(cmd->args.array_offset);
+
        u32 offset, output_len, input_len, max_size;
        struct qed_dev *cdev = p_hwfn->cdev;
        union init_array_hdr *hdr;
@@ -233,8 +221,7 @@ static int qed_init_cmd_array(struct qed_hwfn *p_hwfn,
 
        array_data = cdev->fw_data->arr_data;
 
-       hdr = (union init_array_hdr *)(array_data +
-                                      dmae_array_offset);
+       hdr = (union init_array_hdr *)(array_data + dmae_array_offset);
        data = le32_to_cpu(hdr->raw.data);
        switch (GET_FIELD(data, INIT_ARRAY_RAW_HDR_TYPE)) {
        case INIT_ARR_ZIPPED:
@@ -290,13 +277,12 @@ static int qed_init_cmd_array(struct qed_hwfn *p_hwfn,
 /* init_ops write command */
 static int qed_init_cmd_wr(struct qed_hwfn *p_hwfn,
                           struct qed_ptt *p_ptt,
-                          struct init_write_op *cmd,
-                          bool b_can_dmae)
+                          struct init_write_op *p_cmd, bool b_can_dmae)
 {
-       u32 data = le32_to_cpu(cmd->data);
-       u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2;
+       u32 data = le32_to_cpu(p_cmd->data);
        bool b_must_dmae = GET_FIELD(data, INIT_WRITE_OP_WIDE_BUS);
-       union init_write_args *arg = &cmd->args;
+       u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2;
+       union init_write_args *arg = &p_cmd->args;
        int rc = 0;
 
        /* Sanitize */
@@ -309,20 +295,18 @@ static int qed_init_cmd_wr(struct qed_hwfn *p_hwfn,
 
        switch (GET_FIELD(data, INIT_WRITE_OP_SOURCE)) {
        case INIT_SRC_INLINE:
-               qed_wr(p_hwfn, p_ptt, addr,
-                      le32_to_cpu(arg->inline_val));
+               data = le32_to_cpu(p_cmd->args.inline_val);
+               qed_wr(p_hwfn, p_ptt, addr, data);
                break;
        case INIT_SRC_ZEROS:
-               if (b_must_dmae ||
-                   (b_can_dmae && (le32_to_cpu(arg->zeros_count) >= 64)))
-                       rc = qed_init_fill_dmae(p_hwfn, p_ptt, addr, 0,
-                                               le32_to_cpu(arg->zeros_count));
+               data = le32_to_cpu(p_cmd->args.zeros_count);
+               if (b_must_dmae || (b_can_dmae && (data >= 64)))
+                       rc = qed_init_fill_dmae(p_hwfn, p_ptt, addr, 0, data);
                else
-                       qed_init_fill(p_hwfn, p_ptt, addr, 0,
-                                     le32_to_cpu(arg->zeros_count));
+                       qed_init_fill(p_hwfn, p_ptt, addr, 0, data);
                break;
        case INIT_SRC_ARRAY:
-               rc = qed_init_cmd_array(p_hwfn, p_ptt, cmd,
+               rc = qed_init_cmd_array(p_hwfn, p_ptt, p_cmd,
                                        b_must_dmae, b_can_dmae);
                break;
        case INIT_SRC_RUNTIME:
@@ -353,8 +337,7 @@ static inline bool comp_or(u32 val, u32 expected_val)
 
 /* init_ops read/poll commands */
 static void qed_init_cmd_rd(struct qed_hwfn *p_hwfn,
-                           struct qed_ptt *p_ptt,
-                           struct init_read_op *cmd)
+                           struct qed_ptt *p_ptt, struct init_read_op *cmd)
 {
        bool (*comp_check)(u32 val, u32 expected_val);
        u32 delay = QED_INIT_POLL_PERIOD_US, val;
@@ -412,35 +395,33 @@ static void qed_init_cmd_cb(struct qed_hwfn *p_hwfn,
 }
 
 static u8 qed_init_cmd_mode_match(struct qed_hwfn *p_hwfn,
-                                 u16 *offset,
-                                 int modes)
+                                 u16 *p_offset, int modes)
 {
        struct qed_dev *cdev = p_hwfn->cdev;
        const u8 *modes_tree_buf;
        u8 arg1, arg2, tree_val;
 
        modes_tree_buf = cdev->fw_data->modes_tree_buf;
-       tree_val = modes_tree_buf[(*offset)++];
+       tree_val = modes_tree_buf[(*p_offset)++];
        switch (tree_val) {
        case INIT_MODE_OP_NOT:
-               return qed_init_cmd_mode_match(p_hwfn, offset, modes) ^ 1;
+               return qed_init_cmd_mode_match(p_hwfn, p_offset, modes) ^ 1;
        case INIT_MODE_OP_OR:
-               arg1    = qed_init_cmd_mode_match(p_hwfn, offset, modes);
-               arg2    = qed_init_cmd_mode_match(p_hwfn, offset, modes);
+               arg1 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes);
+               arg2 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes);
                return arg1 | arg2;
        case INIT_MODE_OP_AND:
-               arg1    = qed_init_cmd_mode_match(p_hwfn, offset, modes);
-               arg2    = qed_init_cmd_mode_match(p_hwfn, offset, modes);
+               arg1 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes);
+               arg2 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes);
                return arg1 & arg2;
        default:
                tree_val -= MAX_INIT_MODE_OPS;
-               return (modes & (1 << tree_val)) ? 1 : 0;
+               return (modes & BIT(tree_val)) ? 1 : 0;
        }
 }
 
 static u32 qed_init_cmd_mode(struct qed_hwfn *p_hwfn,
-                            struct init_if_mode_op *p_cmd,
-                            int modes)
+                            struct init_if_mode_op *p_cmd, int modes)
 {
        u16 offset = le16_to_cpu(p_cmd->modes_buf_offset);
 
@@ -453,8 +434,7 @@ static u32 qed_init_cmd_mode(struct qed_hwfn *p_hwfn,
 
 static u32 qed_init_cmd_phase(struct qed_hwfn *p_hwfn,
                              struct init_if_phase_op *p_cmd,
-                             u32 phase,
-                             u32 phase_id)
+                             u32 phase, u32 phase_id)
 {
        u32 data = le32_to_cpu(p_cmd->phase_data);
        u32 op_data = le32_to_cpu(p_cmd->op_data);
@@ -468,10 +448,7 @@ static u32 qed_init_cmd_phase(struct qed_hwfn *p_hwfn,
 }
 
 int qed_init_run(struct qed_hwfn *p_hwfn,
-                struct qed_ptt *p_ptt,
-                int phase,
-                int phase_id,
-                int modes)
+                struct qed_ptt *p_ptt, int phase, int phase_id, int modes)
 {
        struct qed_dev *cdev = p_hwfn->cdev;
        u32 cmd_num, num_init_ops;
index 8fa50fa..61ec973 100644
@@ -1775,10 +1775,9 @@ struct qed_sb_attn_info {
 };
 
 static inline u16 qed_attn_update_idx(struct qed_hwfn *p_hwfn,
-                                     struct qed_sb_attn_info   *p_sb_desc)
+                                     struct qed_sb_attn_info *p_sb_desc)
 {
-       u16     rc = 0;
-       u16     index;
+       u16 rc = 0, index;
 
        /* Make certain HW write took effect */
        mmiowb();
@@ -1802,15 +1801,13 @@ static inline u16 qed_attn_update_idx(struct qed_hwfn *p_hwfn,
  *  @param asserted_bits newly asserted bits
  *  @return int
  */
-static int qed_int_assertion(struct qed_hwfn *p_hwfn,
-                            u16 asserted_bits)
+static int qed_int_assertion(struct qed_hwfn *p_hwfn, u16 asserted_bits)
 {
        struct qed_sb_attn_info *sb_attn_sw = p_hwfn->p_sb_attn;
        u32 igu_mask;
 
        /* Mask the source of the attention in the IGU */
-       igu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-                         IGU_REG_ATTENTION_ENABLE);
+       igu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE);
        DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "IGU mask: 0x%08x --> 0x%08x\n",
                   igu_mask, igu_mask & ~(asserted_bits & ATTN_BITS_MASKABLE));
        igu_mask &= ~(asserted_bits & ATTN_BITS_MASKABLE);
@@ -2041,7 +2038,7 @@ static int qed_int_deassertion(struct qed_hwfn  *p_hwfn,
                        struct aeu_invert_reg_bit *p_bit = &p_aeu->bits[j];
 
                        if ((p_bit->flags & ATTENTION_PARITY) &&
-                           !!(parities & (1 << bit_idx)))
+                           !!(parities & BIT(bit_idx)))
                                qed_int_deassertion_parity(p_hwfn, p_bit,
                                                           bit_idx);
 
@@ -2114,8 +2111,7 @@ static int qed_int_deassertion(struct qed_hwfn  *p_hwfn,
                                    ~((u32)deasserted_bits));
 
        /* Unmask deasserted attentions in IGU */
-       aeu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-                         IGU_REG_ATTENTION_ENABLE);
+       aeu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE);
        aeu_mask |= (deasserted_bits & ATTN_BITS_MASKABLE);
        qed_wr(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE, aeu_mask);
 
@@ -2160,8 +2156,7 @@ static int qed_int_attentions(struct qed_hwfn *p_hwfn)
                        index, attn_bits, attn_acks, asserted_bits,
                        deasserted_bits, p_sb_attn_sw->known_attn);
        } else if (asserted_bits == 0x100) {
-               DP_INFO(p_hwfn,
-                       "MFW indication via attention\n");
+               DP_INFO(p_hwfn, "MFW indication via attention\n");
        } else {
                DP_VERBOSE(p_hwfn, NETIF_MSG_INTR,
                           "MFW indication [deassertion]\n");
@@ -2173,18 +2168,14 @@ static int qed_int_attentions(struct qed_hwfn *p_hwfn)
                        return rc;
        }
 
-       if (deasserted_bits) {
+       if (deasserted_bits)
                rc = qed_int_deassertion(p_hwfn, deasserted_bits);
-               if (rc)
-                       return rc;
-       }
 
        return rc;
 }
 
 static void qed_sb_ack_attn(struct qed_hwfn *p_hwfn,
-                           void __iomem *igu_addr,
-                           u32 ack_cons)
+                           void __iomem *igu_addr, u32 ack_cons)
 {
        struct igu_prod_cons_update igu_ack = { 0 };
 
@@ -2242,9 +2233,8 @@ void qed_int_sp_dpc(unsigned long hwfn_cookie)
 
        /* Gather Interrupts/Attentions information */
        if (!sb_info->sb_virt) {
-               DP_ERR(
-                       p_hwfn->cdev,
-                       "Interrupt Status block is NULL - cannot check for new interrupts!\n");
+               DP_ERR(p_hwfn->cdev,
+                      "Interrupt Status block is NULL - cannot check for new interrupts!\n");
        } else {
                u32 tmp_index = sb_info->sb_ack;
 
@@ -2255,9 +2245,8 @@ void qed_int_sp_dpc(unsigned long hwfn_cookie)
        }
 
        if (!sb_attn || !sb_attn->sb_attn) {
-               DP_ERR(
-                       p_hwfn->cdev,
-                       "Attentions Status block is NULL - cannot check for new attentions!\n");
+               DP_ERR(p_hwfn->cdev,
+                      "Attentions Status block is NULL - cannot check for new attentions!\n");
        } else {
                u16 tmp_index = sb_attn->index;
 
@@ -2313,8 +2302,7 @@ static void qed_int_sb_attn_free(struct qed_hwfn *p_hwfn)
        if (p_sb->sb_attn)
                dma_free_coherent(&p_hwfn->cdev->pdev->dev,
                                  SB_ATTN_ALIGNED_SIZE(p_hwfn),
-                                 p_sb->sb_attn,
-                                 p_sb->sb_phys);
+                                 p_sb->sb_attn, p_sb->sb_phys);
        kfree(p_sb);
 }
 
@@ -2337,8 +2325,7 @@ static void qed_int_sb_attn_setup(struct qed_hwfn *p_hwfn,
 
 static void qed_int_sb_attn_init(struct qed_hwfn *p_hwfn,
                                 struct qed_ptt *p_ptt,
-                                void *sb_virt_addr,
-                                dma_addr_t sb_phy_addr)
+                                void *sb_virt_addr, dma_addr_t sb_phy_addr)
 {
        struct qed_sb_attn_info *sb_info = p_hwfn->p_sb_attn;
        int i, j, k;
@@ -2378,8 +2365,8 @@ static int qed_int_sb_attn_alloc(struct qed_hwfn *p_hwfn,
 {
        struct qed_dev *cdev = p_hwfn->cdev;
        struct qed_sb_attn_info *p_sb;
-       void *p_virt;
        dma_addr_t p_phys = 0;
+       void *p_virt;
 
        /* SB struct */
        p_sb = kmalloc(sizeof(*p_sb), GFP_KERNEL);
@@ -2412,9 +2399,7 @@ static int qed_int_sb_attn_alloc(struct qed_hwfn *p_hwfn,
 
 void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn,
                           struct cau_sb_entry *p_sb_entry,
-                          u8 pf_id,
-                          u16 vf_number,
-                          u8 vf_valid)
+                          u8 pf_id, u16 vf_number, u8 vf_valid)
 {
        struct qed_dev *cdev = p_hwfn->cdev;
        u32 cau_state;
@@ -2428,12 +2413,6 @@ void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn,
        SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_SB_TIMESET0, 0x7F);
        SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_SB_TIMESET1, 0x7F);
 
-       /* setting the time resultion to a fixed value ( = 1) */
-       SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_TIMER_RES0,
-                 QED_CAU_DEF_RX_TIMER_RES);
-       SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_TIMER_RES1,
-                 QED_CAU_DEF_TX_TIMER_RES);
-
        cau_state = CAU_HC_DISABLE_STATE;
 
        if (cdev->int_coalescing_mode == QED_COAL_MODE_ENABLE) {
@@ -2468,9 +2447,7 @@ void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn,
 void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
                         struct qed_ptt *p_ptt,
                         dma_addr_t sb_phys,
-                        u16 igu_sb_id,
-                        u16 vf_number,
-                        u8 vf_valid)
+                        u16 igu_sb_id, u16 vf_number, u8 vf_valid)
 {
        struct cau_sb_entry sb_entry;
 
@@ -2514,8 +2491,7 @@ void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
                        timer_res = 2;
                timeset = (u8)(p_hwfn->cdev->rx_coalesce_usecs >> timer_res);
                qed_int_cau_conf_pi(p_hwfn, p_ptt, igu_sb_id, RX_PI,
-                                   QED_COAL_RX_STATE_MACHINE,
-                                   timeset);
+                                   QED_COAL_RX_STATE_MACHINE, timeset);
 
                if (p_hwfn->cdev->tx_coalesce_usecs <= 0x7F)
                        timer_res = 0;
@@ -2541,8 +2517,7 @@ void qed_int_cau_conf_pi(struct qed_hwfn *p_hwfn,
                         u8 timeset)
 {
        struct cau_pi_entry pi_entry;
-       u32 sb_offset;
-       u32 pi_offset;
+       u32 sb_offset, pi_offset;
 
        if (IS_VF(p_hwfn->cdev))
                return;
@@ -2569,8 +2544,7 @@ void qed_int_cau_conf_pi(struct qed_hwfn *p_hwfn,
 }
 
 void qed_int_sb_setup(struct qed_hwfn *p_hwfn,
-                     struct qed_ptt *p_ptt,
-                     struct qed_sb_info *sb_info)
+                     struct qed_ptt *p_ptt, struct qed_sb_info *sb_info)
 {
        /* zero status block and ack counter */
        sb_info->sb_ack = 0;
@@ -2590,8 +2564,7 @@ void qed_int_sb_setup(struct qed_hwfn *p_hwfn,
  *
  * @return u16
  */
-static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn,
-                            u16 sb_id)
+static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id)
 {
        u16 igu_sb_id;
 
@@ -2603,8 +2576,12 @@ static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn,
        else
                igu_sb_id = qed_vf_get_igu_sb_id(p_hwfn, sb_id);
 
-       DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "SB [%s] index is 0x%04x\n",
-                  (sb_id == QED_SP_SB_ID) ? "DSB" : "non-DSB", igu_sb_id);
+       if (sb_id == QED_SP_SB_ID)
+               DP_VERBOSE(p_hwfn, NETIF_MSG_INTR,
+                          "Slowpath SB index in IGU is 0x%04x\n", igu_sb_id);
+       else
+               DP_VERBOSE(p_hwfn, NETIF_MSG_INTR,
+                          "SB [%04x] <--> IGU SB [%04x]\n", sb_id, igu_sb_id);
 
        return igu_sb_id;
 }
@@ -2612,9 +2589,7 @@ static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn,
 int qed_int_sb_init(struct qed_hwfn *p_hwfn,
                    struct qed_ptt *p_ptt,
                    struct qed_sb_info *sb_info,
-                   void *sb_virt_addr,
-                   dma_addr_t sb_phy_addr,
-                   u16 sb_id)
+                   void *sb_virt_addr, dma_addr_t sb_phy_addr, u16 sb_id)
 {
        sb_info->sb_virt = sb_virt_addr;
        sb_info->sb_phys = sb_phy_addr;
@@ -2650,8 +2625,7 @@ int qed_int_sb_init(struct qed_hwfn *p_hwfn,
 }
 
 int qed_int_sb_release(struct qed_hwfn *p_hwfn,
-                      struct qed_sb_info *sb_info,
-                      u16 sb_id)
+                      struct qed_sb_info *sb_info, u16 sb_id)
 {
        if (sb_id == QED_SP_SB_ID) {
                DP_ERR(p_hwfn, "Do Not free sp sb using this function");
@@ -2685,8 +2659,7 @@ static void qed_int_sp_sb_free(struct qed_hwfn *p_hwfn)
        kfree(p_sb);
 }
 
-static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn,
-                              struct qed_ptt *p_ptt)
+static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        struct qed_sb_sp_info *p_sb;
        dma_addr_t p_phys = 0;
@@ -2721,9 +2694,7 @@ static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn,
 
 int qed_int_register_cb(struct qed_hwfn *p_hwfn,
                        qed_int_comp_cb_t comp_cb,
-                       void *cookie,
-                       u8 *sb_idx,
-                       __le16 **p_fw_cons)
+                       void *cookie, u8 *sb_idx, __le16 **p_fw_cons)
 {
        struct qed_sb_sp_info *p_sp_sb = p_hwfn->p_sp_sb;
        int rc = -ENOMEM;
@@ -2764,8 +2735,7 @@ u16 qed_int_get_sp_sb_id(struct qed_hwfn *p_hwfn)
 }
 
 void qed_int_igu_enable_int(struct qed_hwfn *p_hwfn,
-                           struct qed_ptt *p_ptt,
-                           enum qed_int_mode int_mode)
+                           struct qed_ptt *p_ptt, enum qed_int_mode int_mode)
 {
        u32 igu_pf_conf = IGU_PF_CONF_FUNC_EN | IGU_PF_CONF_ATTN_BIT_EN;
 
@@ -2809,7 +2779,7 @@ int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
        qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_MASK_ATTN_IGU, 0xff);
        if ((int_mode != QED_INT_MODE_INTA) || IS_LEAD_HWFN(p_hwfn)) {
                rc = qed_slowpath_irq_req(p_hwfn);
-               if (rc != 0) {
+               if (rc) {
                        DP_NOTICE(p_hwfn, "Slowpath IRQ request failed\n");
                        return -EINVAL;
                }
@@ -2822,8 +2792,7 @@ int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
        return rc;
 }
 
-void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn,
-                            struct qed_ptt *p_ptt)
+void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        p_hwfn->b_int_enabled = 0;
 
@@ -2950,13 +2919,11 @@ void qed_int_igu_init_pure_rt(struct qed_hwfn *p_hwfn,
                                        p_hwfn->hw_info.opaque_fid, b_set);
 }
 
-static u32 qed_int_igu_read_cam_block(struct qed_hwfn  *p_hwfn,
-                                     struct qed_ptt    *p_ptt,
-                                     u16               sb_id)
+static u32 qed_int_igu_read_cam_block(struct qed_hwfn *p_hwfn,
+                                     struct qed_ptt *p_ptt, u16 sb_id)
 {
        u32 val = qed_rd(p_hwfn, p_ptt,
-                        IGU_REG_MAPPING_MEMORY +
-                        sizeof(u32) * sb_id);
+                        IGU_REG_MAPPING_MEMORY + sizeof(u32) * sb_id);
        struct qed_igu_block *p_block;
 
        p_block = &p_hwfn->hw_info.p_igu_info->igu_map.igu_blocks[sb_id];
@@ -2983,8 +2950,7 @@ out:
        return val;
 }
 
-int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn,
-                        struct qed_ptt *p_ptt)
+int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        struct qed_igu_info *p_igu_info;
        u32 val, min_vf = 0, max_vf = 0;
@@ -3104,22 +3070,19 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn,
  */
 void qed_int_igu_init_rt(struct qed_hwfn *p_hwfn)
 {
-       u32 igu_pf_conf = 0;
-
-       igu_pf_conf |= IGU_PF_CONF_FUNC_EN;
+       u32 igu_pf_conf = IGU_PF_CONF_FUNC_EN;
 
        STORE_RT_REG(p_hwfn, IGU_REG_PF_CONFIGURATION_RT_OFFSET, igu_pf_conf);
 }
 
 u64 qed_int_igu_read_sisr_reg(struct qed_hwfn *p_hwfn)
 {
-       u64 intr_status = 0;
-       u32 intr_status_lo = 0;
-       u32 intr_status_hi = 0;
        u32 lsb_igu_cmd_addr = IGU_REG_SISR_MDPC_WMASK_LSB_UPPER -
                               IGU_CMD_INT_ACK_BASE;
        u32 msb_igu_cmd_addr = IGU_REG_SISR_MDPC_WMASK_MSB_UPPER -
                               IGU_CMD_INT_ACK_BASE;
+       u32 intr_status_hi = 0, intr_status_lo = 0;
+       u64 intr_status = 0;
 
        intr_status_lo = REG_RD(p_hwfn,
                                GTT_BAR0_MAP_REG_IGU_CMD +
@@ -3153,8 +3116,7 @@ static void qed_int_sp_dpc_free(struct qed_hwfn *p_hwfn)
        kfree(p_hwfn->sp_dpc);
 }
 
-int qed_int_alloc(struct qed_hwfn *p_hwfn,
-                 struct qed_ptt *p_ptt)
+int qed_int_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        int rc = 0;
 
@@ -3169,10 +3131,9 @@ int qed_int_alloc(struct qed_hwfn *p_hwfn,
                return rc;
        }
        rc = qed_int_sb_attn_alloc(p_hwfn, p_ptt);
-       if (rc) {
+       if (rc)
                DP_ERR(p_hwfn->cdev, "Failed to allocate sb attn mem\n");
-               return rc;
-       }
+
        return rc;
 }
 
@@ -3183,8 +3144,7 @@ void qed_int_free(struct qed_hwfn *p_hwfn)
        qed_int_sp_dpc_free(p_hwfn);
 }
 
-void qed_int_setup(struct qed_hwfn *p_hwfn,
-                  struct qed_ptt *p_ptt)
+void qed_int_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        qed_int_sb_setup(p_hwfn, p_ptt, &p_hwfn->p_sp_sb->sb_info);
        qed_int_sb_attn_setup(p_hwfn, p_ptt);
index 401e738..c823c46 100644
@@ -52,7 +52,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
        u16 rx_mode = 0;
 
        rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
-       if (rc != 0)
+       if (rc)
                return rc;
 
        memset(&init_data, 0, sizeof(init_data));
@@ -80,8 +80,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
        p_ramrod->rx_mode.state = cpu_to_le16(rx_mode);
 
        /* TPA related fields */
-       memset(&p_ramrod->tpa_param, 0,
-              sizeof(struct eth_vport_tpa_param));
+       memset(&p_ramrod->tpa_param, 0, sizeof(struct eth_vport_tpa_param));
 
        p_ramrod->tpa_param.max_buff_num = p_params->max_buffers_per_cqe;
 
@@ -306,14 +305,14 @@ qed_sp_update_mcast_bin(struct qed_hwfn *p_hwfn,
        memset(&p_ramrod->approx_mcast.bins, 0,
               sizeof(p_ramrod->approx_mcast.bins));
 
-       if (p_params->update_approx_mcast_flg) {
-               p_ramrod->common.update_approx_mcast_flg = 1;
-               for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) {
-                       u32 *p_bins = (u32 *)p_params->bins;
-                       __le32 val = cpu_to_le32(p_bins[i]);
+       if (!p_params->update_approx_mcast_flg)
+               return;
 
-                       p_ramrod->approx_mcast.bins[i] = val;
-               }
+       p_ramrod->common.update_approx_mcast_flg = 1;
+       for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) {
+               u32 *p_bins = (u32 *)p_params->bins;
+
+               p_ramrod->approx_mcast.bins[i] = cpu_to_le32(p_bins[i]);
        }
 }
 
@@ -336,7 +335,7 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn,
        }
 
        rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
-       if (rc != 0)
+       if (rc)
                return rc;
 
        memset(&init_data, 0, sizeof(init_data));
@@ -361,8 +360,8 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn,
        p_cmn->tx_active_flg = p_params->vport_active_tx_flg;
        p_cmn->update_tx_active_flg = p_params->update_vport_active_tx_flg;
        p_cmn->accept_any_vlan = p_params->accept_any_vlan;
-       p_cmn->update_accept_any_vlan_flg =
-                       p_params->update_accept_any_vlan_flg;
+       val = p_params->update_accept_any_vlan_flg;
+       p_cmn->update_accept_any_vlan_flg = val;
 
        p_cmn->inner_vlan_removal_en = p_params->inner_vlan_removal_flg;
        val = p_params->update_inner_vlan_removal_flg;
@@ -411,7 +410,7 @@ int qed_sp_vport_stop(struct qed_hwfn *p_hwfn, u16 opaque_fid, u8 vport_id)
                return qed_vf_pf_vport_stop(p_hwfn);
 
        rc = qed_fw_vport(p_hwfn, vport_id, &abs_vport_id);
-       if (rc != 0)
+       if (rc)
                return rc;
 
        memset(&init_data, 0, sizeof(init_data));
@@ -476,7 +475,7 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev,
 
                rc = qed_sp_vport_update(p_hwfn, &vport_update_params,
                                         comp_mode, p_comp_data);
-               if (rc != 0) {
+               if (rc) {
                        DP_ERR(cdev, "Update rx_mode failed %d\n", rc);
                        return rc;
                }
@@ -511,7 +510,7 @@ static int qed_sp_release_queue_cid(
 int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
                                u16 opaque_fid,
                                u32 cid,
-                               struct qed_queue_start_common_params *params,
+                               struct qed_queue_start_common_params *p_params,
                                u8 stats_id,
                                u16 bd_max_bytes,
                                dma_addr_t bd_chain_phys_addr,
@@ -526,23 +525,23 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
        int rc = -EINVAL;
 
        /* Store information for the stop */
-       p_rx_cid                = &p_hwfn->p_rx_cids[params->queue_id];
-       p_rx_cid->cid           = cid;
-       p_rx_cid->opaque_fid    = opaque_fid;
-       p_rx_cid->vport_id      = params->vport_id;
+       p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id];
+       p_rx_cid->cid = cid;
+       p_rx_cid->opaque_fid = opaque_fid;
+       p_rx_cid->vport_id = p_params->vport_id;
 
-       rc = qed_fw_vport(p_hwfn, params->vport_id, &abs_vport_id);
-       if (rc != 0)
+       rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
+       if (rc)
                return rc;
 
-       rc = qed_fw_l2_queue(p_hwfn, params->queue_id, &abs_rx_q_id);
-       if (rc != 0)
+       rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_rx_q_id);
+       if (rc)
                return rc;
 
        DP_VERBOSE(p_hwfn, QED_MSG_SP,
                   "opaque_fid=0x%x, cid=0x%x, rx_qid=0x%x, vport_id=0x%x, sb_id=0x%x\n",
-                  opaque_fid, cid, params->queue_id, params->vport_id,
-                  params->sb);
+                  opaque_fid,
+                  cid, p_params->queue_id, p_params->vport_id, p_params->sb);
 
        /* Get SPQ entry */
        memset(&init_data, 0, sizeof(init_data));
@@ -558,24 +557,25 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
 
        p_ramrod = &p_ent->ramrod.rx_queue_start;
 
-       p_ramrod->sb_id                 = cpu_to_le16(params->sb);
-       p_ramrod->sb_index              = params->sb_idx;
-       p_ramrod->vport_id              = abs_vport_id;
-       p_ramrod->stats_counter_id      = stats_id;
-       p_ramrod->rx_queue_id           = cpu_to_le16(abs_rx_q_id);
-       p_ramrod->complete_cqe_flg      = 0;
-       p_ramrod->complete_event_flg    = 1;
+       p_ramrod->sb_id = cpu_to_le16(p_params->sb);
+       p_ramrod->sb_index = p_params->sb_idx;
+       p_ramrod->vport_id = abs_vport_id;
+       p_ramrod->stats_counter_id = stats_id;
+       p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id);
+       p_ramrod->complete_cqe_flg = 0;
+       p_ramrod->complete_event_flg = 1;
 
-       p_ramrod->bd_max_bytes  = cpu_to_le16(bd_max_bytes);
+       p_ramrod->bd_max_bytes = cpu_to_le16(bd_max_bytes);
        DMA_REGPAIR_LE(p_ramrod->bd_base, bd_chain_phys_addr);
 
-       p_ramrod->num_of_pbl_pages      = cpu_to_le16(cqe_pbl_size);
+       p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size);
        DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, cqe_pbl_addr);
 
-       p_ramrod->vf_rx_prod_index = params->vf_qid;
-       if (params->vf_qid)
+       p_ramrod->vf_rx_prod_index = p_params->vf_qid;
+       if (p_params->vf_qid)
                DP_VERBOSE(p_hwfn, QED_MSG_SP,
-                          "Queue is meant for VF rxq[%04x]\n", params->vf_qid);
+                          "Queue is meant for VF rxq[%04x]\n",
+                          p_params->vf_qid);
 
        return qed_spq_post(p_hwfn, p_ent, NULL);
 }
@@ -583,7 +583,7 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
 static int
 qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn,
                          u16 opaque_fid,
-                         struct qed_queue_start_common_params *params,
+                         struct qed_queue_start_common_params *p_params,
                          u16 bd_max_bytes,
                          dma_addr_t bd_chain_phys_addr,
                          dma_addr_t cqe_pbl_addr,
@@ -597,20 +597,20 @@ qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn,
 
        if (IS_VF(p_hwfn->cdev)) {
                return qed_vf_pf_rxq_start(p_hwfn,
-                                          params->queue_id,
-                                          params->sb,
-                                          params->sb_idx,
+                                          p_params->queue_id,
+                                          p_params->sb,
+                                          (u8)p_params->sb_idx,
                                           bd_max_bytes,
                                           bd_chain_phys_addr,
                                           cqe_pbl_addr, cqe_pbl_size, pp_prod);
        }
 
-       rc = qed_fw_l2_queue(p_hwfn, params->queue_id, &abs_l2_queue);
-       if (rc != 0)
+       rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_l2_queue);
+       if (rc)
                return rc;
 
-       rc = qed_fw_vport(p_hwfn, params->vport_id, &abs_stats_id);
-       if (rc != 0)
+       rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_stats_id);
+       if (rc)
                return rc;
 
        *pp_prod = (u8 __iomem *)p_hwfn->regview +
@@ -622,9 +622,8 @@ qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn,
                          (u32 *)(&init_prod_val));
 
        /* Allocate a CID for the queue */
-       p_rx_cid = &p_hwfn->p_rx_cids[params->queue_id];
-       rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH,
-                                &p_rx_cid->cid);
+       p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id];
+       rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_rx_cid->cid);
        if (rc) {
                DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
                return rc;
@@ -634,14 +633,14 @@ qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn,
        rc = qed_sp_eth_rxq_start_ramrod(p_hwfn,
                                         opaque_fid,
                                         p_rx_cid->cid,
-                                        params,
+                                        p_params,
                                         abs_stats_id,
                                         bd_max_bytes,
                                         bd_chain_phys_addr,
                                         cqe_pbl_addr,
                                         cqe_pbl_size);
 
-       if (rc != 0)
+       if (rc)
                qed_sp_release_queue_cid(p_hwfn, p_rx_cid);
 
        return rc;
@@ -788,21 +787,20 @@ int qed_sp_eth_txq_start_ramrod(struct qed_hwfn  *p_hwfn,
        if (rc)
                return rc;
 
-       p_ramrod                = &p_ent->ramrod.tx_queue_start;
-       p_ramrod->vport_id      = abs_vport_id;
+       p_ramrod = &p_ent->ramrod.tx_queue_start;
+       p_ramrod->vport_id = abs_vport_id;
 
-       p_ramrod->sb_id                 = cpu_to_le16(p_params->sb);
-       p_ramrod->sb_index              = p_params->sb_idx;
-       p_ramrod->stats_counter_id      = stats_id;
+       p_ramrod->sb_id = cpu_to_le16(p_params->sb);
+       p_ramrod->sb_index = p_params->sb_idx;
+       p_ramrod->stats_counter_id = stats_id;
 
-       p_ramrod->queue_zone_id         = cpu_to_le16(abs_tx_q_id);
-       p_ramrod->pbl_size              = cpu_to_le16(pbl_size);
+       p_ramrod->queue_zone_id = cpu_to_le16(abs_tx_q_id);
+
+       p_ramrod->pbl_size = cpu_to_le16(pbl_size);
        DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, pbl_addr);
 
-       pq_id                   = qed_get_qm_pq(p_hwfn,
-                                               PROTOCOLID_ETH,
-                                               p_pq_params);
-       p_ramrod->qm_pq_id      = cpu_to_le16(pq_id);
+       pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, p_pq_params);
+       p_ramrod->qm_pq_id = cpu_to_le16(pq_id);
 
        return qed_spq_post(p_hwfn, p_ent, NULL);
 }
@@ -836,8 +834,7 @@ qed_sp_eth_tx_queue_start(struct qed_hwfn *p_hwfn,
        memset(&pq_params, 0, sizeof(pq_params));
 
        /* Allocate a CID for the queue */
-       rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH,
-                                &p_tx_cid->cid);
+       rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_tx_cid->cid);
        if (rc) {
                DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
                return rc;
@@ -896,8 +893,7 @@ int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, u16 tx_queue_id)
        return qed_sp_release_queue_cid(p_hwfn, p_tx_cid);
 }
 
-static enum eth_filter_action
-qed_filter_action(enum qed_filter_opcode opcode)
+static enum eth_filter_action qed_filter_action(enum qed_filter_opcode opcode)
 {
        enum eth_filter_action action = MAX_ETH_FILTER_ACTION;
 
@@ -1033,19 +1029,19 @@ qed_filter_ucast_common(struct qed_hwfn *p_hwfn,
                p_first_filter->vni = cpu_to_le32(p_filter_cmd->vni);
 
        if (p_filter_cmd->opcode == QED_FILTER_MOVE) {
-               p_second_filter->type           = p_first_filter->type;
-               p_second_filter->mac_msb        = p_first_filter->mac_msb;
-               p_second_filter->mac_mid        = p_first_filter->mac_mid;
-               p_second_filter->mac_lsb        = p_first_filter->mac_lsb;
-               p_second_filter->vlan_id        = p_first_filter->vlan_id;
-               p_second_filter->vni            = p_first_filter->vni;
+               p_second_filter->type = p_first_filter->type;
+               p_second_filter->mac_msb = p_first_filter->mac_msb;
+               p_second_filter->mac_mid = p_first_filter->mac_mid;
+               p_second_filter->mac_lsb = p_first_filter->mac_lsb;
+               p_second_filter->vlan_id = p_first_filter->vlan_id;
+               p_second_filter->vni = p_first_filter->vni;
 
                p_first_filter->action = ETH_FILTER_ACTION_REMOVE;
 
                p_first_filter->vport_id = vport_to_remove_from;
 
-               p_second_filter->action         = ETH_FILTER_ACTION_ADD;
-               p_second_filter->vport_id       = vport_to_add_to;
+               p_second_filter->action = ETH_FILTER_ACTION_ADD;
+               p_second_filter->vport_id = vport_to_add_to;
        } else if (p_filter_cmd->opcode == QED_FILTER_REPLACE) {
                p_first_filter->vport_id = vport_to_add_to;
                memcpy(p_second_filter, p_first_filter,
@@ -1086,7 +1082,7 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn,
        rc = qed_filter_ucast_common(p_hwfn, opaque_fid, p_filter_cmd,
                                     &p_ramrod, &p_ent,
                                     comp_mode, p_comp_data);
-       if (rc != 0) {
+       if (rc) {
                DP_ERR(p_hwfn, "Uni. filter command failed %d\n", rc);
                return rc;
        }
@@ -1094,10 +1090,8 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn,
        p_header->assert_on_error = p_filter_cmd->assert_on_error;
 
        rc = qed_spq_post(p_hwfn, p_ent, NULL);
-       if (rc != 0) {
-               DP_ERR(p_hwfn,
-                      "Unicast filter ADD command failed %d\n",
-                      rc);
+       if (rc) {
+               DP_ERR(p_hwfn, "Unicast filter ADD command failed %d\n", rc);
                return rc;
        }
 
@@ -1136,15 +1130,10 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn,
  * Return:
  ******************************************************************************/
 static u32 qed_calc_crc32c(u8 *crc32_packet,
-                          u32 crc32_length,
-                          u32 crc32_seed,
-                          u8 complement)
+                          u32 crc32_length, u32 crc32_seed, u8 complement)
 {
-       u32 byte = 0;
-       u32 bit = 0;
-       u8 msb = 0;
-       u8 current_byte = 0;
-       u32 crc32_result = crc32_seed;
+       u32 byte = 0, bit = 0, crc32_result = crc32_seed;
+       u8 msb = 0, current_byte = 0;
 
        if ((!crc32_packet) ||
            (crc32_length == 0) ||
@@ -1164,9 +1153,7 @@ static u32 qed_calc_crc32c(u8 *crc32_packet,
        return crc32_result;
 }
 
-static inline u32 qed_crc32c_le(u32 seed,
-                               u8 *mac,
-                               u32 len)
+static u32 qed_crc32c_le(u32 seed, u8 *mac, u32 len)
 {
        u32 packet_buf[2] = { 0 };
 
@@ -1196,17 +1183,14 @@ qed_sp_eth_filter_mcast(struct qed_hwfn *p_hwfn,
        u8 abs_vport_id = 0;
        int rc, i;
 
-       if (p_filter_cmd->opcode == QED_FILTER_ADD) {
+       if (p_filter_cmd->opcode == QED_FILTER_ADD)
                rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_add_to,
                                  &abs_vport_id);
-               if (rc)
-                       return rc;
-       } else {
+       else
                rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_remove_from,
                                  &abs_vport_id);
-               if (rc)
-                       return rc;
-       }
+       if (rc)
+               return rc;
 
        /* Get SPQ entry */
        memset(&init_data, 0, sizeof(init_data));
@@ -1244,11 +1228,11 @@ qed_sp_eth_filter_mcast(struct qed_hwfn *p_hwfn,
 
                /* Convert to correct endianity */
                for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) {
+                       struct vport_update_ramrod_mcast *p_ramrod_bins;
                        u32 *p_bins = (u32 *)bins;
-                       struct vport_update_ramrod_mcast *approx_mcast;
 
-                       approx_mcast = &p_ramrod->approx_mcast;
-                       approx_mcast->bins[i] = cpu_to_le32(p_bins[i]);
+                       p_ramrod_bins = &p_ramrod->approx_mcast;
+                       p_ramrod_bins->bins[i] = cpu_to_le32(p_bins[i]);
                }
        }
 
@@ -1286,8 +1270,7 @@ static int qed_filter_mcast_cmd(struct qed_dev *cdev,
                rc = qed_sp_eth_filter_mcast(p_hwfn,
                                             opaque_fid,
                                             p_filter_cmd,
-                                            comp_mode,
-                                            p_comp_data);
+                                            comp_mode, p_comp_data);
        }
        return rc;
 }
@@ -1314,9 +1297,8 @@ static int qed_filter_ucast_cmd(struct qed_dev *cdev,
                rc = qed_sp_eth_filter_ucast(p_hwfn,
                                             opaque_fid,
                                             p_filter_cmd,
-                                            comp_mode,
-                                            p_comp_data);
-               if (rc != 0)
+                                            comp_mode, p_comp_data);
+               if (rc)
                        break;
        }
 
@@ -1590,8 +1572,7 @@ out:
        }
 }
 
-void qed_get_vport_stats(struct qed_dev *cdev,
-                        struct qed_eth_stats *stats)
+void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
 {
        u32 i;
 
@@ -1766,8 +1747,7 @@ static int qed_start_vport(struct qed_dev *cdev,
        return 0;
 }
 
-static int qed_stop_vport(struct qed_dev *cdev,
-                         u8 vport_id)
+static int qed_stop_vport(struct qed_dev *cdev, u8 vport_id)
 {
        int rc, i;
 
@@ -1775,8 +1755,7 @@ static int qed_stop_vport(struct qed_dev *cdev,
                struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
                rc = qed_sp_vport_stop(p_hwfn,
-                                      p_hwfn->hw_info.opaque_fid,
-                                      vport_id);
+                                      p_hwfn->hw_info.opaque_fid, vport_id);
 
                if (rc) {
                        DP_ERR(cdev, "Failed to stop VPORT\n");
@@ -1801,10 +1780,8 @@ static int qed_update_vport(struct qed_dev *cdev,
 
        /* Translate protocol params into sp params */
        sp_params.vport_id = params->vport_id;
-       sp_params.update_vport_active_rx_flg =
-               params->update_vport_active_flg;
-       sp_params.update_vport_active_tx_flg =
-               params->update_vport_active_flg;
+       sp_params.update_vport_active_rx_flg = params->update_vport_active_flg;
+       sp_params.update_vport_active_tx_flg = params->update_vport_active_flg;
        sp_params.vport_active_rx_flg = params->vport_active_flg;
        sp_params.vport_active_tx_flg = params->vport_active_flg;
        sp_params.update_tx_switching_flg = params->update_tx_switching_flg;
@@ -1817,8 +1794,7 @@ static int qed_update_vport(struct qed_dev *cdev,
         * We need to re-fix the rss values per engine for CMT.
         */
        if (cdev->num_hwfns > 1 && params->update_rss_flg) {
-               struct qed_update_vport_rss_params *rss =
-                       &params->rss_params;
+               struct qed_update_vport_rss_params *rss = &params->rss_params;
                int k, max = 0;
 
                /* Find largest entry, since it's possible RSS needs to
@@ -1861,8 +1837,8 @@ static int qed_update_vport(struct qed_dev *cdev,
                       QED_RSS_IND_TABLE_SIZE * sizeof(u16));
                memcpy(sp_rss_params.rss_key, params->rss_params.rss_key,
                       QED_RSS_KEY_SIZE * sizeof(u32));
+               sp_params.rss_params = &sp_rss_params;
        }
-       sp_params.rss_params = &sp_rss_params;
 
        for_each_hwfn(cdev, i) {
                struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
@@ -1893,8 +1869,8 @@ static int qed_start_rxq(struct qed_dev *cdev,
                         u16 cqe_pbl_size,
                         void __iomem **pp_prod)
 {
-       int rc, hwfn_index;
        struct qed_hwfn *p_hwfn;
+       int rc, hwfn_index;
 
        hwfn_index = params->rss_id % cdev->num_hwfns;
        p_hwfn = &cdev->hwfns[hwfn_index];
@@ -1935,8 +1911,7 @@ static int qed_stop_rxq(struct qed_dev *cdev,
 
        rc = qed_sp_eth_rx_queue_stop(p_hwfn,
                                      params->rx_queue_id / cdev->num_hwfns,
-                                     params->eq_completion_only,
-                                     false);
+                                     params->eq_completion_only, false);
        if (rc) {
                DP_ERR(cdev, "Failed to stop RXQ#%d\n", params->rx_queue_id);
                return rc;
@@ -2047,11 +2022,11 @@ static int qed_configure_filter_rx_mode(struct qed_dev *cdev,
 
        memset(&accept_flags, 0, sizeof(accept_flags));
 
-       accept_flags.update_rx_mode_config      = 1;
-       accept_flags.update_tx_mode_config      = 1;
-       accept_flags.rx_accept_filter           = QED_ACCEPT_UCAST_MATCHED |
-                                                 QED_ACCEPT_MCAST_MATCHED |
-                                                 QED_ACCEPT_BCAST;
+       accept_flags.update_rx_mode_config = 1;
+       accept_flags.update_tx_mode_config = 1;
+       accept_flags.rx_accept_filter = QED_ACCEPT_UCAST_MATCHED |
+                                       QED_ACCEPT_MCAST_MATCHED |
+                                       QED_ACCEPT_BCAST;
        accept_flags.tx_accept_filter = QED_ACCEPT_UCAST_MATCHED |
                                        QED_ACCEPT_MCAST_MATCHED |
                                        QED_ACCEPT_BCAST;
@@ -2072,9 +2047,8 @@ static int qed_configure_filter_ucast(struct qed_dev *cdev,
        struct qed_filter_ucast ucast;
 
        if (!params->vlan_valid && !params->mac_valid) {
-               DP_NOTICE(
-                       cdev,
-                       "Tried configuring a unicast filter, but both MAC and VLAN are not set\n");
+               DP_NOTICE(cdev,
+                         "Tried configuring a unicast filter, but both MAC and VLAN are not set\n");
                return -EINVAL;
        }
 
@@ -2135,8 +2109,7 @@ static int qed_configure_filter_mcast(struct qed_dev *cdev,
        for (i = 0; i < mcast.num_mc_addrs; i++)
                ether_addr_copy(mcast.mac[i], params->mac[i]);
 
-       return qed_filter_mcast_cmd(cdev, &mcast,
-                                   QED_SPQ_MODE_CB, NULL);
+       return qed_filter_mcast_cmd(cdev, &mcast, QED_SPQ_MODE_CB, NULL);
 }
 
 static int qed_configure_filter(struct qed_dev *cdev,
@@ -2153,15 +2126,13 @@ static int qed_configure_filter(struct qed_dev *cdev,
                accept_flags = params->filter.accept_flags;
                return qed_configure_filter_rx_mode(cdev, accept_flags);
        default:
-               DP_NOTICE(cdev, "Unknown filter type %d\n",
-                         (int)params->type);
+               DP_NOTICE(cdev, "Unknown filter type %d\n", (int)params->type);
                return -EINVAL;
        }
 }
 
 static int qed_fp_cqe_completion(struct qed_dev *dev,
-                                u8 rss_id,
-                                struct eth_slow_path_rx_cqe *cqe)
+                                u8 rss_id, struct eth_slow_path_rx_cqe *cqe)
 {
        return qed_eth_cqe_completion(&dev->hwfns[rss_id % dev->num_hwfns],
                                      cqe);
index c7dc34b..54976cc 100644
@@ -51,8 +51,6 @@ MODULE_FIRMWARE(QED_FW_FILE_NAME);
 
 static int __init qed_init(void)
 {
-       pr_notice("qed_init called\n");
-
        pr_info("%s", version);
 
        return 0;
@@ -106,8 +104,7 @@ static void qed_free_pci(struct qed_dev *cdev)
 /* Performs PCI initializations as well as initializing PCI-related parameters
  * in the device structrue. Returns 0 in case of success.
  */
-static int qed_init_pci(struct qed_dev *cdev,
-                       struct pci_dev *pdev)
+static int qed_init_pci(struct qed_dev *cdev, struct pci_dev *pdev)
 {
        u8 rev_id;
        int rc;
@@ -263,8 +260,7 @@ static struct qed_dev *qed_alloc_cdev(struct pci_dev *pdev)
 }
 
 /* Sets the requested power state */
-static int qed_set_power_state(struct qed_dev *cdev,
-                              pci_power_t state)
+static int qed_set_power_state(struct qed_dev *cdev, pci_power_t state)
 {
        if (!cdev)
                return -ENODEV;
@@ -366,8 +362,8 @@ static int qed_enable_msix(struct qed_dev *cdev,
                DP_NOTICE(cdev,
                          "Trying to enable MSI-X with less vectors (%d out of %d)\n",
                          cnt, int_params->in.num_vectors);
-               rc = pci_enable_msix_exact(cdev->pdev,
-                                          int_params->msix_table, cnt);
+               rc = pci_enable_msix_exact(cdev->pdev, int_params->msix_table,
+                                          cnt);
                if (!rc)
                        rc = cnt;
        }
@@ -439,6 +435,11 @@ static int qed_set_int_mode(struct qed_dev *cdev, bool force_mode)
        }
 
 out:
+       if (!rc)
+               DP_INFO(cdev, "Using %s interrupts\n",
+                       int_params->out.int_mode == QED_INT_MODE_INTA ?
+                       "INTa" : int_params->out.int_mode == QED_INT_MODE_MSI ?
+                       "MSI" : "MSIX");
        cdev->int_coalescing_mode = QED_COAL_MODE_ENABLE;
 
        return rc;
@@ -514,19 +515,18 @@ static irqreturn_t qed_single_int(int irq, void *dev_instance)
 int qed_slowpath_irq_req(struct qed_hwfn *hwfn)
 {
        struct qed_dev *cdev = hwfn->cdev;
+       u32 int_mode;
        int rc = 0;
        u8 id;
 
-       if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
+       int_mode = cdev->int_params.out.int_mode;
+       if (int_mode == QED_INT_MODE_MSIX) {
                id = hwfn->my_id;
                snprintf(hwfn->name, NAME_SIZE, "sp-%d-%02x:%02x.%02x",
                         id, cdev->pdev->bus->number,
                         PCI_SLOT(cdev->pdev->devfn), hwfn->abs_pf_id);
                rc = request_irq(cdev->int_params.msix_table[id].vector,
                                 qed_msix_sp_int, 0, hwfn->name, hwfn->sp_dpc);
-               if (!rc)
-                       DP_VERBOSE(hwfn, (NETIF_MSG_INTR | QED_MSG_SP),
-                                  "Requested slowpath MSI-X\n");
        } else {
                unsigned long flags = 0;
 
@@ -541,6 +541,13 @@ int qed_slowpath_irq_req(struct qed_hwfn *hwfn)
                                 flags, cdev->name, cdev);
        }
 
+       if (rc)
+               DP_NOTICE(cdev, "request_irq failed, rc = %d\n", rc);
+       else
+               DP_VERBOSE(hwfn, (NETIF_MSG_INTR | QED_MSG_SP),
+                          "Requested slowpath %s\n",
+                          (int_mode == QED_INT_MODE_MSIX) ? "MSI-X" : "IRQ");
+
        return rc;
 }
 
@@ -974,8 +981,7 @@ static u32 qed_sb_init(struct qed_dev *cdev,
 }
 
 static u32 qed_sb_release(struct qed_dev *cdev,
-                         struct qed_sb_info *sb_info,
-                         u16 sb_id)
+                         struct qed_sb_info *sb_info, u16 sb_id)
 {
        struct qed_hwfn *p_hwfn;
        int hwfn_index;
@@ -1025,20 +1031,23 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params)
                link_params->speed.autoneg = params->autoneg;
        if (params->override_flags & QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS) {
                link_params->speed.advertised_speeds = 0;
-               if ((params->adv_speeds & SUPPORTED_1000baseT_Half) ||
-                   (params->adv_speeds & SUPPORTED_1000baseT_Full))
+               if ((params->adv_speeds & QED_LM_1000baseT_Half_BIT) ||
+                   (params->adv_speeds & QED_LM_1000baseT_Full_BIT))
+                       link_params->speed.advertised_speeds |=
+                           NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+               if (params->adv_speeds & QED_LM_10000baseKR_Full_BIT)
                        link_params->speed.advertised_speeds |=
-                               NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
-               if (params->adv_speeds & SUPPORTED_10000baseKR_Full)
+                           NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
+               if (params->adv_speeds & QED_LM_25000baseKR_Full_BIT)
                        link_params->speed.advertised_speeds |=
-                               NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
-               if (params->adv_speeds & SUPPORTED_40000baseLR4_Full)
+                           NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G;
+               if (params->adv_speeds & QED_LM_40000baseLR4_Full_BIT)
                        link_params->speed.advertised_speeds |=
-                               NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G;
-               if (params->adv_speeds & 0)
+                           NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G;
+               if (params->adv_speeds & QED_LM_50000baseKR2_Full_BIT)
                        link_params->speed.advertised_speeds |=
-                               NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G;
-               if (params->adv_speeds & 0)
+                           NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G;
+               if (params->adv_speeds & QED_LM_100000baseKR4_Full_BIT)
                        link_params->speed.advertised_speeds |=
                            NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G;
        }
@@ -1168,50 +1177,56 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
                if_link->link_up = true;
 
        /* TODO - at the moment assume supported and advertised speed equal */
-       if_link->supported_caps = SUPPORTED_FIBRE;
+       if_link->supported_caps = QED_LM_FIBRE_BIT;
        if (params.speed.autoneg)
-               if_link->supported_caps |= SUPPORTED_Autoneg;
+               if_link->supported_caps |= QED_LM_Autoneg_BIT;
        if (params.pause.autoneg ||
            (params.pause.forced_rx && params.pause.forced_tx))
-               if_link->supported_caps |= SUPPORTED_Asym_Pause;
+               if_link->supported_caps |= QED_LM_Asym_Pause_BIT;
        if (params.pause.autoneg || params.pause.forced_rx ||
            params.pause.forced_tx)
-               if_link->supported_caps |= SUPPORTED_Pause;
+               if_link->supported_caps |= QED_LM_Pause_BIT;
 
        if_link->advertised_caps = if_link->supported_caps;
        if (params.speed.advertised_speeds &
            NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
-               if_link->advertised_caps |= SUPPORTED_1000baseT_Half |
-                                          SUPPORTED_1000baseT_Full;
+               if_link->advertised_caps |= QED_LM_1000baseT_Half_BIT |
+                   QED_LM_1000baseT_Full_BIT;
        if (params.speed.advertised_speeds &
            NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
-               if_link->advertised_caps |= SUPPORTED_10000baseKR_Full;
+               if_link->advertised_caps |= QED_LM_10000baseKR_Full_BIT;
        if (params.speed.advertised_speeds &
-               NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
-               if_link->advertised_caps |= SUPPORTED_40000baseLR4_Full;
+           NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
+               if_link->advertised_caps |= QED_LM_25000baseKR_Full_BIT;
        if (params.speed.advertised_speeds &
-               NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
-               if_link->advertised_caps |= 0;
+           NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
+               if_link->advertised_caps |= QED_LM_40000baseLR4_Full_BIT;
+       if (params.speed.advertised_speeds &
+           NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+               if_link->advertised_caps |= QED_LM_50000baseKR2_Full_BIT;
        if (params.speed.advertised_speeds &
            NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
-               if_link->advertised_caps |= 0;
+               if_link->advertised_caps |= QED_LM_100000baseKR4_Full_BIT;
 
        if (link_caps.speed_capabilities &
            NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
-               if_link->supported_caps |= SUPPORTED_1000baseT_Half |
-                                          SUPPORTED_1000baseT_Full;
+               if_link->supported_caps |= QED_LM_1000baseT_Half_BIT |
+                   QED_LM_1000baseT_Full_BIT;
        if (link_caps.speed_capabilities &
            NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
-               if_link->supported_caps |= SUPPORTED_10000baseKR_Full;
+               if_link->supported_caps |= QED_LM_10000baseKR_Full_BIT;
+       if (link_caps.speed_capabilities &
+           NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
+               if_link->supported_caps |= QED_LM_25000baseKR_Full_BIT;
        if (link_caps.speed_capabilities &
-               NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
-               if_link->supported_caps |= SUPPORTED_40000baseLR4_Full;
+           NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
+               if_link->supported_caps |= QED_LM_40000baseLR4_Full_BIT;
        if (link_caps.speed_capabilities &
-               NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
-               if_link->supported_caps |= 0;
+           NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+               if_link->supported_caps |= QED_LM_50000baseKR2_Full_BIT;
        if (link_caps.speed_capabilities &
            NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
-               if_link->supported_caps |= 0;
+               if_link->supported_caps |= QED_LM_100000baseKR4_Full_BIT;
 
        if (link.link_up)
                if_link->speed = link.speed;
@@ -1231,33 +1246,29 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
                if_link->pause_config |= QED_LINK_PAUSE_TX_ENABLE;
 
        /* Link partner capabilities */
-       if (link.partner_adv_speed &
-           QED_LINK_PARTNER_SPEED_1G_HD)
-               if_link->lp_caps |= SUPPORTED_1000baseT_Half;
-       if (link.partner_adv_speed &
-           QED_LINK_PARTNER_SPEED_1G_FD)
-               if_link->lp_caps |= SUPPORTED_1000baseT_Full;
-       if (link.partner_adv_speed &
-           QED_LINK_PARTNER_SPEED_10G)
-               if_link->lp_caps |= SUPPORTED_10000baseKR_Full;
-       if (link.partner_adv_speed &
-           QED_LINK_PARTNER_SPEED_40G)
-               if_link->lp_caps |= SUPPORTED_40000baseLR4_Full;
-       if (link.partner_adv_speed &
-           QED_LINK_PARTNER_SPEED_50G)
-               if_link->lp_caps |= 0;
-       if (link.partner_adv_speed &
-           QED_LINK_PARTNER_SPEED_100G)
-               if_link->lp_caps |= 0;
+       if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_HD)
+               if_link->lp_caps |= QED_LM_1000baseT_Half_BIT;
+       if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_FD)
+               if_link->lp_caps |= QED_LM_1000baseT_Full_BIT;
+       if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_10G)
+               if_link->lp_caps |= QED_LM_10000baseKR_Full_BIT;
+       if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_25G)
+               if_link->lp_caps |= QED_LM_25000baseKR_Full_BIT;
+       if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_40G)
+               if_link->lp_caps |= QED_LM_40000baseLR4_Full_BIT;
+       if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_50G)
+               if_link->lp_caps |= QED_LM_50000baseKR2_Full_BIT;
+       if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_100G)
+               if_link->lp_caps |= QED_LM_100000baseKR4_Full_BIT;
 
        if (link.an_complete)
-               if_link->lp_caps |= SUPPORTED_Autoneg;
+               if_link->lp_caps |= QED_LM_Autoneg_BIT;
 
        if (link.partner_adv_pause)
-               if_link->lp_caps |= SUPPORTED_Pause;
+               if_link->lp_caps |= QED_LM_Pause_BIT;
        if (link.partner_adv_pause == QED_LINK_PARTNER_ASYMMETRIC_PAUSE ||
            link.partner_adv_pause == QED_LINK_PARTNER_BOTH_PAUSE)
-               if_link->lp_caps |= SUPPORTED_Asym_Pause;
+               if_link->lp_caps |= QED_LM_Asym_Pause_BIT;
 }
 
 static void qed_get_current_link(struct qed_dev *cdev,
index a240f26..88b448b 100644 (file)
@@ -54,8 +54,7 @@ bool qed_mcp_is_init(struct qed_hwfn *p_hwfn)
        return true;
 }
 
-void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn,
-                          struct qed_ptt *p_ptt)
+void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
                                        PUBLIC_PORT);
@@ -68,8 +67,7 @@ void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn,
                   p_hwfn->mcp_info->port_addr, MFW_PORT(p_hwfn));
 }
 
-void qed_mcp_read_mb(struct qed_hwfn *p_hwfn,
-                    struct qed_ptt *p_ptt)
+void qed_mcp_read_mb(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        u32 length = MFW_DRV_MSG_MAX_DWORDS(p_hwfn->mcp_info->mfw_mb_length);
        u32 tmp, i;
@@ -99,8 +97,7 @@ int qed_mcp_free(struct qed_hwfn *p_hwfn)
        return 0;
 }
 
-static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn,
-                               struct qed_ptt *p_ptt)
+static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        struct qed_mcp_info *p_info = p_hwfn->mcp_info;
        u32 drv_mb_offsize, mfw_mb_offsize;
@@ -143,8 +140,7 @@ static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn,
        return 0;
 }
 
-int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn,
-                    struct qed_ptt *p_ptt)
+int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        struct qed_mcp_info *p_info;
        u32 size;
@@ -165,9 +161,7 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn,
 
        size = MFW_DRV_MSG_MAX_DWORDS(p_info->mfw_mb_length) * sizeof(u32);
        p_info->mfw_mb_cur = kzalloc(size, GFP_KERNEL);
-       p_info->mfw_mb_shadow =
-               kzalloc(sizeof(u32) * MFW_DRV_MSG_MAX_DWORDS(
-                               p_info->mfw_mb_length), GFP_KERNEL);
+       p_info->mfw_mb_shadow = kzalloc(size, GFP_KERNEL);
        if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr)
                goto err;
 
@@ -189,8 +183,7 @@ err:
  * access is achieved by setting a blocking flag, which will fail other
  * competing contexts to send their mailboxes.
  */
-static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn,
-                          u32 cmd)
+static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn, u32 cmd)
 {
        spin_lock_bh(&p_hwfn->mcp_info->lock);
 
@@ -221,15 +214,13 @@ static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn,
        return 0;
 }
 
-static void qed_mcp_mb_unlock(struct qed_hwfn  *p_hwfn,
-                             u32               cmd)
+static void qed_mcp_mb_unlock(struct qed_hwfn *p_hwfn, u32 cmd)
 {
        if (cmd != DRV_MSG_CODE_LOAD_REQ && cmd != DRV_MSG_CODE_UNLOAD_REQ)
                spin_unlock_bh(&p_hwfn->mcp_info->lock);
 }
 
-int qed_mcp_reset(struct qed_hwfn *p_hwfn,
-                 struct qed_ptt *p_ptt)
+int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        u32 seq = ++p_hwfn->mcp_info->drv_mb_seq;
        u8 delay = CHIP_MCP_RESP_ITER_US;
@@ -326,7 +317,8 @@ static int qed_do_mcp_cmd(struct qed_hwfn *p_hwfn,
                *o_mcp_param = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_param);
        } else {
                /* FW BUG! */
-               DP_ERR(p_hwfn, "MFW failed to respond!\n");
+               DP_ERR(p_hwfn, "MFW failed to respond [cmd 0x%x param 0x%x]\n",
+                      cmd, param);
                *o_mcp_resp = 0;
                rc = -EAGAIN;
        }
@@ -342,7 +334,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
 
        /* MCP not initialized */
        if (!qed_mcp_is_init(p_hwfn)) {
-               DP_NOTICE(p_hwfn, "MFW is not initialized !\n");
+               DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
                return -EBUSY;
        }
 
@@ -399,8 +391,7 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
 }
 
 int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
-                    struct qed_ptt *p_ptt,
-                    u32 *p_load_code)
+                    struct qed_ptt *p_ptt, u32 *p_load_code)
 {
        struct qed_dev *cdev = p_hwfn->cdev;
        struct qed_mcp_mb_params mb_params;
@@ -527,8 +518,7 @@ static void qed_mcp_handle_transceiver_change(struct qed_hwfn *p_hwfn,
                   "Received transceiver state update [0x%08x] from mfw [Addr 0x%x]\n",
                   transceiver_state,
                   (u32)(p_hwfn->mcp_info->port_addr +
-                        offsetof(struct public_port,
-                                 transceiver_data)));
+                         offsetof(struct public_port, transceiver_data)));
 
        transceiver_state = GET_FIELD(transceiver_state,
                                      ETH_TRANSCEIVER_STATE);
@@ -540,8 +530,7 @@ static void qed_mcp_handle_transceiver_change(struct qed_hwfn *p_hwfn,
 }
 
 static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
-                                      struct qed_ptt *p_ptt,
-                                      bool b_reset)
+                                      struct qed_ptt *p_ptt, bool b_reset)
 {
        struct qed_mcp_link_state *p_link;
        u8 max_bw, min_bw;
@@ -557,8 +546,7 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
                           "Received link update [0x%08x] from mfw [Addr 0x%x]\n",
                           status,
                           (u32)(p_hwfn->mcp_info->port_addr +
-                                offsetof(struct public_port,
-                                         link_status)));
+                                offsetof(struct public_port, link_status)));
        } else {
                DP_VERBOSE(p_hwfn, NETIF_MSG_LINK,
                           "Resetting link indications\n");
@@ -634,6 +622,9 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
        p_link->partner_adv_speed |=
                (status & LINK_STATUS_LINK_PARTNER_20G_CAPABLE) ?
                QED_LINK_PARTNER_SPEED_20G : 0;
+       p_link->partner_adv_speed |=
+               (status & LINK_STATUS_LINK_PARTNER_25G_CAPABLE) ?
+               QED_LINK_PARTNER_SPEED_25G : 0;
        p_link->partner_adv_speed |=
                (status & LINK_STATUS_LINK_PARTNER_40G_CAPABLE) ?
                QED_LINK_PARTNER_SPEED_40G : 0;
@@ -752,8 +743,7 @@ static void qed_read_pf_bandwidth(struct qed_hwfn *p_hwfn,
 
 static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn,
                                  struct qed_ptt *p_ptt,
-                                 struct public_func *p_data,
-                                 int pfid)
+                                 struct public_func *p_data, int pfid)
 {
        u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
                                        PUBLIC_FUNC);
@@ -763,8 +753,7 @@ static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn,
 
        memset(p_data, 0, sizeof(*p_data));
 
-       size = min_t(u32, sizeof(*p_data),
-                    QED_SECTION_SIZE(mfw_path_offsize));
+       size = min_t(u32, sizeof(*p_data), QED_SECTION_SIZE(mfw_path_offsize));
        for (i = 0; i < size / sizeof(u32); i++)
                ((u32 *)p_data)[i] = qed_rd(p_hwfn, p_ptt,
                                            func_addr + (i << 2));
@@ -799,15 +788,13 @@ int qed_hw_init_first_eth(struct qed_hwfn *p_hwfn,
        return -EINVAL;
 }
 
-static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn,
-                             struct qed_ptt *p_ptt)
+static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        struct qed_mcp_function_info *p_info;
        struct public_func shmem_info;
        u32 resp = 0, param = 0;
 
-       qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
-                              MCP_PF_ID(p_hwfn));
+       qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn));
 
        qed_read_pf_bandwidth(p_hwfn, &shmem_info);
 
@@ -940,8 +927,7 @@ int qed_mcp_get_mfw_ver(struct qed_hwfn *p_hwfn,
        return 0;
 }
 
-int qed_mcp_get_media_type(struct qed_dev *cdev,
-                          u32 *p_media_type)
+int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type)
 {
        struct qed_hwfn *p_hwfn = &cdev->hwfns[0];
        struct qed_ptt  *p_ptt;
@@ -950,7 +936,7 @@ int qed_mcp_get_media_type(struct qed_dev *cdev,
                return -EINVAL;
 
        if (!qed_mcp_is_init(p_hwfn)) {
-               DP_NOTICE(p_hwfn, "MFW is not initialized !\n");
+               DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
                return -EBUSY;
        }
 
@@ -1003,15 +989,13 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
        struct qed_mcp_function_info *info;
        struct public_func shmem_info;
 
-       qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
-                              MCP_PF_ID(p_hwfn));
+       qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn));
        info = &p_hwfn->mcp_info->func_info;
 
        info->pause_on_host = (shmem_info.config &
                               FUNC_MF_CFG_PAUSE_ON_HOST_RING) ? 1 : 0;
 
-       if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info,
-                                   &info->protocol)) {
+       if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, &info->protocol)) {
                DP_ERR(p_hwfn, "Unknown personality %08x\n",
                       (u32)(shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK));
                return -EINVAL;
@@ -1072,15 +1056,13 @@ struct qed_mcp_link_capabilities
        return &p_hwfn->mcp_info->link_capabilities;
 }
 
-int qed_mcp_drain(struct qed_hwfn *p_hwfn,
-                 struct qed_ptt *p_ptt)
+int qed_mcp_drain(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        u32 resp = 0, param = 0;
        int rc;
 
        rc = qed_mcp_cmd(p_hwfn, p_ptt,
-                        DRV_MSG_CODE_NIG_DRAIN, 1000,
-                        &resp, &param);
+                        DRV_MSG_CODE_NIG_DRAIN, 1000, &resp, &param);
 
        /* Wait for the drain to complete before returning */
        msleep(1020);
@@ -1089,8 +1071,7 @@ int qed_mcp_drain(struct qed_hwfn *p_hwfn,
 }
 
 int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn,
-                          struct qed_ptt *p_ptt,
-                          u32 *p_flash_size)
+                          struct qed_ptt *p_ptt, u32 *p_flash_size)
 {
        u32 flash_size;
 
@@ -1168,8 +1149,8 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
-int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-                   enum qed_led_mode mode)
+int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
+                   struct qed_ptt *p_ptt, enum qed_led_mode mode)
 {
        u32 resp = 0, param = 0, drv_mb_param;
        int rc;
index 7f319aa..013d1b9 100644 (file)
@@ -60,9 +60,10 @@ struct qed_mcp_link_state {
 #define QED_LINK_PARTNER_SPEED_1G_FD    BIT(1)
 #define QED_LINK_PARTNER_SPEED_10G      BIT(2)
 #define QED_LINK_PARTNER_SPEED_20G      BIT(3)
-#define QED_LINK_PARTNER_SPEED_40G      BIT(4)
-#define QED_LINK_PARTNER_SPEED_50G      BIT(5)
-#define QED_LINK_PARTNER_SPEED_100G     BIT(6)
+#define QED_LINK_PARTNER_SPEED_25G      BIT(4)
+#define QED_LINK_PARTNER_SPEED_40G      BIT(5)
+#define QED_LINK_PARTNER_SPEED_50G      BIT(6)
+#define QED_LINK_PARTNER_SPEED_100G     BIT(7)
        u32     partner_adv_speed;
 
        bool    partner_tx_flow_ctrl_en;
index a52f3fc..2888eb0 100644 (file)
@@ -25,9 +25,7 @@
 
 int qed_sp_init_request(struct qed_hwfn *p_hwfn,
                        struct qed_spq_entry **pp_ent,
-                       u8 cmd,
-                       u8 protocol,
-                       struct qed_sp_init_data *p_data)
+                       u8 cmd, u8 protocol, struct qed_sp_init_data *p_data)
 {
        u32 opaque_cid = p_data->opaque_fid << 16 | p_data->cid;
        struct qed_spq_entry *p_ent = NULL;
@@ -38,7 +36,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 
        rc = qed_spq_get_entry(p_hwfn, pp_ent);
 
-       if (rc != 0)
+       if (rc)
                return rc;
 
        p_ent = *pp_ent;
@@ -321,8 +319,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 
        rc = qed_sp_init_request(p_hwfn, &p_ent,
                                 COMMON_RAMROD_PF_START,
-                                PROTOCOLID_COMMON,
-                                &init_data);
+                                PROTOCOLID_COMMON, &init_data);
        if (rc)
                return rc;
 
@@ -356,8 +353,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
        DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr,
                       p_hwfn->p_consq->chain.pbl.p_phys_table);
 
-       qed_tunn_set_pf_start_params(p_hwfn, p_tunn,
-                                    &p_ramrod->tunnel_config);
+       qed_tunn_set_pf_start_params(p_hwfn, p_tunn, &p_ramrod->tunnel_config);
 
        if (IS_MF_SI(p_hwfn))
                p_ramrod->allow_npar_tx_switching = allow_npar_tx_switch;
@@ -389,8 +385,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 
        DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
                   "Setting event_ring_sb [id %04x index %02x], outer_tag [%d]\n",
-                  sb, sb_index,
-                  p_ramrod->outer_tag);
+                  sb, sb_index, p_ramrod->outer_tag);
 
        rc = qed_spq_post(p_hwfn, p_ent, NULL);
 
index d73456e..0265a32 100644 (file)
@@ -41,8 +41,7 @@
 ***************************************************************************/
 static void qed_spq_blocking_cb(struct qed_hwfn *p_hwfn,
                                void *cookie,
-                               union event_ring_data *data,
-                               u8 fw_return_code)
+                               union event_ring_data *data, u8 fw_return_code)
 {
        struct qed_spq_comp_done *comp_done;
 
@@ -109,9 +108,8 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn,
 /***************************************************************************
 * SPQ entries inner API
 ***************************************************************************/
-static int
-qed_spq_fill_entry(struct qed_hwfn *p_hwfn,
-                  struct qed_spq_entry *p_ent)
+static int qed_spq_fill_entry(struct qed_hwfn *p_hwfn,
+                             struct qed_spq_entry *p_ent)
 {
        p_ent->flags = 0;
 
@@ -189,8 +187,7 @@ static void qed_spq_hw_initialize(struct qed_hwfn *p_hwfn,
 }
 
 static int qed_spq_hw_post(struct qed_hwfn *p_hwfn,
-                          struct qed_spq *p_spq,
-                          struct qed_spq_entry *p_ent)
+                          struct qed_spq *p_spq, struct qed_spq_entry *p_ent)
 {
        struct qed_chain *p_chain = &p_hwfn->p_spq->chain;
        u16 echo = qed_chain_get_prod_idx(p_chain);
@@ -255,8 +252,7 @@ qed_async_event_completion(struct qed_hwfn *p_hwfn,
 /***************************************************************************
 * EQ API
 ***************************************************************************/
-void qed_eq_prod_update(struct qed_hwfn *p_hwfn,
-                       u16 prod)
+void qed_eq_prod_update(struct qed_hwfn *p_hwfn, u16 prod)
 {
        u32 addr = GTT_BAR0_MAP_REG_USDM_RAM +
                   USTORM_EQE_CONS_OFFSET(p_hwfn->rel_pf_id);
@@ -267,9 +263,7 @@ void qed_eq_prod_update(struct qed_hwfn *p_hwfn,
        mmiowb();
 }
 
-int qed_eq_completion(struct qed_hwfn *p_hwfn,
-                     void *cookie)
-
+int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie)
 {
        struct qed_eq *p_eq = cookie;
        struct qed_chain *p_chain = &p_eq->chain;
@@ -323,8 +317,7 @@ int qed_eq_completion(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
-struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn,
-                           u16 num_elem)
+struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem)
 {
        struct qed_eq *p_eq;
 
@@ -348,11 +341,8 @@ struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn,
        }
 
        /* register EQ completion on the SP SB */
-       qed_int_register_cb(p_hwfn,
-                           qed_eq_completion,
-                           p_eq,
-                           &p_eq->eq_sb_index,
-                           &p_eq->p_fw_cons);
+       qed_int_register_cb(p_hwfn, qed_eq_completion,
+                           p_eq, &p_eq->eq_sb_index, &p_eq->p_fw_cons);
 
        return p_eq;
 
@@ -361,14 +351,12 @@ eq_allocate_fail:
        return NULL;
 }
 
-void qed_eq_setup(struct qed_hwfn *p_hwfn,
-                 struct qed_eq *p_eq)
+void qed_eq_setup(struct qed_hwfn *p_hwfn, struct qed_eq *p_eq)
 {
        qed_chain_reset(&p_eq->chain);
 }
 
-void qed_eq_free(struct qed_hwfn *p_hwfn,
-                struct qed_eq *p_eq)
+void qed_eq_free(struct qed_hwfn *p_hwfn, struct qed_eq *p_eq)
 {
        if (!p_eq)
                return;
@@ -379,10 +367,9 @@ void qed_eq_free(struct qed_hwfn *p_hwfn,
 /***************************************************************************
 * CQE API - manipulate EQ functionality
 ***************************************************************************/
-static int qed_cqe_completion(
-       struct qed_hwfn *p_hwfn,
-       struct eth_slow_path_rx_cqe *cqe,
-       enum protocol_type protocol)
+static int qed_cqe_completion(struct qed_hwfn *p_hwfn,
+                             struct eth_slow_path_rx_cqe *cqe,
+                             enum protocol_type protocol)
 {
        if (IS_VF(p_hwfn->cdev))
                return 0;
@@ -463,8 +450,7 @@ int qed_spq_alloc(struct qed_hwfn *p_hwfn)
        u32 capacity;
 
        /* SPQ struct */
-       p_spq =
-               kzalloc(sizeof(struct qed_spq), GFP_KERNEL);
+       p_spq = kzalloc(sizeof(struct qed_spq), GFP_KERNEL);
        if (!p_spq) {
                DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_spq'\n");
                return -ENOMEM;
@@ -525,9 +511,7 @@ void qed_spq_free(struct qed_hwfn *p_hwfn)
        kfree(p_spq);
 }
 
-int
-qed_spq_get_entry(struct qed_hwfn *p_hwfn,
-                 struct qed_spq_entry **pp_ent)
+int qed_spq_get_entry(struct qed_hwfn *p_hwfn, struct qed_spq_entry **pp_ent)
 {
        struct qed_spq *p_spq = p_hwfn->p_spq;
        struct qed_spq_entry *p_ent = NULL;
@@ -538,14 +522,15 @@ qed_spq_get_entry(struct qed_hwfn *p_hwfn,
        if (list_empty(&p_spq->free_pool)) {
                p_ent = kzalloc(sizeof(*p_ent), GFP_ATOMIC);
                if (!p_ent) {
+                       DP_NOTICE(p_hwfn,
+                                 "Failed to allocate an SPQ entry for a pending ramrod\n");
                        rc = -ENOMEM;
                        goto out_unlock;
                }
                p_ent->queue = &p_spq->unlimited_pending;
        } else {
                p_ent = list_first_entry(&p_spq->free_pool,
-                                        struct qed_spq_entry,
-                                        list);
+                                        struct qed_spq_entry, list);
                list_del(&p_ent->list);
                p_ent->queue = &p_spq->pending;
        }
@@ -564,8 +549,7 @@ static void __qed_spq_return_entry(struct qed_hwfn *p_hwfn,
        list_add_tail(&p_ent->list, &p_hwfn->p_spq->free_pool);
 }
 
-void qed_spq_return_entry(struct qed_hwfn *p_hwfn,
-                         struct qed_spq_entry *p_ent)
+void qed_spq_return_entry(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent)
 {
        spin_lock_bh(&p_hwfn->p_spq->lock);
        __qed_spq_return_entry(p_hwfn, p_ent);
@@ -586,10 +570,9 @@ void qed_spq_return_entry(struct qed_hwfn *p_hwfn,
  *
  * @return int
  */
-static int
-qed_spq_add_entry(struct qed_hwfn *p_hwfn,
-                 struct qed_spq_entry *p_ent,
-                 enum spq_priority priority)
+static int qed_spq_add_entry(struct qed_hwfn *p_hwfn,
+                            struct qed_spq_entry *p_ent,
+                            enum spq_priority priority)
 {
        struct qed_spq *p_spq = p_hwfn->p_spq;
 
@@ -604,8 +587,7 @@ qed_spq_add_entry(struct qed_hwfn *p_hwfn,
                        struct qed_spq_entry *p_en2;
 
                        p_en2 = list_first_entry(&p_spq->free_pool,
-                                                struct qed_spq_entry,
-                                                list);
+                                                struct qed_spq_entry, list);
                        list_del(&p_en2->list);
 
                        /* Copy the ring element physical pointer to the new
@@ -655,8 +637,7 @@ u32 qed_spq_get_cid(struct qed_hwfn *p_hwfn)
 * Posting new Ramrods
 ***************************************************************************/
 static int qed_spq_post_list(struct qed_hwfn *p_hwfn,
-                            struct list_head *head,
-                            u32 keep_reserve)
+                            struct list_head *head, u32 keep_reserve)
 {
        struct qed_spq *p_spq = p_hwfn->p_spq;
        int rc;
@@ -690,8 +671,7 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn)
                        break;
 
                p_ent = list_first_entry(&p_spq->unlimited_pending,
-                                        struct qed_spq_entry,
-                                        list);
+                                        struct qed_spq_entry, list);
                if (!p_ent)
                        return -EINVAL;
 
@@ -705,8 +685,7 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn)
 }
 
 int qed_spq_post(struct qed_hwfn *p_hwfn,
-                struct qed_spq_entry *p_ent,
-                u8 *fw_return_code)
+                struct qed_spq_entry *p_ent, u8 *fw_return_code)
 {
        int rc = 0;
        struct qed_spq *p_spq = p_hwfn ? p_hwfn->p_spq : NULL;
@@ -803,8 +782,7 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
                return -EINVAL;
 
        spin_lock_bh(&p_spq->lock);
-       list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending,
-                                list) {
+       list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending, list) {
                if (p_ent->elem.hdr.echo == echo) {
                        u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE;
 
@@ -846,15 +824,22 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
 
        if (!found) {
                DP_NOTICE(p_hwfn,
-                         "Failed to find an entry this EQE completes\n");
+                         "Failed to find an entry this EQE [echo %04x] completes\n",
+                         le16_to_cpu(echo));
                return -EEXIST;
        }
 
-       DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Complete: func %p cookie %p)\n",
+       DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+                  "Complete EQE [echo %04x]: func %p cookie %p)\n",
+                  le16_to_cpu(echo),
                   p_ent->comp_cb.function, p_ent->comp_cb.cookie);
        if (found->comp_cb.function)
                found->comp_cb.function(p_hwfn, found->comp_cb.cookie, p_data,
                                        fw_return_code);
+       else
+               DP_VERBOSE(p_hwfn,
+                          QED_MSG_SPQ,
+                          "Got a completion without a callback function\n");
 
        if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) ||
            (found->queue == &p_spq->unlimited_pending))
@@ -901,14 +886,12 @@ consq_allocate_fail:
        return NULL;
 }
 
-void qed_consq_setup(struct qed_hwfn *p_hwfn,
-                    struct qed_consq *p_consq)
+void qed_consq_setup(struct qed_hwfn *p_hwfn, struct qed_consq *p_consq)
 {
        qed_chain_reset(&p_consq->chain);
 }
 
-void qed_consq_free(struct qed_hwfn *p_hwfn,
-                   struct qed_consq *p_consq)
+void qed_consq_free(struct qed_hwfn *p_hwfn, struct qed_consq *p_consq)
 {
        if (!p_consq)
                return;
index 15399da..51e4c90 100644 (file)
@@ -699,7 +699,7 @@ static void qed_iov_config_perm_table(struct qed_hwfn *p_hwfn,
                                &qzone_id);
 
                reg_addr = PSWHST_REG_ZONE_PERMISSION_TABLE + qzone_id * 4;
-               val = enable ? (vf->abs_vf_id | (1 << 8)) : 0;
+               val = enable ? (vf->abs_vf_id | BIT(8)) : 0;
                qed_wr(p_hwfn, p_ptt, reg_addr, val);
        }
 }
@@ -1090,13 +1090,13 @@ static u16 qed_iov_prep_vp_update_resp_tlvs(struct qed_hwfn *p_hwfn,
 
        /* Prepare response for all extended tlvs if they are found by PF */
        for (i = 0; i < QED_IOV_VP_UPDATE_MAX; i++) {
-               if (!(tlvs_mask & (1 << i)))
+               if (!(tlvs_mask & BIT(i)))
                        continue;
 
                resp = qed_add_tlv(p_hwfn, &p_mbx->offset,
                                   qed_iov_vport_to_tlv(p_hwfn, i), size);
 
-               if (tlvs_accepted & (1 << i))
+               if (tlvs_accepted & BIT(i))
                        resp->hdr.status = status;
                else
                        resp->hdr.status = PFVF_STATUS_NOT_SUPPORTED;
@@ -1334,8 +1334,7 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn,
        pfdev_info->fw_minor = FW_MINOR_VERSION;
        pfdev_info->fw_rev = FW_REVISION_VERSION;
        pfdev_info->fw_eng = FW_ENGINEERING_VERSION;
-       pfdev_info->minor_fp_hsi = min_t(u8,
-                                        ETH_HSI_VER_MINOR,
+       pfdev_info->minor_fp_hsi = min_t(u8, ETH_HSI_VER_MINOR,
                                         req->vfdev_info.eth_fp_hsi_minor);
        pfdev_info->os_type = VFPF_ACQUIRE_OS_LINUX;
        qed_mcp_get_mfw_ver(p_hwfn, p_ptt, &pfdev_info->mfw_ver, NULL);
@@ -1438,14 +1437,11 @@ static int qed_iov_reconfigure_unicast_vlan(struct qed_hwfn *p_hwfn,
 
                filter.type = QED_FILTER_VLAN;
                filter.vlan = p_vf->shadow_config.vlans[i].vid;
-               DP_VERBOSE(p_hwfn,
-                          QED_MSG_IOV,
+               DP_VERBOSE(p_hwfn, QED_MSG_IOV,
                           "Reconfiguring VLAN [0x%04x] for VF [%04x]\n",
                           filter.vlan, p_vf->relative_vf_id);
-               rc = qed_sp_eth_filter_ucast(p_hwfn,
-                                            p_vf->opaque_fid,
-                                            &filter,
-                                            QED_SPQ_MODE_CB, NULL);
+               rc = qed_sp_eth_filter_ucast(p_hwfn, p_vf->opaque_fid,
+                                            &filter, QED_SPQ_MODE_CB, NULL);
                if (rc) {
                        DP_NOTICE(p_hwfn,
                                  "Failed to configure VLAN [%04x] to VF [%04x]\n",
@@ -1463,7 +1459,7 @@ qed_iov_reconfigure_unicast_shadow(struct qed_hwfn *p_hwfn,
 {
        int rc = 0;
 
-       if ((events & (1 << VLAN_ADDR_FORCED)) &&
+       if ((events & BIT(VLAN_ADDR_FORCED)) &&
            !(p_vf->configured_features & (1 << VLAN_ADDR_FORCED)))
                rc = qed_iov_reconfigure_unicast_vlan(p_hwfn, p_vf);
 
@@ -1479,7 +1475,7 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn,
        if (!p_vf->vport_instance)
                return -EINVAL;
 
-       if (events & (1 << MAC_ADDR_FORCED)) {
+       if (events & BIT(MAC_ADDR_FORCED)) {
                /* Since there's no way [currently] of removing the MAC,
                 * we can always assume this means we need to force it.
                 */
@@ -1502,7 +1498,7 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn,
                p_vf->configured_features |= 1 << MAC_ADDR_FORCED;
        }
 
-       if (events & (1 << VLAN_ADDR_FORCED)) {
+       if (events & BIT(VLAN_ADDR_FORCED)) {
                struct qed_sp_vport_update_params vport_update;
                u8 removal;
                int i;
@@ -1572,7 +1568,7 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn,
                if (filter.vlan)
                        p_vf->configured_features |= 1 << VLAN_ADDR_FORCED;
                else
-                       p_vf->configured_features &= ~(1 << VLAN_ADDR_FORCED);
+                       p_vf->configured_features &= ~BIT(VLAN_ADDR_FORCED);
        }
 
        /* If forced features are terminated, we need to configure the shadow
@@ -1619,8 +1615,7 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn,
 
                qed_int_cau_conf_sb(p_hwfn, p_ptt,
                                    start->sb_addr[sb_id],
-                                   vf->igu_sbs[sb_id],
-                                   vf->abs_vf_id, 1);
+                                   vf->igu_sbs[sb_id], vf->abs_vf_id, 1);
        }
        qed_iov_enable_vf_traffic(p_hwfn, p_ptt, vf);
 
@@ -1632,7 +1627,7 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn,
         * vfs that would still be fine, since they passed '0' as padding].
         */
        p_bitmap = &vf_info->bulletin.p_virt->valid_bitmap;
-       if (!(*p_bitmap & (1 << VFPF_BULLETIN_UNTAGGED_DEFAULT_FORCED))) {
+       if (!(*p_bitmap & BIT(VFPF_BULLETIN_UNTAGGED_DEFAULT_FORCED))) {
                u8 vf_req = start->only_untagged;
 
                vf_info->bulletin.p_virt->default_only_untagged = vf_req;
@@ -1652,7 +1647,7 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn,
        params.mtu = vf->mtu;
 
        rc = qed_sp_eth_vport_start(p_hwfn, &params);
-       if (rc != 0) {
+       if (rc) {
                DP_ERR(p_hwfn,
                       "qed_iov_vf_mbx_start_vport returned error %d\n", rc);
                status = PFVF_STATUS_FAILURE;
@@ -1679,7 +1674,7 @@ static void qed_iov_vf_mbx_stop_vport(struct qed_hwfn *p_hwfn,
        vf->spoof_chk = false;
 
        rc = qed_sp_vport_stop(p_hwfn, vf->opaque_fid, vf->vport_id);
-       if (rc != 0) {
+       if (rc) {
                DP_ERR(p_hwfn, "qed_iov_vf_mbx_stop_vport returned error %d\n",
                       rc);
                status = PFVF_STATUS_FAILURE;
@@ -2045,7 +2040,7 @@ qed_iov_vp_update_vlan_param(struct qed_hwfn *p_hwfn,
        p_vf->shadow_config.inner_vlan_removal = p_vlan_tlv->remove_vlan;
 
        /* Ignore the VF request if we're forcing a vlan */
-       if (!(p_vf->configured_features & (1 << VLAN_ADDR_FORCED))) {
+       if (!(p_vf->configured_features & BIT(VLAN_ADDR_FORCED))) {
                p_data->update_inner_vlan_removal_flg = 1;
                p_data->inner_vlan_removal_flg = p_vlan_tlv->remove_vlan;
        }
@@ -2340,7 +2335,7 @@ static int qed_iov_vf_update_vlan_shadow(struct qed_hwfn *p_hwfn,
        /* In forced mode, we're willing to remove entries - but we don't add
         * new ones.
         */
-       if (p_vf->bulletin.p_virt->valid_bitmap & (1 << VLAN_ADDR_FORCED))
+       if (p_vf->bulletin.p_virt->valid_bitmap & BIT(VLAN_ADDR_FORCED))
                return 0;
 
        if (p_params->opcode == QED_FILTER_ADD ||
@@ -2374,7 +2369,7 @@ static int qed_iov_vf_update_mac_shadow(struct qed_hwfn *p_hwfn,
        int i;
 
        /* If we're in forced-mode, we don't allow any change */
-       if (p_vf->bulletin.p_virt->valid_bitmap & (1 << MAC_ADDR_FORCED))
+       if (p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED))
                return 0;
 
        /* First remove entries and then add new ones */
@@ -2509,7 +2504,7 @@ static void qed_iov_vf_mbx_ucast_filter(struct qed_hwfn *p_hwfn,
        }
 
        /* Determine if the unicast filtering is acceptable by PF */
-       if ((p_bulletin->valid_bitmap & (1 << VLAN_ADDR_FORCED)) &&
+       if ((p_bulletin->valid_bitmap & BIT(VLAN_ADDR_FORCED)) &&
            (params.type == QED_FILTER_VLAN ||
             params.type == QED_FILTER_MAC_VLAN)) {
                /* Once VLAN is forced or PVID is set, do not allow
@@ -2521,7 +2516,7 @@ static void qed_iov_vf_mbx_ucast_filter(struct qed_hwfn *p_hwfn,
                goto out;
        }
 
-       if ((p_bulletin->valid_bitmap & (1 << MAC_ADDR_FORCED)) &&
+       if ((p_bulletin->valid_bitmap & BIT(MAC_ADDR_FORCED)) &&
            (params.type == QED_FILTER_MAC ||
             params.type == QED_FILTER_MAC_VLAN)) {
                if (!ether_addr_equal(p_bulletin->mac, params.mac) ||
@@ -2749,7 +2744,7 @@ cleanup:
                /* Mark VF for ack and clean pending state */
                if (p_vf->state == VF_RESET)
                        p_vf->state = VF_STOPPED;
-               ack_vfs[vfid / 32] |= (1 << (vfid % 32));
+               ack_vfs[vfid / 32] |= BIT((vfid % 32));
                p_hwfn->pf_iov_info->pending_flr[rel_vf_id / 64] &=
                    ~(1ULL << (rel_vf_id % 64));
                p_hwfn->pf_iov_info->pending_events[rel_vf_id / 64] &=
@@ -2805,7 +2800,7 @@ int qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *p_disabled_vfs)
                        continue;
 
                vfid = p_vf->abs_vf_id;
-               if ((1 << (vfid % 32)) & p_disabled_vfs[vfid / 32]) {
+               if (BIT((vfid % 32)) & p_disabled_vfs[vfid / 32]) {
                        u64 *p_flr = p_hwfn->pf_iov_info->pending_flr;
                        u16 rel_vf_id = p_vf->relative_vf_id;
 
@@ -3064,8 +3059,7 @@ static void qed_iov_bulletin_set_forced_mac(struct qed_hwfn *p_hwfn,
 
        vf_info->bulletin.p_virt->valid_bitmap |= feature;
        /* Forced MAC will disable MAC_ADDR */
-       vf_info->bulletin.p_virt->valid_bitmap &=
-                               ~(1 << VFPF_BULLETIN_MAC_ADDR);
+       vf_info->bulletin.p_virt->valid_bitmap &= ~BIT(VFPF_BULLETIN_MAC_ADDR);
 
        qed_iov_configure_vport_forced(p_hwfn, vf_info, feature);
 }
@@ -3163,7 +3157,7 @@ static u8 *qed_iov_bulletin_get_forced_mac(struct qed_hwfn *p_hwfn,
        if (!p_vf || !p_vf->bulletin.p_virt)
                return NULL;
 
-       if (!(p_vf->bulletin.p_virt->valid_bitmap & (1 << MAC_ADDR_FORCED)))
+       if (!(p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED)))
                return NULL;
 
        return p_vf->bulletin.p_virt->mac;
@@ -3177,7 +3171,7 @@ u16 qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id)
        if (!p_vf || !p_vf->bulletin.p_virt)
                return 0;
 
-       if (!(p_vf->bulletin.p_virt->valid_bitmap & (1 << VLAN_ADDR_FORCED)))
+       if (!(p_vf->bulletin.p_virt->valid_bitmap & BIT(VLAN_ADDR_FORCED)))
                return 0;
 
        return p_vf->bulletin.p_virt->pvid;
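
Note on the qed_sriov.c hunks above: the (1 << x) -> BIT(x) conversions are behavior-preserving for the bit positions used here; BIT() is the standard kernel helper from <linux/bitops.h>, roughly equivalent to the sketch below (shown for reference only, not part of this commit):

        /* Kernel definition (approximate): promotes the shift to unsigned long,
         * which also avoids signed-overflow pitfalls at bit 31.
         */
        #define BIT(nr) (1UL << (nr))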
index f8492ca..427e043 100644 (file)
@@ -249,78 +249,150 @@ static u32 qede_get_priv_flags(struct net_device *dev)
        return (!!(edev->dev_info.common.num_hwfns > 1)) << QEDE_PRI_FLAG_CMT;
 }
 
-static int qede_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+struct qede_link_mode_mapping {
+       u32 qed_link_mode;
+       u32 ethtool_link_mode;
+};
+
+static const struct qede_link_mode_mapping qed_lm_map[] = {
+       {QED_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT},
+       {QED_LM_Autoneg_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT},
+       {QED_LM_Asym_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT},
+       {QED_LM_Pause_BIT, ETHTOOL_LINK_MODE_Pause_BIT},
+       {QED_LM_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Half_BIT},
+       {QED_LM_1000baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
+       {QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT},
+       {QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT},
+       {QED_LM_40000baseLR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT},
+       {QED_LM_50000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT},
+       {QED_LM_100000baseKR4_Full_BIT,
+        ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT},
+};
+
+#define QEDE_DRV_TO_ETHTOOL_CAPS(caps, lk_ksettings, name)     \
+{                                                              \
+       int i;                                                  \
+                                                               \
+       for (i = 0; i < QED_LM_COUNT; i++) {                    \
+               if ((caps) & (qed_lm_map[i].qed_link_mode))     \
+                       __set_bit(qed_lm_map[i].ethtool_link_mode,\
+                                 lk_ksettings->link_modes.name); \
+       }                                                       \
+}
+
+#define QEDE_ETHTOOL_TO_DRV_CAPS(caps, lk_ksettings, name)     \
+{                                                              \
+       int i;                                                  \
+                                                               \
+       for (i = 0; i < QED_LM_COUNT; i++) {                    \
+               if (test_bit(qed_lm_map[i].ethtool_link_mode,   \
+                            lk_ksettings->link_modes.name))    \
+                       caps |= qed_lm_map[i].qed_link_mode;    \
+       }                                                       \
+}
+
+static int qede_get_link_ksettings(struct net_device *dev,
+                                  struct ethtool_link_ksettings *cmd)
 {
+       struct ethtool_link_settings *base = &cmd->base;
        struct qede_dev *edev = netdev_priv(dev);
        struct qed_link_output current_link;
 
        memset(&current_link, 0, sizeof(current_link));
        edev->ops->common->get_link(edev->cdev, &current_link);
 
-       cmd->supported = current_link.supported_caps;
-       cmd->advertising = current_link.advertised_caps;
+       ethtool_link_ksettings_zero_link_mode(cmd, supported);
+       QEDE_DRV_TO_ETHTOOL_CAPS(current_link.supported_caps, cmd, supported)
+
+       ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+       QEDE_DRV_TO_ETHTOOL_CAPS(current_link.advertised_caps, cmd, advertising)
+
+       ethtool_link_ksettings_zero_link_mode(cmd, lp_advertising);
+       QEDE_DRV_TO_ETHTOOL_CAPS(current_link.lp_caps, cmd, lp_advertising)
+
        if ((edev->state == QEDE_STATE_OPEN) && (current_link.link_up)) {
-               ethtool_cmd_speed_set(cmd, current_link.speed);
-               cmd->duplex = current_link.duplex;
+               base->speed = current_link.speed;
+               base->duplex = current_link.duplex;
        } else {
-               cmd->duplex = DUPLEX_UNKNOWN;
-               ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+               base->speed = SPEED_UNKNOWN;
+               base->duplex = DUPLEX_UNKNOWN;
        }
-       cmd->port = current_link.port;
-       cmd->autoneg = (current_link.autoneg) ? AUTONEG_ENABLE :
-                                               AUTONEG_DISABLE;
-       cmd->lp_advertising = current_link.lp_caps;
+       base->port = current_link.port;
+       base->autoneg = (current_link.autoneg) ? AUTONEG_ENABLE :
+                       AUTONEG_DISABLE;
 
        return 0;
 }
 
-static int qede_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int qede_set_link_ksettings(struct net_device *dev,
+                                  const struct ethtool_link_ksettings *cmd)
 {
+       const struct ethtool_link_settings *base = &cmd->base;
        struct qede_dev *edev = netdev_priv(dev);
        struct qed_link_output current_link;
        struct qed_link_params params;
-       u32 speed;
 
        if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) {
-               DP_INFO(edev,
-                       "Link settings are not allowed to be changed\n");
+               DP_INFO(edev, "Link settings are not allowed to be changed\n");
                return -EOPNOTSUPP;
        }
-
        memset(&current_link, 0, sizeof(current_link));
        memset(&params, 0, sizeof(params));
        edev->ops->common->get_link(edev->cdev, &current_link);
 
-       speed = ethtool_cmd_speed(cmd);
        params.override_flags |= QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS;
        params.override_flags |= QED_LINK_OVERRIDE_SPEED_AUTONEG;
-       if (cmd->autoneg == AUTONEG_ENABLE) {
+       if (base->autoneg == AUTONEG_ENABLE) {
                params.autoneg = true;
                params.forced_speed = 0;
-               params.adv_speeds = cmd->advertising;
-       } else { /* forced speed */
+               QEDE_ETHTOOL_TO_DRV_CAPS(params.adv_speeds, cmd, advertising)
+       } else {                /* forced speed */
                params.override_flags |= QED_LINK_OVERRIDE_SPEED_FORCED_SPEED;
                params.autoneg = false;
-               params.forced_speed = speed;
-               switch (speed) {
+               params.forced_speed = base->speed;
+               switch (base->speed) {
                case SPEED_10000:
                        if (!(current_link.supported_caps &
-                           SUPPORTED_10000baseKR_Full)) {
+                             QED_LM_10000baseKR_Full_BIT)) {
                                DP_INFO(edev, "10G speed not supported\n");
                                return -EINVAL;
                        }
-                       params.adv_speeds = SUPPORTED_10000baseKR_Full;
+                       params.adv_speeds = QED_LM_10000baseKR_Full_BIT;
+                       break;
+               case SPEED_25000:
+                       if (!(current_link.supported_caps &
+                             QED_LM_25000baseKR_Full_BIT)) {
+                               DP_INFO(edev, "25G speed not supported\n");
+                               return -EINVAL;
+                       }
+                       params.adv_speeds = QED_LM_25000baseKR_Full_BIT;
                        break;
                case SPEED_40000:
                        if (!(current_link.supported_caps &
-                           SUPPORTED_40000baseLR4_Full)) {
+                             QED_LM_40000baseLR4_Full_BIT)) {
                                DP_INFO(edev, "40G speed not supported\n");
                                return -EINVAL;
                        }
-                       params.adv_speeds = SUPPORTED_40000baseLR4_Full;
+                       params.adv_speeds = QED_LM_40000baseLR4_Full_BIT;
+                       break;
+               case SPEED_50000:
+                       if (!(current_link.supported_caps &
+                             QED_LM_50000baseKR2_Full_BIT)) {
+                               DP_INFO(edev, "50G speed not supported\n");
+                               return -EINVAL;
+                       }
+                       params.adv_speeds = QED_LM_50000baseKR2_Full_BIT;
+                       break;
+               case SPEED_100000:
+                       if (!(current_link.supported_caps &
+                             QED_LM_100000baseKR4_Full_BIT)) {
+                               DP_INFO(edev, "100G speed not supported\n");
+                               return -EINVAL;
+                       }
+                       params.adv_speeds = QED_LM_100000baseKR4_Full_BIT;
                        break;
                default:
-                       DP_INFO(edev, "Unsupported speed %u\n", speed);
+                       DP_INFO(edev, "Unsupported speed %u\n", base->speed);
                        return -EINVAL;
                }
        }
@@ -368,8 +440,7 @@ static u32 qede_get_msglevel(struct net_device *ndev)
 {
        struct qede_dev *edev = netdev_priv(ndev);
 
-       return ((u32)edev->dp_level << QED_LOG_LEVEL_SHIFT) |
-              edev->dp_module;
+       return ((u32)edev->dp_level << QED_LOG_LEVEL_SHIFT) | edev->dp_module;
 }
 
 static void qede_set_msglevel(struct net_device *ndev, u32 level)
@@ -393,8 +464,7 @@ static int qede_nway_reset(struct net_device *dev)
        struct qed_link_params link_params;
 
        if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) {
-               DP_INFO(edev,
-                       "Link settings are not allowed to be changed\n");
+               DP_INFO(edev, "Link settings are not allowed to be changed\n");
                return -EOPNOTSUPP;
        }
 
@@ -1228,8 +1298,8 @@ static int qede_get_tunable(struct net_device *dev,
 }
 
 static const struct ethtool_ops qede_ethtool_ops = {
-       .get_settings = qede_get_settings,
-       .set_settings = qede_set_settings,
+       .get_link_ksettings = qede_get_link_ksettings,
+       .set_link_ksettings = qede_set_link_ksettings,
        .get_drvinfo = qede_get_drvinfo,
        .get_msglevel = qede_get_msglevel,
        .set_msglevel = qede_set_msglevel,
@@ -1260,7 +1330,7 @@ static const struct ethtool_ops qede_ethtool_ops = {
 };
 
 static const struct ethtool_ops qede_vf_ethtool_ops = {
-       .get_settings = qede_get_settings,
+       .get_link_ksettings = qede_get_link_ksettings,
        .get_drvinfo = qede_get_drvinfo,
        .get_msglevel = qede_get_msglevel,
        .set_msglevel = qede_set_msglevel,
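
Note on the qede_ethtool.c hunk above: the new qed_lm_map[] table drives both QEDE_DRV_TO_ETHTOOL_CAPS and QEDE_ETHTOOL_TO_DRV_CAPS; each conversion is conceptually just a loop over that table. A minimal sketch of the driver-to-ethtool direction (the example_ function name is hypothetical and only illustrates the macro's expansion):

        static void example_qed_caps_to_ethtool(u32 qed_caps,
                                                unsigned long *ethtool_modes)
        {
                int i;

                /* Set the ethtool link-mode bit for every QED_LM_* bit present. */
                for (i = 0; i < ARRAY_SIZE(qed_lm_map); i++)
                        if (qed_caps & qed_lm_map[i].qed_link_mode)
                                __set_bit(qed_lm_map[i].ethtool_link_mode,
                                          ethtool_modes);
        }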
index e4bd02e..5ce8a3c 100644 (file)
@@ -222,7 +222,7 @@ int __init qede_init(void)
 {
        int ret;
 
-       pr_notice("qede_init: %s\n", version);
+       pr_info("qede_init: %s\n", version);
 
        qed_ops = qed_get_eth_ops();
        if (!qed_ops) {
@@ -253,7 +253,8 @@ int __init qede_init(void)
 
 static void __exit qede_cleanup(void)
 {
-       pr_notice("qede_cleanup called\n");
+       if (debug & QED_LOG_INFO_MASK)
+               pr_info("qede_cleanup called\n");
 
        unregister_netdevice_notifier(&qede_netdev_notifier);
        pci_unregister_driver(&qede_pci_driver);
@@ -270,8 +271,7 @@ module_exit(qede_cleanup);
 
 /* Unmap the data and free skb */
 static int qede_free_tx_pkt(struct qede_dev *edev,
-                           struct qede_tx_queue *txq,
-                           int *len)
+                           struct qede_tx_queue *txq, int *len)
 {
        u16 idx = txq->sw_tx_cons & NUM_TX_BDS_MAX;
        struct sk_buff *skb = txq->sw_tx_ring[idx].skb;
@@ -329,8 +329,7 @@ static int qede_free_tx_pkt(struct qede_dev *edev,
 static void qede_free_failed_tx_pkt(struct qede_dev *edev,
                                    struct qede_tx_queue *txq,
                                    struct eth_tx_1st_bd *first_bd,
-                                   int nbd,
-                                   bool data_split)
+                                   int nbd, bool data_split)
 {
        u16 idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
        struct sk_buff *skb = txq->sw_tx_ring[idx].skb;
@@ -339,8 +338,7 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev,
 
        /* Return prod to its position before this skb was handled */
        qed_chain_set_prod(&txq->tx_pbl,
-                          le16_to_cpu(txq->tx_db.data.bd_prod),
-                          first_bd);
+                          le16_to_cpu(txq->tx_db.data.bd_prod), first_bd);
 
        first_bd = (struct eth_tx_1st_bd *)qed_chain_produce(&txq->tx_pbl);
 
@@ -366,8 +364,7 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev,
 
        /* Return again prod to its position before this skb was handled */
        qed_chain_set_prod(&txq->tx_pbl,
-                          le16_to_cpu(txq->tx_db.data.bd_prod),
-                          first_bd);
+                          le16_to_cpu(txq->tx_db.data.bd_prod), first_bd);
 
        /* Free skb */
        dev_kfree_skb_any(skb);
@@ -376,8 +373,7 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev,
 }
 
 static u32 qede_xmit_type(struct qede_dev *edev,
-                         struct sk_buff *skb,
-                         int *ipv6_ext)
+                         struct sk_buff *skb, int *ipv6_ext)
 {
        u32 rc = XMIT_L4_CSUM;
        __be16 l3_proto;
@@ -434,15 +430,13 @@ static void qede_set_params_for_ipv6_ext(struct sk_buff *skb,
 }
 
 static int map_frag_to_bd(struct qede_dev *edev,
-                         skb_frag_t *frag,
-                         struct eth_tx_bd *bd)
+                         skb_frag_t *frag, struct eth_tx_bd *bd)
 {
        dma_addr_t mapping;
 
        /* Map skb non-linear frag data for DMA */
        mapping = skb_frag_dma_map(&edev->pdev->dev, frag, 0,
-                                  skb_frag_size(frag),
-                                  DMA_TO_DEVICE);
+                                  skb_frag_size(frag), DMA_TO_DEVICE);
        if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
                DP_NOTICE(edev, "Unable to map frag - dropping packet\n");
                return -ENOMEM;
@@ -504,9 +498,8 @@ static inline void qede_update_tx_producer(struct qede_tx_queue *txq)
 }
 
 /* Main transmit function */
-static
-netdev_tx_t qede_start_xmit(struct sk_buff *skb,
-                           struct net_device *ndev)
+static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
+                                  struct net_device *ndev)
 {
        struct qede_dev *edev = netdev_priv(ndev);
        struct netdev_queue *netdev_txq;
@@ -530,8 +523,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb,
        txq = QEDE_TX_QUEUE(edev, txq_index);
        netdev_txq = netdev_get_tx_queue(ndev, txq_index);
 
-       WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) <
-                              (MAX_SKB_FRAGS + 1));
+       WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1));
 
        xmit_type = qede_xmit_type(edev, skb, &ipv6_ext);
 
@@ -761,8 +753,7 @@ int qede_txq_has_work(struct qede_tx_queue *txq)
        return hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl);
 }
 
-static int qede_tx_int(struct qede_dev *edev,
-                      struct qede_tx_queue *txq)
+static int qede_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
        struct netdev_queue *netdev_txq;
        u16 hw_bd_cons;
@@ -960,8 +951,7 @@ static inline void qede_update_rx_prod(struct qede_dev *edev,
 
 static u32 qede_get_rxhash(struct qede_dev *edev,
                           u8 bitfields,
-                          __le32 rss_hash,
-                          enum pkt_hash_types *rxhash_type)
+                          __le32 rss_hash, enum pkt_hash_types *rxhash_type)
 {
        enum rss_hash_type htype;
 
@@ -990,12 +980,10 @@ static void qede_set_skb_csum(struct sk_buff *skb, u8 csum_flag)
 
 static inline void qede_skb_receive(struct qede_dev *edev,
                                    struct qede_fastpath *fp,
-                                   struct sk_buff *skb,
-                                   u16 vlan_tag)
+                                   struct sk_buff *skb, u16 vlan_tag)
 {
        if (vlan_tag)
-               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
-                                      vlan_tag);
+               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
 
        napi_gro_receive(&fp->napi, skb);
 }
@@ -1018,8 +1006,7 @@ static void qede_set_gro_params(struct qede_dev *edev,
 
 static int qede_fill_frag_skb(struct qede_dev *edev,
                              struct qede_rx_queue *rxq,
-                             u8 tpa_agg_index,
-                             u16 len_on_bd)
+                             u8 tpa_agg_index, u16 len_on_bd)
 {
        struct sw_rx_data *current_bd = &rxq->sw_rx_ring[rxq->sw_rx_cons &
                                                         NUM_RX_BDS_MAX];
@@ -1467,7 +1454,7 @@ alloc_skb:
                skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE);
                if (unlikely(!skb)) {
                        DP_NOTICE(edev,
-                                 "Build_skb failed, dropping incoming packet\n");
+                                 "skb allocation failed, dropping incoming packet\n");
                        qede_recycle_rx_bd_ring(rxq, edev, fp_cqe->bd_num);
                        rxq->rx_alloc_errors++;
                        goto next_cqe;
@@ -1575,8 +1562,7 @@ alloc_skb:
                skb->protocol = eth_type_trans(skb, edev->ndev);
 
                rx_hash = qede_get_rxhash(edev, fp_cqe->bitfields,
-                                         fp_cqe->rss_hash,
-                                         &rxhash_type);
+                                         fp_cqe->rss_hash, &rxhash_type);
 
                skb_set_hash(skb, rx_hash, rxhash_type);
 
@@ -1787,9 +1773,9 @@ void qede_fill_by_demand_stats(struct qede_dev *edev)
        edev->stats.tx_mac_ctrl_frames = stats.tx_mac_ctrl_frames;
 }
 
-static struct rtnl_link_stats64 *qede_get_stats64(
-                           struct net_device *dev,
-                           struct rtnl_link_stats64 *stats)
+static
+struct rtnl_link_stats64 *qede_get_stats64(struct net_device *dev,
+                                          struct rtnl_link_stats64 *stats)
 {
        struct qede_dev *edev = netdev_priv(dev);
 
@@ -2103,8 +2089,7 @@ static void qede_vlan_mark_nonconfigured(struct qede_dev *edev)
                }
 
                DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
-                          "marked vlan %d as non-configured\n",
-                          vlan->vid);
+                          "marked vlan %d as non-configured\n", vlan->vid);
        }
 
        edev->accept_any_vlan = false;
@@ -2146,7 +2131,7 @@ static void qede_udp_tunnel_add(struct net_device *dev,
 
                edev->vxlan_dst_port = t_port;
 
-               DP_VERBOSE(edev, QED_MSG_DEBUG, "Added vxlan port=%d",
+               DP_VERBOSE(edev, QED_MSG_DEBUG, "Added vxlan port=%d\n",
                           t_port);
 
                set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags);
@@ -2157,7 +2142,7 @@ static void qede_udp_tunnel_add(struct net_device *dev,
 
                edev->geneve_dst_port = t_port;
 
-               DP_VERBOSE(edev, QED_MSG_DEBUG, "Added geneve port=%d",
+               DP_VERBOSE(edev, QED_MSG_DEBUG, "Added geneve port=%d\n",
                           t_port);
                set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags);
                break;
@@ -2181,7 +2166,7 @@ static void qede_udp_tunnel_del(struct net_device *dev,
 
                edev->vxlan_dst_port = 0;
 
-               DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted vxlan port=%d",
+               DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted vxlan port=%d\n",
                           t_port);
 
                set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags);
@@ -2192,7 +2177,7 @@ static void qede_udp_tunnel_del(struct net_device *dev,
 
                edev->geneve_dst_port = 0;
 
-               DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted geneve port=%d",
+               DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted geneve port=%d\n",
                           t_port);
                set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags);
                break;
@@ -2237,15 +2222,13 @@ static const struct net_device_ops qede_netdev_ops = {
 static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
                                            struct pci_dev *pdev,
                                            struct qed_dev_eth_info *info,
-                                           u32 dp_module,
-                                           u8 dp_level)
+                                           u32 dp_module, u8 dp_level)
 {
        struct net_device *ndev;
        struct qede_dev *edev;
 
        ndev = alloc_etherdev_mqs(sizeof(*edev),
-                                 info->num_queues,
-                                 info->num_queues);
+                                 info->num_queues, info->num_queues);
        if (!ndev) {
                pr_err("etherdev allocation failed\n");
                return NULL;
@@ -2261,6 +2244,9 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
        edev->q_num_rx_buffers = NUM_RX_BDS_DEF;
        edev->q_num_tx_buffers = NUM_TX_BDS_DEF;
 
+       DP_INFO(edev, "Allocated netdev with %d tx queues and %d rx queues\n",
+               info->num_queues, info->num_queues);
+
        SET_NETDEV_DEV(ndev, &pdev->dev);
 
        memset(&edev->stats, 0, sizeof(edev->stats));
@@ -2453,7 +2439,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
                        bool is_vf, enum qede_probe_mode mode)
 {
        struct qed_probe_params probe_params;
-       struct qed_slowpath_params params;
+       struct qed_slowpath_params sp_params;
        struct qed_dev_eth_info dev_info;
        struct qede_dev *edev;
        struct qed_dev *cdev;
@@ -2476,14 +2462,14 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
        qede_update_pf_params(cdev);
 
        /* Start the Slowpath-process */
-       memset(&params, 0, sizeof(struct qed_slowpath_params));
-       params.int_mode = QED_INT_MODE_MSIX;
-       params.drv_major = QEDE_MAJOR_VERSION;
-       params.drv_minor = QEDE_MINOR_VERSION;
-       params.drv_rev = QEDE_REVISION_VERSION;
-       params.drv_eng = QEDE_ENGINEERING_VERSION;
-       strlcpy(params.name, "qede LAN", QED_DRV_VER_STR_SIZE);
-       rc = qed_ops->common->slowpath_start(cdev, &params);
+       memset(&sp_params, 0, sizeof(sp_params));
+       sp_params.int_mode = QED_INT_MODE_MSIX;
+       sp_params.drv_major = QEDE_MAJOR_VERSION;
+       sp_params.drv_minor = QEDE_MINOR_VERSION;
+       sp_params.drv_rev = QEDE_REVISION_VERSION;
+       sp_params.drv_eng = QEDE_ENGINEERING_VERSION;
+       strlcpy(sp_params.name, "qede LAN", QED_DRV_VER_STR_SIZE);
+       rc = qed_ops->common->slowpath_start(cdev, &sp_params);
        if (rc) {
                pr_notice("Cannot start slowpath\n");
                goto err1;
@@ -2586,7 +2572,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
        qed_ops->common->slowpath_stop(cdev);
        qed_ops->common->remove(cdev);
 
-       pr_notice("Ending successfully qede_remove\n");
+       dev_info(&pdev->dev, "Ending qede_remove successfully\n");
 }
 
 static void qede_remove(struct pci_dev *pdev)
@@ -2634,16 +2620,14 @@ static void qede_free_mem_sb(struct qede_dev *edev,
 
 /* This function allocates fast-path status block memory */
 static int qede_alloc_mem_sb(struct qede_dev *edev,
-                            struct qed_sb_info *sb_info,
-                            u16 sb_id)
+                            struct qed_sb_info *sb_info, u16 sb_id)
 {
        struct status_block *sb_virt;
        dma_addr_t sb_phys;
        int rc;
 
        sb_virt = dma_alloc_coherent(&edev->pdev->dev,
-                                    sizeof(*sb_virt),
-                                    &sb_phys, GFP_KERNEL);
+                                    sizeof(*sb_virt), &sb_phys, GFP_KERNEL);
        if (!sb_virt) {
                DP_ERR(edev, "Status block allocation failed\n");
                return -ENOMEM;
@@ -2675,16 +2659,15 @@ static void qede_free_rx_buffers(struct qede_dev *edev,
                data = rx_buf->data;
 
                dma_unmap_page(&edev->pdev->dev,
-                              rx_buf->mapping,
-                              PAGE_SIZE, DMA_FROM_DEVICE);
+                              rx_buf->mapping, PAGE_SIZE, DMA_FROM_DEVICE);
 
                rx_buf->data = NULL;
                __free_page(data);
        }
 }
 
-static void qede_free_sge_mem(struct qede_dev *edev,
-                             struct qede_rx_queue *rxq) {
+static void qede_free_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
+{
        int i;
 
        if (edev->gro_disable)
@@ -2703,8 +2686,7 @@ static void qede_free_sge_mem(struct qede_dev *edev,
        }
 }
 
-static void qede_free_mem_rxq(struct qede_dev *edev,
-                             struct qede_rx_queue *rxq)
+static void qede_free_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 {
        qede_free_sge_mem(edev, rxq);
 
@@ -2726,9 +2708,6 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev,
        struct eth_rx_bd *rx_bd;
        dma_addr_t mapping;
        struct page *data;
-       u16 rx_buf_size;
-
-       rx_buf_size = rxq->rx_buf_size;
 
        data = alloc_pages(GFP_ATOMIC, 0);
        if (unlikely(!data)) {
@@ -2763,8 +2742,7 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev,
        return 0;
 }
 
-static int qede_alloc_sge_mem(struct qede_dev *edev,
-                             struct qede_rx_queue *rxq)
+static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
 {
        dma_addr_t mapping;
        int i;
@@ -2811,15 +2789,14 @@ err:
 }
 
 /* This function allocates all memory needed per Rx queue */
-static int qede_alloc_mem_rxq(struct qede_dev *edev,
-                             struct qede_rx_queue *rxq)
+static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 {
        int i, rc, size;
 
        rxq->num_rx_buffers = edev->q_num_rx_buffers;
 
-       rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD +
-                          edev->ndev->mtu;
+       rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + edev->ndev->mtu;
+
        if (rxq->rx_buf_size > PAGE_SIZE)
                rxq->rx_buf_size = PAGE_SIZE;
 
@@ -2873,8 +2850,7 @@ err:
        return rc;
 }
 
-static void qede_free_mem_txq(struct qede_dev *edev,
-                             struct qede_tx_queue *txq)
+static void qede_free_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
        /* Free the parallel SW ring */
        kfree(txq->sw_tx_ring);
@@ -2884,8 +2860,7 @@ static void qede_free_mem_txq(struct qede_dev *edev,
 }
 
 /* This function allocates all memory needed per Tx queue */
-static int qede_alloc_mem_txq(struct qede_dev *edev,
-                             struct qede_tx_queue *txq)
+static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
        int size, rc;
        union eth_tx_bd_types *p_virt;
@@ -2917,8 +2892,7 @@ err:
 }
 
 /* This function frees all memory of a single fp */
-static void qede_free_mem_fp(struct qede_dev *edev,
-                            struct qede_fastpath *fp)
+static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 {
        int tc;
 
@@ -2933,8 +2907,7 @@ static void qede_free_mem_fp(struct qede_dev *edev,
 /* This function allocates all memory needed for a single fp (i.e. an entity
  * which contains status block, one rx queue and multiple per-TC tx queues.
  */
-static int qede_alloc_mem_fp(struct qede_dev *edev,
-                            struct qede_fastpath *fp)
+static int qede_alloc_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 {
        int rc, tc;
 
@@ -3146,8 +3119,7 @@ static int qede_setup_irqs(struct qede_dev *edev)
 }
 
 static int qede_drain_txq(struct qede_dev *edev,
-                         struct qede_tx_queue *txq,
-                         bool allow_drain)
+                         struct qede_tx_queue *txq, bool allow_drain)
 {
        int rc, cnt = 1000;
 
index 1e1cc0f..d4809ad 100644 (file)
@@ -1876,6 +1876,20 @@ static int ravb_set_gti(struct net_device *ndev)
        return 0;
 }
 
+static void ravb_set_config_mode(struct net_device *ndev)
+{
+       struct ravb_private *priv = netdev_priv(ndev);
+
+       if (priv->chip_id == RCAR_GEN2) {
+               ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
+               /* Set CSEL value */
+               ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
+       } else {
+               ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG |
+                           CCC_GAC | CCC_CSEL_HPB);
+       }
+}
+
 static int ravb_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
@@ -1978,14 +1992,7 @@ static int ravb_probe(struct platform_device *pdev)
        ndev->ethtool_ops = &ravb_ethtool_ops;
 
        /* Set AVB config mode */
-       if (chip_id == RCAR_GEN2) {
-               ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
-               /* Set CSEL value */
-               ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
-       } else {
-               ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG |
-                           CCC_GAC | CCC_CSEL_HPB);
-       }
+       ravb_set_config_mode(ndev);
 
        /* Set GTI value */
        error = ravb_set_gti(ndev);
@@ -2097,6 +2104,54 @@ static int ravb_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
+static int ravb_suspend(struct device *dev)
+{
+       struct net_device *ndev = dev_get_drvdata(dev);
+       int ret = 0;
+
+       if (netif_running(ndev)) {
+               netif_device_detach(ndev);
+               ret = ravb_close(ndev);
+       }
+
+       return ret;
+}
+
+static int ravb_resume(struct device *dev)
+{
+       struct net_device *ndev = dev_get_drvdata(dev);
+       struct ravb_private *priv = netdev_priv(ndev);
+       int ret = 0;
+
+       /* All registers have been reset to default values.
+        * Restore all registers which were set up at probe time and
+        * reopen the device if it was running before the system suspended.
+        */
+
+       /* Set AVB config mode */
+       ravb_set_config_mode(ndev);
+
+       /* Set GTI value */
+       ret = ravb_set_gti(ndev);
+       if (ret)
+               return ret;
+
+       /* Request GTI loading */
+       ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
+
+       /* Restore descriptor base address table */
+       ravb_write(ndev, priv->desc_bat_dma, DBAT);
+
+       if (netif_running(ndev)) {
+               ret = ravb_open(ndev);
+               if (ret < 0)
+                       return ret;
+               netif_device_attach(ndev);
+       }
+
+       return ret;
+}
+
 static int ravb_runtime_nop(struct device *dev)
 {
        /* Runtime PM callback shared between ->runtime_suspend()
@@ -2110,6 +2165,7 @@ static int ravb_runtime_nop(struct device *dev)
 }
 
 static const struct dev_pm_ops ravb_dev_pm_ops = {
+       SET_SYSTEM_SLEEP_PM_OPS(ravb_suspend, ravb_resume)
        SET_RUNTIME_PM_OPS(ravb_runtime_nop, ravb_runtime_nop, NULL)
 };
 
index 799d58d..1f8240a 100644 (file)
@@ -1723,7 +1723,7 @@ out:
 static void sh_eth_adjust_link(struct net_device *ndev)
 {
        struct sh_eth_private *mdp = netdev_priv(ndev);
-       struct phy_device *phydev = mdp->phydev;
+       struct phy_device *phydev = ndev->phydev;
        int new_state = 0;
 
        if (phydev->link) {
@@ -1800,51 +1800,48 @@ static int sh_eth_phy_init(struct net_device *ndev)
 
        phy_attached_info(phydev);
 
-       mdp->phydev = phydev;
-
        return 0;
 }
 
 /* PHY control start function */
 static int sh_eth_phy_start(struct net_device *ndev)
 {
-       struct sh_eth_private *mdp = netdev_priv(ndev);
        int ret;
 
        ret = sh_eth_phy_init(ndev);
        if (ret)
                return ret;
 
-       phy_start(mdp->phydev);
+       phy_start(ndev->phydev);
 
        return 0;
 }
 
-static int sh_eth_get_settings(struct net_device *ndev,
-                              struct ethtool_cmd *ecmd)
+static int sh_eth_get_link_ksettings(struct net_device *ndev,
+                                    struct ethtool_link_ksettings *cmd)
 {
        struct sh_eth_private *mdp = netdev_priv(ndev);
        unsigned long flags;
        int ret;
 
-       if (!mdp->phydev)
+       if (!ndev->phydev)
                return -ENODEV;
 
        spin_lock_irqsave(&mdp->lock, flags);
-       ret = phy_ethtool_gset(mdp->phydev, ecmd);
+       ret = phy_ethtool_ksettings_get(ndev->phydev, cmd);
        spin_unlock_irqrestore(&mdp->lock, flags);
 
        return ret;
 }
 
-static int sh_eth_set_settings(struct net_device *ndev,
-                              struct ethtool_cmd *ecmd)
+static int sh_eth_set_link_ksettings(struct net_device *ndev,
+                                    const struct ethtool_link_ksettings *cmd)
 {
        struct sh_eth_private *mdp = netdev_priv(ndev);
        unsigned long flags;
        int ret;
 
-       if (!mdp->phydev)
+       if (!ndev->phydev)
                return -ENODEV;
 
        spin_lock_irqsave(&mdp->lock, flags);
@@ -1852,11 +1849,11 @@ static int sh_eth_set_settings(struct net_device *ndev,
        /* disable tx and rx */
        sh_eth_rcv_snd_disable(ndev);
 
-       ret = phy_ethtool_sset(mdp->phydev, ecmd);
+       ret = phy_ethtool_ksettings_set(ndev->phydev, cmd);
        if (ret)
                goto error_exit;
 
-       if (ecmd->duplex == DUPLEX_FULL)
+       if (cmd->base.duplex == DUPLEX_FULL)
                mdp->duplex = 1;
        else
                mdp->duplex = 0;
@@ -2067,11 +2064,11 @@ static int sh_eth_nway_reset(struct net_device *ndev)
        unsigned long flags;
        int ret;
 
-       if (!mdp->phydev)
+       if (!ndev->phydev)
                return -ENODEV;
 
        spin_lock_irqsave(&mdp->lock, flags);
-       ret = phy_start_aneg(mdp->phydev);
+       ret = phy_start_aneg(ndev->phydev);
        spin_unlock_irqrestore(&mdp->lock, flags);
 
        return ret;
@@ -2198,8 +2195,6 @@ static int sh_eth_set_ringparam(struct net_device *ndev,
 }
 
 static const struct ethtool_ops sh_eth_ethtool_ops = {
-       .get_settings   = sh_eth_get_settings,
-       .set_settings   = sh_eth_set_settings,
        .get_regs_len   = sh_eth_get_regs_len,
        .get_regs       = sh_eth_get_regs,
        .nway_reset     = sh_eth_nway_reset,
@@ -2211,6 +2206,8 @@ static const struct ethtool_ops sh_eth_ethtool_ops = {
        .get_sset_count     = sh_eth_get_sset_count,
        .get_ringparam  = sh_eth_get_ringparam,
        .set_ringparam  = sh_eth_set_ringparam,
+       .get_link_ksettings = sh_eth_get_link_ksettings,
+       .set_link_ksettings = sh_eth_set_link_ksettings,
 };
 
 /* network device open function */
@@ -2408,10 +2405,9 @@ static int sh_eth_close(struct net_device *ndev)
        sh_eth_dev_exit(ndev);
 
        /* PHY Disconnect */
-       if (mdp->phydev) {
-               phy_stop(mdp->phydev);
-               phy_disconnect(mdp->phydev);
-               mdp->phydev = NULL;
+       if (ndev->phydev) {
+               phy_stop(ndev->phydev);
+               phy_disconnect(ndev->phydev);
        }
 
        free_irq(ndev->irq, ndev);
@@ -2429,8 +2425,7 @@ static int sh_eth_close(struct net_device *ndev)
 /* ioctl to device function */
 static int sh_eth_do_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
 {
-       struct sh_eth_private *mdp = netdev_priv(ndev);
-       struct phy_device *phydev = mdp->phydev;
+       struct phy_device *phydev = ndev->phydev;
 
        if (!netif_running(ndev))
                return -EINVAL;
index c62380e..d050f37 100644 (file)
@@ -518,7 +518,6 @@ struct sh_eth_private {
        /* MII transceiver section. */
        u32 phy_id;                     /* PHY ID */
        struct mii_bus *mii_bus;        /* MDIO bus control */
-       struct phy_device *phydev;      /* PHY device control */
        int link;
        phy_interface_t phy_interface;
        int msg_enable;
index f658fee..b8c9f18 100644 (file)
@@ -177,7 +177,7 @@ static int efx_ef10_get_vf_index(struct efx_nic *efx)
 
 static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
 {
-       MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_OUT_LEN);
+       MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V2_OUT_LEN);
        struct efx_ef10_nic_data *nic_data = efx->nic_data;
        size_t outlen;
        int rc;
@@ -188,7 +188,7 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
                          outbuf, sizeof(outbuf), &outlen);
        if (rc)
                return rc;
-       if (outlen < sizeof(outbuf)) {
+       if (outlen < MC_CMD_GET_CAPABILITIES_OUT_LEN) {
                netif_err(efx, drv, efx->net_dev,
                          "unable to read datapath firmware capabilities\n");
                return -EIO;
@@ -197,6 +197,12 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
        nic_data->datapath_caps =
                MCDI_DWORD(outbuf, GET_CAPABILITIES_OUT_FLAGS1);
 
+       if (outlen >= MC_CMD_GET_CAPABILITIES_V2_OUT_LEN)
+               nic_data->datapath_caps2 = MCDI_DWORD(outbuf,
+                               GET_CAPABILITIES_V2_OUT_FLAGS2);
+       else
+               nic_data->datapath_caps2 = 0;
+
        /* record the DPCPU firmware IDs to determine VEB vswitching support.
         */
        nic_data->rx_dpcpu_fw_id =
@@ -227,6 +233,116 @@ static int efx_ef10_get_sysclk_freq(struct efx_nic *efx)
        return rc > 0 ? rc : -ERANGE;
 }
 
+static int efx_ef10_get_timer_workarounds(struct efx_nic *efx)
+{
+       struct efx_ef10_nic_data *nic_data = efx->nic_data;
+       unsigned int implemented;
+       unsigned int enabled;
+       int rc;
+
+       nic_data->workaround_35388 = false;
+       nic_data->workaround_61265 = false;
+
+       rc = efx_mcdi_get_workarounds(efx, &implemented, &enabled);
+
+       if (rc == -ENOSYS) {
+               /* Firmware without GET_WORKAROUNDS - not a problem. */
+               rc = 0;
+       } else if (rc == 0) {
+               /* Bug61265 workaround is always enabled if implemented. */
+               if (enabled & MC_CMD_GET_WORKAROUNDS_OUT_BUG61265)
+                       nic_data->workaround_61265 = true;
+
+               if (enabled & MC_CMD_GET_WORKAROUNDS_OUT_BUG35388) {
+                       nic_data->workaround_35388 = true;
+               } else if (implemented & MC_CMD_GET_WORKAROUNDS_OUT_BUG35388) {
+                       /* Workaround is implemented but not enabled.
+                        * Try to enable it.
+                        */
+                       rc = efx_mcdi_set_workaround(efx,
+                                                    MC_CMD_WORKAROUND_BUG35388,
+                                                    true, NULL);
+                       if (rc == 0)
+                               nic_data->workaround_35388 = true;
+                       /* If we failed to set the workaround just carry on. */
+                       rc = 0;
+               }
+       }
+
+       netif_dbg(efx, probe, efx->net_dev,
+                 "workaround for bug 35388 is %sabled\n",
+                 nic_data->workaround_35388 ? "en" : "dis");
+       netif_dbg(efx, probe, efx->net_dev,
+                 "workaround for bug 61265 is %sabled\n",
+                 nic_data->workaround_61265 ? "en" : "dis");
+
+       return rc;
+}
+
+static void efx_ef10_process_timer_config(struct efx_nic *efx,
+                                         const efx_dword_t *data)
+{
+       unsigned int max_count;
+
+       if (EFX_EF10_WORKAROUND_61265(efx)) {
+               efx->timer_quantum_ns = MCDI_DWORD(data,
+                       GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_STEP_NS);
+               efx->timer_max_ns = MCDI_DWORD(data,
+                       GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_MAX_NS);
+       } else if (EFX_EF10_WORKAROUND_35388(efx)) {
+               efx->timer_quantum_ns = MCDI_DWORD(data,
+                       GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_NS_PER_COUNT);
+               max_count = MCDI_DWORD(data,
+                       GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_MAX_COUNT);
+               efx->timer_max_ns = max_count * efx->timer_quantum_ns;
+       } else {
+               efx->timer_quantum_ns = MCDI_DWORD(data,
+                       GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_NS_PER_COUNT);
+               max_count = MCDI_DWORD(data,
+                       GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_MAX_COUNT);
+               efx->timer_max_ns = max_count * efx->timer_quantum_ns;
+       }
+
+       netif_dbg(efx, probe, efx->net_dev,
+                 "got timer properties from MC: quantum %u ns; max %u ns\n",
+                 efx->timer_quantum_ns, efx->timer_max_ns);
+}
+
+static int efx_ef10_get_timer_config(struct efx_nic *efx)
+{
+       MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN);
+       int rc;
+
+       rc = efx_ef10_get_timer_workarounds(efx);
+       if (rc)
+               return rc;
+
+       rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_EVQ_TMR_PROPERTIES, NULL, 0,
+                               outbuf, sizeof(outbuf), NULL);
+
+       if (rc == 0) {
+               efx_ef10_process_timer_config(efx, outbuf);
+       } else if (rc == -ENOSYS || rc == -EPERM) {
+               /* Not available - fall back to Huntington defaults. */
+               unsigned int quantum;
+
+               rc = efx_ef10_get_sysclk_freq(efx);
+               if (rc < 0)
+                       return rc;
+
+               quantum = 1536000 / rc; /* 1536 cycles */
+               efx->timer_quantum_ns = quantum;
+               efx->timer_max_ns = efx->type->timer_period_max * quantum;
+               rc = 0;
+       } else {
+               efx_mcdi_display_error(efx, MC_CMD_GET_EVQ_TMR_PROPERTIES,
+                                      MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN,
+                                      NULL, 0, rc);
+       }
+
+       return rc;
+}
+
 static int efx_ef10_get_mac_address_pf(struct efx_nic *efx, u8 *mac_address)
 {
        MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_MAC_ADDRESSES_OUT_LEN);
@@ -527,33 +643,11 @@ static int efx_ef10_probe(struct efx_nic *efx)
        if (rc)
                goto fail5;
 
-       rc = efx_ef10_get_sysclk_freq(efx);
+       rc = efx_ef10_get_timer_config(efx);
        if (rc < 0)
                goto fail5;
        efx->timer_quantum_ns = 1536000 / rc; /* 1536 cycles */
 
-       /* Check whether firmware supports bug 35388 workaround.
-        * First try to enable it, then if we get EPERM, just
-        * ask if it's already enabled
-        */
-       rc = efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG35388, true, NULL);
-       if (rc == 0) {
-               nic_data->workaround_35388 = true;
-       } else if (rc == -EPERM) {
-               unsigned int enabled;
-
-               rc = efx_mcdi_get_workarounds(efx, NULL, &enabled);
-               if (rc)
-                       goto fail3;
-               nic_data->workaround_35388 = enabled &
-                       MC_CMD_GET_WORKAROUNDS_OUT_BUG35388;
-       } else if (rc != -ENOSYS && rc != -ENOENT) {
-               goto fail5;
-       }
-       netif_dbg(efx, probe, efx->net_dev,
-                 "workaround for bug 35388 is %sabled\n",
-                 nic_data->workaround_35388 ? "en" : "dis");
-
        rc = efx_mcdi_mon_probe(efx);
        if (rc && rc != -EPERM)
                goto fail5;
@@ -1743,27 +1837,43 @@ static size_t efx_ef10_update_stats_vf(struct efx_nic *efx, u64 *full_stats,
 static void efx_ef10_push_irq_moderation(struct efx_channel *channel)
 {
        struct efx_nic *efx = channel->efx;
-       unsigned int mode, value;
+       unsigned int mode, usecs;
        efx_dword_t timer_cmd;
 
-       if (channel->irq_moderation) {
+       if (channel->irq_moderation_us) {
                mode = 3;
-               value = channel->irq_moderation - 1;
+               usecs = channel->irq_moderation_us;
        } else {
                mode = 0;
-               value = 0;
+               usecs = 0;
        }
 
-       if (EFX_EF10_WORKAROUND_35388(efx)) {
+       if (EFX_EF10_WORKAROUND_61265(efx)) {
+               MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_EVQ_TMR_IN_LEN);
+               unsigned int ns = usecs * 1000;
+
+               MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_INSTANCE,
+                              channel->channel);
+               MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_LOAD_REQ_NS, ns);
+               MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_RELOAD_REQ_NS, ns);
+               MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_MODE, mode);
+
+               efx_mcdi_rpc_async(efx, MC_CMD_SET_EVQ_TMR,
+                                  inbuf, sizeof(inbuf), 0, NULL, 0);
+       } else if (EFX_EF10_WORKAROUND_35388(efx)) {
+               unsigned int ticks = efx_usecs_to_ticks(efx, usecs);
+
                EFX_POPULATE_DWORD_3(timer_cmd, ERF_DD_EVQ_IND_TIMER_FLAGS,
                                     EFE_DD_EVQ_IND_TIMER_FLAGS,
                                     ERF_DD_EVQ_IND_TIMER_MODE, mode,
-                                    ERF_DD_EVQ_IND_TIMER_VAL, value);
+                                    ERF_DD_EVQ_IND_TIMER_VAL, ticks);
                efx_writed_page(efx, &timer_cmd, ER_DD_EVQ_INDIRECT,
                                channel->channel);
        } else {
+               unsigned int ticks = efx_usecs_to_ticks(efx, usecs);
+
                EFX_POPULATE_DWORD_2(timer_cmd, ERF_DZ_TC_TIMER_MODE, mode,
-                                    ERF_DZ_TC_TIMER_VAL, value);
+                                    ERF_DZ_TC_TIMER_VAL, ticks);
                efx_writed_page(efx, &timer_cmd, ER_DZ_EVQ_TMR,
                                channel->channel);
        }
@@ -2535,13 +2645,12 @@ fail:
 static int efx_ef10_ev_init(struct efx_channel *channel)
 {
        MCDI_DECLARE_BUF(inbuf,
-                        MC_CMD_INIT_EVQ_IN_LEN(EFX_MAX_EVQ_SIZE * 8 /
-                                               EFX_BUF_SIZE));
-       MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_OUT_LEN);
+                        MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 /
+                                                  EFX_BUF_SIZE));
+       MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN);
        size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE;
        struct efx_nic *efx = channel->efx;
        struct efx_ef10_nic_data *nic_data;
-       bool supports_rx_merge;
        size_t inlen, outlen;
        unsigned int enabled, implemented;
        dma_addr_t dma_addr;
@@ -2549,9 +2658,6 @@ static int efx_ef10_ev_init(struct efx_channel *channel)
        int i;
 
        nic_data = efx->nic_data;
-       supports_rx_merge =
-               !!(nic_data->datapath_caps &
-                  1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
 
        /* Fill event queue with all ones (i.e. empty events) */
        memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len);
@@ -2560,11 +2666,6 @@ static int efx_ef10_ev_init(struct efx_channel *channel)
        MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel);
        /* INIT_EVQ expects index in vector table, not absolute */
        MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel);
-       MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS,
-                             INIT_EVQ_IN_FLAG_INTERRUPTING, 1,
-                             INIT_EVQ_IN_FLAG_RX_MERGE, 1,
-                             INIT_EVQ_IN_FLAG_TX_MERGE, 1,
-                             INIT_EVQ_IN_FLAG_CUT_THRU, !supports_rx_merge);
        MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE,
                       MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS);
        MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0);
@@ -2573,6 +2674,27 @@ static int efx_ef10_ev_init(struct efx_channel *channel)
                       MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS);
        MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0);
 
+       if (nic_data->datapath_caps2 &
+           1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN) {
+               /* Use the new generic approach to specifying event queue
+                * configuration, requesting lower latency or higher throughput.
+                * The options that actually get used appear in the output.
+                */
+               MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS,
+                                     INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1,
+                                     INIT_EVQ_V2_IN_FLAG_TYPE,
+                                     MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO);
+       } else {
+               bool cut_thru = !(nic_data->datapath_caps &
+                       1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
+
+               MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS,
+                                     INIT_EVQ_IN_FLAG_INTERRUPTING, 1,
+                                     INIT_EVQ_IN_FLAG_RX_MERGE, 1,
+                                     INIT_EVQ_IN_FLAG_TX_MERGE, 1,
+                                     INIT_EVQ_IN_FLAG_CUT_THRU, cut_thru);
+       }
+
        dma_addr = channel->eventq.buf.dma_addr;
        for (i = 0; i < entries; ++i) {
                MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr);
@@ -2583,6 +2705,13 @@ static int efx_ef10_ev_init(struct efx_channel *channel)
 
        rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen,
                          outbuf, sizeof(outbuf), &outlen);
+
+       if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN)
+               netif_dbg(efx, drv, efx->net_dev,
+                         "Channel %d using event queue flags %08x\n",
+                         channel->channel,
+                         MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS));
+
        /* IRQ return is ignored */
        if (channel->channel || rc)
                return rc;
@@ -2590,8 +2719,8 @@ static int efx_ef10_ev_init(struct efx_channel *channel)
        /* Successfully created event queue on channel 0 */
        rc = efx_mcdi_get_workarounds(efx, &implemented, &enabled);
        if (rc == -ENOSYS) {
-               /* GET_WORKAROUNDS was implemented before the bug26807
-                * workaround, thus the latter must be unavailable in this fw
+               /* GET_WORKAROUNDS was implemented before this workaround,
+                * thus it must be unavailable in this firmware.
                 */
                nic_data->workaround_26807 = false;
                rc = 0;
index 14b821b..f3826ae 100644 (file)
@@ -281,6 +281,27 @@ static int efx_process_channel(struct efx_channel *channel, int budget)
  * NAPI guarantees serialisation of polls of the same device, which
  * provides the guarantee required by efx_process_channel().
  */
+static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
+{
+       int step = efx->irq_mod_step_us;
+
+       if (channel->irq_mod_score < irq_adapt_low_thresh) {
+               if (channel->irq_moderation_us > step) {
+                       channel->irq_moderation_us -= step;
+                       efx->type->push_irq_moderation(channel);
+               }
+       } else if (channel->irq_mod_score > irq_adapt_high_thresh) {
+               if (channel->irq_moderation_us <
+                   efx->irq_rx_moderation_us) {
+                       channel->irq_moderation_us += step;
+                       efx->type->push_irq_moderation(channel);
+               }
+       }
+
+       channel->irq_count = 0;
+       channel->irq_mod_score = 0;
+}
+
 static int efx_poll(struct napi_struct *napi, int budget)
 {
        struct efx_channel *channel =
@@ -301,22 +322,7 @@ static int efx_poll(struct napi_struct *napi, int budget)
                if (efx_channel_has_rx_queue(channel) &&
                    efx->irq_rx_adaptive &&
                    unlikely(++channel->irq_count == 1000)) {
-                       if (unlikely(channel->irq_mod_score <
-                                    irq_adapt_low_thresh)) {
-                               if (channel->irq_moderation > 1) {
-                                       channel->irq_moderation -= 1;
-                                       efx->type->push_irq_moderation(channel);
-                               }
-                       } else if (unlikely(channel->irq_mod_score >
-                                           irq_adapt_high_thresh)) {
-                               if (channel->irq_moderation <
-                                   efx->irq_rx_moderation) {
-                                       channel->irq_moderation += 1;
-                                       efx->type->push_irq_moderation(channel);
-                               }
-                       }
-                       channel->irq_count = 0;
-                       channel->irq_mod_score = 0;
+                       efx_update_irq_mod(efx, channel);
                }
 
                efx_filter_rfs_expire(channel);
@@ -1703,6 +1709,7 @@ static int efx_probe_nic(struct efx_nic *efx)
        netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
 
        /* Initialise the interrupt moderation settings */
+       efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
        efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
                                true);
 
@@ -1949,14 +1956,21 @@ static void efx_remove_all(struct efx_nic *efx)
  * Interrupt moderation
  *
  **************************************************************************/
-
-static unsigned int irq_mod_ticks(unsigned int usecs, unsigned int quantum_ns)
+unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs)
 {
        if (usecs == 0)
                return 0;
-       if (usecs * 1000 < quantum_ns)
+       if (usecs * 1000 < efx->timer_quantum_ns)
                return 1; /* never round down to 0 */
-       return usecs * 1000 / quantum_ns;
+       return usecs * 1000 / efx->timer_quantum_ns;
+}
+
+unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks)
+{
+       /* We must round up when converting ticks to microseconds
+        * because we round down when converting the other way.
+        */
+       return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000);
 }
 
 /* Set interrupt moderation parameters */
@@ -1965,21 +1979,16 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
                            bool rx_may_override_tx)
 {
        struct efx_channel *channel;
-       unsigned int irq_mod_max = DIV_ROUND_UP(efx->type->timer_period_max *
-                                               efx->timer_quantum_ns,
-                                               1000);
-       unsigned int tx_ticks;
-       unsigned int rx_ticks;
+       unsigned int timer_max_us;
 
        EFX_ASSERT_RESET_SERIALISED(efx);
 
-       if (tx_usecs > irq_mod_max || rx_usecs > irq_mod_max)
-               return -EINVAL;
+       timer_max_us = efx->timer_max_ns / 1000;
 
-       tx_ticks = irq_mod_ticks(tx_usecs, efx->timer_quantum_ns);
-       rx_ticks = irq_mod_ticks(rx_usecs, efx->timer_quantum_ns);
+       if (tx_usecs > timer_max_us || rx_usecs > timer_max_us)
+               return -EINVAL;
 
-       if (tx_ticks != rx_ticks && efx->tx_channel_offset == 0 &&
+       if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 &&
            !rx_may_override_tx) {
                netif_err(efx, drv, efx->net_dev, "Channels are shared. "
                          "RX and TX IRQ moderation must be equal\n");
@@ -1987,12 +1996,12 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
        }
 
        efx->irq_rx_adaptive = rx_adaptive;
-       efx->irq_rx_moderation = rx_ticks;
+       efx->irq_rx_moderation_us = rx_usecs;
        efx_for_each_channel(channel, efx) {
                if (efx_channel_has_rx_queue(channel))
-                       channel->irq_moderation = rx_ticks;
+                       channel->irq_moderation_us = rx_usecs;
                else if (efx_channel_has_tx_queues(channel))
-                       channel->irq_moderation = tx_ticks;
+                       channel->irq_moderation_us = tx_usecs;
        }
 
        return 0;
@@ -2001,26 +2010,21 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
 void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
                            unsigned int *rx_usecs, bool *rx_adaptive)
 {
-       /* We must round up when converting ticks to microseconds
-        * because we round down when converting the other way.
-        */
-
        *rx_adaptive = efx->irq_rx_adaptive;
-       *rx_usecs = DIV_ROUND_UP(efx->irq_rx_moderation *
-                                efx->timer_quantum_ns,
-                                1000);
+       *rx_usecs = efx->irq_rx_moderation_us;
 
        /* If channels are shared between RX and TX, so is IRQ
         * moderation.  Otherwise, IRQ moderation is the same for all
         * TX channels and is not adaptive.
         */
-       if (efx->tx_channel_offset == 0)
+       if (efx->tx_channel_offset == 0) {
                *tx_usecs = *rx_usecs;
-       else
-               *tx_usecs = DIV_ROUND_UP(
-                       efx->channel[efx->tx_channel_offset]->irq_moderation *
-                       efx->timer_quantum_ns,
-                       1000);
+       } else {
+               struct efx_channel *tx_channel;
+
+               tx_channel = efx->channel[efx->tx_channel_offset];
+               *tx_usecs = tx_channel->irq_moderation_us;
+       }
 }
 
 /**************************************************************************
index c3ae739..342ae16 100644 (file)
@@ -204,6 +204,8 @@ int efx_try_recovery(struct efx_nic *efx);
 
 /* Global */
 void efx_schedule_reset(struct efx_nic *efx, enum reset_type type);
+unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs);
+unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks);
 int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
                            unsigned int rx_usecs, bool rx_adaptive,
                            bool rx_may_override_tx);
index d790cb8..1a70926 100644 (file)
@@ -378,12 +378,15 @@ static void falcon_push_irq_moderation(struct efx_channel *channel)
        struct efx_nic *efx = channel->efx;
 
        /* Set timer register */
-       if (channel->irq_moderation) {
+       if (channel->irq_moderation_us) {
+               unsigned int ticks;
+
+               ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us);
                EFX_POPULATE_DWORD_2(timer_cmd,
                                     FRF_AB_TC_TIMER_MODE,
                                     FFE_BB_TIMER_MODE_INT_HLDOFF,
                                     FRF_AB_TC_TIMER_VAL,
-                                    channel->irq_moderation - 1);
+                                    ticks - 1);
        } else {
                EFX_POPULATE_DWORD_2(timer_cmd,
                                     FRF_AB_TC_TIMER_MODE,
@@ -2373,6 +2376,8 @@ static int falcon_probe_nic(struct efx_nic *efx)
                             EFX_MAX_CHANNELS);
        efx->max_tx_channels = efx->max_channels;
        efx->timer_quantum_ns = 4968; /* 621 cycles */
+       efx->timer_max_ns = efx->type->timer_period_max *
+                           efx->timer_quantum_ns;
 
        /* Initialise I2C adapter */
        board = falcon_board(efx);
index d28e7dd..9fbc12a 100644 (file)
@@ -548,7 +548,10 @@ static bool efx_mcdi_complete_async(struct efx_mcdi_iface *mcdi, bool timeout)
                efx_mcdi_display_error(efx, async->cmd, async->inlen, errbuf,
                                       err_len, rc);
        }
-       async->complete(efx, async->cookie, rc, outbuf, data_len);
+
+       if (async->complete)
+               async->complete(efx, async->cookie, rc, outbuf,
+                               min(async->outlen, data_len));
        kfree(async);
 
        efx_mcdi_release(mcdi);
index c9a5b00..ccceafc 100644 (file)
 #define          MC_CMD_POLL_BIST_MEM_BUS_MC 0x0
 /* enum: CSR IREG bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_CSR 0x1
-/* enum: RX DPCPU bus. */
+/* enum: RX0 DPCPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DPCPU_RX 0x2
 /* enum: TX0 DPCPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DPCPU_TX0 0x3
 /* enum: TX1 DPCPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DPCPU_TX1 0x4
-/* enum: RX DICPU bus. */
+/* enum: RX0 DICPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DICPU_RX 0x5
 /* enum: TX DICPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DICPU_TX 0x6
+/* enum: RX1 DPCPU bus. */
+#define          MC_CMD_POLL_BIST_MEM_BUS_DPCPU_RX1 0x7
+/* enum: RX1 DICPU bus. */
+#define          MC_CMD_POLL_BIST_MEM_BUS_DICPU_RX1 0x8
 /* Pattern written to RAM / register */
 #define       MC_CMD_POLL_BIST_OUT_MEM_EXPECT_OFST 16
 /* Actual value read from RAM / register */
 #define        MC_CMD_NVRAM_INFO_OUT_PROTECTED_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_OUT_TLV_LBN 1
 #define        MC_CMD_NVRAM_INFO_OUT_TLV_WIDTH 1
+#define        MC_CMD_NVRAM_INFO_OUT_CMAC_LBN 6
+#define        MC_CMD_NVRAM_INFO_OUT_CMAC_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_OUT_A_B_LBN 7
 #define        MC_CMD_NVRAM_INFO_OUT_A_B_WIDTH 1
 #define       MC_CMD_NVRAM_INFO_OUT_PHYSDEV_OFST 16
  * the command will fail with MC_CMD_ERR_FILTERS_PRESENT.
  */
 #define          MC_CMD_WORKAROUND_BUG26807 0x6
+/* enum: Bug 61265 workaround (broken EVQ TMR writes). */
+#define          MC_CMD_WORKAROUND_BUG61265 0x7
 /* 0 = disable the workaround indicated by TYPE; any non-zero value = enable
  * the workaround
  */
  * (GET_PHY_CFG_OUT_MEDIA_TYPE); the valid 'page number' input values, and the
  * output data, are interpreted on a per-type basis. For SFP+: PAGE=0 or 1
  * returns a 128-byte block read from module I2C address 0xA0 offset 0 or 0x80.
- * Anything else: currently undefined. Locks required: None. Return code: 0.
  */
 #define MC_CMD_GET_PHY_MEDIA_INFO 0x4b
 
 #define        LICENSED_V3_FEATURES_TX_SNIFF_WIDTH 1
 #define        LICENSED_V3_FEATURES_PROXY_FILTER_OPS_LBN 8
 #define        LICENSED_V3_FEATURES_PROXY_FILTER_OPS_WIDTH 1
+#define        LICENSED_V3_FEATURES_EVENT_CUT_THROUGH_LBN 9
+#define        LICENSED_V3_FEATURES_EVENT_CUT_THROUGH_WIDTH 1
 #define       LICENSED_V3_FEATURES_MASK_LBN 0
 #define       LICENSED_V3_FEATURES_MASK_WIDTH 64
 
 /* Only valid if INTRFLAG was true */
 #define       MC_CMD_INIT_EVQ_OUT_IRQ_OFST 0
 
+/* MC_CMD_INIT_EVQ_V2_IN msgrequest */
+#define    MC_CMD_INIT_EVQ_V2_IN_LENMIN 44
+#define    MC_CMD_INIT_EVQ_V2_IN_LENMAX 548
+#define    MC_CMD_INIT_EVQ_V2_IN_LEN(num) (36+8*(num))
+/* Size, in entries */
+#define       MC_CMD_INIT_EVQ_V2_IN_SIZE_OFST 0
+/* Desired instance. Must be set to a specific instance, which is a function
+ * local queue index.
+ */
+#define       MC_CMD_INIT_EVQ_V2_IN_INSTANCE_OFST 4
+/* The initial timer value. The load value is ignored if the timer mode is DIS.
+ */
+#define       MC_CMD_INIT_EVQ_V2_IN_TMR_LOAD_OFST 8
+/* The reload value is ignored in one-shot modes */
+#define       MC_CMD_INIT_EVQ_V2_IN_TMR_RELOAD_OFST 12
+/* tbd */
+#define       MC_CMD_INIT_EVQ_V2_IN_FLAGS_OFST 16
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_INTERRUPTING_LBN 0
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_INTERRUPTING_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_RPTR_DOS_LBN 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_RPTR_DOS_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_INT_ARMD_LBN 2
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_INT_ARMD_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_CUT_THRU_LBN 3
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_CUT_THRU_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_RX_MERGE_LBN 4
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_RX_MERGE_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_TX_MERGE_LBN 5
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_TX_MERGE_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_USE_TIMER_LBN 6
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_USE_TIMER_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_LBN 7
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_WIDTH 4
+/* enum: All initialisation flags specified by host. */
+#define          MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_MANUAL 0x0
+/* enum: MEDFORD only. Certain initialisation flags specified by host may be
+ * over-ridden by firmware based on licenses and firmware variant in order to
+ * provide the lowest latency achievable. See
+ * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags.
+ */
+#define          MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_LOW_LATENCY 0x1
+/* enum: MEDFORD only. Certain initialisation flags specified by host may be
+ * over-ridden by firmware based on licenses and firmware variant in order to
+ * provide the best throughput achievable. See
+ * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags.
+ */
+#define          MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_THROUGHPUT 0x2
+/* enum: MEDFORD only. Certain initialisation flags may be over-ridden by
+ * firmware based on licenses and firmware variant. See
+ * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags.
+ */
+#define          MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO 0x3
+#define       MC_CMD_INIT_EVQ_V2_IN_TMR_MODE_OFST 20
+/* enum: Disabled */
+#define          MC_CMD_INIT_EVQ_V2_IN_TMR_MODE_DIS 0x0
+/* enum: Immediate */
+#define          MC_CMD_INIT_EVQ_V2_IN_TMR_IMMED_START 0x1
+/* enum: Triggered */
+#define          MC_CMD_INIT_EVQ_V2_IN_TMR_TRIG_START 0x2
+/* enum: Hold-off */
+#define          MC_CMD_INIT_EVQ_V2_IN_TMR_INT_HLDOFF 0x3
+/* Target EVQ for wakeups if in wakeup mode. */
+#define       MC_CMD_INIT_EVQ_V2_IN_TARGET_EVQ_OFST 24
+/* Target interrupt if in interrupting mode (note union with target EVQ). Use
+ * MC_CMD_RESOURCE_INSTANCE_ANY unless a specific one is required for test
+ * purposes.
+ */
+#define       MC_CMD_INIT_EVQ_V2_IN_IRQ_NUM_OFST 24
+/* Event Counter Mode. */
+#define       MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_OFST 28
+/* enum: Disabled */
+#define          MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_DIS 0x0
+/* enum: Rx event count */
+#define          MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_RX 0x1
+/* enum: Tx event count */
+#define          MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_TX 0x2
+/* enum: Rx and Tx event count */
+#define          MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_RXTX 0x3
+/* Event queue packet count threshold. */
+#define       MC_CMD_INIT_EVQ_V2_IN_COUNT_THRSHLD_OFST 32
+/* 64-bit address of 4k of 4k-aligned host memory buffer */
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_OFST 36
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_LEN 8
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_LO_OFST 36
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_HI_OFST 40
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_MINNUM 1
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_MAXNUM 64
+
+/* MC_CMD_INIT_EVQ_V2_OUT msgresponse */
+#define    MC_CMD_INIT_EVQ_V2_OUT_LEN 8
+/* Only valid if INTRFLAG was true */
+#define       MC_CMD_INIT_EVQ_V2_OUT_IRQ_OFST 0
+/* Actual configuration applied on the card */
+#define       MC_CMD_INIT_EVQ_V2_OUT_FLAGS_OFST 4
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_CUT_THRU_LBN 0
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_CUT_THRU_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_RX_MERGE_LBN 1
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_RX_MERGE_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_TX_MERGE_LBN 2
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_TX_MERGE_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_RXQ_FORCE_EV_MERGING_LBN 3
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_RXQ_FORCE_EV_MERGING_WIDTH 1
+
 /* QUEUE_CRC_MODE structuredef */
 #define    QUEUE_CRC_MODE_LEN 1
 #define       QUEUE_CRC_MODE_MODE_LBN 0
 #define        MC_CMD_INIT_RXQ_IN_FLAG_PREFIX_WIDTH 1
 #define        MC_CMD_INIT_RXQ_IN_FLAG_DISABLE_SCATTER_LBN 9
 #define        MC_CMD_INIT_RXQ_IN_FLAG_DISABLE_SCATTER_WIDTH 1
-#define        MC_CMD_INIT_RXQ_IN_FLAG_FORCE_EV_MERGING_LBN 10
-#define        MC_CMD_INIT_RXQ_IN_FLAG_FORCE_EV_MERGING_WIDTH 1
+#define        MC_CMD_INIT_RXQ_IN_UNUSED_LBN 10
+#define        MC_CMD_INIT_RXQ_IN_UNUSED_WIDTH 1
 /* Owner ID to use if in buffer mode (zero if physical) */
 #define       MC_CMD_INIT_RXQ_IN_OWNER_ID_OFST 20
 /* The port ID associated with the v-adaptor which should contain this DMAQ. */
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_EVENT_CUT_THROUGH_WIDTH 1
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_CUT_THROUGH_LBN 4
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_CUT_THROUGH_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_VFIFO_ULL_MODE_LBN 5
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_VFIFO_ULL_MODE_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN 6
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN 7
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_LBN 8
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TIMESTAMP_LBN 9
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TIMESTAMP_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_SNIFF_LBN 10
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_SNIFF_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_LBN 11
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_WIDTH 1
 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
  * on older firmware (check the length).
  */
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_SIZE_PIO_BUFF_OFST 70
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_SIZE_PIO_BUFF_LEN 2
 
+/* MC_CMD_GET_CAPABILITIES_V3_OUT msgresponse */
+#define    MC_CMD_GET_CAPABILITIES_V3_OUT_LEN 73
+/* First word of flags. */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_FLAGS1_OFST 0
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VPORT_RECONFIGURE_LBN 3
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VPORT_RECONFIGURE_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_STRIPING_LBN 4
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_STRIPING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_QUERY_LBN 5
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_QUERY_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_PORT_VLAN_RESTRICT_LBN 6
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_PORT_VLAN_RESTRICT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_DRV_ATTACH_PREBOOT_LBN 7
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_DRV_ATTACH_PREBOOT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_FORCE_EVENT_MERGING_LBN 8
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_FORCE_EVENT_MERGING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_SET_MAC_ENHANCED_LBN 9
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_SET_MAC_ENHANCED_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_UNKNOWN_UCAST_DST_FILTER_ALWAYS_MULTI_RECIPIENT_LBN 10
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_UNKNOWN_UCAST_DST_FILTER_ALWAYS_MULTI_RECIPIENT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_PERMIT_SET_MAC_WHEN_FILTERS_INSTALLED_LBN 11
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_PERMIT_SET_MAC_WHEN_FILTERS_INSTALLED_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_SECURITY_FILTERING_LBN 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_SECURITY_FILTERING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_ADDITIONAL_RSS_MODES_LBN 13
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_ADDITIONAL_RSS_MODES_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_QBB_LBN 14
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_QBB_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_VAR_BUFFERS_LBN 15
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_VAR_BUFFERS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_RSS_LIMITED_LBN 16
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_RSS_LIMITED_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_LBN 17
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_INCLUDE_FCS_LBN 18
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_INCLUDE_FCS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VLAN_INSERTION_LBN 19
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VLAN_INSERTION_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_VLAN_STRIPPING_LBN 20
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_VLAN_STRIPPING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_LBN 21
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_0_LBN 22
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_0_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_14_LBN 23
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_14_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_TIMESTAMP_LBN 24
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_TIMESTAMP_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_BATCHING_LBN 25
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_BATCHING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_MCAST_FILTER_CHAINING_LBN 26
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_MCAST_FILTER_CHAINING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_PM_AND_RXDP_COUNTERS_LBN 27
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_PM_AND_RXDP_COUNTERS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DISABLE_SCATTER_LBN 28
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DISABLE_SCATTER_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MCAST_UDP_LOOPBACK_LBN 29
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MCAST_UDP_LOOPBACK_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_LBN 30
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VXLAN_NVGRE_LBN 31
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VXLAN_NVGRE_WIDTH 1
+/* RxDPCPU firmware id. */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_OFST 4
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_LEN 2
+/* enum: Standard RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP  0x0
+/* enum: Low latency RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_LOW_LATENCY  0x1
+/* enum: Packed stream RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_PACKED_STREAM  0x2
+/* enum: BIST RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_BIST  0x10a
+/* enum: RXDP Test firmware image 1 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH  0x101
+/* enum: RXDP Test firmware image 2 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD  0x102
+/* enum: RXDP Test firmware image 3 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST  0x103
+/* enum: RXDP Test firmware image 4 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE  0x104
+/* enum: RXDP Test firmware image 5 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_BACKPRESSURE  0x105
+/* enum: RXDP Test firmware image 6 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_PACKET_EDITS  0x106
+/* enum: RXDP Test firmware image 7 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_RX_HDR_SPLIT  0x107
+/* enum: RXDP Test firmware image 8 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DISABLE_DL  0x108
+/* enum: RXDP Test firmware image 9 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DOORBELL_DELAY  0x10b
+/* TxDPCPU firmware id. */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_OFST 6
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_LEN 2
+/* enum: Standard TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP  0x0
+/* enum: Low latency TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_LOW_LATENCY  0x1
+/* enum: High packet rate TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_HIGH_PACKET_RATE  0x3
+/* enum: BIST TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_BIST  0x12d
+/* enum: TXDP Test firmware image 1 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_TSO_EDIT  0x101
+/* enum: TXDP Test firmware image 2 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_PACKET_EDITS  0x102
+/* enum: TXDP CSR bus test firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_CSR  0x103
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_OFST 8
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_LEN 2
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_LBN 0
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_WIDTH 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_TYPE_LBN 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_TYPE_WIDTH 4
+/* enum: reserved value - do not use (may indicate alternative interpretation
+ * of REV field in future)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED  0x0
+/* enum: Trivial RX PD firmware for early Huntington development (Huntington
+ * development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FIRST_PKT  0x1
+/* enum: RX PD firmware with approximately Siena-compatible behaviour
+ * (Huntington development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT  0x2
+/* enum: Virtual switching (full feature) RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_VSWITCH  0x3
+/* enum: siena_compat variant RX PD firmware using PM rather than MAC
+ * (Huntington development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+/* enum: Low latency RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LOW_LATENCY  0x5
+/* enum: Packed stream RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_PACKED_STREAM  0x6
+/* enum: RX PD firmware handling layer 2 only for high packet rate performance
+ * tests (Medford development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
+/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+/* enum: RX PD firmware parsing but not filtering network overlay tunnel
+ * encapsulations (Medford development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY  0xf
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_OFST 10
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_LEN 2
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_LBN 0
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_WIDTH 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_TYPE_LBN 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_TYPE_WIDTH 4
+/* enum: reserved value - do not use (may indicate alternative interpretation
+ * of REV field in future)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED  0x0
+/* enum: Trivial TX PD firmware for early Huntington development (Huntington
+ * development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FIRST_PKT  0x1
+/* enum: TX PD firmware with approximately Siena-compatible behaviour
+ * (Huntington development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT  0x2
+/* enum: Virtual switching (full feature) TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_VSWITCH  0x3
+/* enum: siena_compat variant TX PD firmware using PM rather than MAC
+ * (Huntington development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LOW_LATENCY  0x5 /* enum */
+/* enum: TX PD firmware handling layer 2 only for high packet rate performance
+ * tests (Medford development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
+/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+/* Hardware capabilities of NIC */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_HW_CAPABILITIES_OFST 12
+/* Licensed capabilities */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_LICENSE_CAPABILITIES_OFST 16
+/* Second word of flags. Not present on older firmware (check the length). */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_FLAGS2_OFST 20
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_LBN 0
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_ENCAP_LBN 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_ENCAP_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVQ_TIMER_CTRL_LBN 2
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVQ_TIMER_CTRL_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVENT_CUT_THROUGH_LBN 3
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVENT_CUT_THROUGH_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_CUT_THROUGH_LBN 4
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_CUT_THROUGH_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VFIFO_ULL_MODE_LBN 5
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VFIFO_ULL_MODE_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN 6
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_MAC_STATS_40G_TX_SIZE_BINS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_INIT_EVQ_V2_LBN 7
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_INIT_EVQ_V2_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_TIMESTAMPING_LBN 8
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_TIMESTAMPING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TIMESTAMP_LBN 9
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TIMESTAMP_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_SNIFF_LBN 10
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_SNIFF_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_LBN 11
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_WIDTH 1
+/* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
+ * on older firmware (check the length).
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_N_CONTEXTS_OFST 24
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_N_CONTEXTS_LEN 2
+/* One byte per PF containing the number of the external port assigned to this
+ * PF, indexed by PF number. Special values indicate that a PF is either not
+ * present or not assigned.
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_OFST 26
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
+/* enum: The caller is not permitted to access information on this PF. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED  0xff
+/* enum: PF does not exist. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT  0xfe
+/* enum: PF does exist but is not assigned to any external port. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED  0xfd
+/* enum: This value indicates that the PF is assigned, but its assignment
+ * cannot be expressed in this field. It is intended for a possible future
+ * situation where a more complex scheme of PF-to-port mapping is being used.
+ * Future drivers should look for a new field supporting the new scheme;
+ * current/old drivers should treat this value as PF_NOT_ASSIGNED.
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT  0xfc
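A small sketch of how a driver might fold these special values into a single "no usable port" result, per the note above that INCOMPATIBLE_ASSIGNMENT should be treated like PF_NOT_ASSIGNED; the helper name is illustrative:

#include <stdint.h>

/* Interpret one byte of PFS_TO_PORTS_ASSIGNMENT: returns the external port
 * number, or -1 if the PF is absent, unassigned, hidden from the caller, or
 * uses a mapping this field cannot express.
 */
static int pf_external_port(uint8_t entry)
{
	switch (entry) {
	case MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED:
	case MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT:
	case MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED:
	case MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT:
		return -1;
	default:
		return entry;
	}
}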
+/* One byte per PF containing the number of its VFs, indexed by PF number. A
+ * special value indicates that a PF is not present.
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_OFST 42
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_LEN 1
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_NUM 16
+/* enum: The caller is not permitted to access information on this PF. */
+/*               MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED  0xff */
+/* enum: PF does not exist. */
+/*               MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT  0xfe */
+/* Number of VIs available for each external port */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_OFST 58
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_LEN 2
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_NUM 4
+/* Size of RX descriptor cache expressed as binary logarithm. The actual size
+ * equals (2 ^ RX_DESC_CACHE_SIZE).
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DESC_CACHE_SIZE_OFST 66
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DESC_CACHE_SIZE_LEN 1
+/* Size of TX descriptor cache expressed as binary logarithm. The actual size
+ * equals (2 ^ TX_DESC_CACHE_SIZE).
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DESC_CACHE_SIZE_OFST 67
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DESC_CACHE_SIZE_LEN 1
+/* Total number of available PIO buffers */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_PIO_BUFFS_OFST 68
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_PIO_BUFFS_LEN 2
+/* Size of a single PIO buffer */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_SIZE_PIO_BUFF_OFST 70
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_SIZE_PIO_BUFF_LEN 2
+/* On chips later than Medford the amount of address space assigned to each VI
+ * is configurable. This is a global setting that the driver must query to
+ * discover the VI-to-address mapping. Cut-through PIO (CTPIO) is not available
+ * with 8k VI windows.
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_OFST 72
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_LEN 1
+/* enum: Each VI occupies 8k as on Huntington and Medford. PIO is at offset 4k.
+ * CTPIO is not mapped.
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K   0x0
+/* enum: Each VI occupies 16k. PIO is at offset 4k. CTPIO is at offset 12k. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K  0x1
+/* enum: Each VI occupies 64k. PIO is at offset 4k. CTPIO is at offset 12k. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K  0x2
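The three window modes above imply a fixed per-VI layout; a sketch of that geometry as a lookup table, with the sizes taken directly from the enum comments and 0 used here to mean CTPIO is not mapped:

/* Per-VI window geometry implied by VI_WINDOW_MODE. Offsets are relative to
 * the start of each VI's window; ctpio_offset of 0 means CTPIO is unmapped.
 */
struct vi_window {
	unsigned int stride;		/* size of each VI's address window */
	unsigned int pio_offset;	/* PIO region within the window */
	unsigned int ctpio_offset;	/* CTPIO region, 0 if unmapped */
};

static const struct vi_window vi_window_modes[] = {
	[MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K]  = {  8192, 4096,     0 },
	[MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K] = { 16384, 4096, 12288 },
	[MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K] = { 65536, 4096, 12288 },
};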
+
 
 /***********************************/
 /* MC_CMD_V2_EXTN
  */
 #define MC_CMD_GET_RXDP_CONFIG 0xc2
 
-#define MC_CMD_0xc2_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0xc2_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_GET_RXDP_CONFIG_IN msgrequest */
 #define    MC_CMD_GET_RXDP_CONFIG_IN_LEN 0
  * that this operation returns a zero-length response
  */
 #define          MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE  0x0
-/* enum: report counts of installed licenses */
+/* enum: report counts of installed licenses. Returns EAGAIN if license
+ * processing (updating) has been started but not yet completed.
+ */
 #define          MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE  0x1
 
 /* MC_CMD_LICENSING_V3_OUT msgresponse */
 #define          MC_CMD_GET_WORKAROUNDS_OUT_BUG42008 0x20
 /* enum: Bug 26807 features present in firmware (multicast filter chaining) */
 #define          MC_CMD_GET_WORKAROUNDS_OUT_BUG26807 0x40
+/* enum: Bug 61265 workaround (broken EVQ TMR writes). */
+#define          MC_CMD_GET_WORKAROUNDS_OUT_BUG61265 0x80
 
 
 /***********************************/
 #define MC_CMD_0x118_PRIVILEGE_CTG SRIOV_CTG_ADMIN
 
 /* MC_CMD_RX_BALANCING_IN msgrequest */
-#define    MC_CMD_RX_BALANCING_IN_LEN 4
+#define    MC_CMD_RX_BALANCING_IN_LEN 16
 /* The RX port whose upconverter table will be modified */
 #define       MC_CMD_RX_BALANCING_IN_PORT_OFST 0
-#define       MC_CMD_RX_BALANCING_IN_PORT_LEN 1
 /* The VLAN priority associated with the table index and vFIFO */
-#define       MC_CMD_RX_BALANCING_IN_PRIORITY_OFST 1
-#define       MC_CMD_RX_BALANCING_IN_PRIORITY_LEN 1
+#define       MC_CMD_RX_BALANCING_IN_PRIORITY_OFST 4
 /* The resulting bit of SRC^DST for indexing the table */
-#define       MC_CMD_RX_BALANCING_IN_SRC_DST_OFST 2
-#define       MC_CMD_RX_BALANCING_IN_SRC_DST_LEN 1
+#define       MC_CMD_RX_BALANCING_IN_SRC_DST_OFST 8
 /* The RX engine to which the vFIFO in the table entry will point */
-#define       MC_CMD_RX_BALANCING_IN_ENG_OFST 3
-#define       MC_CMD_RX_BALANCING_IN_ENG_LEN 1
+#define       MC_CMD_RX_BALANCING_IN_ENG_OFST 12
 
 /* MC_CMD_RX_BALANCING_OUT msgresponse */
 #define    MC_CMD_RX_BALANCING_OUT_LEN 0
 
+/***********************************/
+/* MC_CMD_SET_EVQ_TMR
+ * Update the timer load, timer reload and timer mode values for a given EVQ.
+ * The requested timer values (in TMR_LOAD_REQ_NS and TMR_RELOAD_REQ_NS) will
+ * be rounded up to the granularity supported by the hardware, then truncated
+ * to the range supported by the hardware. The resulting value after the
+ * rounding and truncation will be returned to the caller (in TMR_LOAD_ACT_NS
+ * and TMR_RELOAD_ACT_NS).
+ */
+#define MC_CMD_SET_EVQ_TMR 0x120
+
+#define MC_CMD_0x120_PRIVILEGE_CTG SRIOV_CTG_GENERAL
+
+/* MC_CMD_SET_EVQ_TMR_IN msgrequest */
+#define    MC_CMD_SET_EVQ_TMR_IN_LEN 16
+/* Function-relative queue instance */
+#define       MC_CMD_SET_EVQ_TMR_IN_INSTANCE_OFST 0
+/* Requested value for timer load (in nanoseconds) */
+#define       MC_CMD_SET_EVQ_TMR_IN_TMR_LOAD_REQ_NS_OFST 4
+/* Requested value for timer reload (in nanoseconds) */
+#define       MC_CMD_SET_EVQ_TMR_IN_TMR_RELOAD_REQ_NS_OFST 8
+/* Timer mode. Meanings as per EVQ_TMR_REG.TC_TIMER_VAL */
+#define       MC_CMD_SET_EVQ_TMR_IN_TMR_MODE_OFST 12
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_DIS  0x0 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_IMMED_START  0x1 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_TRIG_START  0x2 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF  0x3 /* enum */
+
+/* MC_CMD_SET_EVQ_TMR_OUT msgresponse */
+#define    MC_CMD_SET_EVQ_TMR_OUT_LEN 8
+/* Actual value for timer load (in nanoseconds) */
+#define       MC_CMD_SET_EVQ_TMR_OUT_TMR_LOAD_ACT_NS_OFST 0
+/* Actual value for timer reload (in nanoseconds) */
+#define       MC_CMD_SET_EVQ_TMR_OUT_TMR_RELOAD_ACT_NS_OFST 4
+
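A rough sketch of issuing this command from the sfc driver, assuming its usual MCDI helpers (MCDI_DECLARE_BUF, MCDI_SET_DWORD, MCDI_DWORD, efx_mcdi_rpc); the surrounding variables (efx, channel, usecs) are illustrative fragments of a driver function, not shown here:

MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_EVQ_TMR_IN_LEN);
MCDI_DECLARE_BUF(outbuf, MC_CMD_SET_EVQ_TMR_OUT_LEN);
unsigned int ns = usecs * 1000;
size_t outlen;
int rc;

MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_INSTANCE, channel->channel);
MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_LOAD_REQ_NS, ns);
MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_RELOAD_REQ_NS, ns);
MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_MODE,
	       MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF);

rc = efx_mcdi_rpc(efx, MC_CMD_SET_EVQ_TMR, inbuf, sizeof(inbuf),
		  outbuf, sizeof(outbuf), &outlen);
if (rc == 0 && outlen >= MC_CMD_SET_EVQ_TMR_OUT_LEN)
	/* The firmware reports the rounded/clamped value it actually used. */
	ns = MCDI_DWORD(outbuf, SET_EVQ_TMR_OUT_TMR_LOAD_ACT_NS);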
+
+/***********************************/
+/* MC_CMD_GET_EVQ_TMR_PROPERTIES
+ * Query properties about the event queue timers.
+ */
+#define MC_CMD_GET_EVQ_TMR_PROPERTIES 0x122
+
+#define MC_CMD_0x122_PRIVILEGE_CTG SRIOV_CTG_GENERAL
+
+/* MC_CMD_GET_EVQ_TMR_PROPERTIES_IN msgrequest */
+#define    MC_CMD_GET_EVQ_TMR_PROPERTIES_IN_LEN 0
+
+/* MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT msgresponse */
+#define    MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN 36
+/* Reserved for future use. */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_FLAGS_OFST 0
+/* For timers updated via writes to EVQ_TMR_REG, this is the time interval (in
+ * nanoseconds) for each increment of the timer load/reload count. The
+ * requested duration of a timer is this value multiplied by the timer
+ * load/reload count.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_NS_PER_COUNT_OFST 4
+/* For timers updated via writes to EVQ_TMR_REG, this is the maximum value
+ * allowed for timer load/reload counts.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_MAX_COUNT_OFST 8
+/* For timers updated via writes to EVQ_TMR_REG, timer load/reload counts that
+ * are not a multiple of this step size will be rounded in an
+ * implementation-defined manner.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_STEP_OFST 12
+/* Maximum timer duration (in nanoseconds) for timers updated via MCDI. Only
+ * meaningful if MC_CMD_SET_EVQ_TMR is implemented.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_MAX_NS_OFST 16
+/* Timer durations requested via MCDI that are not a multiple of this step size
+ * will be rounded up. Only meaningful if MC_CMD_SET_EVQ_TMR is implemented.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_STEP_NS_OFST 20
+/* For timers updated using the bug35388 workaround, this is the time interval
+ * (in nanoseconds) for each increment of the timer load/reload count. The
+ * requested duration of a timer is this value multiplied by the timer
+ * load/reload count. This field is only meaningful if the bug35388 workaround
+ * is enabled.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_NS_PER_COUNT_OFST 24
+/* For timers updated using the bug35388 workaround, this is the maximum value
+ * allowed for timer load/reload counts. This field is only meaningful if the
+ * bug35388 workaround is enabled.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_MAX_COUNT_OFST 28
+/* For timers updated using the bug35388 workaround, timer load/reload counts
+ * that are not a multiple of this step size will be rounded in an
+ * implementation-defined manner. This field is only meaningful if the bug35388
+ * workaround is enabled.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_STEP_OFST 32
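Pulling the EVQ_TMR_REG fields above together: a requested duration in nanoseconds becomes a load/reload count by dividing by TMR_REG_NS_PER_COUNT, rounding to TMR_REG_STEP and clamping to TMR_REG_MAX_COUNT. A minimal sketch of that arithmetic, rounding up where the hardware behaviour is left implementation-defined:

/* Approximate conversion from a requested duration (ns) to an EVQ_TMR_REG
 * count using the properties reported above. Rounds up to the step size and
 * clamps to the maximum count; real hardware rounding may differ.
 */
static unsigned int evq_tmr_ns_to_count(unsigned int ns,
					unsigned int ns_per_count,
					unsigned int step,
					unsigned int max_count)
{
	unsigned int count = (ns + ns_per_count - 1) / ns_per_count;

	count = (count + step - 1) / step * step;	/* round up to STEP */
	if (count > max_count)
		count = max_count;
	return count;
}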
 
 #endif /* MCDI_PCOL_H */
index 9ff062a..13b7f52 100644 (file)
@@ -392,7 +392,7 @@ enum efx_sync_events_state {
  * @eventq_init: Event queue initialised flag
  * @enabled: Channel enabled indicator
  * @irq: IRQ number (MSI and MSI-X only)
- * @irq_moderation: IRQ moderation value (in hardware ticks)
+ * @irq_moderation_us: IRQ moderation value (in microseconds)
  * @napi_dev: Net device used with NAPI
  * @napi_str: NAPI control structure
  * @state: state for NAPI vs busy polling
@@ -433,7 +433,7 @@ struct efx_channel {
        bool eventq_init;
        bool enabled;
        int irq;
-       unsigned int irq_moderation;
+       unsigned int irq_moderation_us;
        struct net_device *napi_dev;
        struct napi_struct napi_str;
 #ifdef CONFIG_NET_RX_BUSY_POLL
@@ -810,8 +810,10 @@ struct vfdi_status;
  * @membase: Memory BAR value
  * @interrupt_mode: Interrupt mode
  * @timer_quantum_ns: Interrupt timer quantum, in nanoseconds
+ * @timer_max_ns: Interrupt timer maximum value, in nanoseconds
  * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues
- * @irq_rx_moderation: IRQ moderation time for RX event queues
+ * @irq_rx_mod_step_us: Step size for IRQ moderation for RX event queues
+ * @irq_rx_moderation_us: IRQ moderation time for RX event queues
  * @msg_enable: Log message enable flags
  * @state: Device state number (%STATE_*). Serialised by the rtnl_lock.
  * @reset_pending: Bitmask for pending resets
@@ -940,8 +942,10 @@ struct efx_nic {
 
        enum efx_int_mode interrupt_mode;
        unsigned int timer_quantum_ns;
+       unsigned int timer_max_ns;
        bool irq_rx_adaptive;
-       unsigned int irq_rx_moderation;
+       unsigned int irq_mod_step_us;
+       unsigned int irq_rx_moderation_us;
        u32 msg_enable;
 
        enum nic_state state;
index 96944c3..d8b1694 100644 (file)
@@ -507,10 +507,13 @@ enum {
  * @stats: Hardware statistics
  * @workaround_35388: Flag: firmware supports workaround for bug 35388
  * @workaround_26807: Flag: firmware supports workaround for bug 26807
+ * @workaround_61265: Flag: firmware supports workaround for bug 61265
  * @must_check_datapath_caps: Flag: @datapath_caps needs to be revalidated
  *     after MC reboot
  * @datapath_caps: Capabilities of datapath firmware (FLAGS1 field of
  *     %MC_CMD_GET_CAPABILITIES response)
+ * @datapath_caps2: Further capabilities of datapath firmware (FLAGS2 field of
+ *     %MC_CMD_GET_CAPABILITIES response)
  * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU
  * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU
  * @vport_id: The function's vport ID, only relevant for PFs
@@ -540,8 +543,10 @@ struct efx_ef10_nic_data {
        u64 stats[EF10_STAT_COUNT];
        bool workaround_35388;
        bool workaround_26807;
+       bool workaround_61265;
        bool must_check_datapath_caps;
        u32 datapath_caps;
+       u32 datapath_caps2;
        unsigned int rx_dpcpu_fw_id;
        unsigned int tx_dpcpu_fw_id;
        unsigned int vport_id;
index c771e0a..dd204d9 100644 (file)
@@ -1306,7 +1306,7 @@ static int efx_ptp_probe_channel(struct efx_channel *channel)
 {
        struct efx_nic *efx = channel->efx;
 
-       channel->irq_moderation = 0;
+       channel->irq_moderation_us = 0;
        channel->rx_queue.core_index = 0;
 
        return efx_ptp_probe(efx, channel);
index 2219b54..04ed1b4 100644 (file)
@@ -34,19 +34,24 @@ static void siena_init_wol(struct efx_nic *efx);
 
 static void siena_push_irq_moderation(struct efx_channel *channel)
 {
+       struct efx_nic *efx = channel->efx;
        efx_dword_t timer_cmd;
 
-       if (channel->irq_moderation)
+       if (channel->irq_moderation_us) {
+               unsigned int ticks;
+
+               ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us);
                EFX_POPULATE_DWORD_2(timer_cmd,
                                     FRF_CZ_TC_TIMER_MODE,
                                     FFE_CZ_TIMER_MODE_INT_HLDOFF,
                                     FRF_CZ_TC_TIMER_VAL,
-                                    channel->irq_moderation - 1);
-       else
+                                    ticks - 1);
+       } else {
                EFX_POPULATE_DWORD_2(timer_cmd,
                                     FRF_CZ_TC_TIMER_MODE,
                                     FFE_CZ_TIMER_MODE_DIS,
                                     FRF_CZ_TC_TIMER_VAL, 0);
+       }
        efx_writed_page_locked(channel->efx, &timer_cmd, FR_BZ_TIMER_COMMAND_P0,
                               channel->channel);
 }
@@ -222,6 +227,9 @@ static int siena_probe_nvconfig(struct efx_nic *efx)
        efx->timer_quantum_ns =
                (caps & (1 << MC_CMD_CAPABILITIES_TURBO_ACTIVE_LBN)) ?
                3072 : 6144; /* 768 cycles */
+       efx->timer_max_ns = efx->type->timer_period_max *
+                           efx->timer_quantum_ns;
+
        return rc;
 }
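The hunks above replace tick-based moderation values with microseconds and record timer_quantum_ns/timer_max_ns, so per-NIC code converts back to ticks when pushing the value (efx_usecs_to_ticks() in the Siena hunk). Its body is not shown in this diff; a plausible sketch of that conversion, given only timer_quantum_ns:

/* Not the driver's actual helper, just the conversion implied by the hunks
 * above: microseconds -> hardware ticks using timer_quantum_ns, never rounding
 * a non-zero request down to zero ticks.
 */
static unsigned int usecs_to_ticks(unsigned int usecs,
				   unsigned int timer_quantum_ns)
{
	unsigned int ns = usecs * 1000;

	if (usecs == 0)
		return 0;
	if (ns < timer_quantum_ns)
		return 1;
	return ns / timer_quantum_ns;
}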
 
index 2310b75..351cd14 100644 (file)
@@ -50,4 +50,8 @@
 #define EFX_WORKAROUND_35388(efx)                                      \
        (efx_nic_rev(efx) == EFX_REV_HUNT_A0 && EFX_EF10_WORKAROUND_35388(efx))
 
+/* Moderation timer access must go through MCDI */
+#define EFX_EF10_WORKAROUND_61265(efx)                                 \
+       (((struct efx_ef10_nic_data *)efx->nic_data)->workaround_61265)
+
 #endif /* EFX_WORKAROUNDS_H */
index f85d605..421ebda 100644 (file)
@@ -140,9 +140,10 @@ do {                                                               \
 #define CPSW_CMINTMAX_INTVL    (1000 / CPSW_CMINTMIN_CNT)
 #define CPSW_CMINTMIN_INTVL    ((1000 / CPSW_CMINTMAX_CNT) + 1)
 
-#define cpsw_slave_index(priv)                         \
-               ((priv->data.dual_emac) ? priv->emac_port :     \
-               priv->data.active_slave)
+#define cpsw_slave_index(cpsw, priv)                           \
+               ((cpsw->data.dual_emac) ? priv->emac_port :     \
+               cpsw->data.active_slave)
+#define IRQ_NUM                        2
 
 static int debug_level;
 module_param(debug_level, int, 0);
@@ -363,38 +364,39 @@ static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset)
        __raw_writel(val, slave->regs + offset);
 }
 
-struct cpsw_priv {
-       struct platform_device          *pdev;
-       struct net_device               *ndev;
-       struct napi_struct              napi_rx;
-       struct napi_struct              napi_tx;
+struct cpsw_common {
        struct device                   *dev;
        struct cpsw_platform_data       data;
+       struct napi_struct              napi_rx;
+       struct napi_struct              napi_tx;
        struct cpsw_ss_regs __iomem     *regs;
        struct cpsw_wr_regs __iomem     *wr_regs;
        u8 __iomem                      *hw_stats;
        struct cpsw_host_regs __iomem   *host_port_regs;
-       u32                             msg_enable;
        u32                             version;
        u32                             coal_intvl;
        u32                             bus_freq_mhz;
        int                             rx_packet_max;
-       struct clk                      *clk;
-       u8                              mac_addr[ETH_ALEN];
        struct cpsw_slave               *slaves;
        struct cpdma_ctlr               *dma;
        struct cpdma_chan               *txch, *rxch;
        struct cpsw_ale                 *ale;
-       bool                            rx_pause;
-       bool                            tx_pause;
        bool                            quirk_irq;
        bool                            rx_irq_disabled;
        bool                            tx_irq_disabled;
-       /* snapshot of IRQ numbers */
-       u32 irqs_table[4];
-       u32 num_irqs;
-       struct cpts *cpts;
+       u32 irqs_table[IRQ_NUM];
+       struct cpts                     *cpts;
+};
+
+struct cpsw_priv {
+       struct net_device               *ndev;
+       struct device                   *dev;
+       u32                             msg_enable;
+       u8                              mac_addr[ETH_ALEN];
+       bool                            rx_pause;
+       bool                            tx_pause;
        u32 emac_port;
+       struct cpsw_common *cpsw;
 };
 
 struct cpsw_stats {
@@ -485,78 +487,71 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = {
 
 #define CPSW_STATS_LEN ARRAY_SIZE(cpsw_gstrings_stats)
 
-#define napi_to_priv(napi)     container_of(napi, struct cpsw_priv, napi)
+#define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw)
+#define napi_to_cpsw(napi)     container_of(napi, struct cpsw_common, napi)
 #define for_each_slave(priv, func, arg...)                             \
        do {                                                            \
                struct cpsw_slave *slave;                               \
+               struct cpsw_common *cpsw = (priv)->cpsw;                \
                int n;                                                  \
-               if (priv->data.dual_emac)                               \
-                       (func)((priv)->slaves + priv->emac_port, ##arg);\
+               if (cpsw->data.dual_emac)                               \
+                       (func)((cpsw)->slaves + priv->emac_port, ##arg);\
                else                                                    \
-                       for (n = (priv)->data.slaves,                   \
-                                       slave = (priv)->slaves;         \
+                       for (n = cpsw->data.slaves,                     \
+                                       slave = cpsw->slaves;           \
                                        n; n--)                         \
                                (func)(slave++, ##arg);                 \
        } while (0)
-#define cpsw_get_slave_ndev(priv, __slave_no__)                                \
-       ((__slave_no__ < priv->data.slaves) ?                           \
-               priv->slaves[__slave_no__].ndev : NULL)
-#define cpsw_get_slave_priv(priv, __slave_no__)                                \
-       (((__slave_no__ < priv->data.slaves) &&                         \
-               (priv->slaves[__slave_no__].ndev)) ?                    \
-               netdev_priv(priv->slaves[__slave_no__].ndev) : NULL)    \
-
-#define cpsw_dual_emac_src_port_detect(status, priv, ndev, skb)                \
+
+#define cpsw_dual_emac_src_port_detect(cpsw, status, ndev, skb)                \
        do {                                                            \
-               if (!priv->data.dual_emac)                              \
+               if (!cpsw->data.dual_emac)                              \
                        break;                                          \
                if (CPDMA_RX_SOURCE_PORT(status) == 1) {                \
-                       ndev = cpsw_get_slave_ndev(priv, 0);            \
-                       priv = netdev_priv(ndev);                       \
+                       ndev = cpsw->slaves[0].ndev;                    \
                        skb->dev = ndev;                                \
                } else if (CPDMA_RX_SOURCE_PORT(status) == 2) {         \
-                       ndev = cpsw_get_slave_ndev(priv, 1);            \
-                       priv = netdev_priv(ndev);                       \
+                       ndev = cpsw->slaves[1].ndev;                    \
                        skb->dev = ndev;                                \
                }                                                       \
        } while (0)
-#define cpsw_add_mcast(priv, addr)                                     \
+#define cpsw_add_mcast(cpsw, priv, addr)                               \
        do {                                                            \
-               if (priv->data.dual_emac) {                             \
-                       struct cpsw_slave *slave = priv->slaves +       \
+               if (cpsw->data.dual_emac) {                             \
+                       struct cpsw_slave *slave = cpsw->slaves +       \
                                                priv->emac_port;        \
-                       int slave_port = cpsw_get_slave_port(priv,      \
+                       int slave_port = cpsw_get_slave_port(           \
                                                slave->slave_num);      \
-                       cpsw_ale_add_mcast(priv->ale, addr,             \
+                       cpsw_ale_add_mcast(cpsw->ale, addr,             \
                                1 << slave_port | ALE_PORT_HOST,        \
                                ALE_VLAN, slave->port_vlan, 0);         \
                } else {                                                \
-                       cpsw_ale_add_mcast(priv->ale, addr,             \
+                       cpsw_ale_add_mcast(cpsw->ale, addr,             \
                                ALE_ALL_PORTS,                          \
                                0, 0, 0);                               \
                }                                                       \
        } while (0)
 
-static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num)
+static inline int cpsw_get_slave_port(u32 slave_num)
 {
        return slave_num + 1;
 }
 
 static void cpsw_set_promiscious(struct net_device *ndev, bool enable)
 {
-       struct cpsw_priv *priv = netdev_priv(ndev);
-       struct cpsw_ale *ale = priv->ale;
+       struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+       struct cpsw_ale *ale = cpsw->ale;
        int i;
 
-       if (priv->data.dual_emac) {
+       if (cpsw->data.dual_emac) {
                bool flag = false;
 
                /* Enabling promiscuous mode for one interface will be
                 * common for both interfaces as they share the same
                 * hardware resource.
                 */
-               for (i = 0; i < priv->data.slaves; i++)
-                       if (priv->slaves[i].ndev->flags & IFF_PROMISC)
+               for (i = 0; i < cpsw->data.slaves; i++)
+                       if (cpsw->slaves[i].ndev->flags & IFF_PROMISC)
                                flag = true;
 
                if (!enable && flag) {
@@ -579,7 +574,7 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable)
                        unsigned long timeout = jiffies + HZ;
 
                        /* Disable Learn for all ports (host is port 0 and slaves are port 1 and up) */
-                       for (i = 0; i <= priv->data.slaves; i++) {
+                       for (i = 0; i <= cpsw->data.slaves; i++) {
                                cpsw_ale_control_set(ale, i,
                                                     ALE_PORT_NOLEARN, 1);
                                cpsw_ale_control_set(ale, i,
@@ -606,7 +601,7 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable)
                        cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 0);
 
                        /* Enable Learn for all ports (host is port 0 and slaves are port 1 and up) */
-                       for (i = 0; i <= priv->data.slaves; i++) {
+                       for (i = 0; i <= cpsw->data.slaves; i++) {
                                cpsw_ale_control_set(ale, i,
                                                     ALE_PORT_NOLEARN, 0);
                                cpsw_ale_control_set(ale, i,
@@ -620,17 +615,18 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable)
 static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
        int vid;
 
-       if (priv->data.dual_emac)
-               vid = priv->slaves[priv->emac_port].port_vlan;
+       if (cpsw->data.dual_emac)
+               vid = cpsw->slaves[priv->emac_port].port_vlan;
        else
-               vid = priv->data.default_vlan;
+               vid = cpsw->data.default_vlan;
 
        if (ndev->flags & IFF_PROMISC) {
                /* Enable promiscuous mode */
                cpsw_set_promiscious(ndev, true);
-               cpsw_ale_set_allmulti(priv->ale, IFF_ALLMULTI);
+               cpsw_ale_set_allmulti(cpsw->ale, IFF_ALLMULTI);
                return;
        } else {
                /* Disable promiscuous mode */
@@ -638,36 +634,36 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
        }
 
        /* Restore allmulti on vlans if necessary */
-       cpsw_ale_set_allmulti(priv->ale, priv->ndev->flags & IFF_ALLMULTI);
+       cpsw_ale_set_allmulti(cpsw->ale, priv->ndev->flags & IFF_ALLMULTI);
 
        /* Clear all mcast from ALE */
-       cpsw_ale_flush_multicast(priv->ale, ALE_ALL_PORTS, vid);
+       cpsw_ale_flush_multicast(cpsw->ale, ALE_ALL_PORTS, vid);
 
        if (!netdev_mc_empty(ndev)) {
                struct netdev_hw_addr *ha;
 
                /* program multicast address list into ALE register */
                netdev_for_each_mc_addr(ha, ndev) {
-                       cpsw_add_mcast(priv, (u8 *)ha->addr);
+                       cpsw_add_mcast(cpsw, priv, (u8 *)ha->addr);
                }
        }
 }
 
-static void cpsw_intr_enable(struct cpsw_priv *priv)
+static void cpsw_intr_enable(struct cpsw_common *cpsw)
 {
-       __raw_writel(0xFF, &priv->wr_regs->tx_en);
-       __raw_writel(0xFF, &priv->wr_regs->rx_en);
+       __raw_writel(0xFF, &cpsw->wr_regs->tx_en);
+       __raw_writel(0xFF, &cpsw->wr_regs->rx_en);
 
-       cpdma_ctlr_int_ctrl(priv->dma, true);
+       cpdma_ctlr_int_ctrl(cpsw->dma, true);
        return;
 }
 
-static void cpsw_intr_disable(struct cpsw_priv *priv)
+static void cpsw_intr_disable(struct cpsw_common *cpsw)
 {
-       __raw_writel(0, &priv->wr_regs->tx_en);
-       __raw_writel(0, &priv->wr_regs->rx_en);
+       __raw_writel(0, &cpsw->wr_regs->tx_en);
+       __raw_writel(0, &cpsw->wr_regs->rx_en);
 
-       cpdma_ctlr_int_ctrl(priv->dma, false);
+       cpdma_ctlr_int_ctrl(cpsw->dma, false);
        return;
 }
 
@@ -675,14 +671,14 @@ static void cpsw_tx_handler(void *token, int len, int status)
 {
        struct sk_buff          *skb = token;
        struct net_device       *ndev = skb->dev;
-       struct cpsw_priv        *priv = netdev_priv(ndev);
+       struct cpsw_common      *cpsw = ndev_to_cpsw(ndev);
 
        /* Check whether the queue is stopped due to stalled tx dma, if the
         * queue is stopped then start the queue as we have free desc for tx
         */
        if (unlikely(netif_queue_stopped(ndev)))
                netif_wake_queue(ndev);
-       cpts_tx_timestamp(priv->cpts, skb);
+       cpts_tx_timestamp(cpsw->cpts, skb);
        ndev->stats.tx_packets++;
        ndev->stats.tx_bytes += len;
        dev_kfree_skb_any(skb);
@@ -693,19 +689,19 @@ static void cpsw_rx_handler(void *token, int len, int status)
        struct sk_buff          *skb = token;
        struct sk_buff          *new_skb;
        struct net_device       *ndev = skb->dev;
-       struct cpsw_priv        *priv = netdev_priv(ndev);
        int                     ret = 0;
+       struct cpsw_common      *cpsw = ndev_to_cpsw(ndev);
 
-       cpsw_dual_emac_src_port_detect(status, priv, ndev, skb);
+       cpsw_dual_emac_src_port_detect(cpsw, status, ndev, skb);
 
        if (unlikely(status < 0) || unlikely(!netif_running(ndev))) {
                bool ndev_status = false;
-               struct cpsw_slave *slave = priv->slaves;
+               struct cpsw_slave *slave = cpsw->slaves;
                int n;
 
-               if (priv->data.dual_emac) {
+               if (cpsw->data.dual_emac) {
                        /* In dual emac mode check for all interfaces */
-                       for (n = priv->data.slaves; n; n--, slave++)
+                       for (n = cpsw->data.slaves; n; n--, slave++)
                                if (netif_running(slave->ndev))
                                        ndev_status = true;
                }
@@ -726,10 +722,10 @@ static void cpsw_rx_handler(void *token, int len, int status)
                return;
        }
 
-       new_skb = netdev_alloc_skb_ip_align(ndev, priv->rx_packet_max);
+       new_skb = netdev_alloc_skb_ip_align(ndev, cpsw->rx_packet_max);
        if (new_skb) {
                skb_put(skb, len);
-               cpts_rx_timestamp(priv->cpts, skb);
+               cpts_rx_timestamp(cpsw->cpts, skb);
                skb->protocol = eth_type_trans(skb, ndev);
                netif_receive_skb(skb);
                ndev->stats.rx_bytes += len;
@@ -741,83 +737,77 @@ static void cpsw_rx_handler(void *token, int len, int status)
        }
 
 requeue:
-       ret = cpdma_chan_submit(priv->rxch, new_skb, new_skb->data,
-                       skb_tailroom(new_skb), 0);
+       ret = cpdma_chan_submit(cpsw->rxch, new_skb, new_skb->data,
+                               skb_tailroom(new_skb), 0);
        if (WARN_ON(ret < 0))
                dev_kfree_skb_any(new_skb);
 }
 
 static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id)
 {
-       struct cpsw_priv *priv = dev_id;
+       struct cpsw_common *cpsw = dev_id;
 
-       writel(0, &priv->wr_regs->tx_en);
-       cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
+       writel(0, &cpsw->wr_regs->tx_en);
+       cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_TX);
 
-       if (priv->quirk_irq) {
-               disable_irq_nosync(priv->irqs_table[1]);
-               priv->tx_irq_disabled = true;
+       if (cpsw->quirk_irq) {
+               disable_irq_nosync(cpsw->irqs_table[1]);
+               cpsw->tx_irq_disabled = true;
        }
 
-       napi_schedule(&priv->napi_tx);
+       napi_schedule(&cpsw->napi_tx);
        return IRQ_HANDLED;
 }
 
 static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id)
 {
-       struct cpsw_priv *priv = dev_id;
+       struct cpsw_common *cpsw = dev_id;
 
-       cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
-       writel(0, &priv->wr_regs->rx_en);
+       cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX);
+       writel(0, &cpsw->wr_regs->rx_en);
 
-       if (priv->quirk_irq) {
-               disable_irq_nosync(priv->irqs_table[0]);
-               priv->rx_irq_disabled = true;
+       if (cpsw->quirk_irq) {
+               disable_irq_nosync(cpsw->irqs_table[0]);
+               cpsw->rx_irq_disabled = true;
        }
 
-       napi_schedule(&priv->napi_rx);
+       napi_schedule(&cpsw->napi_rx);
        return IRQ_HANDLED;
 }
 
 static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
 {
-       struct cpsw_priv        *priv = napi_to_priv(napi_tx);
+       struct cpsw_common      *cpsw = napi_to_cpsw(napi_tx);
        int                     num_tx;
 
-       num_tx = cpdma_chan_process(priv->txch, budget);
+       num_tx = cpdma_chan_process(cpsw->txch, budget);
        if (num_tx < budget) {
                napi_complete(napi_tx);
-               writel(0xff, &priv->wr_regs->tx_en);
-               if (priv->quirk_irq && priv->tx_irq_disabled) {
-                       priv->tx_irq_disabled = false;
-                       enable_irq(priv->irqs_table[1]);
+               writel(0xff, &cpsw->wr_regs->tx_en);
+               if (cpsw->quirk_irq && cpsw->tx_irq_disabled) {
+                       cpsw->tx_irq_disabled = false;
+                       enable_irq(cpsw->irqs_table[1]);
                }
        }
 
-       if (num_tx)
-               cpsw_dbg(priv, intr, "poll %d tx pkts\n", num_tx);
-
        return num_tx;
 }
 
 static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget)
 {
-       struct cpsw_priv        *priv = napi_to_priv(napi_rx);
+       struct cpsw_common      *cpsw = napi_to_cpsw(napi_rx);
        int                     num_rx;
 
-       num_rx = cpdma_chan_process(priv->rxch, budget);
+       num_rx = cpdma_chan_process(cpsw->rxch, budget);
        if (num_rx < budget) {
                napi_complete(napi_rx);
-               writel(0xff, &priv->wr_regs->rx_en);
-               if (priv->quirk_irq && priv->rx_irq_disabled) {
-                       priv->rx_irq_disabled = false;
-                       enable_irq(priv->irqs_table[0]);
+               writel(0xff, &cpsw->wr_regs->rx_en);
+               if (cpsw->quirk_irq && cpsw->rx_irq_disabled) {
+                       cpsw->rx_irq_disabled = false;
+                       enable_irq(cpsw->irqs_table[0]);
                }
        }
 
-       if (num_rx)
-               cpsw_dbg(priv, intr, "poll %d rx pkts\n", num_rx);
-
        return num_rx;
 }
 
@@ -850,17 +840,18 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave,
        struct phy_device       *phy = slave->phy;
        u32                     mac_control = 0;
        u32                     slave_port;
+       struct cpsw_common *cpsw = priv->cpsw;
 
        if (!phy)
                return;
 
-       slave_port = cpsw_get_slave_port(priv, slave->slave_num);
+       slave_port = cpsw_get_slave_port(slave->slave_num);
 
        if (phy->link) {
-               mac_control = priv->data.mac_control;
+               mac_control = cpsw->data.mac_control;
 
                /* enable forwarding */
-               cpsw_ale_control_set(priv->ale, slave_port,
+               cpsw_ale_control_set(cpsw->ale, slave_port,
                                     ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
 
                if (phy->speed == 1000)
@@ -884,7 +875,7 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave,
        } else {
                mac_control = 0;
                /* disable forwarding */
-               cpsw_ale_control_set(priv->ale, slave_port,
+               cpsw_ale_control_set(cpsw->ale, slave_port,
                                     ALE_PORT_STATE, ALE_PORT_STATE_DISABLE);
        }
 
@@ -916,9 +907,9 @@ static void cpsw_adjust_link(struct net_device *ndev)
 static int cpsw_get_coalesce(struct net_device *ndev,
                                struct ethtool_coalesce *coal)
 {
-       struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
-       coal->rx_coalesce_usecs = priv->coal_intvl;
+       coal->rx_coalesce_usecs = cpsw->coal_intvl;
        return 0;
 }
 
@@ -931,11 +922,12 @@ static int cpsw_set_coalesce(struct net_device *ndev,
        u32 prescale = 0;
        u32 addnl_dvdr = 1;
        u32 coal_intvl = 0;
+       struct cpsw_common *cpsw = priv->cpsw;
 
        coal_intvl = coal->rx_coalesce_usecs;
 
-       int_ctrl =  readl(&priv->wr_regs->int_control);
-       prescale = priv->bus_freq_mhz * 4;
+       int_ctrl =  readl(&cpsw->wr_regs->int_control);
+       prescale = cpsw->bus_freq_mhz * 4;
 
        if (!coal->rx_coalesce_usecs) {
                int_ctrl &= ~(CPSW_INTPRESCALE_MASK | CPSW_INTPACEEN);
@@ -963,27 +955,18 @@ static int cpsw_set_coalesce(struct net_device *ndev,
        }
 
        num_interrupts = (1000 * addnl_dvdr) / coal_intvl;
-       writel(num_interrupts, &priv->wr_regs->rx_imax);
-       writel(num_interrupts, &priv->wr_regs->tx_imax);
+       writel(num_interrupts, &cpsw->wr_regs->rx_imax);
+       writel(num_interrupts, &cpsw->wr_regs->tx_imax);
 
        int_ctrl |= CPSW_INTPACEEN;
        int_ctrl &= (~CPSW_INTPRESCALE_MASK);
        int_ctrl |= (prescale & CPSW_INTPRESCALE_MASK);
 
 update_return:
-       writel(int_ctrl, &priv->wr_regs->int_control);
+       writel(int_ctrl, &cpsw->wr_regs->int_control);
 
        cpsw_notice(priv, timer, "Set coalesce to %d usecs.\n", coal_intvl);
-       if (priv->data.dual_emac) {
-               int i;
-
-               for (i = 0; i < priv->data.slaves; i++) {
-                       priv = netdev_priv(priv->slaves[i].ndev);
-                       priv->coal_intvl = coal_intvl;
-               }
-       } else {
-               priv->coal_intvl = coal_intvl;
-       }
+       cpsw->coal_intvl = coal_intvl;
 
        return 0;
 }
@@ -1017,21 +1000,21 @@ static void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
 static void cpsw_get_ethtool_stats(struct net_device *ndev,
                                    struct ethtool_stats *stats, u64 *data)
 {
-       struct cpsw_priv *priv = netdev_priv(ndev);
        struct cpdma_chan_stats rx_stats;
        struct cpdma_chan_stats tx_stats;
        u32 val;
        u8 *p;
        int i;
+       struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
        /* Collect Davinci CPDMA stats for Rx and Tx Channel */
-       cpdma_chan_get_stats(priv->rxch, &rx_stats);
-       cpdma_chan_get_stats(priv->txch, &tx_stats);
+       cpdma_chan_get_stats(cpsw->rxch, &rx_stats);
+       cpdma_chan_get_stats(cpsw->txch, &tx_stats);
 
        for (i = 0; i < CPSW_STATS_LEN; i++) {
                switch (cpsw_gstrings_stats[i].type) {
                case CPSW_STATS:
-                       val = readl(priv->hw_stats +
+                       val = readl(cpsw->hw_stats +
                                    cpsw_gstrings_stats[i].stat_offset);
                        data[i] = val;
                        break;
@@ -1051,52 +1034,48 @@ static void cpsw_get_ethtool_stats(struct net_device *ndev,
        }
 }
 
-static int cpsw_common_res_usage_state(struct cpsw_priv *priv)
+static int cpsw_common_res_usage_state(struct cpsw_common *cpsw)
 {
        u32 i;
        u32 usage_count = 0;
 
-       if (!priv->data.dual_emac)
+       if (!cpsw->data.dual_emac)
                return 0;
 
-       for (i = 0; i < priv->data.slaves; i++)
-               if (priv->slaves[i].open_stat)
+       for (i = 0; i < cpsw->data.slaves; i++)
+               if (cpsw->slaves[i].open_stat)
                        usage_count++;
 
        return usage_count;
 }
 
-static inline int cpsw_tx_packet_submit(struct net_device *ndev,
-                       struct cpsw_priv *priv, struct sk_buff *skb)
+static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv,
+                                       struct sk_buff *skb)
 {
-       if (!priv->data.dual_emac)
-               return cpdma_chan_submit(priv->txch, skb, skb->data,
-                                 skb->len, 0);
+       struct cpsw_common *cpsw = priv->cpsw;
 
-       if (ndev == cpsw_get_slave_ndev(priv, 0))
-               return cpdma_chan_submit(priv->txch, skb, skb->data,
-                                 skb->len, 1);
-       else
-               return cpdma_chan_submit(priv->txch, skb, skb->data,
-                                 skb->len, 2);
+       return cpdma_chan_submit(cpsw->txch, skb, skb->data, skb->len,
+                                priv->emac_port + cpsw->data.dual_emac);
 }
 
 static inline void cpsw_add_dual_emac_def_ale_entries(
                struct cpsw_priv *priv, struct cpsw_slave *slave,
                u32 slave_port)
 {
+       struct cpsw_common *cpsw = priv->cpsw;
        u32 port_mask = 1 << slave_port | ALE_PORT_HOST;
 
-       if (priv->version == CPSW_VERSION_1)
+       if (cpsw->version == CPSW_VERSION_1)
                slave_write(slave, slave->port_vlan, CPSW1_PORT_VLAN);
        else
                slave_write(slave, slave->port_vlan, CPSW2_PORT_VLAN);
-       cpsw_ale_add_vlan(priv->ale, slave->port_vlan, port_mask,
+       cpsw_ale_add_vlan(cpsw->ale, slave->port_vlan, port_mask,
                          port_mask, port_mask, 0);
-       cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+       cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
                           port_mask, ALE_VLAN, slave->port_vlan, 0);
-       cpsw_ale_add_ucast(priv->ale, priv->mac_addr,
-               HOST_PORT_NUM, ALE_VLAN | ALE_SECURE, slave->port_vlan);
+       cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr,
+                          HOST_PORT_NUM, ALE_VLAN |
+                          ALE_SECURE, slave->port_vlan);
 }
 
 static void soft_reset_slave(struct cpsw_slave *slave)
@@ -1110,13 +1089,14 @@ static void soft_reset_slave(struct cpsw_slave *slave)
 static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 {
        u32 slave_port;
+       struct cpsw_common *cpsw = priv->cpsw;
 
        soft_reset_slave(slave);
 
        /* setup priority mapping */
        __raw_writel(RX_PRIORITY_MAPPING, &slave->sliver->rx_pri_map);
 
-       switch (priv->version) {
+       switch (cpsw->version) {
        case CPSW_VERSION_1:
                slave_write(slave, TX_PRIORITY_MAPPING, CPSW1_TX_PRI_MAP);
                break;
@@ -1128,17 +1108,17 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
        }
 
        /* setup max packet size, and mac address */
-       __raw_writel(priv->rx_packet_max, &slave->sliver->rx_maxlen);
+       __raw_writel(cpsw->rx_packet_max, &slave->sliver->rx_maxlen);
        cpsw_set_slave_mac(slave, priv);
 
        slave->mac_control = 0; /* no link yet */
 
-       slave_port = cpsw_get_slave_port(priv, slave->slave_num);
+       slave_port = cpsw_get_slave_port(slave->slave_num);
 
-       if (priv->data.dual_emac)
+       if (cpsw->data.dual_emac)
                cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port);
        else
-               cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+               cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
                                   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
 
        if (slave->data->phy_node) {
@@ -1168,81 +1148,83 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
        phy_start(slave->phy);
 
        /* Configure GMII_SEL register */
-       cpsw_phy_sel(&priv->pdev->dev, slave->phy->interface, slave->slave_num);
+       cpsw_phy_sel(cpsw->dev, slave->phy->interface, slave->slave_num);
 }
 
 static inline void cpsw_add_default_vlan(struct cpsw_priv *priv)
 {
-       const int vlan = priv->data.default_vlan;
+       struct cpsw_common *cpsw = priv->cpsw;
+       const int vlan = cpsw->data.default_vlan;
        u32 reg;
        int i;
        int unreg_mcast_mask;
 
-       reg = (priv->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN :
+       reg = (cpsw->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN :
               CPSW2_PORT_VLAN;
 
-       writel(vlan, &priv->host_port_regs->port_vlan);
+       writel(vlan, &cpsw->host_port_regs->port_vlan);
 
-       for (i = 0; i < priv->data.slaves; i++)
-               slave_write(priv->slaves + i, vlan, reg);
+       for (i = 0; i < cpsw->data.slaves; i++)
+               slave_write(cpsw->slaves + i, vlan, reg);
 
        if (priv->ndev->flags & IFF_ALLMULTI)
                unreg_mcast_mask = ALE_ALL_PORTS;
        else
                unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2;
 
-       cpsw_ale_add_vlan(priv->ale, vlan, ALE_ALL_PORTS,
+       cpsw_ale_add_vlan(cpsw->ale, vlan, ALE_ALL_PORTS,
                          ALE_ALL_PORTS, ALE_ALL_PORTS,
                          unreg_mcast_mask);
 }
 
 static void cpsw_init_host_port(struct cpsw_priv *priv)
 {
-       u32 control_reg;
        u32 fifo_mode;
+       u32 control_reg;
+       struct cpsw_common *cpsw = priv->cpsw;
 
        /* soft reset the controller and initialize ale */
-       soft_reset("cpsw", &priv->regs->soft_reset);
-       cpsw_ale_start(priv->ale);
+       soft_reset("cpsw", &cpsw->regs->soft_reset);
+       cpsw_ale_start(cpsw->ale);
 
        /* switch to vlan unaware mode */
-       cpsw_ale_control_set(priv->ale, HOST_PORT_NUM, ALE_VLAN_AWARE,
+       cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_VLAN_AWARE,
                             CPSW_ALE_VLAN_AWARE);
-       control_reg = readl(&priv->regs->control);
+       control_reg = readl(&cpsw->regs->control);
        control_reg |= CPSW_VLAN_AWARE;
-       writel(control_reg, &priv->regs->control);
-       fifo_mode = (priv->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
+       writel(control_reg, &cpsw->regs->control);
+       fifo_mode = (cpsw->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
                     CPSW_FIFO_NORMAL_MODE;
-       writel(fifo_mode, &priv->host_port_regs->tx_in_ctl);
+       writel(fifo_mode, &cpsw->host_port_regs->tx_in_ctl);
 
        /* setup host port priority mapping */
        __raw_writel(CPDMA_TX_PRIORITY_MAP,
-                    &priv->host_port_regs->cpdma_tx_pri_map);
-       __raw_writel(0, &priv->host_port_regs->cpdma_rx_chan_map);
+                    &cpsw->host_port_regs->cpdma_tx_pri_map);
+       __raw_writel(0, &cpsw->host_port_regs->cpdma_rx_chan_map);
 
-       cpsw_ale_control_set(priv->ale, HOST_PORT_NUM,
+       cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM,
                             ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
 
-       if (!priv->data.dual_emac) {
-               cpsw_ale_add_ucast(priv->ale, priv->mac_addr, HOST_PORT_NUM,
+       if (!cpsw->data.dual_emac) {
+               cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM,
                                   0, 0);
-               cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+               cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
                                   ALE_PORT_HOST, 0, 0, ALE_MCAST_FWD_2);
        }
 }
 
-static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_priv *priv)
+static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_common *cpsw)
 {
        u32 slave_port;
 
-       slave_port = cpsw_get_slave_port(priv, slave->slave_num);
+       slave_port = cpsw_get_slave_port(slave->slave_num);
 
        if (!slave->phy)
                return;
        phy_stop(slave->phy);
        phy_disconnect(slave->phy);
        slave->phy = NULL;
-       cpsw_ale_control_set(priv->ale, slave_port,
+       cpsw_ale_control_set(cpsw->ale, slave_port,
                             ALE_PORT_STATE, ALE_PORT_STATE_DISABLE);
        soft_reset_slave(slave);
 }
@@ -1250,78 +1232,78 @@ static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_priv *priv)
 static int cpsw_ndo_open(struct net_device *ndev)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
        int i, ret;
        u32 reg;
 
-       ret = pm_runtime_get_sync(&priv->pdev->dev);
+       ret = pm_runtime_get_sync(cpsw->dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(&priv->pdev->dev);
+               pm_runtime_put_noidle(cpsw->dev);
                return ret;
        }
 
-       if (!cpsw_common_res_usage_state(priv))
-               cpsw_intr_disable(priv);
+       if (!cpsw_common_res_usage_state(cpsw))
+               cpsw_intr_disable(cpsw);
        netif_carrier_off(ndev);
 
-       reg = priv->version;
+       reg = cpsw->version;
 
        dev_info(priv->dev, "initializing cpsw version %d.%d (%d)\n",
                 CPSW_MAJOR_VERSION(reg), CPSW_MINOR_VERSION(reg),
                 CPSW_RTL_VERSION(reg));
 
        /* initialize host and slave ports */
-       if (!cpsw_common_res_usage_state(priv))
+       if (!cpsw_common_res_usage_state(cpsw))
                cpsw_init_host_port(priv);
        for_each_slave(priv, cpsw_slave_open, priv);
 
        /* Add default VLAN */
-       if (!priv->data.dual_emac)
+       if (!cpsw->data.dual_emac)
                cpsw_add_default_vlan(priv);
        else
-               cpsw_ale_add_vlan(priv->ale, priv->data.default_vlan,
+               cpsw_ale_add_vlan(cpsw->ale, cpsw->data.default_vlan,
                                  ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0);
 
-       if (!cpsw_common_res_usage_state(priv)) {
-               struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(priv, 0);
+       if (!cpsw_common_res_usage_state(cpsw)) {
                int buf_num;
 
                /* setup tx dma to fixed prio and zero offset */
-               cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1);
-               cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0);
+               cpdma_control_set(cpsw->dma, CPDMA_TX_PRIO_FIXED, 1);
+               cpdma_control_set(cpsw->dma, CPDMA_RX_BUFFER_OFFSET, 0);
 
                /* disable priority elevation */
-               __raw_writel(0, &priv->regs->ptype);
+               __raw_writel(0, &cpsw->regs->ptype);
 
                /* enable statistics collection on all ports */
-               __raw_writel(0x7, &priv->regs->stat_port_en);
+               __raw_writel(0x7, &cpsw->regs->stat_port_en);
 
                /* Enable internal fifo flow control */
-               writel(0x7, &priv->regs->flow_control);
+               writel(0x7, &cpsw->regs->flow_control);
 
-               napi_enable(&priv_sl0->napi_rx);
-               napi_enable(&priv_sl0->napi_tx);
+               napi_enable(&cpsw->napi_rx);
+               napi_enable(&cpsw->napi_tx);
 
-               if (priv_sl0->tx_irq_disabled) {
-                       priv_sl0->tx_irq_disabled = false;
-                       enable_irq(priv->irqs_table[1]);
+               if (cpsw->tx_irq_disabled) {
+                       cpsw->tx_irq_disabled = false;
+                       enable_irq(cpsw->irqs_table[1]);
                }
 
-               if (priv_sl0->rx_irq_disabled) {
-                       priv_sl0->rx_irq_disabled = false;
-                       enable_irq(priv->irqs_table[0]);
+               if (cpsw->rx_irq_disabled) {
+                       cpsw->rx_irq_disabled = false;
+                       enable_irq(cpsw->irqs_table[0]);
                }
 
-               buf_num = cpdma_chan_get_rx_buf_num(priv->dma);
+               buf_num = cpdma_chan_get_rx_buf_num(cpsw->dma);
                for (i = 0; i < buf_num; i++) {
                        struct sk_buff *skb;
 
                        ret = -ENOMEM;
                        skb = __netdev_alloc_skb_ip_align(priv->ndev,
-                                       priv->rx_packet_max, GFP_KERNEL);
+                                       cpsw->rx_packet_max, GFP_KERNEL);
                        if (!skb)
                                goto err_cleanup;
-                       ret = cpdma_chan_submit(priv->rxch, skb, skb->data,
-                                       skb_tailroom(skb), 0);
+                       ret = cpdma_chan_submit(cpsw->rxch, skb, skb->data,
+                                               skb_tailroom(skb), 0);
                        if (ret < 0) {
                                kfree_skb(skb);
                                goto err_cleanup;
@@ -1333,32 +1315,32 @@ static int cpsw_ndo_open(struct net_device *ndev)
                 */
                cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i);
 
-               if (cpts_register(&priv->pdev->dev, priv->cpts,
-                                 priv->data.cpts_clock_mult,
-                                 priv->data.cpts_clock_shift))
+               if (cpts_register(cpsw->dev, cpsw->cpts,
+                                 cpsw->data.cpts_clock_mult,
+                                 cpsw->data.cpts_clock_shift))
                        dev_err(priv->dev, "error registering cpts device\n");
 
        }
 
        /* Enable Interrupt pacing if configured */
-       if (priv->coal_intvl != 0) {
+       if (cpsw->coal_intvl != 0) {
                struct ethtool_coalesce coal;
 
-               coal.rx_coalesce_usecs = priv->coal_intvl;
+               coal.rx_coalesce_usecs = cpsw->coal_intvl;
                cpsw_set_coalesce(ndev, &coal);
        }
 
-       cpdma_ctlr_start(priv->dma);
-       cpsw_intr_enable(priv);
+       cpdma_ctlr_start(cpsw->dma);
+       cpsw_intr_enable(cpsw);
 
-       if (priv->data.dual_emac)
-               priv->slaves[priv->emac_port].open_stat = true;
+       if (cpsw->data.dual_emac)
+               cpsw->slaves[priv->emac_port].open_stat = true;
        return 0;
 
 err_cleanup:
-       cpdma_ctlr_stop(priv->dma);
-       for_each_slave(priv, cpsw_slave_stop, priv);
-       pm_runtime_put_sync(&priv->pdev->dev);
+       cpdma_ctlr_stop(cpsw->dma);
+       for_each_slave(priv, cpsw_slave_stop, cpsw);
+       pm_runtime_put_sync(cpsw->dev);
        netif_carrier_off(priv->ndev);
        return ret;
 }
@@ -1366,25 +1348,24 @@ err_cleanup:
 static int cpsw_ndo_stop(struct net_device *ndev)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
 
        cpsw_info(priv, ifdown, "shutting down cpsw device\n");
        netif_stop_queue(priv->ndev);
        netif_carrier_off(priv->ndev);
 
-       if (cpsw_common_res_usage_state(priv) <= 1) {
-               struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(priv, 0);
-
-               napi_disable(&priv_sl0->napi_rx);
-               napi_disable(&priv_sl0->napi_tx);
-               cpts_unregister(priv->cpts);
-               cpsw_intr_disable(priv);
-               cpdma_ctlr_stop(priv->dma);
-               cpsw_ale_stop(priv->ale);
-       }
-       for_each_slave(priv, cpsw_slave_stop, priv);
-       pm_runtime_put_sync(&priv->pdev->dev);
-       if (priv->data.dual_emac)
-               priv->slaves[priv->emac_port].open_stat = false;
+       if (cpsw_common_res_usage_state(cpsw) <= 1) {
+               napi_disable(&cpsw->napi_rx);
+               napi_disable(&cpsw->napi_tx);
+               cpts_unregister(cpsw->cpts);
+               cpsw_intr_disable(cpsw);
+               cpdma_ctlr_stop(cpsw->dma);
+               cpsw_ale_stop(cpsw->ale);
+       }
+       for_each_slave(priv, cpsw_slave_stop, cpsw);
+       pm_runtime_put_sync(cpsw->dev);
+       if (cpsw->data.dual_emac)
+               cpsw->slaves[priv->emac_port].open_stat = false;
        return 0;
 }
 
@@ -1393,6 +1374,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
        int ret;
+       struct cpsw_common *cpsw = priv->cpsw;
 
        netif_trans_update(ndev);
 
@@ -1403,12 +1385,12 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
        }
 
        if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-                               priv->cpts->tx_enable)
+                               cpsw->cpts->tx_enable)
                skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
        skb_tx_timestamp(skb);
 
-       ret = cpsw_tx_packet_submit(ndev, priv, skb);
+       ret = cpsw_tx_packet_submit(priv, skb);
        if (unlikely(ret != 0)) {
                cpsw_err(priv, tx_err, "desc submit failed\n");
                goto fail;
@@ -1417,7 +1399,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
        /* If there is no more tx desc left free then we need to
         * tell the kernel to stop sending us tx frames.
         */
-       if (unlikely(!cpdma_check_free_tx_desc(priv->txch)))
+       if (unlikely(!cpdma_check_free_tx_desc(cpsw->txch)))
                netif_stop_queue(ndev);
 
        return NETDEV_TX_OK;
@@ -1429,12 +1411,12 @@ fail:
 
 #ifdef CONFIG_TI_CPTS
 
-static void cpsw_hwtstamp_v1(struct cpsw_priv *priv)
+static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
 {
-       struct cpsw_slave *slave = &priv->slaves[priv->data.active_slave];
+       struct cpsw_slave *slave = &cpsw->slaves[cpsw->data.active_slave];
        u32 ts_en, seq_id;
 
-       if (!priv->cpts->tx_enable && !priv->cpts->rx_enable) {
+       if (!cpsw->cpts->tx_enable && !cpsw->cpts->rx_enable) {
                slave_write(slave, 0, CPSW1_TS_CTL);
                return;
        }
@@ -1442,10 +1424,10 @@ static void cpsw_hwtstamp_v1(struct cpsw_priv *priv)
        seq_id = (30 << CPSW_V1_SEQ_ID_OFS_SHIFT) | ETH_P_1588;
        ts_en = EVENT_MSG_BITS << CPSW_V1_MSG_TYPE_OFS;
 
-       if (priv->cpts->tx_enable)
+       if (cpsw->cpts->tx_enable)
                ts_en |= CPSW_V1_TS_TX_EN;
 
-       if (priv->cpts->rx_enable)
+       if (cpsw->cpts->rx_enable)
                ts_en |= CPSW_V1_TS_RX_EN;
 
        slave_write(slave, ts_en, CPSW1_TS_CTL);
@@ -1455,32 +1437,33 @@ static void cpsw_hwtstamp_v1(struct cpsw_priv *priv)
 static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
 {
        struct cpsw_slave *slave;
+       struct cpsw_common *cpsw = priv->cpsw;
        u32 ctrl, mtype;
 
-       if (priv->data.dual_emac)
-               slave = &priv->slaves[priv->emac_port];
+       if (cpsw->data.dual_emac)
+               slave = &cpsw->slaves[priv->emac_port];
        else
-               slave = &priv->slaves[priv->data.active_slave];
+               slave = &cpsw->slaves[cpsw->data.active_slave];
 
        ctrl = slave_read(slave, CPSW2_CONTROL);
-       switch (priv->version) {
+       switch (cpsw->version) {
        case CPSW_VERSION_2:
                ctrl &= ~CTRL_V2_ALL_TS_MASK;
 
-               if (priv->cpts->tx_enable)
+               if (cpsw->cpts->tx_enable)
                        ctrl |= CTRL_V2_TX_TS_BITS;
 
-               if (priv->cpts->rx_enable)
+               if (cpsw->cpts->rx_enable)
                        ctrl |= CTRL_V2_RX_TS_BITS;
                break;
        case CPSW_VERSION_3:
        default:
                ctrl &= ~CTRL_V3_ALL_TS_MASK;
 
-               if (priv->cpts->tx_enable)
+               if (cpsw->cpts->tx_enable)
                        ctrl |= CTRL_V3_TX_TS_BITS;
 
-               if (priv->cpts->rx_enable)
+               if (cpsw->cpts->rx_enable)
                        ctrl |= CTRL_V3_RX_TS_BITS;
                break;
        }
@@ -1489,18 +1472,19 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
 
        slave_write(slave, mtype, CPSW2_TS_SEQ_MTYPE);
        slave_write(slave, ctrl, CPSW2_CONTROL);
-       __raw_writel(ETH_P_1588, &priv->regs->ts_ltype);
+       __raw_writel(ETH_P_1588, &cpsw->regs->ts_ltype);
 }
 
 static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 {
        struct cpsw_priv *priv = netdev_priv(dev);
-       struct cpts *cpts = priv->cpts;
        struct hwtstamp_config cfg;
+       struct cpsw_common *cpsw = priv->cpsw;
+       struct cpts *cpts = cpsw->cpts;
 
-       if (priv->version != CPSW_VERSION_1 &&
-           priv->version != CPSW_VERSION_2 &&
-           priv->version != CPSW_VERSION_3)
+       if (cpsw->version != CPSW_VERSION_1 &&
+           cpsw->version != CPSW_VERSION_2 &&
+           cpsw->version != CPSW_VERSION_3)
                return -EOPNOTSUPP;
 
        if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
@@ -1540,9 +1524,9 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 
        cpts->tx_enable = cfg.tx_type == HWTSTAMP_TX_ON;
 
-       switch (priv->version) {
+       switch (cpsw->version) {
        case CPSW_VERSION_1:
-               cpsw_hwtstamp_v1(priv);
+               cpsw_hwtstamp_v1(cpsw);
                break;
        case CPSW_VERSION_2:
        case CPSW_VERSION_3:
@@ -1557,13 +1541,13 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 
 static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
 {
-       struct cpsw_priv *priv = netdev_priv(dev);
-       struct cpts *cpts = priv->cpts;
+       struct cpsw_common *cpsw = ndev_to_cpsw(dev);
+       struct cpts *cpts = cpsw->cpts;
        struct hwtstamp_config cfg;
 
-       if (priv->version != CPSW_VERSION_1 &&
-           priv->version != CPSW_VERSION_2 &&
-           priv->version != CPSW_VERSION_3)
+       if (cpsw->version != CPSW_VERSION_1 &&
+           cpsw->version != CPSW_VERSION_2 &&
+           cpsw->version != CPSW_VERSION_3)
                return -EOPNOTSUPP;
 
        cfg.flags = 0;
@@ -1579,7 +1563,8 @@ static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
 static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 {
        struct cpsw_priv *priv = netdev_priv(dev);
-       int slave_no = cpsw_slave_index(priv);
+       struct cpsw_common *cpsw = priv->cpsw;
+       int slave_no = cpsw_slave_index(cpsw, priv);
 
        if (!netif_running(dev))
                return -EINVAL;
@@ -1593,27 +1578,29 @@ static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 #endif
        }
 
-       if (!priv->slaves[slave_no].phy)
+       if (!cpsw->slaves[slave_no].phy)
                return -EOPNOTSUPP;
-       return phy_mii_ioctl(priv->slaves[slave_no].phy, req, cmd);
+       return phy_mii_ioctl(cpsw->slaves[slave_no].phy, req, cmd);
 }
 
 static void cpsw_ndo_tx_timeout(struct net_device *ndev)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
 
        cpsw_err(priv, tx_err, "transmit timeout, restarting dma\n");
        ndev->stats.tx_errors++;
-       cpsw_intr_disable(priv);
-       cpdma_chan_stop(priv->txch);
-       cpdma_chan_start(priv->txch);
-       cpsw_intr_enable(priv);
+       cpsw_intr_disable(cpsw);
+       cpdma_chan_stop(cpsw->txch);
+       cpdma_chan_start(cpsw->txch);
+       cpsw_intr_enable(cpsw);
 }
 
 static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
        struct sockaddr *addr = (struct sockaddr *)p;
+       struct cpsw_common *cpsw = priv->cpsw;
        int flags = 0;
        u16 vid = 0;
        int ret;
@@ -1621,27 +1608,27 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p)
        if (!is_valid_ether_addr(addr->sa_data))
                return -EADDRNOTAVAIL;
 
-       ret = pm_runtime_get_sync(&priv->pdev->dev);
+       ret = pm_runtime_get_sync(cpsw->dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(&priv->pdev->dev);
+               pm_runtime_put_noidle(cpsw->dev);
                return ret;
        }
 
-       if (priv->data.dual_emac) {
-               vid = priv->slaves[priv->emac_port].port_vlan;
+       if (cpsw->data.dual_emac) {
+               vid = cpsw->slaves[priv->emac_port].port_vlan;
                flags = ALE_VLAN;
        }
 
-       cpsw_ale_del_ucast(priv->ale, priv->mac_addr, HOST_PORT_NUM,
+       cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM,
                           flags, vid);
-       cpsw_ale_add_ucast(priv->ale, addr->sa_data, HOST_PORT_NUM,
+       cpsw_ale_add_ucast(cpsw->ale, addr->sa_data, HOST_PORT_NUM,
                           flags, vid);
 
        memcpy(priv->mac_addr, addr->sa_data, ETH_ALEN);
        memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
        for_each_slave(priv, cpsw_set_slave_mac, priv);
 
-       pm_runtime_put(&priv->pdev->dev);
+       pm_runtime_put(cpsw->dev);
 
        return 0;
 }
@@ -1649,12 +1636,12 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p)
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void cpsw_ndo_poll_controller(struct net_device *ndev)
 {
-       struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
-       cpsw_intr_disable(priv);
-       cpsw_rx_interrupt(priv->irqs_table[0], priv);
-       cpsw_tx_interrupt(priv->irqs_table[1], priv);
-       cpsw_intr_enable(priv);
+       cpsw_intr_disable(cpsw);
+       cpsw_rx_interrupt(cpsw->irqs_table[0], cpsw);
+       cpsw_tx_interrupt(cpsw->irqs_table[1], cpsw);
+       cpsw_intr_enable(cpsw);
 }
 #endif
 
@@ -1664,8 +1651,9 @@ static inline int cpsw_add_vlan_ale_entry(struct cpsw_priv *priv,
        int ret;
        int unreg_mcast_mask = 0;
        u32 port_mask;
+       struct cpsw_common *cpsw = priv->cpsw;
 
-       if (priv->data.dual_emac) {
+       if (cpsw->data.dual_emac) {
                port_mask = (1 << (priv->emac_port + 1)) | ALE_PORT_HOST;
 
                if (priv->ndev->flags & IFF_ALLMULTI)
@@ -1679,27 +1667,27 @@ static inline int cpsw_add_vlan_ale_entry(struct cpsw_priv *priv,
                        unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2;
        }
 
-       ret = cpsw_ale_add_vlan(priv->ale, vid, port_mask, 0, port_mask,
+       ret = cpsw_ale_add_vlan(cpsw->ale, vid, port_mask, 0, port_mask,
                                unreg_mcast_mask);
        if (ret != 0)
                return ret;
 
-       ret = cpsw_ale_add_ucast(priv->ale, priv->mac_addr,
+       ret = cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr,
                                 HOST_PORT_NUM, ALE_VLAN, vid);
        if (ret != 0)
                goto clean_vid;
 
-       ret = cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+       ret = cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
                                 port_mask, ALE_VLAN, vid, 0);
        if (ret != 0)
                goto clean_vlan_ucast;
        return 0;
 
 clean_vlan_ucast:
-       cpsw_ale_del_ucast(priv->ale, priv->mac_addr,
+       cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr,
                           HOST_PORT_NUM, ALE_VLAN, vid);
 clean_vid:
-       cpsw_ale_del_vlan(priv->ale, vid, 0);
+       cpsw_ale_del_vlan(cpsw->ale, vid, 0);
        return ret;
 }
 
@@ -1707,26 +1695,27 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev,
                                    __be16 proto, u16 vid)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
        int ret;
 
-       if (vid == priv->data.default_vlan)
+       if (vid == cpsw->data.default_vlan)
                return 0;
 
-       ret = pm_runtime_get_sync(&priv->pdev->dev);
+       ret = pm_runtime_get_sync(cpsw->dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(&priv->pdev->dev);
+               pm_runtime_put_noidle(cpsw->dev);
                return ret;
        }
 
-       if (priv->data.dual_emac) {
+       if (cpsw->data.dual_emac) {
                /* In dual EMAC, reserved VLAN id should not be used for
                 * creating VLAN interfaces as this can break the dual
                 * EMAC port separation
                 */
                int i;
 
-               for (i = 0; i < priv->data.slaves; i++) {
-                       if (vid == priv->slaves[i].port_vlan)
+               for (i = 0; i < cpsw->data.slaves; i++) {
+                       if (vid == cpsw->slaves[i].port_vlan)
                                return -EINVAL;
                }
        }
@@ -1734,7 +1723,7 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev,
        dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid);
        ret = cpsw_add_vlan_ale_entry(priv, vid);
 
-       pm_runtime_put(&priv->pdev->dev);
+       pm_runtime_put(cpsw->dev);
        return ret;
 }
 
@@ -1742,39 +1731,40 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev,
                                     __be16 proto, u16 vid)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
        int ret;
 
-       if (vid == priv->data.default_vlan)
+       if (vid == cpsw->data.default_vlan)
                return 0;
 
-       ret = pm_runtime_get_sync(&priv->pdev->dev);
+       ret = pm_runtime_get_sync(cpsw->dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(&priv->pdev->dev);
+               pm_runtime_put_noidle(cpsw->dev);
                return ret;
        }
 
-       if (priv->data.dual_emac) {
+       if (cpsw->data.dual_emac) {
                int i;
 
-               for (i = 0; i < priv->data.slaves; i++) {
-                       if (vid == priv->slaves[i].port_vlan)
+               for (i = 0; i < cpsw->data.slaves; i++) {
+                       if (vid == cpsw->slaves[i].port_vlan)
                                return -EINVAL;
                }
        }
 
        dev_info(priv->dev, "removing vlanid %d from vlan filter\n", vid);
-       ret = cpsw_ale_del_vlan(priv->ale, vid, 0);
+       ret = cpsw_ale_del_vlan(cpsw->ale, vid, 0);
        if (ret != 0)
                return ret;
 
-       ret = cpsw_ale_del_ucast(priv->ale, priv->mac_addr,
+       ret = cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr,
                                 HOST_PORT_NUM, ALE_VLAN, vid);
        if (ret != 0)
                return ret;
 
-       ret = cpsw_ale_del_mcast(priv->ale, priv->ndev->broadcast,
+       ret = cpsw_ale_del_mcast(cpsw->ale, priv->ndev->broadcast,
                                 0, ALE_VLAN, vid);
-       pm_runtime_put(&priv->pdev->dev);
+       pm_runtime_put(cpsw->dev);
        return ret;
 }
 
@@ -1797,31 +1787,32 @@ static const struct net_device_ops cpsw_netdev_ops = {
 
 static int cpsw_get_regs_len(struct net_device *ndev)
 {
-       struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
-       return priv->data.ale_entries * ALE_ENTRY_WORDS * sizeof(u32);
+       return cpsw->data.ale_entries * ALE_ENTRY_WORDS * sizeof(u32);
 }
 
 static void cpsw_get_regs(struct net_device *ndev,
                          struct ethtool_regs *regs, void *p)
 {
-       struct cpsw_priv *priv = netdev_priv(ndev);
        u32 *reg = p;
+       struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
        /* update CPSW IP version */
-       regs->version = priv->version;
+       regs->version = cpsw->version;
 
-       cpsw_ale_dump(priv->ale, reg);
+       cpsw_ale_dump(cpsw->ale, reg);
 }
 
 static void cpsw_get_drvinfo(struct net_device *ndev,
                             struct ethtool_drvinfo *info)
 {
-       struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+       struct platform_device  *pdev = to_platform_device(cpsw->dev);
 
        strlcpy(info->driver, "cpsw", sizeof(info->driver));
        strlcpy(info->version, "1.0", sizeof(info->version));
-       strlcpy(info->bus_info, priv->pdev->name, sizeof(info->bus_info));
+       strlcpy(info->bus_info, pdev->name, sizeof(info->bus_info));
 }
 
 static u32 cpsw_get_msglevel(struct net_device *ndev)
@@ -1840,7 +1831,7 @@ static int cpsw_get_ts_info(struct net_device *ndev,
                            struct ethtool_ts_info *info)
 {
 #ifdef CONFIG_TI_CPTS
-       struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
        info->so_timestamping =
                SOF_TIMESTAMPING_TX_HARDWARE |
@@ -1849,7 +1840,7 @@ static int cpsw_get_ts_info(struct net_device *ndev,
                SOF_TIMESTAMPING_RX_SOFTWARE |
                SOF_TIMESTAMPING_SOFTWARE |
                SOF_TIMESTAMPING_RAW_HARDWARE;
-       info->phc_index = priv->cpts->phc_index;
+       info->phc_index = cpsw->cpts->phc_index;
        info->tx_types =
                (1 << HWTSTAMP_TX_OFF) |
                (1 << HWTSTAMP_TX_ON);
@@ -1872,10 +1863,11 @@ static int cpsw_get_settings(struct net_device *ndev,
                             struct ethtool_cmd *ecmd)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
-       int slave_no = cpsw_slave_index(priv);
+       struct cpsw_common *cpsw = priv->cpsw;
+       int slave_no = cpsw_slave_index(cpsw, priv);
 
-       if (priv->slaves[slave_no].phy)
-               return phy_ethtool_gset(priv->slaves[slave_no].phy, ecmd);
+       if (cpsw->slaves[slave_no].phy)
+               return phy_ethtool_gset(cpsw->slaves[slave_no].phy, ecmd);
        else
                return -EOPNOTSUPP;
 }
@@ -1883,10 +1875,11 @@ static int cpsw_get_settings(struct net_device *ndev,
 static int cpsw_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
-       int slave_no = cpsw_slave_index(priv);
+       struct cpsw_common *cpsw = priv->cpsw;
+       int slave_no = cpsw_slave_index(cpsw, priv);
 
-       if (priv->slaves[slave_no].phy)
-               return phy_ethtool_sset(priv->slaves[slave_no].phy, ecmd);
+       if (cpsw->slaves[slave_no].phy)
+               return phy_ethtool_sset(cpsw->slaves[slave_no].phy, ecmd);
        else
                return -EOPNOTSUPP;
 }
@@ -1894,22 +1887,24 @@ static int cpsw_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
 static void cpsw_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
-       int slave_no = cpsw_slave_index(priv);
+       struct cpsw_common *cpsw = priv->cpsw;
+       int slave_no = cpsw_slave_index(cpsw, priv);
 
        wol->supported = 0;
        wol->wolopts = 0;
 
-       if (priv->slaves[slave_no].phy)
-               phy_ethtool_get_wol(priv->slaves[slave_no].phy, wol);
+       if (cpsw->slaves[slave_no].phy)
+               phy_ethtool_get_wol(cpsw->slaves[slave_no].phy, wol);
 }
 
 static int cpsw_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
-       int slave_no = cpsw_slave_index(priv);
+       struct cpsw_common *cpsw = priv->cpsw;
+       int slave_no = cpsw_slave_index(cpsw, priv);
 
-       if (priv->slaves[slave_no].phy)
-               return phy_ethtool_set_wol(priv->slaves[slave_no].phy, wol);
+       if (cpsw->slaves[slave_no].phy)
+               return phy_ethtool_set_wol(cpsw->slaves[slave_no].phy, wol);
        else
                return -EOPNOTSUPP;
 }
@@ -1940,12 +1935,13 @@ static int cpsw_set_pauseparam(struct net_device *ndev,
 static int cpsw_ethtool_op_begin(struct net_device *ndev)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
        int ret;
 
-       ret = pm_runtime_get_sync(&priv->pdev->dev);
+       ret = pm_runtime_get_sync(cpsw->dev);
        if (ret < 0) {
                cpsw_err(priv, drv, "ethtool begin failed %d\n", ret);
-               pm_runtime_put_noidle(&priv->pdev->dev);
+               pm_runtime_put_noidle(cpsw->dev);
        }
 
        return ret;
@@ -1956,7 +1952,7 @@ static void cpsw_ethtool_op_complete(struct net_device *ndev)
        struct cpsw_priv *priv = netdev_priv(ndev);
        int ret;
 
-       ret = pm_runtime_put(&priv->pdev->dev);
+       ret = pm_runtime_put(priv->cpsw->dev);
        if (ret < 0)
                cpsw_err(priv, drv, "ethtool complete failed %d\n", ret);
 }
@@ -1984,12 +1980,12 @@ static const struct ethtool_ops cpsw_ethtool_ops = {
        .complete       = cpsw_ethtool_op_complete,
 };
 
-static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv,
+static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw,
                            u32 slave_reg_ofs, u32 sliver_reg_ofs)
 {
-       void __iomem            *regs = priv->regs;
+       void __iomem            *regs = cpsw->regs;
        int                     slave_num = slave->slave_num;
-       struct cpsw_slave_data  *data = priv->data.slave_data + slave_num;
+       struct cpsw_slave_data  *data = cpsw->data.slave_data + slave_num;
 
        slave->data     = data;
        slave->regs     = regs + slave_reg_ofs;
@@ -2160,71 +2156,50 @@ no_phy_slave:
        return 0;
 }
 
-static int cpsw_probe_dual_emac(struct platform_device *pdev,
-                               struct cpsw_priv *priv)
+static int cpsw_probe_dual_emac(struct cpsw_priv *priv)
 {
-       struct cpsw_platform_data       *data = &priv->data;
+       struct cpsw_common              *cpsw = priv->cpsw;
+       struct cpsw_platform_data       *data = &cpsw->data;
        struct net_device               *ndev;
        struct cpsw_priv                *priv_sl2;
-       int ret = 0, i;
+       int ret = 0;
 
        ndev = alloc_etherdev(sizeof(struct cpsw_priv));
        if (!ndev) {
-               dev_err(&pdev->dev, "cpsw: error allocating net_device\n");
+               dev_err(cpsw->dev, "cpsw: error allocating net_device\n");
                return -ENOMEM;
        }
 
        priv_sl2 = netdev_priv(ndev);
-       priv_sl2->data = *data;
-       priv_sl2->pdev = pdev;
+       priv_sl2->cpsw = cpsw;
        priv_sl2->ndev = ndev;
        priv_sl2->dev  = &ndev->dev;
        priv_sl2->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
-       priv_sl2->rx_packet_max = max(rx_packet_max, 128);
 
        if (is_valid_ether_addr(data->slave_data[1].mac_addr)) {
                memcpy(priv_sl2->mac_addr, data->slave_data[1].mac_addr,
                        ETH_ALEN);
-               dev_info(&pdev->dev, "cpsw: Detected MACID = %pM\n", priv_sl2->mac_addr);
+               dev_info(cpsw->dev, "cpsw: Detected MACID = %pM\n",
+                        priv_sl2->mac_addr);
        } else {
                random_ether_addr(priv_sl2->mac_addr);
-               dev_info(&pdev->dev, "cpsw: Random MACID = %pM\n", priv_sl2->mac_addr);
+               dev_info(cpsw->dev, "cpsw: Random MACID = %pM\n",
+                        priv_sl2->mac_addr);
        }
        memcpy(ndev->dev_addr, priv_sl2->mac_addr, ETH_ALEN);
 
-       priv_sl2->slaves = priv->slaves;
-       priv_sl2->clk = priv->clk;
-
-       priv_sl2->coal_intvl = 0;
-       priv_sl2->bus_freq_mhz = priv->bus_freq_mhz;
-
-       priv_sl2->regs = priv->regs;
-       priv_sl2->host_port_regs = priv->host_port_regs;
-       priv_sl2->wr_regs = priv->wr_regs;
-       priv_sl2->hw_stats = priv->hw_stats;
-       priv_sl2->dma = priv->dma;
-       priv_sl2->txch = priv->txch;
-       priv_sl2->rxch = priv->rxch;
-       priv_sl2->ale = priv->ale;
        priv_sl2->emac_port = 1;
-       priv->slaves[1].ndev = ndev;
-       priv_sl2->cpts = priv->cpts;
-       priv_sl2->version = priv->version;
-
-       for (i = 0; i < priv->num_irqs; i++) {
-               priv_sl2->irqs_table[i] = priv->irqs_table[i];
-               priv_sl2->num_irqs = priv->num_irqs;
-       }
+       cpsw->slaves[1].ndev = ndev;
        ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
        ndev->netdev_ops = &cpsw_netdev_ops;
        ndev->ethtool_ops = &cpsw_ethtool_ops;
 
        /* register the network device */
-       SET_NETDEV_DEV(ndev, &pdev->dev);
+       SET_NETDEV_DEV(ndev, cpsw->dev);
        ret = register_netdev(ndev);
        if (ret) {
-               dev_err(&pdev->dev, "cpsw: error registering net device\n");
+               dev_err(cpsw->dev, "cpsw: error registering net device\n");
                free_netdev(ndev);
                ret = -ENODEV;
        }
@@ -2272,6 +2247,7 @@ MODULE_DEVICE_TABLE(of, cpsw_of_mtable);
 
 static int cpsw_probe(struct platform_device *pdev)
 {
+       struct clk                      *clk;
        struct cpsw_platform_data       *data;
        struct net_device               *ndev;
        struct cpsw_priv                *priv;
@@ -2282,9 +2258,13 @@ static int cpsw_probe(struct platform_device *pdev)
        const struct of_device_id       *of_id;
        struct gpio_descs               *mode;
        u32 slave_offset, sliver_offset, slave_size;
+       struct cpsw_common              *cpsw;
        int ret = 0, i;
        int irq;
 
+       cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL);
+       cpsw->dev = &pdev->dev;
+
        ndev = alloc_etherdev(sizeof(struct cpsw_priv));
        if (!ndev) {
                dev_err(&pdev->dev, "error allocating net_device\n");
@@ -2293,13 +2273,13 @@ static int cpsw_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, ndev);
        priv = netdev_priv(ndev);
-       priv->pdev = pdev;
+       priv->cpsw = cpsw;
        priv->ndev = ndev;
        priv->dev  = &ndev->dev;
        priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
-       priv->rx_packet_max = max(rx_packet_max, 128);
-       priv->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL);
-       if (!priv->cpts) {
+       cpsw->rx_packet_max = max(rx_packet_max, 128);
+       cpsw->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL);
+       if (!cpsw->cpts) {
                dev_err(&pdev->dev, "error allocating cpts\n");
                ret = -ENOMEM;
                goto clean_ndev_ret;
@@ -2320,12 +2300,12 @@ static int cpsw_probe(struct platform_device *pdev)
        /* Select default pin state */
        pinctrl_pm_select_default_state(&pdev->dev);
 
-       if (cpsw_probe_dt(&priv->data, pdev)) {
+       if (cpsw_probe_dt(&cpsw->data, pdev)) {
                dev_err(&pdev->dev, "cpsw: platform data missing\n");
                ret = -ENODEV;
                goto clean_runtime_disable_ret;
        }
-       data = &priv->data;
+       data = &cpsw->data;
 
        if (is_valid_ether_addr(data->slave_data[0].mac_addr)) {
                memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN);
@@ -2337,27 +2317,26 @@ static int cpsw_probe(struct platform_device *pdev)
 
        memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
 
-       priv->slaves = devm_kzalloc(&pdev->dev,
+       cpsw->slaves = devm_kzalloc(&pdev->dev,
                                    sizeof(struct cpsw_slave) * data->slaves,
                                    GFP_KERNEL);
-       if (!priv->slaves) {
+       if (!cpsw->slaves) {
                ret = -ENOMEM;
                goto clean_runtime_disable_ret;
        }
        for (i = 0; i < data->slaves; i++)
-               priv->slaves[i].slave_num = i;
+               cpsw->slaves[i].slave_num = i;
 
-       priv->slaves[0].ndev = ndev;
+       cpsw->slaves[0].ndev = ndev;
        priv->emac_port = 0;
 
-       priv->clk = devm_clk_get(&pdev->dev, "fck");
-       if (IS_ERR(priv->clk)) {
+       clk = devm_clk_get(&pdev->dev, "fck");
+       if (IS_ERR(clk)) {
                dev_err(priv->dev, "fck is not found\n");
                ret = -ENODEV;
                goto clean_runtime_disable_ret;
        }
-       priv->coal_intvl = 0;
-       priv->bus_freq_mhz = clk_get_rate(priv->clk) / 1000000;
+       cpsw->bus_freq_mhz = clk_get_rate(clk) / 1000000;
 
        ss_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        ss_regs = devm_ioremap_resource(&pdev->dev, ss_res);
@@ -2365,7 +2344,7 @@ static int cpsw_probe(struct platform_device *pdev)
                ret = PTR_ERR(ss_regs);
                goto clean_runtime_disable_ret;
        }
-       priv->regs = ss_regs;
+       cpsw->regs = ss_regs;
 
        /* Need to enable clocks with runtime PM api to access module
         * registers
@@ -2375,24 +2354,24 @@ static int cpsw_probe(struct platform_device *pdev)
                pm_runtime_put_noidle(&pdev->dev);
                goto clean_runtime_disable_ret;
        }
-       priv->version = readl(&priv->regs->id_ver);
+       cpsw->version = readl(&cpsw->regs->id_ver);
        pm_runtime_put_sync(&pdev->dev);
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       priv->wr_regs = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(priv->wr_regs)) {
-               ret = PTR_ERR(priv->wr_regs);
+       cpsw->wr_regs = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(cpsw->wr_regs)) {
+               ret = PTR_ERR(cpsw->wr_regs);
                goto clean_runtime_disable_ret;
        }
 
        memset(&dma_params, 0, sizeof(dma_params));
        memset(&ale_params, 0, sizeof(ale_params));
 
-       switch (priv->version) {
+       switch (cpsw->version) {
        case CPSW_VERSION_1:
-               priv->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
-               priv->cpts->reg      = ss_regs + CPSW1_CPTS_OFFSET;
-               priv->hw_stats       = ss_regs + CPSW1_HW_STATS;
+               cpsw->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
+               cpsw->cpts->reg      = ss_regs + CPSW1_CPTS_OFFSET;
+               cpsw->hw_stats       = ss_regs + CPSW1_HW_STATS;
                dma_params.dmaregs   = ss_regs + CPSW1_CPDMA_OFFSET;
                dma_params.txhdp     = ss_regs + CPSW1_STATERAM_OFFSET;
                ale_params.ale_regs  = ss_regs + CPSW1_ALE_OFFSET;
@@ -2404,9 +2383,9 @@ static int cpsw_probe(struct platform_device *pdev)
        case CPSW_VERSION_2:
        case CPSW_VERSION_3:
        case CPSW_VERSION_4:
-               priv->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
-               priv->cpts->reg      = ss_regs + CPSW2_CPTS_OFFSET;
-               priv->hw_stats       = ss_regs + CPSW2_HW_STATS;
+               cpsw->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
+               cpsw->cpts->reg      = ss_regs + CPSW2_CPTS_OFFSET;
+               cpsw->hw_stats       = ss_regs + CPSW2_HW_STATS;
                dma_params.dmaregs   = ss_regs + CPSW2_CPDMA_OFFSET;
                dma_params.txhdp     = ss_regs + CPSW2_STATERAM_OFFSET;
                ale_params.ale_regs  = ss_regs + CPSW2_ALE_OFFSET;
@@ -2417,13 +2396,14 @@ static int cpsw_probe(struct platform_device *pdev)
                        (u32 __force) ss_res->start + CPSW2_BD_OFFSET;
                break;
        default:
-               dev_err(priv->dev, "unknown version 0x%08x\n", priv->version);
+               dev_err(priv->dev, "unknown version 0x%08x\n", cpsw->version);
                ret = -ENODEV;
                goto clean_runtime_disable_ret;
        }
-       for (i = 0; i < priv->data.slaves; i++) {
-               struct cpsw_slave *slave = &priv->slaves[i];
-               cpsw_slave_init(slave, priv, slave_offset, sliver_offset);
+       for (i = 0; i < cpsw->data.slaves; i++) {
+               struct cpsw_slave *slave = &cpsw->slaves[i];
+
+               cpsw_slave_init(slave, cpsw, slave_offset, sliver_offset);
                slave_offset  += slave_size;
                sliver_offset += SLIVER_SIZE;
        }
@@ -2443,19 +2423,19 @@ static int cpsw_probe(struct platform_device *pdev)
        dma_params.has_ext_regs         = true;
        dma_params.desc_hw_addr         = dma_params.desc_mem_phys;
 
-       priv->dma = cpdma_ctlr_create(&dma_params);
-       if (!priv->dma) {
+       cpsw->dma = cpdma_ctlr_create(&dma_params);
+       if (!cpsw->dma) {
                dev_err(priv->dev, "error initializing dma\n");
                ret = -ENOMEM;
                goto clean_runtime_disable_ret;
        }
 
-       priv->txch = cpdma_chan_create(priv->dma, tx_chan_num(0),
+       cpsw->txch = cpdma_chan_create(cpsw->dma, tx_chan_num(0),
                                       cpsw_tx_handler);
-       priv->rxch = cpdma_chan_create(priv->dma, rx_chan_num(0),
+       cpsw->rxch = cpdma_chan_create(cpsw->dma, rx_chan_num(0),
                                       cpsw_rx_handler);
 
-       if (WARN_ON(!priv->txch || !priv->rxch)) {
+       if (WARN_ON(!cpsw->txch || !cpsw->rxch)) {
                dev_err(priv->dev, "error initializing dma channels\n");
                ret = -ENOMEM;
                goto clean_dma_ret;
@@ -2466,8 +2446,8 @@ static int cpsw_probe(struct platform_device *pdev)
        ale_params.ale_entries          = data->ale_entries;
        ale_params.ale_ports            = data->slaves;
 
-       priv->ale = cpsw_ale_create(&ale_params);
-       if (!priv->ale) {
+       cpsw->ale = cpsw_ale_create(&ale_params);
+       if (!cpsw->ale) {
                dev_err(priv->dev, "error initializing ale engine\n");
                ret = -ENODEV;
                goto clean_dma_ret;
@@ -2484,7 +2464,7 @@ static int cpsw_probe(struct platform_device *pdev)
        if (of_id) {
                pdev->id_entry = of_id->data;
                if (pdev->id_entry->driver_data)
-                       priv->quirk_irq = true;
+                       cpsw->quirk_irq = true;
        }
 
        /* Grab RX and TX IRQs. Note that we also have RX_THRESHOLD and
@@ -2502,9 +2482,9 @@ static int cpsw_probe(struct platform_device *pdev)
                goto clean_ale_ret;
        }
 
-       priv->irqs_table[0] = irq;
+       cpsw->irqs_table[0] = irq;
        ret = devm_request_irq(&pdev->dev, irq, cpsw_rx_interrupt,
-                              0, dev_name(&pdev->dev), priv);
+                              0, dev_name(&pdev->dev), cpsw);
        if (ret < 0) {
                dev_err(priv->dev, "error attaching irq (%d)\n", ret);
                goto clean_ale_ret;
@@ -2517,21 +2497,20 @@ static int cpsw_probe(struct platform_device *pdev)
                goto clean_ale_ret;
        }
 
-       priv->irqs_table[1] = irq;
+       cpsw->irqs_table[1] = irq;
        ret = devm_request_irq(&pdev->dev, irq, cpsw_tx_interrupt,
-                              0, dev_name(&pdev->dev), priv);
+                              0, dev_name(&pdev->dev), cpsw);
        if (ret < 0) {
                dev_err(priv->dev, "error attaching irq (%d)\n", ret);
                goto clean_ale_ret;
        }
-       priv->num_irqs = 2;
 
        ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
        ndev->netdev_ops = &cpsw_netdev_ops;
        ndev->ethtool_ops = &cpsw_ethtool_ops;
-       netif_napi_add(ndev, &priv->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT);
-       netif_tx_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
+       netif_napi_add(ndev, &cpsw->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT);
+       netif_tx_napi_add(ndev, &cpsw->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
 
        /* register the network device */
        SET_NETDEV_DEV(ndev, &pdev->dev);
@@ -2545,8 +2524,8 @@ static int cpsw_probe(struct platform_device *pdev)
        cpsw_notice(priv, probe, "initialized device (regs %pa, irq %d)\n",
                    &ss_res->start, ndev->irq);
 
-       if (priv->data.dual_emac) {
-               ret = cpsw_probe_dual_emac(pdev, priv);
+       if (cpsw->data.dual_emac) {
+               ret = cpsw_probe_dual_emac(priv);
                if (ret) {
                        cpsw_err(priv, probe, "error probe slave 2 emac interface\n");
                        goto clean_ale_ret;
@@ -2556,9 +2535,9 @@ static int cpsw_probe(struct platform_device *pdev)
        return 0;
 
 clean_ale_ret:
-       cpsw_ale_destroy(priv->ale);
+       cpsw_ale_destroy(cpsw->ale);
 clean_dma_ret:
-       cpdma_ctlr_destroy(priv->dma);
+       cpdma_ctlr_destroy(cpsw->dma);
 clean_runtime_disable_ret:
        pm_runtime_disable(&pdev->dev);
 clean_ndev_ret:
@@ -2569,7 +2548,7 @@ clean_ndev_ret:
 static int cpsw_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
-       struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
        int ret;
 
        ret = pm_runtime_get_sync(&pdev->dev);
@@ -2578,17 +2557,17 @@ static int cpsw_remove(struct platform_device *pdev)
                return ret;
        }
 
-       if (priv->data.dual_emac)
-               unregister_netdev(cpsw_get_slave_ndev(priv, 1));
+       if (cpsw->data.dual_emac)
+               unregister_netdev(cpsw->slaves[1].ndev);
        unregister_netdev(ndev);
 
-       cpsw_ale_destroy(priv->ale);
-       cpdma_ctlr_destroy(priv->dma);
+       cpsw_ale_destroy(cpsw->ale);
+       cpdma_ctlr_destroy(cpsw->dma);
        of_platform_depopulate(&pdev->dev);
        pm_runtime_put_sync(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
-       if (priv->data.dual_emac)
-               free_netdev(cpsw_get_slave_ndev(priv, 1));
+       if (cpsw->data.dual_emac)
+               free_netdev(cpsw->slaves[1].ndev);
        free_netdev(ndev);
        return 0;
 }
@@ -2598,14 +2577,14 @@ static int cpsw_suspend(struct device *dev)
 {
        struct platform_device  *pdev = to_platform_device(dev);
        struct net_device       *ndev = platform_get_drvdata(pdev);
-       struct cpsw_priv        *priv = netdev_priv(ndev);
+       struct cpsw_common      *cpsw = ndev_to_cpsw(ndev);
 
-       if (priv->data.dual_emac) {
+       if (cpsw->data.dual_emac) {
                int i;
 
-               for (i = 0; i < priv->data.slaves; i++) {
-                       if (netif_running(priv->slaves[i].ndev))
-                               cpsw_ndo_stop(priv->slaves[i].ndev);
+               for (i = 0; i < cpsw->data.slaves; i++) {
+                       if (netif_running(cpsw->slaves[i].ndev))
+                               cpsw_ndo_stop(cpsw->slaves[i].ndev);
                }
        } else {
                if (netif_running(ndev))
@@ -2613,7 +2592,7 @@ static int cpsw_suspend(struct device *dev)
        }
 
        /* Select sleep pin state */
-       pinctrl_pm_select_sleep_state(&pdev->dev);
+       pinctrl_pm_select_sleep_state(dev);
 
        return 0;
 }
@@ -2622,17 +2601,17 @@ static int cpsw_resume(struct device *dev)
 {
        struct platform_device  *pdev = to_platform_device(dev);
        struct net_device       *ndev = platform_get_drvdata(pdev);
-       struct cpsw_priv        *priv = netdev_priv(ndev);
+       struct cpsw_common      *cpsw = ndev_to_cpsw(ndev);
 
        /* Select default pin state */
-       pinctrl_pm_select_default_state(&pdev->dev);
+       pinctrl_pm_select_default_state(dev);
 
-       if (priv->data.dual_emac) {
+       if (cpsw->data.dual_emac) {
                int i;
 
-               for (i = 0; i < priv->data.slaves; i++) {
-                       if (netif_running(priv->slaves[i].ndev))
-                               cpsw_ndo_open(priv->slaves[i].ndev);
+               for (i = 0; i < cpsw->data.slaves; i++) {
+                       if (netif_running(cpsw->slaves[i].ndev))
+                               cpsw_ndo_open(cpsw->slaves[i].ndev);
                }
        } else {
                if (netif_running(ndev))
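
The cpsw.c changes above all follow one pattern: state that describes the whole switch (regs, dma, ale, cpts, irqs, slaves[]) moves out of the per-netdev cpsw_priv into a single shared cpsw_common, and each function now takes whichever of the two it actually needs. A minimal sketch of that split, with the fields abridged and the lookup written as a function purely for illustration (the driver's own definitions live in the cpsw sources):

	#include <linux/netdevice.h>
	#include <linux/if_ether.h>

	struct cpsw_common;			/* one per CPSW instance: regs, dma, ale, cpts, slaves[] */

	struct cpsw_priv {			/* one per registered net_device */
		struct net_device	*ndev;
		struct cpsw_common	*cpsw;	/* back-pointer to the shared state */
		u8			mac_addr[ETH_ALEN];
		int			emac_port;	/* 0 or 1 in dual-EMAC mode */
	};

	static inline struct cpsw_common *ndev_to_cpsw(struct net_device *ndev)
	{
		struct cpsw_priv *priv = netdev_priv(ndev);

		return priv->cpsw;
	}

This is also why cpsw_probe_dual_emac() above no longer copies regs, dma, ale and the IRQ table into priv_sl2: both ports' netdevs reach the same cpsw_common through their back-pointer.
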
index 19e5f32..cf72b33 100644 (file)
@@ -86,7 +86,7 @@ struct cpdma_desc_pool {
        void __iomem            *iomap;         /* ioremap map */
        void                    *cpumap;        /* dma_alloc map */
        int                     desc_size, mem_size;
-       int                     num_desc, used_desc;
+       int                     num_desc;
        struct device           *dev;
        struct gen_pool         *gen_pool;
 };
@@ -148,7 +148,10 @@ static void cpdma_desc_pool_destroy(struct cpdma_desc_pool *pool)
        if (!pool)
                return;
 
-       WARN_ON(pool->used_desc);
+       WARN(gen_pool_size(pool->gen_pool) != gen_pool_avail(pool->gen_pool),
+            "cpdma_desc_pool size %d != avail %d",
+            gen_pool_size(pool->gen_pool),
+            gen_pool_avail(pool->gen_pool));
        if (pool->cpumap)
                dma_free_coherent(pool->dev, pool->mem_size, pool->cpumap,
                                  pool->phys);
@@ -232,21 +235,14 @@ desc_from_phys(struct cpdma_desc_pool *pool, dma_addr_t dma)
 static struct cpdma_desc __iomem *
 cpdma_desc_alloc(struct cpdma_desc_pool *pool)
 {
-       struct cpdma_desc __iomem *desc = NULL;
-
-       desc = (struct cpdma_desc __iomem *)gen_pool_alloc(pool->gen_pool,
-                                                          pool->desc_size);
-       if (desc)
-               pool->used_desc++;
-
-       return desc;
+       return (struct cpdma_desc __iomem *)
+               gen_pool_alloc(pool->gen_pool, pool->desc_size);
 }
 
 static void cpdma_desc_free(struct cpdma_desc_pool *pool,
                            struct cpdma_desc __iomem *desc, int num_desc)
 {
        gen_pool_free(pool->gen_pool, (unsigned long)desc, pool->desc_size);
-       pool->used_desc--;
 }
 
 struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params)
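
The used_desc counter dropped above is redundant once the descriptor area sits behind a genalloc pool: the pool already knows how much of it is outstanding, which is exactly what the new WARN() in cpdma_desc_pool_destroy() checks. A small sketch of that accounting (the helper name is illustrative, not from the driver):

	#include <linux/genalloc.h>

	/* Bytes of the descriptor pool currently handed out; divide by
	 * desc_size for a descriptor count.  Zero is what the destroy path
	 * expects, i.e. size == avail.
	 */
	static size_t cpdma_pool_bytes_in_use(struct gen_pool *gen_pool)
	{
		return gen_pool_size(gen_pool) - gen_pool_avail(gen_pool);
	}
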
index 727a79f..2d6fc9a 100644 (file)
@@ -597,14 +597,14 @@ static u32 hash_get(u8 *addr)
 }
 
 /**
- * hash_add - Hash function to add mac addr from hash table
+ * emac_hash_add - Hash function to add mac addr to hash table
  * @priv: The DaVinci EMAC private adapter structure
  * @mac_addr: mac address to add to hash table
  *
  * Adds mac address to the internal hash table
  *
  */
-static int hash_add(struct emac_priv *priv, u8 *mac_addr)
+static int emac_hash_add(struct emac_priv *priv, u8 *mac_addr)
 {
        struct device *emac_dev = &priv->ndev->dev;
        u32 rc = 0;
@@ -613,7 +613,7 @@ static int hash_add(struct emac_priv *priv, u8 *mac_addr)
 
        if (hash_value >= EMAC_NUM_MULTICAST_BITS) {
                if (netif_msg_drv(priv)) {
-                       dev_err(emac_dev, "DaVinci EMAC: hash_add(): Invalid "\
+                       dev_err(emac_dev, "DaVinci EMAC: emac_hash_add(): Invalid "\
                                "Hash %08x, should not be greater than %08x",
                                hash_value, (EMAC_NUM_MULTICAST_BITS - 1));
                }
@@ -639,14 +639,14 @@ static int hash_add(struct emac_priv *priv, u8 *mac_addr)
 }
 
 /**
- * hash_del - Hash function to delete mac addr from hash table
+ * emac_hash_del - Hash function to delete mac addr from hash table
  * @priv: The DaVinci EMAC private adapter structure
  * @mac_addr: mac address to delete from hash table
  *
  * Removes mac address from the internal hash table
  *
  */
-static int hash_del(struct emac_priv *priv, u8 *mac_addr)
+static int emac_hash_del(struct emac_priv *priv, u8 *mac_addr)
 {
        u32 hash_value;
        u32 hash_bit;
@@ -696,10 +696,10 @@ static void emac_add_mcast(struct emac_priv *priv, u32 action, u8 *mac_addr)
 
        switch (action) {
        case EMAC_MULTICAST_ADD:
-               update = hash_add(priv, mac_addr);
+               update = emac_hash_add(priv, mac_addr);
                break;
        case EMAC_MULTICAST_DEL:
-               update = hash_del(priv, mac_addr);
+               update = emac_hash_del(priv, mac_addr);
                break;
        case EMAC_ALL_MULTI_SET:
                update = 1;
index 9006877..e46b1eb 100644 (file)
@@ -97,7 +97,6 @@ static struct acpi_driver fjes_acpi_driver = {
 static struct platform_driver fjes_driver = {
        .driver = {
                .name = DRV_NAME,
-               .owner = THIS_MODULE,
        },
        .probe = fjes_probe,
        .remove = fjes_remove,
index 591af71..fa7b1e4 100644 (file)
@@ -490,6 +490,7 @@ struct nvsp_2_vsc_capability {
                        u64 sriov:1;
                        u64 ieee8021q:1;
                        u64 correlation_id:1;
+                       u64 teaming:1;
                };
        };
 } __packed;
index 410fb8e..8078bc2 100644 (file)
@@ -468,9 +468,13 @@ static int negotiate_nvsp_ver(struct hv_device *device,
        init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
        init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
 
-       if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5)
+       if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
                init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;
 
+               /* Teaming bit is needed to receive link speed updates */
+               init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
+       }
+
        ret = vmbus_sendpacket(device->channel, init_packet,
                                sizeof(struct nvsp_message),
                                (unsigned long)init_packet,
index 3ba29fc..eb2c122 100644 (file)
@@ -579,19 +579,32 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
        struct netvsc_reconfig *event;
        unsigned long flags;
 
-       /* Handle link change statuses only */
+       net = hv_get_drvdata(device_obj);
+
+       if (!net)
+               return;
+
+       ndev_ctx = netdev_priv(net);
+
+       /* Update the physical link speed when changing to another vSwitch */
+       if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
+               u32 speed;
+
+               speed = *(u32 *)((void *)indicate + indicate->
+                                status_buf_offset) / 10000;
+               ndev_ctx->speed = speed;
+               return;
+       }
+
+       /* Handle these link change statuses below */
        if (indicate->status != RNDIS_STATUS_NETWORK_CHANGE &&
            indicate->status != RNDIS_STATUS_MEDIA_CONNECT &&
            indicate->status != RNDIS_STATUS_MEDIA_DISCONNECT)
                return;
 
-       net = hv_get_drvdata(device_obj);
-
-       if (!net || net->reg_state != NETREG_REGISTERED)
+       if (net->reg_state != NETREG_REGISTERED)
                return;
 
-       ndev_ctx = netdev_priv(net);
-
        event = kzalloc(sizeof(*event), GFP_ATOMIC);
        if (!event)
                return;
@@ -1337,6 +1350,8 @@ static int netvsc_probe(struct hv_device *dev,
 
        netif_carrier_off(net);
 
+       netvsc_init_settings(net);
+
        net_device_ctx = netdev_priv(net);
        net_device_ctx->device_ctx = dev;
        net_device_ctx->msg_enable = netif_msg_init(debug, default_msg);
@@ -1398,8 +1413,6 @@ static int netvsc_probe(struct hv_device *dev,
        netif_set_real_num_tx_queues(net, nvdev->num_chn);
        netif_set_real_num_rx_queues(net, nvdev->num_chn);
 
-       netvsc_init_settings(net);
-
        ret = register_netdev(net);
        if (ret != 0) {
                pr_err("Unable to register netdev.\n");
index 8e830f7..dd3b335 100644 (file)
@@ -752,6 +752,28 @@ static int rndis_filter_query_device_link_status(struct rndis_device *dev)
        return ret;
 }
 
+static int rndis_filter_query_link_speed(struct rndis_device *dev)
+{
+       u32 size = sizeof(u32);
+       u32 link_speed;
+       struct net_device_context *ndc;
+       int ret;
+
+       ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_LINK_SPEED,
+                                       &link_speed, &size);
+
+       if (!ret) {
+               ndc = netdev_priv(dev->ndev);
+
+               /* The link speed reported from host is in 100bps unit, so
+                * we convert it to Mbps here.
+                */
+               ndc->speed = link_speed / 10000;
+       }
+
+       return ret;
+}
+
 int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
 {
        struct rndis_request *request;
@@ -1044,6 +1066,8 @@ int rndis_filter_device_add(struct hv_device *dev,
        if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
                return 0;
 
+       rndis_filter_query_link_speed(rndis_device);
+
        /* vRSS setup */
        memset(&rsscap, 0, rsscap_size);
        ret = rndis_filter_query_device(rndis_device,
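
Both hv_netvsc hunks above rely on the same arithmetic: as the added comment notes, the host reports link speed in units of 100 bps (via RNDIS_OID_GEN_LINK_SPEED or the LINK_SPEED_CHANGE indication), so dividing by 10000 yields Mbps. A standalone check of that conversion, independent of the driver:

/* 10 Gb/s arrives from the host as 100000000 (units of 100 bps);
 * dividing by 10000 gives 10000 Mbps, the value stored in ndc->speed.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t reported = 100000000;      /* 100 bps units from the host */
    uint32_t mbps = reported / 10000;   /* same conversion as the driver */

    printf("%u Mbps\n", mbps);          /* prints 10000 */
    return 0;
}
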
index 47a6434..d66133b 100644 (file)
@@ -307,6 +307,18 @@ config MDIO_XGENE
          This module provides a driver for the MDIO busses found in the
          APM X-Gene SoC's.
 
+config MICROSEMI_PHY
+       tristate "Drivers for the Microsemi PHYs"
+       ---help---
+         Currently supports the VSC8531 and VSC8541 PHYs
+
+config XILINX_GMII2RGMII
+       tristate "Xilinx GMII2RGMII converter driver"
+       ---help---
+         This driver supports the Xilinx GMII to RGMII IP core. It provides
+         the Reduced Gigabit Media Independent Interface (RGMII) between
+         Ethernet physical media devices and the Gigabit Ethernet controller.
+
 endif # PHYLIB
 
 config MICREL_KS8995MA
index 534dfa7..73d65ce 100644 (file)
@@ -11,6 +11,7 @@ obj-$(CONFIG_CICADA_PHY)      += cicada.o
 obj-$(CONFIG_LXT_PHY)          += lxt.o
 obj-$(CONFIG_QSEMI_PHY)                += qsemi.o
 obj-$(CONFIG_SMSC_PHY)         += smsc.o
+obj-$(CONFIG_MICROSEMI_PHY) += mscc.o
 obj-$(CONFIG_TERANETICS_PHY)   += teranetics.o
 obj-$(CONFIG_VITESSE_PHY)      += vitesse.o
 obj-$(CONFIG_BCM_NET_PHYLIB)   += bcm-phy-lib.o
@@ -49,3 +50,4 @@ obj-$(CONFIG_MDIO_BCM_IPROC)  += mdio-bcm-iproc.o
 obj-$(CONFIG_INTEL_XWAY_PHY)   += intel-xway.o
 obj-$(CONFIG_MDIO_HISI_FEMAC)  += mdio-hisi-femac.o
 obj-$(CONFIG_MDIO_XGENE)       += mdio-xgene.o
+obj-$(CONFIG_XILINX_GMII2RGMII) += xilinx_gmii2rgmii.o
diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c
new file mode 100644 (file)
index 0000000..ad33390
--- /dev/null
@@ -0,0 +1,161 @@
+/*
+ * Driver for Microsemi VSC85xx PHYs
+ *
+ * Author: Nagaraju Lakkaraju
+ * License: Dual MIT/GPL
+ * Copyright (c) 2016 Microsemi Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mdio.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+
+enum rgmii_rx_clock_delay {
+       RGMII_RX_CLK_DELAY_0_2_NS = 0,
+       RGMII_RX_CLK_DELAY_0_8_NS = 1,
+       RGMII_RX_CLK_DELAY_1_1_NS = 2,
+       RGMII_RX_CLK_DELAY_1_7_NS = 3,
+       RGMII_RX_CLK_DELAY_2_0_NS = 4,
+       RGMII_RX_CLK_DELAY_2_3_NS = 5,
+       RGMII_RX_CLK_DELAY_2_6_NS = 6,
+       RGMII_RX_CLK_DELAY_3_4_NS = 7
+};
+
+#define MII_VSC85XX_INT_MASK              25
+#define MII_VSC85XX_INT_MASK_MASK         0xa000
+#define MII_VSC85XX_INT_STATUS            26
+
+#define MSCC_EXT_PAGE_ACCESS              31
+#define MSCC_PHY_PAGE_STANDARD            0x0000 /* Standard registers */
+#define MSCC_PHY_PAGE_EXTENDED_2          0x0002 /* Extended reg - page 2 */
+
+/* Extended Page 2 Registers */
+#define MSCC_PHY_RGMII_CNTL                       20
+#define RGMII_RX_CLK_DELAY_MASK                   0x0070
+#define RGMII_RX_CLK_DELAY_POS            4
+
+/* Microsemi PHY ID's */
+#define PHY_ID_VSC8531                            0x00070570
+#define PHY_ID_VSC8541                            0x00070770
+
+static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page)
+{
+       int rc;
+
+       rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, page);
+       return rc;
+}
+
+static int vsc85xx_default_config(struct phy_device *phydev)
+{
+       int rc;
+       u16 reg_val;
+
+       mutex_lock(&phydev->lock);
+       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2);
+       if (rc != 0)
+               goto out_unlock;
+
+       reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL);
+       reg_val &= ~(RGMII_RX_CLK_DELAY_MASK);
+       reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS);
+       phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val);
+       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+
+out_unlock:
+       mutex_unlock(&phydev->lock);
+
+       return rc;
+}
+
+static int vsc85xx_config_init(struct phy_device *phydev)
+{
+       int rc;
+
+       rc = vsc85xx_default_config(phydev);
+       if (rc)
+               return rc;
+       rc = genphy_config_init(phydev);
+
+       return rc;
+}
+
+static int vsc85xx_ack_interrupt(struct phy_device *phydev)
+{
+       int rc = 0;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+               rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
+
+       return (rc < 0) ? rc : 0;
+}
+
+static int vsc85xx_config_intr(struct phy_device *phydev)
+{
+       int rc;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+               rc = phy_write(phydev, MII_VSC85XX_INT_MASK,
+                                  MII_VSC85XX_INT_MASK_MASK);
+       } else {
+               rc = phy_write(phydev, MII_VSC85XX_INT_MASK, 0);
+               if (rc < 0)
+                       return rc;
+               rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
+       }
+
+       return rc;
+}
+
+/* Microsemi VSC85xx PHYs */
+static struct phy_driver vsc85xx_driver[] = {
+{
+       .phy_id                 = PHY_ID_VSC8531,
+       .name                   = "Microsemi VSC8531",
+       .phy_id_mask    = 0xfffffff0,
+       .features               = PHY_GBIT_FEATURES,
+       .flags                  = PHY_HAS_INTERRUPT,
+       .soft_reset             = &genphy_soft_reset,
+       .config_init    = &vsc85xx_config_init,
+       .config_aneg    = &genphy_config_aneg,
+       .aneg_done              = &genphy_aneg_done,
+       .read_status    = &genphy_read_status,
+       .ack_interrupt  = &vsc85xx_ack_interrupt,
+       .config_intr    = &vsc85xx_config_intr,
+       .suspend                = &genphy_suspend,
+       .resume                 = &genphy_resume,
+},
+{
+       .phy_id                 = PHY_ID_VSC8541,
+       .name                   = "Microsemi VSC8541 SyncE",
+       .phy_id_mask    = 0xfffffff0,
+       .features               = PHY_GBIT_FEATURES,
+       .flags                  = PHY_HAS_INTERRUPT,
+       .soft_reset             = &genphy_soft_reset,
+       .config_init    = &vsc85xx_config_init,
+       .config_aneg    = &genphy_config_aneg,
+       .aneg_done              = &genphy_aneg_done,
+       .read_status    = &genphy_read_status,
+       .ack_interrupt  = &vsc85xx_ack_interrupt,
+       .config_intr    = &vsc85xx_config_intr,
+       .suspend                = &genphy_suspend,
+       .resume                 = &genphy_resume,
+}
+
+};
+
+module_phy_driver(vsc85xx_driver);
+
+static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = {
+       { PHY_ID_VSC8531, 0xfffffff0, },
+       { PHY_ID_VSC8541, 0xfffffff0, },
+       { }
+};
+
+MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl);
+
+MODULE_DESCRIPTION("Microsemi VSC85xx PHY driver");
+MODULE_AUTHOR("Nagaraju Lakkaraju");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
new file mode 100644 (file)
index 0000000..cad6e19
--- /dev/null
@@ -0,0 +1,109 @@
+/* Xilinx GMII2RGMII Converter driver
+ *
+ * Copyright (C) 2016 Xilinx, Inc.
+ *
+ * Author: Kedareswara rao Appana <appanad@xilinx.com>
+ *
+ * Description:
+ * This driver is developed for Xilinx GMII2RGMII Converter
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mii.h>
+#include <linux/mdio.h>
+#include <linux/phy.h>
+#include <linux/of_mdio.h>
+
+#define XILINX_GMII2RGMII_REG          0x10
+#define XILINX_GMII2RGMII_SPEED_MASK   (BMCR_SPEED1000 | BMCR_SPEED100)
+
+struct gmii2rgmii {
+       struct phy_device *phy_dev;
+       struct phy_driver *phy_drv;
+       struct phy_driver conv_phy_drv;
+       int addr;
+};
+
+static int xgmiitorgmii_read_status(struct phy_device *phydev)
+{
+       struct gmii2rgmii *priv = phydev->priv;
+       u16 val = 0;
+
+       priv->phy_drv->read_status(phydev);
+
+       val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG);
+       val &= XILINX_GMII2RGMII_SPEED_MASK;
+
+       if (phydev->speed == SPEED_1000)
+               val |= BMCR_SPEED1000;
+       else if (phydev->speed == SPEED_100)
+               val |= BMCR_SPEED100;
+       else
+               val |= BMCR_SPEED10;
+
+       mdiobus_write(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG, val);
+
+       return 0;
+}
+
+int xgmiitorgmii_probe(struct mdio_device *mdiodev)
+{
+       struct device *dev = &mdiodev->dev;
+       struct device_node *np = dev->of_node, *phy_node;
+       struct gmii2rgmii *priv;
+
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       phy_node = of_parse_phandle(np, "phy-handle", 0);
+       if (!phy_node) {
+               dev_err(dev, "Couldn't parse phy-handle\n");
+               return -ENODEV;
+       }
+
+       priv->phy_dev = of_phy_find_device(phy_node);
+       if (!priv->phy_dev) {
+               dev_info(dev, "Couldn't find phydev\n");
+               return -EPROBE_DEFER;
+       }
+
+       priv->addr = mdiodev->addr;
+       priv->phy_drv = priv->phy_dev->drv;
+       memcpy(&priv->conv_phy_drv, priv->phy_dev->drv,
+              sizeof(struct phy_driver));
+       priv->conv_phy_drv.read_status = xgmiitorgmii_read_status;
+       priv->phy_dev->priv = priv;
+       priv->phy_dev->drv = &priv->conv_phy_drv;
+
+       return 0;
+}
+
+static const struct of_device_id xgmiitorgmii_of_match[] = {
+       { .compatible = "xlnx,gmii-to-rgmii-1.0" },
+       {},
+};
+MODULE_DEVICE_TABLE(of, xgmiitorgmii_of_match);
+
+static struct mdio_driver xgmiitorgmii_driver = {
+       .probe  = xgmiitorgmii_probe,
+       .mdiodrv.driver = {
+               .name = "xgmiitorgmii",
+               .of_match_table = xgmiitorgmii_of_match,
+       },
+};
+
+mdio_module_driver(xgmiitorgmii_driver);
+
+MODULE_DESCRIPTION("Xilinx GMII2RGMII converter driver");
+MODULE_LICENSE("GPL");
index f226db4..70cfa06 100644 (file)
@@ -1103,6 +1103,15 @@ static int ppp_nl_newlink(struct net *src_net, struct net_device *dev,
        }
 
        conf.file = file;
+
+       /* Don't use device name generated by the rtnetlink layer when ifname
+        * isn't specified. Let ppp_dev_configure() set the device name using
+        * the PPP unit identifier as a suffix (i.e. ppp<unit_id>). This allows
+        * userspace to infer the device name using the PPPIOCGUNIT ioctl.
+        */
+       if (!tb[IFLA_IFNAME])
+               conf.ifname_is_set = false;
+
        err = ppp_dev_configure(src_net, dev, &conf);
 
 out_unlock:
index ae0905e..1951b10 100644 (file)
@@ -37,6 +37,7 @@
 #include <net/icmp.h>
 #include <net/route.h>
 #include <net/gre.h>
+#include <net/pptp.h>
 
 #include <linux/uaccess.h>
 
@@ -53,41 +54,6 @@ static struct proto pptp_sk_proto __read_mostly;
 static const struct ppp_channel_ops pptp_chan_ops;
 static const struct proto_ops pptp_ops;
 
-#define PPP_LCP_ECHOREQ 0x09
-#define PPP_LCP_ECHOREP 0x0A
-#define SC_RCV_BITS    (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
-
-#define MISSING_WINDOW 20
-#define WRAPPED(curseq, lastseq)\
-       ((((curseq) & 0xffffff00) == 0) &&\
-       (((lastseq) & 0xffffff00) == 0xffffff00))
-
-#define PPTP_GRE_PROTO  0x880B
-#define PPTP_GRE_VER    0x1
-
-#define PPTP_GRE_FLAG_C        0x80
-#define PPTP_GRE_FLAG_R        0x40
-#define PPTP_GRE_FLAG_K        0x20
-#define PPTP_GRE_FLAG_S        0x10
-#define PPTP_GRE_FLAG_A        0x80
-
-#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C)
-#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R)
-#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K)
-#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S)
-#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A)
-
-#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
-struct pptp_gre_header {
-       u8  flags;
-       u8  ver;
-       __be16 protocol;
-       __be16 payload_len;
-       __be16 call_id;
-       __be32 seq;
-       __be32 ack;
-} __packed;
-
 static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr)
 {
        struct pppox_sock *sock;
@@ -240,16 +206,14 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
        skb_push(skb, header_len);
        hdr = (struct pptp_gre_header *)(skb->data);
 
-       hdr->flags       = PPTP_GRE_FLAG_K;
-       hdr->ver         = PPTP_GRE_VER;
-       hdr->protocol    = htons(PPTP_GRE_PROTO);
-       hdr->call_id     = htons(opt->dst_addr.call_id);
+       hdr->gre_hd.flags = GRE_KEY | GRE_VERSION_1 | GRE_SEQ;
+       hdr->gre_hd.protocol = GRE_PROTO_PPP;
+       hdr->call_id = htons(opt->dst_addr.call_id);
 
-       hdr->flags      |= PPTP_GRE_FLAG_S;
-       hdr->seq         = htonl(++opt->seq_sent);
+       hdr->seq = htonl(++opt->seq_sent);
        if (opt->ack_sent != seq_recv)  {
                /* send ack with this message */
-               hdr->ver |= PPTP_GRE_FLAG_A;
+               hdr->gre_hd.flags |= GRE_ACK;
                hdr->ack  = htonl(seq_recv);
                opt->ack_sent = seq_recv;
        }
@@ -312,7 +276,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb)
        headersize  = sizeof(*header);
 
        /* test if acknowledgement present */
-       if (PPTP_GRE_IS_A(header->ver)) {
+       if (GRE_IS_ACK(header->gre_hd.flags)) {
                __u32 ack;
 
                if (!pskb_may_pull(skb, headersize))
@@ -320,7 +284,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb)
                header = (struct pptp_gre_header *)(skb->data);
 
                /* ack in different place if S = 0 */
-               ack = PPTP_GRE_IS_S(header->flags) ? header->ack : header->seq;
+               ack = GRE_IS_SEQ(header->gre_hd.flags) ? header->ack : header->seq;
 
                ack = ntohl(ack);
 
@@ -333,7 +297,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb)
                headersize -= sizeof(header->ack);
        }
        /* test if payload present */
-       if (!PPTP_GRE_IS_S(header->flags))
+       if (!GRE_IS_SEQ(header->gre_hd.flags))
                goto drop;
 
        payload_len = ntohs(header->payload_len);
@@ -394,11 +358,11 @@ static int pptp_rcv(struct sk_buff *skb)
 
        header = (struct pptp_gre_header *)skb->data;
 
-       if (ntohs(header->protocol) != PPTP_GRE_PROTO || /* PPTP-GRE protocol for PPTP */
-               PPTP_GRE_IS_C(header->flags) ||                /* flag C should be clear */
-               PPTP_GRE_IS_R(header->flags) ||                /* flag R should be clear */
-               !PPTP_GRE_IS_K(header->flags) ||               /* flag K should be set */
-               (header->flags&0xF) != 0)                      /* routing and recursion ctrl = 0 */
+       if (header->gre_hd.protocol != GRE_PROTO_PPP || /* PPTP-GRE protocol for PPTP */
+               GRE_IS_CSUM(header->gre_hd.flags) ||    /* flag CSUM should be clear */
+               GRE_IS_ROUTING(header->gre_hd.flags) || /* flag ROUTING should be clear */
+               !GRE_IS_KEY(header->gre_hd.flags) ||    /* flag KEY should be set */
+               (header->gre_hd.flags & GRE_FLAGS))     /* flag Recursion Ctrl should be clear */
                /* if invalid, discard this packet */
                goto drop;
 
index 4b44586..c5544d3 100644 (file)
@@ -2300,10 +2300,8 @@ static int hso_serial_common_create(struct hso_serial *serial, int num_urbs,
        serial->rx_data_length = rx_size;
        for (i = 0; i < serial->num_rx_urbs; i++) {
                serial->rx_urb[i] = usb_alloc_urb(0, GFP_KERNEL);
-               if (!serial->rx_urb[i]) {
-                       dev_err(dev, "Could not allocate urb?\n");
+               if (!serial->rx_urb[i])
                        goto exit;
-               }
                serial->rx_urb[i]->transfer_buffer = NULL;
                serial->rx_urb[i]->transfer_buffer_length = 0;
                serial->rx_data[i] = kzalloc(serial->rx_data_length,
@@ -2314,10 +2312,8 @@ static int hso_serial_common_create(struct hso_serial *serial, int num_urbs,
 
        /* TX, allocate urb and initialize */
        serial->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
-       if (!serial->tx_urb) {
-               dev_err(dev, "Could not allocate urb?\n");
+       if (!serial->tx_urb)
                goto exit;
-       }
        serial->tx_urb->transfer_buffer = NULL;
        serial->tx_urb->transfer_buffer_length = 0;
        /* prepare our TX buffer */
@@ -2555,20 +2551,16 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface,
        /* start allocating */
        for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) {
                hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL);
-               if (!hso_net->mux_bulk_rx_urb_pool[i]) {
-                       dev_err(&interface->dev, "Could not allocate rx urb\n");
+               if (!hso_net->mux_bulk_rx_urb_pool[i])
                        goto exit;
-               }
                hso_net->mux_bulk_rx_buf_pool[i] = kzalloc(MUX_BULK_RX_BUF_SIZE,
                                                           GFP_KERNEL);
                if (!hso_net->mux_bulk_rx_buf_pool[i])
                        goto exit;
        }
        hso_net->mux_bulk_tx_urb = usb_alloc_urb(0, GFP_KERNEL);
-       if (!hso_net->mux_bulk_tx_urb) {
-               dev_err(&interface->dev, "Could not allocate tx urb\n");
+       if (!hso_net->mux_bulk_tx_urb)
                goto exit;
-       }
        hso_net->mux_bulk_tx_buf = kzalloc(MUX_BULK_TX_BUF_SIZE, GFP_KERNEL);
        if (!hso_net->mux_bulk_tx_buf)
                goto exit;
@@ -2787,10 +2779,8 @@ struct hso_shared_int *hso_create_shared_int(struct usb_interface *interface)
        }
 
        mux->shared_intr_urb = usb_alloc_urb(0, GFP_KERNEL);
-       if (!mux->shared_intr_urb) {
-               dev_err(&interface->dev, "Could not allocate intr urb?\n");
+       if (!mux->shared_intr_urb)
                goto exit;
-       }
        mux->shared_intr_buf =
                kzalloc(le16_to_cpu(mux->intr_endp->wMaxPacketSize),
                        GFP_KERNEL);
index 6a9d474..432b8a3 100644 (file)
@@ -3002,10 +3002,8 @@ static void lan78xx_tx_bh(struct lan78xx_net *dev)
 
 gso_skb:
        urb = usb_alloc_urb(0, GFP_ATOMIC);
-       if (!urb) {
-               netif_dbg(dev, tx_err, dev->net, "no urb\n");
+       if (!urb)
                goto drop;
-       }
 
        entry = (struct skb_data *)skb->cb;
        entry->urb = urb;
index 3bfb592..d5071e3 100644 (file)
@@ -2062,11 +2062,8 @@ int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype,
                   cmd, reqtype, value, index, size);
 
        urb = usb_alloc_urb(0, GFP_ATOMIC);
-       if (!urb) {
-               netdev_err(dev->net, "Error allocating URB in"
-                          " %s!\n", __func__);
+       if (!urb)
                goto fail;
-       }
 
        if (data) {
                buf = kmemdup(data, size, GFP_ATOMIC);
index fc1355d..5d429f8 100644 (file)
@@ -206,7 +206,6 @@ int i2400mu_notification_setup(struct i2400mu *i2400mu)
        i2400mu->notif_urb = usb_alloc_urb(0, GFP_KERNEL);
        if (!i2400mu->notif_urb) {
                ret = -ENOMEM;
-               dev_err(dev, "notification: cannot allocate URB\n");
                goto error_alloc_urb;
        }
        epd = usb_get_epd(i2400mu->usb_iface,
index 8aded24..7a60d2e 100644 (file)
@@ -706,10 +706,8 @@ static int ar5523_alloc_rx_bufs(struct ar5523 *ar)
 
                data->ar = ar;
                data->urb = usb_alloc_urb(0, GFP_KERNEL);
-               if (!data->urb) {
-                       ar5523_err(ar, "could not allocate rx data urb\n");
+               if (!data->urb)
                        goto err;
-               }
                list_add_tail(&data->list, &ar->rx_data_free);
                atomic_inc(&ar->rx_data_free_cnt);
        }
@@ -824,7 +822,6 @@ static void ar5523_tx_work_locked(struct ar5523 *ar)
 
                urb = usb_alloc_urb(0, GFP_KERNEL);
                if (!urb) {
-                       ar5523_err(ar, "Failed to allocate TX urb\n");
                        ieee80211_free_txskb(ar->hw, skb);
                        continue;
                }
@@ -949,10 +946,8 @@ static int ar5523_alloc_tx_cmd(struct ar5523 *ar)
        init_completion(&cmd->done);
 
        cmd->urb_tx = usb_alloc_urb(0, GFP_KERNEL);
-       if (!cmd->urb_tx) {
-               ar5523_err(ar, "could not allocate urb\n");
+       if (!cmd->urb_tx)
                return -ENOMEM;
-       }
        cmd->buf_tx = usb_alloc_coherent(ar->dev, AR5523_MAX_TXCMDSZ,
                                         GFP_KERNEL,
                                         &cmd->urb_tx->transfer_dma);
index 98b15a9..fa26619 100644 (file)
@@ -1099,15 +1099,11 @@ struct brcmf_usbdev *brcmf_usb_attach(struct brcmf_usbdev_info *devinfo,
        devinfo->tx_freecount = ntxq;
 
        devinfo->ctl_urb = usb_alloc_urb(0, GFP_ATOMIC);
-       if (!devinfo->ctl_urb) {
-               brcmf_err("usb_alloc_urb (ctl) failed\n");
+       if (!devinfo->ctl_urb)
                goto error;
-       }
        devinfo->bulk_urb = usb_alloc_urb(0, GFP_ATOMIC);
-       if (!devinfo->bulk_urb) {
-               brcmf_err("usb_alloc_urb (bulk) failed\n");
+       if (!devinfo->bulk_urb)
                goto error;
-       }
 
        return &devinfo->bus_pub;
 
index 56f109b..bca6935 100644 (file)
@@ -1613,10 +1613,8 @@ static int ezusb_probe(struct usb_interface *interface,
                        }
 
                        upriv->read_urb = usb_alloc_urb(0, GFP_KERNEL);
-                       if (!upriv->read_urb) {
-                               err("No free urbs available");
+                       if (!upriv->read_urb)
                                goto error;
-                       }
                        if (le16_to_cpu(ep->wMaxPacketSize) != 64)
                                pr_warn("bulk in: wMaxPacketSize!= 64\n");
                        if (ep->bEndpointAddress != (2 | USB_DIR_IN))
index 799a2ef..e0ade40 100644 (file)
@@ -198,22 +198,16 @@ static int if_usb_probe(struct usb_interface *intf,
        }
 
        cardp->rx_urb = usb_alloc_urb(0, GFP_KERNEL);
-       if (!cardp->rx_urb) {
-               lbtf_deb_usbd(&udev->dev, "Rx URB allocation failed\n");
+       if (!cardp->rx_urb)
                goto dealloc;
-       }
 
        cardp->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
-       if (!cardp->tx_urb) {
-               lbtf_deb_usbd(&udev->dev, "Tx URB allocation failed\n");
+       if (!cardp->tx_urb)
                goto dealloc;
-       }
 
        cardp->cmd_urb = usb_alloc_urb(0, GFP_KERNEL);
-       if (!cardp->cmd_urb) {
-               lbtf_deb_usbd(&udev->dev, "Cmd URB allocation failed\n");
+       if (!cardp->cmd_urb)
                goto dealloc;
-       }
 
        cardp->ep_out_buf = kmalloc(MRVDRV_ETH_TX_PACKET_BUFFER_SIZE,
                                    GFP_KERNEL);
index 0857575..3bd04f5 100644 (file)
@@ -657,11 +657,8 @@ static int mwifiex_usb_tx_init(struct mwifiex_adapter *adapter)
        card->tx_cmd.ep = card->tx_cmd_ep;
 
        card->tx_cmd.urb = usb_alloc_urb(0, GFP_KERNEL);
-       if (!card->tx_cmd.urb) {
-               mwifiex_dbg(adapter, ERROR,
-                           "tx_cmd.urb allocation failed\n");
+       if (!card->tx_cmd.urb)
                return -ENOMEM;
-       }
 
        for (i = 0; i < MWIFIEX_TX_DATA_PORT; i++) {
                port = &card->port[i];
@@ -677,11 +674,8 @@ static int mwifiex_usb_tx_init(struct mwifiex_adapter *adapter)
                        port->tx_data_list[j].ep = port->tx_data_ep;
                        port->tx_data_list[j].urb =
                                        usb_alloc_urb(0, GFP_KERNEL);
-                       if (!port->tx_data_list[j].urb) {
-                               mwifiex_dbg(adapter, ERROR,
-                                           "urb allocation failed\n");
+                       if (!port->tx_data_list[j].urb)
                                return -ENOMEM;
-                       }
                }
        }
 
@@ -697,10 +691,8 @@ static int mwifiex_usb_rx_init(struct mwifiex_adapter *adapter)
        card->rx_cmd.ep = card->rx_cmd_ep;
 
        card->rx_cmd.urb = usb_alloc_urb(0, GFP_KERNEL);
-       if (!card->rx_cmd.urb) {
-               mwifiex_dbg(adapter, ERROR, "rx_cmd.urb allocation failed\n");
+       if (!card->rx_cmd.urb)
                return -ENOMEM;
-       }
 
        card->rx_cmd.skb = dev_alloc_skb(MWIFIEX_RX_CMD_BUF_SIZE);
        if (!card->rx_cmd.skb)
@@ -714,11 +706,8 @@ static int mwifiex_usb_rx_init(struct mwifiex_adapter *adapter)
                card->rx_data_list[i].ep = card->rx_data_ep;
 
                card->rx_data_list[i].urb = usb_alloc_urb(0, GFP_KERNEL);
-               if (!card->rx_data_list[i].urb) {
-                       mwifiex_dbg(adapter, ERROR,
-                                   "rx_data_list[] urb allocation failed\n");
+               if (!card->rx_data_list[i].urb)
                        return -1;
-               }
                if (mwifiex_usb_submit_rx_urb(&card->rx_data_list[i],
                                              MWIFIEX_RX_DATA_BUF_SIZE))
                        return -1;
index 41617b7..32aa5c1 100644 (file)
@@ -739,11 +739,8 @@ static int _rtl_usb_receive(struct ieee80211_hw *hw)
        for (i = 0; i < rtlusb->rx_urb_num; i++) {
                err = -ENOMEM;
                urb = usb_alloc_urb(0, GFP_KERNEL);
-               if (!urb) {
-                       RT_TRACE(rtlpriv, COMP_USB, DBG_EMERG,
-                                "Failed to alloc URB!!\n");
+               if (!urb)
                        goto err_out;
-               }
 
                err = _rtl_prep_rx_urb(hw, rtlusb, urb, GFP_KERNEL);
                if (err < 0) {
@@ -907,15 +904,12 @@ static void _rtl_tx_complete(struct urb *urb)
 static struct urb *_rtl_usb_tx_urb_setup(struct ieee80211_hw *hw,
                                struct sk_buff *skb, u32 ep_num)
 {
-       struct rtl_priv *rtlpriv = rtl_priv(hw);
        struct rtl_usb *rtlusb = rtl_usbdev(rtl_usbpriv(hw));
        struct urb *_urb;
 
        WARN_ON(NULL == skb);
        _urb = usb_alloc_urb(0, GFP_ATOMIC);
        if (!_urb) {
-               RT_TRACE(rtlpriv, COMP_USB, DBG_EMERG,
-                        "Can't allocate URB for bulk out!\n");
                kfree_skb(skb);
                return NULL;
        }
index e13a4ab..1fde9c8 100644 (file)
@@ -34,48 +34,23 @@ static void cvm_oct_get_drvinfo(struct net_device *dev,
        strlcpy(info->bus_info, "Builtin", sizeof(info->bus_info));
 }
 
-static int cvm_oct_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-       struct octeon_ethernet *priv = netdev_priv(dev);
-
-       if (priv->phydev)
-               return phy_ethtool_gset(priv->phydev, cmd);
-
-       return -EINVAL;
-}
-
-static int cvm_oct_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-       struct octeon_ethernet *priv = netdev_priv(dev);
-
-       if (!capable(CAP_NET_ADMIN))
-               return -EPERM;
-
-       if (priv->phydev)
-               return phy_ethtool_sset(priv->phydev, cmd);
-
-       return -EINVAL;
-}
-
 static int cvm_oct_nway_reset(struct net_device *dev)
 {
-       struct octeon_ethernet *priv = netdev_priv(dev);
-
        if (!capable(CAP_NET_ADMIN))
                return -EPERM;
 
-       if (priv->phydev)
-               return phy_start_aneg(priv->phydev);
+       if (dev->phydev)
+               return phy_start_aneg(dev->phydev);
 
        return -EINVAL;
 }
 
 const struct ethtool_ops cvm_oct_ethtool_ops = {
        .get_drvinfo = cvm_oct_get_drvinfo,
-       .get_settings = cvm_oct_get_settings,
-       .set_settings = cvm_oct_set_settings,
        .nway_reset = cvm_oct_nway_reset,
        .get_link = ethtool_op_get_link,
+       .get_link_ksettings = phy_ethtool_get_link_ksettings,
+       .set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
 /**
@@ -88,15 +63,13 @@ const struct ethtool_ops cvm_oct_ethtool_ops = {
  */
 int cvm_oct_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
-       struct octeon_ethernet *priv = netdev_priv(dev);
-
        if (!netif_running(dev))
                return -EINVAL;
 
-       if (!priv->phydev)
+       if (!dev->phydev)
                return -EINVAL;
 
-       return phy_mii_ioctl(priv->phydev, rq, cmd);
+       return phy_mii_ioctl(dev->phydev, rq, cmd);
 }
 
 void cvm_oct_note_carrier(struct octeon_ethernet *priv,
@@ -119,9 +92,9 @@ void cvm_oct_adjust_link(struct net_device *dev)
        cvmx_helper_link_info_t link_info;
 
        link_info.u64           = 0;
-       link_info.s.link_up     = priv->phydev->link ? 1 : 0;
-       link_info.s.full_duplex = priv->phydev->duplex ? 1 : 0;
-       link_info.s.speed       = priv->phydev->speed;
+       link_info.s.link_up     = dev->phydev->link ? 1 : 0;
+       link_info.s.full_duplex = dev->phydev->duplex ? 1 : 0;
+       link_info.s.speed       = dev->phydev->speed;
        priv->link_info         = link_info.u64;
 
        /*
@@ -130,8 +103,8 @@ void cvm_oct_adjust_link(struct net_device *dev)
        if (priv->poll)
                priv->poll(dev);
 
-       if (priv->last_link != priv->phydev->link) {
-               priv->last_link = priv->phydev->link;
+       if (priv->last_link != dev->phydev->link) {
+               priv->last_link = dev->phydev->link;
                cvmx_helper_link_set(priv->port, link_info);
                cvm_oct_note_carrier(priv, link_info);
        }
@@ -151,9 +124,8 @@ int cvm_oct_common_stop(struct net_device *dev)
 
        priv->poll = NULL;
 
-       if (priv->phydev)
-               phy_disconnect(priv->phydev);
-       priv->phydev = NULL;
+       if (dev->phydev)
+               phy_disconnect(dev->phydev);
 
        if (priv->last_link) {
                link_info.u64 = 0;
@@ -176,6 +148,7 @@ int cvm_oct_phy_setup_device(struct net_device *dev)
 {
        struct octeon_ethernet *priv = netdev_priv(dev);
        struct device_node *phy_node;
+       struct phy_device *phydev = NULL;
 
        if (!priv->of_node)
                goto no_phy;
@@ -193,14 +166,14 @@ int cvm_oct_phy_setup_device(struct net_device *dev)
        if (!phy_node)
                goto no_phy;
 
-       priv->phydev = of_phy_connect(dev, phy_node, cvm_oct_adjust_link, 0,
-                                     PHY_INTERFACE_MODE_GMII);
+       phydev = of_phy_connect(dev, phy_node, cvm_oct_adjust_link, 0,
+                               PHY_INTERFACE_MODE_GMII);
 
-       if (!priv->phydev)
+       if (!phydev)
                return -ENODEV;
 
        priv->last_link = 0;
-       phy_start_aneg(priv->phydev);
+       phy_start_aneg(phydev);
 
        return 0;
 no_phy:
index 91b148c..48846df 100644 (file)
@@ -145,7 +145,7 @@ int cvm_oct_rgmii_open(struct net_device *dev)
        if (ret)
                return ret;
 
-       if (priv->phydev) {
+       if (dev->phydev) {
                /*
                 * In phydev mode, we need still periodic polling for the
                 * preamble error checking, and we also need to call this
index e9cd5f2..45d5763 100644 (file)
@@ -457,10 +457,8 @@ int cvm_oct_common_init(struct net_device *dev)
 
 void cvm_oct_common_uninit(struct net_device *dev)
 {
-       struct octeon_ethernet *priv = netdev_priv(dev);
-
-       if (priv->phydev)
-               phy_disconnect(priv->phydev);
+       if (dev->phydev)
+               phy_disconnect(dev->phydev);
 }
 
 int cvm_oct_common_open(struct net_device *dev,
@@ -484,10 +482,10 @@ int cvm_oct_common_open(struct net_device *dev,
        if (octeon_is_simulation())
                return 0;
 
-       if (priv->phydev) {
-               int r = phy_read_status(priv->phydev);
+       if (dev->phydev) {
+               int r = phy_read_status(dev->phydev);
 
-               if (r == 0 && priv->phydev->link == 0)
+               if (r == 0 && dev->phydev->link == 0)
                        netif_carrier_off(dev);
                cvm_oct_adjust_link(dev);
        } else {
index 6275c15..d533aef 100644 (file)
@@ -40,7 +40,6 @@ struct octeon_ethernet {
        struct sk_buff_head tx_free_list[16];
        /* Device statistics */
        struct net_device_stats stats;
-       struct phy_device *phydev;
        unsigned int last_speed;
        unsigned int last_link;
        /* Last negotiated link state */
index c633476..bca66d8 100644 (file)
@@ -390,6 +390,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
        atomic_set(&ent->count, 1);
        spin_lock_init(&ent->pde_unload_lock);
        INIT_LIST_HEAD(&ent->pde_openers);
+       proc_set_user(ent, (*parent)->uid, (*parent)->gid);
+
 out:
        return ent;
 }
index c8bbc68..7ae6b1d 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/bitops.h>
 #include <linux/mount.h>
 #include <linux/nsproxy.h>
+#include <linux/uidgid.h>
 #include <net/net_namespace.h>
 #include <linux/seq_file.h>
 
@@ -185,6 +186,8 @@ const struct file_operations proc_net_operations = {
 static __net_init int proc_net_ns_init(struct net *net)
 {
        struct proc_dir_entry *netd, *net_statd;
+       kuid_t uid;
+       kgid_t gid;
        int err;
 
        err = -ENOMEM;
@@ -199,6 +202,16 @@ static __net_init int proc_net_ns_init(struct net *net)
        netd->parent = &proc_root;
        memcpy(netd->name, "net", 4);
 
+       uid = make_kuid(net->user_ns, 0);
+       if (!uid_valid(uid))
+               uid = netd->uid;
+
+       gid = make_kgid(net->user_ns, 0);
+       if (!gid_valid(gid))
+               gid = netd->gid;
+
+       proc_set_user(netd, uid, gid);
+
        err = -EEXIST;
        net_statd = proc_net_mkdir(net, "stat", netd);
        if (!net_statd)
index 1b93650..2ed3d71 100644 (file)
@@ -430,6 +430,7 @@ static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, i
 static struct inode *proc_sys_make_inode(struct super_block *sb,
                struct ctl_table_header *head, struct ctl_table *table)
 {
+       struct ctl_table_root *root = head->root;
        struct inode *inode;
        struct proc_inode *ei;
 
@@ -457,6 +458,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
                if (is_empty_dir(head))
                        make_empty_dir_inode(inode);
        }
+
+       if (root->set_ownership)
+               root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
+
 out:
        return inode;
 }
index 984f73b..a4414a1 100644 (file)
@@ -497,6 +497,23 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
        return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
 }
 
+/**
+ * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
+ * @task: the task to be tested
+ * @ancestor: possible ancestor of @task's cgroup
+ *
+ * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
+ * It follows all the same rules as cgroup_is_descendant, and only applies
+ * to the default hierarchy.
+ */
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+                                              struct cgroup *ancestor)
+{
+       struct css_set *cset = task_css_set(task);
+
+       return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
+}
+
 /* no synchronization, the result can only be used as a hint */
 static inline bool cgroup_is_populated(struct cgroup *cgrp)
 {
@@ -557,6 +574,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
+struct cgroup;
 
 static inline void css_put(struct cgroup_subsys_state *css) {}
 static inline int cgroup_attach_task_all(struct task_struct *from,
@@ -574,6 +592,11 @@ static inline void cgroup_free(struct task_struct *p) {}
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
 
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+                                              struct cgroup *ancestor)
+{
+       return true;
+}
 #endif /* !CONFIG_CGROUPS */
 
 /*
index 3a788bf..794bb07 100644 (file)
@@ -52,6 +52,7 @@
 #include <uapi/linux/netdevice.h>
 #include <uapi/linux/if_bonding.h>
 #include <uapi/linux/pkt_cls.h>
+#include <linux/hashtable.h>
 
 struct netpoll_info;
 struct device;
@@ -1800,6 +1801,9 @@ struct net_device {
        unsigned int            num_tx_queues;
        unsigned int            real_num_tx_queues;
        struct Qdisc            *qdisc;
+#ifdef CONFIG_NET_SCHED
+       DECLARE_HASHTABLE       (qdisc_hash, 4);
+#endif
        unsigned long           tx_queue_len;
        spinlock_t              tx_global_lock;
        int                     watchdog_timeo;
index d6c4177..3ed7d20 100644 (file)
@@ -276,6 +276,21 @@ enum qed_protocol {
        QED_PROTOCOL_ISCSI,
 };
 
+enum qed_link_mode_bits {
+       QED_LM_FIBRE_BIT = BIT(0),
+       QED_LM_Autoneg_BIT = BIT(1),
+       QED_LM_Asym_Pause_BIT = BIT(2),
+       QED_LM_Pause_BIT = BIT(3),
+       QED_LM_1000baseT_Half_BIT = BIT(4),
+       QED_LM_1000baseT_Full_BIT = BIT(5),
+       QED_LM_10000baseKR_Full_BIT = BIT(6),
+       QED_LM_25000baseKR_Full_BIT = BIT(7),
+       QED_LM_40000baseLR4_Full_BIT = BIT(8),
+       QED_LM_50000baseKR2_Full_BIT = BIT(9),
+       QED_LM_100000baseKR4_Full_BIT = BIT(10),
+       QED_LM_COUNT = 11
+};
+
 struct qed_link_params {
        bool    link_up;
 
index 697e160..d82cb60 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/rcupdate.h>
 #include <linux/wait.h>
 #include <linux/rbtree.h>
+#include <linux/uidgid.h>
 #include <uapi/linux/sysctl.h>
 
 /* For the /proc/sys support */
@@ -157,6 +158,9 @@ struct ctl_table_root {
        struct ctl_table_set default_set;
        struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
                                           struct nsproxy *namespaces);
+       void (*set_ownership)(struct ctl_table_header *head,
+                             struct ctl_table *table,
+                             kuid_t *uid, kgid_t *gid);
        int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
 };
 
index 73ea256..d25d836 100644 (file)
@@ -7,7 +7,15 @@
 struct gre_base_hdr {
        __be16 flags;
        __be16 protocol;
-};
+} __packed;
+
+struct gre_full_hdr {
+       struct gre_base_hdr fixed_header;
+       __be16 csum;
+       __be16 reserved1;
+       __be32 key;
+       __be32 seq;
+} __packed;
 #define GRE_HEADER_SECTION 4
 
 #define GREPROTO_CISCO         0
index 2840b58..2a89658 100644 (file)
@@ -13,6 +13,7 @@
 
 #include <linux/skbuff.h>
 #include <net/sock.h>
+#include <net/strparser.h>
 #include <uapi/linux/kcm.h>
 
 extern unsigned int kcm_net_id;
@@ -21,16 +22,8 @@ extern unsigned int kcm_net_id;
 #define KCM_STATS_INCR(stat) ((stat)++)
 
 struct kcm_psock_stats {
-       unsigned long long rx_msgs;
-       unsigned long long rx_bytes;
        unsigned long long tx_msgs;
        unsigned long long tx_bytes;
-       unsigned int rx_aborts;
-       unsigned int rx_mem_fail;
-       unsigned int rx_need_more_hdr;
-       unsigned int rx_msg_too_big;
-       unsigned int rx_msg_timeouts;
-       unsigned int rx_bad_hdr_len;
        unsigned long long reserved;
        unsigned long long unreserved;
        unsigned int tx_aborts;
@@ -64,13 +57,6 @@ struct kcm_tx_msg {
        struct sk_buff *last_skb;
 };
 
-struct kcm_rx_msg {
-       int full_len;
-       int accum_len;
-       int offset;
-       int early_eaten;
-};
-
 /* Socket structure for KCM client sockets */
 struct kcm_sock {
        struct sock sk;
@@ -87,6 +73,7 @@ struct kcm_sock {
        struct work_struct tx_work;
        struct list_head wait_psock_list;
        struct sk_buff *seq_skb;
+       u32 tx_stopped : 1;
 
        /* Don't use bit fields here, these are set under different locks */
        bool tx_wait;
@@ -104,11 +91,11 @@ struct bpf_prog;
 /* Structure for an attached lower socket */
 struct kcm_psock {
        struct sock *sk;
+       struct strparser strp;
        struct kcm_mux *mux;
        int index;
 
        u32 tx_stopped : 1;
-       u32 rx_stopped : 1;
        u32 done : 1;
        u32 unattaching : 1;
 
@@ -121,18 +108,12 @@ struct kcm_psock {
        struct kcm_psock_stats stats;
 
        /* Receive */
-       struct sk_buff *rx_skb_head;
-       struct sk_buff **rx_skb_nextp;
-       struct sk_buff *ready_rx_msg;
        struct list_head psock_ready_list;
-       struct work_struct rx_work;
-       struct delayed_work rx_delayed_work;
        struct bpf_prog *bpf_prog;
        struct kcm_sock *rx_kcm;
        unsigned long long saved_rx_bytes;
        unsigned long long saved_rx_msgs;
-       struct timer_list rx_msg_timer;
-       unsigned int rx_need_bytes;
+       struct sk_buff *ready_rx_msg;
 
        /* Transmit */
        struct kcm_sock *tx_kcm;
@@ -146,6 +127,7 @@ struct kcm_net {
        struct mutex mutex;
        struct kcm_psock_stats aggregate_psock_stats;
        struct kcm_mux_stats aggregate_mux_stats;
+       struct strp_aggr_stats aggregate_strp_stats;
        struct list_head mux_list;
        int count;
 };
@@ -163,6 +145,7 @@ struct kcm_mux {
 
        struct kcm_mux_stats stats;
        struct kcm_psock_stats aggregate_psock_stats;
+       struct strp_aggr_stats aggregate_strp_stats;
 
        /* Receive */
        spinlock_t rx_lock ____cacheline_aligned_in_smp;
@@ -190,14 +173,6 @@ static inline void aggregate_psock_stats(struct kcm_psock_stats *stats,
        /* Save psock statistics in the mux when psock is being unattached. */
 
 #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat)
-       SAVE_PSOCK_STATS(rx_msgs);
-       SAVE_PSOCK_STATS(rx_bytes);
-       SAVE_PSOCK_STATS(rx_aborts);
-       SAVE_PSOCK_STATS(rx_mem_fail);
-       SAVE_PSOCK_STATS(rx_need_more_hdr);
-       SAVE_PSOCK_STATS(rx_msg_too_big);
-       SAVE_PSOCK_STATS(rx_msg_timeouts);
-       SAVE_PSOCK_STATS(rx_bad_hdr_len);
        SAVE_PSOCK_STATS(tx_msgs);
        SAVE_PSOCK_STATS(tx_bytes);
        SAVE_PSOCK_STATS(reserved);
index d061ffe..7adf438 100644 (file)
@@ -40,7 +40,6 @@ struct netns_ipv4 {
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        struct fib_rules_ops    *rules_ops;
        bool                    fib_has_custom_rules;
-       struct fib_table __rcu  *fib_local;
        struct fib_table __rcu  *fib_main;
        struct fib_table __rcu  *fib_default;
 #endif
index 7caa99b..cd334c9 100644 (file)
@@ -90,8 +90,8 @@ int unregister_qdisc(struct Qdisc_ops *qops);
 void qdisc_get_default(char *id, size_t len);
 int qdisc_set_default(const char *id);
 
-void qdisc_list_add(struct Qdisc *q);
-void qdisc_list_del(struct Qdisc *q);
+void qdisc_hash_add(struct Qdisc *q);
+void qdisc_hash_del(struct Qdisc *q);
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
 struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
diff --git a/include/net/pptp.h b/include/net/pptp.h
new file mode 100644 (file)
index 0000000..92e9f1f
--- /dev/null
@@ -0,0 +1,23 @@
+#ifndef _NET_PPTP_H
+#define _NET_PPTP_H
+
+#define PPP_LCP_ECHOREQ 0x09
+#define PPP_LCP_ECHOREP 0x0A
+#define SC_RCV_BITS     (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
+
+#define MISSING_WINDOW 20
+#define WRAPPED(curseq, lastseq)\
+       ((((curseq) & 0xffffff00) == 0) &&\
+       (((lastseq) & 0xffffff00) == 0xffffff00))
+
+#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
+struct pptp_gre_header {
+       struct gre_base_hdr gre_hd;
+       __be16 payload_len;
+       __be16 call_id;
+       __be32 seq;
+       __be32 ack;
+} __packed;
+
+
+#endif
index 909aff2..0d50177 100644 (file)
@@ -61,7 +61,7 @@ struct Qdisc {
        u32                     limit;
        const struct Qdisc_ops  *ops;
        struct qdisc_size_table __rcu *stab;
-       struct list_head        list;
+       struct hlist_node       hash;
        u32                     handle;
        u32                     parent;
        void                    *u32_node;
diff --git a/include/net/strparser.h b/include/net/strparser.h
new file mode 100644 (file)
index 0000000..fdb3d67
--- /dev/null
@@ -0,0 +1,145 @@
+/*
+ * Stream Parser
+ *
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef __NET_STRPARSER_H_
+#define __NET_STRPARSER_H_
+
+#include <linux/skbuff.h>
+#include <net/sock.h>
+
+#define STRP_STATS_ADD(stat, count) ((stat) += (count))
+#define STRP_STATS_INCR(stat) ((stat)++)
+
+struct strp_stats {
+       unsigned long long rx_msgs;
+       unsigned long long rx_bytes;
+       unsigned int rx_mem_fail;
+       unsigned int rx_need_more_hdr;
+       unsigned int rx_msg_too_big;
+       unsigned int rx_msg_timeouts;
+       unsigned int rx_bad_hdr_len;
+};
+
+struct strp_aggr_stats {
+       unsigned long long rx_msgs;
+       unsigned long long rx_bytes;
+       unsigned int rx_mem_fail;
+       unsigned int rx_need_more_hdr;
+       unsigned int rx_msg_too_big;
+       unsigned int rx_msg_timeouts;
+       unsigned int rx_bad_hdr_len;
+       unsigned int rx_aborts;
+       unsigned int rx_interrupted;
+       unsigned int rx_unrecov_intr;
+};
+
+struct strparser;
+
+/* Callbacks are called with lock held for the attached socket */
+struct strp_callbacks {
+       int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
+       void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
+       int (*read_sock_done)(struct strparser *strp, int err);
+       void (*abort_parser)(struct strparser *strp, int err);
+};
+
+struct strp_rx_msg {
+       int full_len;
+       int offset;
+};
+
+static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb)
+{
+       return (struct strp_rx_msg *)((void *)skb->cb +
+               offsetof(struct qdisc_skb_cb, data));
+}
+
+/* Structure for an attached lower socket */
+struct strparser {
+       struct sock *sk;
+
+       u32 rx_stopped : 1;
+       u32 rx_paused : 1;
+       u32 rx_aborted : 1;
+       u32 rx_interrupted : 1;
+       u32 rx_unrecov_intr : 1;
+
+       struct sk_buff **rx_skb_nextp;
+       struct timer_list rx_msg_timer;
+       struct sk_buff *rx_skb_head;
+       unsigned int rx_need_bytes;
+       struct delayed_work rx_delayed_work;
+       struct work_struct rx_work;
+       struct strp_stats stats;
+       struct strp_callbacks cb;
+};
+
+/* Must be called with lock held for attached socket */
+static inline void strp_pause(struct strparser *strp)
+{
+       strp->rx_paused = 1;
+}
+
+/* May be called without holding lock for attached socket */
+static inline void strp_unpause(struct strparser *strp)
+{
+       strp->rx_paused = 0;
+}
+
+static inline void save_strp_stats(struct strparser *strp,
+                                  struct strp_aggr_stats *agg_stats)
+{
+       /* Save psock statistics in the mux when psock is being unattached. */
+
+#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat +=           \
+                                strp->stats._stat)
+       SAVE_PSOCK_STATS(rx_msgs);
+       SAVE_PSOCK_STATS(rx_bytes);
+       SAVE_PSOCK_STATS(rx_mem_fail);
+       SAVE_PSOCK_STATS(rx_need_more_hdr);
+       SAVE_PSOCK_STATS(rx_msg_too_big);
+       SAVE_PSOCK_STATS(rx_msg_timeouts);
+       SAVE_PSOCK_STATS(rx_bad_hdr_len);
+#undef SAVE_PSOCK_STATS
+
+       if (strp->rx_aborted)
+               agg_stats->rx_aborts++;
+       if (strp->rx_interrupted)
+               agg_stats->rx_interrupted++;
+       if (strp->rx_unrecov_intr)
+               agg_stats->rx_unrecov_intr++;
+}
+
+static inline void aggregate_strp_stats(struct strp_aggr_stats *stats,
+                                       struct strp_aggr_stats *agg_stats)
+{
+#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat)
+       SAVE_PSOCK_STATS(rx_msgs);
+       SAVE_PSOCK_STATS(rx_bytes);
+       SAVE_PSOCK_STATS(rx_mem_fail);
+       SAVE_PSOCK_STATS(rx_need_more_hdr);
+       SAVE_PSOCK_STATS(rx_msg_too_big);
+       SAVE_PSOCK_STATS(rx_msg_timeouts);
+       SAVE_PSOCK_STATS(rx_bad_hdr_len);
+       SAVE_PSOCK_STATS(rx_aborts);
+       SAVE_PSOCK_STATS(rx_interrupted);
+       SAVE_PSOCK_STATS(rx_unrecov_intr);
+#undef SAVE_PSOCK_STATS
+
+}
+
+void strp_done(struct strparser *strp);
+void strp_stop(struct strparser *strp);
+void strp_check_rcv(struct strparser *strp);
+int strp_init(struct strparser *strp, struct sock *csk,
+             struct strp_callbacks *cb);
+void strp_tcp_data_ready(struct strparser *strp);
+
+#endif /* __NET_STRPARSER_H_ */
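
The strp_callbacks structure above is the whole contract between the stream parser and its user: parse_msg() inspects the bytes accumulated so far and reports the length of the current message, rcv_msg() receives each completed message, and read_sock_done()/abort_parser() cover completion and error handling. Below is a userspace analogue of a parse_msg() callback for a simple length-prefixed framing, under the assumed return convention of full message length when known, 0 when more bytes are needed, and a negative value on error:

/* Userspace analogue of a parse_msg() callback: the first two bytes
 * (big endian) give the payload length.  The return convention here is
 * an assumption about the strparser design, not a quoted API contract.
 */
#include <stdio.h>
#include <stddef.h>

#define MAX_MSG 1024

static int parse_msg(const unsigned char *buf, size_t len)
{
    size_t payload;

    if (len < 2)
        return 0;                   /* header not complete yet */

    payload = (size_t)(buf[0] << 8) | buf[1];
    if (payload > MAX_MSG)
        return -1;                  /* oversized, abort the stream */

    return 2 + (int)payload;        /* header + payload */
}

int main(void)
{
    const unsigned char stream[] = { 0x00, 0x03, 'a', 'b', 'c' };

    printf("%d\n", parse_msg(stream, 1));               /* 0: need more */
    printf("%d\n", parse_msg(stream, sizeof(stream)));  /* 5: full frame */
    return 0;
}
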
index 0fbf6fd..734fe83 100644 (file)
 
 #define BATADV_NL_MCAST_GROUP_TPMETER  "tpmeter"
 
+/**
+ * enum batadv_tt_client_flags - TT client specific flags
+ * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table
+ * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new
+ *  update telling its new real location has not been received/sent yet
+ * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface.
+ *  This information is used by the "AP Isolation" feature
+ * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This
+ *  information is used by the Extended Isolation feature
+ * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table
+ * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has
+ *  not been announced yet
+ * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept
+ *  in the table for one more originator interval for consistency purposes
+ * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of
+ *  the network but no node has announced it yet
+ *
+ * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire.
+ * Bits from 8 to 15 are called _local flags_ because they are used for local
+ * computations only.
+ *
+ * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with
+ * the other nodes in the network. To achieve this goal these flags are included
+ * in the TT CRC computation.
+ */
+enum batadv_tt_client_flags {
+       BATADV_TT_CLIENT_DEL     = (1 << 0),
+       BATADV_TT_CLIENT_ROAM    = (1 << 1),
+       BATADV_TT_CLIENT_WIFI    = (1 << 4),
+       BATADV_TT_CLIENT_ISOLA   = (1 << 5),
+       BATADV_TT_CLIENT_NOPURGE = (1 << 8),
+       BATADV_TT_CLIENT_NEW     = (1 << 9),
+       BATADV_TT_CLIENT_PENDING = (1 << 10),
+       BATADV_TT_CLIENT_TEMP    = (1 << 11),
+};
+
 /**
  * enum batadv_nl_attrs - batman-adv netlink attributes
  *
  * @BATADV_ATTR_TPMETER_BYTES: amount of acked bytes during run
  * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session
  * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment
+ * @BATADV_ATTR_ACTIVE: Flag indicating if the hard interface is active
+ * @BATADV_ATTR_TT_ADDRESS: Client MAC address
+ * @BATADV_ATTR_TT_TTVN: Translation table version
+ * @BATADV_ATTR_TT_LAST_TTVN: Previous translation table version
+ * @BATADV_ATTR_TT_CRC32: CRC32 over translation table
+ * @BATADV_ATTR_TT_VID: VLAN ID
+ * @BATADV_ATTR_TT_FLAGS: Translation table client flags
+ * @BATADV_ATTR_FLAG_BEST: Flags indicating entry is the best
+ * @BATADV_ATTR_LAST_SEEN_MSECS: Time in milliseconds since last seen
+ * @BATADV_ATTR_NEIGH_ADDRESS: Neighbour MAC address
+ * @BATADV_ATTR_TQ: TQ to neighbour
+ * @BATADV_ATTR_THROUGHPUT: Estimated throughput to Neighbour
+ * @BATADV_ATTR_BANDWIDTH_UP: Reported uplink bandwidth
+ * @BATADV_ATTR_BANDWIDTH_DOWN: Reported downlink bandwidth
+ * @BATADV_ATTR_ROUTER: Gateway router MAC address
+ * @BATADV_ATTR_BLA_OWN: Flag indicating own originator
+ * @BATADV_ATTR_BLA_ADDRESS: Bridge loop avoidance claim MAC address
+ * @BATADV_ATTR_BLA_VID: BLA VLAN ID
+ * @BATADV_ATTR_BLA_BACKBONE: BLA gateway originator MAC address
+ * @BATADV_ATTR_BLA_CRC: BLA CRC
  * @__BATADV_ATTR_AFTER_LAST: internal use
  * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available
  * @BATADV_ATTR_MAX: highest attribute number currently defined
@@ -60,6 +116,26 @@ enum batadv_nl_attrs {
        BATADV_ATTR_TPMETER_BYTES,
        BATADV_ATTR_TPMETER_COOKIE,
        BATADV_ATTR_PAD,
+       BATADV_ATTR_ACTIVE,
+       BATADV_ATTR_TT_ADDRESS,
+       BATADV_ATTR_TT_TTVN,
+       BATADV_ATTR_TT_LAST_TTVN,
+       BATADV_ATTR_TT_CRC32,
+       BATADV_ATTR_TT_VID,
+       BATADV_ATTR_TT_FLAGS,
+       BATADV_ATTR_FLAG_BEST,
+       BATADV_ATTR_LAST_SEEN_MSECS,
+       BATADV_ATTR_NEIGH_ADDRESS,
+       BATADV_ATTR_TQ,
+       BATADV_ATTR_THROUGHPUT,
+       BATADV_ATTR_BANDWIDTH_UP,
+       BATADV_ATTR_BANDWIDTH_DOWN,
+       BATADV_ATTR_ROUTER,
+       BATADV_ATTR_BLA_OWN,
+       BATADV_ATTR_BLA_ADDRESS,
+       BATADV_ATTR_BLA_VID,
+       BATADV_ATTR_BLA_BACKBONE,
+       BATADV_ATTR_BLA_CRC,
        /* add attributes above here, update the policy in netlink.c */
        __BATADV_ATTR_AFTER_LAST,
        NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST,
@@ -73,6 +149,15 @@ enum batadv_nl_attrs {
  * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv device
  * @BATADV_CMD_TP_METER: Start a tp meter session
  * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session
+ * @BATADV_CMD_GET_ROUTING_ALGOS: Query the list of routing algorithms.
+ * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces
+ * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations
+ * @BATADV_CMD_GET_TRANSTABLE_GLOBAL: Query list of global translations
+ * @BATADV_CMD_GET_ORIGINATORS: Query list of originators
+ * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours
+ * @BATADV_CMD_GET_GATEWAYS: Query list of gateways
+ * @BATADV_CMD_GET_BLA_CLAIM: Query list of bridge loop avoidance claims
+ * @BATADV_CMD_GET_BLA_BACKBONE: Query list of bridge loop avoidance backbones
  * @__BATADV_CMD_AFTER_LAST: internal use
  * @BATADV_CMD_MAX: highest used command number
  */
@@ -81,6 +166,15 @@ enum batadv_nl_commands {
        BATADV_CMD_GET_MESH_INFO,
        BATADV_CMD_TP_METER,
        BATADV_CMD_TP_METER_CANCEL,
+       BATADV_CMD_GET_ROUTING_ALGOS,
+       BATADV_CMD_GET_HARDIFS,
+       BATADV_CMD_GET_TRANSTABLE_LOCAL,
+       BATADV_CMD_GET_TRANSTABLE_GLOBAL,
+       BATADV_CMD_GET_ORIGINATORS,
+       BATADV_CMD_GET_NEIGHBORS,
+       BATADV_CMD_GET_GATEWAYS,
+       BATADV_CMD_GET_BLA_CLAIM,
+       BATADV_CMD_GET_BLA_BACKBONE,
        /* add new commands above here */
        __BATADV_CMD_AFTER_LAST,
        BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1
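
As a rough idea of how these commands are meant to be consumed, here is a userspace sketch (libnl-3, not part of the patch) that dumps BATADV_CMD_GET_ROUTING_ALGOS. It assumes the "batadv" generic netlink family name and that this particular command needs no mesh interface attribute; error handling is omitted.

#include <stdio.h>
#include <linux/batman_adv.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>

static int print_algo(struct nl_msg *msg, void *arg)
{
	struct genlmsghdr *ghdr = nlmsg_data(nlmsg_hdr(msg));
	struct nlattr *attrs[NUM_BATADV_ATTR];

	if (nla_parse(attrs, BATADV_ATTR_MAX, genlmsg_attrdata(ghdr, 0),
		      genlmsg_attrlen(ghdr, 0), NULL))
		return NL_SKIP;

	if (attrs[BATADV_ATTR_ALGO_NAME])
		printf("%s\n", nla_get_string(attrs[BATADV_ATTR_ALGO_NAME]));

	return NL_OK;
}

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	int family;

	genl_connect(sk);
	family = genl_ctrl_resolve(sk, "batadv");   /* assumed family name */

	nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, print_algo, NULL);
	genl_send_simple(sk, family, BATADV_CMD_GET_ROUTING_ALGOS,
			 1 /* assumed family version */, NLM_F_DUMP);
	nl_recvmsgs_default(sk);

	nl_socket_free(sk);
	return 0;
}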
index 9e5fc16..866d53c 100644 (file)
@@ -375,6 +375,17 @@ enum bpf_func_id {
         */
        BPF_FUNC_probe_write_user,
 
+       /**
+        * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
+        * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+        * @index: index of the cgroup in the bpf_map
+        * Return:
+        *   == 0 current failed the cgroup2 descendant test
+        *   == 1 current succeeded the cgroup2 descendant test
+        *    < 0 error
+        */
+       BPF_FUNC_current_task_under_cgroup,
+
        __BPF_FUNC_MAX_ID,
 };
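
A sketch of how a tracing program might use the new helper, written in the style of samples/bpf (the bpf_helpers.h wrappers, section names and maps below are assumptions, not part of this patch): count sys_sync() calls made by tasks inside the cgroup2 hierarchy stored at slot 0 of a BPF_MAP_TYPE_CGROUP_ARRAY.

#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") cgroup_map = {
	.type        = BPF_MAP_TYPE_CGROUP_ARRAY,
	.key_size    = sizeof(__u32),
	.value_size  = sizeof(__u32),
	.max_entries = 1,
};

struct bpf_map_def SEC("maps") count_map = {
	.type        = BPF_MAP_TYPE_ARRAY,
	.key_size    = sizeof(__u32),
	.value_size  = sizeof(__u64),
	.max_entries = 1,
};

SEC("kprobe/sys_sync")
int count_cgroup_sync(struct pt_regs *ctx)
{
	__u32 key = 0;
	__u64 *val;

	/* 1: current is in the cgroup (or a descendant); 0: it is not;
	 * <0: error (e.g. interrupt context or bad index)
	 */
	if (bpf_current_task_under_cgroup(&cgroup_map, 0) != 1)
		return 0;

	val = bpf_map_lookup_elem(&count_map, &key);
	if (val)
		(*val)++;

	return 0;
}

char _license[] SEC("license") = "GPL";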
 
index 1046f55..361b9f0 100644 (file)
 #define GRE_SEQ                __cpu_to_be16(0x1000)
 #define GRE_STRICT     __cpu_to_be16(0x0800)
 #define GRE_REC                __cpu_to_be16(0x0700)
-#define GRE_FLAGS      __cpu_to_be16(0x00F8)
+#define GRE_ACK                __cpu_to_be16(0x0080)
+#define GRE_FLAGS      __cpu_to_be16(0x0078)
 #define GRE_VERSION    __cpu_to_be16(0x0007)
 
+#define GRE_IS_CSUM(f)         ((f) & GRE_CSUM)
+#define GRE_IS_ROUTING(f)      ((f) & GRE_ROUTING)
+#define GRE_IS_KEY(f)          ((f) & GRE_KEY)
+#define GRE_IS_SEQ(f)          ((f) & GRE_SEQ)
+#define GRE_IS_STRICT(f)       ((f) & GRE_STRICT)
+#define GRE_IS_REC(f)          ((f) & GRE_REC)
+#define GRE_IS_ACK(f)          ((f) & GRE_ACK)
+
+#define GRE_VERSION_1          __cpu_to_be16(0x0001)
+#define GRE_PROTO_PPP          __cpu_to_be16(0x880b)
+#define GRE_PPTP_KEY_MASK      __cpu_to_be32(0xffff)
+
 struct ip_tunnel_parm {
        char                    name[IFNAMSIZ];
        int                     link;
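
A small illustration (not from this patch) of how the new helpers read the GRE flags/version word, assuming a struct gre_base_hdr style pointer (flags and protocol, both big-endian) that has already been bounds-checked: classifying a header as PPTP "enhanced GRE".

#include <linux/if_tunnel.h>
#include <net/gre.h>

/* PPTP enhanced GRE: version 1, PPP payload, key present, and the
 * checksum/routing/strict bits never set.  The low 16 bits of the key
 * word (GRE_PPTP_KEY_MASK) carry the PPTP call ID.
 */
static bool is_pptp_gre_header(const struct gre_base_hdr *greh)
{
	__be16 flags = greh->flags;

	if ((flags & GRE_VERSION) != GRE_VERSION_1)
		return false;
	if (greh->protocol != GRE_PROTO_PPP)
		return false;
	if (GRE_IS_CSUM(flags) || GRE_IS_ROUTING(flags) || GRE_IS_STRICT(flags))
		return false;

	return GRE_IS_KEY(flags);
}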
index 237fac4..15d8510 100644 (file)
@@ -48,6 +48,7 @@
 #define BMCR_SPEED100          0x2000  /* Select 100Mbps              */
 #define BMCR_LOOPBACK          0x4000  /* TXD loopback bits           */
 #define BMCR_RESET             0x8000  /* Reset to default state      */
+#define BMCR_SPEED10           0x0000  /* Select 10Mbps               */
 
 /* Basic mode status register. */
 #define BMSR_ERCAP             0x0001  /* Ext-reg capability          */
index 633a650..a2ac051 100644 (file)
@@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
 }
 late_initcall(register_perf_event_array_map);
 
-#ifdef CONFIG_SOCK_CGROUP_DATA
+#ifdef CONFIG_CGROUPS
 static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
                                     struct file *map_file /* not used */,
                                     int fd)
index daea765..abb61f3 100644 (file)
@@ -930,14 +930,14 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
                          enum bpf_arg_type arg_type,
                          struct bpf_call_arg_meta *meta)
 {
-       struct reg_state *reg = env->cur_state.regs + regno;
-       enum bpf_reg_type expected_type;
+       struct reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
+       enum bpf_reg_type expected_type, type = reg->type;
        int err = 0;
 
        if (arg_type == ARG_DONTCARE)
                return 0;
 
-       if (reg->type == NOT_INIT) {
+       if (type == NOT_INIT) {
                verbose("R%d !read_ok\n", regno);
                return -EACCES;
        }
@@ -950,16 +950,29 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
                return 0;
        }
 
+       if (type == PTR_TO_PACKET && !may_write_pkt_data(env->prog->type)) {
+               verbose("helper access to the packet is not allowed for clsact\n");
+               return -EACCES;
+       }
+
        if (arg_type == ARG_PTR_TO_MAP_KEY ||
            arg_type == ARG_PTR_TO_MAP_VALUE) {
                expected_type = PTR_TO_STACK;
+               if (type != PTR_TO_PACKET && type != expected_type)
+                       goto err_type;
        } else if (arg_type == ARG_CONST_STACK_SIZE ||
                   arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
                expected_type = CONST_IMM;
+               if (type != expected_type)
+                       goto err_type;
        } else if (arg_type == ARG_CONST_MAP_PTR) {
                expected_type = CONST_PTR_TO_MAP;
+               if (type != expected_type)
+                       goto err_type;
        } else if (arg_type == ARG_PTR_TO_CTX) {
                expected_type = PTR_TO_CTX;
+               if (type != expected_type)
+                       goto err_type;
        } else if (arg_type == ARG_PTR_TO_STACK ||
                   arg_type == ARG_PTR_TO_RAW_STACK) {
                expected_type = PTR_TO_STACK;
@@ -967,20 +980,16 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
                 * passed in as argument, it's a CONST_IMM type. Final test
                 * happens during stack boundary checking.
                 */
-               if (reg->type == CONST_IMM && reg->imm == 0)
-                       expected_type = CONST_IMM;
+               if (type == CONST_IMM && reg->imm == 0)
+                       /* final test in check_stack_boundary() */;
+               else if (type != PTR_TO_PACKET && type != expected_type)
+                       goto err_type;
                meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK;
        } else {
                verbose("unsupported arg_type %d\n", arg_type);
                return -EFAULT;
        }
 
-       if (reg->type != expected_type) {
-               verbose("R%d type=%s expected=%s\n", regno,
-                       reg_type_str[reg->type], reg_type_str[expected_type]);
-               return -EACCES;
-       }
-
        if (arg_type == ARG_CONST_MAP_PTR) {
                /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
                meta->map_ptr = reg->map_ptr;
@@ -998,8 +1007,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
                        verbose("invalid map_ptr to access map->key\n");
                        return -EACCES;
                }
-               err = check_stack_boundary(env, regno, meta->map_ptr->key_size,
-                                          false, NULL);
+               if (type == PTR_TO_PACKET)
+                       err = check_packet_access(env, regno, 0,
+                                                 meta->map_ptr->key_size);
+               else
+                       err = check_stack_boundary(env, regno,
+                                                  meta->map_ptr->key_size,
+                                                  false, NULL);
        } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
                /* bpf_map_xxx(..., map_ptr, ..., value) call:
                 * check [value, value + map->value_size) validity
@@ -1009,9 +1023,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
                        verbose("invalid map_ptr to access map->value\n");
                        return -EACCES;
                }
-               err = check_stack_boundary(env, regno,
-                                          meta->map_ptr->value_size,
-                                          false, NULL);
+               if (type == PTR_TO_PACKET)
+                       err = check_packet_access(env, regno, 0,
+                                                 meta->map_ptr->value_size);
+               else
+                       err = check_stack_boundary(env, regno,
+                                                  meta->map_ptr->value_size,
+                                                  false, NULL);
        } else if (arg_type == ARG_CONST_STACK_SIZE ||
                   arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
                bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);
@@ -1025,11 +1043,18 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
                        verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
                        return -EACCES;
                }
-               err = check_stack_boundary(env, regno - 1, reg->imm,
-                                          zero_size_allowed, meta);
+               if (regs[regno - 1].type == PTR_TO_PACKET)
+                       err = check_packet_access(env, regno - 1, 0, reg->imm);
+               else
+                       err = check_stack_boundary(env, regno - 1, reg->imm,
+                                                  zero_size_allowed, meta);
        }
 
        return err;
+err_type:
+       verbose("R%d type=%s expected=%s\n", regno,
+               reg_type_str[type], reg_type_str[expected_type]);
+       return -EACCES;
 }
 
 static int check_map_func_compatibility(struct bpf_map *map, int func_id)
@@ -1053,7 +1078,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
                        goto error;
                break;
        case BPF_MAP_TYPE_CGROUP_ARRAY:
-               if (func_id != BPF_FUNC_skb_under_cgroup)
+               if (func_id != BPF_FUNC_skb_under_cgroup &&
+                   func_id != BPF_FUNC_current_task_under_cgroup)
                        goto error;
                break;
        default:
@@ -1075,6 +1101,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
                if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
                        goto error;
                break;
+       case BPF_FUNC_current_task_under_cgroup:
        case BPF_FUNC_skb_under_cgroup:
                if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
                        goto error;
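
The hunks above extend the argument checks so that, for program types which may write packet data, a bounds-checked PTR_TO_PACKET register is accepted where ARG_PTR_TO_MAP_KEY, ARG_PTR_TO_MAP_VALUE or ARG_PTR_TO_STACK previously required a stack pointer, with check_packet_access() enforcing the size. A hedged sketch of what that permits, written as a samples/bpf-style XDP program (section name, loader conventions and the omitted ethertype check are assumptions): the IPv4 destination address is handed to the map helpers straight from the packet instead of being copied to the stack first.

#include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") flow_map = {
	.type        = BPF_MAP_TYPE_HASH,
	.key_size    = sizeof(__be32),	/* IPv4 destination address */
	.value_size  = sizeof(__u64),
	.max_entries = 1024,
};

SEC("xdp")
int count_by_daddr(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct iphdr *iph = data + ETH_HLEN;	/* ethertype check omitted */
	__u64 one = 1, *val;

	if ((void *)(iph + 1) > data_end)
		return XDP_PASS;

	/* &iph->daddr is a packet pointer; the verifier now validates it
	 * against the packet bounds instead of demanding a stack buffer.
	 */
	val = bpf_map_lookup_elem(&flow_map, &iph->daddr);
	if (val)
		__sync_fetch_and_add(val, 1);
	else
		bpf_map_update_elem(&flow_map, &iph->daddr, &one, BPF_ANY);

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";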
index b20438f..ad35213 100644 (file)
@@ -376,6 +376,34 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
        .ret_type       = RET_INTEGER,
 };
 
+static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+       struct bpf_map *map = (struct bpf_map *)(long)r1;
+       struct bpf_array *array = container_of(map, struct bpf_array, map);
+       struct cgroup *cgrp;
+       u32 idx = (u32)r2;
+
+       if (unlikely(in_interrupt()))
+               return -EINVAL;
+
+       if (unlikely(idx >= array->map.max_entries))
+               return -E2BIG;
+
+       cgrp = READ_ONCE(array->ptrs[idx]);
+       if (unlikely(!cgrp))
+               return -EAGAIN;
+
+       return task_under_cgroup_hierarchy(current, cgrp);
+}
+
+static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
+       .func           = bpf_current_task_under_cgroup,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_CONST_MAP_PTR,
+       .arg2_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 {
        switch (func_id) {
@@ -407,6 +435,10 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
                return &bpf_perf_event_read_proto;
        case BPF_FUNC_probe_write_user:
                return bpf_get_probe_write_proto();
+       case BPF_FUNC_current_task_under_cgroup:
+               return &bpf_current_task_under_cgroup_proto;
+       case BPF_FUNC_get_prandom_u32:
+               return &bpf_get_prandom_u32_proto;
        default:
                return NULL;
        }
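
The helper only becomes useful once userspace has stored a cgroup2 directory fd in the array. A hedged userspace sketch using the thin bpf_update_elem() wrapper from samples/bpf/libbpf.h (the wrapper name and the cgroup path are assumptions, not part of this patch):

#include <fcntl.h>
#include <unistd.h>
#include <linux/bpf.h>
#include "libbpf.h"	/* samples/bpf syscall wrappers (assumed) */

int store_cgroup(int cgroup_array_fd)
{
	int cg_fd = open("/sys/fs/cgroup/unified/my.slice", O_RDONLY);
	__u32 key = 0;	/* the index later passed to the helper */
	int err;

	if (cg_fd < 0)
		return -1;

	/* BPF_MAP_UPDATE_ELEM on a CGROUP_ARRAY takes the cgroup fd as the
	 * value; the kernel resolves and holds the cgroup behind it.
	 */
	err = bpf_update_elem(cgroup_array_fd, &key, &cg_fd, BPF_ANY);

	close(cg_fd);
	return err;
}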
index c2cdbce..7b6cd34 100644 (file)
@@ -369,6 +369,7 @@ source "net/irda/Kconfig"
 source "net/bluetooth/Kconfig"
 source "net/rxrpc/Kconfig"
 source "net/kcm/Kconfig"
+source "net/strparser/Kconfig"
 
 config FIB_RULES
        bool
index 9bd20bb..4cafaa2 100644 (file)
@@ -35,6 +35,7 @@ obj-$(CONFIG_BT)              += bluetooth/
 obj-$(CONFIG_SUNRPC)           += sunrpc/
 obj-$(CONFIG_AF_RXRPC)         += rxrpc/
 obj-$(CONFIG_AF_KCM)           += kcm/
+obj-$(CONFIG_STREAM_PARSER)    += strparser/
 obj-$(CONFIG_ATM)              += atm/
 obj-$(CONFIG_L2TP)             += l2tp/
 obj-$(CONFIG_DECNET)           += decnet/
index 81dbbf5..f2cc50d 100644 (file)
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <linux/moduleparam.h>
+#include <linux/netlink.h>
 #include <linux/printk.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
+#include "netlink.h"
 
 char batadv_routing_algo[20] = "BATMAN_IV";
 static struct hlist_head batadv_algo_list;
@@ -138,3 +144,65 @@ static struct kparam_string batadv_param_string_ra = {
 
 module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
                0644);
+
+/**
+ * batadv_algo_dump_entry - fill in information about one supported routing
+ *  algorithm
+ * @msg: netlink message to be sent back
+ * @portid: Port to reply to
+ * @seq: Sequence number of message
+ * @bat_algo_ops: Algorithm to be dumped
+ *
+ * Return: Error number, or 0 on success
+ */
+static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                                 struct batadv_algo_ops *bat_algo_ops)
+{
+       void *hdr;
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+                         NLM_F_MULTI, BATADV_CMD_GET_ROUTING_ALGOS);
+       if (!hdr)
+               return -EMSGSIZE;
+
+       if (nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_algo_ops->name))
+               goto nla_put_failure;
+
+       genlmsg_end(msg, hdr);
+       return 0;
+
+ nla_put_failure:
+       genlmsg_cancel(msg, hdr);
+       return -EMSGSIZE;
+}
+
+/**
+ * batadv_algo_dump - fill in information about supported routing
+ *  algorithms
+ * @msg: netlink message to be sent back
+ * @cb: Parameters to the netlink request
+ *
+ * Return: Length of reply message.
+ */
+int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       int portid = NETLINK_CB(cb->skb).portid;
+       struct batadv_algo_ops *bat_algo_ops;
+       int skip = cb->args[0];
+       int i = 0;
+
+       hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
+               if (i++ < skip)
+                       continue;
+
+               if (batadv_algo_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
+                                          bat_algo_ops)) {
+                       i--;
+                       break;
+               }
+       }
+
+       cb->args[0] = i;
+
+       return msg->len;
+}
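
All of the netlink dump callbacks added in this series follow the same resume convention: the position reached is parked in cb->args[] when the reply skb fills up, and the next invocation skips that many entries before emitting more. Reduced to its core (the names below are hypothetical), the pattern looks like this:

#include <linux/list.h>
#include <linux/netlink.h>
#include <linux/skbuff.h>

struct example_obj {
	struct list_head list;
	/* ... payload ... */
};

static LIST_HEAD(example_list);

/* hypothetical per-entry helper; returns -EMSGSIZE when msg is full */
static int example_dump_entry(struct sk_buff *msg, struct example_obj *obj);

static int example_dump(struct sk_buff *msg, struct netlink_callback *cb)
{
	struct example_obj *obj;
	int skip = cb->args[0];	/* entries already emitted in earlier parts */
	int i = 0;

	list_for_each_entry(obj, &example_list, list) {
		if (i++ < skip)
			continue;

		if (example_dump_entry(msg, obj)) {
			i--;	/* retry this entry in the next part */
			break;
		}
	}

	cb->args[0] = i;
	return msg->len;	/* >0: call again; 0 (nothing added): done */
}

The originator dump below uses three slots (hash bucket, index within the bucket, sub entry) instead of one, but the resume logic is the same.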
index 860d773..3b5b69c 100644 (file)
@@ -22,7 +22,9 @@
 
 #include <linux/types.h>
 
+struct netlink_callback;
 struct seq_file;
+struct sk_buff;
 
 extern char batadv_routing_algo[];
 extern struct list_head batadv_hardif_list;
@@ -31,5 +33,6 @@ void batadv_algo_init(void);
 int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
 int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
 int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb);
 
 #endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */
index 19b0abd..9ed4f1f 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/pkt_sched.h>
 #include <linux/printk.h>
 #include <linux/random.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "bitarray.h"
+#include "gateway_client.h"
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
 #include "network-coding.h"
 #include "originator.h"
 #include "packet.h"
@@ -528,36 +534,25 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
 static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
 {
        struct net_device *soft_iface;
-       struct batadv_priv *bat_priv;
-       struct batadv_hard_iface *primary_if = NULL;
 
        if (!forw_packet->if_incoming) {
                pr_err("Error - can't forward packet: incoming iface not specified\n");
-               goto out;
+               return;
        }
 
        soft_iface = forw_packet->if_incoming->soft_iface;
-       bat_priv = netdev_priv(soft_iface);
 
        if (WARN_ON(!forw_packet->if_outgoing))
-               goto out;
+               return;
 
        if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface))
-               goto out;
+               return;
 
        if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE)
-               goto out;
-
-       primary_if = batadv_primary_if_get_selected(bat_priv);
-       if (!primary_if)
-               goto out;
+               return;
 
        /* only for one specific outgoing interface */
        batadv_iv_ogm_send_to_if(forw_packet, forw_packet->if_outgoing);
-
-out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
 }
 
 /**
@@ -685,19 +680,12 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
        struct batadv_forw_packet *forw_packet_aggr;
        unsigned char *skb_buff;
        unsigned int skb_size;
+       atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left;
 
-       /* own packet should always be scheduled */
-       if (!own_packet) {
-               if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
-                       batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-                                  "batman packet queue full\n");
-                       return;
-               }
-       }
-
-       forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC);
+       forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing,
+                                                   queue_left, bat_priv);
        if (!forw_packet_aggr)
-               goto out_nomem;
+               return;
 
        if (atomic_read(&bat_priv->aggregated_ogms) &&
            packet_len < BATADV_MAX_AGGREGATION_BYTES)
@@ -708,8 +696,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
        skb_size += ETH_HLEN;
 
        forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
-       if (!forw_packet_aggr->skb)
-               goto out_free_forw_packet;
+       if (!forw_packet_aggr->skb) {
+               batadv_forw_packet_free(forw_packet_aggr);
+               return;
+       }
+
        forw_packet_aggr->skb->priority = TC_PRIO_CONTROL;
        skb_reserve(forw_packet_aggr->skb, ETH_HLEN);
 
@@ -717,12 +708,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
        forw_packet_aggr->packet_len = packet_len;
        memcpy(skb_buff, packet_buff, packet_len);
 
-       kref_get(&if_incoming->refcount);
-       kref_get(&if_outgoing->refcount);
        forw_packet_aggr->own = own_packet;
-       forw_packet_aggr->if_incoming = if_incoming;
-       forw_packet_aggr->if_outgoing = if_outgoing;
-       forw_packet_aggr->num_packets = 0;
        forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS;
        forw_packet_aggr->send_time = send_time;
 
@@ -741,13 +727,6 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
        queue_delayed_work(batadv_event_workqueue,
                           &forw_packet_aggr->delayed_work,
                           send_time - jiffies);
-
-       return;
-out_free_forw_packet:
-       kfree(forw_packet_aggr);
-out_nomem:
-       if (!own_packet)
-               atomic_inc(&bat_priv->batman_queue_left);
 }
 
 /* aggregate a new packet into the existing ogm packet */
@@ -1830,10 +1809,6 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work)
                batadv_iv_ogm_schedule(forw_packet->if_incoming);
 
 out:
-       /* don't count own packet */
-       if (!forw_packet->own)
-               atomic_inc(&bat_priv->batman_queue_left);
-
        batadv_forw_packet_free(forw_packet);
 }
 
@@ -1977,6 +1952,235 @@ next:
                seq_puts(seq, "No batman nodes in range ...\n");
 }
 
+/**
+ * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a
+ *  given outgoing interface.
+ * @neigh_node: Neighbour of interest
+ * @if_outgoing: Outgoing interface of interest
+ * @tq_avg: Pointer to where the TQ average should be stored
+ *
+ * Return: False if no average TQ available, otherwise true.
+ */
+static bool
+batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node,
+                              struct batadv_hard_iface *if_outgoing,
+                              u8 *tq_avg)
+{
+       struct batadv_neigh_ifinfo *n_ifinfo;
+
+       n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
+       if (!n_ifinfo)
+               return false;
+
+       *tq_avg = n_ifinfo->bat_iv.tq_avg;
+       batadv_neigh_ifinfo_put(n_ifinfo);
+
+       return true;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_subentry - Dump an originator subentry into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @neigh_node: Single hop neighbour
+ * @best: Is the best originator
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+                                struct batadv_priv *bat_priv,
+                                struct batadv_hard_iface *if_outgoing,
+                                struct batadv_orig_node *orig_node,
+                                struct batadv_neigh_node *neigh_node,
+                                bool best)
+{
+       void *hdr;
+       u8 tq_avg;
+       unsigned int last_seen_msecs;
+
+       last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen);
+
+       if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node, if_outgoing, &tq_avg))
+               return 0;
+
+       if (if_outgoing != BATADV_IF_DEFAULT &&
+           if_outgoing != neigh_node->if_incoming)
+               return 0;
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+                         NLM_F_MULTI, BATADV_CMD_GET_ORIGINATORS);
+       if (!hdr)
+               return -ENOBUFS;
+
+       if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+                   orig_node->orig) ||
+           nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+                   neigh_node->addr) ||
+           nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+                       neigh_node->if_incoming->net_dev->ifindex) ||
+           nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) ||
+           nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+                       last_seen_msecs))
+               goto nla_put_failure;
+
+       if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+               goto nla_put_failure;
+
+       genlmsg_end(msg, hdr);
+       return 0;
+
+ nla_put_failure:
+       genlmsg_cancel(msg, hdr);
+       return -EMSGSIZE;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_entry - Dump an originator entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @sub_s: Number of sub entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                             struct batadv_priv *bat_priv,
+                             struct batadv_hard_iface *if_outgoing,
+                             struct batadv_orig_node *orig_node, int *sub_s)
+{
+       struct batadv_neigh_node *neigh_node_best;
+       struct batadv_neigh_node *neigh_node;
+       int sub = 0;
+       bool best;
+       u8 tq_avg_best;
+
+       neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing);
+       if (!neigh_node_best)
+               goto out;
+
+       if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node_best, if_outgoing,
+                                           &tq_avg_best))
+               goto out;
+
+       if (tq_avg_best == 0)
+               goto out;
+
+       hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) {
+               if (sub++ < *sub_s)
+                       continue;
+
+               best = (neigh_node == neigh_node_best);
+
+               if (batadv_iv_ogm_orig_dump_subentry(msg, portid, seq,
+                                                    bat_priv, if_outgoing,
+                                                    orig_node, neigh_node,
+                                                    best)) {
+                       batadv_neigh_node_put(neigh_node_best);
+
+                       *sub_s = sub - 1;
+                       return -EMSGSIZE;
+               }
+       }
+
+ out:
+       if (neigh_node_best)
+               batadv_neigh_node_put(neigh_node_best);
+
+       *sub_s = 0;
+       return 0;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_bucket - Dump an originator bucket into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @head: Bucket to be dumped
+ * @idx_s: Number of entries to be skipped
+ * @sub: Number of sub entries to be skipped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+                              struct batadv_priv *bat_priv,
+                              struct batadv_hard_iface *if_outgoing,
+                              struct hlist_head *head, int *idx_s, int *sub)
+{
+       struct batadv_orig_node *orig_node;
+       int idx = 0;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+               if (idx++ < *idx_s)
+                       continue;
+
+               if (batadv_iv_ogm_orig_dump_entry(msg, portid, seq, bat_priv,
+                                                 if_outgoing, orig_node,
+                                                 sub)) {
+                       rcu_read_unlock();
+                       *idx_s = idx - 1;
+                       return -EMSGSIZE;
+               }
+       }
+       rcu_read_unlock();
+
+       *idx_s = 0;
+       *sub = 0;
+       return 0;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump - Dump the originators into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ */
+static void
+batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb,
+                       struct batadv_priv *bat_priv,
+                       struct batadv_hard_iface *if_outgoing)
+{
+       struct batadv_hashtable *hash = bat_priv->orig_hash;
+       struct hlist_head *head;
+       int bucket = cb->args[0];
+       int idx = cb->args[1];
+       int sub = cb->args[2];
+       int portid = NETLINK_CB(cb->skb).portid;
+
+       while (bucket < hash->size) {
+               head = &hash->table[bucket];
+
+               if (batadv_iv_ogm_orig_dump_bucket(msg, portid,
+                                                  cb->nlh->nlmsg_seq,
+                                                  bat_priv, if_outgoing, head,
+                                                  &idx, &sub))
+                       break;
+
+               bucket++;
+       }
+
+       cb->args[0] = bucket;
+       cb->args[1] = idx;
+       cb->args[2] = sub;
+}
+
 /**
  * batadv_iv_hardif_neigh_print - print a single hop neighbour node
  * @seq: neighbour table seq_file struct
@@ -2029,35 +2233,40 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors
+ * batadv_iv_ogm_neigh_diff - calculate tq difference of two neighbors
  * @neigh1: the first neighbor object of the comparison
  * @if_outgoing1: outgoing interface for the first neighbor
  * @neigh2: the second neighbor object of the comparison
  * @if_outgoing2: outgoing interface for the second neighbor
+ * @diff: pointer to integer receiving the calculated difference
  *
- * Return: a value less, equal to or greater than 0 if the metric via neigh1 is
- * lower, the same as or higher than the metric via neigh2
+ * The content of *@diff is only valid when this function returns true.
+ * It is less than, equal to or greater than 0 if the metric via neigh1 is
+ * lower, the same as or higher than the metric via neigh2
+ *
+ * Return: true when the difference could be calculated, false otherwise
  */
-static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
-                                  struct batadv_hard_iface *if_outgoing1,
-                                  struct batadv_neigh_node *neigh2,
-                                  struct batadv_hard_iface *if_outgoing2)
+static bool batadv_iv_ogm_neigh_diff(struct batadv_neigh_node *neigh1,
+                                    struct batadv_hard_iface *if_outgoing1,
+                                    struct batadv_neigh_node *neigh2,
+                                    struct batadv_hard_iface *if_outgoing2,
+                                    int *diff)
 {
        struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo;
        u8 tq1, tq2;
-       int diff;
+       bool ret = true;
 
        neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
        neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
 
        if (!neigh1_ifinfo || !neigh2_ifinfo) {
-               diff = 0;
+               ret = false;
                goto out;
        }
 
        tq1 = neigh1_ifinfo->bat_iv.tq_avg;
        tq2 = neigh2_ifinfo->bat_iv.tq_avg;
-       diff = tq1 - tq2;
+       *diff = (int)tq1 - (int)tq2;
 
 out:
        if (neigh1_ifinfo)
@@ -2065,6 +2274,162 @@ out:
        if (neigh2_ifinfo)
                batadv_neigh_ifinfo_put(neigh2_ifinfo);
 
+       return ret;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump_neigh - Dump a neighbour into a netlink message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hardif_neigh: Neighbour to be dumped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
+                              struct batadv_hardif_neigh_node *hardif_neigh)
+{
+       void *hdr;
+       unsigned int last_seen_msecs;
+
+       last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen);
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+                         NLM_F_MULTI, BATADV_CMD_GET_NEIGHBORS);
+       if (!hdr)
+               return -ENOBUFS;
+
+       if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+                   hardif_neigh->addr) ||
+           nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+                       hardif_neigh->if_incoming->net_dev->ifindex) ||
+           nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+                       last_seen_msecs))
+               goto nla_put_failure;
+
+       genlmsg_end(msg, hdr);
+       return 0;
+
+ nla_put_failure:
+       genlmsg_cancel(msg, hdr);
+       return -EMSGSIZE;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump_hardif - Dump the neighbours of a hard interface
+ *  into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @hard_iface: Hard interface to dump the neighbours for
+ * @idx_s: Number of entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
+                               struct batadv_priv *bat_priv,
+                               struct batadv_hard_iface *hard_iface,
+                               int *idx_s)
+{
+       struct batadv_hardif_neigh_node *hardif_neigh;
+       int idx = 0;
+
+       hlist_for_each_entry_rcu(hardif_neigh,
+                                &hard_iface->neigh_list, list) {
+               if (idx++ < *idx_s)
+                       continue;
+
+               if (batadv_iv_ogm_neigh_dump_neigh(msg, portid, seq,
+                                                  hardif_neigh)) {
+                       *idx_s = idx - 1;
+                       return -EMSGSIZE;
+               }
+       }
+
+       *idx_s = 0;
+       return 0;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump - Dump the neighbours into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @single_hardif: Limit dump to this hard interface
+ */
+static void
+batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
+                        struct batadv_priv *bat_priv,
+                        struct batadv_hard_iface *single_hardif)
+{
+       struct batadv_hard_iface *hard_iface;
+       int i_hardif = 0;
+       int i_hardif_s = cb->args[0];
+       int idx = cb->args[1];
+       int portid = NETLINK_CB(cb->skb).portid;
+
+       rcu_read_lock();
+       if (single_hardif) {
+               if (i_hardif_s == 0) {
+                       if (batadv_iv_ogm_neigh_dump_hardif(msg, portid,
+                                                           cb->nlh->nlmsg_seq,
+                                                           bat_priv,
+                                                           single_hardif,
+                                                           &idx) == 0)
+                               i_hardif++;
+               }
+       } else {
+               list_for_each_entry_rcu(hard_iface, &batadv_hardif_list,
+                                       list) {
+                       if (hard_iface->soft_iface != bat_priv->soft_iface)
+                               continue;
+
+                       if (i_hardif++ < i_hardif_s)
+                               continue;
+
+                       if (batadv_iv_ogm_neigh_dump_hardif(msg, portid,
+                                                           cb->nlh->nlmsg_seq,
+                                                           bat_priv,
+                                                           hard_iface, &idx)) {
+                               i_hardif--;
+                               break;
+                       }
+               }
+       }
+       rcu_read_unlock();
+
+       cb->args[0] = i_hardif;
+       cb->args[1] = idx;
+}
+
+/**
+ * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors
+ * @neigh1: the first neighbor object of the comparison
+ * @if_outgoing1: outgoing interface for the first neighbor
+ * @neigh2: the second neighbor object of the comparison
+ * @if_outgoing2: outgoing interface for the second neighbor
+ *
+ * Return: a value less than, equal to or greater than 0 if the metric via
+ * neigh1 is lower, the same as or higher than the metric via neigh2
+ */
+static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
+                                  struct batadv_hard_iface *if_outgoing1,
+                                  struct batadv_neigh_node *neigh2,
+                                  struct batadv_hard_iface *if_outgoing2)
+{
+       bool ret;
+       int diff;
+
+       ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2,
+                                      if_outgoing2, &diff);
+       if (!ret)
+               return 0;
+
        return diff;
 }
 
@@ -2085,36 +2450,339 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1,
                           struct batadv_neigh_node *neigh2,
                           struct batadv_hard_iface *if_outgoing2)
 {
-       struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo;
-       u8 tq1, tq2;
        bool ret;
+       int diff;
 
-       neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
-       neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
+       ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2,
+                                      if_outgoing2, &diff);
+       if (!ret)
+               return false;
 
-       /* we can't say that the metric is better */
-       if (!neigh1_ifinfo || !neigh2_ifinfo) {
-               ret = false;
+       ret = diff > -BATADV_TQ_SIMILARITY_THRESHOLD;
+       return ret;
+}
+
+static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
+{
+       /* begin scheduling originator messages on that interface */
+       batadv_iv_ogm_schedule(hard_iface);
+}
+
+static struct batadv_gw_node *
+batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
+{
+       struct batadv_neigh_node *router;
+       struct batadv_neigh_ifinfo *router_ifinfo;
+       struct batadv_gw_node *gw_node, *curr_gw = NULL;
+       u64 max_gw_factor = 0;
+       u64 tmp_gw_factor = 0;
+       u8 max_tq = 0;
+       u8 tq_avg;
+       struct batadv_orig_node *orig_node;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+               orig_node = gw_node->orig_node;
+               router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
+               if (!router)
+                       continue;
+
+               router_ifinfo = batadv_neigh_ifinfo_get(router,
+                                                       BATADV_IF_DEFAULT);
+               if (!router_ifinfo)
+                       goto next;
+
+               if (!kref_get_unless_zero(&gw_node->refcount))
+                       goto next;
+
+               tq_avg = router_ifinfo->bat_iv.tq_avg;
+
+               switch (atomic_read(&bat_priv->gw.sel_class)) {
+               case 1: /* fast connection */
+                       tmp_gw_factor = tq_avg * tq_avg;
+                       tmp_gw_factor *= gw_node->bandwidth_down;
+                       tmp_gw_factor *= 100 * 100;
+                       tmp_gw_factor >>= 18;
+
+                       if ((tmp_gw_factor > max_gw_factor) ||
+                           ((tmp_gw_factor == max_gw_factor) &&
+                            (tq_avg > max_tq))) {
+                               if (curr_gw)
+                                       batadv_gw_node_put(curr_gw);
+                               curr_gw = gw_node;
+                               kref_get(&curr_gw->refcount);
+                       }
+                       break;
+
+               default: /* 2:  stable connection (use best statistic)
+                         * 3:  fast-switch (use best statistic but change as
+                         *     soon as a better gateway appears)
+                         * XX: late-switch (use best statistic but change as
+                         *     soon as a better gateway appears which has
+                         *     $routing_class more tq points)
+                         */
+                       if (tq_avg > max_tq) {
+                               if (curr_gw)
+                                       batadv_gw_node_put(curr_gw);
+                               curr_gw = gw_node;
+                               kref_get(&curr_gw->refcount);
+                       }
+                       break;
+               }
+
+               if (tq_avg > max_tq)
+                       max_tq = tq_avg;
+
+               if (tmp_gw_factor > max_gw_factor)
+                       max_gw_factor = tmp_gw_factor;
+
+               batadv_gw_node_put(gw_node);
+
+next:
+               batadv_neigh_node_put(router);
+               if (router_ifinfo)
+                       batadv_neigh_ifinfo_put(router_ifinfo);
+       }
+       rcu_read_unlock();
+
+       return curr_gw;
+}
+
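
A worked example of the sel_class 1 ("fast connection") metric computed just above; the values are hypothetical, bandwidth_down is in 100 Kbit/s units and tq_avg in 0..255.

#include <linux/types.h>

/* factor = (tq_avg^2 * bandwidth_down * 100 * 100) >> 18
 *
 * gateway A: tq_avg = 200, 10 Mbit/s down (bandwidth_down = 100)
 *   200 * 200 = 40000 -> * 100 = 4000000 -> * 10000 = 4e10 -> >> 18 ~= 152587
 * gateway B: tq_avg = 100, 20 Mbit/s down (bandwidth_down = 200)
 *   100 * 100 = 10000 -> * 200 = 2000000 -> * 10000 = 2e10 -> >> 18 ~= 76293
 *
 * A wins despite advertising half the bandwidth because link quality enters
 * squared; ties on the factor fall back to the higher raw tq_avg.
 */
static u64 gw_fast_connection_factor(u8 tq_avg, u32 bandwidth_down)
{
	u64 factor = (u64)tq_avg * tq_avg;

	factor *= bandwidth_down;
	factor *= 100 * 100;
	return factor >> 18;
}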
+static bool batadv_iv_gw_is_eligible(struct batadv_priv *bat_priv,
+                                    struct batadv_orig_node *curr_gw_orig,
+                                    struct batadv_orig_node *orig_node)
+{
+       struct batadv_neigh_ifinfo *router_orig_ifinfo = NULL;
+       struct batadv_neigh_ifinfo *router_gw_ifinfo = NULL;
+       struct batadv_neigh_node *router_gw = NULL;
+       struct batadv_neigh_node *router_orig = NULL;
+       u8 gw_tq_avg, orig_tq_avg;
+       bool ret = false;
+
+       /* dynamic re-election is performed only on fast or late switch */
+       if (atomic_read(&bat_priv->gw.sel_class) <= 2)
+               return false;
+
+       router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT);
+       if (!router_gw) {
+               ret = true;
                goto out;
        }
 
-       tq1 = neigh1_ifinfo->bat_iv.tq_avg;
-       tq2 = neigh2_ifinfo->bat_iv.tq_avg;
-       ret = (tq1 - tq2) > -BATADV_TQ_SIMILARITY_THRESHOLD;
+       router_gw_ifinfo = batadv_neigh_ifinfo_get(router_gw,
+                                                  BATADV_IF_DEFAULT);
+       if (!router_gw_ifinfo) {
+               ret = true;
+               goto out;
+       }
+
+       router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
+       if (!router_orig)
+               goto out;
+
+       router_orig_ifinfo = batadv_neigh_ifinfo_get(router_orig,
+                                                    BATADV_IF_DEFAULT);
+       if (!router_orig_ifinfo)
+               goto out;
+
+       gw_tq_avg = router_gw_ifinfo->bat_iv.tq_avg;
+       orig_tq_avg = router_orig_ifinfo->bat_iv.tq_avg;
+
+       /* the TQ value has to be better */
+       if (orig_tq_avg < gw_tq_avg)
+               goto out;
+
+       /* if the routing class is greater than 3 the value tells us how much
+        * greater the TQ value of the new gateway must be
+        */
+       if ((atomic_read(&bat_priv->gw.sel_class) > 3) &&
+           (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class)))
+               goto out;
+
+       batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+                  "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n",
+                  gw_tq_avg, orig_tq_avg);
 
+       ret = true;
 out:
-       if (neigh1_ifinfo)
-               batadv_neigh_ifinfo_put(neigh1_ifinfo);
-       if (neigh2_ifinfo)
-               batadv_neigh_ifinfo_put(neigh2_ifinfo);
+       if (router_gw_ifinfo)
+               batadv_neigh_ifinfo_put(router_gw_ifinfo);
+       if (router_orig_ifinfo)
+               batadv_neigh_ifinfo_put(router_orig_ifinfo);
+       if (router_gw)
+               batadv_neigh_node_put(router_gw);
+       if (router_orig)
+               batadv_neigh_node_put(router_orig);
 
        return ret;
 }
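
To make the re-election threshold above concrete (hypothetical numbers):

/* For sel_class > 3 ("late switch") the candidate is eligible only when
 *
 *   orig_tq_avg - gw_tq_avg >= sel_class
 *
 * e.g. sel_class = 5, current gateway at gw_tq_avg = 200: a candidate with
 * orig_tq_avg = 204 is ignored (204 - 200 = 4 < 5) while orig_tq_avg = 205
 * triggers re-selection.  For sel_class 3 ("fast switch") any candidate
 * with orig_tq_avg >= gw_tq_avg already passes the earlier check.
 */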
 
-static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
+/* fails if orig_node has no router */
+static int batadv_iv_gw_write_buffer_text(struct batadv_priv *bat_priv,
+                                         struct seq_file *seq,
+                                         const struct batadv_gw_node *gw_node)
 {
-       /* begin scheduling originator messages on that interface */
-       batadv_iv_ogm_schedule(hard_iface);
+       struct batadv_gw_node *curr_gw;
+       struct batadv_neigh_node *router;
+       struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+       int ret = -1;
+
+       router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+       if (!router)
+               goto out;
+
+       router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+       if (!router_ifinfo)
+               goto out;
+
+       curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+       seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n",
+                  (curr_gw == gw_node ? "=>" : "  "),
+                  gw_node->orig_node->orig,
+                  router_ifinfo->bat_iv.tq_avg, router->addr,
+                  router->if_incoming->net_dev->name,
+                  gw_node->bandwidth_down / 10,
+                  gw_node->bandwidth_down % 10,
+                  gw_node->bandwidth_up / 10,
+                  gw_node->bandwidth_up % 10);
+       ret = seq_has_overflowed(seq) ? -1 : 0;
+
+       if (curr_gw)
+               batadv_gw_node_put(curr_gw);
+out:
+       if (router_ifinfo)
+               batadv_neigh_ifinfo_put(router_ifinfo);
+       if (router)
+               batadv_neigh_node_put(router);
+       return ret;
+}
+
+static void batadv_iv_gw_print(struct batadv_priv *bat_priv,
+                              struct seq_file *seq)
+{
+       struct batadv_gw_node *gw_node;
+       int gw_count = 0;
+
+       seq_puts(seq,
+                "      Gateway      (#/255)           Nexthop [outgoingIF]: advertised uplink bandwidth\n");
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+               /* fails if orig_node has no router */
+               if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
+                       continue;
+
+               gw_count++;
+       }
+       rcu_read_unlock();
+
+       if (gw_count == 0)
+               seq_puts(seq, "No gateways in range ...\n");
+}
+
+/**
+ * batadv_iv_gw_dump_entry - Dump a gateway into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @gw_node: Gateway to be dumped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                                  struct batadv_priv *bat_priv,
+                                  struct batadv_gw_node *gw_node)
+{
+       struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+       struct batadv_neigh_node *router;
+       struct batadv_gw_node *curr_gw;
+       int ret = -EINVAL;
+       void *hdr;
+
+       router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+       if (!router)
+               goto out;
+
+       router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+       if (!router_ifinfo)
+               goto out;
+
+       curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+                         NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+       if (!hdr) {
+               ret = -ENOBUFS;
+               goto out;
+       }
+
+       ret = -EMSGSIZE;
+
+       if (curr_gw == gw_node)
+               if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) {
+                       genlmsg_cancel(msg, hdr);
+                       goto out;
+               }
+
+       if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+                   gw_node->orig_node->orig) ||
+           nla_put_u8(msg, BATADV_ATTR_TQ, router_ifinfo->bat_iv.tq_avg) ||
+           nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN,
+                   router->addr) ||
+           nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+                          router->if_incoming->net_dev->name) ||
+           nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN,
+                       gw_node->bandwidth_down) ||
+           nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP,
+                       gw_node->bandwidth_up)) {
+               genlmsg_cancel(msg, hdr);
+               goto out;
+       }
+
+       genlmsg_end(msg, hdr);
+       ret = 0;
+
+out:
+       if (router_ifinfo)
+               batadv_neigh_ifinfo_put(router_ifinfo);
+       if (router)
+               batadv_neigh_node_put(router);
+       return ret;
+}
+
+/**
+ * batadv_iv_gw_dump - Dump gateways into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ */
+static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
+                             struct batadv_priv *bat_priv)
+{
+       int portid = NETLINK_CB(cb->skb).portid;
+       struct batadv_gw_node *gw_node;
+       int idx_skip = cb->args[0];
+       int idx = 0;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+               if (idx++ < idx_skip)
+                       continue;
+
+               if (batadv_iv_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
+                                           bat_priv, gw_node)) {
+                       idx_skip = idx - 1;
+                       goto unlock;
+               }
+       }
+
+       idx_skip = idx;
+unlock:
+       rcu_read_unlock();
+
+       cb->args[0] = idx_skip;
 }
 
 static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
@@ -2130,13 +2798,21 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
                .cmp = batadv_iv_ogm_neigh_cmp,
                .is_similar_or_better = batadv_iv_ogm_neigh_is_sob,
                .print = batadv_iv_neigh_print,
+               .dump = batadv_iv_ogm_neigh_dump,
        },
        .orig = {
                .print = batadv_iv_ogm_orig_print,
+               .dump = batadv_iv_ogm_orig_dump,
                .free = batadv_iv_ogm_orig_free,
                .add_if = batadv_iv_ogm_orig_add_if,
                .del_if = batadv_iv_ogm_orig_del_if,
        },
+       .gw = {
+               .get_best_gw_node = batadv_iv_gw_get_best_gw_node,
+               .is_eligible = batadv_iv_gw_is_eligible,
+               .print = batadv_iv_gw_print,
+               .dump = batadv_iv_gw_dump,
+       },
 };
 
 int __init batadv_iv_init(void)
index 0366cbf..9e872dc 100644 (file)
 #include <linux/atomic.h>
 #include <linux/bug.h>
 #include <linux/cache.h>
+#include <linux/errno.h>
+#include <linux/if_ether.h>
 #include <linux/init.h>
 #include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "bat_v_elp.h"
 #include "bat_v_ogm.h"
+#include "gateway_client.h"
+#include "gateway_common.h"
 #include "hard-interface.h"
 #include "hash.h"
+#include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
 
+struct sk_buff;
+
 static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface)
 {
        struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
@@ -199,6 +213,138 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv,
                seq_puts(seq, "No batman nodes in range ...\n");
 }
 
+/**
+ * batadv_v_neigh_dump_neigh - Dump a neighbour into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hardif_neigh: Neighbour to dump
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
+                         struct batadv_hardif_neigh_node *hardif_neigh)
+{
+       void *hdr;
+       unsigned int last_seen_msecs;
+       u32 throughput;
+
+       last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen);
+       throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput);
+       throughput = throughput * 100;
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+                         BATADV_CMD_GET_NEIGHBORS);
+       if (!hdr)
+               return -ENOBUFS;
+
+       if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+                   hardif_neigh->addr) ||
+           nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+                       hardif_neigh->if_incoming->net_dev->ifindex) ||
+           nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+                       last_seen_msecs) ||
+           nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput))
+               goto nla_put_failure;
+
+       genlmsg_end(msg, hdr);
+       return 0;
+
+ nla_put_failure:
+       genlmsg_cancel(msg, hdr);
+       return -EMSGSIZE;
+}
+
+/**
+ * batadv_v_neigh_dump_hardif - Dump the neighbours of a hard interface into
+ *  a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @hard_iface: The hard interface to be dumped
+ * @idx_s: Entries to be skipped
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
+                          struct batadv_priv *bat_priv,
+                          struct batadv_hard_iface *hard_iface,
+                          int *idx_s)
+{
+       struct batadv_hardif_neigh_node *hardif_neigh;
+       int idx = 0;
+
+       hlist_for_each_entry_rcu(hardif_neigh,
+                                &hard_iface->neigh_list, list) {
+               if (idx++ < *idx_s)
+                       continue;
+
+               if (batadv_v_neigh_dump_neigh(msg, portid, seq, hardif_neigh)) {
+                       *idx_s = idx - 1;
+                       return -EMSGSIZE;
+               }
+       }
+
+       *idx_s = 0;
+       return 0;
+}
+
+/**
+ * batadv_v_neigh_dump - Dump the neighbours into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @single_hardif: Limit dumping to this hard interface
+ */
+static void
+batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
+                   struct batadv_priv *bat_priv,
+                   struct batadv_hard_iface *single_hardif)
+{
+       struct batadv_hard_iface *hard_iface;
+       int i_hardif = 0;
+       int i_hardif_s = cb->args[0];
+       int idx = cb->args[1];
+       int portid = NETLINK_CB(cb->skb).portid;
+
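+       /* cb->args[] keeps the hard interface and neighbour positions reached
+        * by the previous dump call, so an interrupted dump can be resumed
+        */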
+       rcu_read_lock();
+       if (single_hardif) {
+               if (i_hardif_s == 0) {
+                       if (batadv_v_neigh_dump_hardif(msg, portid,
+                                                      cb->nlh->nlmsg_seq,
+                                                      bat_priv, single_hardif,
+                                                      &idx) == 0)
+                               i_hardif++;
+               }
+       } else {
+               list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+                       if (hard_iface->soft_iface != bat_priv->soft_iface)
+                               continue;
+
+                       if (i_hardif++ < i_hardif_s)
+                               continue;
+
+                       if (batadv_v_neigh_dump_hardif(msg, portid,
+                                                      cb->nlh->nlmsg_seq,
+                                                      bat_priv, hard_iface,
+                                                      &idx)) {
+                               i_hardif--;
+                               break;
+                       }
+               }
+       }
+       rcu_read_unlock();
+
+       cb->args[0] = i_hardif;
+       cb->args[1] = idx;
+}
+
 /**
  * batadv_v_orig_print - print the originator table
  * @bat_priv: the bat priv with all the soft interface information
@@ -266,6 +412,204 @@ next:
                seq_puts(seq, "No batman nodes in range ...\n");
 }
 
+/**
+ * batadv_v_orig_dump_subentry - Dump an originator subentry into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @neigh_node: Single hops neighbour
+ * @best: Is the best originator
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+                           struct batadv_priv *bat_priv,
+                           struct batadv_hard_iface *if_outgoing,
+                           struct batadv_orig_node *orig_node,
+                           struct batadv_neigh_node *neigh_node,
+                           bool best)
+{
+       struct batadv_neigh_ifinfo *n_ifinfo;
+       unsigned int last_seen_msecs;
+       u32 throughput;
+       void *hdr;
+
+       n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
+       if (!n_ifinfo)
+               return 0;
+
+       throughput = n_ifinfo->bat_v.throughput * 100;
+
+       batadv_neigh_ifinfo_put(n_ifinfo);
+
+       last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen);
+
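+       /* skip neighbours which do not use the requested outgoing interface */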
+       if (if_outgoing != BATADV_IF_DEFAULT &&
+           if_outgoing != neigh_node->if_incoming)
+               return 0;
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+                         BATADV_CMD_GET_ORIGINATORS);
+       if (!hdr)
+               return -ENOBUFS;
+
+       if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) ||
+           nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+                   neigh_node->addr) ||
+           nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+                       neigh_node->if_incoming->net_dev->ifindex) ||
+           nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput) ||
+           nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+                       last_seen_msecs))
+               goto nla_put_failure;
+
+       if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+               goto nla_put_failure;
+
+       genlmsg_end(msg, hdr);
+       return 0;
+
+ nla_put_failure:
+       genlmsg_cancel(msg, hdr);
+       return -EMSGSIZE;
+}
+
+/**
+ * batadv_v_orig_dump_entry - Dump an originator entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @sub_s: Number of sub entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                        struct batadv_priv *bat_priv,
+                        struct batadv_hard_iface *if_outgoing,
+                        struct batadv_orig_node *orig_node, int *sub_s)
+{
+       struct batadv_neigh_node *neigh_node_best;
+       struct batadv_neigh_node *neigh_node;
+       int sub = 0;
+       bool best;
+
+       neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing);
+       if (!neigh_node_best)
+               goto out;
+
+       hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) {
+               if (sub++ < *sub_s)
+                       continue;
+
+               best = (neigh_node == neigh_node_best);
+
+               if (batadv_v_orig_dump_subentry(msg, portid, seq, bat_priv,
+                                               if_outgoing, orig_node,
+                                               neigh_node, best)) {
+                       batadv_neigh_node_put(neigh_node_best);
+
+                       *sub_s = sub - 1;
+                       return -EMSGSIZE;
+               }
+       }
+
+ out:
+       if (neigh_node_best)
+               batadv_neigh_node_put(neigh_node_best);
+
+       *sub_s = 0;
+       return 0;
+}
+
+/**
+ * batadv_v_orig_dump_bucket - Dump an originator bucket into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @head: Bucket to be dumped
+ * @idx_s: Number of entries to be skipped
+ * @sub: Number of sub entries to be skipped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+                         struct batadv_priv *bat_priv,
+                         struct batadv_hard_iface *if_outgoing,
+                         struct hlist_head *head, int *idx_s, int *sub)
+{
+       struct batadv_orig_node *orig_node;
+       int idx = 0;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+               if (idx++ < *idx_s)
+                       continue;
+
+               if (batadv_v_orig_dump_entry(msg, portid, seq, bat_priv,
+                                            if_outgoing, orig_node, sub)) {
+                       rcu_read_unlock();
+                       *idx_s = idx - 1;
+                       return -EMSGSIZE;
+               }
+       }
+       rcu_read_unlock();
+
+       *idx_s = 0;
+       *sub = 0;
+       return 0;
+}
+
+/**
+ * batadv_v_orig_dump - Dump the originators into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ */
+static void
+batadv_v_orig_dump(struct sk_buff *msg, struct netlink_callback *cb,
+                  struct batadv_priv *bat_priv,
+                  struct batadv_hard_iface *if_outgoing)
+{
+       struct batadv_hashtable *hash = bat_priv->orig_hash;
+       struct hlist_head *head;
+       int bucket = cb->args[0];
+       int idx = cb->args[1];
+       int sub = cb->args[2];
+       int portid = NETLINK_CB(cb->skb).portid;
+
+       while (bucket < hash->size) {
+               head = &hash->table[bucket];
+
+               if (batadv_v_orig_dump_bucket(msg, portid,
+                                             cb->nlh->nlmsg_seq,
+                                             bat_priv, if_outgoing, head, &idx,
+                                             &sub))
+                       break;
+
+               bucket++;
+       }
+
+       cb->args[0] = bucket;
+       cb->args[1] = idx;
+       cb->args[2] = sub;
+}
+
 static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1,
                              struct batadv_hard_iface *if_outgoing1,
                              struct batadv_neigh_node *neigh2,
@@ -320,6 +664,363 @@ err_ifinfo1:
        return ret;
 }
 
+static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv,
+                                       char *buff, size_t count)
+{
+       u32 old_class, class;
+
+       if (!batadv_parse_throughput(bat_priv->soft_iface, buff,
+                                    "B.A.T.M.A.N. V GW selection class",
+                                    &class))
+               return -EINVAL;
+
+       old_class = atomic_read(&bat_priv->gw.sel_class);
+       atomic_set(&bat_priv->gw.sel_class, class);
+
+       if (old_class != class)
+               batadv_gw_reselect(bat_priv);
+
+       return count;
+}
+
+static ssize_t batadv_v_show_sel_class(struct batadv_priv *bat_priv, char *buff)
+{
+       u32 class = atomic_read(&bat_priv->gw.sel_class);
+
+       return sprintf(buff, "%u.%u MBit\n", class / 10, class % 10);
+}
+
+/**
+ * batadv_v_gw_throughput_get - retrieve the GW-bandwidth for a given GW
+ * @gw_node: the GW to retrieve the metric for
+ * @bw: the pointer where the metric will be stored. The metric is computed as
+ *  the minimum between the GW advertised throughput and the path throughput to
+ *  it in the mesh
+ *
+ * Return: 0 on success, -1 on failure
+ */
+static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw)
+{
+       struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+       struct batadv_orig_node *orig_node;
+       struct batadv_neigh_node *router;
+       int ret = -1;
+
+       orig_node = gw_node->orig_node;
+       router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
+       if (!router)
+               goto out;
+
+       router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+       if (!router_ifinfo)
+               goto out;
+
+       /* the GW metric is computed as the minimum between the path throughput
+        * to reach the GW itself and the advertised bandwidth.
+        * This gives us an approximation of the effective throughput that the
+        * client can expect via this particular GW node
+        */
+       *bw = router_ifinfo->bat_v.throughput;
+       *bw = min_t(u32, *bw, gw_node->bandwidth_down);
+
+       ret = 0;
+out:
+       if (router)
+               batadv_neigh_node_put(router);
+       if (router_ifinfo)
+               batadv_neigh_ifinfo_put(router_ifinfo);
+
+       return ret;
+}
+
+/**
+ * batadv_v_gw_get_best_gw_node - retrieve the best GW node
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: the GW node having the best GW-metric, NULL if no GW is known
+ */
+static struct batadv_gw_node *
+batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
+{
+       struct batadv_gw_node *gw_node, *curr_gw = NULL;
+       u32 max_bw = 0, bw;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
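+               /* skip gateway nodes which are currently being freed */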
+               if (!kref_get_unless_zero(&gw_node->refcount))
+                       continue;
+
+               if (batadv_v_gw_throughput_get(gw_node, &bw) < 0)
+                       goto next;
+
+               if (curr_gw && (bw <= max_bw))
+                       goto next;
+
+               if (curr_gw)
+                       batadv_gw_node_put(curr_gw);
+
+               curr_gw = gw_node;
+               kref_get(&curr_gw->refcount);
+               max_bw = bw;
+
+next:
+               batadv_gw_node_put(gw_node);
+       }
+       rcu_read_unlock();
+
+       return curr_gw;
+}
+
+/**
+ * batadv_v_gw_is_eligible - check if an originator would be selected as GW
+ * @bat_priv: the bat priv with all the soft interface information
+ * @curr_gw_orig: originator representing the currently selected GW
+ * @orig_node: the originator representing the new candidate
+ *
+ * Return: true if orig_node can be selected as current GW, false otherwise
+ */
+static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
+                                   struct batadv_orig_node *curr_gw_orig,
+                                   struct batadv_orig_node *orig_node)
+{
+       struct batadv_gw_node *curr_gw = NULL, *orig_gw = NULL;
+       u32 gw_throughput, orig_throughput, threshold;
+       bool ret = false;
+
+       threshold = atomic_read(&bat_priv->gw.sel_class);
+
+       curr_gw = batadv_gw_node_get(bat_priv, curr_gw_orig);
+       if (!curr_gw) {
+               ret = true;
+               goto out;
+       }
+
+       if (batadv_v_gw_throughput_get(curr_gw, &gw_throughput) < 0) {
+               ret = true;
+               goto out;
+       }
+
+       orig_gw = batadv_gw_node_get(bat_priv, orig_node);
+       if (!orig_gw)
+               goto out;
+
+       if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0)
+               goto out;
+
+       if (orig_throughput < gw_throughput)
+               goto out;
+
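+       /* the candidate has to outperform the current gateway by at least the
+        * configured selection class threshold to trigger a switch
+        */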
+       if ((orig_throughput - gw_throughput) < threshold)
+               goto out;
+
+       batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+                  "Restarting gateway selection: better gateway found (throughput curr: %u, throughput new: %u)\n",
+                  gw_throughput, orig_throughput);
+
+       ret = true;
+out:
+       if (curr_gw)
+               batadv_gw_node_put(curr_gw);
+       if (orig_gw)
+               batadv_gw_node_put(orig_gw);
+
+       return ret;
+}
+
+/* fails if orig_node has no router */
+static int batadv_v_gw_write_buffer_text(struct batadv_priv *bat_priv,
+                                        struct seq_file *seq,
+                                        const struct batadv_gw_node *gw_node)
+{
+       struct batadv_gw_node *curr_gw;
+       struct batadv_neigh_node *router;
+       struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+       int ret = -1;
+
+       router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+       if (!router)
+               goto out;
+
+       router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+       if (!router_ifinfo)
+               goto out;
+
+       curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+       seq_printf(seq, "%s %pM (%9u.%1u) %pM [%10s]: %u.%u/%u.%u MBit\n",
+                  (curr_gw == gw_node ? "=>" : "  "),
+                  gw_node->orig_node->orig,
+                  router_ifinfo->bat_v.throughput / 10,
+                  router_ifinfo->bat_v.throughput % 10, router->addr,
+                  router->if_incoming->net_dev->name,
+                  gw_node->bandwidth_down / 10,
+                  gw_node->bandwidth_down % 10,
+                  gw_node->bandwidth_up / 10,
+                  gw_node->bandwidth_up % 10);
+       ret = seq_has_overflowed(seq) ? -1 : 0;
+
+       if (curr_gw)
+               batadv_gw_node_put(curr_gw);
+out:
+       if (router_ifinfo)
+               batadv_neigh_ifinfo_put(router_ifinfo);
+       if (router)
+               batadv_neigh_node_put(router);
+       return ret;
+}
+
+/**
+ * batadv_v_gw_print - print the gateway list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @seq: gateway table seq_file struct
+ */
+static void batadv_v_gw_print(struct batadv_priv *bat_priv,
+                             struct seq_file *seq)
+{
+       struct batadv_gw_node *gw_node;
+       int gw_count = 0;
+
+       seq_puts(seq,
+                "      Gateway        ( throughput)           Nexthop [outgoingIF]: advertised uplink bandwidth\n");
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+               /* fails if orig_node has no router */
+               if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
+                       continue;
+
+               gw_count++;
+       }
+       rcu_read_unlock();
+
+       if (gw_count == 0)
+               seq_puts(seq, "No gateways in range ...\n");
+}
+
+/**
+ * batadv_v_gw_dump_entry - Dump a gateway into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @gw_node: Gateway to be dumped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                                 struct batadv_priv *bat_priv,
+                                 struct batadv_gw_node *gw_node)
+{
+       struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+       struct batadv_neigh_node *router;
+       struct batadv_gw_node *curr_gw = NULL;
+       int ret = -EINVAL;
+       void *hdr;
+
+       router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+       if (!router)
+               goto out;
+
+       router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+       if (!router_ifinfo)
+               goto out;
+
+       curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+                         NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+       if (!hdr) {
+               ret = -ENOBUFS;
+               goto out;
+       }
+
+       ret = -EMSGSIZE;
+
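+       /* from here on a failing nla_put means the message ran out of room */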
+       if (curr_gw == gw_node) {
+               if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) {
+                       genlmsg_cancel(msg, hdr);
+                       goto out;
+               }
+       }
+
+       if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+                   gw_node->orig_node->orig)) {
+               genlmsg_cancel(msg, hdr);
+               goto out;
+       }
+
+       if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT,
+                       router_ifinfo->bat_v.throughput)) {
+               genlmsg_cancel(msg, hdr);
+               goto out;
+       }
+
+       if (nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, router->addr)) {
+               genlmsg_cancel(msg, hdr);
+               goto out;
+       }
+
+       if (nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+                          router->if_incoming->net_dev->name)) {
+               genlmsg_cancel(msg, hdr);
+               goto out;
+       }
+
+       if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN,
+                       gw_node->bandwidth_down)) {
+               genlmsg_cancel(msg, hdr);
+               goto out;
+       }
+
+       if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, gw_node->bandwidth_up)) {
+               genlmsg_cancel(msg, hdr);
+               goto out;
+       }
+
+       genlmsg_end(msg, hdr);
+       ret = 0;
+
+out:
+       if (curr_gw)
+               batadv_gw_node_put(curr_gw);
+       if (router_ifinfo)
+               batadv_neigh_ifinfo_put(router_ifinfo);
+       if (router)
+               batadv_neigh_node_put(router);
+       return ret;
+}
+
+/**
+ * batadv_v_gw_dump - Dump gateways into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ */
+static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
+                            struct batadv_priv *bat_priv)
+{
+       int portid = NETLINK_CB(cb->skb).portid;
+       struct batadv_gw_node *gw_node;
+       int idx_skip = cb->args[0];
+       int idx = 0;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+               if (idx++ < idx_skip)
+                       continue;
+
+               if (batadv_v_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
+                                          bat_priv, gw_node)) {
+                       idx_skip = idx - 1;
+                       goto unlock;
+               }
+       }
+
+       idx_skip = idx;
+unlock:
+       rcu_read_unlock();
+
+       cb->args[0] = idx_skip;
+}
+
 static struct batadv_algo_ops batadv_batman_v __read_mostly = {
        .name = "BATMAN_V",
        .iface = {
@@ -334,9 +1035,19 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
                .cmp = batadv_v_neigh_cmp,
                .is_similar_or_better = batadv_v_neigh_is_sob,
                .print = batadv_v_neigh_print,
+               .dump = batadv_v_neigh_dump,
        },
        .orig = {
                .print = batadv_v_orig_print,
+               .dump = batadv_v_orig_dump,
+       },
+       .gw = {
+               .store_sel_class = batadv_v_store_sel_class,
+               .show_sel_class = batadv_v_show_sel_class,
+               .get_best_gw_node = batadv_v_gw_get_best_gw_node,
+               .is_eligible = batadv_v_gw_is_eligible,
+               .print = batadv_v_gw_print,
+               .dump = batadv_v_gw_dump,
        },
 };
 
@@ -363,7 +1074,16 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface)
  */
 int batadv_v_mesh_init(struct batadv_priv *bat_priv)
 {
-       return batadv_v_ogm_init(bat_priv);
+       int ret = 0;
+
+       ret = batadv_v_ogm_init(bat_priv);
+       if (ret < 0)
+               return ret;
+
+       /* set default throughput difference threshold to 5Mbps */
+       atomic_set(&bat_priv->gw.sel_class, 50);
+
+       return 0;
 }
 
 /**
index ad2ffe1..35ed1d3 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
 #include <linux/string.h>
 #include <linux/workqueue.h>
 #include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
+#include "soft-interface.h"
 #include "sysfs.h"
 #include "translation-table.h"
 
@@ -1148,7 +1155,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
 
        /* Let the loopdetect frames on the mesh in any case. */
        if (bla_dst->type == BATADV_CLAIM_TYPE_LOOPDETECT)
-               return 0;
+               return false;
 
        /* check if it is a claim frame. */
        ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst,
@@ -2051,6 +2058,168 @@ out:
        return 0;
 }
 
+/**
+ * batadv_bla_claim_dump_entry - dump one entry of the claim table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @claim: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                           struct batadv_hard_iface *primary_if,
+                           struct batadv_bla_claim *claim)
+{
+       u8 *primary_addr = primary_if->net_dev->dev_addr;
+       u16 backbone_crc;
+       bool is_own;
+       void *hdr;
+       int ret = -EINVAL;
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+                         NLM_F_MULTI, BATADV_CMD_GET_BLA_CLAIM);
+       if (!hdr) {
+               ret = -ENOBUFS;
+               goto out;
+       }
+
+       is_own = batadv_compare_eth(claim->backbone_gw->orig,
+                                   primary_addr);
+
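+       /* take a consistent snapshot of the backbone CRC */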
+       spin_lock_bh(&claim->backbone_gw->crc_lock);
+       backbone_crc = claim->backbone_gw->crc;
+       spin_unlock_bh(&claim->backbone_gw->crc_lock);
+
+       if (is_own)
+               if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) {
+                       genlmsg_cancel(msg, hdr);
+                       goto out;
+               }
+
+       if (nla_put(msg, BATADV_ATTR_BLA_ADDRESS, ETH_ALEN, claim->addr) ||
+           nla_put_u16(msg, BATADV_ATTR_BLA_VID, claim->vid) ||
+           nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN,
+                   claim->backbone_gw->orig) ||
+           nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+                       backbone_crc)) {
+               genlmsg_cancel(msg, hdr);
+               goto out;
+       }
+
+       genlmsg_end(msg, hdr);
+       ret = 0;
+
+out:
+       return ret;
+}
+
+/**
+ * batadv_bla_claim_dump_bucket - dump one bucket of the claim table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: always 0.
+ */
+static int
+batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+                            struct batadv_hard_iface *primary_if,
+                            struct hlist_head *head, int *idx_skip)
+{
+       struct batadv_bla_claim *claim;
+       int idx = 0;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(claim, head, hash_entry) {
+               if (idx++ < *idx_skip)
+                       continue;
+               if (batadv_bla_claim_dump_entry(msg, portid, seq,
+                                               primary_if, claim)) {
+                       *idx_skip = idx - 1;
+                       goto unlock;
+               }
+       }
+
+       *idx_skip = idx;
+unlock:
+       rcu_read_unlock();
+       return 0;
+}
+
+/**
+ * batadv_bla_claim_dump - dump claim table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       struct batadv_hard_iface *primary_if = NULL;
+       int portid = NETLINK_CB(cb->skb).portid;
+       struct net *net = sock_net(cb->skb->sk);
+       struct net_device *soft_iface;
+       struct batadv_hashtable *hash;
+       struct batadv_priv *bat_priv;
+       int bucket = cb->args[0];
+       struct hlist_head *head;
+       int idx = cb->args[1];
+       int ifindex;
+       int ret = 0;
+
+       ifindex = batadv_netlink_get_ifindex(cb->nlh,
+                                            BATADV_ATTR_MESH_IFINDEX);
+       if (!ifindex)
+               return -EINVAL;
+
+       soft_iface = dev_get_by_index(net, ifindex);
+       if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+               ret = -ENODEV;
+               goto out;
+       }
+
+       bat_priv = netdev_priv(soft_iface);
+       hash = bat_priv->bla.claim_hash;
+
+       primary_if = batadv_primary_if_get_selected(bat_priv);
+       if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       while (bucket < hash->size) {
+               head = &hash->table[bucket];
+
+               if (batadv_bla_claim_dump_bucket(msg, portid,
+                                                cb->nlh->nlmsg_seq,
+                                                primary_if, head, &idx))
+                       break;
+               bucket++;
+       }
+
+       cb->args[0] = bucket;
+       cb->args[1] = idx;
+
+       ret = msg->len;
+
+out:
+       if (primary_if)
+               batadv_hardif_put(primary_if);
+
+       if (soft_iface)
+               dev_put(soft_iface);
+
+       return ret;
+}
+
 /**
  * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq
  *  file
@@ -2114,3 +2283,167 @@ out:
                batadv_hardif_put(primary_if);
        return 0;
 }
+
+/**
+ * batadv_bla_backbone_dump_entry - dump one entry of the backbone table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @backbone_gw: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                              struct batadv_hard_iface *primary_if,
+                              struct batadv_bla_backbone_gw *backbone_gw)
+{
+       u8 *primary_addr = primary_if->net_dev->dev_addr;
+       u16 backbone_crc;
+       bool is_own;
+       int msecs;
+       void *hdr;
+       int ret = -EINVAL;
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+                         NLM_F_MULTI, BATADV_CMD_GET_BLA_BACKBONE);
+       if (!hdr) {
+               ret = -ENOBUFS;
+               goto out;
+       }
+
+       is_own = batadv_compare_eth(backbone_gw->orig, primary_addr);
+
+       spin_lock_bh(&backbone_gw->crc_lock);
+       backbone_crc = backbone_gw->crc;
+       spin_unlock_bh(&backbone_gw->crc_lock);
+
+       msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime);
+
+       if (is_own)
+               if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) {
+                       genlmsg_cancel(msg, hdr);
+                       goto out;
+               }
+
+       if (nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN,
+                   backbone_gw->orig) ||
+           nla_put_u16(msg, BATADV_ATTR_BLA_VID, backbone_gw->vid) ||
+           nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+                       backbone_crc) ||
+           nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) {
+               genlmsg_cancel(msg, hdr);
+               goto out;
+       }
+
+       genlmsg_end(msg, hdr);
+       ret = 0;
+
+out:
+       return ret;
+}
+
+/**
+ * batadv_bla_backbone_dump_bucket - dump one bucket of the backbone table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: always 0.
+ */
+static int
+batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+                               struct batadv_hard_iface *primary_if,
+                               struct hlist_head *head, int *idx_skip)
+{
+       struct batadv_bla_backbone_gw *backbone_gw;
+       int idx = 0;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
+               if (idx++ < *idx_skip)
+                       continue;
+               if (batadv_bla_backbone_dump_entry(msg, portid, seq,
+                                                  primary_if, backbone_gw)) {
+                       *idx_skip = idx - 1;
+                       goto unlock;
+               }
+       }
+
+       *idx_skip = idx;
+unlock:
+       rcu_read_unlock();
+       return 0;
+}
+
+/**
+ * batadv_bla_backbone_dump - dump backbone table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       struct batadv_hard_iface *primary_if = NULL;
+       int portid = NETLINK_CB(cb->skb).portid;
+       struct net *net = sock_net(cb->skb->sk);
+       struct net_device *soft_iface;
+       struct batadv_hashtable *hash;
+       struct batadv_priv *bat_priv;
+       int bucket = cb->args[0];
+       struct hlist_head *head;
+       int idx = cb->args[1];
+       int ifindex;
+       int ret = 0;
+
+       ifindex = batadv_netlink_get_ifindex(cb->nlh,
+                                            BATADV_ATTR_MESH_IFINDEX);
+       if (!ifindex)
+               return -EINVAL;
+
+       soft_iface = dev_get_by_index(net, ifindex);
+       if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+               ret = -ENODEV;
+               goto out;
+       }
+
+       bat_priv = netdev_priv(soft_iface);
+       hash = bat_priv->bla.backbone_hash;
+
+       primary_if = batadv_primary_if_get_selected(bat_priv);
+       if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       while (bucket < hash->size) {
+               head = &hash->table[bucket];
+
+               if (batadv_bla_backbone_dump_bucket(msg, portid,
+                                                   cb->nlh->nlmsg_seq,
+                                                   primary_if, head, &idx))
+                       break;
+               bucket++;
+       }
+
+       cb->args[0] = bucket;
+       cb->args[1] = idx;
+
+       ret = msg->len;
+
+out:
+       if (primary_if)
+               batadv_hardif_put(primary_if);
+
+       if (soft_iface)
+               dev_put(soft_iface);
+
+       return ret;
+}
index 0f01dae..1ae93e4 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 
 struct net_device;
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -35,8 +36,10 @@ bool batadv_bla_is_backbone_gw(struct sk_buff *skb,
                               struct batadv_orig_node *orig_node,
                               int hdr_size);
 int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
                                             void *offset);
+int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb);
 bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
                                    unsigned short vid);
 bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
@@ -47,7 +50,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
 void batadv_bla_status_update(struct net_device *net_dev);
 int batadv_bla_init(struct batadv_priv *bat_priv);
 void batadv_bla_free(struct batadv_priv *bat_priv);
 
 #define BATADV_BLA_CRC_INIT    0
 #else /* ifdef CONFIG_BATMAN_ADV_BLA */
 
@@ -112,6 +115,18 @@ static inline void batadv_bla_free(struct batadv_priv *bat_priv)
 {
 }
 
+static inline int batadv_bla_claim_dump(struct sk_buff *msg,
+                                       struct netlink_callback *cb)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int batadv_bla_backbone_dump(struct sk_buff *msg,
+                                          struct netlink_callback *cb)
+{
+       return -EOPNOTSUPP;
+}
+
 #endif /* ifdef CONFIG_BATMAN_ADV_BLA */
 
 #endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */
index 1d68b6e..b4ffba7 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/stddef.h>
 #include <linux/stringify.h>
 #include <linux/sysfs.h>
+#include <net/net_namespace.h>
 
 #include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
@@ -305,12 +306,16 @@ void batadv_debugfs_destroy(void)
  */
 int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
 {
+       struct net *net = dev_net(hard_iface->net_dev);
        struct batadv_debuginfo **bat_debug;
        struct dentry *file;
 
        if (!batadv_debugfs)
                goto out;
 
+       if (net != &init_net)
+               return 0;
+
        hard_iface->debug_dir = debugfs_create_dir(hard_iface->net_dev->name,
                                                   batadv_debugfs);
        if (!hard_iface->debug_dir)
@@ -341,6 +346,11 @@ out:
  */
 void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface)
 {
+       struct net *net = dev_net(hard_iface->net_dev);
+
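+       /* debugfs entries exist only for interfaces in the initial netns, so
+        * there is nothing to clean up for other namespaces
+        */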
+       if (net != &init_net)
+               return;
+
        if (batadv_debugfs) {
                debugfs_remove_recursive(hard_iface->debug_dir);
                hard_iface->debug_dir = NULL;
@@ -351,11 +361,15 @@ int batadv_debugfs_add_meshif(struct net_device *dev)
 {
        struct batadv_priv *bat_priv = netdev_priv(dev);
        struct batadv_debuginfo **bat_debug;
+       struct net *net = dev_net(dev);
        struct dentry *file;
 
        if (!batadv_debugfs)
                goto out;
 
+       if (net != &init_net)
+               return 0;
+
        bat_priv->debug_dir = debugfs_create_dir(dev->name, batadv_debugfs);
        if (!bat_priv->debug_dir)
                goto out;
@@ -392,6 +406,10 @@ out:
 void batadv_debugfs_del_meshif(struct net_device *dev)
 {
        struct batadv_priv *bat_priv = netdev_priv(dev);
+       struct net *net = dev_net(dev);
+
+       if (net != &init_net)
+               return;
 
        batadv_debug_log_cleanup(bat_priv);
 
index 63a805d..c2928c2 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <linux/atomic.h>
 #include <linux/byteorder/generic.h>
+#include <linux/errno.h>
 #include <linux/etherdevice.h>
 #include <linux/fs.h>
 #include <linux/if_ether.h>
@@ -31,6 +32,7 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/udp.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "gateway_common.h"
 #include "hard-interface.h"
 #include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
 #include "routing.h"
+#include "soft-interface.h"
 #include "sysfs.h"
 #include "translation-table.h"
 
@@ -80,12 +86,12 @@ static void batadv_gw_node_release(struct kref *ref)
  * batadv_gw_node_put - decrement the gw_node refcounter and possibly release it
  * @gw_node: gateway node to free
  */
-static void batadv_gw_node_put(struct batadv_gw_node *gw_node)
+void batadv_gw_node_put(struct batadv_gw_node *gw_node)
 {
        kref_put(&gw_node->refcount, batadv_gw_node_release);
 }
 
-static struct batadv_gw_node *
+struct batadv_gw_node *
 batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv)
 {
        struct batadv_gw_node *gw_node;
@@ -164,86 +170,6 @@ void batadv_gw_reselect(struct batadv_priv *bat_priv)
        atomic_set(&bat_priv->gw.reselect, 1);
 }
 
-static struct batadv_gw_node *
-batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
-{
-       struct batadv_neigh_node *router;
-       struct batadv_neigh_ifinfo *router_ifinfo;
-       struct batadv_gw_node *gw_node, *curr_gw = NULL;
-       u64 max_gw_factor = 0;
-       u64 tmp_gw_factor = 0;
-       u8 max_tq = 0;
-       u8 tq_avg;
-       struct batadv_orig_node *orig_node;
-
-       rcu_read_lock();
-       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
-               orig_node = gw_node->orig_node;
-               router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
-               if (!router)
-                       continue;
-
-               router_ifinfo = batadv_neigh_ifinfo_get(router,
-                                                       BATADV_IF_DEFAULT);
-               if (!router_ifinfo)
-                       goto next;
-
-               if (!kref_get_unless_zero(&gw_node->refcount))
-                       goto next;
-
-               tq_avg = router_ifinfo->bat_iv.tq_avg;
-
-               switch (atomic_read(&bat_priv->gw.sel_class)) {
-               case 1: /* fast connection */
-                       tmp_gw_factor = tq_avg * tq_avg;
-                       tmp_gw_factor *= gw_node->bandwidth_down;
-                       tmp_gw_factor *= 100 * 100;
-                       tmp_gw_factor >>= 18;
-
-                       if ((tmp_gw_factor > max_gw_factor) ||
-                           ((tmp_gw_factor == max_gw_factor) &&
-                            (tq_avg > max_tq))) {
-                               if (curr_gw)
-                                       batadv_gw_node_put(curr_gw);
-                               curr_gw = gw_node;
-                               kref_get(&curr_gw->refcount);
-                       }
-                       break;
-
-               default: /* 2:  stable connection (use best statistic)
-                         * 3:  fast-switch (use best statistic but change as
-                         *     soon as a better gateway appears)
-                         * XX: late-switch (use best statistic but change as
-                         *     soon as a better gateway appears which has
-                         *     $routing_class more tq points)
-                         */
-                       if (tq_avg > max_tq) {
-                               if (curr_gw)
-                                       batadv_gw_node_put(curr_gw);
-                               curr_gw = gw_node;
-                               kref_get(&curr_gw->refcount);
-                       }
-                       break;
-               }
-
-               if (tq_avg > max_tq)
-                       max_tq = tq_avg;
-
-               if (tmp_gw_factor > max_gw_factor)
-                       max_gw_factor = tmp_gw_factor;
-
-               batadv_gw_node_put(gw_node);
-
-next:
-               batadv_neigh_node_put(router);
-               if (router_ifinfo)
-                       batadv_neigh_ifinfo_put(router_ifinfo);
-       }
-       rcu_read_unlock();
-
-       return curr_gw;
-}
-
 /**
  * batadv_gw_check_client_stop - check if client mode has been switched off
  * @bat_priv: the bat priv with all the soft interface information
@@ -287,12 +213,19 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
        if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT)
                goto out;
 
+       if (!bat_priv->algo_ops->gw.get_best_gw_node)
+               goto out;
+
        curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
 
        if (!batadv_atomic_dec_not_zero(&bat_priv->gw.reselect) && curr_gw)
                goto out;
 
-       next_gw = batadv_gw_get_best_gw_node(bat_priv);
+       /* if gw.reselect is set to 1 it means that a previous call to
+        * gw.is_eligible() said that we have a new best GW, therefore it can
+        * now be picked from the list and selected
+        */
+       next_gw = bat_priv->algo_ops->gw.get_best_gw_node(bat_priv);
 
        if (curr_gw == next_gw)
                goto out;
@@ -360,70 +293,31 @@ out:
 void batadv_gw_check_election(struct batadv_priv *bat_priv,
                              struct batadv_orig_node *orig_node)
 {
-       struct batadv_neigh_ifinfo *router_orig_tq = NULL;
-       struct batadv_neigh_ifinfo *router_gw_tq = NULL;
        struct batadv_orig_node *curr_gw_orig;
-       struct batadv_neigh_node *router_gw = NULL;
-       struct batadv_neigh_node *router_orig = NULL;
-       u8 gw_tq_avg, orig_tq_avg;
+
+       /* abort immediately if the routing algorithm does not support gateway
+        * election
+        */
+       if (!bat_priv->algo_ops->gw.is_eligible)
+               return;
 
        curr_gw_orig = batadv_gw_get_selected_orig(bat_priv);
        if (!curr_gw_orig)
                goto reselect;
 
-       router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT);
-       if (!router_gw)
-               goto reselect;
-
-       router_gw_tq = batadv_neigh_ifinfo_get(router_gw,
-                                              BATADV_IF_DEFAULT);
-       if (!router_gw_tq)
-               goto reselect;
-
        /* this node already is the gateway */
        if (curr_gw_orig == orig_node)
                goto out;
 
-       router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
-       if (!router_orig)
-               goto out;
-
-       router_orig_tq = batadv_neigh_ifinfo_get(router_orig,
-                                                BATADV_IF_DEFAULT);
-       if (!router_orig_tq)
-               goto out;
-
-       gw_tq_avg = router_gw_tq->bat_iv.tq_avg;
-       orig_tq_avg = router_orig_tq->bat_iv.tq_avg;
-
-       /* the TQ value has to be better */
-       if (orig_tq_avg < gw_tq_avg)
-               goto out;
-
-       /* if the routing class is greater than 3 the value tells us how much
-        * greater the TQ value of the new gateway must be
-        */
-       if ((atomic_read(&bat_priv->gw.sel_class) > 3) &&
-           (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class)))
+       if (!bat_priv->algo_ops->gw.is_eligible(bat_priv, curr_gw_orig,
+                                               orig_node))
                goto out;
 
-       batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-                  "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n",
-                  gw_tq_avg, orig_tq_avg);
-
 reselect:
        batadv_gw_reselect(bat_priv);
 out:
        if (curr_gw_orig)
                batadv_orig_node_put(curr_gw_orig);
-       if (router_gw)
-               batadv_neigh_node_put(router_gw);
-       if (router_orig)
-               batadv_neigh_node_put(router_orig);
-       if (router_gw_tq)
-               batadv_neigh_ifinfo_put(router_gw_tq);
-       if (router_orig_tq)
-               batadv_neigh_ifinfo_put(router_orig_tq);
 }
 
 /**
@@ -472,9 +366,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
  *
  * Return: gateway node if found or NULL otherwise.
  */
-static struct batadv_gw_node *
-batadv_gw_node_get(struct batadv_priv *bat_priv,
-                  struct batadv_orig_node *orig_node)
+struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
+                                         struct batadv_orig_node *orig_node)
 {
        struct batadv_gw_node *gw_node_tmp, *gw_node = NULL;
 
@@ -585,81 +478,85 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv)
        spin_unlock_bh(&bat_priv->gw.list_lock);
 }
 
-/* fails if orig_node has no router */
-static int batadv_write_buffer_text(struct batadv_priv *bat_priv,
-                                   struct seq_file *seq,
-                                   const struct batadv_gw_node *gw_node)
+int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
 {
-       struct batadv_gw_node *curr_gw;
-       struct batadv_neigh_node *router;
-       struct batadv_neigh_ifinfo *router_ifinfo = NULL;
-       int ret = -1;
+       struct net_device *net_dev = (struct net_device *)seq->private;
+       struct batadv_priv *bat_priv = netdev_priv(net_dev);
+       struct batadv_hard_iface *primary_if;
 
-       router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
-       if (!router)
-               goto out;
+       primary_if = batadv_seq_print_text_primary_if_get(seq);
+       if (!primary_if)
+               return 0;
 
-       router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
-       if (!router_ifinfo)
-               goto out;
+       seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n",
+                  BATADV_SOURCE_VERSION, primary_if->net_dev->name,
+                  primary_if->net_dev->dev_addr, net_dev->name,
+                  bat_priv->algo_ops->name);
 
-       curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+       batadv_hardif_put(primary_if);
 
-       seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n",
-                  (curr_gw == gw_node ? "=>" : "  "),
-                  gw_node->orig_node->orig,
-                  router_ifinfo->bat_iv.tq_avg, router->addr,
-                  router->if_incoming->net_dev->name,
-                  gw_node->bandwidth_down / 10,
-                  gw_node->bandwidth_down % 10,
-                  gw_node->bandwidth_up / 10,
-                  gw_node->bandwidth_up % 10);
-       ret = seq_has_overflowed(seq) ? -1 : 0;
+       if (!bat_priv->algo_ops->gw.print) {
+               seq_puts(seq,
+                        "No printing function for this routing protocol\n");
+               return 0;
+       }
 
-       if (curr_gw)
-               batadv_gw_node_put(curr_gw);
-out:
-       if (router_ifinfo)
-               batadv_neigh_ifinfo_put(router_ifinfo);
-       if (router)
-               batadv_neigh_node_put(router);
-       return ret;
+       bat_priv->algo_ops->gw.print(bat_priv, seq);
+
+       return 0;
 }
 
-int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
+/**
+ * batadv_gw_dump - Dump gateways into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ *
+ * Return: Error code, or length of message
+ */
+int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb)
 {
-       struct net_device *net_dev = (struct net_device *)seq->private;
-       struct batadv_priv *bat_priv = netdev_priv(net_dev);
-       struct batadv_hard_iface *primary_if;
-       struct batadv_gw_node *gw_node;
-       int gw_count = 0;
-
-       primary_if = batadv_seq_print_text_primary_if_get(seq);
-       if (!primary_if)
+       struct batadv_hard_iface *primary_if = NULL;
+       struct net *net = sock_net(cb->skb->sk);
+       struct net_device *soft_iface;
+       struct batadv_priv *bat_priv;
+       int ifindex;
+       int ret;
+
+       ifindex = batadv_netlink_get_ifindex(cb->nlh,
+                                            BATADV_ATTR_MESH_IFINDEX);
+       if (!ifindex)
+               return -EINVAL;
+
+       soft_iface = dev_get_by_index(net, ifindex);
+       if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+               ret = -ENODEV;
                goto out;
+       }
 
-       seq_printf(seq,
-                  "      Gateway      (#/255)           Nexthop [outgoingIF]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
-                  BATADV_SOURCE_VERSION, primary_if->net_dev->name,
-                  primary_if->net_dev->dev_addr, net_dev->name);
+       bat_priv = netdev_priv(soft_iface);
 
-       rcu_read_lock();
-       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
-               /* fails if orig_node has no router */
-               if (batadv_write_buffer_text(bat_priv, seq, gw_node) < 0)
-                       continue;
+       primary_if = batadv_primary_if_get_selected(bat_priv);
+       if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+               ret = -ENOENT;
+               goto out;
+       }
 
-               gw_count++;
+       if (!bat_priv->algo_ops->gw.dump) {
+               ret = -EOPNOTSUPP;
+               goto out;
        }
-       rcu_read_unlock();
 
-       if (gw_count == 0)
-               seq_puts(seq, "No gateways in range ...\n");
+       bat_priv->algo_ops->gw.dump(msg, cb, bat_priv);
+
+       ret = msg->len;
 
 out:
        if (primary_if)
                batadv_hardif_put(primary_if);
-       return 0;
+       if (soft_iface)
+               dev_put(soft_iface);
+
+       return ret;
 }
 
 /**
index 582dd8c..859166d 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 
 struct batadv_tvlv_gateway_data;
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -39,10 +40,16 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
 void batadv_gw_node_delete(struct batadv_priv *bat_priv,
                           struct batadv_orig_node *orig_node);
 void batadv_gw_node_free(struct batadv_priv *bat_priv);
+void batadv_gw_node_put(struct batadv_gw_node *gw_node);
+struct batadv_gw_node *
+batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv);
 int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb);
 bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb);
 enum batadv_dhcp_recipient
 batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
                             u8 *chaddr);
+struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
+                                         struct batadv_orig_node *orig_node);
 
 #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
index d7bc6a8..2118481 100644 (file)
@@ -241,10 +241,9 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
 
        batadv_gw_node_update(bat_priv, orig, &gateway);
 
-       /* restart gateway selection if fast or late switching was enabled */
+       /* restart gateway selection */
        if ((gateway.bandwidth_down != 0) &&
-           (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) &&
-           (atomic_read(&bat_priv->gw.sel_class) > 2))
+           (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT))
                batadv_gw_check_election(bat_priv, orig);
 }
 
index 1f90808..43c9a3e 100644 (file)
@@ -35,7 +35,8 @@
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/workqueue.h>
+#include <net/net_namespace.h>
+#include <net/rtnetlink.h>
 
 #include "bat_v.h"
 #include "bridge_loop_avoidance.h"
@@ -84,26 +85,56 @@ out:
        return hard_iface;
 }
 
+/**
+ * batadv_getlink_net - return link net namespace (or use fallback)
+ * @netdev: net_device to check
+ * @fallback_net: netns to return when get_link_net is not available for @netdev
+ *
+ * Return: result of rtnl_link_ops->get_link_net or @fallback_net
+ */
+static const struct net *batadv_getlink_net(const struct net_device *netdev,
+                                           const struct net *fallback_net)
+{
+       if (!netdev->rtnl_link_ops)
+               return fallback_net;
+
+       if (!netdev->rtnl_link_ops->get_link_net)
+               return fallback_net;
+
+       return netdev->rtnl_link_ops->get_link_net(netdev);
+}
+
 /**
  * batadv_mutual_parents - check if two devices are each others parent
- * @dev1: 1st net_device
- * @dev2: 2nd net_device
+ * @dev1: 1st net dev
+ * @net1: 1st device's netns
+ * @dev2: 2nd net dev
+ * @net2: 2nd device's netns
  *
  * veth devices come in pairs and each is the parent of the other!
  *
  * Return: true if the devices are each others parent, otherwise false
  */
 static bool batadv_mutual_parents(const struct net_device *dev1,
-                                 const struct net_device *dev2)
+                                 const struct net *net1,
+                                 const struct net_device *dev2,
+                                 const struct net *net2)
 {
        int dev1_parent_iflink = dev_get_iflink(dev1);
        int dev2_parent_iflink = dev_get_iflink(dev2);
+       const struct net *dev1_parent_net;
+       const struct net *dev2_parent_net;
+
+       dev1_parent_net = batadv_getlink_net(dev1, net1);
+       dev2_parent_net = batadv_getlink_net(dev2, net2);
 
        if (!dev1_parent_iflink || !dev2_parent_iflink)
                return false;
 
        return (dev1_parent_iflink == dev2->ifindex) &&
-              (dev2_parent_iflink == dev1->ifindex);
+              (dev2_parent_iflink == dev1->ifindex) &&
+              net_eq(dev1_parent_net, net2) &&
+              net_eq(dev2_parent_net, net1);
 }
 
 /**
@@ -121,8 +152,9 @@ static bool batadv_mutual_parents(const struct net_device *dev1,
  */
 static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
 {
-       struct net_device *parent_dev;
        struct net *net = dev_net(net_dev);
+       struct net_device *parent_dev;
+       const struct net *parent_net;
        bool ret;
 
        /* check if this is a batman-adv mesh interface */
@@ -134,13 +166,16 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
            dev_get_iflink(net_dev) == net_dev->ifindex)
                return false;
 
+       parent_net = batadv_getlink_net(net_dev, net);
+
        /* recurse over the parent device */
-       parent_dev = __dev_get_by_index(net, dev_get_iflink(net_dev));
+       parent_dev = __dev_get_by_index((struct net *)parent_net,
+                                       dev_get_iflink(net_dev));
        /* if we got a NULL parent_dev there is something broken.. */
        if (WARN(!parent_dev, "Cannot find parent device"))
                return false;
 
-       if (batadv_mutual_parents(net_dev, parent_dev))
+       if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net))
                return false;
 
        ret = batadv_is_on_batman_iface(parent_dev);
@@ -625,25 +660,6 @@ out:
                batadv_hardif_put(primary_if);
 }
 
-/**
- * batadv_hardif_remove_interface_finish - cleans up the remains of a hardif
- * @work: work queue item
- *
- * Free the parts of the hard interface which can not be removed under
- * rtnl lock (to prevent deadlock situations).
- */
-static void batadv_hardif_remove_interface_finish(struct work_struct *work)
-{
-       struct batadv_hard_iface *hard_iface;
-
-       hard_iface = container_of(work, struct batadv_hard_iface,
-                                 cleanup_work);
-
-       batadv_debugfs_del_hardif(hard_iface);
-       batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
-       batadv_hardif_put(hard_iface);
-}
-
 static struct batadv_hard_iface *
 batadv_hardif_add_interface(struct net_device *net_dev)
 {
@@ -676,8 +692,6 @@ batadv_hardif_add_interface(struct net_device *net_dev)
 
        INIT_LIST_HEAD(&hard_iface->list);
        INIT_HLIST_HEAD(&hard_iface->neigh_list);
-       INIT_WORK(&hard_iface->cleanup_work,
-                 batadv_hardif_remove_interface_finish);
 
        spin_lock_init(&hard_iface->neigh_list_lock);
 
@@ -719,7 +733,9 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface)
                return;
 
        hard_iface->if_status = BATADV_IF_TO_BE_REMOVED;
-       queue_work(batadv_event_workqueue, &hard_iface->cleanup_work);
+       batadv_debugfs_del_hardif(hard_iface);
+       batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
+       batadv_hardif_put(hard_iface);
 }
 
 void batadv_hardif_remove_interfaces(void)
index fe4c5e2..ef07e5b 100644 (file)
@@ -82,6 +82,12 @@ static void batadv_recv_handler_init(void);
 
 static int __init batadv_init(void)
 {
+       int ret;
+
+       ret = batadv_tt_cache_init();
+       if (ret < 0)
+               return ret;
+
        INIT_LIST_HEAD(&batadv_hardif_list);
        batadv_algo_init();
 
@@ -93,9 +99,8 @@ static int __init batadv_init(void)
        batadv_tp_meter_init();
 
        batadv_event_workqueue = create_singlethread_workqueue("bat_events");
-
        if (!batadv_event_workqueue)
-               return -ENOMEM;
+               goto err_create_wq;
 
        batadv_socket_init();
        batadv_debugfs_init();
@@ -108,6 +113,11 @@ static int __init batadv_init(void)
                BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION);
 
        return 0;
+
+err_create_wq:
+       batadv_tt_cache_destroy();
+
+       return -ENOMEM;
 }
 
 static void __exit batadv_exit(void)
@@ -123,6 +133,8 @@ static void __exit batadv_exit(void)
        batadv_event_workqueue = NULL;
 
        rcu_barrier();
+
+       batadv_tt_cache_destroy();
 }
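batadv_init() now has a real unwind path: batadv_tt_cache_init() must succeed before anything else is set up, and if the event workqueue cannot be created the caches are released again via batadv_tt_cache_destroy(); batadv_exit() tears them down only after rcu_barrier(), so no pending call_rcu callback can still return objects to a destroyed cache. The init/destroy pair itself lives in translation-table.c and is not part of this hunk; a plausible shape for two of the six caches declared there (a sketch under that assumption, not the literal patch) is:

int batadv_tt_cache_init(void)
{
        size_t tl_size = sizeof(struct batadv_tt_local_entry);
        size_t tg_size = sizeof(struct batadv_tt_global_entry);

        batadv_tl_cache = kmem_cache_create("batadv_tl_cache", tl_size, 0,
                                            SLAB_HWCACHE_ALIGN, NULL);
        if (!batadv_tl_cache)
                return -ENOMEM;

        batadv_tg_cache = kmem_cache_create("batadv_tg_cache", tg_size, 0,
                                            SLAB_HWCACHE_ALIGN, NULL);
        if (!batadv_tg_cache)
                goto err_tt_tl_destroy;

        return 0;

err_tt_tl_destroy:
        kmem_cache_destroy(batadv_tl_cache);
        batadv_tl_cache = NULL;
        return -ENOMEM;
}

void batadv_tt_cache_destroy(void)
{
        /* only called once no RCU callback can still free into the caches */
        kmem_cache_destroy(batadv_tg_cache);
        kmem_cache_destroy(batadv_tl_cache);
}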
 
 int batadv_mesh_init(struct net_device *soft_iface)
@@ -638,3 +650,4 @@ MODULE_AUTHOR(BATADV_DRIVER_AUTHOR);
 MODULE_DESCRIPTION(BATADV_DRIVER_DESC);
 MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE);
 MODULE_VERSION(BATADV_SOURCE_VERSION);
+MODULE_ALIAS_RTNL_LINK("batadv");
index 06a8608..09af21e 100644 (file)
@@ -24,7 +24,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2016.3"
+#define BATADV_SOURCE_VERSION "2016.4"
 #endif
 
 /* B.A.T.M.A.N. parameters */
index cc91507..894df60 100644 (file)
@@ -528,7 +528,7 @@ update:
        }
 
        return !(mcast_data.flags &
-                (BATADV_MCAST_WANT_ALL_IPV4 + BATADV_MCAST_WANT_ALL_IPV6));
+                (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6));
 }
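The one-character fix above matters because BATADV_MCAST_WANT_ALL_IPV4 and BATADV_MCAST_WANT_ALL_IPV6 are single-bit flags: combining them with + only happens to give the right mask while the operands never share a set bit, whereas | states the intent and stays correct even if one of the bits is already present in the value being built. A trivial illustration with hypothetical flag values:

#include <stdio.h>

#define WANT_ALL_IPV4 (1u << 0)        /* hypothetical bit positions */
#define WANT_ALL_IPV6 (1u << 1)

int main(void)
{
        unsigned int flags = WANT_ALL_IPV4 | WANT_ALL_IPV6;

        /* masking against the OR of both flags asks "is either bit set?" */
        printf("either set: %u\n", !!(flags & (WANT_ALL_IPV4 | WANT_ALL_IPV6)));

        /* adding an already-set flag corrupts the mask; OR-ing is idempotent */
        printf("plus: 0x%x\n", flags + WANT_ALL_IPV4);  /* 0x4, wrong */
        printf("or:   0x%x\n", flags | WANT_ALL_IPV4);  /* 0x3, right */
        return 0;
}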
 
 /**
index 231f8ea..18831e7 100644 (file)
@@ -18,6 +18,8 @@
 #include "netlink.h"
 #include "main.h"
 
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/genetlink.h>
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <net/genetlink.h>
 #include <net/netlink.h>
+#include <net/sock.h>
 #include <uapi/linux/batman_adv.h>
 
+#include "bat_algo.h"
+#include "bridge_loop_avoidance.h"
+#include "gateway_client.h"
 #include "hard-interface.h"
+#include "originator.h"
+#include "packet.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
+#include "translation-table.h"
 
-struct sk_buff;
-
-static struct genl_family batadv_netlink_family = {
+struct genl_family batadv_netlink_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = 0,
        .name = BATADV_NL_NAME,
        .version = 1,
        .maxattr = BATADV_ATTR_MAX,
+       .netnsok = true,
 };
 
 /* multicast groups */
@@ -69,8 +80,43 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
        [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
        [BATADV_ATTR_TPMETER_BYTES]     = { .type = NLA_U64 },
        [BATADV_ATTR_TPMETER_COOKIE]    = { .type = NLA_U32 },
+       [BATADV_ATTR_ACTIVE]            = { .type = NLA_FLAG },
+       [BATADV_ATTR_TT_ADDRESS]        = { .len = ETH_ALEN },
+       [BATADV_ATTR_TT_TTVN]           = { .type = NLA_U8 },
+       [BATADV_ATTR_TT_LAST_TTVN]      = { .type = NLA_U8 },
+       [BATADV_ATTR_TT_CRC32]          = { .type = NLA_U32 },
+       [BATADV_ATTR_TT_VID]            = { .type = NLA_U16 },
+       [BATADV_ATTR_TT_FLAGS]          = { .type = NLA_U32 },
+       [BATADV_ATTR_FLAG_BEST]         = { .type = NLA_FLAG },
+       [BATADV_ATTR_LAST_SEEN_MSECS]   = { .type = NLA_U32 },
+       [BATADV_ATTR_NEIGH_ADDRESS]     = { .len = ETH_ALEN },
+       [BATADV_ATTR_TQ]                = { .type = NLA_U8 },
+       [BATADV_ATTR_THROUGHPUT]        = { .type = NLA_U32 },
+       [BATADV_ATTR_BANDWIDTH_UP]      = { .type = NLA_U32 },
+       [BATADV_ATTR_BANDWIDTH_DOWN]    = { .type = NLA_U32 },
+       [BATADV_ATTR_ROUTER]            = { .len = ETH_ALEN },
+       [BATADV_ATTR_BLA_OWN]           = { .type = NLA_FLAG },
+       [BATADV_ATTR_BLA_ADDRESS]       = { .len = ETH_ALEN },
+       [BATADV_ATTR_BLA_VID]           = { .type = NLA_U16 },
+       [BATADV_ATTR_BLA_BACKBONE]      = { .len = ETH_ALEN },
+       [BATADV_ATTR_BLA_CRC]           = { .type = NLA_U16 },
 };
 
+/**
+ * batadv_netlink_get_ifindex - Extract an interface index from a message
+ * @nlh: Message header
+ * @attrtype: Attribute which holds an interface index
+ *
+ * Return: interface index, or 0 if the attribute is missing.
+ */
+int
+batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
+{
+       struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype);
+
+       return attr ? nla_get_u32(attr) : 0;
+}
+
 /**
  * batadv_netlink_mesh_info_put - fill in generic information about mesh
  *  interface
@@ -93,8 +139,16 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
            nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) ||
            nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) ||
            nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN,
-                   soft_iface->dev_addr))
+                   soft_iface->dev_addr) ||
+           nla_put_u8(msg, BATADV_ATTR_TT_TTVN,
+                      (u8)atomic_read(&bat_priv->tt.vn)))
+               goto out;
+
+#ifdef CONFIG_BATMAN_ADV_BLA
+       if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+                       ntohs(bat_priv->bla.claim_dest.group)))
                goto out;
+#endif
 
        primary_if = batadv_primary_if_get_selected(bat_priv);
        if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
@@ -380,6 +434,106 @@ out:
        return ret;
 }
 
+/**
+ * batadv_netlink_dump_hardif_entry - Dump one hard interface into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hard_iface: Hard interface to dump
+ *
+ * Return: error code, or 0 on success
+ */
+static int
+batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                                struct batadv_hard_iface *hard_iface)
+{
+       struct net_device *net_dev = hard_iface->net_dev;
+       void *hdr;
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+                         BATADV_CMD_GET_HARDIFS);
+       if (!hdr)
+               return -EMSGSIZE;
+
+       if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+                       net_dev->ifindex) ||
+           nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+                          net_dev->name) ||
+           nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
+                   net_dev->dev_addr))
+               goto nla_put_failure;
+
+       if (hard_iface->if_status == BATADV_IF_ACTIVE) {
+               if (nla_put_flag(msg, BATADV_ATTR_ACTIVE))
+                       goto nla_put_failure;
+       }
+
+       genlmsg_end(msg, hdr);
+       return 0;
+
+ nla_put_failure:
+       genlmsg_cancel(msg, hdr);
+       return -EMSGSIZE;
+}
+
+/**
+ * batadv_netlink_dump_hardifs - Dump all hard interfaces into a message
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: error code, or length of reply message on success
+ */
+static int
+batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       struct net *net = sock_net(cb->skb->sk);
+       struct net_device *soft_iface;
+       struct batadv_hard_iface *hard_iface;
+       int ifindex;
+       int portid = NETLINK_CB(cb->skb).portid;
+       int seq = cb->nlh->nlmsg_seq;
+       int skip = cb->args[0];
+       int i = 0;
+
+       ifindex = batadv_netlink_get_ifindex(cb->nlh,
+                                            BATADV_ATTR_MESH_IFINDEX);
+       if (!ifindex)
+               return -EINVAL;
+
+       soft_iface = dev_get_by_index(net, ifindex);
+       if (!soft_iface)
+               return -ENODEV;
+
+       if (!batadv_softif_is_valid(soft_iface)) {
+               dev_put(soft_iface);
+               return -ENODEV;
+       }
+
+       rcu_read_lock();
+
+       list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+               if (hard_iface->soft_iface != soft_iface)
+                       continue;
+
+               if (i++ < skip)
+                       continue;
+
+               if (batadv_netlink_dump_hardif_entry(msg, portid, seq,
+                                                    hard_iface)) {
+                       i--;
+                       break;
+               }
+       }
+
+       rcu_read_unlock();
+
+       dev_put(soft_iface);
+
+       cb->args[0] = i;
+
+       return msg->len;
+}
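batadv_netlink_dump_hardifs() follows the standard netlink dump convention: the kernel re-invokes the dumpit callback until it stops adding data, and cb->args[0] records how many entries earlier passes already emitted, so each pass skips that many, appends entries until the message is full (batadv_netlink_dump_hardif_entry() returning -EMSGSIZE), and saves the new position. A self-contained userspace analog of the skip/fill/resume loop, with no netlink involved and purely illustrative sizes:

#include <stdio.h>

#define N_ENTRIES 7
#define PER_PASS  3        /* pretend the message fills after 3 entries */

/* one "dump pass": skip already-sent entries, emit up to PER_PASS more,
 * return the updated cursor (total entries handled so far)
 */
static int dump_pass(int skip)
{
        int i, emitted = 0;

        for (i = 0; i < N_ENTRIES; i++) {
                if (i < skip)
                        continue;
                if (emitted == PER_PASS)
                        break;
                printf("entry %d\n", i);
                emitted++;
        }
        return i;
}

int main(void)
{
        int cursor = 0;        /* plays the role of cb->args[0] */

        while (cursor < N_ENTRIES)
                cursor = dump_pass(cursor);
        return 0;
}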
+
 static struct genl_ops batadv_netlink_ops[] = {
        {
                .cmd = BATADV_CMD_GET_MESH_INFO,
@@ -399,6 +553,61 @@ static struct genl_ops batadv_netlink_ops[] = {
                .policy = batadv_netlink_policy,
                .doit = batadv_netlink_tp_meter_cancel,
        },
+       {
+               .cmd = BATADV_CMD_GET_ROUTING_ALGOS,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .dumpit = batadv_algo_dump,
+       },
+       {
+               .cmd = BATADV_CMD_GET_HARDIFS,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .dumpit = batadv_netlink_dump_hardifs,
+       },
+       {
+               .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .dumpit = batadv_tt_local_dump,
+       },
+       {
+               .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .dumpit = batadv_tt_global_dump,
+       },
+       {
+               .cmd = BATADV_CMD_GET_ORIGINATORS,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .dumpit = batadv_orig_dump,
+       },
+       {
+               .cmd = BATADV_CMD_GET_NEIGHBORS,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .dumpit = batadv_hardif_neigh_dump,
+       },
+       {
+               .cmd = BATADV_CMD_GET_GATEWAYS,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .dumpit = batadv_gw_dump,
+       },
+       {
+               .cmd = BATADV_CMD_GET_BLA_CLAIM,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .dumpit = batadv_bla_claim_dump,
+       },
+       {
+               .cmd = BATADV_CMD_GET_BLA_BACKBONE,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .dumpit = batadv_bla_backbone_dump,
+       },
+
 };
 
 /**
index 945653a..52eb162 100644 (file)
 #include "main.h"
 
 #include <linux/types.h>
+#include <net/genetlink.h>
+
+struct nlmsghdr;
 
 void batadv_netlink_register(void);
 void batadv_netlink_unregister(void);
+int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype);
 
 int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
                                  u8 result, u32 test_time, u64 total_bytes,
                                  u32 cookie);
 
+extern struct genl_family batadv_netlink_family;
+
 #endif /* _NET_BATMAN_ADV_NETLINK_H_ */
index 3940b5d..95c8555 100644 (file)
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "distributed-arp-table.h"
 #include "hash.h"
 #include "log.h"
 #include "multicast.h"
+#include "netlink.h"
 #include "network-coding.h"
 #include "routing.h"
+#include "soft-interface.h"
 #include "translation-table.h"
 
 /* hash class keys */
@@ -720,6 +726,83 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
        return 0;
 }
 
+/**
+ * batadv_hardif_neigh_dump - Dump to netlink the neighbor info for a specific
+ *  outgoing interface
+ * @msg: message to dump into
+ * @cb: parameters for the dump
+ *
+ * Return: 0 or error value
+ */
+int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       struct net *net = sock_net(cb->skb->sk);
+       struct net_device *soft_iface;
+       struct net_device *hard_iface = NULL;
+       struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT;
+       struct batadv_priv *bat_priv;
+       struct batadv_hard_iface *primary_if = NULL;
+       int ret;
+       int ifindex, hard_ifindex;
+
+       ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+       if (!ifindex)
+               return -EINVAL;
+
+       soft_iface = dev_get_by_index(net, ifindex);
+       if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+               ret = -ENODEV;
+               goto out;
+       }
+
+       bat_priv = netdev_priv(soft_iface);
+
+       primary_if = batadv_primary_if_get_selected(bat_priv);
+       if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       hard_ifindex = batadv_netlink_get_ifindex(cb->nlh,
+                                                 BATADV_ATTR_HARD_IFINDEX);
+       if (hard_ifindex) {
+               hard_iface = dev_get_by_index(net, hard_ifindex);
+               if (hard_iface)
+                       hardif = batadv_hardif_get_by_netdev(hard_iface);
+
+               if (!hardif) {
+                       ret = -ENODEV;
+                       goto out;
+               }
+
+               if (hardif->soft_iface != soft_iface) {
+                       ret = -ENOENT;
+                       goto out;
+               }
+       }
+
+       if (!bat_priv->algo_ops->neigh.dump) {
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+
+       bat_priv->algo_ops->neigh.dump(msg, cb, bat_priv, hardif);
+
+       ret = msg->len;
+
+ out:
+       if (hardif)
+               batadv_hardif_put(hardif);
+       if (hard_iface)
+               dev_put(hard_iface);
+       if (primary_if)
+               batadv_hardif_put(primary_if);
+       if (soft_iface)
+               dev_put(soft_iface);
+
+       return ret;
+}
+
 /**
  * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for
  *  free after rcu grace period
@@ -1330,6 +1413,83 @@ out:
        return 0;
 }
 
+/**
+ * batadv_orig_dump - Dump to netlink the originator info for a specific
+ *  outgoing interface
+ * @msg: message to dump into
+ * @cb: parameters for the dump
+ *
+ * Return: 0 or error value
+ */
+int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       struct net *net = sock_net(cb->skb->sk);
+       struct net_device *soft_iface;
+       struct net_device *hard_iface = NULL;
+       struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT;
+       struct batadv_priv *bat_priv;
+       struct batadv_hard_iface *primary_if = NULL;
+       int ret;
+       int ifindex, hard_ifindex;
+
+       ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+       if (!ifindex)
+               return -EINVAL;
+
+       soft_iface = dev_get_by_index(net, ifindex);
+       if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+               ret = -ENODEV;
+               goto out;
+       }
+
+       bat_priv = netdev_priv(soft_iface);
+
+       primary_if = batadv_primary_if_get_selected(bat_priv);
+       if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       hard_ifindex = batadv_netlink_get_ifindex(cb->nlh,
+                                                 BATADV_ATTR_HARD_IFINDEX);
+       if (hard_ifindex) {
+               hard_iface = dev_get_by_index(net, hard_ifindex);
+               if (hard_iface)
+                       hardif = batadv_hardif_get_by_netdev(hard_iface);
+
+               if (!hardif) {
+                       ret = -ENODEV;
+                       goto out;
+               }
+
+               if (hardif->soft_iface != soft_iface) {
+                       ret = -ENOENT;
+                       goto out;
+               }
+       }
+
+       if (!bat_priv->algo_ops->orig.dump) {
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+
+       bat_priv->algo_ops->orig.dump(msg, cb, bat_priv, hardif);
+
+       ret = msg->len;
+
+ out:
+       if (hardif)
+               batadv_hardif_put(hardif);
+       if (hard_iface)
+               dev_put(hard_iface);
+       if (primary_if)
+               batadv_hardif_put(primary_if);
+       if (soft_iface)
+               dev_put(soft_iface);
+
+       return ret;
+}
+
 int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
                            int max_if_num)
 {
index 566306b..ebc5618 100644 (file)
@@ -31,7 +31,9 @@
 
 #include "hash.h"
 
+struct netlink_callback;
 struct seq_file;
+struct sk_buff;
 
 bool batadv_compare_orig(const struct hlist_node *node, const void *data2);
 int batadv_originator_init(struct batadv_priv *bat_priv);
@@ -61,6 +63,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
                        struct batadv_hard_iface *if_outgoing);
 void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo);
 
+int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset);
 
 struct batadv_orig_ifinfo *
@@ -72,6 +75,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
 void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo);
 
 int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
                            int max_if_num);
index 6b011ff..6afc0b8 100644 (file)
@@ -128,42 +128,6 @@ enum batadv_tt_data_flags {
        BATADV_TT_FULL_TABLE = BIT(4),
 };
 
-/**
- * enum batadv_tt_client_flags - TT client specific flags
- * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table
- * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new
- *  update telling its new real location has not been received/sent yet
- * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface.
- *  This information is used by the "AP Isolation" feature
- * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This
- *  information is used by the Extended Isolation feature
- * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table
- * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has
- *  not been announced yet
- * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept
- *  in the table for one more originator interval for consistency purposes
- * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of
- *  the network but no nnode has already announced it
- *
- * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire.
- * Bits from 8 to 15 are called _local flags_ because they are used for local
- * computations only.
- *
- * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with
- * the other nodes in the network. To achieve this goal these flags are included
- * in the TT CRC computation.
- */
-enum batadv_tt_client_flags {
-       BATADV_TT_CLIENT_DEL     = BIT(0),
-       BATADV_TT_CLIENT_ROAM    = BIT(1),
-       BATADV_TT_CLIENT_WIFI    = BIT(4),
-       BATADV_TT_CLIENT_ISOLA   = BIT(5),
-       BATADV_TT_CLIENT_NOPURGE = BIT(8),
-       BATADV_TT_CLIENT_NEW     = BIT(9),
-       BATADV_TT_CLIENT_PENDING = BIT(10),
-       BATADV_TT_CLIENT_TEMP    = BIT(11),
-};
-
 /**
  * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field
  * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
index 7602c00..610f2c4 100644 (file)
@@ -74,11 +74,23 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
        if (!orig_ifinfo)
                return;
 
-       rcu_read_lock();
-       curr_router = rcu_dereference(orig_ifinfo->router);
-       if (curr_router && !kref_get_unless_zero(&curr_router->refcount))
-               curr_router = NULL;
-       rcu_read_unlock();
+       spin_lock_bh(&orig_node->neigh_list_lock);
+       /* curr_router used earlier may not be the current orig_ifinfo->router
+        * anymore because it was dereferenced outside of the neigh_list_lock
+        * protected region. After the new best neighbor has replaced the current
+        * best neighbor, the reference counter needs to decrease. Consequently,
+        * the code needs to ensure the curr_router variable contains a pointer
+        * to the replaced best neighbor.
+        */
+       curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
+
+       /* increase refcount of new best neighbor */
+       if (neigh_node)
+               kref_get(&neigh_node->refcount);
+
+       rcu_assign_pointer(orig_ifinfo->router, neigh_node);
+       spin_unlock_bh(&orig_node->neigh_list_lock);
+       batadv_orig_ifinfo_put(orig_ifinfo);
 
        /* route deleted */
        if ((curr_router) && (!neigh_node)) {
@@ -100,27 +112,6 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
                           curr_router->addr);
        }
 
-       if (curr_router)
-               batadv_neigh_node_put(curr_router);
-
-       spin_lock_bh(&orig_node->neigh_list_lock);
-       /* curr_router used earlier may not be the current orig_ifinfo->router
-        * anymore because it was dereferenced outside of the neigh_list_lock
-        * protected region. After the new best neighbor has replace the current
-        * best neighbor the reference counter needs to decrease. Consequently,
-        * the code needs to ensure the curr_router variable contains a pointer
-        * to the replaced best neighbor.
-        */
-       curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
-
-       /* increase refcount of new best neighbor */
-       if (neigh_node)
-               kref_get(&neigh_node->refcount);
-
-       rcu_assign_pointer(orig_ifinfo->router, neigh_node);
-       spin_unlock_bh(&orig_node->neigh_list_lock);
-       batadv_orig_ifinfo_put(orig_ifinfo);
-
        /* decrease refcount of previous best neighbor */
        if (curr_router)
                batadv_neigh_node_put(curr_router);
index 6191159..8d4e1f5 100644 (file)
@@ -315,8 +315,7 @@ out:
  *
  * Wrap the given skb into a batman-adv unicast or unicast-4addr header
  * depending on whether BATADV_UNICAST or BATADV_UNICAST_4ADDR was supplied
- * as packet_type. Then send this frame to the given orig_node and release a
- * reference to this orig_node.
+ * as packet_type. Then send this frame to the given orig_node.
  *
  * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
  */
@@ -370,8 +369,6 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
                ret = NET_XMIT_SUCCESS;
 
 out:
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
        if (ret == NET_XMIT_DROP)
                kfree_skb(skb);
        return ret;
@@ -403,6 +400,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
        struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
        struct batadv_orig_node *orig_node;
        u8 *src, *dst;
+       int ret;
 
        src = ethhdr->h_source;
        dst = ethhdr->h_dest;
@@ -414,8 +412,13 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
        }
        orig_node = batadv_transtable_search(bat_priv, src, dst, vid);
 
-       return batadv_send_skb_unicast(bat_priv, skb, packet_type,
-                                      packet_subtype, orig_node, vid);
+       ret = batadv_send_skb_unicast(bat_priv, skb, packet_type,
+                                     packet_subtype, orig_node, vid);
+
+       if (orig_node)
+               batadv_orig_node_put(orig_node);
+
+       return ret;
 }
 
 /**
@@ -433,12 +436,25 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
                           unsigned short vid)
 {
        struct batadv_orig_node *orig_node;
+       int ret;
 
        orig_node = batadv_gw_get_selected_orig(bat_priv);
-       return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR,
-                                      BATADV_P_DATA, orig_node, vid);
+       ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR,
+                                     BATADV_P_DATA, orig_node, vid);
+
+       if (orig_node)
+               batadv_orig_node_put(orig_node);
+
+       return ret;
 }
 
+/**
+ * batadv_forw_packet_free - free a forwarding packet
+ * @forw_packet: The packet to free
+ *
+ * This frees a forwarding packet and releases any resources it might
+ * have claimed.
+ */
 void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
 {
        kfree_skb(forw_packet->skb);
@@ -446,9 +462,73 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
                batadv_hardif_put(forw_packet->if_incoming);
        if (forw_packet->if_outgoing)
                batadv_hardif_put(forw_packet->if_outgoing);
+       if (forw_packet->queue_left)
+               atomic_inc(forw_packet->queue_left);
        kfree(forw_packet);
 }
 
+/**
+ * batadv_forw_packet_alloc - allocate a forwarding packet
+ * @if_incoming: The (optional) if_incoming to be grabbed
+ * @if_outgoing: The (optional) if_outgoing to be grabbed
+ * @queue_left: The (optional) queue counter to decrease
+ * @bat_priv: The bat_priv for the mesh of this forw_packet
+ *
+ * Allocates a forwarding packet and tries to get a reference to the
+ * (optional) if_incoming, if_outgoing and queue_left. If queue_left
+ * is NULL then bat_priv is optional, too.
+ *
+ * Return: An allocated forwarding packet on success, NULL otherwise.
+ */
+struct batadv_forw_packet *
+batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
+                        struct batadv_hard_iface *if_outgoing,
+                        atomic_t *queue_left,
+                        struct batadv_priv *bat_priv)
+{
+       struct batadv_forw_packet *forw_packet;
+       const char *qname;
+
+       if (queue_left && !batadv_atomic_dec_not_zero(queue_left)) {
+               qname = "unknown";
+
+               if (queue_left == &bat_priv->bcast_queue_left)
+                       qname = "bcast";
+
+               if (queue_left == &bat_priv->batman_queue_left)
+                       qname = "batman";
+
+               batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+                          "%s queue is full\n", qname);
+
+               return NULL;
+       }
+
+       forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC);
+       if (!forw_packet)
+               goto err;
+
+       if (if_incoming)
+               kref_get(&if_incoming->refcount);
+
+       if (if_outgoing)
+               kref_get(&if_outgoing->refcount);
+
+       forw_packet->skb = NULL;
+       forw_packet->queue_left = queue_left;
+       forw_packet->if_incoming = if_incoming;
+       forw_packet->if_outgoing = if_outgoing;
+       forw_packet->num_packets = 0;
+
+       return forw_packet;
+
+err:
+       if (queue_left)
+               atomic_inc(queue_left);
+
+       return NULL;
+}
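batadv_forw_packet_alloc() and batadv_forw_packet_free() now form a symmetric pair around the queue budget: the allocator decrements queue_left (refusing the packet when the queue is exhausted, and undoing the decrement if the allocation itself fails), and the free path increments it again, which is why the callers further down no longer open-code atomic_inc() on their error and completion paths. The accounting pattern in isolation, as a runnable C11 sketch with a hypothetical fixed queue budget:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct forw_packet {
        atomic_int *queue_left;        /* counter to give back on free */
};

/* decrement if positive, a userspace cousin of batadv_atomic_dec_not_zero() */
static int dec_not_zero(atomic_int *v)
{
        int cur = atomic_load(v);

        while (cur > 0)
                if (atomic_compare_exchange_weak(v, &cur, cur - 1))
                        return 1;
        return 0;
}

static struct forw_packet *forw_packet_alloc(atomic_int *queue_left)
{
        struct forw_packet *fp;

        if (queue_left && !dec_not_zero(queue_left))
                return NULL;                        /* queue full */

        fp = malloc(sizeof(*fp));
        if (!fp) {
                if (queue_left)
                        atomic_fetch_add(queue_left, 1);        /* undo */
                return NULL;
        }
        fp->queue_left = queue_left;
        return fp;
}

static void forw_packet_free(struct forw_packet *fp)
{
        if (fp->queue_left)
                atomic_fetch_add(fp->queue_left, 1);        /* slot goes back */
        free(fp);
}

int main(void)
{
        atomic_int bcast_queue_left = 2;
        struct forw_packet *a = forw_packet_alloc(&bcast_queue_left);
        struct forw_packet *b = forw_packet_alloc(&bcast_queue_left);

        printf("third alloc: %p\n", (void *)forw_packet_alloc(&bcast_queue_left));
        forw_packet_free(a);
        forw_packet_free(b);
        printf("slots left: %d\n", atomic_load(&bcast_queue_left));
        return 0;
}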
+
 static void
 _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
                                 struct batadv_forw_packet *forw_packet,
@@ -487,24 +567,20 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
        struct batadv_bcast_packet *bcast_packet;
        struct sk_buff *newskb;
 
-       if (!batadv_atomic_dec_not_zero(&bat_priv->bcast_queue_left)) {
-               batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-                          "bcast packet queue full\n");
-               goto out;
-       }
-
        primary_if = batadv_primary_if_get_selected(bat_priv);
        if (!primary_if)
-               goto out_and_inc;
-
-       forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC);
+               goto err;
 
+       forw_packet = batadv_forw_packet_alloc(primary_if, NULL,
+                                              &bat_priv->bcast_queue_left,
+                                              bat_priv);
+       batadv_hardif_put(primary_if);
        if (!forw_packet)
-               goto out_and_inc;
+               goto err;
 
        newskb = skb_copy(skb, GFP_ATOMIC);
        if (!newskb)
-               goto packet_free;
+               goto err_packet_free;
 
        /* as we have a copy now, it is safe to decrease the TTL */
        bcast_packet = (struct batadv_bcast_packet *)newskb->data;
@@ -513,11 +589,6 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
        skb_reset_mac_header(newskb);
 
        forw_packet->skb = newskb;
-       forw_packet->if_incoming = primary_if;
-       forw_packet->if_outgoing = NULL;
-
-       /* how often did we send the bcast packet ? */
-       forw_packet->num_packets = 0;
 
        INIT_DELAYED_WORK(&forw_packet->delayed_work,
                          batadv_send_outstanding_bcast_packet);
@@ -525,13 +596,9 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
        _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay);
        return NETDEV_TX_OK;
 
-packet_free:
-       kfree(forw_packet);
-out_and_inc:
-       atomic_inc(&bat_priv->bcast_queue_left);
-out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+err_packet_free:
+       batadv_forw_packet_free(forw_packet);
+err:
        return NETDEV_TX_BUSY;
 }
 
@@ -592,7 +659,6 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
 
 out:
        batadv_forw_packet_free(forw_packet);
-       atomic_inc(&bat_priv->bcast_queue_left);
 }
 
 void
@@ -633,9 +699,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 
                if (pending) {
                        hlist_del(&forw_packet->list);
-                       if (!forw_packet->own)
-                               atomic_inc(&bat_priv->bcast_queue_left);
-
                        batadv_forw_packet_free(forw_packet);
                }
        }
@@ -663,9 +726,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 
                if (pending) {
                        hlist_del(&forw_packet->list);
-                       if (!forw_packet->own)
-                               atomic_inc(&bat_priv->batman_queue_left);
-
                        batadv_forw_packet_free(forw_packet);
                }
        }
index 7cecb75..999f786 100644 (file)
 struct sk_buff;
 
 void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet);
+struct batadv_forw_packet *
+batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
+                        struct batadv_hard_iface *if_outgoing,
+                        atomic_t *queue_left,
+                        struct batadv_priv *bat_priv);
+
 int batadv_send_skb_to_orig(struct sk_buff *skb,
                            struct batadv_orig_node *orig_node,
                            struct batadv_hard_iface *recv_if);
index 7527c06..e508bf5 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/random.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/socket.h>
@@ -46,7 +47,6 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/types.h>
-#include <linux/workqueue.h>
 
 #include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
@@ -57,6 +57,7 @@
 #include "hard-interface.h"
 #include "multicast.h"
 #include "network-coding.h"
+#include "originator.h"
 #include "packet.h"
 #include "send.h"
 #include "sysfs.h"
@@ -377,6 +378,8 @@ dropped:
 dropped_freed:
        batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
 end:
+       if (mcast_single_orig)
+               batadv_orig_node_put(mcast_single_orig);
        if (primary_if)
                batadv_hardif_put(primary_if);
        return NETDEV_TX_OK;
@@ -746,34 +749,6 @@ static void batadv_set_lockdep_class(struct net_device *dev)
        netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL);
 }
 
-/**
- * batadv_softif_destroy_finish - cleans up the remains of a softif
- * @work: work queue item
- *
- * Free the parts of the soft interface which can not be removed under
- * rtnl lock (to prevent deadlock situations).
- */
-static void batadv_softif_destroy_finish(struct work_struct *work)
-{
-       struct batadv_softif_vlan *vlan;
-       struct batadv_priv *bat_priv;
-       struct net_device *soft_iface;
-
-       bat_priv = container_of(work, struct batadv_priv,
-                               cleanup_work);
-       soft_iface = bat_priv->soft_iface;
-
-       /* destroy the "untagged" VLAN */
-       vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
-       if (vlan) {
-               batadv_softif_destroy_vlan(bat_priv, vlan);
-               batadv_softif_vlan_put(vlan);
-       }
-
-       batadv_sysfs_del_meshif(soft_iface);
-       unregister_netdev(soft_iface);
-}
-
 /**
  * batadv_softif_init_late - late stage initialization of soft interface
  * @dev: registered network device to modify
@@ -791,7 +766,6 @@ static int batadv_softif_init_late(struct net_device *dev)
 
        bat_priv = netdev_priv(dev);
        bat_priv->soft_iface = dev;
-       INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish);
 
        /* batadv_interface_stats() needs to be available as soon as
         * register_netdevice() has been called
@@ -1028,8 +1002,19 @@ struct net_device *batadv_softif_create(struct net *net, const char *name)
 void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
 {
        struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+       struct batadv_softif_vlan *vlan;
+
+       ASSERT_RTNL();
+
+       /* destroy the "untagged" VLAN */
+       vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+       if (vlan) {
+               batadv_softif_destroy_vlan(bat_priv, vlan);
+               batadv_softif_vlan_put(vlan);
+       }
 
-       queue_work(batadv_event_workqueue, &bat_priv->cleanup_work);
+       batadv_sysfs_del_meshif(soft_iface);
+       unregister_netdevice(soft_iface);
 }
 
 /**
index fe9ca94..02d96f2 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/stringify.h>
+#include <linux/workqueue.h>
 
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
@@ -428,6 +429,13 @@ static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr,
        struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
        int bytes_written;
 
+       /* GW mode is not available if the routing algorithm in use does not
+        * implement the GW API
+        */
+       if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+           !bat_priv->algo_ops->gw.is_eligible)
+               return -ENOENT;
+
        switch (atomic_read(&bat_priv->gw.mode)) {
        case BATADV_GW_MODE_CLIENT:
                bytes_written = sprintf(buff, "%s\n",
@@ -455,6 +463,13 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
        char *curr_gw_mode_str;
        int gw_mode_tmp = -1;
 
+       /* toggling GW mode is allowed only if the routing algorithm in use
+        * provides the GW API
+        */
+       if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+           !bat_priv->algo_ops->gw.is_eligible)
+               return -EINVAL;
+
        if (buff[count - 1] == '\n')
                buff[count - 1] = '\0';
 
@@ -514,6 +529,50 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
        return count;
 }
 
+static ssize_t batadv_show_gw_sel_class(struct kobject *kobj,
+                                       struct attribute *attr, char *buff)
+{
+       struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+
+       /* GW selection class is not available if the routing algorithm in use
+        * does not implement the GW API
+        */
+       if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+           !bat_priv->algo_ops->gw.is_eligible)
+               return -ENOENT;
+
+       if (bat_priv->algo_ops->gw.show_sel_class)
+               return bat_priv->algo_ops->gw.show_sel_class(bat_priv, buff);
+
+       return sprintf(buff, "%i\n", atomic_read(&bat_priv->gw.sel_class));
+}
+
+static ssize_t batadv_store_gw_sel_class(struct kobject *kobj,
+                                        struct attribute *attr, char *buff,
+                                        size_t count)
+{
+       struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+
+       /* setting the GW selection class is allowed only if the routing
+        * algorithm in use implements the GW API
+        */
+       if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+           !bat_priv->algo_ops->gw.is_eligible)
+               return -EINVAL;
+
+       if (buff[count - 1] == '\n')
+               buff[count - 1] = '\0';
+
+       if (bat_priv->algo_ops->gw.store_sel_class)
+               return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff,
+                                                             count);
+
+       return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE,
+                                       batadv_post_gw_reselect, attr,
+                                       &bat_priv->gw.sel_class,
+                                       bat_priv->soft_iface);
+}
+
 static ssize_t batadv_show_gw_bwidth(struct kobject *kobj,
                                     struct attribute *attr, char *buff)
 {
@@ -625,8 +684,8 @@ BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR,
                     2 * BATADV_JITTER, INT_MAX, NULL);
 BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0,
                     BATADV_TQ_MAX_VALUE, NULL);
-BATADV_ATTR_SIF_UINT(gw_sel_class, gw.sel_class, S_IRUGO | S_IWUSR, 1,
-                    BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect);
+static BATADV_ATTR(gw_sel_class, S_IRUGO | S_IWUSR, batadv_show_gw_sel_class,
+                  batadv_store_gw_sel_class);
 static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
                   batadv_store_gw_bwidth);
 #ifdef CONFIG_BATMAN_ADV_MCAST
@@ -712,6 +771,8 @@ rem_attr:
        for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr)
                sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
 
+       kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE);
+       kobject_del(bat_priv->mesh_obj);
        kobject_put(bat_priv->mesh_obj);
        bat_priv->mesh_obj = NULL;
 out:
@@ -726,6 +787,8 @@ void batadv_sysfs_del_meshif(struct net_device *dev)
        for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr)
                sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
 
+       kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE);
+       kobject_del(bat_priv->mesh_obj);
        kobject_put(bat_priv->mesh_obj);
        bat_priv->mesh_obj = NULL;
 }
@@ -781,6 +844,10 @@ rem_attr:
        for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr)
                sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr));
 
+       if (vlan->kobj != bat_priv->mesh_obj) {
+               kobject_uevent(vlan->kobj, KOBJ_REMOVE);
+               kobject_del(vlan->kobj);
+       }
        kobject_put(vlan->kobj);
        vlan->kobj = NULL;
 out:
@@ -800,6 +867,10 @@ void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv,
        for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr)
                sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr));
 
+       if (vlan->kobj != bat_priv->mesh_obj) {
+               kobject_uevent(vlan->kobj, KOBJ_REMOVE);
+               kobject_del(vlan->kobj);
+       }
        kobject_put(vlan->kobj);
        vlan->kobj = NULL;
 }
@@ -828,31 +899,31 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj,
        return length;
 }
 
-static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
-                                      struct attribute *attr, char *buff,
-                                      size_t count)
+/**
+ * batadv_store_mesh_iface_finish - store new hardif mesh_iface state
+ * @net_dev: netdevice to add/remove to/from batman-adv soft-interface
+ * @ifname: name of soft-interface to modify
+ *
+ * Changes the parts of the hard+soft interface which can not be modified under
+ * sysfs lock (to prevent deadlock situations).
+ *
+ * Return: 0 on success, 0 < on failure
+ */
+static int batadv_store_mesh_iface_finish(struct net_device *net_dev,
+                                         char ifname[IFNAMSIZ])
 {
-       struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
        struct net *net = dev_net(net_dev);
        struct batadv_hard_iface *hard_iface;
-       int status_tmp = -1;
-       int ret = count;
+       int status_tmp;
+       int ret = 0;
+
+       ASSERT_RTNL();
 
        hard_iface = batadv_hardif_get_by_netdev(net_dev);
        if (!hard_iface)
-               return count;
-
-       if (buff[count - 1] == '\n')
-               buff[count - 1] = '\0';
-
-       if (strlen(buff) >= IFNAMSIZ) {
-               pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n",
-                      buff);
-               batadv_hardif_put(hard_iface);
-               return -EINVAL;
-       }
+               return 0;
 
-       if (strncmp(buff, "none", 4) == 0)
+       if (strncmp(ifname, "none", 4) == 0)
                status_tmp = BATADV_IF_NOT_IN_USE;
        else
                status_tmp = BATADV_IF_I_WANT_YOU;
@@ -861,15 +932,13 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
                goto out;
 
        if ((hard_iface->soft_iface) &&
-           (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0))
+           (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0))
                goto out;
 
-       rtnl_lock();
-
        if (status_tmp == BATADV_IF_NOT_IN_USE) {
                batadv_hardif_disable_interface(hard_iface,
                                                BATADV_IF_CLEANUP_AUTO);
-               goto unlock;
+               goto out;
        }
 
        /* if the interface already is in use */
@@ -877,15 +946,71 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
                batadv_hardif_disable_interface(hard_iface,
                                                BATADV_IF_CLEANUP_AUTO);
 
-       ret = batadv_hardif_enable_interface(hard_iface, net, buff);
-
-unlock:
-       rtnl_unlock();
+       ret = batadv_hardif_enable_interface(hard_iface, net, ifname);
 out:
        batadv_hardif_put(hard_iface);
        return ret;
 }
 
+/**
+ * batadv_store_mesh_iface_work - store new hardif mesh_iface state
+ * @work: work queue item
+ *
+ * Changes the parts of the hard+soft interface which cannot be modified under
+ * sysfs lock (to prevent deadlock situations).
+ */
+static void batadv_store_mesh_iface_work(struct work_struct *work)
+{
+       struct batadv_store_mesh_work *store_work;
+       int ret;
+
+       store_work = container_of(work, struct batadv_store_mesh_work, work);
+
+       rtnl_lock();
+       ret = batadv_store_mesh_iface_finish(store_work->net_dev,
+                                            store_work->soft_iface_name);
+       rtnl_unlock();
+
+       if (ret < 0)
+               pr_err("Failed to store new mesh_iface state %s for %s: %d\n",
+                      store_work->soft_iface_name, store_work->net_dev->name,
+                      ret);
+
+       dev_put(store_work->net_dev);
+       kfree(store_work);
+}
+
+static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
+                                      struct attribute *attr, char *buff,
+                                      size_t count)
+{
+       struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
+       struct batadv_store_mesh_work *store_work;
+
+       if (buff[count - 1] == '\n')
+               buff[count - 1] = '\0';
+
+       if (strlen(buff) >= IFNAMSIZ) {
+               pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n",
+                      buff);
+               return -EINVAL;
+       }
+
+       store_work = kmalloc(sizeof(*store_work), GFP_KERNEL);
+       if (!store_work)
+               return -ENOMEM;
+
+       dev_hold(net_dev);
+       INIT_WORK(&store_work->work, batadv_store_mesh_iface_work);
+       store_work->net_dev = net_dev;
+       strlcpy(store_work->soft_iface_name, buff,
+               sizeof(store_work->soft_iface_name));
+
+       queue_work(batadv_event_workqueue, &store_work->work);
+
+       return count;
+}
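Writing to the mesh_iface sysfs attribute no longer takes rtnl_lock in the store handler: batadv_store_mesh_iface() only validates the name, copies it together with a held reference to the net_device into a batadv_store_mesh_work item, and queues it on batadv_event_workqueue, where batadv_store_mesh_iface_work() later applies it under rtnl. A userspace analog of that copy-the-request, defer-to-a-worker shape (leaving out the device refcounting, with a detached pthread standing in for the workqueue):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define IFNAMSIZ 16

struct store_work {
        char ifname[IFNAMSIZ];        /* copied request, owned by the worker */
};

/* runs later, in "workqueue" context, where taking heavy locks is fine */
static void *store_worker(void *arg)
{
        struct store_work *w = arg;

        printf("applying mesh_iface = %s\n", w->ifname);
        free(w);
        return NULL;
}

/* the store path: validate, copy, queue, return to the writer immediately */
static int store_mesh_iface(const char *buff)
{
        struct store_work *w;
        pthread_t tid;

        if (strlen(buff) >= IFNAMSIZ)
                return -1;                /* name too long */

        w = malloc(sizeof(*w));
        if (!w)
                return -1;
        snprintf(w->ifname, sizeof(w->ifname), "%s", buff);

        if (pthread_create(&tid, NULL, store_worker, w)) {
                free(w);
                return -1;
        }
        pthread_detach(tid);
        return 0;
}

int main(void)
{
        store_mesh_iface("bat0");
        pthread_exit(NULL);        /* let the detached worker finish */
}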
+
 static ssize_t batadv_show_iface_status(struct kobject *kobj,
                                        struct attribute *attr, char *buff)
 {
@@ -1048,6 +1173,8 @@ out:
 
 void batadv_sysfs_del_hardif(struct kobject **hardif_obj)
 {
+       kobject_uevent(*hardif_obj, KOBJ_REMOVE);
+       kobject_del(*hardif_obj);
        kobject_put(*hardif_obj);
        *hardif_obj = NULL;
 }
index 7e6df7a..2080407 100644 (file)
 #include <linux/bitops.h>
 #include <linux/bug.h>
 #include <linux/byteorder/generic.h>
+#include <linux/cache.h>
 #include <linux/compiler.h>
 #include <linux/crc32c.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
 #include <linux/fs.h>
 #include <linux/if_ether.h>
+#include <linux/init.h>
 #include <linux/jhash.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bridge_loop_avoidance.h"
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
 #include "multicast.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
 #include "soft-interface.h"
 #include "tvlv.h"
 
+static struct kmem_cache *batadv_tl_cache __read_mostly;
+static struct kmem_cache *batadv_tg_cache __read_mostly;
+static struct kmem_cache *batadv_tt_orig_cache __read_mostly;
+static struct kmem_cache *batadv_tt_change_cache __read_mostly;
+static struct kmem_cache *batadv_tt_req_cache __read_mostly;
+static struct kmem_cache *batadv_tt_roam_cache __read_mostly;
+
 /* hash class keys */
 static struct lock_class_key batadv_tt_local_hash_lock_class_key;
 static struct lock_class_key batadv_tt_global_hash_lock_class_key;
@@ -204,6 +220,20 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
        return tt_global_entry;
 }
 
+/**
+ * batadv_tt_local_entry_free_rcu - free the tt_local_entry
+ * @rcu: rcu pointer of the tt_local_entry
+ */
+static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu)
+{
+       struct batadv_tt_local_entry *tt_local_entry;
+
+       tt_local_entry = container_of(rcu, struct batadv_tt_local_entry,
+                                     common.rcu);
+
+       kmem_cache_free(batadv_tl_cache, tt_local_entry);
+}
+
 /**
  * batadv_tt_local_entry_release - release tt_local_entry from lists and queue
  *  for free after rcu grace period
@@ -218,7 +248,7 @@ static void batadv_tt_local_entry_release(struct kref *ref)
 
        batadv_softif_vlan_put(tt_local_entry->vlan);
 
-       kfree_rcu(tt_local_entry, common.rcu);
+       call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu);
 }
 
 /**
@@ -233,6 +263,20 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
                 batadv_tt_local_entry_release);
 }
 
+/**
+ * batadv_tt_global_entry_free_rcu - free the tt_global_entry
+ * @rcu: rcu pointer of the tt_global_entry
+ */
+static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
+{
+       struct batadv_tt_global_entry *tt_global_entry;
+
+       tt_global_entry = container_of(rcu, struct batadv_tt_global_entry,
+                                      common.rcu);
+
+       kmem_cache_free(batadv_tg_cache, tt_global_entry);
+}
+
 /**
  * batadv_tt_global_entry_release - release tt_global_entry from lists and queue
  *  for free after rcu grace period
@@ -246,7 +290,8 @@ static void batadv_tt_global_entry_release(struct kref *ref)
                                       common.refcount);
 
        batadv_tt_global_del_orig_list(tt_global_entry);
-       kfree_rcu(tt_global_entry, common.rcu);
+
+       call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu);
 }
 
 /**
@@ -383,6 +428,19 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
        batadv_tt_global_size_mod(orig_node, vid, -1);
 }
 
+/**
+ * batadv_tt_orig_list_entry_free_rcu - free the orig_entry
+ * @rcu: rcu pointer of the orig_entry
+ */
+static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
+{
+       struct batadv_tt_orig_list_entry *orig_entry;
+
+       orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
+
+       kmem_cache_free(batadv_tt_orig_cache, orig_entry);
+}
+
 /**
  * batadv_tt_orig_list_entry_release - release tt orig entry from lists and
  *  queue for free after rcu grace period
@@ -396,7 +454,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref)
                                  refcount);
 
        batadv_orig_node_put(orig_entry->orig_node);
-       kfree_rcu(orig_entry, rcu);
+       call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
 }
 
 /**
@@ -426,7 +484,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
        bool event_removed = false;
        bool del_op_requested, del_op_entry;
 
-       tt_change_node = kmalloc(sizeof(*tt_change_node), GFP_ATOMIC);
+       tt_change_node = kmem_cache_alloc(batadv_tt_change_cache, GFP_ATOMIC);
        if (!tt_change_node)
                return;
 
@@ -467,8 +525,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
                continue;
 del:
                list_del(&entry->list);
-               kfree(entry);
-               kfree(tt_change_node);
+               kmem_cache_free(batadv_tt_change_cache, entry);
+               kmem_cache_free(batadv_tt_change_cache, tt_change_node);
                event_removed = true;
                goto unlock;
        }
@@ -646,7 +704,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
                goto out;
        }
 
-       tt_local = kmalloc(sizeof(*tt_local), GFP_ATOMIC);
+       tt_local = kmem_cache_alloc(batadv_tl_cache, GFP_ATOMIC);
        if (!tt_local)
                goto out;
 
@@ -656,7 +714,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
                net_ratelimited_function(batadv_info, soft_iface,
                                         "adding TT local entry %pM to non-existent VLAN %d\n",
                                         addr, BATADV_PRINT_VID(vid));
-               kfree(tt_local);
+               kmem_cache_free(batadv_tl_cache, tt_local);
                tt_local = NULL;
                goto out;
        }
@@ -959,7 +1017,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
                        tt_diff_entries_count++;
                }
                list_del(&entry->list);
-               kfree(entry);
+               kmem_cache_free(batadv_tt_change_cache, entry);
        }
        spin_unlock_bh(&bat_priv->tt.changes_list_lock);
 
@@ -1057,6 +1115,164 @@ out:
        return 0;
 }
 
+/**
+ * batadv_tt_local_dump_entry - Dump one TT local entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @common: tt local & tt global common data
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                          struct batadv_priv *bat_priv,
+                          struct batadv_tt_common_entry *common)
+{
+       void *hdr;
+       struct batadv_softif_vlan *vlan;
+       struct batadv_tt_local_entry *local;
+       unsigned int last_seen_msecs;
+       u32 crc;
+
+       local = container_of(common, struct batadv_tt_local_entry, common);
+       last_seen_msecs = jiffies_to_msecs(jiffies - local->last_seen);
+
+       vlan = batadv_softif_vlan_get(bat_priv, common->vid);
+       if (!vlan)
+               return 0;
+
+       crc = vlan->tt.crc;
+
+       batadv_softif_vlan_put(vlan);
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+                         NLM_F_MULTI,
+                         BATADV_CMD_GET_TRANSTABLE_LOCAL);
+       if (!hdr)
+               return -ENOBUFS;
+
+       if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
+           nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
+           nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
+           nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags))
+               goto nla_put_failure;
+
+       if (!(common->flags & BATADV_TT_CLIENT_NOPURGE) &&
+           nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs))
+               goto nla_put_failure;
+
+       genlmsg_end(msg, hdr);
+       return 0;
+
+ nla_put_failure:
+       genlmsg_cancel(msg, hdr);
+       return -EMSGSIZE;
+}
+
+/**
+ * batadv_tt_local_dump_bucket - Dump one TT local bucket into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @head: Pointer to the list containing the local tt entries
+ * @idx_s: Number of entries to skip
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+                           struct batadv_priv *bat_priv,
+                           struct hlist_head *head, int *idx_s)
+{
+       struct batadv_tt_common_entry *common;
+       int idx = 0;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(common, head, hash_entry) {
+               if (idx++ < *idx_s)
+                       continue;
+
+               if (batadv_tt_local_dump_entry(msg, portid, seq, bat_priv,
+                                              common)) {
+                       rcu_read_unlock();
+                       *idx_s = idx - 1;
+                       return -EMSGSIZE;
+               }
+       }
+       rcu_read_unlock();
+
+       *idx_s = 0;
+       return 0;
+}
+
+/**
+ * batadv_tt_local_dump - Dump TT local entries into a message
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: Error code, or length of message on success
+ */
+int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       struct net *net = sock_net(cb->skb->sk);
+       struct net_device *soft_iface;
+       struct batadv_priv *bat_priv;
+       struct batadv_hard_iface *primary_if = NULL;
+       struct batadv_hashtable *hash;
+       struct hlist_head *head;
+       int ret;
+       int ifindex;
+       int bucket = cb->args[0];
+       int idx = cb->args[1];
+       int portid = NETLINK_CB(cb->skb).portid;
+
+       ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+       if (!ifindex)
+               return -EINVAL;
+
+       soft_iface = dev_get_by_index(net, ifindex);
+       if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+               ret = -ENODEV;
+               goto out;
+       }
+
+       bat_priv = netdev_priv(soft_iface);
+
+       primary_if = batadv_primary_if_get_selected(bat_priv);
+       if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       hash = bat_priv->tt.local_hash;
+
+       while (bucket < hash->size) {
+               head = &hash->table[bucket];
+
+               if (batadv_tt_local_dump_bucket(msg, portid, cb->nlh->nlmsg_seq,
+                                               bat_priv, head, &idx))
+                       break;
+
+               bucket++;
+       }
+
+       ret = msg->len;
+
+ out:
+       if (primary_if)
+               batadv_hardif_put(primary_if);
+       if (soft_iface)
+               dev_put(soft_iface);
+
+       cb->args[0] = bucket;
+       cb->args[1] = idx;
+
+       return ret;
+}
+
 static void
 batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
                            struct batadv_tt_local_entry *tt_local_entry,
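
The dump helpers above walk the local hash one bucket at a time and park their position in cb->args[0]/cb->args[1], so the next netlink dump round can resume exactly where the previous message ran out of room. Below is a minimal, self-contained sketch of that resumable-cursor idea in plain C; the table size, message budget and all names are illustrative only and not part of batman-adv.

#include <stdio.h>

#define BUCKETS      4
#define PER_BUCKET   3
#define MSG_BUDGET   5   /* entries that fit into one "message" */

/* Hypothetical hash table: table[bucket][idx] are the entries. */
static int table[BUCKETS][PER_BUCKET];

/*
 * Emit up to MSG_BUDGET entries, starting at (*bucket, *idx).
 * Returns the number of entries emitted; the cursor is left at the first
 * entry that did not fit, mirroring cb->args[0]/cb->args[1] in the patch.
 */
static int dump_round(int *bucket, int *idx)
{
	int emitted = 0;

	while (*bucket < BUCKETS) {
		while (*idx < PER_BUCKET) {
			if (emitted == MSG_BUDGET)
				return emitted;     /* resume here next round */
			printf("bucket %d idx %d -> %d\n",
			       *bucket, *idx, table[*bucket][*idx]);
			emitted++;
			(*idx)++;
		}
		*idx = 0;                           /* bucket done, next one */
		(*bucket)++;
	}
	return emitted;
}

int main(void)
{
	int bucket = 0, idx = 0, n, round = 0;

	for (int b = 0; b < BUCKETS; b++)
		for (int i = 0; i < PER_BUCKET; i++)
			table[b][i] = b * 10 + i;

	while ((n = dump_round(&bucket, &idx)) > 0)
		printf("-- round %d emitted %d entries --\n", round++, n);
	return 0;
}
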
@@ -1259,7 +1475,7 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv)
        list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list,
                                 list) {
                list_del(&entry->list);
-               kfree(entry);
+               kmem_cache_free(batadv_tt_change_cache, entry);
        }
 
        atomic_set(&bat_priv->tt.local_changes, 0);
@@ -1341,7 +1557,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
                goto out;
        }
 
-       orig_entry = kzalloc(sizeof(*orig_entry), GFP_ATOMIC);
+       orig_entry = kmem_cache_zalloc(batadv_tt_orig_cache, GFP_ATOMIC);
        if (!orig_entry)
                goto out;
 
@@ -1411,7 +1627,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
                goto out;
 
        if (!tt_global_entry) {
-               tt_global_entry = kzalloc(sizeof(*tt_global_entry), GFP_ATOMIC);
+               tt_global_entry = kmem_cache_zalloc(batadv_tg_cache,
+                                                   GFP_ATOMIC);
                if (!tt_global_entry)
                        goto out;
 
@@ -1703,6 +1920,218 @@ out:
        return 0;
 }
 
+/**
+ * batadv_tt_global_dump_subentry - Dump one TT global subentry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @common: tt local & tt global common data
+ * @orig: Originator node announcing a non-mesh client
+ * @best: Whether this is the best originator for the TT entry
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+                              struct batadv_tt_common_entry *common,
+                              struct batadv_tt_orig_list_entry *orig,
+                              bool best)
+{
+       void *hdr;
+       struct batadv_orig_node_vlan *vlan;
+       u8 last_ttvn;
+       u32 crc;
+
+       vlan = batadv_orig_node_vlan_get(orig->orig_node,
+                                        common->vid);
+       if (!vlan)
+               return 0;
+
+       crc = vlan->tt.crc;
+
+       batadv_orig_node_vlan_put(vlan);
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+                         NLM_F_MULTI,
+                         BATADV_CMD_GET_TRANSTABLE_GLOBAL);
+       if (!hdr)
+               return -ENOBUFS;
+
+       last_ttvn = atomic_read(&orig->orig_node->last_ttvn);
+
+       if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
+           nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+                   orig->orig_node->orig) ||
+           nla_put_u8(msg, BATADV_ATTR_TT_TTVN, orig->ttvn) ||
+           nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) ||
+           nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
+           nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
+           nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags))
+               goto nla_put_failure;
+
+       if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+               goto nla_put_failure;
+
+       genlmsg_end(msg, hdr);
+       return 0;
+
+ nla_put_failure:
+       genlmsg_cancel(msg, hdr);
+       return -EMSGSIZE;
+}
+
+/**
+ * batadv_tt_global_dump_entry - Dump one TT global entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @common: tt local & tt global common data
+ * @sub_s: Number of entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                           struct batadv_priv *bat_priv,
+                           struct batadv_tt_common_entry *common, int *sub_s)
+{
+       struct batadv_tt_orig_list_entry *orig_entry, *best_entry;
+       struct batadv_tt_global_entry *global;
+       struct hlist_head *head;
+       int sub = 0;
+       bool best;
+
+       global = container_of(common, struct batadv_tt_global_entry, common);
+       best_entry = batadv_transtable_best_orig(bat_priv, global);
+       head = &global->orig_list;
+
+       hlist_for_each_entry_rcu(orig_entry, head, list) {
+               if (sub++ < *sub_s)
+                       continue;
+
+               best = (orig_entry == best_entry);
+
+               if (batadv_tt_global_dump_subentry(msg, portid, seq, common,
+                                                  orig_entry, best)) {
+                       *sub_s = sub - 1;
+                       return -EMSGSIZE;
+               }
+       }
+
+       *sub_s = 0;
+       return 0;
+}
+
+/**
+ * batadv_tt_global_dump_bucket - Dump one TT global bucket into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @head: Pointer to the list containing the global tt entries
+ * @idx_s: Number of entries to skip
+ * @sub: Number of originator subentries to skip within the current entry
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+                            struct batadv_priv *bat_priv,
+                            struct hlist_head *head, int *idx_s, int *sub)
+{
+       struct batadv_tt_common_entry *common;
+       int idx = 0;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(common, head, hash_entry) {
+               if (idx++ < *idx_s)
+                       continue;
+
+               if (batadv_tt_global_dump_entry(msg, portid, seq, bat_priv,
+                                               common, sub)) {
+                       rcu_read_unlock();
+                       *idx_s = idx - 1;
+                       return -EMSGSIZE;
+               }
+       }
+       rcu_read_unlock();
+
+       *idx_s = 0;
+       *sub = 0;
+       return 0;
+}
+
+/**
+ * batadv_tt_global_dump - Dump TT global entries into a message
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: Error code, or length of message on success
+ */
+int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       struct net *net = sock_net(cb->skb->sk);
+       struct net_device *soft_iface;
+       struct batadv_priv *bat_priv;
+       struct batadv_hard_iface *primary_if = NULL;
+       struct batadv_hashtable *hash;
+       struct hlist_head *head;
+       int ret;
+       int ifindex;
+       int bucket = cb->args[0];
+       int idx = cb->args[1];
+       int sub = cb->args[2];
+       int portid = NETLINK_CB(cb->skb).portid;
+
+       ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+       if (!ifindex)
+               return -EINVAL;
+
+       soft_iface = dev_get_by_index(net, ifindex);
+       if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+               ret = -ENODEV;
+               goto out;
+       }
+
+       bat_priv = netdev_priv(soft_iface);
+
+       primary_if = batadv_primary_if_get_selected(bat_priv);
+       if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       hash = bat_priv->tt.global_hash;
+
+       while (bucket < hash->size) {
+               head = &hash->table[bucket];
+
+               if (batadv_tt_global_dump_bucket(msg, portid,
+                                                cb->nlh->nlmsg_seq, bat_priv,
+                                                head, &idx, &sub))
+                       break;
+
+               bucket++;
+       }
+
+       ret = msg->len;
+
+ out:
+       if (primary_if)
+               batadv_hardif_put(primary_if);
+       if (soft_iface)
+               dev_put(soft_iface);
+
+       cb->args[0] = bucket;
+       cb->args[1] = idx;
+       cb->args[2] = sub;
+
+       return ret;
+}
+
 /**
  * _batadv_tt_global_del_orig_entry - remove and free an orig_entry
  * @tt_global_entry: the global entry to remove the orig_entry from
@@ -2280,7 +2709,7 @@ static void batadv_tt_req_node_release(struct kref *ref)
 
        tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount);
 
-       kfree(tt_req_node);
+       kmem_cache_free(batadv_tt_req_cache, tt_req_node);
 }
 
 /**
@@ -2367,7 +2796,7 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv,
                        goto unlock;
        }
 
-       tt_req_node = kmalloc(sizeof(*tt_req_node), GFP_ATOMIC);
+       tt_req_node = kmem_cache_alloc(batadv_tt_req_cache, GFP_ATOMIC);
        if (!tt_req_node)
                goto unlock;
 
@@ -3104,7 +3533,7 @@ static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv)
 
        list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) {
                list_del(&node->list);
-               kfree(node);
+               kmem_cache_free(batadv_tt_roam_cache, node);
        }
 
        spin_unlock_bh(&bat_priv->tt.roam_list_lock);
@@ -3121,7 +3550,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv)
                        continue;
 
                list_del(&node->list);
-               kfree(node);
+               kmem_cache_free(batadv_tt_roam_cache, node);
        }
        spin_unlock_bh(&bat_priv->tt.roam_list_lock);
 }
@@ -3162,7 +3591,8 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client)
        }
 
        if (!ret) {
-               tt_roam_node = kmalloc(sizeof(*tt_roam_node), GFP_ATOMIC);
+               tt_roam_node = kmem_cache_alloc(batadv_tt_roam_cache,
+                                               GFP_ATOMIC);
                if (!tt_roam_node)
                        goto unlock;
 
@@ -3865,3 +4295,85 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
 
        return ret;
 }
+
+/**
+ * batadv_tt_cache_init - Initialize tt memory object caches
+ *
+ * Return: 0 on success or negative error number in case of failure.
+ */
+int __init batadv_tt_cache_init(void)
+{
+       size_t tl_size = sizeof(struct batadv_tt_local_entry);
+       size_t tg_size = sizeof(struct batadv_tt_global_entry);
+       size_t tt_orig_size = sizeof(struct batadv_tt_orig_list_entry);
+       size_t tt_change_size = sizeof(struct batadv_tt_change_node);
+       size_t tt_req_size = sizeof(struct batadv_tt_req_node);
+       size_t tt_roam_size = sizeof(struct batadv_tt_roam_node);
+
+       batadv_tl_cache = kmem_cache_create("batadv_tl_cache", tl_size, 0,
+                                           SLAB_HWCACHE_ALIGN, NULL);
+       if (!batadv_tl_cache)
+               return -ENOMEM;
+
+       batadv_tg_cache = kmem_cache_create("batadv_tg_cache", tg_size, 0,
+                                           SLAB_HWCACHE_ALIGN, NULL);
+       if (!batadv_tg_cache)
+               goto err_tt_tl_destroy;
+
+       batadv_tt_orig_cache = kmem_cache_create("batadv_tt_orig_cache",
+                                                tt_orig_size, 0,
+                                                SLAB_HWCACHE_ALIGN, NULL);
+       if (!batadv_tt_orig_cache)
+               goto err_tt_tg_destroy;
+
+       batadv_tt_change_cache = kmem_cache_create("batadv_tt_change_cache",
+                                                  tt_change_size, 0,
+                                                  SLAB_HWCACHE_ALIGN, NULL);
+       if (!batadv_tt_change_cache)
+               goto err_tt_orig_destroy;
+
+       batadv_tt_req_cache = kmem_cache_create("batadv_tt_req_cache",
+                                               tt_req_size, 0,
+                                               SLAB_HWCACHE_ALIGN, NULL);
+       if (!batadv_tt_req_cache)
+               goto err_tt_change_destroy;
+
+       batadv_tt_roam_cache = kmem_cache_create("batadv_tt_roam_cache",
+                                                tt_roam_size, 0,
+                                                SLAB_HWCACHE_ALIGN, NULL);
+       if (!batadv_tt_roam_cache)
+               goto err_tt_req_destroy;
+
+       return 0;
+
+err_tt_req_destroy:
+       kmem_cache_destroy(batadv_tt_req_cache);
+       batadv_tt_req_cache = NULL;
+err_tt_change_destroy:
+       kmem_cache_destroy(batadv_tt_change_cache);
+       batadv_tt_change_cache = NULL;
+err_tt_orig_destroy:
+       kmem_cache_destroy(batadv_tt_orig_cache);
+       batadv_tt_orig_cache = NULL;
+err_tt_tg_destroy:
+       kmem_cache_destroy(batadv_tg_cache);
+       batadv_tg_cache = NULL;
+err_tt_tl_destroy:
+       kmem_cache_destroy(batadv_tl_cache);
+       batadv_tl_cache = NULL;
+
+       return -ENOMEM;
+}
+
+/**
+ * batadv_tt_cache_destroy - Destroy tt memory object caches
+ */
+void batadv_tt_cache_destroy(void)
+{
+       kmem_cache_destroy(batadv_tl_cache);
+       kmem_cache_destroy(batadv_tg_cache);
+       kmem_cache_destroy(batadv_tt_orig_cache);
+       kmem_cache_destroy(batadv_tt_change_cache);
+       kmem_cache_destroy(batadv_tt_req_cache);
+       kmem_cache_destroy(batadv_tt_roam_cache);
+}
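
batadv_tt_cache_init()/batadv_tt_cache_destroy() above follow the usual slab-cache lifecycle: one kmem_cache per fixed-size object type is created at module init, objects are allocated from and returned to that cache at runtime instead of going through kzalloc()/kfree(), and the cache is destroyed at module exit. A stripped-down sketch of that pattern for a single hypothetical cache (the struct and all names are illustrative, not taken from the patch):

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>

/* Hypothetical fixed-size object managed through its own slab cache. */
struct demo_entry {
	u32 id;
	u8 addr[6];
};

static struct kmem_cache *demo_cache;

static int __init demo_init(void)
{
	struct demo_entry *e;

	/* One cache per object type keeps allocations densely packed and
	 * makes the usage visible in /proc/slabinfo under "demo_cache".
	 */
	demo_cache = kmem_cache_create("demo_cache", sizeof(struct demo_entry),
				       0, SLAB_HWCACHE_ALIGN, NULL);
	if (!demo_cache)
		return -ENOMEM;

	/* Objects come from and go back to the cache. */
	e = kmem_cache_zalloc(demo_cache, GFP_KERNEL);
	if (!e) {
		kmem_cache_destroy(demo_cache);
		return -ENOMEM;
	}
	e->id = 1;
	kmem_cache_free(demo_cache, e);

	return 0;
}

static void __exit demo_exit(void)
{
	/* All objects must already be freed before the cache goes away. */
	kmem_cache_destroy(demo_cache);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
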
index 7c7e2c0..783fdba 100644 (file)
 
 #include <linux/types.h>
 
+struct netlink_callback;
 struct net_device;
 struct seq_file;
+struct sk_buff;
 
 int batadv_tt_init(struct batadv_priv *bat_priv);
 bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
@@ -33,6 +35,8 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv,
                           const char *message, bool roaming);
 int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb);
+int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb);
 void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
                               struct batadv_orig_node *orig_node,
                               s32 match_vid, const char *message);
@@ -59,4 +63,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
 bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
                                  const u8 *addr, unsigned short vid);
 
+int batadv_tt_cache_init(void);
+void batadv_tt_cache_destroy(void);
+
 #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
index a64522c..b5f01a3 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/if_ether.h>
 #include <linux/kref.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/sched.h> /* for linux/wait.h */
 #include <linux/spinlock.h>
 #include <linux/types.h>
@@ -132,7 +133,6 @@ struct batadv_hard_iface_bat_v {
  * @rcu: struct used for freeing in an RCU-safe manner
  * @bat_iv: per hard-interface B.A.T.M.A.N. IV data
  * @bat_v: per hard-interface B.A.T.M.A.N. V data
- * @cleanup_work: work queue callback item for hard-interface deinit
  * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
  * @neigh_list: list of unique single hop neighbors via this interface
  * @neigh_list_lock: lock protecting neigh_list
@@ -152,7 +152,6 @@ struct batadv_hard_iface {
 #ifdef CONFIG_BATMAN_ADV_BATMAN_V
        struct batadv_hard_iface_bat_v bat_v;
 #endif
-       struct work_struct cleanup_work;
        struct dentry *debug_dir;
        struct hlist_head neigh_list;
        /* neigh_list_lock protects: neigh_list */
@@ -1015,7 +1014,6 @@ struct batadv_priv_bat_v {
  * @forw_bcast_list_lock: lock protecting forw_bcast_list
  * @tp_list_lock: spinlock protecting @tp_list
  * @orig_work: work queue callback item for orig node purging
- * @cleanup_work: work queue callback item for soft-interface deinit
  * @primary_if: one of the hard-interfaces assigned to this mesh interface
  *  becomes the primary interface
  * @algo_ops: routing algorithm used by this mesh interface
@@ -1074,7 +1072,6 @@ struct batadv_priv {
        spinlock_t tp_list_lock; /* protects tp_list */
        atomic_t tp_num;
        struct delayed_work orig_work;
-       struct work_struct cleanup_work;
        struct batadv_hard_iface __rcu *primary_if;  /* rcu protected pointer */
        struct batadv_algo_ops *algo_ops;
        struct hlist_head softif_vlan_list;
@@ -1379,6 +1376,7 @@ struct batadv_skb_cb {
  *  locally generated packet
  * @if_outgoing: packet where the packet should be sent to, or NULL if
  *  unspecified
+ * @queue_left: The queue (counter) this packet was charged against
  */
 struct batadv_forw_packet {
        struct hlist_node list;
@@ -1391,11 +1389,13 @@ struct batadv_forw_packet {
        struct delayed_work delayed_work;
        struct batadv_hard_iface *if_incoming;
        struct batadv_hard_iface *if_outgoing;
+       atomic_t *queue_left;
 };
 
 /**
  * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific)
  * @activate: start routing mechanisms when hard-interface is brought up
+ *  (optional)
  * @enable: init routing info when hard-interface is enabled
  * @disable: de-init routing info when hard-interface is disabled
  * @update_mac: (re-)init mac addresses of the protocol information
@@ -1413,11 +1413,13 @@ struct batadv_algo_iface_ops {
 /**
  * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific)
  * @hardif_init: called on creation of single hop entry
+ *  (optional)
  * @cmp: compare the metrics of two neighbors for their respective outgoing
  *  interfaces
  * @is_similar_or_better: check if neigh1 is equally similar or better than
  *  neigh2 for their respective outgoing interface from the metric prospective
  * @print: print the single hop neighbor list (optional)
+ * @dump: dump neighbors to a netlink socket (optional)
  */
 struct batadv_algo_neigh_ops {
        void (*hardif_init)(struct batadv_hardif_neigh_node *neigh);
@@ -1430,17 +1432,21 @@ struct batadv_algo_neigh_ops {
                                     struct batadv_neigh_node *neigh2,
                                     struct batadv_hard_iface *if_outgoing2);
        void (*print)(struct batadv_priv *priv, struct seq_file *seq);
+       void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+                    struct batadv_priv *priv,
+                    struct batadv_hard_iface *hard_iface);
 };
 
 /**
  * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific)
  * @free: free the resources allocated by the routing algorithm for an orig_node
- *  object
+ *  object (optional)
  * @add_if: ask the routing algorithm to apply the needed changes to the
- *  orig_node due to a new hard-interface being added into the mesh
+ *  orig_node due to a new hard-interface being added into the mesh (optional)
  * @del_if: ask the routing algorithm to apply the needed changes to the
- *  orig_node due to an hard-interface being removed from the mesh
+ *  orig_node due to a hard-interface being removed from the mesh (optional)
  * @print: print the originator table (optional)
+ * @dump: dump originators to a netlink socket (optional)
  */
 struct batadv_algo_orig_ops {
        void (*free)(struct batadv_orig_node *orig_node);
@@ -1449,6 +1455,34 @@ struct batadv_algo_orig_ops {
                      int del_if_num);
        void (*print)(struct batadv_priv *priv, struct seq_file *seq,
                      struct batadv_hard_iface *hard_iface);
+       void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+                    struct batadv_priv *priv,
+                    struct batadv_hard_iface *hard_iface);
+};
+
+/**
+ * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific)
+ * @store_sel_class: parse and store a new GW selection class (optional)
+ * @show_sel_class: print the current GW selection class (optional)
+ * @get_best_gw_node: select the best GW from the list of available nodes
+ *  (optional)
+ * @is_eligible: check if a newly discovered GW is a potential candidate for
+ *  the election as best GW (optional)
+ * @print: print the gateway table (optional)
+ * @dump: dump gateways to a netlink socket (optional)
+ */
+struct batadv_algo_gw_ops {
+       ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
+                                  size_t count);
+       ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff);
+       struct batadv_gw_node *(*get_best_gw_node)
+               (struct batadv_priv *bat_priv);
+       bool (*is_eligible)(struct batadv_priv *bat_priv,
+                           struct batadv_orig_node *curr_gw_orig,
+                           struct batadv_orig_node *orig_node);
+       void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq);
+       void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+                    struct batadv_priv *priv);
 };
 
 /**
@@ -1458,6 +1492,7 @@ struct batadv_algo_orig_ops {
  * @iface: callbacks related to interface handling
  * @neigh: callbacks related to neighbors handling
  * @orig: callbacks related to originators handling
+ * @gw: callbacks related to GW mode
  */
 struct batadv_algo_ops {
        struct hlist_node list;
@@ -1465,6 +1500,7 @@ struct batadv_algo_ops {
        struct batadv_algo_iface_ops iface;
        struct batadv_algo_neigh_ops neigh;
        struct batadv_algo_orig_ops orig;
+       struct batadv_algo_gw_ops gw;
 };
 
 /**
@@ -1564,4 +1600,17 @@ enum batadv_tvlv_handler_flags {
        BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2),
 };
 
+/**
+ * struct batadv_store_mesh_work - Work queue item to defer an interface
+ *  add/del out of the sysfs locks
+ * @net_dev: netdevice to add/remove to/from batman-adv soft-interface
+ * @soft_iface_name: name of soft-interface to modify
+ * @work: work queue item
+ */
+struct batadv_store_mesh_work {
+       struct net_device *net_dev;
+       char soft_iface_name[IFNAMSIZ];
+       struct work_struct work;
+};
+
 #endif /* _NET_BATMAN_ADV_TYPES_H_ */
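
struct batadv_store_mesh_work exists so a sysfs store handler can capture the request and hand it to a worker, letting the actual interface add/delete run later without the sysfs locks held. A generic sketch of that hand-off pattern (kernel-style C; all names are hypothetical and not the batman-adv implementation):

#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/workqueue.h>

struct demo_store_work {
	char name[16];              /* data captured from the sysfs store */
	struct work_struct work;
};

static void demo_store_worker(struct work_struct *work)
{
	struct demo_store_work *sw;

	sw = container_of(work, struct demo_store_work, work);

	/* The heavy lifting runs here, outside of the sysfs locks. */
	pr_info("deferred action for %s\n", sw->name);

	kfree(sw);
}

/* Called from the sysfs ->store() path: capture the request and defer it. */
static int demo_store_defer(const char *name)
{
	struct demo_store_work *sw;

	sw = kzalloc(sizeof(*sw), GFP_KERNEL);
	if (!sw)
		return -ENOMEM;

	strscpy(sw->name, name, sizeof(sw->name));
	INIT_WORK(&sw->work, demo_store_worker);
	schedule_work(&sw->work);

	return 0;
}
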
index dd6ce59..a75df86 100644 (file)
@@ -7625,6 +7625,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        INIT_LIST_HEAD(&dev->all_adj_list.lower);
        INIT_LIST_HEAD(&dev->ptype_all);
        INIT_LIST_HEAD(&dev->ptype_specific);
+#ifdef CONFIG_NET_SCHED
+       hash_init(dev->qdisc_hash);
+#endif
        dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
        setup(dev);
 
index 61ad43f..91028ae 100644 (file)
@@ -6,6 +6,8 @@
 #include <linux/if_vlan.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/gre.h>
+#include <net/pptp.h>
 #include <linux/igmp.h>
 #include <linux/icmp.h>
 #include <linux/sctp.h>
@@ -338,32 +340,42 @@ mpls:
 ip_proto_again:
        switch (ip_proto) {
        case IPPROTO_GRE: {
-               struct gre_hdr {
-                       __be16 flags;
-                       __be16 proto;
-               } *hdr, _hdr;
+               struct gre_base_hdr *hdr, _hdr;
+               u16 gre_ver;
+               int offset = 0;
 
                hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
                if (!hdr)
                        goto out_bad;
-               /*
-                * Only look inside GRE if version zero and no
-                * routing
-                */
-               if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
+
+               /* Only look inside GRE without routing */
+               if (hdr->flags & GRE_ROUTING)
                        break;
 
-               proto = hdr->proto;
-               nhoff += 4;
+               /* Only look inside GRE for versions 0 and 1 */
+               gre_ver = ntohs(hdr->flags & GRE_VERSION);
+               if (gre_ver > 1)
+                       break;
+
+               proto = hdr->protocol;
+               if (gre_ver) {
+                       /* Version 1 must be PPTP and must carry a key */
+                       if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
+                               break;
+               }
+
+               offset += sizeof(struct gre_base_hdr);
+
                if (hdr->flags & GRE_CSUM)
-                       nhoff += 4;
+                       offset += sizeof(((struct gre_full_hdr *)0)->csum) +
+                                 sizeof(((struct gre_full_hdr *)0)->reserved1);
+
                if (hdr->flags & GRE_KEY) {
                        const __be32 *keyid;
                        __be32 _keyid;
 
-                       keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid),
+                       keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
                                                     data, hlen, &_keyid);
-
                        if (!keyid)
                                goto out_bad;
 
@@ -372,32 +384,65 @@ ip_proto_again:
                                key_keyid = skb_flow_dissector_target(flow_dissector,
                                                                      FLOW_DISSECTOR_KEY_GRE_KEYID,
                                                                      target_container);
-                               key_keyid->keyid = *keyid;
+                               if (gre_ver == 0)
+                                       key_keyid->keyid = *keyid;
+                               else
+                                       key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
                        }
-                       nhoff += 4;
+                       offset += sizeof(((struct gre_full_hdr *)0)->key);
                }
+
                if (hdr->flags & GRE_SEQ)
-                       nhoff += 4;
-               if (proto == htons(ETH_P_TEB)) {
-                       const struct ethhdr *eth;
-                       struct ethhdr _eth;
-
-                       eth = __skb_header_pointer(skb, nhoff,
-                                                  sizeof(_eth),
-                                                  data, hlen, &_eth);
-                       if (!eth)
+                       offset += sizeof(((struct pptp_gre_header *)0)->seq);
+
+               if (gre_ver == 0) {
+                       if (proto == htons(ETH_P_TEB)) {
+                               const struct ethhdr *eth;
+                               struct ethhdr _eth;
+
+                               eth = __skb_header_pointer(skb, nhoff + offset,
+                                                          sizeof(_eth),
+                                                          data, hlen, &_eth);
+                               if (!eth)
+                                       goto out_bad;
+                               proto = eth->h_proto;
+                               offset += sizeof(*eth);
+
+                               /* Cap headers that we access via pointers at the
+                                * end of the Ethernet header as our maximum alignment
+                                * at that point is only 2 bytes.
+                                */
+                               if (NET_IP_ALIGN)
+                                       hlen = (nhoff + offset);
+                       }
+               } else { /* version 1, must be PPTP */
+                       u8 _ppp_hdr[PPP_HDRLEN];
+                       u8 *ppp_hdr;
+
+                       if (hdr->flags & GRE_ACK)
+                               offset += sizeof(((struct pptp_gre_header *)0)->ack);
+
+                       ppp_hdr = skb_header_pointer(skb, nhoff + offset,
+                                                    sizeof(_ppp_hdr), _ppp_hdr);
+                       if (!ppp_hdr)
                                goto out_bad;
-                       proto = eth->h_proto;
-                       nhoff += sizeof(*eth);
-
-                       /* Cap headers that we access via pointers at the
-                        * end of the Ethernet header as our maximum alignment
-                        * at that point is only 2 bytes.
-                        */
-                       if (NET_IP_ALIGN)
-                               hlen = nhoff;
+
+                       switch (PPP_PROTOCOL(ppp_hdr)) {
+                       case PPP_IP:
+                               proto = htons(ETH_P_IP);
+                               break;
+                       case PPP_IPV6:
+                               proto = htons(ETH_P_IPV6);
+                               break;
+                       default:
+                               /* Could probably catch some more like MPLS */
+                               break;
+                       }
+
+                       offset += PPP_HDRLEN;
                }
 
+               nhoff += offset;
                key_control->flags |= FLOW_DIS_ENCAPSULATION;
                if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
                        goto out_good;
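
The dissector change above walks the GRE header by accumulating an offset for each optional field (checksum, key, sequence, and for version 1 the PPTP acknowledgement) rather than bumping nhoff in place. A small standalone sketch of that offset arithmetic in plain C; the flag values and field sizes below are simplified illustrations, not the kernel's GRE_* definitions from <net/gre.h>:

#include <stdint.h>
#include <stdio.h>

/* Illustrative flag bits only. */
#define DEMO_GRE_CSUM  0x8000
#define DEMO_GRE_KEY   0x2000
#define DEMO_GRE_SEQ   0x1000
#define DEMO_GRE_ACK   0x0080

/* Base header is always 4 bytes: flags + protocol. */
#define DEMO_GRE_BASE_LEN 4

/*
 * Return the offset of the encapsulated payload for a given flag set.
 * Version 0 may carry checksum(+reserved), key and sequence; version 1
 * (PPTP) carries key, sequence and optionally ack, each 4 bytes.
 */
static unsigned int demo_gre_payload_offset(uint16_t flags, int gre_ver)
{
	unsigned int offset = DEMO_GRE_BASE_LEN;

	if (flags & DEMO_GRE_CSUM)
		offset += 4;            /* checksum + reserved1 */
	if (flags & DEMO_GRE_KEY)
		offset += 4;            /* key (PPTP: payload length/call id) */
	if (flags & DEMO_GRE_SEQ)
		offset += 4;            /* sequence number */
	if (gre_ver == 1 && (flags & DEMO_GRE_ACK))
		offset += 4;            /* PPTP acknowledgement number */

	return offset;
}

int main(void)
{
	printf("v0, key only:    %u bytes\n",
	       demo_gre_payload_offset(DEMO_GRE_KEY, 0));
	printf("v1, key+seq+ack: %u bytes\n",
	       demo_gre_payload_offset(DEMO_GRE_KEY | DEMO_GRE_SEQ |
				       DEMO_GRE_ACK, 1));
	return 0;
}
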
index cf26e04..2ae929f 100644 (file)
@@ -1148,7 +1148,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                        } else
                                goto out;
                } else {
-                       if (lladdr == neigh->ha && new == NUD_STALE)
+                       if (lladdr == neigh->ha && new == NUD_STALE &&
+                           !(flags & NEIGH_UPDATE_F_ADMIN))
                                new = old;
                }
        }
index 2c2eb1b..1fe5816 100644 (file)
@@ -37,6 +37,8 @@ struct net init_net = {
 };
 EXPORT_SYMBOL(init_net);
 
+static bool init_net_initialized;
+
 #define INITIAL_NET_GEN_PTRS   13 /* +1 for len +2 for rcu_head */
 
 static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
@@ -750,6 +752,8 @@ static int __init net_ns_init(void)
        if (setup_net(&init_net, &init_user_ns))
                panic("Could not setup the initial network namespace");
 
+       init_net_initialized = true;
+
        rtnl_lock();
        list_add_tail_rcu(&init_net.list, &net_namespace_list);
        rtnl_unlock();
@@ -811,15 +815,24 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
 static int __register_pernet_operations(struct list_head *list,
                                        struct pernet_operations *ops)
 {
+       if (!init_net_initialized) {
+               list_add_tail(&ops->list, list);
+               return 0;
+       }
+
        return ops_init(ops, &init_net);
 }
 
 static void __unregister_pernet_operations(struct pernet_operations *ops)
 {
-       LIST_HEAD(net_exit_list);
-       list_add(&init_net.exit_list, &net_exit_list);
-       ops_exit_list(ops, &net_exit_list);
-       ops_free_list(ops, &net_exit_list);
+       if (!init_net_initialized) {
+               list_del(&ops->list);
+       } else {
+               LIST_HEAD(net_exit_list);
+               list_add(&init_net.exit_list, &net_exit_list);
+               ops_exit_list(ops, &net_exit_list);
+               ops_free_list(ops, &net_exit_list);
+       }
 }
 
 #endif /* CONFIG_NET_NS */
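
The net_namespace.c change above keeps pernet operations parked on the list until init_net itself has been set up, runs their init hooks only after that point, and lets an early unregistration simply unlink them again. A generic, standalone sketch of that defer-until-initialized pattern in plain C (all names are hypothetical):

#include <stdbool.h>
#include <stdio.h>

struct demo_ops {
	const char *name;
	int (*init)(void);
	struct demo_ops *next;
};

static struct demo_ops *pending;         /* ops registered too early */
static bool core_initialized;

static int demo_register(struct demo_ops *ops)
{
	if (!core_initialized) {
		/* Core not ready yet: remember the ops, run init later. */
		ops->next = pending;
		pending = ops;
		return 0;
	}
	return ops->init();
}

static void demo_unregister(struct demo_ops *ops)
{
	if (!core_initialized) {
		/* Never initialized: just unlink it from the pending list. */
		struct demo_ops **p;

		for (p = &pending; *p; p = &(*p)->next) {
			if (*p == ops) {
				*p = ops->next;
				break;
			}
		}
		return;
	}
	/* A real implementation would run the exit hook here. */
}

/* Called once the core is up: flush everything that was deferred. */
static int demo_core_init(void)
{
	struct demo_ops *ops;
	int err;

	core_initialized = true;
	for (ops = pending; ops; ops = ops->next) {
		err = ops->init();
		if (err)
			return err;
	}
	return 0;
}

static int hello_init(void) { printf("hello init\n"); return 0; }
static struct demo_ops hello_ops = { .name = "hello", .init = hello_init };

int main(void)
{
	demo_register(&hello_ops);   /* too early: deferred */
	demo_core_init();            /* runs hello_init() now */
	demo_unregister(&hello_ops);
	return 0;
}
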
index ef2ebeb..317c319 100644 (file)
@@ -93,9 +93,6 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
                return NULL;
 
        switch (id) {
-       case RT_TABLE_LOCAL:
-               rcu_assign_pointer(net->ipv4.fib_local, tb);
-               break;
        case RT_TABLE_MAIN:
                rcu_assign_pointer(net->ipv4.fib_main, tb);
                break;
@@ -137,9 +134,6 @@ static void fib_replace_table(struct net *net, struct fib_table *old,
 {
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        switch (new->tb_id) {
-       case RT_TABLE_LOCAL:
-               rcu_assign_pointer(net->ipv4.fib_local, new);
-               break;
        case RT_TABLE_MAIN:
                rcu_assign_pointer(net->ipv4.fib_main, new);
                break;
@@ -1249,7 +1243,6 @@ static void ip_fib_net_exit(struct net *net)
 
        rtnl_lock();
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-       RCU_INIT_POINTER(net->ipv4.fib_local, NULL);
        RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
        RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
 #endif
index 9b4ca87..606cc3e 100644 (file)
@@ -472,6 +472,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
                        continue;
                }
 
+               /* Based on RFC3376 5.1. Should not send source-list change
+                * records when there is a filter mode change.
+                */
+               if (((gdeleted && pmc->sfmode == MCAST_EXCLUDE) ||
+                    (!gdeleted && pmc->crcount)) &&
+                   (type == IGMPV3_ALLOW_NEW_SOURCES ||
+                    type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount)
+                       goto decrease_sf_crcount;
+
                /* clear marks on query responses */
                if (isquery)
                        psf->sf_gsresp = 0;
@@ -499,6 +508,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
                scount++; stotal++;
                if ((type == IGMPV3_ALLOW_NEW_SOURCES ||
                     type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
+decrease_sf_crcount:
                        psf->sf_crcount--;
                        if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
                                if (psf_prev)
index 1d71c40..ba9cbea 100644 (file)
@@ -85,7 +85,6 @@
 /* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */
 #define CONF_OPEN_RETRIES      2       /* (Re)open devices twice */
 #define CONF_SEND_RETRIES      6       /* Send six requests per open */
-#define CONF_INTER_TIMEOUT     (HZ)    /* Inter-device timeout: 1 second */
 #define CONF_BASE_TIMEOUT      (HZ*2)  /* Initial timeout: 2 seconds */
 #define CONF_TIMEOUT_RANDOM    (HZ)    /* Maximum amount of randomization */
 #define CONF_TIMEOUT_MULT      *7/4    /* Rate of timeout growth */
@@ -188,7 +187,7 @@ struct ic_device {
 };
 
 static struct ic_device *ic_first_dev __initdata;      /* List of open device */
-static struct net_device *ic_dev __initdata;           /* Selected device */
+static struct ic_device *ic_dev __initdata;            /* Selected device */
 
 static bool __init ic_is_init_dev(struct net_device *dev)
 {
@@ -307,7 +306,7 @@ static void __init ic_close_devs(void)
        while ((d = next)) {
                next = d->next;
                dev = d->dev;
-               if (dev != ic_dev && !netdev_uses_dsa(dev)) {
+               if (dev != ic_dev->dev && !netdev_uses_dsa(dev)) {
                        pr_debug("IP-Config: Downing %s\n", dev->name);
                        dev_change_flags(dev, d->flags);
                }
@@ -372,7 +371,7 @@ static int __init ic_setup_if(void)
        int err;
 
        memset(&ir, 0, sizeof(ir));
-       strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name);
+       strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name);
        set_sockaddr(sin, ic_myaddr, 0);
        if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) {
                pr_err("IP-Config: Unable to set interface address (%d)\n",
@@ -396,7 +395,7 @@ static int __init ic_setup_if(void)
         * out, we'll try to muddle along.
         */
        if (ic_dev_mtu != 0) {
-               strcpy(ir.ifr_name, ic_dev->name);
+               strcpy(ir.ifr_name, ic_dev->dev->name);
                ir.ifr_mtu = ic_dev_mtu;
                if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0)
                        pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n",
@@ -568,7 +567,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
                goto drop_unlock;
 
        /* We have a winner! */
-       ic_dev = dev;
+       ic_dev = d;
        if (ic_myaddr == NONE)
                ic_myaddr = tip;
        ic_servaddr = sip;
@@ -655,8 +654,6 @@ static struct packet_type bootp_packet_type __initdata = {
        .func = ic_bootp_recv,
 };
 
-static __be32 ic_dev_xid;              /* Device under configuration */
-
 /*
  *  Initialize DHCP/BOOTP extension fields in the request.
  */
@@ -666,14 +663,14 @@ static const u8 ic_bootp_cookie[4] = { 99, 130, 83, 99 };
 #ifdef IPCONFIG_DHCP
 
 static void __init
-ic_dhcp_init_options(u8 *options)
+ic_dhcp_init_options(u8 *options, struct ic_device *d)
 {
        u8 mt = ((ic_servaddr == NONE)
                 ? DHCPDISCOVER : DHCPREQUEST);
        u8 *e = options;
        int len;
 
-       pr_debug("DHCP: Sending message type %d\n", mt);
+       pr_debug("DHCP: Sending message type %d (%s)\n", mt, d->dev->name);
 
        memcpy(e, ic_bootp_cookie, 4);  /* RFC1048 Magic Cookie */
        e += 4;
@@ -857,7 +854,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
        /* add DHCP options or BOOTP extensions */
 #ifdef IPCONFIG_DHCP
        if (ic_proto_enabled & IC_USE_DHCP)
-               ic_dhcp_init_options(b->exten);
+               ic_dhcp_init_options(b->exten, d);
        else
 #endif
                ic_bootp_init_ext(b->exten);
@@ -1033,14 +1030,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
        /* Is it a reply to our BOOTP request? */
        if (b->op != BOOTP_REPLY ||
            b->xid != d->xid) {
-               net_err_ratelimited("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n",
-                                   b->op, b->xid);
-               goto drop_unlock;
-       }
-
-       /* Is it a reply for the device we are configuring? */
-       if (b->xid != ic_dev_xid) {
-               net_err_ratelimited("DHCP/BOOTP: Ignoring delayed packet\n");
+               net_err_ratelimited("DHCP/BOOTP: Reply not for us on %s, op[%x] xid[%x]\n",
+                                   d->dev->name, b->op, b->xid);
                goto drop_unlock;
        }
 
@@ -1075,7 +1066,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
                                }
                        }
 
-                       pr_debug("DHCP: Got message type %d\n", mt);
+                       pr_debug("DHCP: Got message type %d (%s)\n", mt, d->dev->name);
 
                        switch (mt) {
                        case DHCPOFFER:
@@ -1130,7 +1121,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
        }
 
        /* We have a winner! */
-       ic_dev = dev;
+       ic_dev = d;
        ic_myaddr = b->your_ip;
        ic_servaddr = b->server_ip;
        ic_addrservaddr = b->iph.saddr;
@@ -1225,9 +1216,6 @@ static int __init ic_dynamic(void)
        timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM);
        for (;;) {
 #ifdef IPCONFIG_BOOTP
-               /* Track the device we are configuring */
-               ic_dev_xid = d->xid;
-
                if (do_bootp && (d->able & IC_BOOTP))
                        ic_bootp_send_if(d, jiffies - start_jiffies);
 #endif
@@ -1236,15 +1224,19 @@ static int __init ic_dynamic(void)
                        ic_rarp_send_if(d);
 #endif
 
-               jiff = jiffies + (d->next ? CONF_INTER_TIMEOUT : timeout);
-               while (time_before(jiffies, jiff) && !ic_got_reply)
-                       schedule_timeout_uninterruptible(1);
+               if (!d->next) {
+                       jiff = jiffies + timeout;
+                       while (time_before(jiffies, jiff) && !ic_got_reply)
+                               schedule_timeout_uninterruptible(1);
+               }
 #ifdef IPCONFIG_DHCP
                /* DHCP isn't done until we get a DHCPACK. */
                if ((ic_got_reply & IC_BOOTP) &&
                    (ic_proto_enabled & IC_USE_DHCP) &&
                    ic_dhcp_msgtype != DHCPACK) {
                        ic_got_reply = 0;
+                       /* continue on device that got the reply */
+                       d = ic_dev;
                        pr_cont(",");
                        continue;
                }
@@ -1487,7 +1479,7 @@ static int __init ip_auto_config(void)
 #endif /* IPCONFIG_DYNAMIC */
        } else {
                /* Device selected manually or only one device -> use it */
-               ic_dev = ic_first_dev->dev;
+               ic_dev = ic_first_dev;
        }
 
        addr = root_nfs_parse_addr(root_server_path);
@@ -1500,14 +1492,6 @@ static int __init ip_auto_config(void)
        if (ic_defaults() < 0)
                return -1;
 
-       /*
-        * Close all network devices except the device we've
-        * autoconfigured and set up routes.
-        */
-       ic_close_devs();
-       if (ic_setup_if() < 0 || ic_setup_routes() < 0)
-               return -1;
-
        /*
         * Record which protocol was actually used.
         */
@@ -1522,7 +1506,7 @@ static int __init ip_auto_config(void)
        pr_info("IP-Config: Complete:\n");
 
        pr_info("     device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n",
-               ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr,
+               ic_dev->dev->name, ic_dev->dev->addr_len, ic_dev->dev->dev_addr,
                &ic_myaddr, &ic_netmask, &ic_gateway);
        pr_info("     host=%s, domain=%s, nis-domain=%s\n",
                utsname()->nodename, ic_domain, utsname()->domainname);
@@ -1542,7 +1526,18 @@ static int __init ip_auto_config(void)
        pr_cont("\n");
 #endif /* !SILENT */
 
-       return 0;
+       /*
+        * Close all network devices except the device we've
+        * autoconfigured and set up routes.
+        */
+       if (ic_setup_if() < 0 || ic_setup_routes() < 0)
+               err = -1;
+       else
+               err = 0;
+
+       ic_close_devs();
+
+       return err;
 }
 
 late_initcall(ip_auto_config);
index ec9efbc..aba0998 100644 (file)
@@ -172,6 +172,5 @@ static void __exit ila_fini(void)
 
 module_init(ila_init);
 module_exit(ila_fini);
-MODULE_ALIAS_RTNL_LWT(ILA);
 MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
 MODULE_LICENSE("GPL");
index 704274c..397e1ed 100644 (file)
@@ -61,12 +61,12 @@ static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
-#define HASH_SIZE_SHIFT  5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_GRE_HASH_SIZE_SHIFT  5
+#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT)
 
 static int ip6gre_net_id __read_mostly;
 struct ip6gre_net {
-       struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
+       struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
 
        struct net_device *fb_tunnel_dev;
 };
@@ -96,12 +96,12 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
    will match fallback tunnel.
  */
 
-#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
+#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1))
 static u32 HASH_ADDR(const struct in6_addr *addr)
 {
        u32 hash = ipv6_addr_hash(addr);
 
-       return hash_32(hash, HASH_SIZE_SHIFT);
+       return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT);
 }
 
 #define tunnels_r_l    tunnels[3]
@@ -1087,7 +1087,7 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
 
        for (prio = 0; prio < 4; prio++) {
                int h;
-               for (h = 0; h < HASH_SIZE; h++) {
+               for (h = 0; h < IP6_GRE_HASH_SIZE; h++) {
                        struct ip6_tnl *t;
 
                        t = rtnl_dereference(ign->tunnels[prio][h]);
index 7b0481e..2050217 100644 (file)
@@ -64,8 +64,8 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS_RTNL_LINK("ip6tnl");
 MODULE_ALIAS_NETDEV("ip6tnl0");
 
-#define HASH_SIZE_SHIFT  5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_TUNNEL_HASH_SIZE_SHIFT  5
+#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)
 
 static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
@@ -75,7 +75,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
 {
        u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
 
-       return hash_32(hash, HASH_SIZE_SHIFT);
+       return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
 }
 
 static int ip6_tnl_dev_init(struct net_device *dev);
@@ -87,7 +87,7 @@ struct ip6_tnl_net {
        /* the IPv6 tunnel fallback device */
        struct net_device *fb_tnl_dev;
        /* lists for storing tunnels in use */
-       struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+       struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
        struct ip6_tnl __rcu *tnls_wc[1];
        struct ip6_tnl __rcu **tnls[2];
 };
@@ -2031,7 +2031,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
                if (dev->rtnl_link_ops == &ip6_link_ops)
                        unregister_netdevice_queue(dev, &list);
 
-       for (h = 0; h < HASH_SIZE; h++) {
+       for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
                t = rtnl_dereference(ip6n->tnls_r_l[h]);
                while (t) {
                        /* If dev is in the same netns, it has already
index d90a11f..cc7e058 100644 (file)
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
-#define HASH_SIZE_SHIFT  5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_VTI_HASH_SIZE_SHIFT  5
+#define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT)
 
 static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
 {
        u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
 
-       return hash_32(hash, HASH_SIZE_SHIFT);
+       return hash_32(hash, IP6_VTI_HASH_SIZE_SHIFT);
 }
 
 static int vti6_dev_init(struct net_device *dev);
@@ -69,7 +69,7 @@ struct vti6_net {
        /* the vti6 tunnel fallback device */
        struct net_device *fb_tnl_dev;
        /* lists for storing tunnels in use */
-       struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+       struct ip6_tnl __rcu *tnls_r_l[IP6_VTI_HASH_SIZE];
        struct ip6_tnl __rcu *tnls_wc[1];
        struct ip6_tnl __rcu **tnls[2];
 };
@@ -1040,7 +1040,7 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
        struct ip6_tnl *t;
        LIST_HEAD(list);
 
-       for (h = 0; h < HASH_SIZE; h++) {
+       for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
                t = rtnl_dereference(ip6n->tnls_r_l[h]);
                while (t) {
                        unregister_netdevice_queue(t->dev, &list);
index d64ee7e..75c1fc5 100644 (file)
@@ -1739,6 +1739,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
                        continue;
                }
 
+               /* Based on RFC3810 6.1. Should not send source-list change
+                * records when there is a filter mode change.
+                */
+               if (((gdeleted && pmc->mca_sfmode == MCAST_EXCLUDE) ||
+                    (!gdeleted && pmc->mca_crcount)) &&
+                   (type == MLD2_ALLOW_NEW_SOURCES ||
+                    type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount)
+                       goto decrease_sf_crcount;
+
                /* clear marks on query responses */
                if (isquery)
                        psf->sf_gsresp = 0;
@@ -1766,6 +1775,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
                scount++; stotal++;
                if ((type == MLD2_ALLOW_NEW_SOURCES ||
                     type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
+decrease_sf_crcount:
                        psf->sf_crcount--;
                        if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
                                if (psf_prev)
index 182b6a9..b1cdf80 100644 (file)
@@ -62,7 +62,7 @@
    For comments look at net/ipv4/ip_gre.c --ANK
  */
 
-#define HASH_SIZE  16
+#define IP6_SIT_HASH_SIZE  16
 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 
 static bool log_ecn_error = true;
@@ -78,9 +78,9 @@ static struct rtnl_link_ops sit_link_ops __read_mostly;
 
 static int sit_net_id __read_mostly;
 struct sit_net {
-       struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
-       struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
-       struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
+       struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE];
+       struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE];
+       struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE];
        struct ip_tunnel __rcu *tunnels_wc[1];
        struct ip_tunnel __rcu **tunnels[4];
 
@@ -1126,7 +1126,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
 }
 #endif
 
-bool ipip6_valid_ip_proto(u8 ipproto)
+static bool ipip6_valid_ip_proto(u8 ipproto)
 {
        return ipproto == IPPROTO_IPV6 ||
                ipproto == IPPROTO_IPIP ||
@@ -1783,7 +1783,7 @@ static void __net_exit sit_destroy_tunnels(struct net *net,
 
        for (prio = 1; prio < 4; prio++) {
                int h;
-               for (h = 0; h < HASH_SIZE; h++) {
+               for (h = 0; h < IP6_SIT_HASH_SIZE; h++) {
                        struct ip_tunnel *t;
 
                        t = rtnl_dereference(sitn->tunnels[prio][h]);
index 5db94d9..87fca36 100644 (file)
@@ -3,6 +3,7 @@ config AF_KCM
        tristate "KCM sockets"
        depends on INET
        select BPF_SYSCALL
+       select STREAM_PARSER
        ---help---
          KCM (Kernel Connection Multiplexor) sockets provide a method
          for multiplexing messages of a message based application
index 16c2e03..47e4453 100644 (file)
@@ -155,8 +155,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
        seq_printf(seq,
                   "   psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ",
                   psock->index,
-                  psock->stats.rx_msgs,
-                  psock->stats.rx_bytes,
+                  psock->strp.stats.rx_msgs,
+                  psock->strp.stats.rx_bytes,
                   psock->stats.tx_msgs,
                   psock->stats.tx_bytes,
                   psock->sk->sk_receive_queue.qlen,
@@ -170,9 +170,12 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
        if (psock->tx_stopped)
                seq_puts(seq, "TxStop ");
 
-       if (psock->rx_stopped)
+       if (psock->strp.rx_stopped)
                seq_puts(seq, "RxStop ");
 
+       if (psock->strp.rx_paused)
+               seq_puts(seq, "RxPause ");
+
        if (psock->tx_kcm)
                seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index);
 
@@ -275,6 +278,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
 {
        struct kcm_psock_stats psock_stats;
        struct kcm_mux_stats mux_stats;
+       struct strp_aggr_stats strp_stats;
        struct kcm_mux *mux;
        struct kcm_psock *psock;
        struct net *net = seq->private;
@@ -282,20 +286,28 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
 
        memset(&mux_stats, 0, sizeof(mux_stats));
        memset(&psock_stats, 0, sizeof(psock_stats));
+       memset(&strp_stats, 0, sizeof(strp_stats));
 
        mutex_lock(&knet->mutex);
 
        aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats);
        aggregate_psock_stats(&knet->aggregate_psock_stats,
                              &psock_stats);
+       aggregate_strp_stats(&knet->aggregate_strp_stats,
+                            &strp_stats);
 
        list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) {
                spin_lock_bh(&mux->lock);
                aggregate_mux_stats(&mux->stats, &mux_stats);
                aggregate_psock_stats(&mux->aggregate_psock_stats,
                                      &psock_stats);
-               list_for_each_entry(psock, &mux->psocks, psock_list)
+               aggregate_strp_stats(&mux->aggregate_strp_stats,
+                                    &strp_stats);
+               list_for_each_entry(psock, &mux->psocks, psock_list) {
                        aggregate_psock_stats(&psock->stats, &psock_stats);
+                       save_strp_stats(&psock->strp, &strp_stats);
+               }
+
                spin_unlock_bh(&mux->lock);
        }
 
@@ -328,7 +340,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
                   mux_stats.rx_ready_drops);
 
        seq_printf(seq,
-                  "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n",
+                  "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n",
                   "Psock",
                   "RX-Msgs",
                   "RX-Bytes",
@@ -337,6 +349,8 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
                   "Reserved",
                   "Unreserved",
                   "RX-Aborts",
+                  "RX-Intr",
+                  "RX-Unrecov",
                   "RX-MemFail",
                   "RX-NeedMor",
                   "RX-BadLen",
@@ -345,20 +359,22 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
                   "TX-Aborts");
 
        seq_printf(seq,
-                  "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
+                  "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
                   "",
-                  psock_stats.rx_msgs,
-                  psock_stats.rx_bytes,
+                  strp_stats.rx_msgs,
+                  strp_stats.rx_bytes,
                   psock_stats.tx_msgs,
                   psock_stats.tx_bytes,
                   psock_stats.reserved,
                   psock_stats.unreserved,
-                  psock_stats.rx_aborts,
-                  psock_stats.rx_mem_fail,
-                  psock_stats.rx_need_more_hdr,
-                  psock_stats.rx_bad_hdr_len,
-                  psock_stats.rx_msg_too_big,
-                  psock_stats.rx_msg_timeouts,
+                  strp_stats.rx_aborts,
+                  strp_stats.rx_interrupted,
+                  strp_stats.rx_unrecov_intr,
+                  strp_stats.rx_mem_fail,
+                  strp_stats.rx_need_more_hdr,
+                  strp_stats.rx_bad_hdr_len,
+                  strp_stats.rx_msg_too_big,
+                  strp_stats.rx_msg_timeouts,
                   psock_stats.tx_aborts);
 
        return 0;
index cb39e05..eedbe40 100644 (file)
@@ -1,3 +1,13 @@
+/*
+ * Kernel Connection Multiplexor
+ *
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
 #include <linux/bpf.h>
 #include <linux/errno.h>
 #include <linux/errqueue.h>
@@ -35,38 +45,12 @@ static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb)
        return (struct kcm_tx_msg *)skb->cb;
 }
 
-static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb)
-{
-       return (struct kcm_rx_msg *)((void *)skb->cb +
-                                    offsetof(struct qdisc_skb_cb, data));
-}
-
 static void report_csk_error(struct sock *csk, int err)
 {
        csk->sk_err = EPIPE;
        csk->sk_error_report(csk);
 }
 
-/* Callback lock held */
-static void kcm_abort_rx_psock(struct kcm_psock *psock, int err,
-                              struct sk_buff *skb)
-{
-       struct sock *csk = psock->sk;
-
-       /* Unrecoverable error in receive */
-
-       del_timer(&psock->rx_msg_timer);
-
-       if (psock->rx_stopped)
-               return;
-
-       psock->rx_stopped = 1;
-       KCM_STATS_INCR(psock->stats.rx_aborts);
-
-       /* Report an error on the lower socket */
-       report_csk_error(csk, err);
-}
-
 static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
                               bool wakeup_kcm)
 {
@@ -109,12 +93,13 @@ static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
 static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
                                    struct kcm_psock *psock)
 {
-       KCM_STATS_ADD(mux->stats.rx_bytes,
-                     psock->stats.rx_bytes - psock->saved_rx_bytes);
+       STRP_STATS_ADD(mux->stats.rx_bytes,
+                      psock->strp.stats.rx_bytes -
+                      psock->saved_rx_bytes);
        mux->stats.rx_msgs +=
-               psock->stats.rx_msgs - psock->saved_rx_msgs;
-       psock->saved_rx_msgs = psock->stats.rx_msgs;
-       psock->saved_rx_bytes = psock->stats.rx_bytes;
+               psock->strp.stats.rx_msgs - psock->saved_rx_msgs;
+       psock->saved_rx_msgs = psock->strp.stats.rx_msgs;
+       psock->saved_rx_bytes = psock->strp.stats.rx_bytes;
 }
 
 static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
@@ -167,11 +152,11 @@ static void kcm_rcv_ready(struct kcm_sock *kcm)
                 */
                list_del(&psock->psock_ready_list);
                psock->ready_rx_msg = NULL;
-
                /* Commit clearing of ready_rx_msg for queuing work */
                smp_mb();
 
-               queue_work(kcm_wq, &psock->rx_work);
+               strp_unpause(&psock->strp);
+               strp_check_rcv(&psock->strp);
        }
 
        /* Buffer limit is okay now, add to ready list */
@@ -285,6 +270,7 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock,
 
        if (list_empty(&mux->kcm_rx_waiters)) {
                psock->ready_rx_msg = head;
+               strp_pause(&psock->strp);
                list_add_tail(&psock->psock_ready_list,
                              &mux->psocks_ready);
                spin_unlock_bh(&mux->rx_lock);
@@ -353,276 +339,6 @@ static void unreserve_rx_kcm(struct kcm_psock *psock,
        spin_unlock_bh(&mux->rx_lock);
 }
 
-static void kcm_start_rx_timer(struct kcm_psock *psock)
-{
-       if (psock->sk->sk_rcvtimeo)
-               mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo);
-}
-
-/* Macro to invoke filter function. */
-#define KCM_RUN_FILTER(prog, ctx) \
-       (*prog->bpf_func)(ctx, prog->insnsi)
-
-/* Lower socket lock held */
-static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
-                       unsigned int orig_offset, size_t orig_len)
-{
-       struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data;
-       struct kcm_rx_msg *rxm;
-       struct kcm_sock *kcm;
-       struct sk_buff *head, *skb;
-       size_t eaten = 0, cand_len;
-       ssize_t extra;
-       int err;
-       bool cloned_orig = false;
-
-       if (psock->ready_rx_msg)
-               return 0;
-
-       head = psock->rx_skb_head;
-       if (head) {
-               /* Message already in progress */
-
-               rxm = kcm_rx_msg(head);
-               if (unlikely(rxm->early_eaten)) {
-                       /* Already some number of bytes on the receive sock
-                        * data saved in rx_skb_head, just indicate they
-                        * are consumed.
-                        */
-                       eaten = orig_len <= rxm->early_eaten ?
-                               orig_len : rxm->early_eaten;
-                       rxm->early_eaten -= eaten;
-
-                       return eaten;
-               }
-
-               if (unlikely(orig_offset)) {
-                       /* Getting data with a non-zero offset when a message is
-                        * in progress is not expected. If it does happen, we
-                        * need to clone and pull since we can't deal with
-                        * offsets in the skbs for a message expect in the head.
-                        */
-                       orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
-                       if (!orig_skb) {
-                               KCM_STATS_INCR(psock->stats.rx_mem_fail);
-                               desc->error = -ENOMEM;
-                               return 0;
-                       }
-                       if (!pskb_pull(orig_skb, orig_offset)) {
-                               KCM_STATS_INCR(psock->stats.rx_mem_fail);
-                               kfree_skb(orig_skb);
-                               desc->error = -ENOMEM;
-                               return 0;
-                       }
-                       cloned_orig = true;
-                       orig_offset = 0;
-               }
-
-               if (!psock->rx_skb_nextp) {
-                       /* We are going to append to the frags_list of head.
-                        * Need to unshare the frag_list.
-                        */
-                       err = skb_unclone(head, GFP_ATOMIC);
-                       if (err) {
-                               KCM_STATS_INCR(psock->stats.rx_mem_fail);
-                               desc->error = err;
-                               return 0;
-                       }
-
-                       if (unlikely(skb_shinfo(head)->frag_list)) {
-                               /* We can't append to an sk_buff that already
-                                * has a frag_list. We create a new head, point
-                                * the frag_list of that to the old head, and
-                                * then are able to use the old head->next for
-                                * appending to the message.
-                                */
-                               if (WARN_ON(head->next)) {
-                                       desc->error = -EINVAL;
-                                       return 0;
-                               }
-
-                               skb = alloc_skb(0, GFP_ATOMIC);
-                               if (!skb) {
-                                       KCM_STATS_INCR(psock->stats.rx_mem_fail);
-                                       desc->error = -ENOMEM;
-                                       return 0;
-                               }
-                               skb->len = head->len;
-                               skb->data_len = head->len;
-                               skb->truesize = head->truesize;
-                               *kcm_rx_msg(skb) = *kcm_rx_msg(head);
-                               psock->rx_skb_nextp = &head->next;
-                               skb_shinfo(skb)->frag_list = head;
-                               psock->rx_skb_head = skb;
-                               head = skb;
-                       } else {
-                               psock->rx_skb_nextp =
-                                   &skb_shinfo(head)->frag_list;
-                       }
-               }
-       }
-
-       while (eaten < orig_len) {
-               /* Always clone since we will consume something */
-               skb = skb_clone(orig_skb, GFP_ATOMIC);
-               if (!skb) {
-                       KCM_STATS_INCR(psock->stats.rx_mem_fail);
-                       desc->error = -ENOMEM;
-                       break;
-               }
-
-               cand_len = orig_len - eaten;
-
-               head = psock->rx_skb_head;
-               if (!head) {
-                       head = skb;
-                       psock->rx_skb_head = head;
-                       /* Will set rx_skb_nextp on next packet if needed */
-                       psock->rx_skb_nextp = NULL;
-                       rxm = kcm_rx_msg(head);
-                       memset(rxm, 0, sizeof(*rxm));
-                       rxm->offset = orig_offset + eaten;
-               } else {
-                       /* Unclone since we may be appending to an skb that we
-                        * already share a frag_list with.
-                        */
-                       err = skb_unclone(skb, GFP_ATOMIC);
-                       if (err) {
-                               KCM_STATS_INCR(psock->stats.rx_mem_fail);
-                               desc->error = err;
-                               break;
-                       }
-
-                       rxm = kcm_rx_msg(head);
-                       *psock->rx_skb_nextp = skb;
-                       psock->rx_skb_nextp = &skb->next;
-                       head->data_len += skb->len;
-                       head->len += skb->len;
-                       head->truesize += skb->truesize;
-               }
-
-               if (!rxm->full_len) {
-                       ssize_t len;
-
-                       len = KCM_RUN_FILTER(psock->bpf_prog, head);
-
-                       if (!len) {
-                               /* Need more header to determine length */
-                               if (!rxm->accum_len) {
-                                       /* Start RX timer for new message */
-                                       kcm_start_rx_timer(psock);
-                               }
-                               rxm->accum_len += cand_len;
-                               eaten += cand_len;
-                               KCM_STATS_INCR(psock->stats.rx_need_more_hdr);
-                               WARN_ON(eaten != orig_len);
-                               break;
-                       } else if (len > psock->sk->sk_rcvbuf) {
-                               /* Message length exceeds maximum allowed */
-                               KCM_STATS_INCR(psock->stats.rx_msg_too_big);
-                               desc->error = -EMSGSIZE;
-                               psock->rx_skb_head = NULL;
-                               kcm_abort_rx_psock(psock, EMSGSIZE, head);
-                               break;
-                       } else if (len <= (ssize_t)head->len -
-                                         skb->len - rxm->offset) {
-                               /* Length must be into new skb (and also
-                                * greater than zero)
-                                */
-                               KCM_STATS_INCR(psock->stats.rx_bad_hdr_len);
-                               desc->error = -EPROTO;
-                               psock->rx_skb_head = NULL;
-                               kcm_abort_rx_psock(psock, EPROTO, head);
-                               break;
-                       }
-
-                       rxm->full_len = len;
-               }
-
-               extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len;
-
-               if (extra < 0) {
-                       /* Message not complete yet. */
-                       if (rxm->full_len - rxm->accum_len >
-                           tcp_inq(psock->sk)) {
-                               /* Don't have the whole messages in the socket
-                                * buffer. Set psock->rx_need_bytes to wait for
-                                * the rest of the message. Also, set "early
-                                * eaten" since we've already buffered the skb
-                                * but don't consume yet per tcp_read_sock.
-                                */
-
-                               if (!rxm->accum_len) {
-                                       /* Start RX timer for new message */
-                                       kcm_start_rx_timer(psock);
-                               }
-
-                               psock->rx_need_bytes = rxm->full_len -
-                                                      rxm->accum_len;
-                               rxm->accum_len += cand_len;
-                               rxm->early_eaten = cand_len;
-                               KCM_STATS_ADD(psock->stats.rx_bytes, cand_len);
-                               desc->count = 0; /* Stop reading socket */
-                               break;
-                       }
-                       rxm->accum_len += cand_len;
-                       eaten += cand_len;
-                       WARN_ON(eaten != orig_len);
-                       break;
-               }
-
-               /* Positive extra indicates ore bytes than needed for the
-                * message
-                */
-
-               WARN_ON(extra > cand_len);
-
-               eaten += (cand_len - extra);
-
-               /* Hurray, we have a new message! */
-               del_timer(&psock->rx_msg_timer);
-               psock->rx_skb_head = NULL;
-               KCM_STATS_INCR(psock->stats.rx_msgs);
-
-try_queue:
-               kcm = reserve_rx_kcm(psock, head);
-               if (!kcm) {
-                       /* Unable to reserve a KCM, message is held in psock. */
-                       break;
-               }
-
-               if (kcm_queue_rcv_skb(&kcm->sk, head)) {
-                       /* Should mean socket buffer full */
-                       unreserve_rx_kcm(psock, false);
-                       goto try_queue;
-               }
-       }
-
-       if (cloned_orig)
-               kfree_skb(orig_skb);
-
-       KCM_STATS_ADD(psock->stats.rx_bytes, eaten);
-
-       return eaten;
-}
-
-/* Called with lock held on lower socket */
-static int psock_tcp_read_sock(struct kcm_psock *psock)
-{
-       read_descriptor_t desc;
-
-       desc.arg.data = psock;
-       desc.error = 0;
-       desc.count = 1; /* give more than one skb per call */
-
-       /* sk should be locked here, so okay to do tcp_read_sock */
-       tcp_read_sock(psock->sk, &desc, kcm_tcp_recv);
-
-       unreserve_rx_kcm(psock, true);
-
-       return desc.error;
-}
-
 /* Lower sock lock held */
 static void psock_tcp_data_ready(struct sock *sk)
 {
@@ -631,65 +347,49 @@ static void psock_tcp_data_ready(struct sock *sk)
        read_lock_bh(&sk->sk_callback_lock);
 
        psock = (struct kcm_psock *)sk->sk_user_data;
-       if (unlikely(!psock || psock->rx_stopped))
-               goto out;
-
-       if (psock->ready_rx_msg)
-               goto out;
-
-       if (psock->rx_need_bytes) {
-               if (tcp_inq(sk) >= psock->rx_need_bytes)
-                       psock->rx_need_bytes = 0;
-               else
-                       goto out;
-       }
-
-       if (psock_tcp_read_sock(psock) == -ENOMEM)
-               queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
+       if (likely(psock))
+               strp_tcp_data_ready(&psock->strp);
 
-out:
        read_unlock_bh(&sk->sk_callback_lock);
 }
 
-static void do_psock_rx_work(struct kcm_psock *psock)
+/* Called with lower sock held */
+static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb)
 {
-       read_descriptor_t rd_desc;
-       struct sock *csk = psock->sk;
-
-       /* We need the read lock to synchronize with psock_tcp_data_ready. We
-        * need the socket lock for calling tcp_read_sock.
-        */
-       lock_sock(csk);
-       read_lock_bh(&csk->sk_callback_lock);
-
-       if (unlikely(csk->sk_user_data != psock))
-               goto out;
-
-       if (unlikely(psock->rx_stopped))
-               goto out;
-
-       if (psock->ready_rx_msg)
-               goto out;
-
-       rd_desc.arg.data = psock;
+       struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
+       struct kcm_sock *kcm;
 
-       if (psock_tcp_read_sock(psock) == -ENOMEM)
-               queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
+try_queue:
+       kcm = reserve_rx_kcm(psock, skb);
+       if (!kcm) {
+               /* Unable to reserve a KCM, message is held in psock and strp
+                * is paused.
+                */
+               return;
+       }
 
-out:
-       read_unlock_bh(&csk->sk_callback_lock);
-       release_sock(csk);
+       if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
+               /* Should mean socket buffer full */
+               unreserve_rx_kcm(psock, false);
+               goto try_queue;
+       }
 }
 
-static void psock_rx_work(struct work_struct *w)
+static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
 {
-       do_psock_rx_work(container_of(w, struct kcm_psock, rx_work));
+       struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
+       struct bpf_prog *prog = psock->bpf_prog;
+
+       return (*prog->bpf_func)(skb, prog->insnsi);
 }
 
-static void psock_rx_delayed_work(struct work_struct *w)
+static int kcm_read_sock_done(struct strparser *strp, int err)
 {
-       do_psock_rx_work(container_of(w, struct kcm_psock,
-                                     rx_delayed_work.work));
+       struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
+
+       unreserve_rx_kcm(psock, true);
+
+       return err;
 }
 
 static void psock_tcp_state_change(struct sock *sk)
@@ -713,14 +413,13 @@ static void psock_tcp_write_space(struct sock *sk)
        psock = (struct kcm_psock *)sk->sk_user_data;
        if (unlikely(!psock))
                goto out;
-
        mux = psock->mux;
 
        spin_lock_bh(&mux->lock);
 
        /* Check if the socket is reserved so someone is waiting for sending. */
        kcm = psock->tx_kcm;
-       if (kcm)
+       if (kcm && likely(!kcm->tx_stopped))
                queue_work(kcm_wq, &kcm->tx_work);
 
        spin_unlock_bh(&mux->lock);
@@ -1411,7 +1110,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
        struct kcm_sock *kcm = kcm_sk(sk);
        int err = 0;
        long timeo;
-       struct kcm_rx_msg *rxm;
+       struct strp_rx_msg *rxm;
        int copied = 0;
        struct sk_buff *skb;
 
@@ -1425,7 +1124,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
 
        /* Okay, have a message on the receive queue */
 
-       rxm = kcm_rx_msg(skb);
+       rxm = strp_rx_msg(skb);
 
        if (len > rxm->full_len)
                len = rxm->full_len;
@@ -1481,7 +1180,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
        struct sock *sk = sock->sk;
        struct kcm_sock *kcm = kcm_sk(sk);
        long timeo;
-       struct kcm_rx_msg *rxm;
+       struct strp_rx_msg *rxm;
        int err = 0;
        ssize_t copied;
        struct sk_buff *skb;
@@ -1498,7 +1197,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
 
        /* Okay, have a message on the receive queue */
 
-       rxm = kcm_rx_msg(skb);
+       rxm = strp_rx_msg(skb);
 
        if (len > rxm->full_len)
                len = rxm->full_len;
@@ -1674,15 +1373,6 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
        spin_unlock_bh(&mux->rx_lock);
 }
 
-static void kcm_rx_msg_timeout(unsigned long arg)
-{
-       struct kcm_psock *psock = (struct kcm_psock *)arg;
-
-       /* Message assembly timed out */
-       KCM_STATS_INCR(psock->stats.rx_msg_timeouts);
-       kcm_abort_rx_psock(psock, ETIMEDOUT, NULL);
-}
-
 static int kcm_attach(struct socket *sock, struct socket *csock,
                      struct bpf_prog *prog)
 {
@@ -1692,6 +1382,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
        struct kcm_psock *psock = NULL, *tpsock;
        struct list_head *head;
        int index = 0;
+       struct strp_callbacks cb;
 
        if (csock->ops->family != PF_INET &&
            csock->ops->family != PF_INET6)
@@ -1713,11 +1404,12 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
        psock->sk = csk;
        psock->bpf_prog = prog;
 
-       setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout,
-                   (unsigned long)psock);
+       cb.rcv_msg = kcm_rcv_strparser;
+       cb.abort_parser = NULL;
+       cb.parse_msg = kcm_parse_func_strparser;
+       cb.read_sock_done = kcm_read_sock_done;
 
-       INIT_WORK(&psock->rx_work, psock_rx_work);
-       INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work);
+       strp_init(&psock->strp, csk, &cb);
 
        sock_hold(csk);
 
@@ -1750,7 +1442,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
        spin_unlock_bh(&mux->lock);
 
        /* Schedule RX work in case there are already bytes queued */
-       queue_work(kcm_wq, &psock->rx_work);
+       strp_check_rcv(&psock->strp);
 
        return 0;
 }
@@ -1785,6 +1477,7 @@ out:
        return err;
 }
 
+/* Lower socket lock held */
 static void kcm_unattach(struct kcm_psock *psock)
 {
        struct sock *csk = psock->sk;
@@ -1798,7 +1491,7 @@ static void kcm_unattach(struct kcm_psock *psock)
        csk->sk_data_ready = psock->save_data_ready;
        csk->sk_write_space = psock->save_write_space;
        csk->sk_state_change = psock->save_state_change;
-       psock->rx_stopped = 1;
+       strp_stop(&psock->strp);
 
        if (WARN_ON(psock->rx_kcm)) {
                write_unlock_bh(&csk->sk_callback_lock);
@@ -1821,18 +1514,14 @@ static void kcm_unattach(struct kcm_psock *psock)
 
        write_unlock_bh(&csk->sk_callback_lock);
 
-       del_timer_sync(&psock->rx_msg_timer);
-       cancel_work_sync(&psock->rx_work);
-       cancel_delayed_work_sync(&psock->rx_delayed_work);
+       strp_done(&psock->strp);
 
        bpf_prog_put(psock->bpf_prog);
 
-       kfree_skb(psock->rx_skb_head);
-       psock->rx_skb_head = NULL;
-
        spin_lock_bh(&mux->lock);
 
        aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
+       save_strp_stats(&psock->strp, &mux->aggregate_strp_stats);
 
        KCM_STATS_INCR(mux->stats.psock_unattach);
 
@@ -1915,6 +1604,7 @@ static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
 
                spin_unlock_bh(&mux->lock);
 
+               /* Lower socket lock should already be held */
                kcm_unattach(psock);
 
                err = 0;
@@ -2059,8 +1749,11 @@ static void release_mux(struct kcm_mux *mux)
        /* Release psocks */
        list_for_each_entry_safe(psock, tmp_psock,
                                 &mux->psocks, psock_list) {
-               if (!WARN_ON(psock->unattaching))
+               if (!WARN_ON(psock->unattaching)) {
+                       lock_sock(psock->strp.sk);
                        kcm_unattach(psock);
+                       release_sock(psock->strp.sk);
+               }
        }
 
        if (WARN_ON(mux->psocks_cnt))
@@ -2072,6 +1765,8 @@ static void release_mux(struct kcm_mux *mux)
        aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
        aggregate_psock_stats(&mux->aggregate_psock_stats,
                              &knet->aggregate_psock_stats);
+       aggregate_strp_stats(&mux->aggregate_strp_stats,
+                            &knet->aggregate_strp_stats);
        list_del_rcu(&mux->kcm_mux_list);
        knet->count--;
        mutex_unlock(&knet->mutex);
@@ -2151,6 +1846,13 @@ static int kcm_release(struct socket *sock)
         * it will just return.
         */
        __skb_queue_purge(&sk->sk_write_queue);
+
+       /* Set tx_stopped. This is checked when psock is bound to a kcm and we
+        * get a writespace callback. This prevents further work being queued
+        * from the callback (unbinding the psock occurs after canceling work).
+        */
+       kcm->tx_stopped = 1;
+
        release_sock(sk);
 
        spin_lock_bh(&mux->lock);
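
Taken together, the kcmsock.c changes above drop KCM's open-coded tcp_read_sock() receive path and plug into the generic stream parser instead: KCM now only supplies strp_callbacks (parse_msg runs the BPF program to return a message length, rcv_msg delivers a complete message to a reserved kcm socket, read_sock_done unreserves it), and uses strp_pause/strp_unpause for flow control plus strp_stop/strp_done on unattach. A minimal sketch of another consumer of the same API, assuming a simple 4-byte big-endian length-prefix framing (the example_* names and the framing are illustrative, not part of this patch), could look like:

#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/strparser.h>

/* Hypothetical consumer: messages are framed as a 4-byte big-endian
 * length followed by that many payload bytes.
 */
static int example_parse_msg(struct strparser *strp, struct sk_buff *skb)
{
	struct strp_rx_msg *rxm = strp_rx_msg(skb);
	__be32 hdr;

	/* Not enough bytes for the header yet: return 0 to ask for more. */
	if (skb_copy_bits(skb, rxm->offset, &hdr, sizeof(hdr)) < 0)
		return 0;

	/* Total message length = header + payload. */
	return sizeof(hdr) + ntohl(hdr);
}

static void example_rcv_msg(struct strparser *strp, struct sk_buff *skb)
{
	/* A complete message; a real consumer would queue it somewhere. */
	consume_skb(skb);
}

static int example_read_sock_done(struct strparser *strp, int err)
{
	return err;
}

static void example_attach(struct strparser *strp, struct sock *csk)
{
	struct strp_callbacks cb = {
		.rcv_msg        = example_rcv_msg,
		.parse_msg      = example_parse_msg,
		.abort_parser   = NULL,	/* NULL, as in kcm_attach() above */
		.read_sock_done = example_read_sock_done,
	};

	strp_init(strp, csk, &cb);
	strp_check_rcv(strp);	/* parse anything already queued on csk */
}
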
index ba5fc1f..42a41ae 100644 (file)
@@ -1088,13 +1088,13 @@ static inline void drv_leave_ibss(struct ieee80211_local *local,
 }
 
 static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
-                                             struct ieee80211_sta *sta)
+                                             struct sta_info *sta)
 {
        u32 ret = 0;
 
-       trace_drv_get_expected_throughput(sta);
-       if (local->ops->get_expected_throughput)
-               ret = local->ops->get_expected_throughput(&local->hw, sta);
+       trace_drv_get_expected_throughput(&sta->sta);
+       if (local->ops->get_expected_throughput && sta->uploaded)
+               ret = local->ops->get_expected_throughput(&local->hw, &sta->sta);
        trace_drv_return_u32(local, ret);
 
        return ret;
index 8f9c3bd..fa7d37c 100644 (file)
@@ -326,22 +326,33 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
        u32 tx_time, estimated_retx;
        u64 result;
 
-       if (sta->mesh->fail_avg >= 100)
-               return MAX_METRIC;
+       /* Try to get rate based on HW/SW RC algorithm.
+        * Rate is returned in units of Kbps, correct this
+        * to comply with airtime calculation units
+        * Round up in case we get rate < 100Kbps
+        */
+       rate = DIV_ROUND_UP(sta_get_expected_throughput(sta), 100);
 
-       sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
-       rate = cfg80211_calculate_bitrate(&rinfo);
-       if (WARN_ON(!rate))
-               return MAX_METRIC;
+       if (rate) {
+               err = 0;
+       } else {
+               if (sta->mesh->fail_avg >= 100)
+                       return MAX_METRIC;
 
-       err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+               sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
+               rate = cfg80211_calculate_bitrate(&rinfo);
+               if (WARN_ON(!rate))
+                       return MAX_METRIC;
+
+               err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+       }
 
        /* bitrate is in units of 100 Kbps, while we need rate in units of
         * 1Mbps. This will be corrected on tx_time computation.
         */
        tx_time = (device_constant + 10 * test_frame_len / rate);
        estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err));
-       result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ;
+       result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT);
        return (u32)result;
 }
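
For reference, the new metric path above converts the RC algorithm's expected throughput from Kbps into units of 100 Kbps (rounding up so very slow links do not end up with a zero rate), and only falls back to the last TX rate plus the frame-failure average when no estimate is available; tx_time and the retransmission factor are then computed in fixed point using ARITH_SHIFT. A standalone sketch of the arithmetic, with assumed values for the per-device constants (device_constant and test_frame_len come from mesh_hwmp.c and are not part of this hunk), is:

#include <stdint.h>
#include <stdio.h>

#define ARITH_SHIFT 8

/* rate is in units of 100 Kbps; fail_avg is a failure percentage and must
 * be < 100 (the kernel returns MAX_METRIC before reaching this math).
 */
static uint32_t airtime_metric(uint32_t rate, uint32_t fail_avg)
{
	const uint32_t device_constant = 1 << ARITH_SHIFT;	/* assumed */
	const uint32_t test_frame_len = 8192 << ARITH_SHIFT;	/* assumed */
	const uint32_t s_unit = 1 << ARITH_SHIFT;
	uint32_t err, tx_time, estimated_retx;
	uint64_t result;

	/* err is 0 when the rate came from the expected-throughput path. */
	err = (fail_avg << ARITH_SHIFT) / 100;

	/* rate is in 100 Kbps units; the factor of 10 corrects to Mbps. */
	tx_time = device_constant + 10 * test_frame_len / rate;
	estimated_retx = (1U << (2 * ARITH_SHIFT)) / (s_unit - err);
	result = ((uint64_t)tx_time * estimated_retx) >> (2 * ARITH_SHIFT);

	return (uint32_t)result;
}

int main(void)
{
	/* e.g. a 54 Mbps expected throughput (rate = 540) with no failures */
	printf("metric = %u\n", airtime_metric(540, 0));
	return 0;
}
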
 
index 76b737d..19f14c9 100644 (file)
@@ -2279,11 +2279,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
        if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
                sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER);
 
-       /* check if the driver has a SW RC implementation */
-       if (ref && ref->ops->get_expected_throughput)
-               thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
-       else
-               thr = drv_get_expected_throughput(local, &sta->sta);
+       thr = sta_get_expected_throughput(sta);
 
        if (thr != 0) {
                sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
@@ -2291,6 +2287,25 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
        }
 }
 
+u32 sta_get_expected_throughput(struct sta_info *sta)
+{
+       struct ieee80211_sub_if_data *sdata = sta->sdata;
+       struct ieee80211_local *local = sdata->local;
+       struct rate_control_ref *ref = NULL;
+       u32 thr = 0;
+
+       if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
+               ref = local->rate_ctrl;
+
+       /* check if the driver has a SW RC implementation */
+       if (ref && ref->ops->get_expected_throughput)
+               thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
+       else
+               thr = drv_get_expected_throughput(local, sta);
+
+       return thr;
+}
+
 unsigned long ieee80211_sta_last_active(struct sta_info *sta)
 {
        struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
index 78b0ef3..0556be3 100644 (file)
@@ -712,6 +712,8 @@ void sta_set_rate_info_tx(struct sta_info *sta,
                          struct rate_info *rinfo);
 void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo);
 
+u32 sta_get_expected_throughput(struct sta_info *sta);
+
 void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
                          unsigned long exp_time);
 u8 sta_info_tx_streams(struct sta_info *sta);
index 5023966..1d0746d 100644 (file)
@@ -2334,7 +2334,6 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
        struct mesh_path __maybe_unused *mppath = NULL, *mpath = NULL;
        const u8 *encaps_data;
        int encaps_len, skip_header_bytes;
-       int nh_pos, h_pos;
        bool wme_sta = false, authorized = false;
        bool tdls_peer;
        bool multicast;
@@ -2640,13 +2639,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
                encaps_len = 0;
        }
 
-       nh_pos = skb_network_header(skb) - skb->data;
-       h_pos = skb_transport_header(skb) - skb->data;
-
        skb_pull(skb, skip_header_bytes);
-       nh_pos -= skip_header_bytes;
-       h_pos -= skip_header_bytes;
-
        head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb);
 
        /*
@@ -2672,18 +2665,12 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
                }
        }
 
-       if (encaps_data) {
+       if (encaps_data)
                memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
-               nh_pos += encaps_len;
-               h_pos += encaps_len;
-       }
 
 #ifdef CONFIG_MAC80211_MESH
-       if (meshhdrlen > 0) {
+       if (meshhdrlen > 0)
                memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen);
-               nh_pos += meshhdrlen;
-               h_pos += meshhdrlen;
-       }
 #endif
 
        if (ieee80211_is_data_qos(fc)) {
@@ -2699,15 +2686,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
        } else
                memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
 
-       nh_pos += hdrlen;
-       h_pos += hdrlen;
-
-       /* Update skb pointers to various headers since this modified frame
-        * is going to go through Linux networking code that may potentially
-        * need things like pointer to IP header. */
        skb_reset_mac_header(skb);
-       skb_set_network_header(skb, nh_pos);
-       skb_set_transport_header(skb, h_pos);
 
        info = IEEE80211_SKB_CB(skb);
        memset(info, 0, sizeof(*info));
@@ -4390,9 +4369,6 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
        int ac = ieee802_1d_to_ac[tid & 7];
 
        skb_reset_mac_header(skb);
-       skb_reset_network_header(skb);
-       skb_reset_transport_header(skb);
-
        skb_set_queue_mapping(skb, ac);
        skb->priority = tid;
 
index 046f750..45ac8e8 100644 (file)
@@ -333,6 +333,7 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp);
 void rds_ib_state_change(struct sock *sk);
 int rds_ib_listen_init(void);
 void rds_ib_listen_stop(void);
+__printf(2, 3)
 void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
 int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                             struct rdma_cm_event *event);
index b2d17f0..fd0bccb 100644 (file)
@@ -688,6 +688,7 @@ void __rds_conn_error(struct rds_connection *conn, const char *, ...);
 #define rds_conn_error(conn, fmt...) \
        __rds_conn_error(conn, KERN_WARNING "RDS: " fmt)
 
+__printf(2, 3)
 void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...);
 #define rds_conn_path_error(cp, fmt...) \
        __rds_conn_path_error(cp, KERN_WARNING "RDS: " fmt)
index 12ebde8..25aada7 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/hrtimer.h>
 #include <linux/lockdep.h>
 #include <linux/slab.h>
+#include <linux/hashtable.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -263,33 +264,33 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
            root->handle == handle)
                return root;
 
-       list_for_each_entry_rcu(q, &root->list, list) {
+       hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
                if (q->handle == handle)
                        return q;
        }
        return NULL;
 }
 
-void qdisc_list_add(struct Qdisc *q)
+void qdisc_hash_add(struct Qdisc *q)
 {
        if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
                struct Qdisc *root = qdisc_dev(q)->qdisc;
 
                WARN_ON_ONCE(root == &noop_qdisc);
                ASSERT_RTNL();
-               list_add_tail_rcu(&q->list, &root->list);
+               hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
        }
 }
-EXPORT_SYMBOL(qdisc_list_add);
+EXPORT_SYMBOL(qdisc_hash_add);
 
-void qdisc_list_del(struct Qdisc *q)
+void qdisc_hash_del(struct Qdisc *q)
 {
        if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
                ASSERT_RTNL();
-               list_del_rcu(&q->list);
+               hash_del_rcu(&q->hash);
        }
 }
-EXPORT_SYMBOL(qdisc_list_del);
+EXPORT_SYMBOL(qdisc_hash_del);
 
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
@@ -998,7 +999,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
                                goto err_out4;
                }
 
-               qdisc_list_add(sch);
+               qdisc_hash_add(sch);
 
                return sch;
        }
@@ -1435,6 +1436,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
 {
        int ret = 0, q_idx = *q_idx_p;
        struct Qdisc *q;
+       int b;
 
        if (!root)
                return 0;
@@ -1449,7 +1451,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
                        goto done;
                q_idx++;
        }
-       list_for_each_entry(q, &root->list, list) {
+       hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
                if (q_idx < s_q_idx) {
                        q_idx++;
                        continue;
@@ -1765,6 +1767,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
                               int *t_p, int s_t)
 {
        struct Qdisc *q;
+       int b;
 
        if (!root)
                return 0;
@@ -1772,7 +1775,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
        if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
                return -1;
 
-       list_for_each_entry(q, &root->list, list) {
+       hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
                if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
                        return -1;
        }
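
The qdisc_hash_add()/qdisc_match_from_root() changes above rely on a per-device hashtable keyed by the 32-bit qdisc handle (note the qdisc_dev(root)->qdisc_hash accesses), replacing the root qdisc's linked list. A minimal sketch of the declaration and a lookup in the same style, where the example_dev wrapper and the 4-bit (16 bucket) table size are assumptions for illustration only:

#include <linux/hashtable.h>
#include <net/sch_generic.h>

/* In the series the qdisc_hash member sits on struct net_device; the
 * wrapper struct here only exists to keep the sketch self-contained.
 * hash_init(dev->qdisc_hash) must run before the table is used.
 */
struct example_dev {
	DECLARE_HASHTABLE(qdisc_hash, 4);
};

/* Mirrors qdisc_match_from_root() above: under RCU, walk only the bucket
 * that the handle hashes to instead of every qdisc on the device.
 */
static struct Qdisc *example_qdisc_lookup(struct example_dev *dev, u32 handle)
{
	struct Qdisc *q;

	hash_for_each_possible_rcu(dev->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}
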
index e95b67c..18faecc 100644 (file)
@@ -423,7 +423,6 @@ struct Qdisc noop_qdisc = {
        .dequeue        =       noop_dequeue,
        .flags          =       TCQ_F_BUILTIN,
        .ops            =       &noop_qdisc_ops,
-       .list           =       LIST_HEAD_INIT(noop_qdisc.list),
        .q.lock         =       __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
        .dev_queue      =       &noop_netdev_queue,
        .running        =       SEQCNT_ZERO(noop_qdisc.running),
@@ -613,7 +612,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
                sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
                sch->padded = (char *) sch - (char *) p;
        }
-       INIT_LIST_HEAD(&sch->list);
        skb_queue_head_init(&sch->q);
 
        spin_lock_init(&sch->busylock);
@@ -700,7 +698,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
                return;
 
 #ifdef CONFIG_NET_SCHED
-       qdisc_list_del(qdisc);
+       qdisc_hash_del(qdisc);
 
        qdisc_put_stab(rtnl_dereference(qdisc->stab));
 #endif
@@ -788,6 +786,10 @@ static void attach_default_qdiscs(struct net_device *dev)
                        qdisc->ops->attach(qdisc);
                }
        }
+#ifdef CONFIG_NET_SCHED
+       if (dev->qdisc)
+               qdisc_hash_add(dev->qdisc);
+#endif
 }
 
 static void transition_one_qdisc(struct net_device *dev,
index 3ddc7bd..000f1d3 100644 (file)
@@ -142,8 +142,6 @@ struct hfsc_class {
                                           link-sharing, max(myf, cfmin) */
        u64     cl_myf;                 /* my fit-time (calculated from this
                                           class's own upperlimit curve) */
-       u64     cl_myfadj;              /* my fit-time adjustment (to cancel
-                                          history dependence) */
        u64     cl_cfmin;               /* earliest children's fit-time (used
                                           with cl_myf to obtain cl_f) */
        u64     cl_cvtmin;              /* minimal virtual time among the
@@ -151,11 +149,8 @@ struct hfsc_class {
                                           (monotonic within a period) */
        u64     cl_vtadj;               /* intra-period cumulative vt
                                           adjustment */
-       u64     cl_vtoff;               /* inter-period cumulative vt offset */
-       u64     cl_cvtmax;              /* max child's vt in the last period */
-       u64     cl_cvtoff;              /* cumulative cvtmax of all periods */
-       u64     cl_pcvtoff;             /* parent's cvtoff at initialization
-                                          time */
+       u64     cl_cvtoff;              /* largest virtual time seen among
+                                          the children */
 
        struct internal_sc cl_rsc;      /* internal real-time service curve */
        struct internal_sc cl_fsc;      /* internal fair service curve */
@@ -701,28 +696,16 @@ init_vf(struct hfsc_class *cl, unsigned int len)
                        } else {
                                /*
                                 * first child for a new parent backlog period.
-                                * add parent's cvtmax to cvtoff to make a new
-                                * vt (vtoff + vt) larger than the vt in the
-                                * last period for all children.
+                                * initialize cl_vt to the highest value seen
+                                * among the siblings. this is analogous to
+                                * what cur_time would provide in realtime case.
                                 */
-                               vt = cl->cl_parent->cl_cvtmax;
-                               cl->cl_parent->cl_cvtoff += vt;
-                               cl->cl_parent->cl_cvtmax = 0;
+                               cl->cl_vt = cl->cl_parent->cl_cvtoff;
                                cl->cl_parent->cl_cvtmin = 0;
-                               cl->cl_vt = 0;
                        }
 
-                       cl->cl_vtoff = cl->cl_parent->cl_cvtoff -
-                                                       cl->cl_pcvtoff;
-
                        /* update the virtual curve */
-                       vt = cl->cl_vt + cl->cl_vtoff;
-                       rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt,
-                                                     cl->cl_total);
-                       if (cl->cl_virtual.x == vt) {
-                               cl->cl_virtual.x -= cl->cl_vtoff;
-                               cl->cl_vtoff = 0;
-                       }
+                       rtsc_min(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
                        cl->cl_vtadj = 0;
 
                        cl->cl_vtperiod++;  /* increment vt period */
@@ -745,7 +728,6 @@ init_vf(struct hfsc_class *cl, unsigned int len)
                                /* compute myf */
                                cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
                                                      cl->cl_total);
-                               cl->cl_myfadj = 0;
                        }
                }
 
@@ -779,8 +761,7 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
                        go_passive = 0;
 
                /* update vt */
-               cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
-                           - cl->cl_vtoff + cl->cl_vtadj;
+               cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) + cl->cl_vtadj;
 
                /*
                 * if vt of the class is smaller than cvtmin,
@@ -795,9 +776,9 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
                if (go_passive) {
                        /* no more active child, going passive */
 
-                       /* update cvtmax of the parent class */
-                       if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
-                               cl->cl_parent->cl_cvtmax = cl->cl_vt;
+                       /* update cvtoff of the parent class */
+                       if (cl->cl_vt > cl->cl_parent->cl_cvtoff)
+                               cl->cl_parent->cl_cvtoff = cl->cl_vt;
 
                        /* remove this class from the vt tree */
                        vttree_remove(cl);
@@ -813,9 +794,10 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
 
                /* update f */
                if (cl->cl_flags & HFSC_USC) {
+                       cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
+#if 0
                        cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit,
                                                              cl->cl_total);
-#if 0
                        /*
                         * This code causes classes to stay way under their
                         * limit when multiple classes are used at gigabit
@@ -940,7 +922,7 @@ static void
 hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc)
 {
        sc2isc(fsc, &cl->cl_fsc);
-       rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vtoff + cl->cl_vt, cl->cl_total);
+       rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
        cl->cl_flags |= HFSC_FSC;
 }
 
@@ -1094,7 +1076,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
        if (parent->level == 0)
                hfsc_purge_queue(sch, parent);
        hfsc_adjust_levels(parent);
-       cl->cl_pcvtoff = parent->cl_cvtoff;
        sch_tree_unlock(sch);
 
        qdisc_class_hash_grow(sch, &q->clhash);
@@ -1482,16 +1463,12 @@ hfsc_reset_class(struct hfsc_class *cl)
        cl->cl_e            = 0;
        cl->cl_vt           = 0;
        cl->cl_vtadj        = 0;
-       cl->cl_vtoff        = 0;
        cl->cl_cvtmin       = 0;
-       cl->cl_cvtmax       = 0;
        cl->cl_cvtoff       = 0;
-       cl->cl_pcvtoff      = 0;
        cl->cl_vtperiod     = 0;
        cl->cl_parentperiod = 0;
        cl->cl_f            = 0;
        cl->cl_myf          = 0;
-       cl->cl_myfadj       = 0;
        cl->cl_cfmin        = 0;
        cl->cl_nactive      = 0;
 
index b943982..2bc8d7f 100644 (file)
@@ -88,7 +88,7 @@ static void mq_attach(struct Qdisc *sch)
                        qdisc_destroy(old);
 #ifdef CONFIG_NET_SCHED
                if (ntx < dev->real_num_tx_queues)
-                       qdisc_list_add(qdisc);
+                       qdisc_hash_add(qdisc);
 #endif
 
        }
index 549c663..b5c502c 100644 (file)
@@ -182,7 +182,7 @@ static void mqprio_attach(struct Qdisc *sch)
                if (old)
                        qdisc_destroy(old);
                if (ntx < dev->real_num_tx_queues)
-                       qdisc_list_add(qdisc);
+                       qdisc_hash_add(qdisc);
        }
        kfree(priv->qdiscs);
        priv->qdiscs = NULL;
diff --git a/net/strparser/Kconfig b/net/strparser/Kconfig
new file mode 100644 (file)
index 0000000..6cff3f6
--- /dev/null
@@ -0,0 +1,4 @@
+
+config STREAM_PARSER
+       tristate
+       default n
diff --git a/net/strparser/Makefile b/net/strparser/Makefile
new file mode 100644 (file)
index 0000000..858a126
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_STREAM_PARSER) += strparser.o
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
new file mode 100644 (file)
index 0000000..fd688c0
--- /dev/null
@@ -0,0 +1,492 @@
+/*
+ * Stream Parser
+ *
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/errno.h>
+#include <linux/errqueue.h>
+#include <linux/file.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/poll.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/uaccess.h>
+#include <linux/workqueue.h>
+#include <net/strparser.h>
+#include <net/netns/generic.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+
+static struct workqueue_struct *strp_wq;
+
+struct _strp_rx_msg {
+       /* Internal cb structure. struct strp_rx_msg must be first for passing
+        * to upper layer.
+        */
+       struct strp_rx_msg strp;
+       int accum_len;
+       int early_eaten;
+};
+
+static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb)
+{
+       return (struct _strp_rx_msg *)((void *)skb->cb +
+               offsetof(struct qdisc_skb_cb, data));
+}
+
+/* Lower lock held */
+static void strp_abort_rx_strp(struct strparser *strp, int err)
+{
+       struct sock *csk = strp->sk;
+
+       /* Unrecoverable error in receive */
+
+       del_timer(&strp->rx_msg_timer);
+
+       if (strp->rx_stopped)
+               return;
+
+       strp->rx_stopped = 1;
+
+       /* Report an error on the lower socket */
+       csk->sk_err = err;
+       csk->sk_error_report(csk);
+}
+
+static void strp_start_rx_timer(struct strparser *strp)
+{
+       if (strp->sk->sk_rcvtimeo)
+               mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo);
+}
+
+/* Lower lock held */
+static void strp_parser_err(struct strparser *strp, int err,
+                           read_descriptor_t *desc)
+{
+       desc->error = err;
+       kfree_skb(strp->rx_skb_head);
+       strp->rx_skb_head = NULL;
+       strp->cb.abort_parser(strp, err);
+}
+
+/* Lower socket lock held */
+static int strp_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+                        unsigned int orig_offset, size_t orig_len)
+{
+       struct strparser *strp = (struct strparser *)desc->arg.data;
+       struct _strp_rx_msg *rxm;
+       struct sk_buff *head, *skb;
+       size_t eaten = 0, cand_len;
+       ssize_t extra;
+       int err;
+       bool cloned_orig = false;
+
+       if (strp->rx_paused)
+               return 0;
+
+       head = strp->rx_skb_head;
+       if (head) {
+               /* Message already in progress */
+
+               rxm = _strp_rx_msg(head);
+               if (unlikely(rxm->early_eaten)) {
+                       /* Already some number of bytes on the receive sock
+                        * data saved in rx_skb_head, just indicate they
+                        * are consumed.
+                        */
+                       eaten = orig_len <= rxm->early_eaten ?
+                               orig_len : rxm->early_eaten;
+                       rxm->early_eaten -= eaten;
+
+                       return eaten;
+               }
+
+               if (unlikely(orig_offset)) {
+                       /* Getting data with a non-zero offset when a message is
+                        * in progress is not expected. If it does happen, we
+                        * need to clone and pull since we can't deal with
+                        * offsets in the skbs for a message except in the head.
+                        */
+                       orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
+                       if (!orig_skb) {
+                               STRP_STATS_INCR(strp->stats.rx_mem_fail);
+                               desc->error = -ENOMEM;
+                               return 0;
+                       }
+                       if (!pskb_pull(orig_skb, orig_offset)) {
+                               STRP_STATS_INCR(strp->stats.rx_mem_fail);
+                               kfree_skb(orig_skb);
+                               desc->error = -ENOMEM;
+                               return 0;
+                       }
+                       cloned_orig = true;
+                       orig_offset = 0;
+               }
+
+               if (!strp->rx_skb_nextp) {
+                       /* We are going to append to the frags_list of head.
+                        * Need to unshare the frag_list.
+                        */
+                       err = skb_unclone(head, GFP_ATOMIC);
+                       if (err) {
+                               STRP_STATS_INCR(strp->stats.rx_mem_fail);
+                               desc->error = err;
+                               return 0;
+                       }
+
+                       if (unlikely(skb_shinfo(head)->frag_list)) {
+                               /* We can't append to an sk_buff that already
+                                * has a frag_list. We create a new head, point
+                                * the frag_list of that to the old head, and
+                                * then are able to use the old head->next for
+                                * appending to the message.
+                                */
+                               if (WARN_ON(head->next)) {
+                                       desc->error = -EINVAL;
+                                       return 0;
+                               }
+
+                               skb = alloc_skb(0, GFP_ATOMIC);
+                               if (!skb) {
+                                       STRP_STATS_INCR(strp->stats.rx_mem_fail);
+                                       desc->error = -ENOMEM;
+                                       return 0;
+                               }
+                               skb->len = head->len;
+                               skb->data_len = head->len;
+                               skb->truesize = head->truesize;
+                               *_strp_rx_msg(skb) = *_strp_rx_msg(head);
+                               strp->rx_skb_nextp = &head->next;
+                               skb_shinfo(skb)->frag_list = head;
+                               strp->rx_skb_head = skb;
+                               head = skb;
+                       } else {
+                               strp->rx_skb_nextp =
+                                   &skb_shinfo(head)->frag_list;
+                       }
+               }
+       }
+
+       while (eaten < orig_len) {
+               /* Always clone since we will consume something */
+               skb = skb_clone(orig_skb, GFP_ATOMIC);
+               if (!skb) {
+                       STRP_STATS_INCR(strp->stats.rx_mem_fail);
+                       desc->error = -ENOMEM;
+                       break;
+               }
+
+               cand_len = orig_len - eaten;
+
+               head = strp->rx_skb_head;
+               if (!head) {
+                       head = skb;
+                       strp->rx_skb_head = head;
+                       /* Will set rx_skb_nextp on next packet if needed */
+                       strp->rx_skb_nextp = NULL;
+                       rxm = _strp_rx_msg(head);
+                       memset(rxm, 0, sizeof(*rxm));
+                       rxm->strp.offset = orig_offset + eaten;
+               } else {
+                       /* Unclone since we may be appending to an skb that we
+                        * already share a frag_list with.
+                        */
+                       err = skb_unclone(skb, GFP_ATOMIC);
+                       if (err) {
+                               STRP_STATS_INCR(strp->stats.rx_mem_fail);
+                               desc->error = err;
+                               break;
+                       }
+
+                       rxm = _strp_rx_msg(head);
+                       *strp->rx_skb_nextp = skb;
+                       strp->rx_skb_nextp = &skb->next;
+                       head->data_len += skb->len;
+                       head->len += skb->len;
+                       head->truesize += skb->truesize;
+               }
+
+               if (!rxm->strp.full_len) {
+                       ssize_t len;
+
+                       len = (*strp->cb.parse_msg)(strp, head);
+
+                       if (!len) {
+                               /* Need more header to determine length */
+                               if (!rxm->accum_len) {
+                                       /* Start RX timer for new message */
+                                       strp_start_rx_timer(strp);
+                               }
+                               rxm->accum_len += cand_len;
+                               eaten += cand_len;
+                               STRP_STATS_INCR(strp->stats.rx_need_more_hdr);
+                               WARN_ON(eaten != orig_len);
+                               break;
+                       } else if (len < 0) {
+                               if (len == -ESTRPIPE && rxm->accum_len) {
+                                       len = -ENODATA;
+                                       strp->rx_unrecov_intr = 1;
+                               } else {
+                                       strp->rx_interrupted = 1;
+                               }
+                               strp_parser_err(strp, len, desc);
+                               break;
+                       } else if (len > strp->sk->sk_rcvbuf) {
+                               /* Message length exceeds maximum allowed */
+                               STRP_STATS_INCR(strp->stats.rx_msg_too_big);
+                               strp_parser_err(strp, -EMSGSIZE, desc);
+                               break;
+                       } else if (len <= (ssize_t)head->len -
+                                         skb->len - rxm->strp.offset) {
+                               /* Length must be into new skb (and also
+                                * greater than zero)
+                                */
+                               STRP_STATS_INCR(strp->stats.rx_bad_hdr_len);
+                               strp_parser_err(strp, -EPROTO, desc);
+                               break;
+                       }
+
+                       rxm->strp.full_len = len;
+               }
+
+               extra = (ssize_t)(rxm->accum_len + cand_len) -
+                       rxm->strp.full_len;
+
+               if (extra < 0) {
+                       /* Message not complete yet. */
+                       if (rxm->strp.full_len - rxm->accum_len >
+                           tcp_inq(strp->sk)) {
+                               /* Don't have the whole message in the socket
+                                * buffer. Set strp->rx_need_bytes to wait for
+                                * the rest of the message. Also, set "early
+                                * eaten" since we've already buffered the skb
+                                * but don't consume yet per tcp_read_sock.
+                                */
+
+                               if (!rxm->accum_len) {
+                                       /* Start RX timer for new message */
+                                       strp_start_rx_timer(strp);
+                               }
+
+                               strp->rx_need_bytes = rxm->strp.full_len -
+                                                      rxm->accum_len;
+                               rxm->accum_len += cand_len;
+                               rxm->early_eaten = cand_len;
+                               STRP_STATS_ADD(strp->stats.rx_bytes, cand_len);
+                               desc->count = 0; /* Stop reading socket */
+                               break;
+                       }
+                       rxm->accum_len += cand_len;
+                       eaten += cand_len;
+                       WARN_ON(eaten != orig_len);
+                       break;
+               }
+
+               /* Positive extra indicates more bytes than needed for the
+                * message
+                */
+
+               WARN_ON(extra > cand_len);
+
+               eaten += (cand_len - extra);
+
+               /* Hurray, we have a new message! */
+               del_timer(&strp->rx_msg_timer);
+               strp->rx_skb_head = NULL;
+               STRP_STATS_INCR(strp->stats.rx_msgs);
+
+               /* Give skb to upper layer */
+               strp->cb.rcv_msg(strp, head);
+
+               if (unlikely(strp->rx_paused)) {
+                       /* Upper layer paused strp */
+                       break;
+               }
+       }
+
+       if (cloned_orig)
+               kfree_skb(orig_skb);
+
+       STRP_STATS_ADD(strp->stats.rx_bytes, eaten);
+
+       return eaten;
+}
+
+static int default_read_sock_done(struct strparser *strp, int err)
+{
+       return err;
+}
+
+/* Called with lock held on lower socket */
+static int strp_tcp_read_sock(struct strparser *strp)
+{
+       read_descriptor_t desc;
+
+       desc.arg.data = strp;
+       desc.error = 0;
+       desc.count = 1; /* give more than one skb per call */
+
+       /* sk should be locked here, so okay to do tcp_read_sock */
+       tcp_read_sock(strp->sk, &desc, strp_tcp_recv);
+
+       desc.error = strp->cb.read_sock_done(strp, desc.error);
+
+       return desc.error;
+}
+
+/* Lower sock lock held */
+void strp_tcp_data_ready(struct strparser *strp)
+{
+       struct sock *csk = strp->sk;
+
+       if (unlikely(strp->rx_stopped))
+               return;
+
+       /* This check is needed to synchronize with do_strp_rx_work.
+        * do_strp_rx_work acquires a process lock (lock_sock) whereas
+        * the lock held here is bh_lock_sock. The two locks can be
+        * held by different threads at the same time, but bh_lock_sock
+        * allows a thread in BH context to safely check if the process
+        * lock is held. In this case, if the lock is held, queue work.
+        */
+       if (sock_owned_by_user(csk)) {
+               queue_work(strp_wq, &strp->rx_work);
+               return;
+       }
+
+       if (strp->rx_paused)
+               return;
+
+       if (strp->rx_need_bytes) {
+               if (tcp_inq(csk) >= strp->rx_need_bytes)
+                       strp->rx_need_bytes = 0;
+               else
+                       return;
+       }
+
+       if (strp_tcp_read_sock(strp) == -ENOMEM)
+               queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_tcp_data_ready);
+
+static void do_strp_rx_work(struct strparser *strp)
+{
+       read_descriptor_t rd_desc;
+       struct sock *csk = strp->sk;
+
+       /* We need the read lock to synchronize with strp_tcp_data_ready. We
+        * need the socket lock for calling tcp_read_sock.
+        */
+       lock_sock(csk);
+
+       if (unlikely(csk->sk_user_data != strp))
+               goto out;
+
+       if (unlikely(strp->rx_stopped))
+               goto out;
+
+       if (strp->rx_paused)
+               goto out;
+
+       rd_desc.arg.data = strp;
+
+       if (strp_tcp_read_sock(strp) == -ENOMEM)
+               queue_work(strp_wq, &strp->rx_work);
+
+out:
+       release_sock(csk);
+}
+
+static void strp_rx_work(struct work_struct *w)
+{
+       do_strp_rx_work(container_of(w, struct strparser, rx_work));
+}
+
+static void strp_rx_msg_timeout(unsigned long arg)
+{
+       struct strparser *strp = (struct strparser *)arg;
+
+       /* Message assembly timed out */
+       STRP_STATS_INCR(strp->stats.rx_msg_timeouts);
+       lock_sock(strp->sk);
+       strp->cb.abort_parser(strp, ETIMEDOUT);
+       release_sock(strp->sk);
+}
+
+int strp_init(struct strparser *strp, struct sock *csk,
+             struct strp_callbacks *cb)
+{
+       if (!cb || !cb->rcv_msg || !cb->parse_msg)
+               return -EINVAL;
+
+       memset(strp, 0, sizeof(*strp));
+
+       strp->sk = csk;
+
+       setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout,
+                   (unsigned long)strp);
+
+       INIT_WORK(&strp->rx_work, strp_rx_work);
+
+       strp->cb.rcv_msg = cb->rcv_msg;
+       strp->cb.parse_msg = cb->parse_msg;
+       strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
+       strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(strp_init);
+
+/* strp must already be stopped so that strp_tcp_recv will no longer be called.
+ * Note that strp_done is not called with the lower socket held.
+ */
+void strp_done(struct strparser *strp)
+{
+       WARN_ON(!strp->rx_stopped);
+
+       del_timer_sync(&strp->rx_msg_timer);
+       cancel_work_sync(&strp->rx_work);
+
+       if (strp->rx_skb_head) {
+               kfree_skb(strp->rx_skb_head);
+               strp->rx_skb_head = NULL;
+       }
+}
+EXPORT_SYMBOL_GPL(strp_done);
+
+void strp_stop(struct strparser *strp)
+{
+       strp->rx_stopped = 1;
+}
+EXPORT_SYMBOL_GPL(strp_stop);
+
+void strp_check_rcv(struct strparser *strp)
+{
+       queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_check_rcv);
+
+static int __init strp_mod_init(void)
+{
+       strp_wq = create_singlethread_workqueue("kstrp");
+
+       return 0;
+}
+
+static void __exit strp_mod_exit(void)
+{
+}
+module_init(strp_mod_init);
+module_exit(strp_mod_exit);
+MODULE_LICENSE("GPL");
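The API exported above (strp_init, strp_tcp_data_ready, strp_check_rcv, strp_stop, strp_done) is driven from the TCP socket's sk_data_ready callback. A minimal sketch of a consumer follows, assuming a hypothetical 4-byte big-endian length header and invented my_* names; it is illustrative only, not part of this patch, and omits error handling and restoring the original sk_data_ready.

#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/strparser.h>

/* Sketch: frame messages with an assumed 4-byte big-endian length header. */
static int my_parse_msg(struct strparser *strp, struct sk_buff *skb)
{
	struct strp_rx_msg *rxm = strp_rx_msg(skb);
	__be32 hdr;

	/* Returning 0 tells strparser more bytes are needed for the header */
	if (skb_copy_bits(skb, rxm->offset, &hdr, sizeof(hdr)))
		return 0;

	return sizeof(hdr) + ntohl(hdr);	/* total framed message length */
}

static void my_rcv_msg(struct strparser *strp, struct sk_buff *skb)
{
	/* One complete message; the skb now belongs to this callback */
	kfree_skb(skb);
}

static void my_data_ready(struct sock *sk)
{
	strp_tcp_data_ready(sk->sk_user_data);
}

static int my_attach(struct strparser *strp, struct sock *csk)
{
	struct strp_callbacks cb = {
		.rcv_msg	= my_rcv_msg,
		.parse_msg	= my_parse_msg,
	};
	int err;

	err = strp_init(strp, csk, &cb);
	if (err)
		return err;

	write_lock_bh(&csk->sk_callback_lock);
	csk->sk_user_data = strp;
	csk->sk_data_ready = my_data_ready;
	write_unlock_bh(&csk->sk_callback_lock);

	/* Pick up anything already queued before the callback was installed */
	strp_check_rcv(strp);
	return 0;
}
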
index a5fc9dd..9e90129 100644 (file)
@@ -1306,6 +1306,7 @@ bool switchdev_port_same_parent_id(struct net_device *a,
 
        return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
 }
+EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
 
 static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
                                       struct net_device *group_dev)
@@ -1323,7 +1324,6 @@ static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
 
        return dev->ifindex;
 }
-EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
 
 static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
                                          u32 old_mark, u32 *reset_mark)
index 46a71c7..5bc1a3d 100644 (file)
@@ -42,26 +42,37 @@ static int net_ctl_permissions(struct ctl_table_header *head,
                               struct ctl_table *table)
 {
        struct net *net = container_of(head->set, struct net, sysctls);
-       kuid_t root_uid = make_kuid(net->user_ns, 0);
-       kgid_t root_gid = make_kgid(net->user_ns, 0);
 
        /* Allow network administrator to have same access as root. */
-       if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN) ||
-           uid_eq(root_uid, current_euid())) {
+       if (ns_capable(net->user_ns, CAP_NET_ADMIN)) {
                int mode = (table->mode >> 6) & 7;
                return (mode << 6) | (mode << 3) | mode;
        }
-       /* Allow netns root group to have the same access as the root group */
-       if (in_egroup_p(root_gid)) {
-               int mode = (table->mode >> 3) & 7;
-               return (mode << 3) | mode;
-       }
+
        return table->mode;
 }
 
+static void net_ctl_set_ownership(struct ctl_table_header *head,
+                                 struct ctl_table *table,
+                                 kuid_t *uid, kgid_t *gid)
+{
+       struct net *net = container_of(head->set, struct net, sysctls);
+       kuid_t ns_root_uid;
+       kgid_t ns_root_gid;
+
+       ns_root_uid = make_kuid(net->user_ns, 0);
+       if (uid_valid(ns_root_uid))
+               *uid = ns_root_uid;
+
+       ns_root_gid = make_kgid(net->user_ns, 0);
+       if (gid_valid(ns_root_gid))
+               *gid = ns_root_gid;
+}
+
 static struct ctl_table_root net_sysctl_root = {
        .lookup = net_ctl_header_lookup,
        .permissions = net_ctl_permissions,
+       .set_ownership = net_ctl_set_ownership,
 };
 
 static int __net_init sysctl_net_init(struct net *net)
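The reworked net_ctl_permissions() above replaces the uid/gid special cases with a single ns_capable(CAP_NET_ADMIN) check that replicates the table owner's mode bits to group and other, while the new set_ownership hook makes the files appear owned by the netns root. A quick stand-alone illustration of the permission bit arithmetic (user-space C, purely for clarity, not part of the patch):

#include <stdio.h>

/* Mirrors the shift-and-replicate logic in net_ctl_permissions():
 * a netns CAP_NET_ADMIN caller gets the table owner's access bits everywhere.
 */
static int admin_mode(int table_mode)
{
	int mode = (table_mode >> 6) & 7;	/* owner rwx bits */

	return (mode << 6) | (mode << 3) | mode;
}

int main(void)
{
	printf("0%o -> 0%o\n", 0644, admin_mode(0644));	/* 0644 -> 0666 */
	printf("0%o -> 0%o\n", 0444, admin_mode(0444));	/* 0444 -> 0444 */
	return 0;
}
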
index 7645e97..2029b49 100644 (file)
@@ -906,6 +906,8 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev)
        if (WARN_ON(wdev->netdev))
                return;
 
+       nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
+
        list_del_rcu(&wdev->list);
        rdev->devlist_generation++;
 
@@ -1079,6 +1081,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
                     wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
                     wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr)
                        dev->priv_flags |= IFF_DONT_BRIDGE;
+
+               nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
                break;
        case NETDEV_GOING_DOWN:
                cfg80211_leave(rdev, wdev);
@@ -1157,6 +1161,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
                 * remove and clean it up.
                 */
                if (!list_empty(&wdev->list)) {
+                       nl80211_notify_iface(rdev, wdev,
+                                            NL80211_CMD_DEL_INTERFACE);
                        sysfs_remove_link(&dev->dev.kobj, "phy80211");
                        list_del_rcu(&wdev->list);
                        rdev->devlist_generation++;
index f02653a..4997857 100644 (file)
@@ -2751,7 +2751,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
        struct cfg80211_registered_device *rdev = info->user_ptr[0];
        struct vif_params params;
        struct wireless_dev *wdev;
-       struct sk_buff *msg, *event;
+       struct sk_buff *msg;
        int err;
        enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
        u32 flags;
@@ -2855,20 +2855,15 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
                return -ENOBUFS;
        }
 
-       event = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-       if (event) {
-               if (nl80211_send_iface(event, 0, 0, 0,
-                                      rdev, wdev, false) < 0) {
-                       nlmsg_free(event);
-                       goto out;
-               }
-
-               genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
-                                       event, 0, NL80211_MCGRP_CONFIG,
-                                       GFP_KERNEL);
-       }
+       /*
+        * For wdevs which have no associated netdev object (e.g. of type
+        * NL80211_IFTYPE_P2P_DEVICE), emit the NEW_INTERFACE event here.
+        * For all other types, the event will be generated from the
+        * netdev notifier
+        */
+       if (!wdev->netdev)
+               nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
 
-out:
        return genlmsg_reply(msg, info);
 }
 
@@ -2876,18 +2871,10 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
 {
        struct cfg80211_registered_device *rdev = info->user_ptr[0];
        struct wireless_dev *wdev = info->user_ptr[1];
-       struct sk_buff *msg;
-       int status;
 
        if (!rdev->ops->del_virtual_intf)
                return -EOPNOTSUPP;
 
-       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-       if (msg && nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, true) < 0) {
-               nlmsg_free(msg);
-               msg = NULL;
-       }
-
        /*
         * If we remove a wireless device without a netdev then clear
         * user_ptr[1] so that nl80211_post_doit won't dereference it
@@ -2898,15 +2885,7 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
        if (!wdev->netdev)
                info->user_ptr[1] = NULL;
 
-       status = rdev_del_virtual_intf(rdev, wdev);
-       if (status >= 0 && msg)
-               genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
-                                       msg, 0, NL80211_MCGRP_CONFIG,
-                                       GFP_KERNEL);
-       else
-               nlmsg_free(msg);
-
-       return status;
+       return rdev_del_virtual_intf(rdev, wdev);
 }
 
 static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info)
@@ -5374,6 +5353,18 @@ static int nl80211_check_s32(const struct nlattr *nla, s32 min, s32 max, s32 *ou
        return 0;
 }
 
+static int nl80211_check_power_mode(const struct nlattr *nla,
+                                   enum nl80211_mesh_power_mode min,
+                                   enum nl80211_mesh_power_mode max,
+                                   enum nl80211_mesh_power_mode *out)
+{
+       u32 val = nla_get_u32(nla);
+       if (val < min || val > max)
+               return -EINVAL;
+       *out = val;
+       return 0;
+}
+
 static int nl80211_parse_mesh_config(struct genl_info *info,
                                     struct mesh_config *cfg,
                                     u32 *mask_out)
@@ -5518,7 +5509,7 @@ do {                                                                          \
                                  NL80211_MESH_POWER_ACTIVE,
                                  NL80211_MESH_POWER_MAX,
                                  mask, NL80211_MESHCONF_POWER_MODE,
-                                 nl80211_check_u32);
+                                 nl80211_check_power_mode);
        FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration,
                                  0, 65535, mask,
                                  NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16);
@@ -7773,12 +7764,13 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 
        ibss.beacon_interval = 100;
 
-       if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
+       if (info->attrs[NL80211_ATTR_BEACON_INTERVAL])
                ibss.beacon_interval =
                        nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
-               if (ibss.beacon_interval < 1 || ibss.beacon_interval > 10000)
-                       return -EINVAL;
-       }
+
+       err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval);
+       if (err)
+               return err;
 
        if (!rdev->ops->join_ibss)
                return -EOPNOTSUPP;
@@ -9252,9 +9244,10 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
        if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
                setup.beacon_interval =
                        nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
-               if (setup.beacon_interval < 10 ||
-                   setup.beacon_interval > 10000)
-                       return -EINVAL;
+
+               err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval);
+               if (err)
+                       return err;
        }
 
        if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) {
@@ -11847,6 +11840,29 @@ void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
                                NL80211_MCGRP_CONFIG, GFP_KERNEL);
 }
 
+void nl80211_notify_iface(struct cfg80211_registered_device *rdev,
+                               struct wireless_dev *wdev,
+                               enum nl80211_commands cmd)
+{
+       struct sk_buff *msg;
+
+       WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE &&
+               cmd != NL80211_CMD_DEL_INTERFACE);
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return;
+
+       if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev,
+                              cmd == NL80211_CMD_DEL_INTERFACE) < 0) {
+               nlmsg_free(msg);
+               return;
+       }
+
+       genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+                               NL80211_MCGRP_CONFIG, GFP_KERNEL);
+}
+
 static int nl80211_add_scan_req(struct sk_buff *msg,
                                struct cfg80211_registered_device *rdev)
 {
index a63f402..7e3821d 100644 (file)
@@ -7,6 +7,9 @@ int nl80211_init(void);
 void nl80211_exit(void);
 void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
                          enum nl80211_commands cmd);
+void nl80211_notify_iface(struct cfg80211_registered_device *rdev,
+                         struct wireless_dev *wdev,
+                         enum nl80211_commands cmd);
 void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
                             struct wireless_dev *wdev);
 struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev,
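Taken together, the cfg80211/nl80211 changes above mean every interface addition and removal is announced on nl80211's "config" multicast group, whether or not the wdev has a netdev. A hypothetical user-space listener built on libnl-genl-3 (illustrative only, not part of this patch) could observe those notifications like this:

#include <stdio.h>
#include <netlink/netlink.h>
#include <netlink/msg.h>
#include <netlink/handlers.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/genetlink.h>
#include <linux/nl80211.h>

static int handle_event(struct nl_msg *msg, void *arg)
{
	struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));

	if (gnlh->cmd == NL80211_CMD_NEW_INTERFACE)
		printf("interface added\n");
	else if (gnlh->cmd == NL80211_CMD_DEL_INTERFACE)
		printf("interface removed\n");

	return NL_SKIP;
}

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	int grp;

	if (!sk || genl_connect(sk))
		return 1;

	/* Resolve and join the nl80211 "config" multicast group */
	grp = genl_ctrl_resolve_grp(sk, "nl80211", "config");
	if (grp < 0 || nl_socket_add_membership(sk, grp))
		return 1;

	/* Multicast notifications carry no usable sequence numbers */
	nl_socket_disable_seq_check(sk);
	nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, handle_event, NULL);

	for (;;)
		nl_recvmsgs_default(sk);
}
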
index b7d1592..0675f51 100644 (file)
@@ -1559,7 +1559,7 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
        struct wireless_dev *wdev;
        int res = 0;
 
-       if (!beacon_int)
+       if (beacon_int < 10 || beacon_int > 10000)
                return -EINVAL;
 
        list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
index 90ebf7d..eb582c6 100644 (file)
@@ -24,6 +24,7 @@ hostprogs-y += test_overhead
 hostprogs-y += test_cgrp2_array_pin
 hostprogs-y += xdp1
 hostprogs-y += xdp2
+hostprogs-y += test_current_task_under_cgroup
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
@@ -49,6 +50,8 @@ test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
 xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
+test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \
+                                      test_current_task_under_cgroup_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -74,6 +77,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o
 always += test_cgrp2_tc_kern.o
 always += xdp1_kern.o
 always += xdp2_kern.o
+always += test_current_task_under_cgroup_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
@@ -97,6 +101,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt
 HOSTLOADLIBES_test_overhead += -lelf -lrt
 HOSTLOADLIBES_xdp1 += -lelf
 HOSTLOADLIBES_xdp2 += -lelf
+HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
index 7927a09..6f1672a 100644 (file)
@@ -37,12 +37,16 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
        (void *) BPF_FUNC_clone_redirect;
 static int (*bpf_redirect)(int ifindex, int flags) =
        (void *) BPF_FUNC_redirect;
-static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
+static int (*bpf_perf_event_output)(void *ctx, void *map,
+                                   unsigned long long flags, void *data,
+                                   int size) =
        (void *) BPF_FUNC_perf_event_output;
 static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
        (void *) BPF_FUNC_get_stackid;
 static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
        (void *) BPF_FUNC_probe_write_user;
+static int (*bpf_current_task_under_cgroup)(void *map, int index) =
+       (void *) BPF_FUNC_current_task_under_cgroup;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c
new file mode 100644 (file)
index 0000000..86b28d7
--- /dev/null
@@ -0,0 +1,43 @@
+/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+#include <uapi/linux/utsname.h>
+
+struct bpf_map_def SEC("maps") cgroup_map = {
+       .type                   = BPF_MAP_TYPE_CGROUP_ARRAY,
+       .key_size               = sizeof(u32),
+       .value_size             = sizeof(u32),
+       .max_entries    = 1,
+};
+
+struct bpf_map_def SEC("maps") perf_map = {
+       .type                   = BPF_MAP_TYPE_ARRAY,
+       .key_size               = sizeof(u32),
+       .value_size             = sizeof(u64),
+       .max_entries    = 1,
+};
+
+/* Writes the last PID that called sync to a map at index 0 */
+SEC("kprobe/sys_sync")
+int bpf_prog1(struct pt_regs *ctx)
+{
+       u64 pid = bpf_get_current_pid_tgid();
+       int idx = 0;
+
+       if (!bpf_current_task_under_cgroup(&cgroup_map, 0))
+               return 0;
+
+       bpf_map_update_elem(&perf_map, &idx, &pid, BPF_ANY);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
new file mode 100644 (file)
index 0000000..30b0bce
--- /dev/null
@@ -0,0 +1,145 @@
+/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <linux/bpf.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/limits.h>
+
+#define CGROUP_MOUNT_PATH      "/mnt"
+#define CGROUP_PATH            "/mnt/my-cgroup"
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+       __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+
+static int join_cgroup(char *path)
+{
+       int fd, rc = 0;
+       pid_t pid = getpid();
+       char cgroup_path[PATH_MAX + 1];
+
+       snprintf(cgroup_path, sizeof(cgroup_path), "%s/cgroup.procs", path);
+
+       fd = open(cgroup_path, O_WRONLY);
+       if (fd < 0) {
+               log_err("Opening Cgroup");
+               return 1;
+       }
+
+       if (dprintf(fd, "%d\n", pid) < 0) {
+               log_err("Joining Cgroup");
+               rc = 1;
+       }
+       close(fd);
+       return rc;
+}
+
+int main(int argc, char **argv)
+{
+       char filename[256];
+       int cg2, idx = 0;
+       pid_t remote_pid, local_pid = getpid();
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+       if (load_bpf_file(filename)) {
+               printf("%s", bpf_log_buf);
+               return 1;
+       }
+
+       /*
+        * This is to avoid interfering with existing cgroups. Unfortunately,
+        * most people don't have cgroupv2 enabled at this point in time.
+        * It's easier to create our own mount namespace and manage it
+        * ourselves.
+        */
+       if (unshare(CLONE_NEWNS)) {
+               log_err("unshare");
+               return 1;
+       }
+
+       if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
+               log_err("mount fakeroot");
+               return 1;
+       }
+
+       if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) {
+               log_err("mount cgroup2");
+               return 1;
+       }
+
+       if (mkdir(CGROUP_PATH, 0777) && errno != EEXIST) {
+               log_err("mkdir cgroup");
+               return 1;
+       }
+
+       cg2 = open(CGROUP_PATH, O_RDONLY);
+       if (cg2 < 0) {
+               log_err("opening target cgroup");
+               goto cleanup_cgroup_err;
+       }
+
+       if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) {
+               log_err("Adding target cgroup to map");
+               goto cleanup_cgroup_err;
+       }
+       if (join_cgroup(CGROUP_PATH)) {
+               log_err("Joining target cgroup");
+               goto cleanup_cgroup_err;
+       }
+
+       /*
+        * The installed helper program catches the sync call below and
+        * should write the calling PID to the map.
+        */
+
+       sync();
+       bpf_lookup_elem(map_fd[1], &idx, &remote_pid);
+
+       if (local_pid != remote_pid) {
+               fprintf(stderr,
+                       "BPF Helper didn't write correct PID to map, but: %d\n",
+                       remote_pid);
+               goto leave_cgroup_err;
+       }
+
+       /* Verify the negative scenario; leave the cgroup */
+       if (join_cgroup(CGROUP_MOUNT_PATH))
+               goto leave_cgroup_err;
+
+       remote_pid = 0;
+       bpf_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY);
+
+       sync();
+       bpf_lookup_elem(map_fd[1], &idx, &remote_pid);
+
+       if (local_pid == remote_pid) {
+               fprintf(stderr, "BPF cgroup negative test did not work\n");
+               goto cleanup_cgroup_err;
+       }
+
+       rmdir(CGROUP_PATH);
+       return 0;
+
+       /* Error condition, cleanup */
+leave_cgroup_err:
+       join_cgroup(CGROUP_MOUNT_PATH);
+cleanup_cgroup_err:
+       rmdir(CGROUP_PATH);
+       return 1;
+}
index fe2fcec..78c6f13 100644 (file)
@@ -1449,7 +1449,7 @@ static struct bpf_test tests[] = {
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        },
        {
-               "pkt: test1",
+               "direct packet access: test1",
                .insns = {
                        BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
                                    offsetof(struct __sk_buff, data)),
@@ -1466,7 +1466,7 @@ static struct bpf_test tests[] = {
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        },
        {
-               "pkt: test2",
+               "direct packet access: test2",
                .insns = {
                        BPF_MOV64_IMM(BPF_REG_0, 1),
                        BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
@@ -1499,7 +1499,7 @@ static struct bpf_test tests[] = {
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        },
        {
-               "pkt: test3",
+               "direct packet access: test3",
                .insns = {
                        BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
                                    offsetof(struct __sk_buff, data)),
@@ -1511,7 +1511,7 @@ static struct bpf_test tests[] = {
                .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
        },
        {
-               "pkt: test4",
+               "direct packet access: test4",
                .insns = {
                        BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
                                    offsetof(struct __sk_buff, data)),
@@ -1528,6 +1528,112 @@ static struct bpf_test tests[] = {
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        },
+       {
+               "helper access to packet: test1, valid packet_ptr range",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct xdp_md, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct xdp_md, data_end)),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+                       BPF_MOV64_IMM(BPF_REG_4, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup = {5},
+               .result_unpriv = ACCEPT,
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_XDP,
+       },
+       {
+               "helper access to packet: test2, unchecked packet_ptr",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct xdp_md, data)),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup = {1},
+               .result = REJECT,
+               .errstr = "invalid access to packet",
+               .prog_type = BPF_PROG_TYPE_XDP,
+       },
+       {
+               "helper access to packet: test3, variable add",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                       offsetof(struct xdp_md, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                       offsetof(struct xdp_md, data_end)),
+                       BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
+                       BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
+                       BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup = {11},
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_XDP,
+       },
+       {
+               "helper access to packet: test4, packet_ptr with bad range",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct xdp_md, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct xdp_md, data_end)),
+                       BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup = {7},
+               .result = REJECT,
+               .errstr = "invalid access to packet",
+               .prog_type = BPF_PROG_TYPE_XDP,
+       },
+       {
+               "helper access to packet: test5, packet_ptr with too short range",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct xdp_md, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct xdp_md, data_end)),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+                       BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup = {6},
+               .result = REJECT,
+               .errstr = "invalid access to packet",
+               .prog_type = BPF_PROG_TYPE_XDP,
+       },
 };
 
 static int probe_filter_length(struct bpf_insn *fp)
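
For reference, the new XDP verifier cases above encode, at the instruction level, the bounds-check-before-helper pattern that a restricted-C program would express as shown below. This is an illustrative sketch against the samples/bpf helpers with an assumed map definition; it is not part of this patch.

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") pkt_map = {
	.type		= BPF_MAP_TYPE_HASH,
	.key_size	= 8,
	.value_size	= 8,
	.max_entries	= 1,
};

SEC("xdp")
int xdp_pkt_to_map(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;

	/* Without this check the verifier rejects the helper call below
	 * with "invalid access to packet" (cf. test2).
	 */
	if (data + 8 > data_end)
		return XDP_PASS;

	/* A packet pointer with a verified 8-byte range may be passed to
	 * helpers, here as both key and value (cf. test1).
	 */
	bpf_map_update_elem(&pkt_map, data, data, BPF_ANY);

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";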