# Kdevelop4
*.kdev4
+
+#Automatically generated by ASN.1 compiler
+net/ipv4/netfilter/nf_nat_snmp_basic-asn1.c
+net/ipv4/netfilter/nf_nat_snmp_basic-asn1.h
--- /dev/null
+What: /sys/devices/platform/dock.N/docked
+Date: Dec, 2006
+KernelVersion: 2.6.19
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Value 1 or 0 indicates whether the software believes the
+ laptop is docked in a docking station.
+
+What: /sys/devices/platform/dock.N/undock
+Date: Dec, 2006
+KernelVersion: 2.6.19
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (WO) Writing to this file causes the software to initiate an
+ undock request to the firmware.
+
+What: /sys/devices/platform/dock.N/uid
+Date: Feb, 2007
+KernelVersion: v2.6.21
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Displays the docking station the laptop is docked to.
+
+What: /sys/devices/platform/dock.N/flags
+Date: May, 2007
+KernelVersion: v2.6.21
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Show dock station flags, useful for checking if undock
+ request has been made by the user (from the immediate_undock
+ option).
+
+What: /sys/devices/platform/dock.N/type
+Date: Aug, 2008
+KernelVersion: v2.6.27
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Display the dock station type- dock_station, ata_bay or
+ battery_bay.
What: /sys/devices/system/cpu/cpuidle/current_driver
/sys/devices/system/cpu/cpuidle/current_governer_ro
+ /sys/devices/system/cpu/cpuidle/available_governors
+ /sys/devices/system/cpu/cpuidle/current_governor
Date: September 2007
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Discover cpuidle policy and mechanism
Idle policy (governor) is differentiated from idle mechanism
(driver)
- current_driver: displays current idle mechanism
+ current_driver: (RO) displays current idle mechanism
- current_governor_ro: displays current idle policy
+ current_governor_ro: (RO) displays current idle policy
+
+ With the cpuidle_sysfs_switch boot option enabled (meant for
+ developer testing), the following three attributes are visible
+ instead:
+
+ current_driver: same as described above
+
+ available_governors: (RO) displays a space separated list of
+ available governors
+
+ current_governor: (RW) displays current idle policy. Users can
+ switch the governor at runtime by writing to this file.
See files in Documentation/cpuidle/ for more information.
+What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/name
+ /sys/devices/system/cpu/cpuX/cpuidle/stateN/latency
+ /sys/devices/system/cpu/cpuX/cpuidle/stateN/power
+ /sys/devices/system/cpu/cpuX/cpuidle/stateN/time
+ /sys/devices/system/cpu/cpuX/cpuidle/stateN/usage
+Date: September 2007
+KernelVersion: v2.6.24
+Contact: Linux power management list <linux-pm@vger.kernel.org>
+Description:
+ The directory /sys/devices/system/cpu/cpuX/cpuidle contains per
+ logical CPU specific cpuidle information for each online cpu X.
+ The processor idle states which are available for use have the
+ following attributes:
+
+ name: (RO) Name of the idle state (string).
+
+ latency: (RO) The latency to exit out of this idle state (in
+ microseconds).
+
+ power: (RO) The power consumed while in this idle state (in
+ milliwatts).
+
+ time: (RO) The total time spent in this idle state (in microseconds).
+
+ usage: (RO) Number of times this state was entered (a count).
+
+
+What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/desc
+Date: February 2008
+KernelVersion: v2.6.25
+Contact: Linux power management list <linux-pm@vger.kernel.org>
+Description:
+ (RO) A small description about the idle state (string).
+
+
+What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/disable
+Date: March 2012
+KernelVersion: v3.10
+Contact: Linux power management list <linux-pm@vger.kernel.org>
+Description:
+ (RW) Option to disable this idle state (bool). The behavior and
+ the effect of the disable variable depends on the implementation
+ of a particular governor. In the ladder governor, for example,
+ it is not coherent, i.e. if one is disabling a light state, then
+ all deeper states are disabled as well, but the disable variable
+ does not reflect it. Likewise, if one enables a deep state but a
+ lighter state still is disabled, then this has no effect.
+
+
+What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/residency
+Date: March 2014
+KernelVersion: v3.15
+Contact: Linux power management list <linux-pm@vger.kernel.org>
+Description:
+ (RO) Display the target residency i.e. the minimum amount of
+ time (in microseconds) this cpu should spend in this idle state
+ to make the transition worth the effort.
+
+
What: /sys/devices/system/cpu/cpu#/cpufreq/*
Date: pre-git history
Contact: linux-pm@vger.kernel.org
--- /dev/null
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/charger_type
+Date: Jul, 2016
+KernelVersion: v4.10
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) The charger type - Traditional, Hybrid or NVDC.
+
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/adapter_rating_mw
+Date: Jul, 2016
+KernelVersion: v4.10
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Adapter rating in milliwatts (the maximum Adapter power).
+ Must be 0 if no AC Adaptor is plugged in.
+
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/max_platform_power_mw
+Date: Jul, 2016
+KernelVersion: v4.10
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Maximum platform power that can be supported by the battery
+ in milliwatts.
+
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/platform_power_source
+Date: Jul, 2016
+KernelVersion: v4.10
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Display the platform power source
+ 0x00 = DC
+ 0x01 = AC
+ 0x02 = USB
+ 0x03 = Wireless Charger
+
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/battery_steady_power
+Date: Jul, 2016
+KernelVersion: v4.10
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) The maximum sustained power for battery in milliwatts.
- RMW operations that have a return value are fully ordered.
-Except for test_and_set_bit_lock() which has ACQUIRE semantics and
+ - RMW operations that are conditional are unordered on FAILURE,
+ otherwise the above rules apply. In the case of test_and_{}_bit() operations,
+ if the bit in memory is unchanged by the operation then it is deemed to have
+ failed.
+
+Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and
clear_bit_unlock() which has RELEASE semantics.
Since a platform only has a single means of achieving atomic operations
--- /dev/null
+* MCR20A IEEE 802.15.4 *
+
+Required properties:
+ - compatible: should be "nxp,mcr20a"
+ - spi-max-frequency: maximal bus speed, should be set to a frequency
+ lower than 9000000 depends sync or async operation mode
+ - reg: the chipselect index
+ - interrupts: the interrupt generated by the device. Non high-level
+ can occur deadlocks while handling isr.
+
+Optional properties:
+ - rst_b-gpio: GPIO spec for the RST_B pin
+
+Example:
+
+ mcr20a@0 {
+ compatible = "nxp,mcr20a";
+ spi-max-frequency = <9000000>;
+ reg = <0>;
+ interrupts = <17 2>;
+ interrupt-parent = <&gpio>;
+ rst_b-gpio = <&gpio 27 1>
+ };
Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
high Tx rate. Must not be present for SFF modules
+- maximum-power-milliwatt : Maximum module power consumption
+ Specifies the maximum power consumption allowable by a module in the
+ slot, in milli-Watts. Presently, modules can be up to 1W, 1.5W or 2W.
+
Example #1: Direct serdes to SFP connection
sfp_eth3: sfp-eth3 {
i2c-bus = <&sfp_1g_i2c>;
los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>;
mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>;
+ maximum-power-milliwatt = <1000>;
pinctrl-names = "default";
pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>;
tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>;
software needs to take when this pin is
strapped in these modes. See data manual
for details.
+ - ti,clk-output-sel - Muxing option for CLK_OUT pin - see dt-bindings/net/ti-dp83867.h
+ for applicable values.
Note: ti,min-output-impedance and ti,max-output-impedance are mutually
exclusive. When both properties are present ti,max-output-impedance
--- /dev/null
+Binding for MIPS Cluster Power Controller (CPC).
+
+This binding allows a system to specify where the CPC registers are
+located.
+
+Required properties:
+compatible : Should be "mti,mips-cpc".
+regs: Should describe the address & size of the CPC register region.
--- /dev/null
+#
+# Feature name: membarrier-sync-core
+# Kconfig: ARCH_HAS_MEMBARRIER_SYNC_CORE
+# description: arch supports core serializing membarrier
+#
+# Architecture requirements
+#
+# * arm64
+#
+# Rely on eret context synchronization when returning from IPI handler, and
+# when returning to user-space.
+#
+# * x86
+#
+# x86-32 uses IRET as return from interrupt, which takes care of the IPI.
+# However, it uses both IRET and SYSEXIT to go back to user-space. The IRET
+# instruction is core serializing, but not SYSEXIT.
+#
+# x86-64 uses IRET as return from interrupt, which takes care of the IPI.
+# However, it can return to user-space through either SYSRETL (compat code),
+# SYSRETQ, or IRET.
+#
+# Given that neither SYSRET{L,Q}, nor SYSEXIT, are core serializing, we rely
+# instead on write_cr3() performed by switch_mm() to provide core serialization
+# after changing the current mm, and deal with the special case of kthread ->
+# uthread (temporarily keeping current mm into active_mm) by issuing a
+# sync_core_before_usermode() in that specific case.
+#
+ -----------------------
+ | arch |status|
+ -----------------------
+ | alpha: | TODO |
+ | arc: | TODO |
+ | arm: | TODO |
+ | arm64: | ok |
+ | blackfin: | TODO |
+ | c6x: | TODO |
+ | cris: | TODO |
+ | frv: | TODO |
+ | h8300: | TODO |
+ | hexagon: | TODO |
+ | ia64: | TODO |
+ | m32r: | TODO |
+ | m68k: | TODO |
+ | metag: | TODO |
+ | microblaze: | TODO |
+ | mips: | TODO |
+ | mn10300: | TODO |
+ | nios2: | TODO |
+ | openrisc: | TODO |
+ | parisc: | TODO |
+ | powerpc: | TODO |
+ | s390: | TODO |
+ | score: | TODO |
+ | sh: | TODO |
+ | sparc: | TODO |
+ | tile: | TODO |
+ | um: | TODO |
+ | unicore32: | TODO |
+ | x86: | ok |
+ | xtensa: | TODO |
+ -----------------------
==================================
.. kernel-doc:: drivers/gpu/drm/tve200/tve200_drv.c
- :doc: Faraday TV Encoder 200
+ :doc: Faraday TV Encoder TVE200 DRM Driver
* Intel Wildcat Point (PCH)
* Intel Wildcat Point-LP (PCH)
* Intel BayTrail (SOC)
+ * Intel Braswell (SOC)
* Intel Sunrise Point-H (PCH)
* Intel Sunrise Point-LP (PCH)
+ * Intel Kaby Lake-H (PCH)
* Intel DNV (SOC)
* Intel Broxton (SOC)
* Intel Lewisburg (PCH)
--------------
Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
-and implemented in kernel/locking/mutex.c. These locks use a three
-state atomic counter (->count) to represent the different possible
-transitions that can occur during the lifetime of a lock:
-
- 1: unlocked
- 0: locked, no waiters
- negative: locked, with potential waiters
-
-In its most basic form it also includes a wait-queue and a spinlock
-that serializes access to it. CONFIG_SMP systems can also include
-a pointer to the lock task owner (->owner) as well as a spinner MCS
-lock (->osq), both described below in (ii).
+and implemented in kernel/locking/mutex.c. These locks use an atomic variable
+(->owner) to keep track of the lock state during its lifetime. Field owner
+actually contains 'struct task_struct *' to the current lock owner and it is
+therefore NULL if not currently owned. Since task_struct pointers are aligned
+at at least L1_CACHE_BYTES, low bits (3) are used to store extra state (e.g.,
+if waiter list is non-empty). In its most basic form it also includes a
+wait-queue and a spinlock that serializes access to it. Furthermore,
+CONFIG_MUTEX_SPIN_ON_OWNER=y systems use a spinner MCS lock (->osq), described
+below in (ii).
When acquiring a mutex, there are three possible paths that can be
taken, depending on the state of the lock:
-(i) fastpath: tries to atomically acquire the lock by decrementing the
- counter. If it was already taken by another task it goes to the next
- possible path. This logic is architecture specific. On x86-64, the
- locking fastpath is 2 instructions:
-
- 0000000000000e10 <mutex_lock>:
- e21: f0 ff 0b lock decl (%rbx)
- e24: 79 08 jns e2e <mutex_lock+0x1e>
-
- the unlocking fastpath is equally tight:
-
- 0000000000000bc0 <mutex_unlock>:
- bc8: f0 ff 07 lock incl (%rdi)
- bcb: 7f 0a jg bd7 <mutex_unlock+0x17>
-
+(i) fastpath: tries to atomically acquire the lock by cmpxchg()ing the owner with
+ the current task. This only works in the uncontended case (cmpxchg() checks
+ against 0UL, so all 3 state bits above have to be 0). If the lock is
+ contended it goes to the next possible path.
(ii) midpath: aka optimistic spinning, tries to spin for acquisition
while the lock owner is running and there are no other tasks ready
Disadvantages
-------------
-Unlike its original design and purpose, 'struct mutex' is larger than
-most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice
-as large as 'struct semaphore' (24 bytes) and tied, along with rwsems,
-for the largest lock in the kernel. Larger structure sizes mean more
-CPU cache and memory footprint.
+Unlike its original design and purpose, 'struct mutex' is among the largest
+locks in the kernel. E.g: on x86-64 it is 32 bytes, where 'struct semaphore'
+is 24 bytes and rw_semaphore is 40 bytes. Larger structure sizes mean more CPU
+cache and memory footprint.
When to use mutexes
-------------------
FALSE: disabled
Default: FALSE
+fib_multipath_hash_policy - INTEGER
+ Controls which hash policy to use for multipath routes.
+ Default: 0 (Layer 3)
+ Possible values:
+ 0 - Layer 3 (source and destination addresses plus flow label)
+ 1 - Layer 4 (standard 5-tuple)
+
anycast_src_echo_reply - BOOLEAN
Controls the use of anycast addresses as source addresses for ICMPv6
echo reply
* Generic Segmentation Offload - GSO
* Generic Receive Offload - GRO
* Partial Generic Segmentation Offload - GSO_PARTIAL
+ * SCTP accelleration with GSO - GSO_BY_FRAGS
TCP Segmentation Offload
========================
fragmentation offload are the same as TSO. However the IPv4 ID for
fragments should not increment as a single IPv4 datagram is fragmented.
+UFO is deprecated: modern kernels will no longer generate UFO skbs, but can
+still receive them from tuntap and similar devices. Offload of UDP-based
+tunnel protocols is still supported.
+
IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads
========================================================
fact that the outer header also requests to have a non-zero checksum
included in the outer header.
-Finally there is SKB_GSO_REMCSUM which indicates that a given tunnel header
-has requested a remote checksum offload. In this case the inner headers
-will be left with a partial checksum and only the outer header checksum
-will be computed.
+Finally there is SKB_GSO_TUNNEL_REMCSUM which indicates that a given tunnel
+header has requested a remote checksum offload. In this case the inner
+headers will be left with a partial checksum and only the outer header
+checksum will be computed.
Generic Segmentation Offload
============================
is the outer IPv4 ID field. It is up to the device drivers to guarantee
that the IPv4 ID field is incremented in the case that a given header does
not have the DF bit set.
+
+SCTP accelleration with GSO
+===========================
+
+SCTP - despite the lack of hardware support - can still take advantage of
+GSO to pass one large packet through the network stack, rather than
+multiple small packets.
+
+This requires a different approach to other offloads, as SCTP packets
+cannot be just segmented to (P)MTU. Rather, the chunks must be contained in
+IP segments, padding respected. So unlike regular GSO, SCTP can't just
+generate a big skb, set gso_size to the fragmentation point and deliver it
+to IP layer.
+
+Instead, the SCTP protocol layer builds an skb with the segments correctly
+padded and stored as chained skbs, and skb_segment() splits based on those.
+To signal this, gso_size is set to the special value GSO_BY_FRAGS.
+
+Therefore, any code in the core networking stack must be aware of the
+possibility that gso_size will be GSO_BY_FRAGS and handle that case
+appropriately. (For size checks, the skb_gso_validate_*_len family of
+helpers do this automatically.)
+
+This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits
+set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE.
F: scripts/leaking_addresses.pl
LED SUBSYSTEM
-M: Richard Purdie <rpurdie@rpsys.net>
M: Jacek Anaszewski <jacek.anaszewski@gmail.com>
M: Pavel Machek <pavel@ucw.cz>
L: linux-leds@vger.kernel.org
F: Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531
F: drivers/iio/potentiometer/mcp4531.c
+MCR20A IEEE-802.15.4 RADIO DRIVER
+M: Xue Liu <liuxuenetmail@gmail.com>
+L: linux-wpan@vger.kernel.org
+W: https://github.com/xueliu/mcr20a-linux
+S: Maintained
+F: drivers/net/ieee802154/mcr20a.c
+F: drivers/net/ieee802154/mcr20a.h
+F: Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
+
MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
M: William Breathitt Gray <vilhelm.gray@gmail.com>
L: linux-iio@vger.kernel.org
M: Paul Burton <paul.burton@mips.com>
L: linux-mips@linux-mips.org
S: Supported
+F: Documentation/devicetree/bindings/power/mti,mips-cpc.txt
F: arch/mips/generic/
F: arch/mips/tools/generic-board-config.sh
OBJTOOL
M: Josh Poimboeuf <jpoimboe@redhat.com>
+M: Peter Zijlstra <peterz@infradead.org>
S: Supported
F: tools/objtool/
VERSION = 4
PATCHLEVEL = 16
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
NAME = Fearless Coyote
# *DOCUMENTATION*
#define BUG() do { \
pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
- dump_stack(); \
+ barrier_before_unreachable(); \
+ __builtin_trap(); \
} while (0)
#define HAVE_ARCH_BUG
/dts-v1/;
#include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/gpio/gpio.h>
#include "armada-370.dtsi"
#address-cells = <1>;
#size-cells = <0>;
reg = <0x10>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
ports {
#address-cells = <1>;
};
};
};
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switchphy0: switchphy@0 {
+ reg = <0>;
+ interrupt-parent = <&switch>;
+ interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ switchphy1: switchphy@1 {
+ reg = <1>;
+ interrupt-parent = <&switch>;
+ interrupts = <1 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ switchphy2: switchphy@2 {
+ reg = <2>;
+ interrupt-parent = <&switch>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ switchphy3: switchphy@3 {
+ reg = <3>;
+ interrupt-parent = <&switch>;
+ interrupts = <3 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
};
};
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
-#include <linux/perf/arm_pmu.h>
#include <linux/regulator/machine.h>
#include <asm/outercache.h>
prcmu_system_reset(0);
}
-/*
- * The PMU IRQ lines of two cores are wired together into a single interrupt.
- * Bounce the interrupt to the other core if it's not ours.
- */
-static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler)
-{
- irqreturn_t ret = handler(irq, dev);
- int other = !smp_processor_id();
-
- if (ret == IRQ_NONE && cpu_online(other))
- irq_set_affinity(irq, cpumask_of(other));
-
- /*
- * We should be able to get away with the amount of IRQ_NONEs we give,
- * while still having the spurious IRQ detection code kick in if the
- * interrupt really starts hitting spuriously.
- */
- return ret;
-}
-
-static struct arm_pmu_platdata db8500_pmu_platdata = {
- .handle_irq = db8500_pmu_handler,
- .irq_flags = IRQF_NOBALANCING | IRQF_NO_THREAD,
-};
-
-static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = {
- /* Requires call-back bindings. */
- OF_DEV_AUXDATA("arm,cortex-a9-pmu", 0, "arm-pmu", &db8500_pmu_platdata),
- {},
-};
-
static struct of_dev_auxdata u8540_auxdata_lookup[] __initdata = {
OF_DEV_AUXDATA("stericsson,db8500-prcmu", 0x80157000, "db8500-prcmu", NULL),
{},
if (of_machine_is_compatible("st-ericsson,u8540"))
of_platform_populate(NULL, u8500_local_bus_nodes,
u8540_auxdata_lookup, NULL);
- else
- of_platform_populate(NULL, u8500_local_bus_nodes,
- u8500_auxdata_lookup, NULL);
}
static const char * stericsson_dt_platform_compat[] = {
#define MPIDR_UP_BITMASK (0x1 << 30)
#define MPIDR_MT_BITMASK (0x1 << 24)
-#define MPIDR_HWID_BITMASK 0xff00ffffff
+#define MPIDR_HWID_BITMASK UL(0xff00ffffff)
#define MPIDR_LEVEL_BITS_SHIFT 3
#define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT)
static inline pte_t huge_ptep_get(pte_t *ptep)
{
- return *ptep;
+ return READ_ONCE(*ptep);
}
return pmd;
}
-static inline void kvm_set_s2pte_readonly(pte_t *pte)
+static inline void kvm_set_s2pte_readonly(pte_t *ptep)
{
pteval_t old_pteval, pteval;
- pteval = READ_ONCE(pte_val(*pte));
+ pteval = READ_ONCE(pte_val(*ptep));
do {
old_pteval = pteval;
pteval &= ~PTE_S2_RDWR;
pteval |= PTE_S2_RDONLY;
- pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval);
+ pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
} while (pteval != old_pteval);
}
-static inline bool kvm_s2pte_readonly(pte_t *pte)
+static inline bool kvm_s2pte_readonly(pte_t *ptep)
{
- return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
+ return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY;
}
-static inline bool kvm_s2pte_exec(pte_t *pte)
+static inline bool kvm_s2pte_exec(pte_t *ptep)
{
- return !(pte_val(*pte) & PTE_S2_XN);
+ return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN);
}
-static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
+static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp)
{
- kvm_set_s2pte_readonly((pte_t *)pmd);
+ kvm_set_s2pte_readonly((pte_t *)pmdp);
}
-static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
+static inline bool kvm_s2pmd_readonly(pmd_t *pmdp)
{
- return kvm_s2pte_readonly((pte_t *)pmd);
+ return kvm_s2pte_readonly((pte_t *)pmdp);
}
-static inline bool kvm_s2pmd_exec(pmd_t *pmd)
+static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
{
- return !(pmd_val(*pmd) & PMD_S2_XN);
+ return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
}
static inline bool kvm_page_empty(void *ptr)
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
* avoiding the possibility of conflicting TLB entries being allocated.
*/
-static inline void cpu_replace_ttbr1(pgd_t *pgd)
+static inline void cpu_replace_ttbr1(pgd_t *pgdp)
{
typedef void (ttbr_replace_func)(phys_addr_t);
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
ttbr_replace_func *replace_phys;
- phys_addr_t pgd_phys = virt_to_phys(pgd);
+ phys_addr_t pgd_phys = virt_to_phys(pgdp);
replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
return (pmd_t *)__get_free_page(PGALLOC_GFP);
}
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
{
- BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
- free_page((unsigned long)pmd);
+ BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
+ free_page((unsigned long)pmdp);
}
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
{
- set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot));
+ set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot));
}
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
- __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
+ __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
}
#else
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
{
BUILD_BUG();
}
return (pud_t *)__get_free_page(PGALLOC_GFP);
}
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
{
- BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
- free_page((unsigned long)pud);
+ BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
+ free_page((unsigned long)pudp);
}
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
{
- set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot));
+ set_pgd(pgdp, __pgd(__phys_to_pgd_val(pudp) | prot));
}
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp)
{
- __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
+ __pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE);
}
#else
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
{
BUILD_BUG();
}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
/*
* Free a PTE table.
*/
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
{
- if (pte)
- free_page((unsigned long)pte);
+ if (ptep)
+ free_page((unsigned long)ptep);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
__free_page(pte);
}
-static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
+static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
pmdval_t prot)
{
- set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot));
+ set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot));
}
/*
static inline void set_pte(pte_t *ptep, pte_t pte)
{
- *ptep = pte;
+ WRITE_ONCE(*ptep, pte);
/*
* Only if the new pte is valid and kernel, otherwise TLB maintenance
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
+ pte_t old_pte;
+
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte, addr);
* hardware updates of the pte (ptep_set_access_flags safely changes
* valid ptes without going through an invalid entry).
*/
- if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+ old_pte = READ_ONCE(*ptep);
+ if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) &&
(mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
VM_WARN_ONCE(!pte_young(pte),
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
- __func__, pte_val(*ptep), pte_val(pte));
- VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte),
+ __func__, pte_val(old_pte), pte_val(pte));
+ VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
"%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
- __func__, pte_val(*ptep), pte_val(pte));
+ __func__, pte_val(old_pte), pte_val(pte));
}
set_pte(ptep, pte);
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- *pmdp = pmd;
+ WRITE_ONCE(*pmdp, pmd);
dsb(ishst);
isb();
}
static inline void set_pud(pud_t *pudp, pud_t pud)
{
- *pudp = pud;
+ WRITE_ONCE(*pudp, pud);
dsb(ishst);
isb();
}
/* Find an entry in the second-level page table. */
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
-#define pmd_offset_phys(dir, addr) (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
+#define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- *pgdp = pgd;
+ WRITE_ONCE(*pgdp, pgd);
dsb(ishst);
}
/* Find an entry in the frst-level page table. */
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-#define pud_offset_phys(dir, addr) (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
+#define pud_offset_phys(dir, addr) (pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr))))
#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
unsigned long fp;
unsigned long pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- unsigned int graph;
+ int graph;
#endif
};
* This is equivalent to the following test:
* (u65)addr + (u65)size <= (u65)current->addr_limit + 1
*/
-static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
+static inline unsigned long __range_ok(const void __user *addr, unsigned long size)
{
- unsigned long limit = current_thread_info()->addr_limit;
+ unsigned long ret, limit = current_thread_info()->addr_limit;
__chk_user_ptr(addr);
asm volatile(
// A + B <= C + 1 for all A,B,C, in four easy steps:
// 1: X = A + B; X' = X % 2^64
- " adds %0, %0, %2\n"
+ " adds %0, %3, %2\n"
// 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4
" csel %1, xzr, %1, hi\n"
// 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X'
// testing X' - C == 0, subject to the previous adjustments.
" sbcs xzr, %0, %1\n"
" cset %0, ls\n"
- : "+r" (addr), "+r" (limit) : "Ir" (size) : "cc");
+ : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc");
- return addr;
+ return ret;
}
/*
*/
#define untagged_addr(addr) sign_extend64(addr, 55)
-#define access_ok(type, addr, size) __range_ok((unsigned long)(addr), size)
+#define access_ok(type, addr, size) __range_ok(addr, size)
#define user_addr_max get_fs
#define _ASM_EXTABLE(from, to) \
static int swp_handler(struct pt_regs *regs, u32 instr)
{
u32 destreg, data, type, address = 0;
+ const void __user *user_ptr;
int rn, rt2, res = 0;
perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET), data);
/* Check access in reasonable access range for both SWP and SWPB */
- if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) {
+ user_ptr = (const void __user *)(unsigned long)(address & ~3);
+ if (!access_ok(VERIFY_WRITE, user_ptr, 4)) {
pr_debug("SWP{B} emulation: access to 0x%08x not allowed!\n",
address);
goto fault;
.capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
},
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+ .enable = qcom_enable_link_stack_sanitization,
+ },
+ {
+ .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
+ MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+ },
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
};
static const struct arm64_ftr_bits ftr_ctr[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 29, 1, 1), /* DIC */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1), /* IDC */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0), /* ERG */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
/*
* Linux can handle differing I-cache policies. Userspace JITs will
unsigned long addr, void *data)
{
efi_memory_desc_t *md = data;
- pte_t pte = *ptep;
+ pte_t pte = READ_ONCE(*ptep);
if (md->attribute & EFI_MEMORY_RO)
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
gfp_t mask)
{
int rc = 0;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
unsigned long dst = (unsigned long)allocator(mask);
if (!dst) {
memcpy((void *)dst, src_start, length);
flush_icache_range(dst, dst + length);
- pgd = pgd_offset_raw(allocator(mask), dst_addr);
- if (pgd_none(*pgd)) {
- pud = allocator(mask);
- if (!pud) {
+ pgdp = pgd_offset_raw(allocator(mask), dst_addr);
+ if (pgd_none(READ_ONCE(*pgdp))) {
+ pudp = allocator(mask);
+ if (!pudp) {
rc = -ENOMEM;
goto out;
}
- pgd_populate(&init_mm, pgd, pud);
+ pgd_populate(&init_mm, pgdp, pudp);
}
- pud = pud_offset(pgd, dst_addr);
- if (pud_none(*pud)) {
- pmd = allocator(mask);
- if (!pmd) {
+ pudp = pud_offset(pgdp, dst_addr);
+ if (pud_none(READ_ONCE(*pudp))) {
+ pmdp = allocator(mask);
+ if (!pmdp) {
rc = -ENOMEM;
goto out;
}
- pud_populate(&init_mm, pud, pmd);
+ pud_populate(&init_mm, pudp, pmdp);
}
- pmd = pmd_offset(pud, dst_addr);
- if (pmd_none(*pmd)) {
- pte = allocator(mask);
- if (!pte) {
+ pmdp = pmd_offset(pudp, dst_addr);
+ if (pmd_none(READ_ONCE(*pmdp))) {
+ ptep = allocator(mask);
+ if (!ptep) {
rc = -ENOMEM;
goto out;
}
- pmd_populate_kernel(&init_mm, pmd, pte);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
}
- pte = pte_offset_kernel(pmd, dst_addr);
- set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
+ ptep = pte_offset_kernel(pmdp, dst_addr);
+ set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
/*
* Load our new page tables. A strict BBM approach requires that we
*/
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
- write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1);
+ write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
isb();
*phys_dst_addr = virt_to_phys((void *)dst);
return ret;
}
-static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
+static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
- pte_t pte = *src_pte;
+ pte_t pte = READ_ONCE(*src_ptep);
if (pte_valid(pte)) {
/*
* read only (code, rodata). Clear the RDONLY bit from
* the temporary mappings we use during restore.
*/
- set_pte(dst_pte, pte_mkwrite(pte));
+ set_pte(dst_ptep, pte_mkwrite(pte));
} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
/*
* debug_pagealloc will removed the PTE_VALID bit if
*/
BUG_ON(!pfn_valid(pte_pfn(pte)));
- set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte)));
+ set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
}
}
-static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
+static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
unsigned long end)
{
- pte_t *src_pte;
- pte_t *dst_pte;
+ pte_t *src_ptep;
+ pte_t *dst_ptep;
unsigned long addr = start;
- dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pte)
+ dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_ptep)
return -ENOMEM;
- pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
- dst_pte = pte_offset_kernel(dst_pmd, start);
+ pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
+ dst_ptep = pte_offset_kernel(dst_pmdp, start);
- src_pte = pte_offset_kernel(src_pmd, start);
+ src_ptep = pte_offset_kernel(src_pmdp, start);
do {
- _copy_pte(dst_pte, src_pte, addr);
- } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+ _copy_pte(dst_ptep, src_ptep, addr);
+ } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
return 0;
}
-static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
+static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
unsigned long end)
{
- pmd_t *src_pmd;
- pmd_t *dst_pmd;
+ pmd_t *src_pmdp;
+ pmd_t *dst_pmdp;
unsigned long next;
unsigned long addr = start;
- if (pud_none(*dst_pud)) {
- dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pmd)
+ if (pud_none(READ_ONCE(*dst_pudp))) {
+ dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pmdp)
return -ENOMEM;
- pud_populate(&init_mm, dst_pud, dst_pmd);
+ pud_populate(&init_mm, dst_pudp, dst_pmdp);
}
- dst_pmd = pmd_offset(dst_pud, start);
+ dst_pmdp = pmd_offset(dst_pudp, start);
- src_pmd = pmd_offset(src_pud, start);
+ src_pmdp = pmd_offset(src_pudp, start);
do {
+ pmd_t pmd = READ_ONCE(*src_pmdp);
+
next = pmd_addr_end(addr, end);
- if (pmd_none(*src_pmd))
+ if (pmd_none(pmd))
continue;
- if (pmd_table(*src_pmd)) {
- if (copy_pte(dst_pmd, src_pmd, addr, next))
+ if (pmd_table(pmd)) {
+ if (copy_pte(dst_pmdp, src_pmdp, addr, next))
return -ENOMEM;
} else {
- set_pmd(dst_pmd,
- __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
+ set_pmd(dst_pmdp,
+ __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
}
- } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+ } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
return 0;
}
-static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
+static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
unsigned long end)
{
- pud_t *dst_pud;
- pud_t *src_pud;
+ pud_t *dst_pudp;
+ pud_t *src_pudp;
unsigned long next;
unsigned long addr = start;
- if (pgd_none(*dst_pgd)) {
- dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pud)
+ if (pgd_none(READ_ONCE(*dst_pgdp))) {
+ dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pudp)
return -ENOMEM;
- pgd_populate(&init_mm, dst_pgd, dst_pud);
+ pgd_populate(&init_mm, dst_pgdp, dst_pudp);
}
- dst_pud = pud_offset(dst_pgd, start);
+ dst_pudp = pud_offset(dst_pgdp, start);
- src_pud = pud_offset(src_pgd, start);
+ src_pudp = pud_offset(src_pgdp, start);
do {
+ pud_t pud = READ_ONCE(*src_pudp);
+
next = pud_addr_end(addr, end);
- if (pud_none(*src_pud))
+ if (pud_none(pud))
continue;
- if (pud_table(*(src_pud))) {
- if (copy_pmd(dst_pud, src_pud, addr, next))
+ if (pud_table(pud)) {
+ if (copy_pmd(dst_pudp, src_pudp, addr, next))
return -ENOMEM;
} else {
- set_pud(dst_pud,
- __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
+ set_pud(dst_pudp,
+ __pud(pud_val(pud) & ~PMD_SECT_RDONLY));
}
- } while (dst_pud++, src_pud++, addr = next, addr != end);
+ } while (dst_pudp++, src_pudp++, addr = next, addr != end);
return 0;
}
-static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
+static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
unsigned long end)
{
unsigned long next;
unsigned long addr = start;
- pgd_t *src_pgd = pgd_offset_k(start);
+ pgd_t *src_pgdp = pgd_offset_k(start);
- dst_pgd = pgd_offset_raw(dst_pgd, start);
+ dst_pgdp = pgd_offset_raw(dst_pgdp, start);
do {
next = pgd_addr_end(addr, end);
- if (pgd_none(*src_pgd))
+ if (pgd_none(READ_ONCE(*src_pgdp)))
continue;
- if (copy_pud(dst_pgd, src_pgd, addr, next))
+ if (copy_pud(dst_pgdp, src_pgdp, addr, next))
return -ENOMEM;
- } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+ } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
return 0;
}
int pmuver;
dfr0 = read_sysreg(id_aa64dfr0_el1);
- pmuver = cpuid_feature_extract_signed_field(dfr0,
+ pmuver = cpuid_feature_extract_unsigned_field(dfr0,
ID_AA64DFR0_PMUVER_SHIFT);
- if (pmuver < 1)
+ if (pmuver == 0xf || pmuver == 0)
return;
probe->present = true;
show_regs_print_info(KERN_DEFAULT);
print_pstate(regs);
- printk("pc : %pS\n", (void *)regs->pc);
- printk("lr : %pS\n", (void *)lr);
+
+ if (!user_mode(regs)) {
+ printk("pc : %pS\n", (void *)regs->pc);
+ printk("lr : %pS\n", (void *)lr);
+ } else {
+ printk("pc : %016llx\n", regs->pc);
+ printk("lr : %016llx\n", lr);
+ }
+
printk("sp : %016llx\n", sp);
i = top_reg;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (tsk->ret_stack &&
(frame->pc == (unsigned long)return_to_handler)) {
+ if (WARN_ON_ONCE(frame->graph == -1))
+ return -EINVAL;
+ if (frame->graph < -1)
+ frame->graph += FTRACE_NOTRACE_DEPTH;
+
/*
* This is a case where function graph tracer has
* modified a return address (LR) in a stack frame
if (end < start || flags)
return -EINVAL;
- if (!access_ok(VERIFY_READ, start, end - start))
+ if (!access_ok(VERIFY_READ, (const void __user *)start, end - start))
return -EFAULT;
return __do_compat_cache_op(start, end);
frame.fp = regs->regs[29];
frame.pc = regs->pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = -1; /* no task info */
+ frame.graph = current->curr_ret_stack;
#endif
do {
int ret = unwind_frame(NULL, &frame);
"Error"
};
-int show_unhandled_signals = 1;
+int show_unhandled_signals = 0;
static void dump_backtrace_entry(unsigned long where)
{
}
#endif
- if (show_unhandled_signals_ratelimited()) {
- pr_info("%s[%d]: syscall %d\n", current->comm,
- task_pid_nr(current), regs->syscallno);
- dump_instr("", regs);
- if (user_mode(regs))
- __show_regs(regs);
- }
-
return sys_ni_syscall();
}
u32 midr = read_cpuid_id();
/* Apply BTAC predictors mitigation to all Falkor chips */
- if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)
+ if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
+ ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {
__qcom_hyp_sanitize_btac_predictors();
+ }
}
fp_enabled = __fpsimd_enabled();
}
-static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start)
{
- pte_t *pte = pte_offset_kernel(pmd, 0UL);
+ pte_t *ptep = pte_offset_kernel(pmdp, 0UL);
unsigned long addr;
unsigned i;
- for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+ for (i = 0; i < PTRS_PER_PTE; i++, ptep++) {
addr = start + i * PAGE_SIZE;
- note_page(st, addr, 4, pte_val(*pte));
+ note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));
}
}
-static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start)
{
- pmd_t *pmd = pmd_offset(pud, 0UL);
+ pmd_t *pmdp = pmd_offset(pudp, 0UL);
unsigned long addr;
unsigned i;
- for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+ for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) {
+ pmd_t pmd = READ_ONCE(*pmdp);
+
addr = start + i * PMD_SIZE;
- if (pmd_none(*pmd) || pmd_sect(*pmd)) {
- note_page(st, addr, 3, pmd_val(*pmd));
+ if (pmd_none(pmd) || pmd_sect(pmd)) {
+ note_page(st, addr, 3, pmd_val(pmd));
} else {
- BUG_ON(pmd_bad(*pmd));
- walk_pte(st, pmd, addr);
+ BUG_ON(pmd_bad(pmd));
+ walk_pte(st, pmdp, addr);
}
}
}
-static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
+static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start)
{
- pud_t *pud = pud_offset(pgd, 0UL);
+ pud_t *pudp = pud_offset(pgdp, 0UL);
unsigned long addr;
unsigned i;
- for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+ for (i = 0; i < PTRS_PER_PUD; i++, pudp++) {
+ pud_t pud = READ_ONCE(*pudp);
+
addr = start + i * PUD_SIZE;
- if (pud_none(*pud) || pud_sect(*pud)) {
- note_page(st, addr, 2, pud_val(*pud));
+ if (pud_none(pud) || pud_sect(pud)) {
+ note_page(st, addr, 2, pud_val(pud));
} else {
- BUG_ON(pud_bad(*pud));
- walk_pmd(st, pud, addr);
+ BUG_ON(pud_bad(pud));
+ walk_pmd(st, pudp, addr);
}
}
}
static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
unsigned long start)
{
- pgd_t *pgd = pgd_offset(mm, 0UL);
+ pgd_t *pgdp = pgd_offset(mm, 0UL);
unsigned i;
unsigned long addr;
- for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+ for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) {
+ pgd_t pgd = READ_ONCE(*pgdp);
+
addr = start + i * PGDIR_SIZE;
- if (pgd_none(*pgd)) {
- note_page(st, addr, 1, pgd_val(*pgd));
+ if (pgd_none(pgd)) {
+ note_page(st, addr, 1, pgd_val(pgd));
} else {
- BUG_ON(pgd_bad(*pgd));
- walk_pud(st, pgd, addr);
+ BUG_ON(pgd_bad(pgd));
+ walk_pud(st, pgdp, addr);
}
}
}
void show_pte(unsigned long addr)
{
struct mm_struct *mm;
- pgd_t *pgd;
+ pgd_t *pgdp;
+ pgd_t pgd;
if (addr < TASK_SIZE) {
/* TTBR0 */
return;
}
- pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n",
+ pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
VA_BITS, mm->pgd);
- pgd = pgd_offset(mm, addr);
- pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
+ pgdp = pgd_offset(mm, addr);
+ pgd = READ_ONCE(*pgdp);
+ pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
do {
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep, pte;
- if (pgd_none(*pgd) || pgd_bad(*pgd))
+ if (pgd_none(pgd) || pgd_bad(pgd))
break;
- pud = pud_offset(pgd, addr);
- pr_cont(", *pud=%016llx", pud_val(*pud));
- if (pud_none(*pud) || pud_bad(*pud))
+ pudp = pud_offset(pgdp, addr);
+ pud = READ_ONCE(*pudp);
+ pr_cont(", pud=%016llx", pud_val(pud));
+ if (pud_none(pud) || pud_bad(pud))
break;
- pmd = pmd_offset(pud, addr);
- pr_cont(", *pmd=%016llx", pmd_val(*pmd));
- if (pmd_none(*pmd) || pmd_bad(*pmd))
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
+ pr_cont(", pmd=%016llx", pmd_val(pmd));
+ if (pmd_none(pmd) || pmd_bad(pmd))
break;
- pte = pte_offset_map(pmd, addr);
- pr_cont(", *pte=%016llx", pte_val(*pte));
- pte_unmap(pte);
+ ptep = pte_offset_map(pmdp, addr);
+ pte = READ_ONCE(*ptep);
+ pr_cont(", pte=%016llx", pte_val(pte));
+ pte_unmap(ptep);
} while(0);
pr_cont("\n");
pte_t entry, int dirty)
{
pteval_t old_pteval, pteval;
+ pte_t pte = READ_ONCE(*ptep);
- if (pte_same(*ptep, entry))
+ if (pte_same(pte, entry))
return 0;
/* only preserve the access flags and write permission */
* (calculated as: a & b == ~(~a | ~b)).
*/
pte_val(entry) ^= PTE_RDONLY;
- pteval = READ_ONCE(pte_val(*ptep));
+ pteval = pte_val(pte);
do {
old_pteval = pteval;
pteval ^= PTE_RDONLY;
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, size_t *pgsize)
{
- pgd_t *pgd = pgd_offset(mm, addr);
- pud_t *pud;
- pmd_t *pmd;
+ pgd_t *pgdp = pgd_offset(mm, addr);
+ pud_t *pudp;
+ pmd_t *pmdp;
*pgsize = PAGE_SIZE;
- pud = pud_offset(pgd, addr);
- pmd = pmd_offset(pud, addr);
- if ((pte_t *)pmd == ptep) {
+ pudp = pud_offset(pgdp, addr);
+ pmdp = pmd_offset(pudp, addr);
+ if ((pte_t *)pmdp == ptep) {
*pgsize = PMD_SIZE;
return CONT_PMDS;
}
clear_flush(mm, addr, ptep, pgsize, ncontig);
- for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
- pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
- pte_val(pfn_pte(pfn, hugeprot)));
+ for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
- }
}
void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
- pgd_t *pgd;
- pud_t *pud;
- pte_t *pte = NULL;
-
- pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
- pgd = pgd_offset(mm, addr);
- pud = pud_alloc(mm, pgd, addr);
- if (!pud)
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep = NULL;
+
+ pgdp = pgd_offset(mm, addr);
+ pudp = pud_alloc(mm, pgdp, addr);
+ if (!pudp)
return NULL;
if (sz == PUD_SIZE) {
- pte = (pte_t *)pud;
+ ptep = (pte_t *)pudp;
} else if (sz == (PAGE_SIZE * CONT_PTES)) {
- pmd_t *pmd = pmd_alloc(mm, pud, addr);
+ pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
/*
* will be no pte_unmap() to correspond with this
* pte_alloc_map().
*/
- pte = pte_alloc_map(mm, pmd, addr);
+ ptep = pte_alloc_map(mm, pmdp, addr);
} else if (sz == PMD_SIZE) {
if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
- pud_none(*pud))
- pte = huge_pmd_share(mm, addr, pud);
+ pud_none(READ_ONCE(*pudp)))
+ ptep = huge_pmd_share(mm, addr, pudp);
else
- pte = (pte_t *)pmd_alloc(mm, pud, addr);
+ ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
} else if (sz == (PMD_SIZE * CONT_PMDS)) {
- pmd_t *pmd;
-
- pmd = pmd_alloc(mm, pud, addr);
+ pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
- return (pte_t *)pmd;
+ return (pte_t *)pmdp;
}
- pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
- sz, pte, pte_val(*pte));
- return pte;
+ return ptep;
}
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
+ pgd_t *pgdp;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
- pgd = pgd_offset(mm, addr);
- pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
- if (!pgd_present(*pgd))
+ pgdp = pgd_offset(mm, addr);
+ if (!pgd_present(READ_ONCE(*pgdp)))
return NULL;
- pud = pud_offset(pgd, addr);
- if (sz != PUD_SIZE && pud_none(*pud))
+ pudp = pud_offset(pgdp, addr);
+ pud = READ_ONCE(*pudp);
+ if (sz != PUD_SIZE && pud_none(pud))
return NULL;
/* hugepage or swap? */
- if (pud_huge(*pud) || !pud_present(*pud))
- return (pte_t *)pud;
+ if (pud_huge(pud) || !pud_present(pud))
+ return (pte_t *)pudp;
/* table; check the next level */
if (sz == CONT_PMD_SIZE)
addr &= CONT_PMD_MASK;
- pmd = pmd_offset(pud, addr);
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
- pmd_none(*pmd))
+ pmd_none(pmd))
return NULL;
- if (pmd_huge(*pmd) || !pmd_present(*pmd))
- return (pte_t *)pmd;
+ if (pmd_huge(pmd) || !pmd_present(pmd))
+ return (pte_t *)pmdp;
- if (sz == CONT_PTE_SIZE) {
- pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK));
- return pte;
- }
+ if (sz == CONT_PTE_SIZE)
+ return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
return NULL;
}
size_t pgsize;
pte_t pte;
- if (!pte_cont(*ptep)) {
+ if (!pte_cont(READ_ONCE(*ptep))) {
ptep_set_wrprotect(mm, addr, ptep);
return;
}
size_t pgsize;
int ncontig;
- if (!pte_cont(*ptep)) {
+ if (!pte_cont(READ_ONCE(*ptep))) {
ptep_clear_flush(vma, addr, ptep);
return;
}
return __pa(p);
}
-static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node,
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node,
bool early)
{
- if (pmd_none(*pmd)) {
+ if (pmd_none(READ_ONCE(*pmdp))) {
phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte)
: kasan_alloc_zeroed_page(node);
- __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+ __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
}
- return early ? pte_offset_kimg(pmd, addr)
- : pte_offset_kernel(pmd, addr);
+ return early ? pte_offset_kimg(pmdp, addr)
+ : pte_offset_kernel(pmdp, addr);
}
-static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node,
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node,
bool early)
{
- if (pud_none(*pud)) {
+ if (pud_none(READ_ONCE(*pudp))) {
phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd)
: kasan_alloc_zeroed_page(node);
- __pud_populate(pud, pmd_phys, PMD_TYPE_TABLE);
+ __pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE);
}
- return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr);
+ return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr);
}
-static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node,
+static pud_t *__init kasan_pud_offset(pgd_t *pgdp, unsigned long addr, int node,
bool early)
{
- if (pgd_none(*pgd)) {
+ if (pgd_none(READ_ONCE(*pgdp))) {
phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud)
: kasan_alloc_zeroed_page(node);
- __pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE);
+ __pgd_populate(pgdp, pud_phys, PMD_TYPE_TABLE);
}
- return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr);
+ return early ? pud_offset_kimg(pgdp, addr) : pud_offset(pgdp, addr);
}
-static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr,
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
- pte_t *pte = kasan_pte_offset(pmd, addr, node, early);
+ pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
do {
phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page)
: kasan_alloc_zeroed_page(node);
next = addr + PAGE_SIZE;
- set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
- } while (pte++, addr = next, addr != end && pte_none(*pte));
+ set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+ } while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep)));
}
-static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr,
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
- pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early);
+ pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
do {
next = pmd_addr_end(addr, end);
- kasan_pte_populate(pmd, addr, next, node, early);
- } while (pmd++, addr = next, addr != end && pmd_none(*pmd));
+ kasan_pte_populate(pmdp, addr, next, node, early);
+ } while (pmdp++, addr = next, addr != end && pmd_none(READ_ONCE(*pmdp)));
}
-static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr,
+static void __init kasan_pud_populate(pgd_t *pgdp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
- pud_t *pud = kasan_pud_offset(pgd, addr, node, early);
+ pud_t *pudp = kasan_pud_offset(pgdp, addr, node, early);
do {
next = pud_addr_end(addr, end);
- kasan_pmd_populate(pud, addr, next, node, early);
- } while (pud++, addr = next, addr != end && pud_none(*pud));
+ kasan_pmd_populate(pudp, addr, next, node, early);
+ } while (pudp++, addr = next, addr != end && pud_none(READ_ONCE(*pudp)));
}
static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
int node, bool early)
{
unsigned long next;
- pgd_t *pgd;
+ pgd_t *pgdp;
- pgd = pgd_offset_k(addr);
+ pgdp = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
- kasan_pud_populate(pgd, addr, next, node, early);
- } while (pgd++, addr = next, addr != end);
+ kasan_pud_populate(pgdp, addr, next, node, early);
+ } while (pgdp++, addr = next, addr != end);
}
/* The early shadow maps everything to a single page of zeroes */
*/
void __init kasan_copy_shadow(pgd_t *pgdir)
{
- pgd_t *pgd, *pgd_new, *pgd_end;
+ pgd_t *pgdp, *pgdp_new, *pgdp_end;
- pgd = pgd_offset_k(KASAN_SHADOW_START);
- pgd_end = pgd_offset_k(KASAN_SHADOW_END);
- pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
+ pgdp = pgd_offset_k(KASAN_SHADOW_START);
+ pgdp_end = pgd_offset_k(KASAN_SHADOW_END);
+ pgdp_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
do {
- set_pgd(pgd_new, *pgd);
- } while (pgd++, pgd_new++, pgd != pgd_end);
+ set_pgd(pgdp_new, READ_ONCE(*pgdp));
+ } while (pgdp++, pgdp_new++, pgdp != pgdp_end);
}
static void __init clear_pgds(unsigned long start,
return ((old ^ new) & ~mask) == 0;
}
-static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end,
+static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot)
{
- pte_t *pte;
+ pte_t *ptep;
- pte = pte_set_fixmap_offset(pmd, addr);
+ ptep = pte_set_fixmap_offset(pmdp, addr);
do {
- pte_t old_pte = *pte;
+ pte_t old_pte = READ_ONCE(*ptep);
- set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot));
+ set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
/*
* After the PTE entry has been populated once, we
* only allow updates to the permission attributes.
*/
- BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte)));
+ BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
+ READ_ONCE(pte_val(*ptep))));
phys += PAGE_SIZE;
- } while (pte++, addr += PAGE_SIZE, addr != end);
+ } while (ptep++, addr += PAGE_SIZE, addr != end);
pte_clear_fixmap();
}
-static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
+static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void),
int flags)
{
unsigned long next;
+ pmd_t pmd = READ_ONCE(*pmdp);
- BUG_ON(pmd_sect(*pmd));
- if (pmd_none(*pmd)) {
+ BUG_ON(pmd_sect(pmd));
+ if (pmd_none(pmd)) {
phys_addr_t pte_phys;
BUG_ON(!pgtable_alloc);
pte_phys = pgtable_alloc();
- __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+ __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
+ pmd = READ_ONCE(*pmdp);
}
- BUG_ON(pmd_bad(*pmd));
+ BUG_ON(pmd_bad(pmd));
do {
pgprot_t __prot = prot;
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
- init_pte(pmd, addr, next, phys, __prot);
+ init_pte(pmdp, addr, next, phys, __prot);
phys += next - addr;
} while (addr = next, addr != end);
}
-static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
+static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void), int flags)
{
unsigned long next;
- pmd_t *pmd;
+ pmd_t *pmdp;
- pmd = pmd_set_fixmap_offset(pud, addr);
+ pmdp = pmd_set_fixmap_offset(pudp, addr);
do {
- pmd_t old_pmd = *pmd;
+ pmd_t old_pmd = READ_ONCE(*pmdp);
next = pmd_addr_end(addr, end);
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
- pmd_set_huge(pmd, phys, prot);
+ pmd_set_huge(pmdp, phys, prot);
/*
* After the PMD entry has been populated once, we
* only allow updates to the permission attributes.
*/
BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
- pmd_val(*pmd)));
+ READ_ONCE(pmd_val(*pmdp))));
} else {
- alloc_init_cont_pte(pmd, addr, next, phys, prot,
+ alloc_init_cont_pte(pmdp, addr, next, phys, prot,
pgtable_alloc, flags);
BUG_ON(pmd_val(old_pmd) != 0 &&
- pmd_val(old_pmd) != pmd_val(*pmd));
+ pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
}
phys += next - addr;
- } while (pmd++, addr = next, addr != end);
+ } while (pmdp++, addr = next, addr != end);
pmd_clear_fixmap();
}
-static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
+static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void), int flags)
{
unsigned long next;
+ pud_t pud = READ_ONCE(*pudp);
/*
* Check for initial section mappings in the pgd/pud.
*/
- BUG_ON(pud_sect(*pud));
- if (pud_none(*pud)) {
+ BUG_ON(pud_sect(pud));
+ if (pud_none(pud)) {
phys_addr_t pmd_phys;
BUG_ON(!pgtable_alloc);
pmd_phys = pgtable_alloc();
- __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
+ __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
+ pud = READ_ONCE(*pudp);
}
- BUG_ON(pud_bad(*pud));
+ BUG_ON(pud_bad(pud));
do {
pgprot_t __prot = prot;
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
- init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags);
+ init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
phys += next - addr;
} while (addr = next, addr != end);
return true;
}
-static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
- phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(void),
- int flags)
+static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
+ phys_addr_t phys, pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(void),
+ int flags)
{
- pud_t *pud;
unsigned long next;
+ pud_t *pudp;
+ pgd_t pgd = READ_ONCE(*pgdp);
- if (pgd_none(*pgd)) {
+ if (pgd_none(pgd)) {
phys_addr_t pud_phys;
BUG_ON(!pgtable_alloc);
pud_phys = pgtable_alloc();
- __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
+ __pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
+ pgd = READ_ONCE(*pgdp);
}
- BUG_ON(pgd_bad(*pgd));
+ BUG_ON(pgd_bad(pgd));
- pud = pud_set_fixmap_offset(pgd, addr);
+ pudp = pud_set_fixmap_offset(pgdp, addr);
do {
- pud_t old_pud = *pud;
+ pud_t old_pud = READ_ONCE(*pudp);
next = pud_addr_end(addr, end);
*/
if (use_1G_block(addr, next, phys) &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
- pud_set_huge(pud, phys, prot);
+ pud_set_huge(pudp, phys, prot);
/*
* After the PUD entry has been populated once, we
* only allow updates to the permission attributes.
*/
BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
- pud_val(*pud)));
+ READ_ONCE(pud_val(*pudp))));
} else {
- alloc_init_cont_pmd(pud, addr, next, phys, prot,
+ alloc_init_cont_pmd(pudp, addr, next, phys, prot,
pgtable_alloc, flags);
BUG_ON(pud_val(old_pud) != 0 &&
- pud_val(old_pud) != pud_val(*pud));
+ pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
}
phys += next - addr;
- } while (pud++, addr = next, addr != end);
+ } while (pudp++, addr = next, addr != end);
pud_clear_fixmap();
}
int flags)
{
unsigned long addr, length, end, next;
- pgd_t *pgd = pgd_offset_raw(pgdir, virt);
+ pgd_t *pgdp = pgd_offset_raw(pgdir, virt);
/*
* If the virtual and physical address don't have the same offset
end = addr + length;
do {
next = pgd_addr_end(addr, end);
- alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
+ alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
flags);
phys += next - addr;
- } while (pgd++, addr = next, addr != end);
+ } while (pgdp++, addr = next, addr != end);
}
static phys_addr_t pgd_pgtable_alloc(void)
flush_tlb_kernel_range(virt, virt + size);
}
-static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
+static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
phys_addr_t end, pgprot_t prot, int flags)
{
- __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
+ __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
prot, early_pgtable_alloc, flags);
}
PAGE_KERNEL_RO);
}
-static void __init map_mem(pgd_t *pgd)
+static void __init map_mem(pgd_t *pgdp)
{
phys_addr_t kernel_start = __pa_symbol(_text);
phys_addr_t kernel_end = __pa_symbol(__init_begin);
if (memblock_is_nomap(reg))
continue;
- __map_memblock(pgd, start, end, PAGE_KERNEL, flags);
+ __map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
}
/*
* Note that contiguous mappings cannot be remapped in this way,
* so we should avoid them here.
*/
- __map_memblock(pgd, kernel_start, kernel_end,
+ __map_memblock(pgdp, kernel_start, kernel_end,
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
* through /sys/kernel/kexec_crash_size interface.
*/
if (crashk_res.end) {
- __map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
+ __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
PAGE_KERNEL,
NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
memblock_clear_nomap(crashk_res.start,
debug_checkwx();
}
-static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
+static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
pgprot_t prot, struct vm_struct *vma,
int flags, unsigned long vm_flags)
{
BUG_ON(!PAGE_ALIGNED(pa_start));
BUG_ON(!PAGE_ALIGNED(size));
- __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+ __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
early_pgtable_alloc, flags);
if (!(vm_flags & VM_NO_GUARD))
/*
* Create fine-grained mappings for the kernel.
*/
-static void __init map_kernel(pgd_t *pgd)
+static void __init map_kernel(pgd_t *pgdp)
{
static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
vmlinux_initdata, vmlinux_data;
* Only rodata will be remapped with different permissions later on,
* all other segments are allowed to use contiguous mappings.
*/
- map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0,
+ map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
VM_NO_GUARD);
- map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL,
+ map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
&vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
- map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot,
+ map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
&vmlinux_inittext, 0, VM_NO_GUARD);
- map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL,
+ map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
&vmlinux_initdata, 0, VM_NO_GUARD);
- map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
+ map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
- if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+ if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
/*
* The fixmap falls in a separate pgd to the kernel, and doesn't
* live in the carveout for the swapper_pg_dir. We can simply
* re-use the existing dir for the fixmap.
*/
- set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
- *pgd_offset_k(FIXADDR_START));
+ set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
+ READ_ONCE(*pgd_offset_k(FIXADDR_START)));
} else if (CONFIG_PGTABLE_LEVELS > 3) {
/*
* The fixmap shares its top level pgd entry with the kernel
* entry instead.
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
- pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START),
+ pud_populate(&init_mm,
+ pud_set_fixmap_offset(pgdp, FIXADDR_START),
lm_alias(bm_pmd));
pud_clear_fixmap();
} else {
BUG();
}
- kasan_copy_shadow(pgd);
+ kasan_copy_shadow(pgdp);
}
/*
void __init paging_init(void)
{
phys_addr_t pgd_phys = early_pgtable_alloc();
- pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+ pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
- map_kernel(pgd);
- map_mem(pgd);
+ map_kernel(pgdp);
+ map_mem(pgdp);
/*
* We want to reuse the original swapper_pg_dir so we don't have to
* To do this we need to go via a temporary pgd.
*/
cpu_replace_ttbr1(__va(pgd_phys));
- memcpy(swapper_pg_dir, pgd, PGD_SIZE);
+ memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
pgd_clear_fixmap();
*/
int kern_addr_valid(unsigned long addr)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pgd_t *pgdp;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep, pte;
if ((((long)addr) >> VA_BITS) != -1UL)
return 0;
- pgd = pgd_offset_k(addr);
- if (pgd_none(*pgd))
+ pgdp = pgd_offset_k(addr);
+ if (pgd_none(READ_ONCE(*pgdp)))
return 0;
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud))
+ pudp = pud_offset(pgdp, addr);
+ pud = READ_ONCE(*pudp);
+ if (pud_none(pud))
return 0;
- if (pud_sect(*pud))
- return pfn_valid(pud_pfn(*pud));
+ if (pud_sect(pud))
+ return pfn_valid(pud_pfn(pud));
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
+ if (pmd_none(pmd))
return 0;
- if (pmd_sect(*pmd))
- return pfn_valid(pmd_pfn(*pmd));
+ if (pmd_sect(pmd))
+ return pfn_valid(pmd_pfn(pmd));
- pte = pte_offset_kernel(pmd, addr);
- if (pte_none(*pte))
+ ptep = pte_offset_kernel(pmdp, addr);
+ pte = READ_ONCE(*ptep);
+ if (pte_none(pte))
return 0;
- return pfn_valid(pte_pfn(*pte));
+ return pfn_valid(pte_pfn(pte));
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS
{
unsigned long addr = start;
unsigned long next;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
do {
next = pmd_addr_end(addr, end);
- pgd = vmemmap_pgd_populate(addr, node);
- if (!pgd)
+ pgdp = vmemmap_pgd_populate(addr, node);
+ if (!pgdp)
return -ENOMEM;
- pud = vmemmap_pud_populate(pgd, addr, node);
- if (!pud)
+ pudp = vmemmap_pud_populate(pgdp, addr, node);
+ if (!pudp)
return -ENOMEM;
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
+ pmdp = pmd_offset(pudp, addr);
+ if (pmd_none(READ_ONCE(*pmdp))) {
void *p = NULL;
p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (!p)
return -ENOMEM;
- pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
+ pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
} else
- vmemmap_verify((pte_t *)pmd, node, addr, next);
+ vmemmap_verify((pte_t *)pmdp, node, addr, next);
} while (addr = next, addr != end);
return 0;
static inline pud_t * fixmap_pud(unsigned long addr)
{
- pgd_t *pgd = pgd_offset_k(addr);
+ pgd_t *pgdp = pgd_offset_k(addr);
+ pgd_t pgd = READ_ONCE(*pgdp);
- BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+ BUG_ON(pgd_none(pgd) || pgd_bad(pgd));
- return pud_offset_kimg(pgd, addr);
+ return pud_offset_kimg(pgdp, addr);
}
static inline pmd_t * fixmap_pmd(unsigned long addr)
{
- pud_t *pud = fixmap_pud(addr);
+ pud_t *pudp = fixmap_pud(addr);
+ pud_t pud = READ_ONCE(*pudp);
- BUG_ON(pud_none(*pud) || pud_bad(*pud));
+ BUG_ON(pud_none(pud) || pud_bad(pud));
- return pmd_offset_kimg(pud, addr);
+ return pmd_offset_kimg(pudp, addr);
}
static inline pte_t * fixmap_pte(unsigned long addr)
*/
void __init early_fixmap_init(void)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
+ pgd_t *pgdp, pgd;
+ pud_t *pudp;
+ pmd_t *pmdp;
unsigned long addr = FIXADDR_START;
- pgd = pgd_offset_k(addr);
+ pgdp = pgd_offset_k(addr);
+ pgd = READ_ONCE(*pgdp);
if (CONFIG_PGTABLE_LEVELS > 3 &&
- !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) {
+ !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {
/*
* We only end up here if the kernel mapping and the fixmap
* share the top level pgd entry, which should only happen on
* 16k/4 levels configurations.
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
- pud = pud_offset_kimg(pgd, addr);
+ pudp = pud_offset_kimg(pgdp, addr);
} else {
- if (pgd_none(*pgd))
- __pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
- pud = fixmap_pud(addr);
+ if (pgd_none(pgd))
+ __pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
+ pudp = fixmap_pud(addr);
}
- if (pud_none(*pud))
- __pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
- pmd = fixmap_pmd(addr);
- __pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
+ if (pud_none(READ_ONCE(*pudp)))
+ __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
+ pmdp = fixmap_pmd(addr);
+ __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
/*
* The boot-ioremap range spans multiple pmds, for which
BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
- if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
- || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+ if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
+ || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
WARN_ON(1);
- pr_warn("pmd %p != %p, %p\n",
- pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
+ pr_warn("pmdp %p != %p, %p\n",
+ pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
fix_to_virt(FIX_BTMAP_BEGIN));
phys_addr_t phys, pgprot_t flags)
{
unsigned long addr = __fix_to_virt(idx);
- pte_t *pte;
+ pte_t *ptep;
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
- pte = fixmap_pte(addr);
+ ptep = fixmap_pte(addr);
if (pgprot_val(flags)) {
- set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+ set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
} else {
- pte_clear(&init_mm, addr, pte);
+ pte_clear(&init_mm, addr, ptep);
flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
}
}
return 1;
}
-int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
+
+ /* ioremap_page_range doesn't honour BBM */
+ if (pud_present(READ_ONCE(*pudp)))
+ return 0;
+
BUG_ON(phys & ~PUD_MASK);
- set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot));
+ set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
return 1;
}
-int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
+
+ /* ioremap_page_range doesn't honour BBM */
+ if (pmd_present(READ_ONCE(*pmdp)))
+ return 0;
+
BUG_ON(phys & ~PMD_MASK);
- set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot));
+ set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
return 1;
}
-int pud_clear_huge(pud_t *pud)
+int pud_clear_huge(pud_t *pudp)
{
- if (!pud_sect(*pud))
+ if (!pud_sect(READ_ONCE(*pudp)))
return 0;
- pud_clear(pud);
+ pud_clear(pudp);
return 1;
}
-int pmd_clear_huge(pmd_t *pmd)
+int pmd_clear_huge(pmd_t *pmdp)
{
- if (!pmd_sect(*pmd))
+ if (!pmd_sect(READ_ONCE(*pmdp)))
return 0;
- pmd_clear(pmd);
+ pmd_clear(pmdp);
return 1;
}
void *data)
{
struct page_change_data *cdata = data;
- pte_t pte = *ptep;
+ pte_t pte = READ_ONCE(*ptep);
pte = clear_pte_bit(pte, cdata->clear_mask);
pte = set_pte_bit(pte, cdata->set_mask);
*/
bool kernel_page_present(struct page *page)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pgd_t *pgdp;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
- pgd = pgd_offset_k(addr);
- if (pgd_none(*pgd))
+ pgdp = pgd_offset_k(addr);
+ if (pgd_none(READ_ONCE(*pgdp)))
return false;
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud))
+ pudp = pud_offset(pgdp, addr);
+ pud = READ_ONCE(*pudp);
+ if (pud_none(pud))
return false;
- if (pud_sect(*pud))
+ if (pud_sect(pud))
return true;
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
+ if (pmd_none(pmd))
return false;
- if (pmd_sect(*pmd))
+ if (pmd_sect(pmd))
return true;
- pte = pte_offset_kernel(pmd, addr);
- return pte_valid(*pte);
+ ptep = pte_offset_kernel(pmdp, addr);
+ return pte_valid(READ_ONCE(*ptep));
}
#endif /* CONFIG_HIBERNATION */
#endif /* CONFIG_DEBUG_PAGEALLOC */
dc cvac, cur_\()\type\()p // Ensure any existing dirty
dmb sy // lines are written back before
ldr \type, [cur_\()\type\()p] // loading the entry
- tbz \type, #0, next_\()\type // Skip invalid entries
+ tbz \type, #0, skip_\()\type // Skip invalid and
+ tbnz \type, #11, skip_\()\type // non-global entries
.endm
.macro __idmap_kpti_put_pgtable_ent_ng, type
add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
do_pgd: __idmap_kpti_get_pgtable_ent pgd
tbnz pgd, #1, walk_puds
- __idmap_kpti_put_pgtable_ent_ng pgd
next_pgd:
+ __idmap_kpti_put_pgtable_ent_ng pgd
+skip_pgd:
add cur_pgdp, cur_pgdp, #8
cmp cur_pgdp, end_pgdp
b.ne do_pgd
add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
do_pud: __idmap_kpti_get_pgtable_ent pud
tbnz pud, #1, walk_pmds
- __idmap_kpti_put_pgtable_ent_ng pud
next_pud:
+ __idmap_kpti_put_pgtable_ent_ng pud
+skip_pud:
add cur_pudp, cur_pudp, 8
cmp cur_pudp, end_pudp
b.ne do_pud
add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
do_pmd: __idmap_kpti_get_pgtable_ent pmd
tbnz pmd, #1, walk_ptes
- __idmap_kpti_put_pgtable_ent_ng pmd
next_pmd:
+ __idmap_kpti_put_pgtable_ent_ng pmd
+skip_pmd:
add cur_pmdp, cur_pmdp, #8
cmp cur_pmdp, end_pmdp
b.ne do_pmd
add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
do_pte: __idmap_kpti_get_pgtable_ent pte
__idmap_kpti_put_pgtable_ent_ng pte
-next_pte:
+skip_pte:
add cur_ptep, cur_ptep, #8
cmp cur_ptep, end_ptep
b.ne do_pte
off = offsetof(struct bpf_array, map.max_entries);
emit_a64_mov_i64(tmp, off, ctx);
emit(A64_LDR32(tmp, r2, tmp), ctx);
+ emit(A64_MOV(0, r3, r3), ctx);
emit(A64_CMP(0, r3, tmp), ctx);
- emit(A64_B_(A64_COND_GE, jmp_offset), ctx);
+ emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
emit(A64_CMP(1, tcc, tmp), ctx);
- emit(A64_B_(A64_COND_GT, jmp_offset), ctx);
+ emit(A64_B_(A64_COND_HI, jmp_offset), ctx);
emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
/* prog = array->ptrs[index];
* not be used like this with newer versions of gcc.
*/
#define BUG() \
+do { \
__asm__ __volatile__ ("clear.d [" __stringify(BUG_MAGIC) "]\n\t"\
"movu.w " __stringify(__LINE__) ",$r0\n\t"\
"jump 0f\n\t" \
".section .rodata\n" \
"0:\t.string \"" __FILE__ "\"\n\t" \
- ".previous")
+ ".previous"); \
+ unreachable(); \
+} while (0)
#endif
#else
/* This just causes an oops. */
-#define BUG() (*(int *)0 = 0)
+#define BUG() \
+do { \
+ barrier_before_unreachable(); \
+ __builtin_trap(); \
+} while (0)
#endif
#ifdef CONFIG_BUG
#define ia64_abort() __builtin_trap()
-#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0)
+#define BUG() do { \
+ printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ barrier_before_unreachable(); \
+ ia64_abort(); \
+} while (0)
/* should this BUG be made generic? */
#define HAVE_ARCH_BUG
obj-y += esi_stub.o # must be in kernel proper
endif
obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o
-obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_BINFMT_ELF) += elfcore.o
#ifndef CONFIG_SUN3
#define BUG() do { \
pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ barrier_before_unreachable(); \
__builtin_trap(); \
} while (0)
#else
#define BUG() do { \
pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ barrier_before_unreachable(); \
panic("BUG!"); \
} while (0)
#endif
#else
#define BUG() do { \
+ barrier_before_unreachable(); \
__builtin_trap(); \
} while (0)
#endif
macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
platform_device_register_simple("macsonic", -1, NULL, 0);
+ if (macintosh_config->expansion_type == MAC_EXP_PDS ||
+ macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+ platform_device_register_simple("mac89x0", -1, NULL, 0);
+
if (macintosh_config->ether_type == MAC_ETHER_MACE)
platform_device_register_simple("macmace", -1, NULL, 0);
quiet_cmd_cpp_its_S = ITS $@
cmd_cpp_its_S = $(CPP) $(cpp_flags) -P -C -o $@ $< \
+ -D__ASSEMBLY__ \
-DKERNEL_NAME="\"Linux $(KERNELRELEASE)\"" \
-DVMLINUX_BINARY="\"$(3)\"" \
-DVMLINUX_COMPRESSION="\"$(2)\"" \
compat_off_t l_len;
s32 l_sysid;
compat_pid_t l_pid;
- short __unused;
s32 pad[4];
};
#include <linux/errno.h>
#include <linux/percpu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/spinlock.h>
#include <asm/mips-cps.h>
phys_addr_t __weak mips_cpc_default_phys_base(void)
{
+ struct device_node *cpc_node;
+ struct resource res;
+ int err;
+
+ cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc");
+ if (cpc_node) {
+ err = of_address_to_resource(cpc_node, 0, &res);
+ if (!err)
+ return res.start;
+ }
+
return 0;
}
unsigned long reserved_end;
unsigned long mapstart = ~0UL;
unsigned long bootmap_size;
+ phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX;
bool bootmap_valid = false;
int i;
max_low_pfn = 0;
/*
- * Find the highest page frame number we have available.
+ * Find the highest page frame number we have available
+ * and the lowest used RAM address
*/
for (i = 0; i < boot_mem_map.nr_map; i++) {
unsigned long start, end;
end = PFN_DOWN(boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size);
+ ramstart = min(ramstart, boot_mem_map.map[i].addr);
+
#ifndef CONFIG_HIGHMEM
/*
* Skip highmem here so we get an accurate max_low_pfn if low
mapstart = max(reserved_end, start);
}
+ /*
+ * Reserve any memory between the start of RAM and PHYS_OFFSET
+ */
+ if (ramstart > PHYS_OFFSET)
+ add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
+ BOOT_MEM_RESERVED);
+
if (min_low_pfn >= max_low_pfn)
panic("Incorrect memory mapping !!!");
if (min_low_pfn > ARCH_PFN_OFFSET) {
add_memory_region(start, size, BOOT_MEM_RAM);
- if (start && start > PHYS_OFFSET)
- add_memory_region(PHYS_OFFSET, start - PHYS_OFFSET,
- BOOT_MEM_RESERVED);
return 0;
}
early_param("mem", early_parse_mem);
*/
}
-void __init bmips_cpu_setup(void)
+void bmips_cpu_setup(void)
{
void __iomem __maybe_unused *cbr = BMIPS_GET_CBR();
u32 __maybe_unused cfg;
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
* keeping the prototype consistent across the two formats.
*/
static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
- unsigned int subpg_index, unsigned long hidx)
+ unsigned int subpg_index, unsigned long hidx,
+ int offset)
{
return (hidx << H_PAGE_F_GIX_SHIFT) &
(H_PAGE_F_SECOND | H_PAGE_F_GIX);
* generic accessors and iterators here
*/
#define __real_pte __real_pte
-static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
{
real_pte_t rpte;
unsigned long *hidxp;
*/
smp_rmb();
- hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+ hidxp = (unsigned long *)(ptep + offset);
rpte.hidx = *hidxp;
return rpte;
}
* expected to modify the PTE bits accordingly and commit the PTE to memory.
*/
static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
- unsigned int subpg_index, unsigned long hidx)
+ unsigned int subpg_index,
+ unsigned long hidx, int offset)
{
- unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+ unsigned long *hidxp = (unsigned long *)(ptep + offset);
rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
*hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
}
#define H_PTE_TABLE_SIZE PTE_FRAG_SIZE
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
#define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
(sizeof(unsigned long) << PMD_INDEX_SIZE))
#else
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
#endif
+#ifdef CONFIG_HUGETLB_PAGE
+#define H_PUD_TABLE_SIZE ((sizeof(pud_t) << PUD_INDEX_SIZE) + \
+ (sizeof(unsigned long) << PUD_INDEX_SIZE))
+#else
#define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
+#endif
#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES)
+#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
+ defined(CONFIG_PPC_64K_PAGES)
/*
* only with hash 64k we need to use the second half of pmd page table
* to store pointer to deposited pgtable_t
#else
#define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE
#endif
+/*
+ * We store the slot details in the second half of page table.
+ * Increase the pud level table so that hugetlb ptes can be stored
+ * at pud level.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES)
+#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE + 1)
+#else
+#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE)
+#endif
/*
* Define the address range of the kernel non-linear virtual area
*/
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
+ pgd_t *pgd;
+
if (radix_enabled())
return radix__pgd_alloc(mm);
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
- pgtable_gfp_flags(mm, GFP_KERNEL));
+
+ pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
+ memset(pgd, 0, PGD_TABLE_SIZE);
+
+ return pgd;
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
- kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+ kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
}
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
* ahead and flush the page walk cache
*/
flush_tlb_pgtable(tlb, address);
- pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
+ pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
}
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
extern unsigned long __pud_index_size;
extern unsigned long __pgd_index_size;
extern unsigned long __pmd_cache_index;
+extern unsigned long __pud_cache_index;
#define PTE_INDEX_SIZE __pte_index_size
#define PMD_INDEX_SIZE __pmd_index_size
#define PUD_INDEX_SIZE __pud_index_size
#define PGD_INDEX_SIZE __pgd_index_size
#define PMD_CACHE_INDEX __pmd_cache_index
+#define PUD_CACHE_INDEX __pud_cache_index
/*
* Because of use of pte fragments and THP, size of page table
* are not always derived out of index size above.
*/
#ifndef __real_pte
-#define __real_pte(e,p) ((real_pte_t){(e)})
+#define __real_pte(e, p, o) ((real_pte_t){(e)})
#define __rpte_to_pte(r) ((r).pte)
#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
EXC_HV, SOFTEN_TEST_HV, bitmask)
#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \
- MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
/*
#define PACA_IRQ_HMI 0x20
#define PACA_IRQ_PMI 0x40
+/*
+ * Some soft-masked interrupts must be hard masked until they are replayed
+ * (e.g., because the soft-masked handler does not clear the exception).
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI)
+#else
+#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE)
+#endif
+
/*
* flags for paca->irq_soft_mask
*/
static inline void may_hard_irq_enable(void)
{
get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
- if (!(get_paca()->irq_happened & PACA_IRQ_EE))
+ if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK))
__hard_irq_enable();
}
return false;
}
+static inline void crash_ipi_callback(struct pt_regs *regs) { }
+
+static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+}
+
#endif /* CONFIG_KEXEC_CORE */
#endif /* ! __ASSEMBLY__ */
#endif /* __KERNEL__ */
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
#else
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
#endif
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
/*
* Define the address range of the kernel non-linear virtual area
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
extern int numa_update_cpu_topology(bool cpus_locked);
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+ numa_cpu_lookup_table[cpu] = node;
+}
+
static inline int early_cpu_to_node(int cpu)
{
int nid;
{
return 0;
}
+
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+
#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
extern int start_topology_update(void);
extern int stop_topology_update(void);
extern int prrn_is_enabled(void);
+extern int find_and_online_cpu_nid(int cpu);
#else
static inline int start_topology_update(void)
{
{
return 0;
}
+static inline int find_and_online_cpu_nid(int cpu)
+{
+ return 0;
+}
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
/*
* An interrupt came in while soft-disabled; We mark paca->irq_happened
* accordingly and if the interrupt is level sensitive, we hard disable
+ * hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so
+ * keep these in synch.
*/
.macro masked_interrupt_book3e paca_irq full_mask
* triggered and won't automatically refire.
* - If it was a HMI we return immediately since we handled it in realmode
* and it won't refire.
- * - else we hard disable and return.
+ * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
* This is called with r10 containing the value to OR to the paca field.
*/
#define MASKED_INTERRUPT(_H) \
ori r10,r10,0xffff; \
mtspr SPRN_DEC,r10; \
b MASKED_DEC_HANDLER_LABEL; \
-1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \
- bne 2f; \
+1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK; \
+ beq 2f; \
mfspr r10,SPRN_##_H##SRR1; \
xori r10,r10,MSR_EE; /* clear MSR_EE */ \
mtspr SPRN_##_H##SRR1,r10; \
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_create_file(s, &dev_attr_pir);
- if (cpu_has_feature(CPU_FTR_ARCH_206))
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ !firmware_has_feature(FW_FEATURE_LPAR))
device_create_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_remove_file(s, &dev_attr_pir);
- if (cpu_has_feature(CPU_FTR_ARCH_206))
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ !firmware_has_feature(FW_FEATURE_LPAR))
device_remove_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
u32 i, n_lmbs;
n_lmbs = of_read_number(prop++, 1);
+ if (n_lmbs == 0)
+ return;
for (i = 0; i < n_lmbs; i++) {
read_drconf_v1_cell(&lmb, &prop);
u32 i, j, lmb_sets;
lmb_sets = of_read_number(prop++, 1);
+ if (lmb_sets == 0)
+ return;
for (i = 0; i < lmb_sets; i++) {
read_drconf_v2_cell(&dr_cell, &prop);
struct drmem_lmb *lmb;
drmem_info->n_lmbs = of_read_number(prop++, 1);
+ if (drmem_info->n_lmbs == 0)
+ return;
drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
GFP_KERNEL);
int lmb_index;
lmb_sets = of_read_number(prop++, 1);
+ if (lmb_sets == 0)
+ return;
/* first pass, calculate the number of LMBs */
p = prop;
* need to add in 0x1 if it's a read-only user page
*/
rflags = htab_convert_pte_flags(new_pte);
- rpte = __real_pte(__pte(old_pte), ptep);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
return -1;
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
- new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
vpn = hpt_vpn(ea, vsid, ssize);
- rpte = __real_pte(__pte(old_pte), ptep);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
/*
*None of the sub 4k page is hashed
*/
return -1;
}
- new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
new_pte |= H_PAGE_HASHPTE;
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
- rpte = __real_pte(__pte(old_pte), ptep);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
- new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
__pmd_index_size = H_PMD_INDEX_SIZE;
__pud_index_size = H_PUD_INDEX_SIZE;
__pgd_index_size = H_PGD_INDEX_SIZE;
+ __pud_cache_index = H_PUD_CACHE_INDEX;
__pmd_cache_index = H_PMD_CACHE_INDEX;
__pte_table_size = H_PTE_TABLE_SIZE;
__pmd_table_size = H_PMD_TABLE_SIZE;
unsigned long vpn;
unsigned long old_pte, new_pte;
unsigned long rflags, pa, sz;
- long slot;
+ long slot, offset;
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
- rpte = __real_pte(__pte(old_pte), ptep);
+ if (unlikely(mmu_psize == MMU_PAGE_16G))
+ offset = PTRS_PER_PUD;
+ else
+ offset = PTRS_PER_PMD;
+ rpte = __real_pte(__pte(old_pte), ptep, offset);
sz = ((1UL) << shift);
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
return -1;
}
- new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
}
/*
* same size as either the pgd or pmd index except with THP enabled
* on book3s 64
*/
- if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
- pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+ if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
+ pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
}
numa_cpu_lookup_table[cpu] = -1;
}
-static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
-{
- numa_cpu_lookup_table[cpu] = node;
-}
-
static void map_cpu_to_node(int cpu, int node)
{
update_numa_cpu_lookup_table(cpu, node);
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/string_helpers.h>
+#include <linux/stop_machine.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
"r" (TLBIEL_INVAL_SET_LPID), "r" (0));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
+
+ /*
+ * The init_mm context is given the first available (non-zero) PID,
+ * which is the "guard PID" and contains no page table. PIDR should
+ * never be set to zero because that duplicates the kernel address
+ * space at the 0x0... offset (quadrant 0)!
+ *
+ * An arbitrary PID that may later be allocated by the PID allocator
+ * for userspace processes must not be used either, because that
+ * would cause stale user mappings for that PID on CPUs outside of
+ * the TLB invalidation scheme (because it won't be in mm_cpumask).
+ *
+ * So permanently carve out one PID for the purpose of a guard PID.
+ */
+ init_mm.context.id = mmu_base_pid;
+ mmu_base_pid++;
}
static void __init radix_init_partition_table(void)
__pmd_index_size = RADIX_PMD_INDEX_SIZE;
__pud_index_size = RADIX_PUD_INDEX_SIZE;
__pgd_index_size = RADIX_PGD_INDEX_SIZE;
+ __pud_cache_index = RADIX_PUD_INDEX_SIZE;
__pmd_cache_index = RADIX_PMD_INDEX_SIZE;
__pte_table_size = RADIX_PTE_TABLE_SIZE;
__pmd_table_size = RADIX_PMD_TABLE_SIZE;
radix_init_iamr();
radix_init_pgtable();
-
+ /* Switch to the guard PID before turning on MMU */
+ radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}
}
radix_init_iamr();
+ radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}
pud_clear(pud);
}
+struct change_mapping_params {
+ pte_t *pte;
+ unsigned long start;
+ unsigned long end;
+ unsigned long aligned_start;
+ unsigned long aligned_end;
+};
+
+static int stop_machine_change_mapping(void *data)
+{
+ struct change_mapping_params *params =
+ (struct change_mapping_params *)data;
+
+ if (!data)
+ return -1;
+
+ spin_unlock(&init_mm.page_table_lock);
+ pte_clear(&init_mm, params->aligned_start, params->pte);
+ create_physical_mapping(params->aligned_start, params->start);
+ create_physical_mapping(params->end, params->aligned_end);
+ spin_lock(&init_mm.page_table_lock);
+ return 0;
+}
+
static void remove_pte_table(pte_t *pte_start, unsigned long addr,
unsigned long end)
{
}
}
+/*
+ * clear the pte and potentially split the mapping helper
+ */
+static void split_kernel_mapping(unsigned long addr, unsigned long end,
+ unsigned long size, pte_t *pte)
+{
+ unsigned long mask = ~(size - 1);
+ unsigned long aligned_start = addr & mask;
+ unsigned long aligned_end = addr + size;
+ struct change_mapping_params params;
+ bool split_region = false;
+
+ if ((end - addr) < size) {
+ /*
+ * We're going to clear the PTE, but not flushed
+ * the mapping, time to remap and flush. The
+ * effects if visible outside the processor or
+ * if we are running in code close to the
+ * mapping we cleared, we are in trouble.
+ */
+ if (overlaps_kernel_text(aligned_start, addr) ||
+ overlaps_kernel_text(end, aligned_end)) {
+ /*
+ * Hack, just return, don't pte_clear
+ */
+ WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
+ "text, not splitting\n", addr, end);
+ return;
+ }
+ split_region = true;
+ }
+
+ if (split_region) {
+ params.pte = pte;
+ params.start = addr;
+ params.end = end;
+ params.aligned_start = addr & ~(size - 1);
+ params.aligned_end = min_t(unsigned long, aligned_end,
+ (unsigned long)__va(memblock_end_of_DRAM()));
+ stop_machine(stop_machine_change_mapping, ¶ms, NULL);
+ return;
+ }
+
+ pte_clear(&init_mm, addr, pte);
+}
+
static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
unsigned long end)
{
continue;
if (pmd_huge(*pmd)) {
- if (!IS_ALIGNED(addr, PMD_SIZE) ||
- !IS_ALIGNED(next, PMD_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
- }
-
- pte_clear(&init_mm, addr, (pte_t *)pmd);
+ split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
continue;
}
continue;
if (pud_huge(*pud)) {
- if (!IS_ALIGNED(addr, PUD_SIZE) ||
- !IS_ALIGNED(next, PUD_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
- }
-
- pte_clear(&init_mm, addr, (pte_t *)pud);
+ split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
continue;
}
continue;
if (pgd_huge(*pgd)) {
- if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
- !IS_ALIGNED(next, PGDIR_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
- }
-
- pte_clear(&init_mm, addr, (pte_t *)pgd);
+ split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
continue;
}
EXPORT_SYMBOL(__pgd_index_size);
unsigned long __pmd_cache_index;
EXPORT_SYMBOL(__pmd_cache_index);
+unsigned long __pud_cache_index;
+EXPORT_SYMBOL(__pud_cache_index);
unsigned long __pte_table_size;
EXPORT_SYMBOL(__pte_table_size);
unsigned long __pmd_table_size;
if (old & PATB_HR) {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
} else {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
unsigned int psize;
int ssize;
real_pte_t rpte;
- int i;
+ int i, offset;
i = batch->index;
psize = get_slice_psize(mm, addr);
/* Mask the address for the correct page size */
addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
+ if (unlikely(psize == MMU_PAGE_16G))
+ offset = PTRS_PER_PUD;
+ else
+ offset = PTRS_PER_PMD;
#else
BUG();
psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
* support 64k pages, this might be different from the
* hardware page size encoded in the slice table. */
addr &= PAGE_MASK;
+ offset = PTRS_PER_PTE;
}
}
WARN_ON(vsid == 0);
vpn = hpt_vpn(addr, vsid, ssize);
- rpte = __real_pte(__pte(pte), ptep);
+ rpte = __real_pte(__pte(pte), ptep, offset);
/*
* Check if we have an active batch on this CPU. If not, just
const struct cpumask *l_cpumask;
get_online_cpus();
- for_each_online_node(nid) {
+ for_each_node_with_cpus(nid) {
l_cpumask = cpumask_of_node(nid);
- cpu = cpumask_first(l_cpumask);
+ cpu = cpumask_first_and(l_cpumask, cpu_online_mask);
+ if (cpu >= nr_cpu_ids)
+ continue;
opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(cpu));
}
rc = PTR_ERR(txwin->paste_kaddr);
goto free_window;
}
+ } else {
+ /*
+ * A user mapping must ensure that context switch issues
+ * CP_ABORT for this thread.
+ */
+ rc = set_thread_uses_vas();
+ if (rc)
+ goto free_window;
}
- /*
- * Now that we have a send window, ensure context switch issues
- * CP_ABORT for this thread.
- */
- rc = -EINVAL;
- if (set_thread_uses_vas() < 0)
- goto free_window;
-
set_vinst_win(vinst, txwin);
return txwin;
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
#include "pseries.h"
#include "offline_states.h"
BUG_ON(cpu_online(cpu));
set_cpu_present(cpu, false);
set_hard_smp_processor_id(cpu, -1);
+ update_numa_cpu_lookup_table(cpu, -1);
break;
}
if (cpu >= nr_cpu_ids)
cpu_maps_update_done();
}
-extern int find_and_online_cpu_nid(int cpu);
-
static int dlpar_online_cpu(struct device_node *dn)
{
int rc = 0;
static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
+/*
+ * Enable the hotplug interrupt late because processing them may touch other
+ * devices or systems (e.g. hugepages) that have not been initialized at the
+ * subsys stage.
+ */
+int __init init_ras_hotplug_IRQ(void)
+{
+ struct device_node *np;
+
+ /* Hotplug Events */
+ np = of_find_node_by_path("/event-sources/hot-plug-events");
+ if (np != NULL) {
+ if (dlpar_workqueue_init() == 0)
+ request_event_sources_irqs(np, ras_hotplug_interrupt,
+ "RAS_HOTPLUG");
+ of_node_put(np);
+ }
+
+ return 0;
+}
+machine_late_initcall(pseries, init_ras_hotplug_IRQ);
+
/*
* Initialize handlers for the set of interrupts caused by hardware errors
* and power system events.
of_node_put(np);
}
- /* Hotplug Events */
- np = of_find_node_by_path("/event-sources/hot-plug-events");
- if (np != NULL) {
- if (dlpar_workqueue_init() == 0)
- request_event_sources_irqs(np, ras_hotplug_interrupt,
- "RAS_HOTPLUG");
- of_node_put(np);
- }
-
/* EPOW Events */
np = of_find_node_by_path("/event-sources/epow-events");
if (np != NULL) {
rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
if (rc) {
- pr_err("Error %lld getting queue info prio %d\n", rc, prio);
+ pr_err("Error %lld getting queue info CPU %d prio %d\n", rc,
+ target, prio);
rc = -EIO;
goto fail;
}
/* Configure and enable the queue in HW */
rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
if (rc) {
- pr_err("Error %lld setting queue for prio %d\n", rc, prio);
+ pr_err("Error %lld setting queue for CPU %d prio %d\n", rc,
+ target, prio);
rc = -EIO;
} else {
q->qpage = qpage;
if (IS_ERR(qpage))
return PTR_ERR(qpage);
- return xive_spapr_configure_queue(cpu, q, prio, qpage,
- xive_queue_shift);
+ return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
+ q, prio, qpage, xive_queue_shift);
}
static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
struct xive_q *q = &xc->queue[prio];
unsigned int alloc_order;
long rc;
+ int hw_cpu = get_hard_smp_processor_id(cpu);
- rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0);
+ rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
if (rc)
- pr_err("Error %ld setting queue for prio %d\n", rc, prio);
+ pr_err("Error %ld setting queue for CPU %d prio %d\n", rc,
+ hw_cpu, prio);
alloc_order = xive_alloc_order(xive_queue_shift);
free_pages((unsigned long)q->qpage, alloc_order);
select OF
select OF_EARLY_FLATTREE
select OF_IRQ
- select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_WANT_FRAME_POINTERS
select CLONE_BACKWARDS
select COMMON_CLK
select GENERIC_STRNLEN_USER
select GENERIC_SMP_IDLE_THREAD
select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A
- select ARCH_WANT_OPTIONAL_GPIOLIB
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_DMA_API_DEBUG
select HAVE_ARCH_TRACEHOOK
select MODULES_USE_ELF_RELA if MODULES
select THREAD_INFO_IN_TASK
- select RISCV_IRQ_INTC
select RISCV_TIMER
config MMU
move a1, sp /* pt_regs */
tail do_IRQ
1:
+ /* Exceptions run with interrupts enabled */
+ csrs sstatus, SR_SIE
+
/* Handle syscalls */
li t0, EXC_SYSCALL
beq s4, t0, handle_syscall
*/
addi s2, s2, 0x4
REG_S s2, PT_SEPC(sp)
- /* System calls run with interrupts enabled */
- csrs sstatus, SR_SIE
/* Trace syscalls, but only if requested by the user. */
REG_L t0, TASK_TI_FLAGS(tp)
andi t0, t0, _TIF_SYSCALL_TRACE
/* Start the kernel */
mv a0, s0
mv a1, s1
- call sbi_save
+ call parse_dtb
tail start_kernel
relocate:
#endif
}
-void __init sbi_save(unsigned int hartid, void *dtb)
+void __init parse_dtb(unsigned int hartid, void *dtb)
{
early_init_dt_scan(__va(dtb));
}
depends on SPARC32
select USB_EHCI_BIG_ENDIAN_MMIO
select USB_EHCI_BIG_ENDIAN_DESC
+ select USB_UHCI_BIG_ENDIAN_MMIO
+ select USB_UHCI_BIG_ENDIAN_DESC
---help---
If you say Y here if you are running on a SPARC-LEON processor.
The LEON processor is a synthesizable VHDL model of the
void do_BUG(const char *file, int line);
#define BUG() do { \
do_BUG(__FILE__, __LINE__); \
+ barrier_before_unreachable(); \
__builtin_trap(); \
} while (0)
#else
-#define BUG() __builtin_trap()
+#define BUG() do { \
+ barrier_before_unreachable(); \
+ __builtin_trap(); \
+} while (0)
#endif
#define HAVE_ARCH_BUG
boot/compressed/vmlinux
tools/test_get_len
tools/insn_sanity
+tools/insn_decoder_test
purgatory/kexec-purgatory.c
purgatory/purgatory.ro
For old smp systems that do not have proper acpi support. Newer systems
(esp with 64bit cpus) with acpi support, MADT and DSDT will override it
-config X86_BIGSMP
- bool "Support for big SMP systems with more than 8 CPUs"
- depends on X86_32 && SMP
- ---help---
- This option is needed for the systems that have more than 8 CPUs
-
config GOLDFISH
def_bool y
depends on X86_GOLDFISH
Say N if unsure.
if X86_32
+config X86_BIGSMP
+ bool "Support for big SMP systems with more than 8 CPUs"
+ depends on SMP
+ ---help---
+ This option is needed for the systems that have more than 8 CPUs
+
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
default y
Enable maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
+#
+# The maximum number of CPUs supported:
+#
+# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
+# and which can be configured interactively in the
+# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
+#
+# The ranges are different on 32-bit and 64-bit kernels, depending on
+# hardware capabilities and scalability features of the kernel.
+#
+# ( If MAXSMP is enabled we just use the highest possible value and disable
+# interactive configuration. )
+#
+
+config NR_CPUS_RANGE_BEGIN
+ int
+ default NR_CPUS_RANGE_END if MAXSMP
+ default 1 if !SMP
+ default 2
+
+config NR_CPUS_RANGE_END
+ int
+ depends on X86_32
+ default 64 if SMP && X86_BIGSMP
+ default 8 if SMP && !X86_BIGSMP
+ default 1 if !SMP
+
+config NR_CPUS_RANGE_END
+ int
+ depends on X86_64
+ default 8192 if SMP && ( MAXSMP || CPUMASK_OFFSTACK)
+ default 512 if SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
+ default 1 if !SMP
+
+config NR_CPUS_DEFAULT
+ int
+ depends on X86_32
+ default 32 if X86_BIGSMP
+ default 8 if SMP
+ default 1 if !SMP
+
+config NR_CPUS_DEFAULT
+ int
+ depends on X86_64
+ default 8192 if MAXSMP
+ default 64 if SMP
+ default 1 if !SMP
+
config NR_CPUS
int "Maximum number of CPUs" if SMP && !MAXSMP
- range 2 8 if SMP && X86_32 && !X86_BIGSMP
- range 2 64 if SMP && X86_32 && X86_BIGSMP
- range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
- range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
- default "1" if !SMP
- default "8192" if MAXSMP
- default "32" if SMP && X86_BIGSMP
- default "8" if SMP && X86_32
- default "64" if SMP
+ range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+ default NR_CPUS_DEFAULT
---help---
This allows you to specify the maximum number of CPUs which this
kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum
supported value is 8192, otherwise the maximum value is 512. The
minimum value which makes sense is 2.
- This is purely to save memory - each supported CPU adds
- approximately eight kilobytes to the kernel image.
+ This is purely to save memory: each supported CPU adds about 8KB
+ to the kernel image.
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
config HIGHMEM64G
bool "64GB"
- depends on !M486
+ depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6
select X86_PAE
---help---
Select this if you have a 32-bit processor and more than 4
config X86_CMPXCHG64
def_bool y
- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
+ depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8
# this should be set for all -march=.. options where the compiler
# generates cmov.
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
- default "6" if X86_32 && X86_P6_NOP
+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
default "5" if X86_32 && X86_CMPXCHG64
default "4"
{
unsigned int j;
- state->lens[0] = 0;
- state->lens[1] = 1;
- state->lens[2] = 2;
- state->lens[3] = 3;
+ /* initially all lanes are unused */
+ state->lens[0] = 0xFFFFFFFF00000000;
+ state->lens[1] = 0xFFFFFFFF00000001;
+ state->lens[2] = 0xFFFFFFFF00000002;
+ state->lens[3] = 0xFFFFFFFF00000003;
+
state->unused_lanes = 0xFF03020100;
for (j = 0; j < 4; j++)
state->ldata[j].job_in_lane = NULL;
#define SIZEOF_PTREGS 21*8
- .macro ALLOC_PT_GPREGS_ON_STACK
- addq $-(15*8), %rsp
- .endm
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
+ /*
+ * Push registers and sanitize registers of values that a
+ * speculation attack might otherwise want to exploit. The
+ * lower registers are likely clobbered well before they
+ * could be put to use in a speculative execution gadget.
+ * Interleave XOR with PUSH for better uop scheduling:
+ */
+ pushq %rdi /* pt_regs->di */
+ pushq %rsi /* pt_regs->si */
+ pushq \rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq \rax /* pt_regs->ax */
+ pushq %r8 /* pt_regs->r8 */
+ xorq %r8, %r8 /* nospec r8 */
+ pushq %r9 /* pt_regs->r9 */
+ xorq %r9, %r9 /* nospec r9 */
+ pushq %r10 /* pt_regs->r10 */
+ xorq %r10, %r10 /* nospec r10 */
+ pushq %r11 /* pt_regs->r11 */
+ xorq %r11, %r11 /* nospec r11*/
+ pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx*/
+ pushq %rbp /* pt_regs->rbp */
+ xorl %ebp, %ebp /* nospec rbp*/
+ pushq %r12 /* pt_regs->r12 */
+ xorq %r12, %r12 /* nospec r12*/
+ pushq %r13 /* pt_regs->r13 */
+ xorq %r13, %r13 /* nospec r13*/
+ pushq %r14 /* pt_regs->r14 */
+ xorq %r14, %r14 /* nospec r14*/
+ pushq %r15 /* pt_regs->r15 */
+ xorq %r15, %r15 /* nospec r15*/
+ UNWIND_HINT_REGS
+.endm
- .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
- .if \r11
- movq %r11, 6*8+\offset(%rsp)
- .endif
- .if \r8910
- movq %r10, 7*8+\offset(%rsp)
- movq %r9, 8*8+\offset(%rsp)
- movq %r8, 9*8+\offset(%rsp)
- .endif
- .if \rax
- movq %rax, 10*8+\offset(%rsp)
- .endif
- .if \rcx
- movq %rcx, 11*8+\offset(%rsp)
- .endif
- movq %rdx, 12*8+\offset(%rsp)
- movq %rsi, 13*8+\offset(%rsp)
- movq %rdi, 14*8+\offset(%rsp)
- UNWIND_HINT_REGS offset=\offset extra=0
- .endm
- .macro SAVE_C_REGS offset=0
- SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
- .endm
- .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
- SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
- .endm
- .macro SAVE_C_REGS_EXCEPT_R891011
- SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
- .endm
- .macro SAVE_C_REGS_EXCEPT_RCX_R891011
- SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
- .endm
- .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
- SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
- .endm
-
- .macro SAVE_EXTRA_REGS offset=0
- movq %r15, 0*8+\offset(%rsp)
- movq %r14, 1*8+\offset(%rsp)
- movq %r13, 2*8+\offset(%rsp)
- movq %r12, 3*8+\offset(%rsp)
- movq %rbp, 4*8+\offset(%rsp)
- movq %rbx, 5*8+\offset(%rsp)
- UNWIND_HINT_REGS offset=\offset
- .endm
-
- .macro POP_EXTRA_REGS
+.macro POP_REGS pop_rdi=1 skip_r11rcx=0
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
- .endm
-
- .macro POP_C_REGS
+ .if \skip_r11rcx
+ popq %rsi
+ .else
popq %r11
+ .endif
popq %r10
popq %r9
popq %r8
popq %rax
+ .if \skip_r11rcx
+ popq %rsi
+ .else
popq %rcx
+ .endif
popq %rdx
popq %rsi
+ .if \pop_rdi
popq %rdi
- .endm
-
- .macro icebp
- .byte 0xf1
- .endm
+ .endif
+.endm
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
* is just setting the LSB, which makes it an invalid stack address and is also
* a signal to the unwinder that it's a pt_regs pointer in disguise.
*
- * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
* the original rbp.
*/
.macro ENCODE_FRAME_POINTER ptregs_offset=0
swapgs
/*
- * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+ * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
* is not required to switch CR3.
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
pushq %rcx /* pt_regs->ip */
GLOBAL(entry_SYSCALL_64_after_hwframe)
pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- pushq %rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 */
- pushq %r9 /* pt_regs->r9 */
- pushq %r10 /* pt_regs->r10 */
- pushq %r11 /* pt_regs->r11 */
- pushq %rbx /* pt_regs->rbx */
- pushq %rbp /* pt_regs->rbp */
- pushq %r12 /* pt_regs->r12 */
- pushq %r13 /* pt_regs->r13 */
- pushq %r14 /* pt_regs->r14 */
- pushq %r15 /* pt_regs->r15 */
- UNWIND_HINT_REGS
+
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS
TRACE_IRQS_OFF
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
UNWIND_HINT_EMPTY
- POP_EXTRA_REGS
- popq %rsi /* skip r11 */
- popq %r10
- popq %r9
- popq %r8
- popq %rax
- popq %rsi /* skip rcx */
- popq %rdx
- popq %rsi
+ POP_REGS pop_rdi=0 skip_r11rcx=1
/*
* Now all regs are restored except RSP and RDI.
call switch_to_thread_stack
1:
- ALLOC_PT_GPREGS_ON_STACK
- SAVE_C_REGS
- SAVE_EXTRA_REGS
+ PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
testb $3, CS(%rsp)
ud2
1:
#endif
- POP_EXTRA_REGS
- popq %r11
- popq %r10
- popq %r9
- popq %r8
- popq %rax
- popq %rcx
- popq %rdx
- popq %rsi
+ POP_REGS pop_rdi=0
/*
* The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
ud2
1:
#endif
- POP_EXTRA_REGS
- POP_C_REGS
+ POP_REGS
addq $8, %rsp /* skip regs->orig_ax */
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif
- ALLOC_PT_GPREGS_ON_STACK
+ /* Save all registers in pt_regs */
+ PUSH_AND_CLEAR_REGS
+ ENCODE_FRAME_POINTER
.if \paranoid < 2
testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
addq $0x30, %rsp
UNWIND_HINT_IRET_REGS
pushq $-1 /* orig_ax = -1 => not a system call */
- ALLOC_PT_GPREGS_ON_STACK
- SAVE_C_REGS
- SAVE_EXTRA_REGS
+ PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
jmp error_exit
END(xen_failsafe_callback)
#endif
/*
- * Save all registers in pt_regs, and switch gs if needed.
+ * Switch gs if needed.
* Use slow, but surefire "are we in kernel?" check.
* Return: ebx=0: need swapgs on exit, ebx=1: otherwise
*/
ENTRY(paranoid_entry)
UNWIND_HINT_FUNC
cld
- SAVE_C_REGS 8
- SAVE_EXTRA_REGS 8
- ENCODE_FRAME_POINTER 8
movl $1, %ebx
movl $MSR_GS_BASE, %ecx
rdmsr
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
+ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
.Lparanoid_exit_restore:
jmp restore_regs_and_return_to_kernel
END(paranoid_exit)
/*
- * Save all registers in pt_regs, and switch gs if needed.
+ * Switch gs if needed.
* Return: EBX=0: came from user mode; EBX=1: otherwise
*/
ENTRY(error_entry)
- UNWIND_HINT_FUNC
+ UNWIND_HINT_REGS offset=8
cld
- SAVE_C_REGS 8
- SAVE_EXTRA_REGS 8
- ENCODE_FRAME_POINTER 8
- xorl %ebx, %ebx
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
pushq 1*8(%rdx) /* pt_regs->rip */
UNWIND_HINT_IRET_REGS
pushq $-1 /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- pushq (%rdx) /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
- pushq %rax /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 */
- pushq %r9 /* pt_regs->r9 */
- pushq %r10 /* pt_regs->r10 */
- pushq %r11 /* pt_regs->r11 */
- pushq %rbx /* pt_regs->rbx */
- pushq %rbp /* pt_regs->rbp */
- pushq %r12 /* pt_regs->r12 */
- pushq %r13 /* pt_regs->r13 */
- pushq %r14 /* pt_regs->r14 */
- pushq %r15 /* pt_regs->r15 */
- UNWIND_HINT_REGS
+ PUSH_AND_CLEAR_REGS rdx=(%rdx)
ENCODE_FRAME_POINTER
/*
* frame to point back to repeat_nmi.
*/
pushq $-1 /* ORIG_RAX: no syscall to restart */
- ALLOC_PT_GPREGS_ON_STACK
+ PUSH_AND_CLEAR_REGS
+ ENCODE_FRAME_POINTER
/*
* Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
- POP_EXTRA_REGS
- POP_C_REGS
+ POP_REGS
/*
* Skip orig_ax and the "outermost" frame to point RSP at the "iret"
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
+ xorq %r8, %r8 /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
+ xorq %r9, %r9 /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
+ xorq %r10, %r10 /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
+ xorq %r11, %r11 /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
+ xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
+ xorq %r12, %r12 /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
+ xorq %r13, %r13 /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
+ xorq %r14, %r14 /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
+ xorq %r15, %r15 /* nospec r15 */
cld
/*
pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
+ xorq %r8, %r8 /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
+ xorq %r9, %r9 /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
+ xorq %r10, %r10 /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
+ xorq %r11, %r11 /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
+ xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
+ xorq %r12, %r12 /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
+ xorq %r13, %r13 /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
+ xorq %r14, %r14 /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
+ xorq %r15, %r15 /* nospec r15 */
/*
* User mode is traced as though IRQs are on, and SYSENTER
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
+ xorq %r8, %r8 /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
+ xorq %r9, %r9 /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
+ xorq %r10, %r10 /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
+ xorq %r11, %r11 /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp */
+ xorl %ebp, %ebp /* nospec rbp */
pushq %r12 /* pt_regs->r12 */
+ xorq %r12, %r12 /* nospec r12 */
pushq %r13 /* pt_regs->r13 */
+ xorq %r13, %r13 /* nospec r13 */
pushq %r14 /* pt_regs->r14 */
+ xorq %r14, %r14 /* nospec r14 */
pushq %r15 /* pt_regs->r15 */
+ xorq %r15, %r15 /* nospec r15 */
cld
/*
break;
case INTEL_FAM6_SANDYBRIDGE_X:
- switch (cpu_data(cpu).x86_mask) {
+ switch (cpu_data(cpu).x86_stepping) {
case 6: rev = 0x618; break;
case 7: rev = 0x70c; break;
}
* on PMU interrupt
*/
if (boot_cpu_data.x86_model == 28
- && boot_cpu_data.x86_mask < 10) {
+ && boot_cpu_data.x86_stepping < 10) {
pr_cont("LBR disabled due to erratum");
return;
}
static __init void p6_pmu_rdpmc_quirk(void)
{
- if (boot_cpu_data.x86_mask < 9) {
+ if (boot_cpu_data.x86_stepping < 9) {
/*
* PPro erratum 26; fixed in stepping 9 and above.
*/
if (boot_cpu_data.x86 == 0x0F &&
boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
boot_cpu_data.x86_model <= 0x05 &&
- boot_cpu_data.x86_mask < 0x0A)
+ boot_cpu_data.x86_stepping < 0x0A)
return 1;
else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E))
return 1;
asm ("cmp %1,%2; sbb %0,%0;"
:"=r" (mask)
- :"r"(size),"r" (index)
+ :"g"(size),"r" (index)
:"cc");
return mask;
}
#include <linux/stringify.h>
/*
- * Since some emulators terminate on UD2, we cannot use it for WARN.
- * Since various instruction decoders disagree on the length of UD1,
- * we cannot use it either. So use UD0 for WARN.
+ * Despite that some emulators terminate on UD2, we use it for WARN().
*
- * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas
- * our kernel decoder thinks it takes a ModRM byte, which seems consistent
- * with various things like the Intel SDM instruction encoding rules)
+ * Since various instruction decoders/specs disagree on the encoding of
+ * UD0/UD1.
*/
-#define ASM_UD0 ".byte 0x0f, 0xff"
+#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */
#define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */
#define ASM_UD2 ".byte 0x0f, 0x0b"
#define INSN_UD0 0xff0f
#define INSN_UD2 0x0b0f
-#define LEN_UD0 2
+#define LEN_UD2 2
#ifdef CONFIG_GENERIC_BUG
unreachable(); \
} while (0)
-#define __WARN_FLAGS(flags) _BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags))
+#define __WARN_FLAGS(flags) \
+do { \
+ _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \
+ annotate_reachable(); \
+} while (0)
#include <asm-generic/bug.h>
*/
static __always_inline __pure bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto("1: jmp 6f\n"
- "2:\n"
- ".skip -(((5f-4f) - (2b-1b)) > 0) * "
- "((5f-4f) - (2b-1b)),0x90\n"
- "3:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 4f - .\n" /* repl offset */
- " .word %P1\n" /* always replace */
- " .byte 3b - 1b\n" /* src len */
- " .byte 5f - 4f\n" /* repl len */
- " .byte 3b - 2b\n" /* pad len */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "4: jmp %l[t_no]\n"
- "5:\n"
- ".previous\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 0\n" /* no replacement */
- " .word %P0\n" /* feature bit */
- " .byte 3b - 1b\n" /* src len */
- " .byte 0\n" /* repl len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .altinstr_aux,\"ax\"\n"
- "6:\n"
- " testb %[bitnum],%[cap_byte]\n"
- " jnz %l[t_yes]\n"
- " jmp %l[t_no]\n"
- ".previous\n"
- : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
- [bitnum] "i" (1 << (bit & 7)),
- [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
- : : t_yes, t_no);
- t_yes:
- return true;
- t_no:
- return false;
+ asm_volatile_goto("1: jmp 6f\n"
+ "2:\n"
+ ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+ "((5f-4f) - (2b-1b)),0x90\n"
+ "3:\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 4f - .\n" /* repl offset */
+ " .word %P[always]\n" /* always replace */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 5f - 4f\n" /* repl len */
+ " .byte 3b - 2b\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_replacement,\"ax\"\n"
+ "4: jmp %l[t_no]\n"
+ "5:\n"
+ ".previous\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 0\n" /* no replacement */
+ " .word %P[feature]\n" /* feature bit */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 0\n" /* repl len */
+ " .byte 0\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_aux,\"ax\"\n"
+ "6:\n"
+ " testb %[bitnum],%[cap_byte]\n"
+ " jnz %l[t_yes]\n"
+ " jmp %l[t_no]\n"
+ ".previous\n"
+ : : [feature] "i" (bit),
+ [always] "i" (X86_FEATURE_ALWAYS),
+ [bitnum] "i" (1 << (bit & 7)),
+ [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+ : : t_yes, t_no);
+t_yes:
+ return true;
+t_no:
+ return false;
}
#define static_cpu_has(bit) \
#include <asm/alternative.h>
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
+#include <asm/msr-index.h>
#ifdef __ASSEMBLY__
static inline void indirect_branch_prediction_barrier(void)
{
- alternative_input("",
- "call __ibp_barrier",
- X86_FEATURE_USE_IBPB,
- ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory"));
+ asm volatile(ALTERNATIVE("",
+ "movl %[msr], %%ecx\n\t"
+ "movl %[val], %%eax\n\t"
+ "movl $0, %%edx\n\t"
+ "wrmsr",
+ X86_FEATURE_USE_IBPB)
+ : : [msr] "i" (MSR_IA32_PRED_CMD),
+ [val] "i" (PRED_CMD_IBPB)
+ : "eax", "ecx", "edx", "memory");
}
#endif /* __ASSEMBLY__ */
+
+/*
+ * Below is used in the eBPF JIT compiler and emits the byte sequence
+ * for the following assembly:
+ *
+ * With retpolines configured:
+ *
+ * callq do_rop
+ * spec_trap:
+ * pause
+ * lfence
+ * jmp spec_trap
+ * do_rop:
+ * mov %rax,(%rsp)
+ * retq
+ *
+ * Without retpolines configured:
+ *
+ * jmp *%rax
+ */
+#ifdef CONFIG_RETPOLINE
+# define RETPOLINE_RAX_BPF_JIT_SIZE 17
+# define RETPOLINE_RAX_BPF_JIT() \
+ EMIT1_off32(0xE8, 7); /* callq do_rop */ \
+ /* spec_trap: */ \
+ EMIT2(0xF3, 0x90); /* pause */ \
+ EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
+ EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
+ /* do_rop: */ \
+ EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
+ EMIT1(0xC3); /* retq */
+#else
+# define RETPOLINE_RAX_BPF_JIT_SIZE 2
+# define RETPOLINE_RAX_BPF_JIT() \
+ EMIT2(0xFF, 0xE0); /* jmp *%rax */
+#endif
+
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
void copy_page(void *to, void *from);
-#ifdef CONFIG_X86_MCE
-#define arch_unmap_kpfn arch_unmap_kpfn
-#endif
-
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
{
PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
}
-static inline void __flush_tlb_single(unsigned long addr)
+static inline void __flush_tlb_one_user(unsigned long addr)
{
- PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
+ PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr);
}
static inline void flush_tlb_others(const struct cpumask *cpumask,
/* TLB operations */
void (*flush_tlb_user)(void);
void (*flush_tlb_kernel)(void);
- void (*flush_tlb_single)(unsigned long addr);
+ void (*flush_tlb_one_user)(unsigned long addr);
void (*flush_tlb_others)(const struct cpumask *cpus,
const struct flush_tlb_info *info);
#define kpte_clear_flush(ptep, vaddr) \
do { \
pte_clear(&init_mm, (vaddr), (ptep)); \
- __flush_tlb_one((vaddr)); \
+ __flush_tlb_one_kernel((vaddr)); \
} while (0)
#endif /* !__ASSEMBLY__ */
__u8 x86; /* CPU family */
__u8 x86_vendor; /* CPU vendor */
__u8 x86_model;
- __u8 x86_mask;
+ __u8 x86_stepping;
#ifdef CONFIG_X86_64
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
int x86_tlbsize;
char x86_vendor_id[16];
char x86_model_id[64];
/* in KB - valid for CPUS which support this call: */
- int x86_cache_size;
+ unsigned int x86_cache_size;
int x86_cache_alignment; /* In bytes */
/* Cache QoS architectural values: */
int x86_cache_max_rmid; /* max index */
void stop_this_cpu(void *dummy);
void df_debug(struct pt_regs *regs, long error_code);
-
-void __ibp_barrier(void);
-
#endif /* _ASM_X86_PROCESSOR_H */
void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
+void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
#else
#define __flush_tlb() __native_flush_tlb()
#define __flush_tlb_global() __native_flush_tlb_global()
-#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
+#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
#endif
static inline bool tlb_defer_switch_to_init_mm(void)
/*
* flush one page in the user mapping
*/
-static inline void __native_flush_tlb_single(unsigned long addr)
+static inline void __native_flush_tlb_one_user(unsigned long addr)
{
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
/*
* flush one page in the kernel mapping
*/
-static inline void __flush_tlb_one(unsigned long addr)
+static inline void __flush_tlb_one_kernel(unsigned long addr)
{
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
- __flush_tlb_single(addr);
+
+ /*
+ * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its
+ * paravirt equivalent. Even with PCID, this is sufficient: we only
+ * use PCID if we also use global PTEs for the kernel mapping, and
+ * INVLPG flushes global translations across all address spaces.
+ *
+ * If PTI is on, then the kernel is mapped with non-global PTEs, and
+ * __flush_tlb_one_user() will flush the given address for the current
+ * kernel address space and for its usermode counterpart, but it does
+ * not flush it for other address spaces.
+ */
+ __flush_tlb_one_user(addr);
if (!static_cpu_has(X86_FEATURE_PTI))
return;
/*
- * __flush_tlb_single() will have cleared the TLB entry for this ASID,
- * but since kernel space is replicated across all, we must also
- * invalidate all others.
+ * See above. We need to propagate the flush to all other address
+ * spaces. In principle, we only need to propagate it to kernelmode
+ * address spaces, but the extra bookkeeping we would need is not
+ * worth it.
*/
invalidate_other_asid();
}
if (boot_cpu_data.x86 == 0x10 &&
boot_cpu_data.x86_model >= 0x8 &&
(boot_cpu_data.x86_model > 0x9 ||
- boot_cpu_data.x86_mask >= 0x1))
+ boot_cpu_data.x86_stepping >= 0x1))
amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
if (boot_cpu_data.x86 == 0x15)
static u32 hsx_deadline_rev(void)
{
- switch (boot_cpu_data.x86_mask) {
+ switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x3a; /* EP */
case 0x04: return 0x0f; /* EX */
}
static u32 bdx_deadline_rev(void)
{
- switch (boot_cpu_data.x86_mask) {
+ switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x00000011;
case 0x03: return 0x0700000e;
case 0x04: return 0x0f00000c;
static u32 skx_deadline_rev(void)
{
- switch (boot_cpu_data.x86_mask) {
+ switch (boot_cpu_data.x86_stepping) {
case 0x03: return 0x01000136;
case 0x04: return 0x02000014;
}
uv_gre_table = gre;
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+ unsigned long size = ((unsigned long)(gre->limit - lgre)
+ << UV_GAM_RANGE_SHFT);
+ int order = 0;
+ char suffix[] = " KMGTPE";
+
+ while (size > 9999 && order < sizeof(suffix)) {
+ size /= 1024;
+ order++;
+ }
+
if (!index) {
pr_info("UV: GAM Range Table...\n");
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
}
- pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
+ pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
- ((unsigned long)(gre->limit - lgre)) >>
- (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
+ size, suffix[order],
gre->type, gre->nasid, gre->sockid, gre->pnode);
lgre = gre->limit;
OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
- OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
+ OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
return;
}
- if (c->x86_model == 6 && c->x86_mask == 1) {
+ if (c->x86_model == 6 && c->x86_stepping == 1) {
const int K6_BUG_LOOP = 1000000;
int n;
void (*f_vide)(void);
/* K6 with old style WHCR */
if (c->x86_model < 8 ||
- (c->x86_model == 8 && c->x86_mask < 8)) {
+ (c->x86_model == 8 && c->x86_stepping < 8)) {
/* We can only write allocate on the low 508Mb */
if (mbytes > 508)
mbytes = 508;
return;
}
- if ((c->x86_model == 8 && c->x86_mask > 7) ||
+ if ((c->x86_model == 8 && c->x86_stepping > 7) ||
c->x86_model == 9 || c->x86_model == 13) {
/* The more serious chips .. */
* are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
* As per AMD technical note 27212 0.2
*/
- if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
+ if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) {
rdmsr(MSR_K7_CLK_CTL, l, h);
if ((l & 0xfff00000) != 0x20000000) {
pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
* but they are not certified as MP capable.
*/
/* Athlon 660/661 is valid. */
- if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
- (c->x86_mask == 1)))
+ if ((c->x86_model == 6) && ((c->x86_stepping == 0) ||
+ (c->x86_stepping == 1)))
return;
/* Duron 670 is valid */
- if ((c->x86_model == 7) && (c->x86_mask == 0))
+ if ((c->x86_model == 7) && (c->x86_stepping == 0))
return;
/*
* See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
* more.
*/
- if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
- ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
+ if (((c->x86_model == 6) && (c->x86_stepping >= 2)) ||
+ ((c->x86_model == 7) && (c->x86_stepping >= 1)) ||
(c->x86_model > 7))
if (cpu_has(c, X86_FEATURE_MP))
return;
/* Set MTRR capability flag if appropriate */
if (c->x86 == 5)
if (c->x86_model == 13 || c->x86_model == 9 ||
- (c->x86_model == 8 && c->x86_mask >= 8))
+ (c->x86_model == 8 && c->x86_stepping >= 8))
set_cpu_cap(c, X86_FEATURE_K6_MTRR);
#endif
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
* Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
* all up to and including B1.
*/
- if (c->x86_model <= 1 && c->x86_mask <= 1)
+ if (c->x86_model <= 1 && c->x86_stepping <= 1)
set_cpu_cap(c, X86_FEATURE_CPB);
}
/* AMD errata T13 (order #21922) */
if ((c->x86 == 6)) {
/* Duron Rev A0 */
- if (c->x86_model == 3 && c->x86_mask == 0)
+ if (c->x86_model == 3 && c->x86_stepping == 0)
size = 64;
/* Tbird rev A1/A2 */
if (c->x86_model == 4 &&
- (c->x86_mask == 0 || c->x86_mask == 1))
+ (c->x86_stepping == 0 || c->x86_stepping == 1))
size = 256;
}
return size;
}
/* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 4) | cpu->x86_mask;
+ ms = (cpu->x86_model << 4) | cpu->x86_stepping;
while ((range = *erratum++))
if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
(ms >= AMD_MODEL_RANGE_START(range)) &&
if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
return SPECTRE_V2_CMD_NONE;
else {
- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
- sizeof(arg));
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
if (ret < 0)
return SPECTRE_V2_CMD_AUTO;
}
if (i >= ARRAY_SIZE(mitigation_options)) {
- pr_err("unknown option (%s). Switching to AUTO select\n",
- mitigation_options[i].option);
+ pr_err("unknown option (%s). Switching to AUTO select\n", arg);
return SPECTRE_V2_CMD_AUTO;
}
}
cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
!IS_ENABLED(CONFIG_RETPOLINE)) {
- pr_err("%s selected but not compiled in. Switching to AUTO select\n",
- mitigation_options[i].option);
+ pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
}
goto retpoline_auto;
break;
}
- pr_err("kernel not compiled with retpoline; no mitigation available!");
+ pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
return;
retpoline_auto:
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
retpoline_amd:
if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
- pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+ pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
goto retpoline_generic;
}
mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
pr_info("%s\n", spectre_v2_strings[mode]);
/*
- * If neither SMEP or KPTI are available, there is a risk of
+ * If neither SMEP nor PTI are available, there is a risk of
* hitting userspace addresses in the RSB after a context switch
* from a shallow call stack to a deeper one. To prevent this fill
* the entire RSB, even when using IBRS.
if ((!boot_cpu_has(X86_FEATURE_PTI) &&
!boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
- pr_info("Filling RSB on context switch\n");
+ pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
}
/* Initialize Indirect Branch Prediction Barrier if supported */
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
- pr_info("Enabling Indirect Branch Prediction Barrier\n");
+ pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
}
}
#undef pr_fmt
#ifdef CONFIG_SYSFS
-ssize_t cpu_show_meltdown(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
{
if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
return sprintf(buf, "Not affected\n");
return sprintf(buf, "Vulnerable\n");
}
-ssize_t cpu_show_spectre_v1(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
return sprintf(buf, "Not affected\n");
return sprintf(buf, "Mitigation: __user pointer sanitization\n");
}
-ssize_t cpu_show_spectre_v2(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return sprintf(buf, "Not affected\n");
spectre_v2_module_string());
}
#endif
-
-void __ibp_barrier(void)
-{
- __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
-}
-EXPORT_SYMBOL_GPL(__ibp_barrier);
clear_cpu_cap(c, X86_FEATURE_TSC);
break;
case 8:
- switch (c->x86_mask) {
+ switch (c->x86_stepping) {
default:
name = "2";
break;
* - Note, it seems this may only be in engineering samples.
*/
if ((c->x86 == 6) && (c->x86_model == 9) &&
- (c->x86_mask == 1) && (size == 65))
+ (c->x86_stepping == 1) && (size == 65))
size -= 1;
return size;
}
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
c->x86 = x86_family(tfms);
c->x86_model = x86_model(tfms);
- c->x86_mask = x86_stepping(tfms);
+ c->x86_stepping = x86_stepping(tfms);
if (cap0 & (1<<19)) {
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
int i;
c->loops_per_jiffy = loops_per_jiffy;
- c->x86_cache_size = -1;
+ c->x86_cache_size = 0;
c->x86_vendor = X86_VENDOR_UNKNOWN;
- c->x86_model = c->x86_mask = 0; /* So far unknown... */
+ c->x86_model = c->x86_stepping = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
c->x86_max_cores = 1;
pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
- if (c->x86_mask || c->cpuid_level >= 0)
- pr_cont(", stepping: 0x%x)\n", c->x86_mask);
+ if (c->x86_stepping || c->cpuid_level >= 0)
+ pr_cont(", stepping: 0x%x)\n", c->x86_stepping);
else
pr_cont(")\n");
}
/* common case step number/rev -- exceptions handled below */
c->x86_model = (dir1 >> 4) + 1;
- c->x86_mask = dir1 & 0xf;
+ c->x86_stepping = dir1 & 0xf;
/* Now cook; the original recipe is by Channing Corn, from Cyrix.
* We do the same thing for each generation: we work out
u32 microcode;
};
static const struct sku_microcode spectre_bad_microcodes[] = {
- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 },
- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 },
- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 },
- { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 },
- { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 },
+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 },
+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 },
{ INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
{ INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
- { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 },
{ INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 },
{ INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
{ INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
{ INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
{ INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
{ INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
- /* Updated in the 20180108 release; blacklist until we know otherwise */
- { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 },
/* Observed in the wild */
{ INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
{ INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
if (c->x86_model == spectre_bad_microcodes[i].model &&
- c->x86_mask == spectre_bad_microcodes[i].stepping)
+ c->x86_stepping == spectre_bad_microcodes[i].stepping)
return (c->microcode <= spectre_bad_microcodes[i].microcode);
}
return false;
* need the microcode to have already been loaded... so if it is
* not, recommend a BIOS update and disable large pages.
*/
- if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
+ if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
c->microcode < 0x20e) {
pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
clear_cpu_cap(c, X86_FEATURE_PSE);
/* CPUID workaround for 0F33/0F34 CPU */
if (c->x86 == 0xF && c->x86_model == 0x3
- && (c->x86_mask == 0x3 || c->x86_mask == 0x4))
+ && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
c->x86_phys_bits = 36;
/*
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
- boot_cpu_data.x86_mask < 8) {
+ boot_cpu_data.x86_stepping < 8) {
pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
return 1;
}
* Mask B, Pentium, but not Pentium MMX
*/
if (c->x86 == 5 &&
- c->x86_mask >= 1 && c->x86_mask <= 4 &&
+ c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
c->x86_model <= 3) {
/*
* Remember we have B step Pentia with bugs
* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
* model 3 mask 3
*/
- if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
+ if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
clear_cpu_cap(c, X86_FEATURE_SEP);
/*
* P4 Xeon erratum 037 workaround.
* Hardware prefetcher may cause stale data to be loaded into the cache.
*/
- if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
+ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
if (msr_set_bit(MSR_IA32_MISC_ENABLE,
MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
pr_info("CPU: C0 stepping P4 Xeon detected.\n");
* Specification Update").
*/
if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
- (c->x86_mask < 0x6 || c->x86_mask == 0xb))
+ (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
set_cpu_bug(c, X86_BUG_11AP);
case 6:
if (l2 == 128)
p = "Celeron (Mendocino)";
- else if (c->x86_mask == 0 || c->x86_mask == 5)
+ else if (c->x86_stepping == 0 || c->x86_stepping == 5)
p = "Celeron-A";
break;
cache_alloc_hsw_probe();
break;
case INTEL_FAM6_SKYLAKE_X:
- if (boot_cpu_data.x86_mask <= 4)
+ if (boot_cpu_data.x86_stepping <= 4)
set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
}
}
extern struct mca_config mca_cfg;
+#ifndef CONFIG_X86_64
+/*
+ * On 32-bit systems it would be difficult to safely unmap a poison page
+ * from the kernel 1:1 map because there are no non-canonical addresses that
+ * we can use to refer to the address without risking a speculative access.
+ * However, this isn't much of an issue because:
+ * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
+ * are only mapped into the kernel as needed
+ * 2) Few people would run a 32-bit kernel on a machine that supports
+ * recoverable errors because they have too much memory to boot 32-bit.
+ */
+static inline void mce_unmap_kpfn(unsigned long pfn) {}
+#define mce_unmap_kpfn mce_unmap_kpfn
+#endif
+
#endif /* __X86_MCE_INTERNAL_H__ */
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn);
+#endif
+
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
m->cs, m->ip);
if (m->cs == __KERNEL_CS)
- pr_cont("{%pS}", (void *)m->ip);
+ pr_cont("{%pS}", (void *)(unsigned long)m->ip);
pr_cont("\n");
}
if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT;
- memory_failure(pfn, 0);
+ if (!memory_failure(pfn, 0))
+ mce_unmap_kpfn(pfn);
}
return NOTIFY_OK;
ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
if (ret)
pr_err("Memory error not recovered");
+ else
+ mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
return ret;
}
-#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
-
-void arch_unmap_kpfn(unsigned long pfn)
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn)
{
unsigned long decoy_addr;
* We would like to just call:
* set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
* but doing that would radically increase the odds of a
- * speculative access to the posion page because we'd have
+ * speculative access to the poison page because we'd have
* the virtual address of the kernel 1:1 mapping sitting
* around in registers.
* Instead we get tricky. We create a non-canonical address
if (set_memory_np(decoy_addr, 1))
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-
}
#endif
*/
if (c->x86 == 6 &&
c->x86_model == INTEL_FAM6_BROADWELL_X &&
- c->x86_mask == 0x01 &&
+ c->x86_stepping == 0x01 &&
llc_size_per_core > 2621440 &&
c->microcode < 0x0b000021) {
pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
return UCODE_NFOUND;
sprintf(name, "intel-ucode/%02x-%02x-%02x",
- c->x86, c->x86_model, c->x86_mask);
+ c->x86, c->x86_model, c->x86_stepping);
if (request_firmware_direct(&firmware, name, device)) {
pr_debug("data file %s load failed\n", name);
static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
{
- u64 llc_size = c->x86_cache_size * 1024;
+ u64 llc_size = c->x86_cache_size * 1024ULL;
do_div(llc_size, c->x86_max_cores);
*/
if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
- boot_cpu_data.x86_mask <= 7) {
+ boot_cpu_data.x86_stepping <= 7) {
if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
return -EINVAL;
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_data.x86 == 0xF &&
boot_cpu_data.x86_model == 0x3 &&
- (boot_cpu_data.x86_mask == 0x3 ||
- boot_cpu_data.x86_mask == 0x4))
+ (boot_cpu_data.x86_stepping == 0x3 ||
+ boot_cpu_data.x86_stepping == 0x4))
phys_addr = 36;
size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
c->x86_model,
c->x86_model_id[0] ? c->x86_model_id : "unknown");
- if (c->x86_mask || c->cpuid_level >= 0)
- seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+ if (c->x86_stepping || c->cpuid_level >= 0)
+ seq_printf(m, "stepping\t: %d\n", c->x86_stepping);
else
seq_puts(m, "stepping\t: unknown\n");
if (c->microcode)
}
/* Cache size */
- if (c->x86_cache_size >= 0)
- seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+ if (c->x86_cache_size)
+ seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size);
show_cpuinfo_core(m, c, cpu);
show_cpuinfo_misc(m, c);
#define X86 new_cpu_data+CPUINFO_x86
#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor
#define X86_MODEL new_cpu_data+CPUINFO_x86_model
-#define X86_MASK new_cpu_data+CPUINFO_x86_mask
+#define X86_STEPPING new_cpu_data+CPUINFO_x86_stepping
#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math
#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level
#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
shrb $4,%al
movb %al,X86_MODEL
andb $0x0f,%cl # mask mask revision
- movb %cl,X86_MASK
+ movb %cl,X86_STEPPING
movl %edx,X86_CAPABILITY
.Lis486:
goto overflow;
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
value -= (u64)address;
*(u32 *)location = value;
break;
goto overflow;
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
if (*(u32 *)loc != 0)
goto invalid_relocation;
val -= (u64)loc;
processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
processor.cpuflag = CPU_ENABLED;
processor.cpufeature = (boot_cpu_data.x86 << 8) |
- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping;
processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];
processor.reserved[0] = 0;
processor.reserved[1] = 0;
__native_flush_tlb_global();
}
-static void native_flush_tlb_single(unsigned long addr)
+static void native_flush_tlb_one_user(unsigned long addr)
{
- __native_flush_tlb_single(addr);
+ __native_flush_tlb_one_user(addr);
}
struct static_key paravirt_steal_enabled;
.flush_tlb_user = native_flush_tlb,
.flush_tlb_kernel = native_flush_tlb_global,
- .flush_tlb_single = native_flush_tlb_single,
+ .flush_tlb_one_user = native_flush_tlb_one_user,
.flush_tlb_others = native_flush_tlb_others,
.pgd_alloc = __paravirt_pgd_alloc,
cpu_set_state_online(me);
}
-void __init native_smp_cpus_done(unsigned int max_cpus)
+void __init calculate_max_logical_packages(void)
{
int ncpus;
- pr_debug("Boot done\n");
/*
* Today neither Intel nor AMD support heterogenous systems so
* extrapolate the boot cpu's data to all packages.
ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
pr_info("Max logical packages: %u\n", __max_logical_packages);
+}
+
+void __init native_smp_cpus_done(unsigned int max_cpus)
+{
+ pr_debug("Boot done\n");
+
+ calculate_max_logical_packages();
if (x86_has_numa_in_package)
set_sched_topology(x86_numa_in_package_topology);
cpumask_clear(cpu_llc_shared_mask(cpu));
cpumask_clear(topology_sibling_cpumask(cpu));
cpumask_clear(topology_core_cpumask(cpu));
- c->phys_proc_id = 0;
c->cpu_core_id = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
recompute_smt_state();
break;
case BUG_TRAP_TYPE_WARN:
- regs->ip += LEN_UD0;
+ regs->ip += LEN_UD2;
return 1;
}
typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
/* The caller should hold mmu-lock before calling this function. */
-static bool
+static __always_inline bool
slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, int start_level, int end_level,
gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
return flush;
}
-static bool
+static __always_inline bool
slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, int start_level, int end_level,
bool lock_flush_tlb)
lock_flush_tlb);
}
-static bool
+static __always_inline bool
slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
}
-static bool
+static __always_inline bool
slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
}
-static bool
+static __always_inline bool
slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
(unsigned long)(vmcs12->posted_intr_desc_addr &
(PAGE_SIZE - 1)));
}
- if (!nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
+ if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
+ vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
+ CPU_BASED_USE_MSR_BITMAPS);
+ else
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_USE_MSR_BITMAPS);
}
* updated to reflect this when L1 (or its L2s) actually write to
* the MSR.
*/
- bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
- bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
+ bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
+ bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
/* Nothing to do if the MSR bitmap is not in use. */
if (!cpu_has_vmx_msr_bitmap() ||
{
unsigned int fam, model;
- fam = x86_family(sig);
+ fam = x86_family(sig);
model = (sig >> 4) & 0xf;
asm(
".type just_return_func, @function\n"
+ ".globl just_return_func\n"
"just_return_func:\n"
" ret\n"
".size just_return_func, .-just_return_func\n"
* It's enough to flush this one mapping.
* (PGE mappings get flushed as well)
*/
- __flush_tlb_one(vaddr);
+ __flush_tlb_one_kernel(vaddr);
}
void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
register_page_bootmem_info();
/* Register memory areas for /proc/kcore */
- kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
- PAGE_SIZE, KCORE_OTHER);
+ if (get_gate_vma(&init_mm))
+ kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
mem_init_print_info(NULL);
}
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
else
pte_clear(&init_mm, addr, pte);
- __flush_tlb_one(addr);
+ __flush_tlb_one_kernel(addr);
}
return -1;
}
- __flush_tlb_one(f->addr);
+ __flush_tlb_one_kernel(f->addr);
return 0;
}
* It's enough to flush this one mapping.
* (PGE mappings get flushed as well)
*/
- __flush_tlb_one(vaddr);
+ __flush_tlb_one_kernel(vaddr);
}
unsigned long __FIXADDR_TOP = 0xfffff000;
* flush that changes context.tlb_gen from 2 to 3. If they get
* processed on this CPU in reverse order, we'll see
* local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
- * If we were to use __flush_tlb_single() and set local_tlb_gen to
+ * If we were to use __flush_tlb_one_user() and set local_tlb_gen to
* 3, we'd be break the invariant: we'd update local_tlb_gen above
* 1 without the full flush that's needed for tlb_gen 2.
*
addr = f->start;
while (addr < f->end) {
- __flush_tlb_single(addr);
+ __flush_tlb_one_user(addr);
addr += PAGE_SIZE;
}
if (local)
/* flush range by one by one 'invlpg' */
for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
- __flush_tlb_one(addr);
+ __flush_tlb_one_kernel(addr);
}
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
-#include <asm/cacheflush.h>
-#include <asm/set_memory.h>
#include <linux/bpf.h>
+#include <asm/set_memory.h>
+#include <asm/nospec-branch.h>
+
/*
* assembly code in arch/x86/net/bpf_jit.S
*/
static bool is_simm32(s64 value)
{
- return value == (s64) (s32) value;
+ return value == (s64)(s32)value;
+}
+
+static bool is_uimm32(u64 value)
+{
+ return value == (u64)(u32)value;
}
/* mov dst, src */
#define X86_JLE 0x7E
#define X86_JG 0x7F
-static void bpf_flush_icache(void *start, void *end)
-{
- mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
- smp_wmb();
- flush_icache_range((unsigned long)start, (unsigned long)end);
- set_fs(old_fs);
-}
-
#define CHOOSE_LOAD_FUNC(K, func) \
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
/* emit x64 prologue code for BPF program and check it's size.
* bpf_tail_call helper will skip it while jumping into another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth)
+static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
u8 *prog = *pprog;
int cnt = 0;
/* mov qword ptr [rbp+24],r15 */
EMIT4(0x4C, 0x89, 0x7D, 24);
- /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
- * we need to reset the counter to 0. It's done in two instructions,
- * resetting rax register to 0 (xor on eax gets 0 extended), and
- * moving it to the counter location.
- */
+ if (!ebpf_from_cbpf) {
+ /* Clear the tail call counter (tail_call_cnt): for eBPF tail
+ * calls we need to reset the counter to 0. It's done in two
+ * instructions, resetting rax register to 0, and moving it
+ * to the counter location.
+ */
- /* xor eax, eax */
- EMIT2(0x31, 0xc0);
- /* mov qword ptr [rbp+32], rax */
- EMIT4(0x48, 0x89, 0x45, 32);
+ /* xor eax, eax */
+ EMIT2(0x31, 0xc0);
+ /* mov qword ptr [rbp+32], rax */
+ EMIT4(0x48, 0x89, 0x45, 32);
+
+ BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+ }
- BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
*pprog = prog;
}
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 43 /* number of bytes to jump */
+#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;
*/
EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 32
+#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
* goto out;
*/
EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
-#define OFFSET3 10
+#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
EMIT2(X86_JE, OFFSET3); /* je out */
label3 = cnt;
* rdi == ctx (1st arg)
* rax == prog->bpf_func + prologue_size
*/
- EMIT2(0xFF, 0xE0); /* jmp rax */
+ RETPOLINE_RAX_BPF_JIT();
/* out: */
BUILD_BUG_ON(cnt - label1 != OFFSET1);
*pprog = prog;
}
+static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
+ u32 dst_reg, const u32 imm32)
+{
+ u8 *prog = *pprog;
+ u8 b1, b2, b3;
+ int cnt = 0;
+
+ /* optimization: if imm32 is positive, use 'mov %eax, imm32'
+ * (which zero-extends imm32) to save 2 bytes.
+ */
+ if (sign_propagate && (s32)imm32 < 0) {
+ /* 'mov %rax, imm32' sign extends imm32 */
+ b1 = add_1mod(0x48, dst_reg);
+ b2 = 0xC7;
+ b3 = 0xC0;
+ EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
+ goto done;
+ }
+
+ /* optimization: if imm32 is zero, use 'xor %eax, %eax'
+ * to save 3 bytes.
+ */
+ if (imm32 == 0) {
+ if (is_ereg(dst_reg))
+ EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+ b2 = 0x31; /* xor */
+ b3 = 0xC0;
+ EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
+ goto done;
+ }
+
+ /* mov %eax, imm32 */
+ if (is_ereg(dst_reg))
+ EMIT1(add_1mod(0x40, dst_reg));
+ EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+done:
+ *pprog = prog;
+}
+
+static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
+ const u32 imm32_hi, const u32 imm32_lo)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
+ /* For emitting plain u32, where sign bit must not be
+ * propagated LLVM tends to load imm64 over mov32
+ * directly, so save couple of bytes by just doing
+ * 'mov %eax, imm32' instead.
+ */
+ emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
+ } else {
+ /* movabsq %rax, imm64 */
+ EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
+ EMIT(imm32_lo, 4);
+ EMIT(imm32_hi, 4);
+ }
+
+ *pprog = prog;
+}
+
+static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (is64) {
+ /* mov dst, src */
+ EMIT_mov(dst_reg, src_reg);
+ } else {
+ /* mov32 dst, src */
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT1(add_2mod(0x40, dst_reg, src_reg));
+ EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+ }
+
+ *pprog = prog;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
int proglen = 0;
u8 *prog = temp;
- emit_prologue(&prog, bpf_prog->aux->stack_depth);
+ emit_prologue(&prog, bpf_prog->aux->stack_depth,
+ bpf_prog_was_classic(bpf_prog));
if (seen_ld_abs)
emit_load_skb_data_hlen(&prog);
const s32 imm32 = insn->imm;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
- u8 b1 = 0, b2 = 0, b3 = 0;
+ u8 b2 = 0, b3 = 0;
s64 jmp_offset;
u8 jmp_cond;
bool reload_skb_data;
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
- /* mov dst, src */
case BPF_ALU64 | BPF_MOV | BPF_X:
- EMIT_mov(dst_reg, src_reg);
- break;
-
- /* mov32 dst, src */
case BPF_ALU | BPF_MOV | BPF_X:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT1(add_2mod(0x40, dst_reg, src_reg));
- EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+ emit_mov_reg(&prog,
+ BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, src_reg);
break;
/* neg dst */
break;
case BPF_ALU64 | BPF_MOV | BPF_K:
- /* optimization: if imm32 is positive,
- * use 'mov eax, imm32' (which zero-extends imm32)
- * to save 2 bytes
- */
- if (imm32 < 0) {
- /* 'mov rax, imm32' sign extends imm32 */
- b1 = add_1mod(0x48, dst_reg);
- b2 = 0xC7;
- b3 = 0xC0;
- EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
- break;
- }
-
case BPF_ALU | BPF_MOV | BPF_K:
- /* optimization: if imm32 is zero, use 'xor <dst>,<dst>'
- * to save 3 bytes.
- */
- if (imm32 == 0) {
- if (is_ereg(dst_reg))
- EMIT1(add_2mod(0x40, dst_reg, dst_reg));
- b2 = 0x31; /* xor */
- b3 = 0xC0;
- EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
- break;
- }
-
- /* mov %eax, imm32 */
- if (is_ereg(dst_reg))
- EMIT1(add_1mod(0x40, dst_reg));
- EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+ emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, imm32);
break;
case BPF_LD | BPF_IMM | BPF_DW:
- /* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
- * to save 7 bytes.
- */
- if (insn[0].imm == 0 && insn[1].imm == 0) {
- b1 = add_2mod(0x48, dst_reg, dst_reg);
- b2 = 0x31; /* xor */
- b3 = 0xC0;
- EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg));
-
- insn++;
- i++;
- break;
- }
-
- /* movabsq %rax, imm64 */
- EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
- EMIT(insn[0].imm, 4);
- EMIT(insn[1].imm, 4);
-
+ emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
insn++;
i++;
break;
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU64 | BPF_MUL | BPF_K:
case BPF_ALU64 | BPF_MUL | BPF_X:
- EMIT1(0x50); /* push rax */
- EMIT1(0x52); /* push rdx */
+ {
+ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
+ if (dst_reg != BPF_REG_0)
+ EMIT1(0x50); /* push rax */
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x52); /* push rdx */
/* mov r11, dst_reg */
EMIT_mov(AUX_REG, dst_reg);
if (BPF_SRC(insn->code) == BPF_X)
- /* mov rax, src_reg */
- EMIT_mov(BPF_REG_0, src_reg);
+ emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
else
- /* mov rax, imm32 */
- EMIT3_off32(0x48, 0xC7, 0xC0, imm32);
+ emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);
- if (BPF_CLASS(insn->code) == BPF_ALU64)
+ if (is64)
EMIT1(add_1mod(0x48, AUX_REG));
else if (is_ereg(AUX_REG))
EMIT1(add_1mod(0x40, AUX_REG));
/* mul(q) r11 */
EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
- /* mov r11, rax */
- EMIT_mov(AUX_REG, BPF_REG_0);
-
- EMIT1(0x5A); /* pop rdx */
- EMIT1(0x58); /* pop rax */
-
- /* mov dst_reg, r11 */
- EMIT_mov(dst_reg, AUX_REG);
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x5A); /* pop rdx */
+ if (dst_reg != BPF_REG_0) {
+ /* mov dst_reg, rax */
+ EMIT_mov(dst_reg, BPF_REG_0);
+ EMIT1(0x58); /* pop rax */
+ }
break;
-
+ }
/* shifts */
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU | BPF_RSH | BPF_K:
case BPF_RSH: b3 = 0xE8; break;
case BPF_ARSH: b3 = 0xF8; break;
}
- EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
+
+ if (imm32 == 1)
+ EMIT2(0xD1, add_1reg(b3, dst_reg));
+ else
+ EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
break;
case BPF_ALU | BPF_LSH | BPF_X:
bpf_jit_dump(prog->len, proglen, pass + 1, image);
if (image) {
- bpf_flush_icache(header, image + proglen);
if (!prog->is_func || extra_pass) {
bpf_jit_binary_lock_ro(header);
} else {
local_flush_tlb();
stat->d_alltlb++;
} else {
- __flush_tlb_single(msg->address);
+ __flush_tlb_one_user(msg->address);
stat->d_onetlb++;
}
stat->d_requestee++;
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
/*
* PC relative relocations don't need to be adjusted unless
* referencing a percpu symbol.
+ *
+ * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32.
*/
if (is_percpu_sym(sym, symname))
add_reloc(&relocs32neg, offset);
preempt_enable();
}
-static void xen_flush_tlb_single(unsigned long addr)
+static void xen_flush_tlb_one_user(unsigned long addr)
{
struct mmuext_op *op;
struct multicall_space mcs;
- trace_xen_mmu_flush_tlb_single(addr);
+ trace_xen_mmu_flush_tlb_one_user(addr);
preempt_disable();
.flush_tlb_user = xen_flush_tlb,
.flush_tlb_kernel = xen_flush_tlb,
- .flush_tlb_single = xen_flush_tlb_single,
+ .flush_tlb_one_user = xen_flush_tlb_one_user,
.flush_tlb_others = xen_flush_tlb_others,
.pgd_alloc = xen_pgd_alloc,
if (xen_hvm_domain())
native_smp_cpus_done(max_cpus);
+ else
+ calculate_max_logical_packages();
if (xen_have_vcpu_info_placement)
return;
cpu_relax();
}
+ __set_current_state(TASK_RUNNING);
return false;
}
// SPDX-License-Identifier: GPL-2.0
#include "blacklist.h"
-const char __initdata *const blacklist_hashes[] = {
+const char __initconst *const blacklist_hashes[] = {
NULL
};
pr_devel("sinfo %u: Direct signer is key %x\n",
sinfo->index, key_serial(key));
x509 = NULL;
+ sig = sinfo->sig;
goto matched;
}
if (PTR_ERR(key) != -ENOKEY)
sinfo->index);
return 0;
}
- ret = public_key_verify_signature(p->pub, p->sig);
+ ret = public_key_verify_signature(p->pub, x509->sig);
if (ret < 0)
return ret;
x509->signer = p;
*
* (*) -EBADMSG if some part of the message was invalid, or:
*
- * (*) 0 if no signature chains were found to be blacklisted or to contain
- * unsupported crypto, or:
+ * (*) 0 if a signature chain passed verification, or:
*
* (*) -EKEYREJECTED if a blacklisted key was encountered, or:
*
for (sinfo = pkcs7->signed_infos; sinfo; sinfo = sinfo->next) {
ret = pkcs7_verify_one(pkcs7, sinfo);
- if (sinfo->blacklisted && actual_ret == -ENOPKG)
- actual_ret = -EKEYREJECTED;
+ if (sinfo->blacklisted) {
+ if (actual_ret == -ENOPKG)
+ actual_ret = -EKEYREJECTED;
+ continue;
+ }
if (ret < 0) {
if (ret == -ENOPKG) {
sinfo->unsupported_crypto = true;
BUG_ON(!pkey);
BUG_ON(!sig);
- BUG_ON(!sig->digest);
BUG_ON(!sig->s);
+ if (!sig->digest)
+ return -ENOPKG;
+
alg_name = sig->pkey_algo;
if (strcmp(sig->pkey_algo, "rsa") == 0) {
/* The data wangled by the RSA algorithm is typically padded
*
* Returns 0 if the new certificate was accepted, -ENOKEY if we couldn't find a
* matching parent certificate in the trusted list, -EKEYREJECTED if the
- * signature check fails or the key is blacklisted and some other error if
- * there is a matching certificate but the signature check cannot be performed.
+ * signature check fails or the key is blacklisted, -ENOPKG if the signature
+ * uses unsupported crypto, or some other error if there is a matching
+ * certificate but the signature check cannot be performed.
*/
int restrict_link_by_signature(struct key *dest_keyring,
const struct key_type *type,
return -EOPNOTSUPP;
sig = payload->data[asym_auth];
+ if (!sig)
+ return -ENOPKG;
if (!sig->auth_ids[0] && !sig->auth_ids[1])
return -ENOKEY;
return -EOPNOTSUPP;
sig = payload->data[asym_auth];
+ if (!sig)
+ return -ENOPKG;
if (!sig->auth_ids[0] && !sig->auth_ids[1])
return -ENOKEY;
*
* Returns 0 if the new certificate was accepted, -ENOKEY if we
* couldn't find a matching parent certificate in the trusted list,
- * -EKEYREJECTED if the signature check fails, and some other error if
- * there is a matching certificate but the signature check cannot be
- * performed.
+ * -EKEYREJECTED if the signature check fails, -ENOPKG if the signature uses
+ * unsupported crypto, or some other error if there is a matching certificate
+ * but the signature check cannot be performed.
*/
int restrict_link_by_key_or_keyring(struct key *dest_keyring,
const struct key_type *type,
*
* Returns 0 if the new certificate was accepted, -ENOKEY if we
* couldn't find a matching parent certificate in the trusted list,
- * -EKEYREJECTED if the signature check fails, and some other error if
- * there is a matching certificate but the signature check cannot be
- * performed.
+ * -EKEYREJECTED if the signature check fails, -ENOPKG if the signature uses
+ * unsupported crypto, or some other error if there is a matching certificate
+ * but the signature check cannot be performed.
*/
int restrict_link_by_key_or_keyring_chain(struct key *dest_keyring,
const struct key_type *type,
#include <crypto/sha3.h>
#include <asm/unaligned.h>
+/*
+ * On some 32-bit architectures (mn10300 and h8300), GCC ends up using
+ * over 1 KB of stack if we inline the round calculation into the loop
+ * in keccakf(). On the other hand, on 64-bit architectures with plenty
+ * of [64-bit wide] general purpose registers, not inlining it severely
+ * hurts performance. So let's use 64-bitness as a heuristic to decide
+ * whether to inline or not.
+ */
+#ifdef CONFIG_64BIT
+#define SHA3_INLINE inline
+#else
+#define SHA3_INLINE noinline
+#endif
+
#define KECCAK_ROUNDS 24
static const u64 keccakf_rndc[24] = {
/* update the state with given number of rounds */
-static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25])
+static SHA3_INLINE void keccakf_round(u64 st[25])
{
u64 t[5], tt, bc[5];
- int round;
- for (round = 0; round < KECCAK_ROUNDS; round++) {
+ /* Theta */
+ bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
+ bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
+ bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
+ bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
+ bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
+
+ t[0] = bc[4] ^ rol64(bc[1], 1);
+ t[1] = bc[0] ^ rol64(bc[2], 1);
+ t[2] = bc[1] ^ rol64(bc[3], 1);
+ t[3] = bc[2] ^ rol64(bc[4], 1);
+ t[4] = bc[3] ^ rol64(bc[0], 1);
+
+ st[0] ^= t[0];
+
+ /* Rho Pi */
+ tt = st[1];
+ st[ 1] = rol64(st[ 6] ^ t[1], 44);
+ st[ 6] = rol64(st[ 9] ^ t[4], 20);
+ st[ 9] = rol64(st[22] ^ t[2], 61);
+ st[22] = rol64(st[14] ^ t[4], 39);
+ st[14] = rol64(st[20] ^ t[0], 18);
+ st[20] = rol64(st[ 2] ^ t[2], 62);
+ st[ 2] = rol64(st[12] ^ t[2], 43);
+ st[12] = rol64(st[13] ^ t[3], 25);
+ st[13] = rol64(st[19] ^ t[4], 8);
+ st[19] = rol64(st[23] ^ t[3], 56);
+ st[23] = rol64(st[15] ^ t[0], 41);
+ st[15] = rol64(st[ 4] ^ t[4], 27);
+ st[ 4] = rol64(st[24] ^ t[4], 14);
+ st[24] = rol64(st[21] ^ t[1], 2);
+ st[21] = rol64(st[ 8] ^ t[3], 55);
+ st[ 8] = rol64(st[16] ^ t[1], 45);
+ st[16] = rol64(st[ 5] ^ t[0], 36);
+ st[ 5] = rol64(st[ 3] ^ t[3], 28);
+ st[ 3] = rol64(st[18] ^ t[3], 21);
+ st[18] = rol64(st[17] ^ t[2], 15);
+ st[17] = rol64(st[11] ^ t[1], 10);
+ st[11] = rol64(st[ 7] ^ t[2], 6);
+ st[ 7] = rol64(st[10] ^ t[0], 3);
+ st[10] = rol64( tt ^ t[1], 1);
+
+ /* Chi */
+ bc[ 0] = ~st[ 1] & st[ 2];
+ bc[ 1] = ~st[ 2] & st[ 3];
+ bc[ 2] = ~st[ 3] & st[ 4];
+ bc[ 3] = ~st[ 4] & st[ 0];
+ bc[ 4] = ~st[ 0] & st[ 1];
+ st[ 0] ^= bc[ 0];
+ st[ 1] ^= bc[ 1];
+ st[ 2] ^= bc[ 2];
+ st[ 3] ^= bc[ 3];
+ st[ 4] ^= bc[ 4];
+
+ bc[ 0] = ~st[ 6] & st[ 7];
+ bc[ 1] = ~st[ 7] & st[ 8];
+ bc[ 2] = ~st[ 8] & st[ 9];
+ bc[ 3] = ~st[ 9] & st[ 5];
+ bc[ 4] = ~st[ 5] & st[ 6];
+ st[ 5] ^= bc[ 0];
+ st[ 6] ^= bc[ 1];
+ st[ 7] ^= bc[ 2];
+ st[ 8] ^= bc[ 3];
+ st[ 9] ^= bc[ 4];
+
+ bc[ 0] = ~st[11] & st[12];
+ bc[ 1] = ~st[12] & st[13];
+ bc[ 2] = ~st[13] & st[14];
+ bc[ 3] = ~st[14] & st[10];
+ bc[ 4] = ~st[10] & st[11];
+ st[10] ^= bc[ 0];
+ st[11] ^= bc[ 1];
+ st[12] ^= bc[ 2];
+ st[13] ^= bc[ 3];
+ st[14] ^= bc[ 4];
+
+ bc[ 0] = ~st[16] & st[17];
+ bc[ 1] = ~st[17] & st[18];
+ bc[ 2] = ~st[18] & st[19];
+ bc[ 3] = ~st[19] & st[15];
+ bc[ 4] = ~st[15] & st[16];
+ st[15] ^= bc[ 0];
+ st[16] ^= bc[ 1];
+ st[17] ^= bc[ 2];
+ st[18] ^= bc[ 3];
+ st[19] ^= bc[ 4];
+
+ bc[ 0] = ~st[21] & st[22];
+ bc[ 1] = ~st[22] & st[23];
+ bc[ 2] = ~st[23] & st[24];
+ bc[ 3] = ~st[24] & st[20];
+ bc[ 4] = ~st[20] & st[21];
+ st[20] ^= bc[ 0];
+ st[21] ^= bc[ 1];
+ st[22] ^= bc[ 2];
+ st[23] ^= bc[ 3];
+ st[24] ^= bc[ 4];
+}
- /* Theta */
- bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
- bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
- bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
- bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
- bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
-
- t[0] = bc[4] ^ rol64(bc[1], 1);
- t[1] = bc[0] ^ rol64(bc[2], 1);
- t[2] = bc[1] ^ rol64(bc[3], 1);
- t[3] = bc[2] ^ rol64(bc[4], 1);
- t[4] = bc[3] ^ rol64(bc[0], 1);
-
- st[0] ^= t[0];
-
- /* Rho Pi */
- tt = st[1];
- st[ 1] = rol64(st[ 6] ^ t[1], 44);
- st[ 6] = rol64(st[ 9] ^ t[4], 20);
- st[ 9] = rol64(st[22] ^ t[2], 61);
- st[22] = rol64(st[14] ^ t[4], 39);
- st[14] = rol64(st[20] ^ t[0], 18);
- st[20] = rol64(st[ 2] ^ t[2], 62);
- st[ 2] = rol64(st[12] ^ t[2], 43);
- st[12] = rol64(st[13] ^ t[3], 25);
- st[13] = rol64(st[19] ^ t[4], 8);
- st[19] = rol64(st[23] ^ t[3], 56);
- st[23] = rol64(st[15] ^ t[0], 41);
- st[15] = rol64(st[ 4] ^ t[4], 27);
- st[ 4] = rol64(st[24] ^ t[4], 14);
- st[24] = rol64(st[21] ^ t[1], 2);
- st[21] = rol64(st[ 8] ^ t[3], 55);
- st[ 8] = rol64(st[16] ^ t[1], 45);
- st[16] = rol64(st[ 5] ^ t[0], 36);
- st[ 5] = rol64(st[ 3] ^ t[3], 28);
- st[ 3] = rol64(st[18] ^ t[3], 21);
- st[18] = rol64(st[17] ^ t[2], 15);
- st[17] = rol64(st[11] ^ t[1], 10);
- st[11] = rol64(st[ 7] ^ t[2], 6);
- st[ 7] = rol64(st[10] ^ t[0], 3);
- st[10] = rol64( tt ^ t[1], 1);
-
- /* Chi */
- bc[ 0] = ~st[ 1] & st[ 2];
- bc[ 1] = ~st[ 2] & st[ 3];
- bc[ 2] = ~st[ 3] & st[ 4];
- bc[ 3] = ~st[ 4] & st[ 0];
- bc[ 4] = ~st[ 0] & st[ 1];
- st[ 0] ^= bc[ 0];
- st[ 1] ^= bc[ 1];
- st[ 2] ^= bc[ 2];
- st[ 3] ^= bc[ 3];
- st[ 4] ^= bc[ 4];
-
- bc[ 0] = ~st[ 6] & st[ 7];
- bc[ 1] = ~st[ 7] & st[ 8];
- bc[ 2] = ~st[ 8] & st[ 9];
- bc[ 3] = ~st[ 9] & st[ 5];
- bc[ 4] = ~st[ 5] & st[ 6];
- st[ 5] ^= bc[ 0];
- st[ 6] ^= bc[ 1];
- st[ 7] ^= bc[ 2];
- st[ 8] ^= bc[ 3];
- st[ 9] ^= bc[ 4];
-
- bc[ 0] = ~st[11] & st[12];
- bc[ 1] = ~st[12] & st[13];
- bc[ 2] = ~st[13] & st[14];
- bc[ 3] = ~st[14] & st[10];
- bc[ 4] = ~st[10] & st[11];
- st[10] ^= bc[ 0];
- st[11] ^= bc[ 1];
- st[12] ^= bc[ 2];
- st[13] ^= bc[ 3];
- st[14] ^= bc[ 4];
-
- bc[ 0] = ~st[16] & st[17];
- bc[ 1] = ~st[17] & st[18];
- bc[ 2] = ~st[18] & st[19];
- bc[ 3] = ~st[19] & st[15];
- bc[ 4] = ~st[15] & st[16];
- st[15] ^= bc[ 0];
- st[16] ^= bc[ 1];
- st[17] ^= bc[ 2];
- st[18] ^= bc[ 3];
- st[19] ^= bc[ 4];
-
- bc[ 0] = ~st[21] & st[22];
- bc[ 1] = ~st[22] & st[23];
- bc[ 2] = ~st[23] & st[24];
- bc[ 3] = ~st[24] & st[20];
- bc[ 4] = ~st[20] & st[21];
- st[20] ^= bc[ 0];
- st[21] ^= bc[ 1];
- st[22] ^= bc[ 2];
- st[23] ^= bc[ 3];
- st[24] ^= bc[ 4];
+static void __optimize("O3") keccakf(u64 st[25])
+{
+ int round;
+ for (round = 0; round < KECCAK_ROUNDS; round++) {
+ keccakf_round(st);
/* Iota */
st[0] ^= keccakf_rndc[round];
}
* acpi_of_match_device - Match device object using the "compatible" property.
* @adev: ACPI device object to match.
* @of_match_table: List of device IDs to match against.
+ * @of_id: OF ID if matched
*
* If @dev has an ACPI companion which has ACPI_DT_NAMESPACE_HID in its list of
* identifiers and a _DSD object with the "compatible" property, use that
* property to match against the given list of identifiers.
*/
static bool acpi_of_match_device(struct acpi_device *adev,
- const struct of_device_id *of_match_table)
+ const struct of_device_id *of_match_table,
+ const struct of_device_id **of_id)
{
const union acpi_object *of_compatible, *obj;
int i, nval;
const struct of_device_id *id;
for (id = of_match_table; id->compatible[0]; id++)
- if (!strcasecmp(obj->string.pointer, id->compatible))
+ if (!strcasecmp(obj->string.pointer, id->compatible)) {
+ if (of_id)
+ *of_id = id;
return true;
+ }
}
return false;
return true;
}
-static const struct acpi_device_id *__acpi_match_device(
- struct acpi_device *device,
- const struct acpi_device_id *ids,
- const struct of_device_id *of_ids)
+static bool __acpi_match_device(struct acpi_device *device,
+ const struct acpi_device_id *acpi_ids,
+ const struct of_device_id *of_ids,
+ const struct acpi_device_id **acpi_id,
+ const struct of_device_id **of_id)
{
const struct acpi_device_id *id;
struct acpi_hardware_id *hwid;
* driver for it.
*/
if (!device || !device->status.present)
- return NULL;
+ return false;
list_for_each_entry(hwid, &device->pnp.ids, list) {
/* First, check the ACPI/PNP IDs provided by the caller. */
- for (id = ids; id->id[0] || id->cls; id++) {
- if (id->id[0] && !strcmp((char *) id->id, hwid->id))
- return id;
- else if (id->cls && __acpi_match_device_cls(id, hwid))
- return id;
+ if (acpi_ids) {
+ for (id = acpi_ids; id->id[0] || id->cls; id++) {
+ if (id->id[0] && !strcmp((char *)id->id, hwid->id))
+ goto out_acpi_match;
+ if (id->cls && __acpi_match_device_cls(id, hwid))
+ goto out_acpi_match;
+ }
}
/*
* Next, check ACPI_DT_NAMESPACE_HID and try to match the
* "compatible" property if found.
- *
- * The id returned by the below is not valid, but the only
- * caller passing non-NULL of_ids here is only interested in
- * whether or not the return value is NULL.
*/
- if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id)
- && acpi_of_match_device(device, of_ids))
- return id;
+ if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id))
+ return acpi_of_match_device(device, of_ids, of_id);
}
- return NULL;
+ return false;
+
+out_acpi_match:
+ if (acpi_id)
+ *acpi_id = id;
+ return true;
}
/**
const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids,
const struct device *dev)
{
- return __acpi_match_device(acpi_companion_match(dev), ids, NULL);
+ const struct acpi_device_id *id = NULL;
+
+ __acpi_match_device(acpi_companion_match(dev), ids, NULL, &id, NULL);
+ return id;
}
EXPORT_SYMBOL_GPL(acpi_match_device);
-void *acpi_get_match_data(const struct device *dev)
+const void *acpi_device_get_match_data(const struct device *dev)
{
const struct acpi_device_id *match;
- if (!dev->driver)
- return NULL;
-
- if (!dev->driver->acpi_match_table)
- return NULL;
-
match = acpi_match_device(dev->driver->acpi_match_table, dev);
if (!match)
return NULL;
- return (void *)match->driver_data;
+ return (const void *)match->driver_data;
}
-EXPORT_SYMBOL_GPL(acpi_get_match_data);
+EXPORT_SYMBOL_GPL(acpi_device_get_match_data);
int acpi_match_device_ids(struct acpi_device *device,
const struct acpi_device_id *ids)
{
- return __acpi_match_device(device, ids, NULL) ? 0 : -ENOENT;
+ return __acpi_match_device(device, ids, NULL, NULL, NULL) ? 0 : -ENOENT;
}
EXPORT_SYMBOL(acpi_match_device_ids);
{
if (!drv->acpi_match_table)
return acpi_of_match_device(ACPI_COMPANION(dev),
- drv->of_match_table);
+ drv->of_match_table,
+ NULL);
- return !!__acpi_match_device(acpi_companion_match(dev),
- drv->acpi_match_table, drv->of_match_table);
+ return __acpi_match_device(acpi_companion_match(dev),
+ drv->acpi_match_table, drv->of_match_table,
+ NULL, NULL);
}
EXPORT_SYMBOL_GPL(acpi_driver_match_device);
ec->reference_count >= 1)
acpi_set_gpe(NULL, ec->gpe, ACPI_GPE_DISABLE);
+ if (acpi_sleep_no_ec_events())
+ acpi_ec_enter_noirq(ec);
+
return 0;
}
{
struct acpi_ec *ec = acpi_driver_data(to_acpi_device(dev));
+ if (acpi_sleep_no_ec_events())
+ acpi_ec_leave_noirq(ec);
+
if (ec_no_wakeup && test_bit(EC_FLAGS_STARTED, &ec->flags) &&
ec->reference_count >= 1)
acpi_set_gpe(NULL, ec->gpe, ACPI_GPE_ENABLE);
return 0;
}
-static void *
+static const void *
acpi_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
const struct device *dev)
{
- return acpi_get_match_data(dev);
+ return acpi_device_get_match_data(dev);
}
#define DECLARE_ACPI_FWNODE_OPS(ops) \
table->serial_port.access_width))) {
default:
pr_err("Unexpected SPCR Access Width. Defaulting to byte size\n");
+ /* fall through */
case 8:
iotype = "mmio";
break;
&target_thread->reply_error.work);
wake_up_interruptible(&target_thread->wait);
} else {
- WARN(1, "Unexpected reply error: %u\n",
- target_thread->reply_error.cmd);
+ /*
+ * Cannot get here for normal operation, but
+ * we can if multiple synchronous transactions
+ * are sent without blocking for responses.
+ * Just ignore the 2nd error in this case.
+ */
+ pr_warn("Unexpected reply error: %u\n",
+ target_thread->reply_error.cmd);
}
binder_inner_proc_unlock(target_thread->proc);
binder_thread_dec_tmpref(target_thread);
int debug_id = buffer->debug_id;
binder_debug(BINDER_DEBUG_TRANSACTION,
- "%d buffer release %d, size %zd-%zd, failed at %p\n",
+ "%d buffer release %d, size %zd-%zd, failed at %pK\n",
proc->pid, buffer->debug_id,
buffer->data_size, buffer->offsets_size, failed_at);
}
}
binder_debug(BINDER_DEBUG_DEAD_BINDER,
- "%d:%d BC_DEAD_BINDER_DONE %016llx found %p\n",
+ "%d:%d BC_DEAD_BINDER_DONE %016llx found %pK\n",
proc->pid, thread->pid, (u64)cookie,
death);
if (death == NULL) {
binder_inner_proc_unlock(thread->proc);
+ /*
+ * This is needed to avoid races between wake_up_poll() above and
+ * and ep_remove_waitqueue() called for other reasons (eg the epoll file
+ * descriptor being closed); ep_remove_waitqueue() holds an RCU read
+ * lock, so we can be sure it's done after calling synchronize_rcu().
+ */
+ if (thread->looper & BINDER_LOOPER_STATE_POLL)
+ synchronize_rcu();
+
if (send_reply)
binder_send_failed_reply(send_reply, BR_DEAD_REPLY);
binder_release_work(proc, &thread->todo);
bool wait_for_proc_work;
thread = binder_get_thread(proc);
+ if (!thread)
+ return POLLERR;
binder_inner_proc_lock(thread->proc);
thread->looper |= BINDER_LOOPER_STATE_POLL;
spin_lock(&t->lock);
to_proc = t->to_proc;
seq_printf(m,
- "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %ld r%d",
+ "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %ld r%d",
prefix, t->debug_id, t,
t->from ? t->from->proc->pid : 0,
t->from ? t->from->pid : 0,
}
if (buffer->target_node)
seq_printf(m, " node %d", buffer->target_node->debug_id);
- seq_printf(m, " size %zd:%zd data %p\n",
+ seq_printf(m, " size %zd:%zd data %pK\n",
buffer->data_size, buffer->offsets_size,
buffer->data);
}
(u32) 0xffffffff);
}
- writel((SAR_FBQ0_LOW << 28) | 0x00000000 | 0x00000000 |
- (SAR_FB_SIZE_0 / 48), SAR_REG_FBQS0);
- writel((SAR_FBQ1_LOW << 28) | 0x00000000 | 0x00000000 |
- (SAR_FB_SIZE_1 / 48), SAR_REG_FBQS1);
- writel((SAR_FBQ2_LOW << 28) | 0x00000000 | 0x00000000 |
- (SAR_FB_SIZE_2 / 48), SAR_REG_FBQS2);
- writel((SAR_FBQ3_LOW << 28) | 0x00000000 | 0x00000000 |
- (SAR_FB_SIZE_3 / 48), SAR_REG_FBQS3);
+ writel((SAR_FBQ0_LOW << 28) | (SAR_FB_SIZE_0 / 48), SAR_REG_FBQS0);
+ writel((SAR_FBQ1_LOW << 28) | (SAR_FB_SIZE_1 / 48), SAR_REG_FBQS1);
+ writel((SAR_FBQ2_LOW << 28) | (SAR_FB_SIZE_2 / 48), SAR_REG_FBQS2);
+ writel((SAR_FBQ3_LOW << 28) | (SAR_FB_SIZE_3 / 48), SAR_REG_FBQS3);
/* Initialize rate table */
for (i = 0; i < 256; i++) {
dev_info(link->consumer, "Dropping the link to %s\n",
dev_name(link->supplier));
+ if (link->flags & DL_FLAG_PM_RUNTIME)
+ pm_runtime_drop_link(link->consumer);
+
list_del(&link->s_node);
list_del(&link->c_node);
device_link_free(link);
return;
if (device_may_wakeup(wirq->dev)) {
- if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)
+ if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
+ !pm_runtime_status_suspended(wirq->dev))
enable_irq(wirq->irq);
enable_irq_wake(wirq->irq);
if (device_may_wakeup(wirq->dev)) {
disable_irq_wake(wirq->irq);
- if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)
+ if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
+ !pm_runtime_status_suspended(wirq->dev))
disable_irq_nosync(wirq->irq);
}
}
}
EXPORT_SYMBOL(fwnode_graph_parse_endpoint);
-void *device_get_match_data(struct device *dev)
+const void *device_get_match_data(struct device *dev)
{
- return fwnode_call_ptr_op(dev_fwnode(dev), device_get_match_data,
- dev);
+ return fwnode_call_ptr_op(dev_fwnode(dev), device_get_match_data, dev);
}
EXPORT_SYMBOL_GPL(device_get_match_data);
{ } /* Terminating entry */
};
+static inline void ath3k_log_failed_loading(int err, int len, int size)
+{
+ BT_ERR("Error in firmware loading err = %d, len = %d, size = %d",
+ err, len, size);
+}
+
#define USB_REQ_DFU_DNLOAD 1
#define BULK_SIZE 4096
#define FW_HDR_SIZE 20
return -ENOMEM;
}
- memcpy(send_buf, firmware->data, 20);
+ memcpy(send_buf, firmware->data, FW_HDR_SIZE);
err = usb_control_msg(udev, pipe, USB_REQ_DFU_DNLOAD, USB_TYPE_VENDOR,
- 0, 0, send_buf, 20, USB_CTRL_SET_TIMEOUT);
+ 0, 0, send_buf, FW_HDR_SIZE,
+ USB_CTRL_SET_TIMEOUT);
if (err < 0) {
BT_ERR("Can't change to loading configuration err");
goto error;
}
- sent += 20;
- count -= 20;
+ sent += FW_HDR_SIZE;
+ count -= FW_HDR_SIZE;
pipe = usb_sndbulkpipe(udev, 0x02);
&len, 3000);
if (err || (len != size)) {
- BT_ERR("Error in firmware loading err = %d,"
- "len = %d, size = %d", err, len, size);
+ ath3k_log_failed_loading(err, len, size);
goto error;
}
err = usb_bulk_msg(udev, pipe, send_buf, size,
&len, 3000);
if (err || (len != size)) {
- BT_ERR("Error in firmware loading err = %d,"
- "len = %d, size = %d", err, len, size);
+ ath3k_log_failed_loading(err, len, size);
kfree(send_buf);
return err;
}
static int ath3k_load_patch(struct usb_device *udev)
{
unsigned char fw_state;
- char filename[ATH3K_NAME_LEN] = {0};
+ char filename[ATH3K_NAME_LEN];
const struct firmware *firmware;
struct ath3k_version fw_version;
__u32 pt_rom_version, pt_build_version;
static int ath3k_load_syscfg(struct usb_device *udev)
{
unsigned char fw_state;
- char filename[ATH3K_NAME_LEN] = {0};
+ char filename[ATH3K_NAME_LEN];
const struct firmware *firmware;
struct ath3k_version fw_version;
int clk_value, ret;
/* load patch and sysconfig files for AR3012 */
if (id->driver_info & BTUSB_ATH3012) {
-
/* New firmware with patch and sysconfig files already loaded */
if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x0001)
return -ENODEV;
static void ath3k_disconnect(struct usb_interface *intf)
{
- BT_DBG("ath3k_disconnect intf %p", intf);
+ BT_DBG("%s intf %p", __func__, intf);
}
static struct usb_driver ath3k_driver = {
return -EFAULT;
}
- skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_ATOMIC);
+ skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_KERNEL);
if (!skb) {
BT_ERR("No free skb");
return -ENOMEM;
#define RTL_ROM_LMP_8761A 0x8761
#define RTL_ROM_LMP_8822B 0x8822
+#define IC_MATCH_FL_LMPSUBV (1 << 0)
+#define IC_MATCH_FL_HCIREV (1 << 1)
+#define IC_INFO(lmps, hcir) \
+ .match_flags = IC_MATCH_FL_LMPSUBV | IC_MATCH_FL_HCIREV, \
+ .lmp_subver = (lmps), \
+ .hci_rev = (hcir)
+
+struct id_table {
+ __u16 match_flags;
+ __u16 lmp_subver;
+ __u16 hci_rev;
+ bool config_needed;
+ char *fw_name;
+ char *cfg_name;
+};
+
+static const struct id_table ic_id_table[] = {
+ /* 8723B */
+ { IC_INFO(RTL_ROM_LMP_8723B, 0xb),
+ .config_needed = false,
+ .fw_name = "rtl_bt/rtl8723b_fw.bin",
+ .cfg_name = "rtl_bt/rtl8723b_config.bin" },
+
+ /* 8723D */
+ { IC_INFO(RTL_ROM_LMP_8723B, 0xd),
+ .config_needed = true,
+ .fw_name = "rtl_bt/rtl8723d_fw.bin",
+ .cfg_name = "rtl_bt/rtl8723d_config.bin" },
+
+ /* 8821A */
+ { IC_INFO(RTL_ROM_LMP_8821A, 0xa),
+ .config_needed = false,
+ .fw_name = "rtl_bt/rtl8821a_fw.bin",
+ .cfg_name = "rtl_bt/rtl8821a_config.bin" },
+
+ /* 8821C */
+ { IC_INFO(RTL_ROM_LMP_8821A, 0xc),
+ .config_needed = false,
+ .fw_name = "rtl_bt/rtl8821c_fw.bin",
+ .cfg_name = "rtl_bt/rtl8821c_config.bin" },
+
+ /* 8761A */
+ { IC_MATCH_FL_LMPSUBV, RTL_ROM_LMP_8761A, 0x0,
+ .config_needed = false,
+ .fw_name = "rtl_bt/rtl8761a_fw.bin",
+ .cfg_name = "rtl_bt/rtl8761a_config.bin" },
+
+ /* 8822B */
+ { IC_INFO(RTL_ROM_LMP_8822B, 0xb),
+ .config_needed = true,
+ .fw_name = "rtl_bt/rtl8822b_fw.bin",
+ .cfg_name = "rtl_bt/rtl8822b_config.bin" },
+ };
+
static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version)
{
struct rtl_rom_version_evt *rom_version;
return 0;
}
-static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
- const struct firmware *fw,
- unsigned char **_buf)
+static int rtlbt_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
+ const struct firmware *fw,
+ unsigned char **_buf)
{
const u8 extension_sig[] = { 0x51, 0x04, 0xfd, 0x77 };
struct rtl_epatch_header *epatch_info;
{ RTL_ROM_LMP_8821A, 2 },
{ RTL_ROM_LMP_8761A, 3 },
{ RTL_ROM_LMP_8822B, 8 },
+ { RTL_ROM_LMP_8723B, 9 }, /* 8723D */
+ { RTL_ROM_LMP_8821A, 10 }, /* 8821C */
};
ret = rtl_read_rom_version(hdev, &rom_version);
return ret;
}
-static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
- const char *fw_name)
+static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 hci_rev,
+ u16 lmp_subver)
{
unsigned char *fw_data = NULL;
const struct firmware *fw;
u8 *cfg_buff = NULL;
u8 *tbuff;
char *cfg_name = NULL;
- bool config_needed = false;
+ char *fw_name = NULL;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ic_id_table); i++) {
+ if ((ic_id_table[i].match_flags & IC_MATCH_FL_LMPSUBV) &&
+ (ic_id_table[i].lmp_subver != lmp_subver))
+ continue;
+ if ((ic_id_table[i].match_flags & IC_MATCH_FL_HCIREV) &&
+ (ic_id_table[i].hci_rev != hci_rev))
+ continue;
- switch (lmp_subver) {
- case RTL_ROM_LMP_8723B:
- cfg_name = "rtl_bt/rtl8723b_config.bin";
- break;
- case RTL_ROM_LMP_8821A:
- cfg_name = "rtl_bt/rtl8821a_config.bin";
- break;
- case RTL_ROM_LMP_8761A:
- cfg_name = "rtl_bt/rtl8761a_config.bin";
- break;
- case RTL_ROM_LMP_8822B:
- cfg_name = "rtl_bt/rtl8822b_config.bin";
- config_needed = true;
- break;
- default:
- BT_ERR("%s: rtl: no config according to lmp_subver %04x",
- hdev->name, lmp_subver);
break;
}
+ if (i >= ARRAY_SIZE(ic_id_table)) {
+ BT_ERR("%s: unknown IC info, lmp subver %04x, hci rev %04x",
+ hdev->name, lmp_subver, hci_rev);
+ return -EINVAL;
+ }
+
+ cfg_name = ic_id_table[i].cfg_name;
+
if (cfg_name) {
cfg_sz = rtl_load_config(hdev, cfg_name, &cfg_buff);
if (cfg_sz < 0) {
cfg_sz = 0;
- if (config_needed)
+ if (ic_id_table[i].config_needed)
BT_ERR("Necessary config file %s not found\n",
cfg_name);
}
} else
cfg_sz = 0;
+ fw_name = ic_id_table[i].fw_name;
bt_dev_info(hdev, "rtl: loading %s", fw_name);
ret = request_firmware(&fw, fw_name, &hdev->dev);
if (ret < 0) {
goto err_req_fw;
}
- ret = rtl8723b_parse_firmware(hdev, lmp_subver, fw, &fw_data);
+ ret = rtlbt_parse_firmware(hdev, lmp_subver, fw, &fw_data);
if (ret < 0)
goto out;
{
struct sk_buff *skb;
struct hci_rp_read_local_version *resp;
- u16 lmp_subver;
+ u16 hci_rev, lmp_subver;
skb = btrtl_read_local_version(hdev);
if (IS_ERR(skb))
resp->hci_ver, resp->hci_rev,
resp->lmp_ver, resp->lmp_subver);
+ hci_rev = le16_to_cpu(resp->hci_rev);
lmp_subver = le16_to_cpu(resp->lmp_subver);
kfree_skb(skb);
case RTL_ROM_LMP_3499:
return btrtl_setup_rtl8723a(hdev);
case RTL_ROM_LMP_8723B:
- return btrtl_setup_rtl8723b(hdev, lmp_subver,
- "rtl_bt/rtl8723b_fw.bin");
case RTL_ROM_LMP_8821A:
- return btrtl_setup_rtl8723b(hdev, lmp_subver,
- "rtl_bt/rtl8821a_fw.bin");
case RTL_ROM_LMP_8761A:
- return btrtl_setup_rtl8723b(hdev, lmp_subver,
- "rtl_bt/rtl8761a_fw.bin");
case RTL_ROM_LMP_8822B:
- return btrtl_setup_rtl8723b(hdev, lmp_subver,
- "rtl_bt/rtl8822b_fw.bin");
+ return btrtl_setup_rtl8723b(hdev, hci_rev, lmp_subver);
default:
bt_dev_info(hdev, "rtl: assuming no firmware upload needed");
return 0;
/* Intel Bluetooth devices */
{ USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW },
+ { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW },
{ USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR },
{ USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL },
{ USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL },
{ USB_DEVICE(0x13d3, 0x3461), .driver_info = BTUSB_REALTEK },
{ USB_DEVICE(0x13d3, 0x3462), .driver_info = BTUSB_REALTEK },
+ /* Additional Realtek 8822BE Bluetooth devices */
+ { USB_DEVICE(0x0b05, 0x185c), .driver_info = BTUSB_REALTEK },
+
/* Silicon Wave based devices */
{ USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE },
case 0x0c: /* WsP */
case 0x11: /* JfP */
case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* QnJ, IcP */
break;
default:
BT_ERR("%s: Unsupported Intel hardware variant (%u)",
break;
case 0x11: /* JfP */
case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* QnJ, IcP */
snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.sfi",
le16_to_cpu(ver.hw_variant),
le16_to_cpu(ver.hw_revision),
break;
case 0x11: /* JfP */
case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* QnJ, IcP */
snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.ddc",
le16_to_cpu(ver.hw_variant),
le16_to_cpu(ver.hw_revision),
/* Clear RTS first */
tty->driver->ops->tiocmget(tty);
tty->driver->ops->tiocmset(tty, 0x00, TIOCM_RTS);
- mdelay(20);
+ msleep(20);
/* Set RTS, wake up board */
tty->driver->ops->tiocmget(tty);
tty->driver->ops->tiocmset(tty, TIOCM_RTS, 0x00);
- mdelay(20);
+ msleep(20);
status = tty->driver->ops->tiocmget(tty);
return status;
break;
case ACTION_DELAY: /* sleep */
bt_dev_info(lldev->hu.hdev, "sleep command in scr");
- mdelay(((struct bts_action_delay *)action_ptr)->msec);
+ msleep(((struct bts_action_delay *)action_ptr)->msec);
break;
}
len -= (sizeof(struct bts_action) +
/* Enable secondary noise source on CPUs where it is present. */
/* Nehemiah stepping 8 and higher */
- if ((c->x86_model == 9) && (c->x86_mask > 7))
+ if ((c->x86_model == 9) && (c->x86_stepping > 7))
lo |= VIA_NOISESRC2;
/* Esther */
if (c->x86_vendor == X86_VENDOR_INTEL) {
if ((c->x86 == 15) &&
(c->x86_model == 6) &&
- (c->x86_mask == 8)) {
+ (c->x86_stepping == 8)) {
pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n");
return -ENODEV;
}
break;
case 7:
- switch (c->x86_mask) {
+ switch (c->x86_stepping) {
case 0:
longhaul_version = TYPE_LONGHAUL_V1;
cpu_model = CPU_SAMUEL2;
break;
case 1 ... 15:
longhaul_version = TYPE_LONGHAUL_V2;
- if (c->x86_mask < 8) {
+ if (c->x86_stepping < 8) {
cpu_model = CPU_SAMUEL2;
cpuname = "C3 'Samuel 2' [C5B]";
} else {
numscales = 32;
memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults));
memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr));
- switch (c->x86_mask) {
+ switch (c->x86_stepping) {
case 0 ... 1:
cpu_model = CPU_NEHEMIAH;
cpuname = "C3 'Nehemiah A' [C5XLOE]";
#endif
/* Errata workaround */
- cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask;
+ cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_stepping;
switch (cpuid) {
case 0x0f07:
case 0x0f0a:
return 0;
}
- if ((c->x86_model == 6) && (c->x86_mask == 0)) {
+ if ((c->x86_model == 6) && (c->x86_stepping == 0)) {
pr_info("K7 660[A0] core detected, enabling errata workarounds\n");
have_a0 = 1;
}
{
__u8 x86; /* CPU family */
__u8 x86_model; /* model */
- __u8 x86_mask; /* stepping */
+ __u8 x86_stepping; /* stepping */
};
enum {
{
if ((c->x86 == x->x86) &&
(c->x86_model == x->x86_model) &&
- (c->x86_mask == x->x86_mask))
+ (c->x86_stepping == x->x86_stepping))
return 1;
return 0;
}
ebx = cpuid_ebx(0x00000001);
ebx &= 0x000000FF;
- pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
+ pr_debug("ebx value is %x, x86_stepping is %x\n", ebx, c->x86_stepping);
- switch (c->x86_mask) {
+ switch (c->x86_stepping) {
case 4:
/*
* B-stepping [M-P4-M]
msr_lo, msr_hi);
if ((msr_hi & (1<<18)) &&
(relaxed_check ? 1 : (msr_hi & (3<<24)))) {
- if (c->x86_mask == 0x01) {
+ if (c->x86_stepping == 0x01) {
pr_debug("early PIII version\n");
return SPEEDSTEP_CPU_PIII_C_EARLY;
} else
* without any error (HW optimizations for later
* CAAM eras), then try again.
*/
+ if (ret)
+ break;
+
rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) ||
- !(rdsta_val & (1 << sh_idx)))
+ !(rdsta_val & (1 << sh_idx))) {
ret = -EAGAIN;
- if (ret)
break;
+ }
+
dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx);
/* Clear the contents before recreating the descriptor */
memset(desc, 0x00, CAAM_CMD_SZ * 7);
printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
- if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) {
+ if (c->x86 == 6 && c->x86_model == 15 && c->x86_stepping == 2) {
ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS;
cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS;
printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n");
uint32_t aes_control;
unsigned long flags;
int err;
+ u8 *iv;
aes_control = SSS_AES_KEY_CHANGE_MODE;
if (mode & FLAGS_AES_DECRYPT)
aes_control |= SSS_AES_MODE_DECRYPT;
- if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CBC)
+ if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CBC) {
aes_control |= SSS_AES_CHAIN_MODE_CBC;
- else if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CTR)
+ iv = req->info;
+ } else if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CTR) {
aes_control |= SSS_AES_CHAIN_MODE_CTR;
+ iv = req->info;
+ } else {
+ iv = NULL; /* AES_ECB */
+ }
if (dev->ctx->keylen == AES_KEYSIZE_192)
aes_control |= SSS_AES_KEY_SIZE_192;
goto outdata_error;
SSS_AES_WRITE(dev, AES_CONTROL, aes_control);
- s5p_set_aes(dev, dev->ctx->aes_key, req->info, dev->ctx->keylen);
+ s5p_set_aes(dev, dev->ctx->aes_key, iv, dev->ctx->keylen);
s5p_set_dma_indata(dev, dev->sg_src);
s5p_set_dma_outdata(dev, dev->sg_dst);
algt = container_of(alg, struct sun4i_ss_alg_template, alg.rng);
ss = algt->ss;
- spin_lock(&ss->slock);
+ spin_lock_bh(&ss->slock);
writel(mode, ss->base + SS_CTL);
}
writel(0, ss->base + SS_CTL);
- spin_unlock(&ss->slock);
- return dlen;
+ spin_unlock_bh(&ss->slock);
+ return 0;
}
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
+ if (!src) {
+ to_talitos_ptr(ptr, 0, 0, is_sec1);
+ return 1;
+ }
if (sg_count == 1) {
to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, is_sec1);
return sg_count;
struct amd64_family_type *fam_type = NULL;
pvt->ext_model = boot_cpu_data.x86_model >> 4;
- pvt->stepping = boot_cpu_data.x86_mask;
+ pvt->stepping = boot_cpu_data.x86_stepping;
pvt->model = boot_cpu_data.x86_model;
pvt->fam = boot_cpu_data.x86;
/*
* extcon-axp288.c - X-Power AXP288 PMIC extcon cable detection driver
*
- * Copyright (C) 2016-2017 Hans de Goede <hdegoede@redhat.com>
* Copyright (C) 2015 Intel Corporation
* Author: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
*
struct device *dev;
struct regmap *regmap;
struct regmap_irq_chip_data *regmap_irqc;
- struct delayed_work det_work;
int irq[EXTCON_IRQ_END];
struct extcon_dev *edev;
unsigned int previous_cable;
- bool first_detect_done;
};
/* Power up/down reason string array */
-static char *axp288_pwr_up_down_info[] = {
+static const char * const axp288_pwr_up_down_info[] = {
"Last wake caused by user pressing the power button",
"Last wake caused by a charger insertion",
"Last wake caused by a battery insertion",
*/
static void axp288_extcon_log_rsi(struct axp288_extcon_info *info)
{
- char **rsi;
+ const char * const *rsi;
unsigned int val, i, clear_mask = 0;
int ret;
regmap_write(info->regmap, AXP288_PS_BOOT_REASON_REG, clear_mask);
}
-static void axp288_chrg_detect_complete(struct axp288_extcon_info *info)
-{
- /*
- * We depend on other drivers to do things like mux the data lines,
- * enable/disable vbus based on the id-pin, etc. Sometimes the BIOS has
- * not set these things up correctly resulting in the initial charger
- * cable type detection giving a wrong result and we end up not charging
- * or charging at only 0.5A.
- *
- * So we schedule a second cable type detection after 2 seconds to
- * give the other drivers time to load and do their thing.
- */
- if (!info->first_detect_done) {
- queue_delayed_work(system_wq, &info->det_work,
- msecs_to_jiffies(2000));
- info->first_detect_done = true;
- }
-}
-
static int axp288_handle_chrg_det_event(struct axp288_extcon_info *info)
{
int ret, stat, cfg, pwr_stat;
info->previous_cable = cable;
}
- axp288_chrg_detect_complete(info);
-
return 0;
dev_det_ret:
return IRQ_HANDLED;
}
-static void axp288_extcon_det_work(struct work_struct *work)
+static void axp288_extcon_enable(struct axp288_extcon_info *info)
{
- struct axp288_extcon_info *info =
- container_of(work, struct axp288_extcon_info, det_work.work);
-
regmap_update_bits(info->regmap, AXP288_BC_GLOBAL_REG,
BC_GLOBAL_RUN, 0);
/* Enable the charger detection logic */
info->regmap = axp20x->regmap;
info->regmap_irqc = axp20x->regmap_irqc;
info->previous_cable = EXTCON_NONE;
- INIT_DELAYED_WORK(&info->det_work, axp288_extcon_det_work);
platform_set_drvdata(pdev, info);
}
/* Start charger cable type detection */
- queue_delayed_work(system_wq, &info->det_work, 0);
+ axp288_extcon_enable(info);
return 0;
}
return ret;
}
- /* queue initial processing of id-pin */
+ /* process id-pin so that we start with the right status */
queue_delayed_work(system_wq, &data->work, 0);
+ flush_delayed_work(&data->work);
platform_set_drvdata(pdev, data);
/* HG _PR3 doesn't seem to work on this A+A weston board */
{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0, 0, 0, 0, 0 },
};
enum drm_connector_status ret = connector_status_disconnected;
int r;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (encoder) {
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
/* check acpi lid status ??? */
amdgpu_connector_update_scratch_regs(connector, ret);
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
+
return ret;
}
enum drm_connector_status ret = connector_status_disconnected;
int r;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
encoder = amdgpu_connector_best_single_encoder(connector);
if (!encoder)
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
enum drm_connector_status ret = connector_status_disconnected;
bool dret = false, broken_edid = false;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
ret = connector->status;
amdgpu_connector_update_scratch_regs(connector, ret);
exit:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
int r;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
ret = connector->status;
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
{
}
-/*
- * This is called after a mode is programmed. It should reverse anything done
- * by the prepare function
- */
-static void cirrus_crtc_commit(struct drm_crtc *crtc)
-{
-}
-
-/*
- * The core can pass us a set of gamma values to program. We actually only
- * use this for 8-bit mode so can't perform smooth fades on deeper modes,
- * but it's a requirement that we provide the function
- */
-static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
- u16 *blue, uint32_t size,
- struct drm_modeset_acquire_ctx *ctx)
+static void cirrus_crtc_load_lut(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
struct cirrus_device *cdev = dev->dev_private;
int i;
if (!crtc->enabled)
- return 0;
+ return;
r = crtc->gamma_store;
g = r + crtc->gamma_size;
WREG8(PALETTE_DATA, *g++ >> 8);
WREG8(PALETTE_DATA, *b++ >> 8);
}
+}
+
+/*
+ * This is called after a mode is programmed. It should reverse anything done
+ * by the prepare function
+ */
+static void cirrus_crtc_commit(struct drm_crtc *crtc)
+{
+ cirrus_crtc_load_lut(crtc);
+}
+
+/*
+ * The core can pass us a set of gamma values to program. We actually only
+ * use this for 8-bit mode so can't perform smooth fades on deeper modes,
+ * but it's a requirement that we provide the function
+ */
+static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+ u16 *blue, uint32_t size,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ cirrus_crtc_load_lut(crtc);
return 0;
}
new_crtc_state->event->base.completion = &commit->flip_done;
new_crtc_state->event->base.completion_release = release_crtc_commit;
drm_crtc_commit_get(commit);
+
+ commit->abort_completion = true;
}
for_each_oldnew_connector_in_state(state, conn, old_conn_state, new_conn_state, i) {
void __drm_atomic_helper_crtc_destroy_state(struct drm_crtc_state *state)
{
if (state->commit) {
+ /*
+ * In the event that a non-blocking commit returns
+ * -ERESTARTSYS before the commit_tail work is queued, we will
+ * have an extra reference to the commit object. Release it, if
+ * the event has not been consumed by the worker.
+ *
+ * state->event may be freed, so we can't directly look at
+ * state->event->base.completion.
+ */
+ if (state->event && state->commit->abort_completion)
+ drm_crtc_commit_put(state->commit);
+
kfree(state->commit->event);
state->commit->event = NULL;
+
drm_crtc_commit_put(state->commit);
}
/* AEO model 0 reports 8 bpc, but is a 6 bpc panel */
{ "AEO", 0, EDID_QUIRK_FORCE_6BPC },
+ /* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */
+ { "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC },
+
/* Belinea 10 15 55 */
{ "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 },
{ "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 },
/* HTC Vive VR Headset */
{ "HVR", 0xaa01, EDID_QUIRK_NON_DESKTOP },
+
+ /* Oculus Rift DK1, DK2, and CV1 VR Headsets */
+ { "OVR", 0x0001, EDID_QUIRK_NON_DESKTOP },
+ { "OVR", 0x0003, EDID_QUIRK_NON_DESKTOP },
+ { "OVR", 0x0004, EDID_QUIRK_NON_DESKTOP },
+
+ /* Windows Mixed Reality Headsets */
+ { "ACR", 0x7fce, EDID_QUIRK_NON_DESKTOP },
+ { "HPN", 0x3515, EDID_QUIRK_NON_DESKTOP },
+ { "LEN", 0x0408, EDID_QUIRK_NON_DESKTOP },
+ { "LEN", 0xb800, EDID_QUIRK_NON_DESKTOP },
+ { "FUJ", 0x1970, EDID_QUIRK_NON_DESKTOP },
+ { "DEL", 0x7fce, EDID_QUIRK_NON_DESKTOP },
+ { "SEC", 0x144a, EDID_QUIRK_NON_DESKTOP },
+ { "AUS", 0xc102, EDID_QUIRK_NON_DESKTOP },
+
+ /* Sony PlayStation VR Headset */
+ { "SNY", 0x0704, EDID_QUIRK_NON_DESKTOP },
};
/*
if (!mm->color_adjust)
return NULL;
- hole = list_first_entry(&mm->hole_stack, typeof(*hole), hole_stack);
- hole_start = __drm_mm_hole_node_start(hole);
- hole_end = hole_start + hole->hole_size;
+ /*
+ * The hole found during scanning should ideally be the first element
+ * in the hole_stack list, but due to side-effects in the driver it
+ * may not be.
+ */
+ list_for_each_entry(hole, &mm->hole_stack, hole_stack) {
+ hole_start = __drm_mm_hole_node_start(hole);
+ hole_end = hole_start + hole->hole_size;
+
+ if (hole_start <= scan->hit_start &&
+ hole_end >= scan->hit_end)
+ break;
+ }
+
+ /* We should only be called after we found the hole previously */
+ DRM_MM_BUG_ON(&hole->hole_stack == &mm->hole_stack);
+ if (unlikely(&hole->hole_stack == &mm->hole_stack))
+ return NULL;
DRM_MM_BUG_ON(hole_start > scan->hit_start);
DRM_MM_BUG_ON(hole_end < scan->hit_end);
schedule_delayed_work(delayed_work, DRM_OUTPUT_POLL_PERIOD);
}
+/**
+ * drm_kms_helper_is_poll_worker - is %current task an output poll worker?
+ *
+ * Determine if %current task is an output poll worker. This can be used
+ * to select distinct code paths for output polling versus other contexts.
+ *
+ * One use case is to avoid a deadlock between the output poll worker and
+ * the autosuspend worker wherein the latter waits for polling to finish
+ * upon calling drm_kms_helper_poll_disable(), while the former waits for
+ * runtime suspend to finish upon calling pm_runtime_get_sync() in a
+ * connector ->detect hook.
+ */
+bool drm_kms_helper_is_poll_worker(void)
+{
+ struct work_struct *work = current_work();
+
+ return work && work->func == output_poll_execute;
+}
+EXPORT_SYMBOL(drm_kms_helper_is_poll_worker);
+
/**
* drm_kms_helper_poll_disable - disable output polling
* @dev: drm_device
node = kcalloc(G2D_CMDLIST_NUM, sizeof(*node), GFP_KERNEL);
if (!node) {
- dev_err(dev, "failed to allocate memory\n");
ret = -ENOMEM;
goto err;
}
struct drm_device *drm_dev = g2d->subdrv.drm_dev;
struct g2d_runqueue_node *runqueue_node = g2d->runqueue_node;
struct drm_exynos_pending_g2d_event *e;
- struct timeval now;
+ struct timespec64 now;
if (list_empty(&runqueue_node->event_list))
return;
e = list_first_entry(&runqueue_node->event_list,
struct drm_exynos_pending_g2d_event, base.link);
- do_gettimeofday(&now);
+ ktime_get_ts64(&now);
e->event.tv_sec = now.tv_sec;
- e->event.tv_usec = now.tv_usec;
+ e->event.tv_usec = now.tv_nsec / NSEC_PER_USEC;
e->event.cmdlist_no = cmdlist_no;
drm_send_event(drm_dev, &e->base);
return -EFAULT;
runqueue_node = kmem_cache_alloc(g2d->runqueue_slab, GFP_KERNEL);
- if (!runqueue_node) {
- dev_err(dev, "failed to allocate memory\n");
+ if (!runqueue_node)
return -ENOMEM;
- }
+
run_cmdlist = &runqueue_node->run_cmdlist;
event_list = &runqueue_node->event_list;
INIT_LIST_HEAD(run_cmdlist);
+++ /dev/null
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *
- * Authors:
- * YoungJun Cho <yj44.cho@samsung.com>
- * Eunchul Kim <chulspro.kim@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_DRM_ROTATOR_H_
-#define _EXYNOS_DRM_ROTATOR_H_
-
-/* TODO */
-
-#endif
/* Configuration I2S input ports. Configure I2S_PIN_SEL_0~4 */
hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_0, HDMI_I2S_SEL_SCLK(5)
| HDMI_I2S_SEL_LRCK(6));
- hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_1, HDMI_I2S_SEL_SDATA1(1)
- | HDMI_I2S_SEL_SDATA2(4));
+
+ hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_1, HDMI_I2S_SEL_SDATA1(3)
+ | HDMI_I2S_SEL_SDATA0(4));
+
hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_2, HDMI_I2S_SEL_SDATA3(1)
| HDMI_I2S_SEL_SDATA2(2));
+
hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_3, HDMI_I2S_SEL_DSD(0));
/* I2S_CON_1 & 2 */
#define EXYNOS_CIIMGEFF_FIN_EMBOSSING (4 << 26)
#define EXYNOS_CIIMGEFF_FIN_SILHOUETTE (5 << 26)
#define EXYNOS_CIIMGEFF_FIN_MASK (7 << 26)
-#define EXYNOS_CIIMGEFF_PAT_CBCR_MASK ((0xff < 13) | (0xff < 0))
+#define EXYNOS_CIIMGEFF_PAT_CBCR_MASK ((0xff << 13) | (0xff << 0))
/* Real input DMA size register */
#define EXYNOS_CIREAL_ISIZE_AUTOLOAD_ENABLE (1 << 31)
/* I2S_PIN_SEL_1 */
#define HDMI_I2S_SEL_SDATA1(x) (((x) & 0x7) << 4)
-#define HDMI_I2S_SEL_SDATA2(x) ((x) & 0x7)
+#define HDMI_I2S_SEL_SDATA0(x) ((x) & 0x7)
/* I2S_PIN_SEL_2 */
#define HDMI_I2S_SEL_SDATA3(x) (((x) & 0x7) << 4)
return ret == 0 ? count : ret;
}
+static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos)
+{
+ struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
+ unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+ struct intel_gvt *gvt = vgpu->gvt;
+ int offset;
+
+ /* Only allow MMIO GGTT entry access */
+ if (index != PCI_BASE_ADDRESS_0)
+ return false;
+
+ offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
+ intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
+
+ return (offset >= gvt->device_info.gtt_start_offset &&
+ offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
+ true : false;
+}
+
static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
size_t count, loff_t *ppos)
{
while (count) {
size_t filled;
- if (count >= 4 && !(*ppos % 4)) {
+ /* Only support GGTT entry 8 bytes read */
+ if (count >= 8 && !(*ppos % 8) &&
+ gtt_entry(mdev, ppos)) {
+ u64 val;
+
+ ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+ ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 8;
+ } else if (count >= 4 && !(*ppos % 4)) {
u32 val;
ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
while (count) {
size_t filled;
- if (count >= 4 && !(*ppos % 4)) {
+ /* Only support GGTT entry 8 bytes write */
+ if (count >= 8 && !(*ppos % 8) &&
+ gtt_entry(mdev, ppos)) {
+ u64 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+ ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 8;
+ } else if (count >= 4 && !(*ppos % 4)) {
u32 val;
if (copy_from_user(&val, buf, sizeof(val)))
{RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
{RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
{RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
+ {RCS, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
{RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */
{RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */
{RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */
TP_PROTO(int old_id, int new_id, char *action, unsigned int reg,
unsigned int old_val, unsigned int new_val),
- TP_ARGS(old_id, new_id, action, reg, new_val, old_val),
+ TP_ARGS(old_id, new_id, action, reg, old_val, new_val),
TP_STRUCT__entry(
__field(int, old_id)
intel_modeset_cleanup(dev);
- /*
- * free the memory space allocated for the child device
- * config parsed from VBT
- */
- if (dev_priv->vbt.child_dev && dev_priv->vbt.child_dev_num) {
- kfree(dev_priv->vbt.child_dev);
- dev_priv->vbt.child_dev = NULL;
- dev_priv->vbt.child_dev_num = 0;
- }
- kfree(dev_priv->vbt.sdvo_lvds_vbt_mode);
- dev_priv->vbt.sdvo_lvds_vbt_mode = NULL;
- kfree(dev_priv->vbt.lfp_lvds_vbt_mode);
- dev_priv->vbt.lfp_lvds_vbt_mode = NULL;
+ intel_bios_cleanup(dev_priv);
vga_switcheroo_unregister_client(pdev);
vga_client_register(pdev, NULL, NULL, NULL);
u32 size;
u8 *data;
const u8 *sequence[MIPI_SEQ_MAX];
+ u8 *deassert_seq; /* Used by fixup_mipi_sequences() */
} dsi;
int crt_ddc_pin;
/* intel_bios.c */
void intel_bios_init(struct drm_i915_private *dev_priv);
+void intel_bios_cleanup(struct drm_i915_private *dev_priv);
bool intel_bios_is_valid_vbt(const void *buf, size_t size);
bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv);
bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin);
case I915_CONTEXT_PARAM_PRIORITY:
{
- int priority = args->value;
+ s64 priority = args->value;
if (args->size)
ret = -EINVAL;
void
i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv)
{
- strncpy(dev_priv->perf.oa.test_config.uuid,
+ strlcpy(dev_priv->perf.oa.test_config.uuid,
"577e8e2c-3fa0-4875-8743-3538d585e3b0",
- UUID_STRING_LEN);
+ sizeof(dev_priv->perf.oa.test_config.uuid));
dev_priv->perf.oa.test_config.id = 1;
dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa;
void
i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv)
{
- strncpy(dev_priv->perf.oa.test_config.uuid,
+ strlcpy(dev_priv->perf.oa.test_config.uuid,
"db41edd4-d8e7-4730-ad11-b9a2d6833503",
- UUID_STRING_LEN);
+ sizeof(dev_priv->perf.oa.test_config.uuid));
dev_priv->perf.oa.test_config.id = 1;
dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa;
return sum;
}
-static void i915_pmu_event_destroy(struct perf_event *event)
+static void engine_event_destroy(struct perf_event *event)
{
- WARN_ON(event->parent);
+ struct drm_i915_private *i915 =
+ container_of(event->pmu, typeof(*i915), pmu.base);
+ struct intel_engine_cs *engine;
+
+ engine = intel_engine_lookup_user(i915,
+ engine_event_class(event),
+ engine_event_instance(event));
+ if (WARN_ON_ONCE(!engine))
+ return;
+
+ if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
+ intel_engine_supports_stats(engine))
+ intel_disable_engine_stats(engine);
}
-static int engine_event_init(struct perf_event *event)
+static void i915_pmu_event_destroy(struct perf_event *event)
{
- struct drm_i915_private *i915 =
- container_of(event->pmu, typeof(*i915), pmu.base);
+ WARN_ON(event->parent);
- if (!intel_engine_lookup_user(i915, engine_event_class(event),
- engine_event_instance(event)))
- return -ENODEV;
+ if (is_engine_event(event))
+ engine_event_destroy(event);
+}
- switch (engine_event_sample(event)) {
+static int
+engine_event_status(struct intel_engine_cs *engine,
+ enum drm_i915_pmu_engine_sample sample)
+{
+ switch (sample) {
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
break;
case I915_SAMPLE_SEMA:
- if (INTEL_GEN(i915) < 6)
+ if (INTEL_GEN(engine->i915) < 6)
return -ENODEV;
break;
default:
return 0;
}
+static int engine_event_init(struct perf_event *event)
+{
+ struct drm_i915_private *i915 =
+ container_of(event->pmu, typeof(*i915), pmu.base);
+ struct intel_engine_cs *engine;
+ u8 sample;
+ int ret;
+
+ engine = intel_engine_lookup_user(i915, engine_event_class(event),
+ engine_event_instance(event));
+ if (!engine)
+ return -ENODEV;
+
+ sample = engine_event_sample(event);
+ ret = engine_event_status(engine, sample);
+ if (ret)
+ return ret;
+
+ if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
+ ret = intel_enable_engine_stats(engine);
+
+ return ret;
+}
+
static int i915_pmu_event_init(struct perf_event *event)
{
struct drm_i915_private *i915 =
return 0;
}
-static u64 __i915_pmu_event_read(struct perf_event *event)
+static u64 __get_rc6(struct drm_i915_private *i915)
+{
+ u64 val;
+
+ val = intel_rc6_residency_ns(i915,
+ IS_VALLEYVIEW(i915) ?
+ VLV_GT_RENDER_RC6 :
+ GEN6_GT_GFX_RC6);
+
+ if (HAS_RC6p(i915))
+ val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+ if (HAS_RC6pp(i915))
+ val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+ return val;
+}
+
+static u64 get_rc6(struct drm_i915_private *i915, bool locked)
+{
+#if IS_ENABLED(CONFIG_PM)
+ unsigned long flags;
+ u64 val;
+
+ if (intel_runtime_pm_get_if_in_use(i915)) {
+ val = __get_rc6(i915);
+ intel_runtime_pm_put(i915);
+
+ /*
+ * If we are coming back from being runtime suspended we must
+ * be careful not to report a larger value than returned
+ * previously.
+ */
+
+ if (!locked)
+ spin_lock_irqsave(&i915->pmu.lock, flags);
+
+ if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+ i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+ i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
+ } else {
+ val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
+ }
+
+ if (!locked)
+ spin_unlock_irqrestore(&i915->pmu.lock, flags);
+ } else {
+ struct pci_dev *pdev = i915->drm.pdev;
+ struct device *kdev = &pdev->dev;
+ unsigned long flags2;
+
+ /*
+ * We are runtime suspended.
+ *
+ * Report the delta from when the device was suspended to now,
+ * on top of the last known real value, as the approximated RC6
+ * counter value.
+ */
+ if (!locked)
+ spin_lock_irqsave(&i915->pmu.lock, flags);
+
+ spin_lock_irqsave(&kdev->power.lock, flags2);
+
+ if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
+ i915->pmu.suspended_jiffies_last =
+ kdev->power.suspended_jiffies;
+
+ val = kdev->power.suspended_jiffies -
+ i915->pmu.suspended_jiffies_last;
+ val += jiffies - kdev->power.accounting_timestamp;
+
+ spin_unlock_irqrestore(&kdev->power.lock, flags2);
+
+ val = jiffies_to_nsecs(val);
+ val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
+ i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+
+ if (!locked)
+ spin_unlock_irqrestore(&i915->pmu.lock, flags);
+ }
+
+ return val;
+#else
+ return __get_rc6(i915);
+#endif
+}
+
+static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
{
struct drm_i915_private *i915 =
container_of(event->pmu, typeof(*i915), pmu.base);
if (WARN_ON_ONCE(!engine)) {
/* Do nothing */
} else if (sample == I915_SAMPLE_BUSY &&
- engine->pmu.busy_stats) {
+ intel_engine_supports_stats(engine)) {
val = ktime_to_ns(intel_engine_get_busy_time(engine));
} else {
val = engine->pmu.sample[sample].cur;
val = count_interrupts(i915);
break;
case I915_PMU_RC6_RESIDENCY:
- intel_runtime_pm_get(i915);
- val = intel_rc6_residency_ns(i915,
- IS_VALLEYVIEW(i915) ?
- VLV_GT_RENDER_RC6 :
- GEN6_GT_GFX_RC6);
- if (HAS_RC6p(i915))
- val += intel_rc6_residency_ns(i915,
- GEN6_GT_GFX_RC6p);
- if (HAS_RC6pp(i915))
- val += intel_rc6_residency_ns(i915,
- GEN6_GT_GFX_RC6pp);
- intel_runtime_pm_put(i915);
+ val = get_rc6(i915, locked);
break;
}
}
again:
prev = local64_read(&hwc->prev_count);
- new = __i915_pmu_event_read(event);
+ new = __i915_pmu_event_read(event, false);
if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
goto again;
local64_add(new - prev, &event->count);
}
-static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
-{
- return intel_engine_supports_stats(engine) &&
- (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
-}
-
static void i915_pmu_enable(struct perf_event *event)
{
struct drm_i915_private *i915 =
GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
- if (engine->pmu.enable_count[sample]++ == 0) {
- /*
- * Enable engine busy stats tracking if needed or
- * alternatively cancel the scheduled disable.
- *
- * If the delayed disable was pending, cancel it and
- * in this case do not enable since it already is.
- */
- if (engine_needs_busy_stats(engine) &&
- !engine->pmu.busy_stats) {
- engine->pmu.busy_stats = true;
- if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
- intel_enable_engine_stats(engine);
- }
- }
+ engine->pmu.enable_count[sample]++;
}
/*
* for all listeners. Even when the event was already enabled and has
* an existing non-zero value.
*/
- local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
+ local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true));
spin_unlock_irqrestore(&i915->pmu.lock, flags);
}
-static void __disable_busy_stats(struct work_struct *work)
-{
- struct intel_engine_cs *engine =
- container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
-
- intel_disable_engine_stats(engine);
-}
-
static void i915_pmu_disable(struct perf_event *event)
{
struct drm_i915_private *i915 =
* Decrement the reference count and clear the enabled
* bitmask when the last listener on an event goes away.
*/
- if (--engine->pmu.enable_count[sample] == 0) {
+ if (--engine->pmu.enable_count[sample] == 0)
engine->pmu.enable &= ~BIT(sample);
- if (!engine_needs_busy_stats(engine) &&
- engine->pmu.busy_stats) {
- engine->pmu.busy_stats = false;
- /*
- * We request a delayed disable to handle the
- * rapid on/off cycles on events, which can
- * happen when tools like perf stat start, in a
- * nicer way.
- *
- * In addition, this also helps with busy stats
- * accuracy with background CPU offline/online
- * migration events.
- */
- queue_delayed_work(system_wq,
- &engine->pmu.disable_busy_stats,
- round_jiffies_up_relative(HZ));
- }
- }
}
GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
void i915_pmu_register(struct drm_i915_private *i915)
{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
int ret;
if (INTEL_GEN(i915) <= 2) {
hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
i915->pmu.timer.function = i915_sample;
- for_each_engine(engine, i915, id)
- INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
- __disable_busy_stats);
-
ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
if (ret)
goto err;
void i915_pmu_unregister(struct drm_i915_private *i915)
{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
if (!i915->pmu.base.event_init)
return;
hrtimer_cancel(&i915->pmu.timer);
- for_each_engine(engine, i915, id) {
- GEM_BUG_ON(engine->pmu.busy_stats);
- flush_delayed_work(&engine->pmu.disable_busy_stats);
- }
-
i915_pmu_unregister_cpuhp_state(i915);
perf_pmu_unregister(&i915->pmu.base);
enum {
__I915_SAMPLE_FREQ_ACT = 0,
__I915_SAMPLE_FREQ_REQ,
+ __I915_SAMPLE_RC6,
+ __I915_SAMPLE_RC6_ESTIMATED,
__I915_NUM_PMU_SAMPLERS
};
* struct intel_engine_cs.
*/
struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
+ /**
+ * @suspended_jiffies_last: Cached suspend time from PM core.
+ */
+ unsigned long suspended_jiffies_last;
};
#ifdef CONFIG_PERF_EVENTS
return 0;
}
+/*
+ * Get len of pre-fixed deassert fragment from a v1 init OTP sequence,
+ * skip all delay + gpio operands and stop at the first DSI packet op.
+ */
+static int get_init_otp_deassert_fragment_len(struct drm_i915_private *dev_priv)
+{
+ const u8 *data = dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP];
+ int index, len;
+
+ if (WARN_ON(!data || dev_priv->vbt.dsi.seq_version != 1))
+ return 0;
+
+ /* index = 1 to skip sequence byte */
+ for (index = 1; data[index] != MIPI_SEQ_ELEM_END; index += len) {
+ switch (data[index]) {
+ case MIPI_SEQ_ELEM_SEND_PKT:
+ return index == 1 ? 0 : index;
+ case MIPI_SEQ_ELEM_DELAY:
+ len = 5; /* 1 byte for operand + uint32 */
+ break;
+ case MIPI_SEQ_ELEM_GPIO:
+ len = 3; /* 1 byte for op, 1 for gpio_nr, 1 for value */
+ break;
+ default:
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Some v1 VBT MIPI sequences do the deassert in the init OTP sequence.
+ * The deassert must be done before calling intel_dsi_device_ready, so for
+ * these devices we split the init OTP sequence into a deassert sequence and
+ * the actual init OTP part.
+ */
+static void fixup_mipi_sequences(struct drm_i915_private *dev_priv)
+{
+ u8 *init_otp;
+ int len;
+
+ /* Limit this to VLV for now. */
+ if (!IS_VALLEYVIEW(dev_priv))
+ return;
+
+ /* Limit this to v1 vid-mode sequences */
+ if (dev_priv->vbt.dsi.config->is_cmd_mode ||
+ dev_priv->vbt.dsi.seq_version != 1)
+ return;
+
+ /* Only do this if there are otp and assert seqs and no deassert seq */
+ if (!dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] ||
+ !dev_priv->vbt.dsi.sequence[MIPI_SEQ_ASSERT_RESET] ||
+ dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET])
+ return;
+
+ /* The deassert-sequence ends at the first DSI packet */
+ len = get_init_otp_deassert_fragment_len(dev_priv);
+ if (!len)
+ return;
+
+ DRM_DEBUG_KMS("Using init OTP fragment to deassert reset\n");
+
+ /* Copy the fragment, update seq byte and terminate it */
+ init_otp = (u8 *)dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP];
+ dev_priv->vbt.dsi.deassert_seq = kmemdup(init_otp, len + 1, GFP_KERNEL);
+ if (!dev_priv->vbt.dsi.deassert_seq)
+ return;
+ dev_priv->vbt.dsi.deassert_seq[0] = MIPI_SEQ_DEASSERT_RESET;
+ dev_priv->vbt.dsi.deassert_seq[len] = MIPI_SEQ_ELEM_END;
+ /* Use the copy for deassert */
+ dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET] =
+ dev_priv->vbt.dsi.deassert_seq;
+ /* Replace the last byte of the fragment with init OTP seq byte */
+ init_otp[len - 1] = MIPI_SEQ_INIT_OTP;
+ /* And make MIPI_MIPI_SEQ_INIT_OTP point to it */
+ dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1;
+}
+
static void
parse_mipi_sequence(struct drm_i915_private *dev_priv,
const struct bdb_header *bdb)
dev_priv->vbt.dsi.size = seq_size;
dev_priv->vbt.dsi.seq_version = sequence->version;
+ fixup_mipi_sequences(dev_priv);
+
DRM_DEBUG_DRIVER("MIPI related VBT parsing complete\n");
return;
pci_unmap_rom(pdev, bios);
}
+/**
+ * intel_bios_cleanup - Free any resources allocated by intel_bios_init()
+ * @dev_priv: i915 device instance
+ */
+void intel_bios_cleanup(struct drm_i915_private *dev_priv)
+{
+ kfree(dev_priv->vbt.child_dev);
+ dev_priv->vbt.child_dev = NULL;
+ dev_priv->vbt.child_dev_num = 0;
+ kfree(dev_priv->vbt.sdvo_lvds_vbt_mode);
+ dev_priv->vbt.sdvo_lvds_vbt_mode = NULL;
+ kfree(dev_priv->vbt.lfp_lvds_vbt_mode);
+ dev_priv->vbt.lfp_lvds_vbt_mode = NULL;
+ kfree(dev_priv->vbt.dsi.data);
+ dev_priv->vbt.dsi.data = NULL;
+ kfree(dev_priv->vbt.dsi.pps);
+ dev_priv->vbt.dsi.pps = NULL;
+ kfree(dev_priv->vbt.dsi.config);
+ dev_priv->vbt.dsi.config = NULL;
+ kfree(dev_priv->vbt.dsi.deassert_seq);
+ dev_priv->vbt.dsi.deassert_seq = NULL;
+}
+
/**
* intel_bios_is_tv_present - is integrated TV present in VBT
* @dev_priv: i915 device instance
spin_unlock_irq(&b->rb_lock);
}
-static bool signal_valid(const struct drm_i915_gem_request *request)
-{
- return intel_wait_check_request(&request->signaling.wait, request);
-}
-
static bool signal_complete(const struct drm_i915_gem_request *request)
{
if (!request)
return false;
- /* If another process served as the bottom-half it may have already
- * signalled that this wait is already completed.
- */
- if (intel_wait_complete(&request->signaling.wait))
- return signal_valid(request);
-
- /* Carefully check if the request is complete, giving time for the
+ /*
+ * Carefully check if the request is complete, giving time for the
* seqno to be visible or if the GPU hung.
*/
- if (__i915_request_irq_complete(request))
- return true;
-
- return false;
+ return __i915_request_irq_complete(request);
}
static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
request = i915_gem_request_get_rcu(request);
rcu_read_unlock();
if (signal_complete(request)) {
- local_bh_disable();
- dma_fence_signal(&request->fence);
- local_bh_enable(); /* kick start the tasklets */
+ if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &request->fence.flags)) {
+ local_bh_disable();
+ dma_fence_signal(&request->fence);
+ GEM_BUG_ON(!i915_gem_request_completed(request));
+ local_bh_enable(); /* kick start the tasklets */
+ }
spin_lock_irq(&b->rb_lock);
if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9)
min_cdclk = max(2 * 96000, min_cdclk);
+ /*
+ * On Valleyview some DSI panels lose (v|h)sync when the clock is lower
+ * than 320000KHz.
+ */
+ if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) &&
+ IS_VALLEYVIEW(dev_priv))
+ min_cdclk = max(320000, min_cdclk);
+
if (min_cdclk > dev_priv->max_cdclk_freq) {
DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n",
min_cdclk, dev_priv->max_cdclk_freq);
struct drm_i915_private *dev_priv = engine->i915;
bool idle = true;
- intel_runtime_pm_get(dev_priv);
+ /* If the whole device is asleep, the engine must be idle */
+ if (!intel_runtime_pm_get_if_in_use(dev_priv))
+ return true;
/* First check that no commands are left in the ring */
if ((I915_READ_HEAD(engine) & HEAD_ADDR) !=
*/
int intel_enable_engine_stats(struct intel_engine_cs *engine)
{
+ struct intel_engine_execlists *execlists = &engine->execlists;
unsigned long flags;
+ int err = 0;
if (!intel_engine_supports_stats(engine))
return -ENODEV;
+ tasklet_disable(&execlists->tasklet);
spin_lock_irqsave(&engine->stats.lock, flags);
- if (engine->stats.enabled == ~0)
- goto busy;
+
+ if (unlikely(engine->stats.enabled == ~0)) {
+ err = -EBUSY;
+ goto unlock;
+ }
+
if (engine->stats.enabled++ == 0) {
- struct intel_engine_execlists *execlists = &engine->execlists;
const struct execlist_port *port = execlists->port;
unsigned int num_ports = execlists_num_ports(execlists);
if (engine->stats.active)
engine->stats.start = engine->stats.enabled_at;
}
- spin_unlock_irqrestore(&engine->stats.lock, flags);
-
- return 0;
-busy:
+unlock:
spin_unlock_irqrestore(&engine->stats.lock, flags);
+ tasklet_enable(&execlists->tasklet);
- return -EBUSY;
+ return err;
}
static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
*/
#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
- /**
- * @busy_stats: Has enablement of engine stats tracking been
- * requested.
- */
- bool busy_stats;
- /**
- * @disable_busy_stats: Work item for busy stats disabling.
- *
- * Same as with @enable_busy_stats action, with the difference
- * that we delay it in case there are rapid enable-disable
- * actions, which can happen during tool startup (like perf
- * stat).
- */
- struct delayed_work disable_busy_stats;
} pmu;
/*
#include "meson_venc.h"
#include "meson_vpp.h"
#include "meson_viu.h"
+#include "meson_canvas.h"
#include "meson_registers.h"
/* CRTC definition */
} else
meson_vpp_disable_interlace_vscaler_osd1(priv);
+ meson_canvas_setup(priv, MESON_CANVAS_ID_OSD1,
+ priv->viu.osd1_addr, priv->viu.osd1_stride,
+ priv->viu.osd1_height, MESON_CANVAS_WRAP_NONE,
+ MESON_CANVAS_BLKMODE_LINEAR);
+
/* Enable OSD1 */
writel_bits_relaxed(VPP_OSD1_POSTBLEND, VPP_OSD1_POSTBLEND,
priv->io_base + _REG(VPP_MISC));
bool osd1_commit;
uint32_t osd1_ctrl_stat;
uint32_t osd1_blk0_cfg[5];
+ uint32_t osd1_addr;
+ uint32_t osd1_stride;
+ uint32_t osd1_height;
} viu;
struct {
/* Update Canvas with buffer address */
gem = drm_fb_cma_get_gem_obj(fb, 0);
- meson_canvas_setup(priv, MESON_CANVAS_ID_OSD1,
- gem->paddr, fb->pitches[0],
- fb->height, MESON_CANVAS_WRAP_NONE,
- MESON_CANVAS_BLKMODE_LINEAR);
+ priv->viu.osd1_addr = gem->paddr;
+ priv->viu.osd1_stride = fb->pitches[0];
+ priv->viu.osd1_height = fb->height;
spin_unlock_irqrestore(&priv->drm->event_lock, flags);
}
nv_connector->edid = NULL;
}
- ret = pm_runtime_get_sync(connector->dev->dev);
- if (ret < 0 && ret != -EACCES)
- return conn_status;
+ /* Outputs are only polled while runtime active, so acquiring a
+ * runtime PM ref here is unnecessary (and would deadlock upon
+ * runtime suspend because it waits for polling to finish).
+ */
+ if (!drm_kms_helper_is_poll_worker()) {
+ ret = pm_runtime_get_sync(connector->dev->dev);
+ if (ret < 0 && ret != -EACCES)
+ return conn_status;
+ }
nv_encoder = nouveau_connector_ddc_detect(connector);
if (nv_encoder && (i2c = nv_encoder->i2c) != NULL) {
out:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return conn_status;
}
void
nvkm_therm_clkgate_enable(struct nvkm_therm *therm)
{
- if (!therm->func->clkgate_enable || !therm->clkgating_enabled)
+ if (!therm || !therm->func->clkgate_enable || !therm->clkgating_enabled)
return;
nvkm_debug(&therm->subdev,
void
nvkm_therm_clkgate_fini(struct nvkm_therm *therm, bool suspend)
{
- if (!therm->func->clkgate_fini || !therm->clkgating_enabled)
+ if (!therm || !therm->func->clkgate_fini || !therm->clkgating_enabled)
return;
nvkm_debug(&therm->subdev,
nvkm_therm_clkgate_init(struct nvkm_therm *therm,
const struct nvkm_therm_clkgate_pack *p)
{
- if (!therm->func->clkgate_init || !therm->clkgating_enabled)
+ if (!therm || !therm->func->clkgate_init || !therm->clkgating_enabled)
return;
therm->func->clkgate_init(therm, p);
enum drm_connector_status ret = connector_status_disconnected;
int r;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (encoder) {
struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
/* check acpi lid status ??? */
radeon_connector_update_scratch_regs(connector, ret);
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
+
return ret;
}
enum drm_connector_status ret = connector_status_disconnected;
int r;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
encoder = radeon_best_single_encoder(connector);
if (!encoder)
radeon_connector_update_scratch_regs(connector, ret);
out:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
if (!radeon_connector->dac_load_detect)
return ret;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
encoder = radeon_best_single_encoder(connector);
if (!encoder)
if (ret == connector_status_connected)
ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, false);
radeon_connector_update_scratch_regs(connector, ret);
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
+
return ret;
}
enum drm_connector_status ret = connector_status_disconnected;
bool dret = false, broken_edid = false;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (radeon_connector->detected_hpd_without_ddc) {
force = true;
}
exit:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
if (radeon_dig_connector->is_mst)
return connector_status_disconnected;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (!force && radeon_check_hpd_status_unchanged(connector)) {
ret = connector->status;
}
out:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
{
struct ipu_soc *ipu = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
- const int int_reg[] = { 0, 1, 2, 3, 10, 11, 12, 13, 14};
+ static const int int_reg[] = { 0, 1, 2, 3, 10, 11, 12, 13, 14};
chained_irq_enter(chip, desc);
{
struct ipu_soc *ipu = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
- const int int_reg[] = { 4, 5, 8, 9};
+ static const int int_reg[] = { 4, 5, 8, 9};
chained_irq_enter(chip, desc);
case V4L2_PIX_FMT_SGBRG8:
case V4L2_PIX_FMT_SGRBG8:
case V4L2_PIX_FMT_SRGGB8:
+ case V4L2_PIX_FMT_GREY:
offset = image->rect.left + image->rect.top * pix->bytesperline;
break;
case V4L2_PIX_FMT_SBGGR16:
case V4L2_PIX_FMT_SGBRG16:
case V4L2_PIX_FMT_SGRBG16:
case V4L2_PIX_FMT_SRGGB16:
+ case V4L2_PIX_FMT_Y16:
offset = image->rect.left * 2 +
image->rect.top * pix->bytesperline;
break;
case MEDIA_BUS_FMT_SGBRG10_1X10:
case MEDIA_BUS_FMT_SGRBG10_1X10:
case MEDIA_BUS_FMT_SRGGB10_1X10:
+ case MEDIA_BUS_FMT_Y10_1X10:
cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER;
cfg->mipi_dt = MIPI_DT_RAW10;
cfg->data_width = IPU_CSI_DATA_WIDTH_10;
case MEDIA_BUS_FMT_SGBRG12_1X12:
case MEDIA_BUS_FMT_SGRBG12_1X12:
case MEDIA_BUS_FMT_SRGGB12_1X12:
+ case MEDIA_BUS_FMT_Y12_1X12:
cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER;
cfg->mipi_dt = MIPI_DT_RAW12;
cfg->data_width = IPU_CSI_DATA_WIDTH_12;
if (pre_node == pre->dev->of_node) {
mutex_unlock(&ipu_pre_list_mutex);
device_link_add(dev, pre->dev, DL_FLAG_AUTOREMOVE);
+ of_node_put(pre_node);
return pre;
}
}
mutex_unlock(&ipu_pre_list_mutex);
+ of_node_put(pre_node);
+
return NULL;
}
mutex_unlock(&ipu_prg_list_mutex);
device_link_add(dev, prg->dev, DL_FLAG_AUTOREMOVE);
prg->id = ipu_id;
+ of_node_put(prg_node);
return prg;
}
}
mutex_unlock(&ipu_prg_list_mutex);
+ of_node_put(prg_node);
+
return NULL;
}
#define USB_DEVICE_ID_LD_MICROCASSYTIME 0x1033
#define USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE 0x1035
#define USB_DEVICE_ID_LD_MICROCASSYPH 0x1038
+#define USB_DEVICE_ID_LD_POWERANALYSERCASSY 0x1040
+#define USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY 0x1042
+#define USB_DEVICE_ID_LD_MACHINETESTCASSY 0x1043
#define USB_DEVICE_ID_LD_JWM 0x1080
#define USB_DEVICE_ID_LD_DMMP 0x1081
#define USB_DEVICE_ID_LD_UMIP 0x1090
{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTIME) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYPH) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERANALYSERCASSY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETESTCASSY) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) },
for (i = 0; i < ARRAY_SIZE(tjmax_model_table); i++) {
const struct tjmax_model *tm = &tjmax_model_table[i];
if (c->x86_model == tm->model &&
- (tm->mask == ANY || c->x86_mask == tm->mask))
+ (tm->mask == ANY || c->x86_stepping == tm->mask))
return tm->tjmax;
}
/* Early chips have no MSR for TjMax */
- if (c->x86_model == 0xf && c->x86_mask < 4)
+ if (c->x86_model == 0xf && c->x86_stepping < 4)
usemsr_ee = 0;
if (c->x86_model > 0xe && usemsr_ee) {
* Readings might stop update when processor visited too deep sleep,
* fixed for stepping D0 (6EC).
*/
- if (c->x86_model == 0xe && c->x86_mask < 0xc && c->microcode < 0x39) {
+ if (c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) {
pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n");
return -ENODEV;
}
if (c->x86 < 6) /* Any CPU with family lower than 6 */
return 0; /* doesn't have VID */
- vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_mask, c->x86_vendor);
+ vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_stepping, c->x86_vendor);
if (vrm_ret == 134)
vrm_ret = get_via_model_d_vrm();
if (vrm_ret == 0)
data->read_tempreg(data->pdev, ®val);
temp = (regval >> 21) * 125;
- temp -= data->temp_offset;
+ if (temp > data->temp_offset)
+ temp -= data->temp_offset;
+ else
+ temp = 0;
return sprintf(buf, "%u\n", temp);
}
* and AM3 formats, but that's the best we can do.
*/
return boot_cpu_data.x86_model < 4 ||
- (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask <= 2);
+ (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2);
}
static int k10temp_probe(struct pci_dev *pdev,
return -ENOMEM;
model = boot_cpu_data.x86_model;
- stepping = boot_cpu_data.x86_mask;
+ stepping = boot_cpu_data.x86_stepping;
/* feature available since SH-C0, exclude older revisions */
if ((model == 4 && stepping == 0) ||
Wildcat Point (PCH)
Wildcat Point-LP (PCH)
BayTrail (SOC)
+ Braswell (SOC)
Sunrise Point-H (PCH)
Sunrise Point-LP (PCH)
+ Kaby Lake-H (PCH)
DNV (SOC)
Broxton (SOC)
Lewisburg (PCH)
#define BCM2835_I2C_S_CLKT BIT(9)
#define BCM2835_I2C_S_LEN BIT(10) /* Fake bit for SW error reporting */
+#define BCM2835_I2C_FEDL_SHIFT 16
+#define BCM2835_I2C_REDL_SHIFT 0
+
#define BCM2835_I2C_CDIV_MIN 0x0002
#define BCM2835_I2C_CDIV_MAX 0xFFFE
static int bcm2835_i2c_set_divider(struct bcm2835_i2c_dev *i2c_dev)
{
- u32 divider;
+ u32 divider, redl, fedl;
divider = DIV_ROUND_UP(clk_get_rate(i2c_dev->clk),
i2c_dev->bus_clk_rate);
bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_DIV, divider);
+ /*
+ * Number of core clocks to wait after falling edge before
+ * outputting the next data bit. Note that both FEDL and REDL
+ * can't be greater than CDIV/2.
+ */
+ fedl = max(divider / 16, 1u);
+
+ /*
+ * Number of core clocks to wait after rising edge before
+ * sampling the next incoming data bit.
+ */
+ redl = max(divider / 4, 1u);
+
+ bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_DEL,
+ (fedl << BCM2835_I2C_FEDL_SHIFT) |
+ (redl << BCM2835_I2C_REDL_SHIFT));
return 0;
}
i2c_dw_disable_int(dev);
/* Enable the adapter */
- __i2c_dw_enable(dev, true);
+ __i2c_dw_enable_and_wait(dev, true);
/* Clear and enable interrupts */
dw_readl(dev, DW_IC_CLR_INTR);
gpio = devm_gpiod_get(dev->dev, "scl", GPIOD_OUT_HIGH);
if (IS_ERR(gpio)) {
r = PTR_ERR(gpio);
- if (r == -ENOENT)
+ if (r == -ENOENT || r == -ENOSYS)
return 0;
return r;
}
* Wildcat Point (PCH) 0x8ca2 32 hard yes yes yes
* Wildcat Point-LP (PCH) 0x9ca2 32 hard yes yes yes
* BayTrail (SOC) 0x0f12 32 hard yes yes yes
+ * Braswell (SOC) 0x2292 32 hard yes yes yes
* Sunrise Point-H (PCH) 0xa123 32 hard yes yes yes
* Sunrise Point-LP (PCH) 0x9d23 32 hard yes yes yes
* DNV (SOC) 0x19df 32 hard yes yes yes
platform_set_drvdata(pdev, adap);
init_completion(&siic->done);
- /* Controller Initalisation */
+ /* Controller initialisation */
writel(SIRFSOC_I2C_RESET, siic->base + SIRFSOC_I2C_CTRL);
while (readl(siic->base + SIRFSOC_I2C_CTRL) & SIRFSOC_I2C_RESET)
* but they start to affect the speed when clock is set to faster
* frequencies.
* Through the actual tests, use the different user_div value(which
- * in the divider formular 'Fio / (Fi2c * user_div)') to adapt
+ * in the divider formula 'Fio / (Fi2c * user_div)') to adapt
* the different ranges of i2c bus clock frequency, to make the SCL
* more accurate.
*/
ASPEED_ADC_INIT_POLLING_TIME,
ASPEED_ADC_INIT_TIMEOUT);
if (ret)
- goto scaler_error;
+ goto poll_timeout_error;
}
/* Start all channels in normal mode. */
writel(ASPEED_OPERATION_MODE_POWER_DOWN,
data->base + ASPEED_REG_ENGINE_CONTROL);
clk_disable_unprepare(data->clk_scaler->clk);
-reset_error:
- reset_control_assert(data->rst);
clk_enable_error:
+poll_timeout_error:
+ reset_control_assert(data->rst);
+reset_error:
clk_hw_unregister_divider(data->clk_scaler);
scaler_error:
clk_hw_unregister_divider(data->clk_prescaler);
int ret;
u32 val;
- /* Clear ADRDY by writing one, then enable ADC */
- stm32_adc_set_bits(adc, STM32H7_ADC_ISR, STM32H7_ADRDY);
stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADEN);
/* Poll for ADRDY to be set (after adc startup time) */
val & STM32H7_ADRDY,
100, STM32_ADC_TIMEOUT_US);
if (ret) {
- stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_ADEN);
+ stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADDIS);
dev_err(&indio_dev->dev, "Failed to enable ADC\n");
+ } else {
+ /* Clear ADRDY by writing one */
+ stm32_adc_set_bits(adc, STM32H7_ADC_ISR, STM32H7_ADRDY);
}
return ret;
if (adis->trig == NULL)
return -ENOMEM;
+ adis->trig->dev.parent = &adis->spi->dev;
+ adis->trig->ops = &adis_trigger_ops;
+ iio_trigger_set_drvdata(adis->trig, adis);
+
ret = request_irq(adis->spi->irq,
&iio_trigger_generic_data_rdy_poll,
IRQF_TRIGGER_RISING,
if (ret)
goto error_free_trig;
- adis->trig->dev.parent = &adis->spi->dev;
- adis->trig->ops = &adis_trigger_ops;
- iio_trigger_set_drvdata(adis->trig, adis);
ret = iio_trigger_register(adis->trig);
indio_dev->trig = iio_trigger_get(adis->trig);
struct iio_dev *indio_dev = filp->private_data;
struct iio_buffer *rb = indio_dev->buffer;
- if (!indio_dev->info)
+ if (!indio_dev->info || rb == NULL)
return 0;
poll_wait(filp, &rb->pollq, wait);
config SRF08
tristate "Devantech SRF02/SRF08/SRF10 ultrasonic ranger sensor"
+ select IIO_BUFFER
+ select IIO_TRIGGERED_BUFFER
depends on I2C
help
Say Y here to build a driver for Devantech SRF02/SRF08/SRF10
IPV6_ADDR_LINKLOCAL;
struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
&src_addr->sin6_addr, net_dev->ifindex,
- strict);
+ NULL, strict);
bool ret;
if (!rt)
.exit = cma_exit_net,
.id = &cma_pernet_id,
.size = sizeof(struct cma_pernet),
+ .async = true,
};
static int __init cma_init(void)
static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
struct ib_pd *pd,
struct ib_qp_init_attr *attr,
- struct ib_udata *udata)
+ struct ib_udata *udata,
+ struct ib_uobject *uobj)
{
struct ib_qp *qp;
+ if (!dev->create_qp)
+ return ERR_PTR(-EOPNOTSUPP);
+
qp = dev->create_qp(pd, attr, udata);
if (IS_ERR(qp))
return qp;
qp->device = dev;
qp->pd = pd;
+ qp->uobject = uobj;
/*
* We don't track XRC QPs for now, because they don't have PD
* and more importantly they are created internaly by driver,
*/
uobj->context = context;
uobj->type = type;
- atomic_set(&uobj->usecnt, 0);
+ /*
+ * Allocated objects start out as write locked to deny any other
+ * syscalls from accessing them until they are committed. See
+ * rdma_alloc_commit_uobject
+ */
+ atomic_set(&uobj->usecnt, -1);
kref_init(&uobj->ref);
return uobj;
goto free;
}
- uverbs_uobject_get(uobj);
+ /*
+ * The idr_find is guaranteed to return a pointer to something that
+ * isn't freed yet, or NULL, as the free after idr_remove goes through
+ * kfree_rcu(). However the object may still have been released and
+ * kfree() could be called at any time.
+ */
+ if (!kref_get_unless_zero(&uobj->ref))
+ uobj = ERR_PTR(-ENOENT);
+
free:
rcu_read_unlock();
return uobj;
return ret;
}
-static void lockdep_check(struct ib_uobject *uobj, bool exclusive)
+static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive)
{
#ifdef CONFIG_LOCKDEP
if (exclusive)
- WARN_ON(atomic_read(&uobj->usecnt) > 0);
+ WARN_ON(atomic_read(&uobj->usecnt) != -1);
else
- WARN_ON(atomic_read(&uobj->usecnt) == -1);
+ WARN_ON(atomic_read(&uobj->usecnt) <= 0);
#endif
}
WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
return 0;
}
- lockdep_check(uobj, true);
+ assert_uverbs_usecnt(uobj, true);
ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
up_read(&ucontext->cleanup_rwsem);
WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
return 0;
}
- lockdep_check(uobject, true);
+ assert_uverbs_usecnt(uobject, true);
ret = uobject->type->type_class->remove_commit(uobject,
RDMA_REMOVE_DESTROY);
if (ret)
- return ret;
+ goto out;
uobject->type = &null_obj_type;
+out:
up_read(&ucontext->cleanup_rwsem);
- return 0;
+ return ret;
}
static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
return ret;
}
+ /* matches atomic_set(-1) in alloc_uobj */
+ assert_uverbs_usecnt(uobj, true);
+ atomic_set(&uobj->usecnt, 0);
+
uobj->type->type_class->alloc_commit(uobj);
up_read(&uobj->context->cleanup_rwsem);
void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
{
- lockdep_check(uobj, exclusive);
+ assert_uverbs_usecnt(uobj, exclusive);
uobj->type->type_class->lookup_put(uobj, exclusive);
/*
* In order to unlock an object, either decrease its usecnt for
#include <rdma/restrack.h>
#include <linux/mutex.h>
#include <linux/sched/task.h>
-#include <linux/uaccess.h>
#include <linux/pid_namespace.h>
void rdma_restrack_init(struct rdma_restrack_root *res)
{
enum rdma_restrack_type type = res->type;
struct ib_device *dev;
- struct ib_xrcd *xrcd;
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
qp = container_of(res, struct ib_qp, res);
dev = qp->device;
break;
- case RDMA_RESTRACK_XRCD:
- xrcd = container_of(res, struct ib_xrcd, res);
- dev = xrcd->device;
- break;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
return NULL;
return dev;
}
+static bool res_is_user(struct rdma_restrack_entry *res)
+{
+ switch (res->type) {
+ case RDMA_RESTRACK_PD:
+ return container_of(res, struct ib_pd, res)->uobject;
+ case RDMA_RESTRACK_CQ:
+ return container_of(res, struct ib_cq, res)->uobject;
+ case RDMA_RESTRACK_QP:
+ return container_of(res, struct ib_qp, res)->uobject;
+ default:
+ WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
+ return false;
+ }
+}
+
void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
if (!dev)
return;
- if (!uaccess_kernel()) {
+ if (res_is_user(res)) {
get_task_struct(current);
res->task = current;
res->kern_name = NULL;
if (f.file)
fdput(f);
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+
uobj_alloc_commit(&obj->uobject);
- mutex_unlock(&file->device->xrcd_tree_mutex);
return in_len;
err_copy:
uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle,
file->ucontext);
- if (IS_ERR(uobj)) {
- mutex_unlock(&file->device->xrcd_tree_mutex);
+ if (IS_ERR(uobj))
return PTR_ERR(uobj);
- }
ret = uobj_remove_commit(uobj);
return ret ?: in_len;
struct ib_uverbs_ex_create_cq_resp resp;
struct ib_cq_init_attr attr = {};
+ if (!ib_dev->create_cq)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (cmd->comp_vector >= file->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
resp.response_length = offsetof(typeof(resp), response_length) +
sizeof(resp.response_length);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
+
ret = cb(file, obj, &resp, ucore, context);
if (ret)
goto err_cb;
uobj_alloc_commit(&obj->uobject);
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_add(&cq->res);
-
return obj;
err_cb:
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
- qp = _ib_create_qp(device, pd, &attr, uhw);
+ qp = _ib_create_qp(device, pd, &attr, uhw,
+ &obj->uevent.uobject);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
atomic_inc(&attr.srq->usecnt);
if (ind_tbl)
atomic_inc(&ind_tbl->usecnt);
+ } else {
+ /* It is done in _ib_create_qp for other QP types */
+ qp->uobject = &obj->uevent.uobject;
}
- qp->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = qp;
goto release_qp;
}
+ if ((cmd->base.attr_mask & IB_QP_AV) &&
+ !rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+
if ((cmd->base.attr_mask & IB_QP_ALT_PATH) &&
- !rdma_is_port_valid(qp->device, cmd->base.alt_port_num)) {
+ (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) ||
+ !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num))) {
ret = -EINVAL;
goto release_qp;
}
wq_init_attr.create_flags = cmd.create_flags;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
+
+ if (!pd->device->create_wq) {
+ err = -EOPNOTSUPP;
+ goto err_put_cq;
+ }
wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
if (IS_ERR(wq)) {
err = PTR_ERR(wq);
wq_attr.flags = cmd.flags;
wq_attr.flags_mask = cmd.flags_mask;
}
+ if (!wq->device->modify_wq) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
+out:
uobj_put_obj_read(wq);
return ret;
}
init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
init_attr.ind_tbl = wqs;
+
+ if (!ib_dev->create_rwq_ind_table) {
+ err = -EOPNOTSUPP;
+ goto err_uobj;
+ }
rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
if (IS_ERR(rwq_ind_tbl)) {
struct ib_device_attr attr = {0};
int err;
+ if (!ib_dev->query_device)
+ return -EOPNOTSUPP;
+
if (ucore->inlen < sizeof(cmd))
return -EINVAL;
return 0;
}
+ if (test_bit(attr_id, attr_bundle_h->valid_bitmap))
+ return -EINVAL;
+
spec = &attr_spec_bucket->attrs[attr_id];
e = &elements[attr_id];
e->uattr = uattr_ptr;
short min = SHRT_MAX;
const void *elem;
int i, j, last_stored = -1;
+ unsigned int equal_min = 0;
for_each_element(elem, i, j, elements, num_elements, num_offset,
data_offset) {
*/
iters[last_stored == i ? num_iters - 1 : num_iters++] = elem;
last_stored = i;
+ if (min == GET_ID(id))
+ equal_min++;
+ else
+ equal_min = 1;
min = GET_ID(id);
}
* Therefore, we need to clean the beginning of the array to make sure
* all ids of final elements are equal to min.
*/
- for (i = num_iters - 1; i >= 0 &&
- GET_ID(*(u16 *)(iters[i] + id_offset)) == min; i--)
- ;
-
- num_iters -= i + 1;
- memmove(iters, iters + i + 1, sizeof(*iters) * num_iters);
+ memmove(iters, iters + num_iters - equal_min, sizeof(*iters) * equal_min);
*min_id = min;
- return num_iters;
+ return equal_min;
}
#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \
hash = kzalloc(sizeof(*hash) +
ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1),
sizeof(long)) +
- BITS_TO_LONGS(attr_max_bucket) * sizeof(long),
+ BITS_TO_LONGS(attr_max_bucket + 1) * sizeof(long),
GFP_KERNEL);
if (!hash) {
res = -ENOMEM;
* first handler which != NULL. This also defines the
* set of flags used for this handler.
*/
- for (i = num_object_defs - 1;
+ for (i = num_method_defs - 1;
i >= 0 && !method_defs[i]->handler; i--)
;
hash->methods[min_id++] = method;
return -1;
}
+static bool verify_command_idx(u32 command, bool extended)
+{
+ if (extended)
+ return command < ARRAY_SIZE(uverbs_ex_cmd_table);
+
+ return command < ARRAY_SIZE(uverbs_cmd_table);
+}
+
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_device *ib_dev;
struct ib_uverbs_cmd_hdr hdr;
+ bool extended_command;
__u32 command;
__u32 flags;
int srcu_key;
}
command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+ flags = (hdr.command &
+ IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
+
+ extended_command = flags & IB_USER_VERBS_CMD_FLAG_EXTENDED;
+ if (!verify_command_idx(command, extended_command)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
if (verify_command_mask(ib_dev, command)) {
ret = -EOPNOTSUPP;
goto out;
goto out;
}
- flags = (hdr.command &
- IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
-
if (!flags) {
- if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
- !uverbs_cmd_table[command]) {
+ if (!uverbs_cmd_table[command]) {
ret = -EINVAL;
goto out;
}
struct ib_udata uhw;
size_t written_count = count;
- if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
- !uverbs_ex_cmd_table[command]) {
+ if (!uverbs_ex_cmd_table[command]) {
ret = -ENOSYS;
goto out;
}
.llseek = no_llseek,
#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
.unlocked_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = ib_uverbs_ioctl,
#endif
};
.llseek = no_llseek,
#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
.unlocked_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = ib_uverbs_ioctl,
#endif
};
uverbs_attr_get(ctx, UVERBS_UHW_OUT);
if (!IS_ERR(uhw_in)) {
- udata->inbuf = uhw_in->ptr_attr.ptr;
udata->inlen = uhw_in->ptr_attr.len;
+ if (uverbs_attr_ptr_is_inline(uhw_in))
+ udata->inbuf = &uhw_in->uattr->data;
+ else
+ udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
} else {
udata->inbuf = NULL;
udata->inlen = 0;
}
if (!IS_ERR(uhw_out)) {
- udata->outbuf = uhw_out->ptr_attr.ptr;
+ udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
udata->outlen = uhw_out->ptr_attr.len;
} else {
udata->outbuf = NULL;
cq->res.type = RDMA_RESTRACK_CQ;
rdma_restrack_add(&cq->res);
- ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe);
+ ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe,
+ sizeof(cq->cqe));
if (ret)
goto err_cq;
resp.comp_events_reported = obj->comp_events_reported;
resp.async_events_reported = obj->async_events_reported;
- return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp);
+ return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp, sizeof(resp));
}
static DECLARE_UVERBS_METHOD(
if (qp_init_attr->cap.max_rdma_ctxs)
rdma_rw_init_qp(device, qp_init_attr);
- qp = _ib_create_qp(device, pd, qp_init_attr, NULL);
+ qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL);
if (IS_ERR(qp))
return qp;
}
qp->real_qp = qp;
- qp->uobject = NULL;
qp->qp_type = qp_init_attr->qp_type;
qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl;
#define BNXT_RE_FLAG_HAVE_L2_REF 3
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
#define BNXT_RE_FLAG_QOS_WORK_REG 5
-#define BNXT_RE_FLAG_TASK_IN_PROG 6
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev;
unsigned int version, major, minor;
atomic_t srq_count;
atomic_t mr_count;
atomic_t mw_count;
+ atomic_t sched_count;
/* Max of 2 lossless traffic class supported per port */
u16 cosq[2];
ib_attr->max_pd = dev_attr->max_pd;
ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom;
ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_init_rd_atom;
- if (dev_attr->is_atomic) {
- ib_attr->atomic_cap = IB_ATOMIC_HCA;
- ib_attr->masked_atomic_cap = IB_ATOMIC_HCA;
- }
+ ib_attr->atomic_cap = IB_ATOMIC_NONE;
+ ib_attr->masked_atomic_cap = IB_ATOMIC_NONE;
ib_attr->max_ee_rd_atom = 0;
ib_attr->max_res_rd_atom = 0;
return 0;
}
+static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
+ __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->scq->cq_lock, flags);
+ if (qp->rcq != qp->scq)
+ spin_lock(&qp->rcq->cq_lock);
+ else
+ __acquire(&qp->rcq->cq_lock);
+
+ return flags;
+}
+
+static void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
+ unsigned long flags)
+ __releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock)
+{
+ if (qp->rcq != qp->scq)
+ spin_unlock(&qp->rcq->cq_lock);
+ else
+ __release(&qp->rcq->cq_lock);
+ spin_unlock_irqrestore(&qp->scq->cq_lock, flags);
+}
+
/* Queue Pairs */
int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
{
struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
struct bnxt_re_dev *rdev = qp->rdev;
int rc;
+ unsigned int flags;
bnxt_qplib_flush_cqn_wq(&qp->qplib_qp);
- bnxt_qplib_del_flush_qp(&qp->qplib_qp);
rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP");
return rc;
}
+
+ flags = bnxt_re_lock_cqs(qp);
+ bnxt_qplib_clean_qp(&qp->qplib_qp);
+ bnxt_re_unlock_cqs(qp, flags);
+ bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp);
+
if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) {
rc = bnxt_qplib_destroy_ah(&rdev->qplib_res,
&rdev->sqp_ah->qplib_ah);
return rc;
}
- bnxt_qplib_del_flush_qp(&qp->qplib_qp);
+ bnxt_qplib_clean_qp(&qp->qplib_qp);
rc = bnxt_qplib_destroy_qp(&rdev->qplib_res,
&rdev->qp1_sqp->qplib_qp);
if (rc) {
goto fail;
}
qp->qplib_qp.scq = &cq->qplib_cq;
+ qp->scq = cq;
}
if (qp_init_attr->recv_cq) {
goto fail;
}
qp->qplib_qp.rcq = &cq->qplib_cq;
+ qp->rcq = cq;
}
if (qp_init_attr->srq) {
rc = bnxt_qplib_create_qp(&rdev->qplib_res, &qp->qplib_qp);
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to create HW QP");
- goto fail;
+ goto free_umem;
}
}
return &qp->ib_qp;
qp_destroy:
bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
+free_umem:
+ if (udata) {
+ if (qp->rumem)
+ ib_umem_release(qp->rumem);
+ if (qp->sumem)
+ ib_umem_release(qp->sumem);
+ }
fail:
kfree(qp);
return ERR_PTR(rc);
dev_dbg(rdev_to_dev(rdev),
"Move QP = %p out of flush list\n",
qp);
- bnxt_qplib_del_flush_qp(&qp->qplib_qp);
+ bnxt_qplib_clean_qp(&qp->qplib_qp);
}
}
if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
/* QP1 */
u32 send_psn;
struct ib_ud_header qp1_hdr;
+ struct bnxt_re_cq *scq;
+ struct bnxt_re_cq *rcq;
};
struct bnxt_re_cq {
mutex_unlock(&bnxt_re_dev_lock);
synchronize_rcu();
- flush_workqueue(bnxt_re_wq);
ib_dealloc_device(&rdev->ibdev);
/* rdev is gone */
break;
}
smp_mb__before_atomic();
- clear_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
+ atomic_dec(&rdev->sched_count);
kfree(re_work);
}
/* netdev notifier will call NETDEV_UNREGISTER again later since
* we are still holding the reference to the netdev
*/
- if (test_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags))
+ if (atomic_read(&rdev->sched_count) > 0)
goto exit;
bnxt_re_ib_unreg(rdev, false);
bnxt_re_remove_one(rdev);
re_work->vlan_dev = (real_dev == netdev ?
NULL : netdev);
INIT_WORK(&re_work->work, bnxt_re_task);
- set_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
+ atomic_inc(&rdev->sched_count);
queue_work(bnxt_re_wq, &re_work->work);
}
}
*/
list_for_each_entry_safe_reverse(rdev, next, &to_be_deleted, list) {
dev_info(rdev_to_dev(rdev), "Unregistering Device");
+ /*
+ * Flush out any scheduled tasks before destroying the
+ * resources
+ */
+ flush_workqueue(bnxt_re_wq);
bnxt_re_dev_stop(rdev);
bnxt_re_ib_unreg(rdev, true);
bnxt_re_remove_one(rdev);
}
}
-void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
+void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp)
{
unsigned long flags;
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_destroy_qp req;
struct creq_destroy_qp_resp resp;
- unsigned long flags;
u16 cmd_flags = 0;
int rc;
return rc;
}
- /* Must walk the associated CQs to nullified the QP ptr */
- spin_lock_irqsave(&qp->scq->hwq.lock, flags);
-
- __clean_cq(qp->scq, (u64)(unsigned long)qp);
-
- if (qp->rcq && qp->rcq != qp->scq) {
- spin_lock(&qp->rcq->hwq.lock);
- __clean_cq(qp->rcq, (u64)(unsigned long)qp);
- spin_unlock(&qp->rcq->hwq.lock);
- }
-
- spin_unlock_irqrestore(&qp->scq->hwq.lock, flags);
+ return 0;
+}
+void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp)
+{
bnxt_qplib_free_qp_hdr_buf(res, qp);
bnxt_qplib_free_hwq(res->pdev, &qp->sq.hwq);
kfree(qp->sq.swq);
if (qp->orrq.max_elements)
bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
- return 0;
}
void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp);
+void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp);
void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
struct bnxt_qplib_sge *sge);
void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp);
-void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp);
void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp,
unsigned long *flags);
void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp,
/* Device */
-static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw)
-{
- int rc;
- u16 pcie_ctl2;
-
- rc = pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2,
- &pcie_ctl2);
- if (rc)
- return false;
- return !!(pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
-}
-
static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw,
char *fw_ver)
{
attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
}
- attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw);
+ attr->is_atomic = 0;
bail:
bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
return rc;
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
+mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
}
}
-static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
-{
- return mlx5_buf_offset(&buf->buf, n * size);
-}
-
static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
- return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
+ return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}
static u8 sw_ownership_bit(int n, int nent)
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
- mlx5_buf_free(dev->mdev, &buf->buf);
+ mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf);
}
static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
return ret;
}
-static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
- int nent, int cqe_size)
+static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_cq_buf *buf,
+ int nent,
+ int cqe_size)
{
+ struct mlx5_frag_buf_ctrl *c = &buf->fbc;
+ struct mlx5_frag_buf *frag_buf = &c->frag_buf;
+ u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0};
int err;
- err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf);
+ MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size));
+ MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 1 : 0);
+
+ mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff);
+
+ err = mlx5_frag_buf_alloc_node(dev->mdev,
+ nent * cqe_size,
+ frag_buf,
+ dev->mdev->priv.numa_node);
if (err)
return err;
ib_umem_release(cq->buf.umem);
}
-static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
+static void init_cq_frag_buf(struct mlx5_ib_cq *cq,
+ struct mlx5_ib_cq_buf *buf)
{
int i;
void *cqe;
struct mlx5_cqe64 *cqe64;
for (i = 0; i < buf->nent; i++) {
- cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
+ cqe = get_cqe(cq, i);
cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
cqe64->op_own = MLX5_CQE_INVALID << 4;
}
cq->mcq.arm_db = cq->db.db + 1;
cq->mcq.cqe_sz = cqe_size;
- err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
+ err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);
if (err)
goto err_db;
- init_cq_buf(cq, &cq->buf);
+ init_cq_frag_buf(cq, &cq->buf);
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
- MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
+ MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
+ cq->buf.fbc.frag_buf.npages;
*cqb = kvzalloc(*inlen, GFP_KERNEL);
if (!*cqb) {
err = -ENOMEM;
}
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
- mlx5_fill_page_array(&cq->buf.buf, pas);
+ mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas);
cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
MLX5_SET(cqc, cqc, log_page_size,
- cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ cq->buf.fbc.frag_buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
*index = dev->mdev->priv.uar->index;
if (!cq->resize_buf)
return -ENOMEM;
- err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
+ err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);
if (err)
goto ex;
- init_cq_buf(cq, cq->resize_buf);
+ init_cq_frag_buf(cq, cq->resize_buf);
return 0;
}
while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
- dcqe = get_cqe_from_buf(cq->resize_buf,
- (i + 1) & (cq->resize_buf->nent),
- dsize);
+ dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
+ (i + 1) & cq->resize_buf->nent);
dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
memcpy(dcqe, scqe, dsize);
cqe_size = 64;
err = resize_kernel(dev, cq, entries, cqe_size);
if (!err) {
- npas = cq->resize_buf->buf.npages;
- page_shift = cq->resize_buf->buf.page_shift;
+ struct mlx5_frag_buf_ctrl *c;
+
+ c = &cq->resize_buf->fbc;
+ npas = c->frag_buf.npages;
+ page_shift = c->frag_buf.page_shift;
}
}
mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
pas, 0);
else
- mlx5_fill_page_array(&cq->resize_buf->buf, pas);
+ mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf,
+ pas);
MLX5_SET(modify_cq_in, in,
modify_field_select_resize_field_select.resize_field_select.resize_field_select,
--- /dev/null
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include "ib_rep.h"
+
+static const struct mlx5_ib_profile rep_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_rep_flow_db_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_rep_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_rep_roce_init,
+ mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
+ mlx5_ib_stage_umr_res_init,
+ mlx5_ib_stage_umr_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+};
+
+static int
+mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ return 0;
+}
+
+static void
+mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static int
+mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *ibdev;
+
+ ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+ if (!ibdev)
+ return -ENOMEM;
+
+ ibdev->rep = rep;
+ ibdev->mdev = dev;
+ ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
+ MLX5_CAP_GEN(dev, num_vhca_ports));
+ if (!__mlx5_ib_add(ibdev, &rep_profile))
+ return -EINVAL;
+
+ rep->rep_if[REP_IB].priv = ibdev;
+
+ return 0;
+}
+
+static void
+mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *dev;
+
+ if (!rep->rep_if[REP_IB].priv)
+ return;
+
+ dev = mlx5_ib_rep_to_dev(rep);
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ return mlx5_ib_rep_to_dev(rep);
+}
+
+static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++) {
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_vport_rep_load;
+ rep_if.unload = mlx5_ib_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
+ }
+}
+
+static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++)
+ mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
+}
+
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_nic_rep_load;
+ rep_if.unload = mlx5_ib_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ rep_if.priv = dev;
+
+ mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB);
+
+ mlx5_ib_rep_register_vf_vports(dev);
+}
+
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */
+ mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/
+}
+
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_mode(esw);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
+}
+
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
+}
+
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+{
+ return mlx5_eswitch_vport_rep(esw, vport);
+}
+
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ if (!dev->rep)
+ return 0;
+
+ flow_rule =
+ mlx5_eswitch_add_send_to_vport_rule(esw,
+ dev->rep->vport,
+ sq->base.mqp.qpn);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
+ sq->flow_rule = flow_rule;
+
+ return 0;
+}
--- /dev/null
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLX5_IB_REP_H__
+#define __MLX5_IB_REP_H__
+
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index);
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index);
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq);
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return SRIOV_NONE;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+ return NULL;
+}
+
+static inline
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+
+static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ return 0;
+}
+
+static inline
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+#endif
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
+{
+ return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv;
+}
+#endif /* __MLX5_IB_REP_H__ */
#include <linux/in.h>
#include <linux/etherdevice.h>
#include "mlx5_ib.h"
+#include "ib_rep.h"
#include "cmd.h"
#define DRIVER_NAME "mlx5_ib"
int ret;
memset(&attr, 0, sizeof(attr));
- ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+ ret = ibdev->query_port(ibdev, port_num, &attr);
if (!ret)
*state = attr.state;
return ret;
case NETDEV_REGISTER:
case NETDEV_UNREGISTER:
write_lock(&roce->netdev_lock);
-
- if (ndev->dev.parent == &mdev->pdev->dev)
- roce->netdev = (event == NETDEV_UNREGISTER) ?
+ if (ibdev->rep) {
+ struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
+ struct net_device *rep_ndev;
+
+ rep_ndev = mlx5_ib_get_rep_netdev(esw,
+ ibdev->rep->vport);
+ if (rep_ndev == ndev)
+ roce->netdev = (event == NETDEV_UNREGISTER) ?
NULL : ndev;
+ } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
+ roce->netdev = (event == NETDEV_UNREGISTER) ?
+ NULL : ndev;
+ }
write_unlock(&roce->netdev_lock);
break;
return ret;
}
+static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ int ret;
+
+ /* Only link layer == ethernet is valid for representors */
+ ret = mlx5_query_port_roce(ibdev, port, props);
+ if (ret || !props)
+ return ret;
+
+ /* We don't support GIDS */
+ props->gid_tbl_len = 0;
+
+ return ret;
+}
+
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
ibflow);
struct mlx5_ib_flow_handler *iter, *tmp;
- mutex_lock(&dev->flow_db.lock);
+ mutex_lock(&dev->flow_db->lock);
list_for_each_entry_safe(iter, tmp, &handler->list, list) {
mlx5_del_flow_rules(iter->rule);
mlx5_del_flow_rules(handler->rule);
put_flow_table(dev, handler->prio, true);
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(handler);
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
- prio = &dev->flow_db.prios[priority];
+ prio = &dev->flow_db->prios[priority];
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
ns = mlx5_get_flow_namespace(dev->mdev,
build_leftovers_ft_param(&priority,
&num_entries,
&num_groups);
- prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
if (!MLX5_CAP_FLOWTABLE(dev->mdev,
allow_sniffer_and_nic_rx_shared_tir))
MLX5_FLOW_NAMESPACE_SNIFFER_RX :
MLX5_FLOW_NAMESPACE_SNIFFER_TX);
- prio = &dev->flow_db.sniffer[ft_type];
+ prio = &dev->flow_db->sniffer[ft_type];
priority = 0;
num_entries = 1;
num_groups = 1;
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
+ if (dev->rep) {
+ void *misc;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port,
+ dev->rep->vport);
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
if (is_drop) {
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
if (!dst)
return ERR_PTR(-ENOMEM);
- mutex_lock(&dev->flow_db.lock);
+ mutex_lock(&dev->flow_db->lock);
ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
if (IS_ERR(ft_prio)) {
goto destroy_ft;
}
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(dst);
return &handler->ibflow;
if (ft_prio_tx)
put_flow_table(dev, ft_prio_tx, false);
unlock:
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(dst);
kfree(handler);
return ERR_PTR(err);
return 0;
}
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ return 0;
+}
+
static void get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct mlx5_ib_dev *dev =
goto err_destroy_vport_lag;
}
- dev->flow_db.lag_demux_ft = ft;
+ dev->flow_db->lag_demux_ft = ft;
return 0;
err_destroy_vport_lag:
{
struct mlx5_core_dev *mdev = dev->mdev;
- if (dev->flow_db.lag_demux_ft) {
- mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
- dev->flow_db.lag_demux_ft = NULL;
+ if (dev->flow_db->lag_demux_ft) {
+ mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
+ dev->flow_db->lag_demux_ft = NULL;
mlx5_cmd_destroy_vport_lag(mdev);
}
{
int err;
- err = mlx5_add_netdev_notifier(dev, port_num);
- if (err)
- return err;
-
if (MLX5_CAP_GEN(dev->mdev, roce)) {
err = mlx5_nic_vport_enable_roce(dev->mdev);
if (err)
- goto err_unregister_netdevice_notifier;
+ return err;
}
err = mlx5_eth_lag_init(dev);
if (MLX5_CAP_GEN(dev->mdev, roce))
mlx5_nic_vport_disable_roce(dev->mdev);
-err_unregister_netdevice_notifier:
- mlx5_remove_netdev_notifier(dev, port_num);
return err;
}
mlx5_nic_vport_disable_roce(dev->mdev);
}
-static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
kfree(dev->port);
}
-static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
const char *name;
dev->mdev->priv.eq_table.num_comp_vectors;
dev->ib_dev.dev.parent = &mdev->pdev->dev;
- mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
return -ENOMEM;
}
-static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+{
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ mutex_init(&dev->flow_db->lock);
+
+ return 0;
+}
+
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_dev *nic_dev;
+
+ nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch);
+
+ if (!nic_dev)
+ return -EINVAL;
+
+ dev->flow_db = nic_dev->flow_db;
+
+ return 0;
+}
+
+static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
+{
+ kfree(dev->flow_db);
+}
+
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
int err;
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
dev->ib_dev.query_device = mlx5_ib_query_device;
- dev->ib_dev.query_port = mlx5_ib_query_port;
dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
dev->ib_dev.add_gid = mlx5_ib_add_gid;
dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
- dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
return 0;
}
+static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
+{
+ dev->ib_dev.get_port_immutable = mlx5_port_immutable;
+ dev->ib_dev.query_port = mlx5_ib_query_port;
+
+ return 0;
+}
+
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+{
+ dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable;
+ dev->ib_dev.query_port = mlx5_ib_rep_query_port;
+
+ return 0;
+}
+
+static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
+ u8 port_num)
+{
+ int i;
+
+ for (i = 0; i < dev->num_ports; i++) {
+ dev->roce[i].dev = dev;
+ dev->roce[i].native_port_num = i + 1;
+ dev->roce[i].last_port_state = IB_PORT_DOWN;
+ }
+
+ dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
+ dev->ib_dev.create_wq = mlx5_ib_create_wq;
+ dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
+ dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
+ dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
+ dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
+
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+
+ return mlx5_add_netdev_notifier(dev, port_num);
+}
+
+static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+ mlx5_remove_netdev_notifier(dev, port_num);
+}
+
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
+ int err = 0;
+ u8 port_num;
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ err = mlx5_ib_stage_common_roce_init(dev, port_num);
+
+ return err;
+}
+
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_stage_common_roce_cleanup(dev);
+}
+
static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
int port_type_cap;
u8 port_num;
int err;
- int i;
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- for (i = 0; i < dev->num_ports; i++) {
- dev->roce[i].dev = dev;
- dev->roce[i].native_port_num = i + 1;
- dev->roce[i].last_port_state = IB_PORT_DOWN;
- }
+ err = mlx5_ib_stage_common_roce_init(dev, port_num);
+ if (err)
+ return err;
- dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
- dev->ib_dev.create_wq = mlx5_ib_create_wq;
- dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
- dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
- dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
- dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
err = mlx5_enable_eth(dev, port_num);
if (err)
- return err;
+ goto cleanup;
}
return 0;
+cleanup:
+ mlx5_ib_stage_common_roce_cleanup(dev);
+
+ return err;
}
static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
if (ll == IB_LINK_LAYER_ETHERNET) {
mlx5_disable_eth(dev);
- mlx5_remove_netdev_notifier(dev, port_num);
+ mlx5_ib_stage_common_roce_cleanup(dev);
}
}
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
{
return create_dev_resources(&dev->devr);
}
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
destroy_dev_resources(&dev->devr);
}
return mlx5_ib_odp_init_one(dev);
}
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
return 0;
}
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
}
-static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
{
int err;
return err;
}
-static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
-static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
return ib_register_device(&dev->ib_dev, NULL);
}
-static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
{
ib_unregister_device(&dev->ib_dev);
}
-static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
{
return create_umr_res(dev);
}
-static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
{
destroy_umrc_res(dev);
}
cancel_delay_drop(dev);
}
-static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
{
int err;
int i;
return 0;
}
-static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_profile *profile,
- int stage)
+static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_register_vport_reps(dev);
+
+ return 0;
+}
+
+static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_unregister_vport_reps(dev);
+}
+
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage)
{
/* Number of stages to cleanup */
while (stage) {
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
-static void *__mlx5_ib_add(struct mlx5_core_dev *mdev,
- const struct mlx5_ib_profile *profile)
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile)
{
- struct mlx5_ib_dev *dev;
int err;
int i;
printk_once(KERN_INFO "%s", mlx5_version);
- dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
- if (!dev)
- return NULL;
-
- dev->mdev = mdev;
- dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
- MLX5_CAP_GEN(mdev, num_vhca_ports));
-
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
err = profile->stage[i].init(dev);
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_flow_db_init,
+ mlx5_ib_stage_flow_db_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_non_default_cb,
+ NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_roce_init,
mlx5_ib_stage_roce_cleanup),
NULL),
};
+static const struct mlx5_ib_profile nic_rep_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_flow_db_init,
+ mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_rep_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_rep_roce_init,
+ mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UAR,
+ mlx5_ib_stage_uar_init,
+ mlx5_ib_stage_uar_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
+ mlx5_ib_stage_umr_res_init,
+ mlx5_ib_stage_umr_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
+ mlx5_ib_stage_rep_reg_init,
+ mlx5_ib_stage_rep_reg_cleanup),
+};
+
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
{
struct mlx5_ib_multiport_info *mpi;
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
enum rdma_link_layer ll;
+ struct mlx5_ib_dev *dev;
int port_type_cap;
+ printk_once(KERN_INFO "%s", mlx5_version);
+
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
return mlx5_ib_add_slave_port(mdev, port_num);
}
- return __mlx5_ib_add(mdev, &pf_profile);
+ dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+ if (!dev)
+ return NULL;
+
+ dev->mdev = mdev;
+ dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+ MLX5_CAP_GEN(mdev, num_vhca_ports));
+
+ if (MLX5_VPORT_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
+
+ return __mlx5_ib_add(dev, &nic_rep_profile);
+ }
+
+ return __mlx5_ib_add(dev, &pf_profile);
}
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
struct mlx5_ib_wq *sq;
struct mlx5_ib_ubuffer ubuffer;
struct mlx5_db *doorbell;
+ struct mlx5_flow_handle *flow_rule;
u32 tisn;
u8 state;
};
struct mlx5_ib_rss_qp rss_qp;
struct mlx5_ib_dct dct;
};
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
struct mlx5_db db;
struct mlx5_ib_wq rq;
};
struct mlx5_ib_cq_buf {
- struct mlx5_buf buf;
+ struct mlx5_frag_buf_ctrl fbc;
struct ib_umem *umem;
int cqe_size;
int nent;
struct mlx5_ib_srq {
struct ib_srq ibsrq;
struct mlx5_core_srq msrq;
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
struct mlx5_db db;
u64 *wrid;
/* protect SRQ hanlding
enum mlx5_ib_stages {
MLX5_IB_STAGE_INIT,
+ MLX5_IB_STAGE_FLOW_DB,
MLX5_IB_STAGE_CAPS,
+ MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
MLX5_IB_STAGE_DEVICE_RESOURCES,
MLX5_IB_STAGE_ODP,
MLX5_IB_STAGE_UMR_RESOURCES,
MLX5_IB_STAGE_DELAY_DROP,
MLX5_IB_STAGE_CLASS_ATTR,
+ MLX5_IB_STAGE_REP_REG,
MLX5_IB_STAGE_MAX,
};
struct srcu_struct mr_srcu;
u32 null_mkey;
#endif
- struct mlx5_ib_flow_db flow_db;
+ struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
+ struct mlx5_eswitch_rep *rep;
/* protect the user_td */
struct mutex lb_mutex;
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+/* Needed for rep profile */
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage);
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile);
+
int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
u8 port, struct ifla_vf_info *info);
int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
- if (!mlx5_debugfs_root)
+ if (!mlx5_debugfs_root || dev->rep)
return;
debugfs_remove_recursive(dev->cache.root);
struct mlx5_cache_ent *ent;
int i;
- if (!mlx5_debugfs_root)
+ if (!mlx5_debugfs_root || dev->rep)
return 0;
cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
MLX5_IB_UMR_OCTOWORD;
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+ !dev->rep &&
mlx5_core_is_pf(dev->mdev))
ent->limit = dev->mdev->profile->mr_cache[i].limit;
else
#include <rdma/ib_user_verbs.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
+#include "ib_rep.h"
/* not supported currently */
static int wq_signature;
mlx5_core_destroy_tis(dev->mdev, sq->tisn);
}
+static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ if (sq->flow_rule)
+ mlx5_del_flow_rules(sq->flow_rule);
+}
+
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq, void *qpin,
struct ib_pd *pd)
if (err)
goto err_umem;
+ err = create_flow_rule_vport_sq(dev, sq);
+ if (err)
+ goto err_flow;
+
return 0;
+err_flow:
+ mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+
err_umem:
ib_umem_release(sq->ubuffer.umem);
sq->ubuffer.umem = NULL;
static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq)
{
+ destroy_flow_rule_vport_sq(dev, sq);
mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
ib_umem_release(sq->ubuffer.umem);
}
if (tunnel_offload_en)
MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
+ if (dev->rep)
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
kvfree(in);
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
+ if (dev->rep)
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
if (err)
int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
uint32_t *addr, uint16_t *port)
{
- int len;
int err;
struct sockaddr_in sock_addr;
err = sock->ops->getname(sock,
(struct sockaddr *)&sock_addr,
- &len, 0);
- if (err)
+ 0);
+ if (err < 0)
return err;
if (sock_addr.sin_family != AF_INET)
union pvrdma_cmd_resp rsp;
struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
+ struct pvrdma_create_cq_resp cq_resp = {0};
struct pvrdma_create_cq ucmd;
BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);
cq->ibcq.cqe = resp->cqe;
cq->cq_handle = resp->cq_handle;
+ cq_resp.cqn = resp->cq_handle;
spin_lock_irqsave(&dev->cq_tbl_lock, flags);
dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
cq->uar = &(to_vucontext(context)->uar);
/* Copy udata back. */
- if (ib_copy_to_udata(udata, &cq->cq_handle, sizeof(__u32))) {
+ if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) {
dev_warn(&dev->pdev->dev,
"failed to copy back udata\n");
pvrdma_destroy_cq(&cq->ibcq);
union pvrdma_cmd_resp rsp;
struct pvrdma_cmd_create_srq *cmd = &req.create_srq;
struct pvrdma_cmd_create_srq_resp *resp = &rsp.create_srq_resp;
+ struct pvrdma_create_srq_resp srq_resp = {0};
struct pvrdma_create_srq ucmd;
unsigned long flags;
int ret;
}
srq->srq_handle = resp->srqn;
+ srq_resp.srqn = resp->srqn;
spin_lock_irqsave(&dev->srq_tbl_lock, flags);
dev->srq_tbl[srq->srq_handle % dev->dsr->caps.max_srq] = srq;
spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);
/* Copy udata back. */
- if (ib_copy_to_udata(udata, &srq->srq_handle, sizeof(__u32))) {
+ if (ib_copy_to_udata(udata, &srq_resp, sizeof(srq_resp))) {
dev_warn(&dev->pdev->dev, "failed to copy back udata\n");
pvrdma_destroy_srq(&srq->ibsrq);
return ERR_PTR(-EINVAL);
union pvrdma_cmd_resp rsp;
struct pvrdma_cmd_create_pd *cmd = &req.create_pd;
struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp;
+ struct pvrdma_alloc_pd_resp pd_resp = {0};
int ret;
void *ptr;
pd->privileged = !context;
pd->pd_handle = resp->pd_handle;
pd->pdn = resp->pd_handle;
+ pd_resp.pdn = resp->pd_handle;
if (context) {
- if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
+ if (ib_copy_to_udata(udata, &pd_resp, sizeof(pd_resp))) {
dev_warn(&dev->pdev->dev,
"failed to copy back protection domain\n");
pvrdma_dealloc_pd(&pd->ibpd);
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n");
- WARN_ONCE(!priv->path_dentry, "null path debug file\n");
debugfs_remove(priv->mcg_dentry);
debugfs_remove(priv->path_dentry);
priv->mcg_dentry = priv->path_dentry = NULL;
goto out_unmap;
}
- pr_info("registered BCM7038 L1 intc (mem: 0x%p, IRQs: %d)\n",
- intc->cpus[0]->map_base, IRQS_PER_WORD * intc->n_words);
-
return 0;
out_unmap:
}
}
- pr_info("registered %s intc (mem: 0x%p, parent IRQ(s): %d)\n",
- intc_name, data->map_base[0], data->num_parent_irqs);
-
return 0;
out_free_domain:
ct->chip.irq_set_wake = irq_gc_set_wake;
}
- pr_info("registered L2 intc (mem: 0x%p, parent irq: %d)\n",
- base, parent_irq);
-
return 0;
out_free_domain:
static struct msi_domain_info gicv2m_msi_domain_info = {
.flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_PCI_MSIX),
+ MSI_FLAG_PCI_MSIX | MSI_FLAG_MULTI_PCI_MSI),
.chip = &gicv2m_msi_irq_chip,
};
return 0;
}
-static void gicv2m_unalloc_msi(struct v2m_data *v2m, unsigned int hwirq)
+static void gicv2m_unalloc_msi(struct v2m_data *v2m, unsigned int hwirq,
+ int nr_irqs)
{
- int pos;
-
- pos = hwirq - v2m->spi_start;
- if (pos < 0 || pos >= v2m->nr_spis) {
- pr_err("Failed to teardown msi. Invalid hwirq %d\n", hwirq);
- return;
- }
-
spin_lock(&v2m_lock);
- __clear_bit(pos, v2m->bm);
+ bitmap_release_region(v2m->bm, hwirq - v2m->spi_start,
+ get_count_order(nr_irqs));
spin_unlock(&v2m_lock);
}
unsigned int nr_irqs, void *args)
{
struct v2m_data *v2m = NULL, *tmp;
- int hwirq, offset, err = 0;
+ int hwirq, offset, i, err = 0;
spin_lock(&v2m_lock);
list_for_each_entry(tmp, &v2m_nodes, entry) {
- offset = find_first_zero_bit(tmp->bm, tmp->nr_spis);
- if (offset < tmp->nr_spis) {
- __set_bit(offset, tmp->bm);
+ offset = bitmap_find_free_region(tmp->bm, tmp->nr_spis,
+ get_count_order(nr_irqs));
+ if (offset >= 0) {
v2m = tmp;
break;
}
hwirq = v2m->spi_start + offset;
- err = gicv2m_irq_gic_domain_alloc(domain, virq, hwirq);
- if (err) {
- gicv2m_unalloc_msi(v2m, hwirq);
- return err;
- }
+ for (i = 0; i < nr_irqs; i++) {
+ err = gicv2m_irq_gic_domain_alloc(domain, virq + i, hwirq + i);
+ if (err)
+ goto fail;
- irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
- &gicv2m_irq_chip, v2m);
+ irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+ &gicv2m_irq_chip, v2m);
+ }
return 0;
+
+fail:
+ irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+ gicv2m_unalloc_msi(v2m, hwirq, get_count_order(nr_irqs));
+ return err;
}
static void gicv2m_irq_domain_free(struct irq_domain *domain,
struct irq_data *d = irq_domain_get_irq_data(domain, virq);
struct v2m_data *v2m = irq_data_get_irq_chip_data(d);
- BUG_ON(nr_irqs != 1);
- gicv2m_unalloc_msi(v2m, d->hwirq);
+ gicv2m_unalloc_msi(v2m, d->hwirq, nr_irqs);
irq_domain_free_irqs_parent(domain, virq, nr_irqs);
}
for (np = of_find_matching_node(NULL, its_device_id); np;
np = of_find_matching_node(np, its_device_id)) {
+ if (!of_device_is_available(np))
+ continue;
if (!of_property_read_bool(np, "msi-controller"))
continue;
for (np = of_find_matching_node(NULL, its_device_id); np;
np = of_find_matching_node(np, its_device_id)) {
+ if (!of_device_is_available(np))
+ continue;
if (!of_property_read_bool(np, "msi-controller"))
continue;
for (np = of_find_matching_node(node, its_device_id); np;
np = of_find_matching_node(np, its_device_id)) {
+ if (!of_device_is_available(np))
+ continue;
if (!of_property_read_bool(np, "msi-controller")) {
pr_warn("%pOF: no msi-controller property, ITS ignored\n",
np);
MPIDR_TO_SGI_RS(cluster_id) |
tlist << ICC_SGI1R_TARGET_LIST_SHIFT);
- pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
+ pr_devel("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
gic_write_sgi1r(val);
}
* Ensure that stores to Normal memory are visible to the
* other CPUs before issuing the IPI.
*/
- smp_wmb();
+ wmb();
for_each_cpu(cpu, mask) {
u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu));
spin_lock_irqsave(&gic_lock, flags);
write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin);
write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu)));
- gic_clear_pcpu_masks(intr);
- set_bit(intr, per_cpu_ptr(pcpu_masks, cpu));
irq_data_update_effective_affinity(data, cpumask_of(cpu));
spin_unlock_irqrestore(&gic_lock, flags);
static int
data_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *addr_len, int peer)
+ int peer)
{
struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr;
struct sock *sk = sock->sk;
lock_sock(sk);
- *addr_len = sizeof(*maddr);
maddr->family = AF_ISDN;
maddr->dev = _pms(sk)->dev->id;
maddr->channel = _pms(sk)->ch.nr;
maddr->sapi = _pms(sk)->ch.addr & 0xff;
maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xff;
release_sock(sk);
- return 0;
+ return sizeof(*maddr);
}
static const struct proto_ops data_sock_ops = {
dev->ofdev.dev.of_node = np;
dev->ofdev.archdata.dma_mask = 0xffffffffUL;
dev->ofdev.dev.dma_mask = &dev->ofdev.archdata.dma_mask;
+ dev->ofdev.dev.coherent_dma_mask = dev->ofdev.archdata.dma_mask;
dev->ofdev.dev.parent = parent;
dev->ofdev.dev.bus = &macio_bus_type;
dev->ofdev.dev.release = macio_release_dev;
queue_io(md, bio);
} else {
/* done with normal IO or empty flush */
- bio->bi_status = io_error;
+ if (io_error)
+ bio->bi_status = io_error;
bio_endio(bio);
}
}
__FILE__, __LINE__, iocnum);
return -ENODEV;
}
+ if (karg.hdr.id >= MPT_MAX_FC_DEVICES)
+ return -EINVAL;
dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_hp_targetinfo called.\n",
ioc->name));
goto out;
}
- if (bus->dev_state == MEI_DEV_POWER_DOWN) {
- dev_dbg(bus->dev, "Device is powering down, don't bother with disconnection\n");
- err = 0;
- goto out;
- }
-
err = mei_cl_disconnect(cl);
if (err < 0)
dev_err(bus->dev, "Could not disconnect from the ME client\n");
return 0;
}
+ if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+ cl_dbg(dev, cl, "Device is powering down, don't bother with disconnection\n");
+ mei_cl_set_disconnected(cl);
+ return 0;
+ }
+
rets = pm_runtime_get(dev->dev);
if (rets < 0 && rets != -EINPROGRESS) {
pm_runtime_put_noidle(dev->dev);
#define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */
#define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */
+#define MEI_DEV_ID_CNP_LP 0x9DE0 /* Cannon Point LP */
+#define MEI_DEV_ID_CNP_LP_4 0x9DE4 /* Cannon Point LP 4 (iTouch) */
+#define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */
+#define MEI_DEV_ID_CNP_H_4 0xA364 /* Cannon Point H 4 (iTouch) */
+
/*
* MEI HW Section
*/
{MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH8_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_4, MEI_ME_PCH8_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH8_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)},
+
/* required last entry */
{0, }
};
struct ocxl_context *ctx = file->private_data;
struct ocxl_kernel_event_header header;
ssize_t rc;
- size_t used = 0;
+ ssize_t used = 0;
DEFINE_WAIT(event_wait);
memset(&header, 0, sizeof(header));
char pio_limit_string[20];
int ret;
- mmc->f_max = host->max_clk;
+ if (!mmc->f_max || mmc->f_max > host->max_clk)
+ mmc->f_max = host->max_clk;
mmc->f_min = host->max_clk / SDCDIV_MAX_CDIV;
mmc->max_busy_timeout = ~0 / (mmc->f_max / 1000);
static int meson_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode)
{
struct meson_host *host = mmc_priv(mmc);
- int ret;
-
- /*
- * If this is the initial tuning, try to get a sane Rx starting
- * phase before doing the actual tuning.
- */
- if (!mmc->doing_retune) {
- ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk);
-
- if (ret)
- return ret;
- }
-
- ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->tx_clk);
- if (ret)
- return ret;
return meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk);
}
if (!IS_ERR(mmc->supply.vmmc))
mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd);
- /* Reset phases */
+ /* Reset rx phase */
clk_set_phase(host->rx_clk, 0);
- clk_set_phase(host->tx_clk, 270);
break;
tristate "NAND controller support on Marvell boards"
depends on PXA3xx || ARCH_MMP || PLAT_ORION || ARCH_MVEBU || \
COMPILE_TEST
- depends on HAS_IOMEM
+ depends on HAS_IOMEM && HAS_DMA
help
This enables the NAND flash controller driver for Marvell boards,
including:
if (mtd->oobsize > 64)
mtd->oobsize = 64;
- /*
- * mtd->ecclayout is not specified here because we're using the
- * default large page ECC layout defined in NAND core.
- */
+ /* Use default large page ECC layout defined in NAND core */
+ mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
if (chip->ecc.strength == 32) {
nfc->ecc_mode = ECC_60_BYTE;
chip->ecc.bytes = 60;
config IPVLAN
tristate "IP-VLAN support"
depends on INET
- depends on IPV6
+ depends on IPV6 || !IPV6
depends on NETFILTER
- depends on NET_L3_MASTER_DEV
+ select NET_L3_MASTER_DEV
---help---
This allows one to create virtual devices off of a main interface
and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)
#endif
#ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */
{mvme147lance_probe, 0},
-#endif
-#ifdef CONFIG_MAC8390 /* NuBus NS8390-based cards */
- {mac8390_probe, 0},
-#endif
-#ifdef CONFIG_MAC89x0
- {mac89x0_probe, 0},
#endif
{NULL, 0},
};
.exit = bond_net_exit,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
+ .async = true,
};
static int __init bonding_init(void)
}
EXPORT_SYMBOL(b53_get_ethtool_stats);
-int b53_get_sset_count(struct dsa_switch *ds)
+int b53_get_sset_count(struct dsa_switch *ds, int port)
{
struct b53_device *dev = ds->priv;
int b53_configure_vlan(struct dsa_switch *ds);
void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data);
void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data);
-int b53_get_sset_count(struct dsa_switch *ds);
+int b53_get_sset_count(struct dsa_switch *ds, int port);
int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge);
void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *bridge);
void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state);
return 0;
}
-static int dsa_loop_get_sset_count(struct dsa_switch *ds)
+static int dsa_loop_get_sset_count(struct dsa_switch *ds, int port)
{
return __DSA_LOOP_CNT_MAX;
}
}
}
-static int lan9303_get_sset_count(struct dsa_switch *ds)
+static int lan9303_get_sset_count(struct dsa_switch *ds, int port)
{
return ARRAY_SIZE(lan9303_mib);
}
ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, true);
}
-static int ksz_sset_count(struct dsa_switch *ds)
+static int ksz_sset_count(struct dsa_switch *ds, int port)
{
return TOTAL_SWITCH_COUNTER_NUM;
}
}
static int
-mt7530_get_sset_count(struct dsa_switch *ds)
+mt7530_get_sset_count(struct dsa_switch *ds, int port)
{
return ARRAY_SIZE(mt7530_mib);
}
It is required on most chips. If the chip you compile the support for
doesn't have such registers set, say N here. In doubt, say Y.
+
+config NET_DSA_MV88E6XXX_PTP
+ bool "PTP support for Marvell 88E6xxx"
+ default n
+ depends on NET_DSA_MV88E6XXX_GLOBAL2
+ imply NETWORK_PHY_TIMESTAMPING
+ imply PTP_1588_CLOCK
+ help
+ Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch
+ chips that support it.
mv88e6xxx-objs += global1_atu.o
mv88e6xxx-objs += global1_vtu.o
mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_avb.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_scratch.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += hwtstamp.o
mv88e6xxx-objs += phy.o
mv88e6xxx-objs += port.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o
mv88e6xxx-objs += serdes.o
#include "chip.h"
#include "global1.h"
#include "global2.h"
+#include "hwtstamp.h"
#include "phy.h"
#include "port.h"
+#include "ptp.h"
#include "serdes.h"
static void assert_reg_lock(struct mv88e6xxx_chip *chip)
chip->g1_irq.masked &= ~(1 << n);
}
-static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip)
{
- struct mv88e6xxx_chip *chip = dev_id;
unsigned int nhandled = 0;
unsigned int sub_irq;
unsigned int n;
return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
}
+static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+{
+ struct mv88e6xxx_chip *chip = dev_id;
+
+ return mv88e6xxx_g1_irq_thread_work(chip);
+}
+
static void mv88e6xxx_g1_irq_bus_lock(struct irq_data *d)
{
struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d);
.xlate = irq_domain_xlate_twocell,
};
-static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip)
{
int irq, virq;
u16 mask;
mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
- free_irq(chip->irq, chip);
-
for (irq = 0; irq < chip->g1_irq.nirqs; irq++) {
virq = irq_find_mapping(chip->g1_irq.domain, irq);
irq_dispose_mapping(virq);
irq_domain_remove(chip->g1_irq.domain);
}
-static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+{
+ mv88e6xxx_g1_irq_free(chip);
+
+ free_irq(chip->irq, chip);
+}
+
+static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
{
int err, irq, virq;
u16 reg, mask;
if (err)
goto out_disable;
- err = request_threaded_irq(chip->irq, NULL,
- mv88e6xxx_g1_irq_thread_fn,
- IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
- dev_name(chip->dev), chip);
- if (err)
- goto out_disable;
-
return 0;
out_disable:
return err;
}
+static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+{
+ int err;
+
+ err = mv88e6xxx_g1_irq_setup_common(chip);
+ if (err)
+ return err;
+
+ err = request_threaded_irq(chip->irq, NULL,
+ mv88e6xxx_g1_irq_thread_fn,
+ IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
+ dev_name(chip->dev), chip);
+ if (err)
+ mv88e6xxx_g1_irq_free_common(chip);
+
+ return err;
+}
+
+static void mv88e6xxx_irq_poll(struct kthread_work *work)
+{
+ struct mv88e6xxx_chip *chip = container_of(work,
+ struct mv88e6xxx_chip,
+ irq_poll_work.work);
+ mv88e6xxx_g1_irq_thread_work(chip);
+
+ kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+ msecs_to_jiffies(100));
+}
+
+static int mv88e6xxx_irq_poll_setup(struct mv88e6xxx_chip *chip)
+{
+ int err;
+
+ err = mv88e6xxx_g1_irq_setup_common(chip);
+ if (err)
+ return err;
+
+ kthread_init_delayed_work(&chip->irq_poll_work,
+ mv88e6xxx_irq_poll);
+
+ chip->kworker = kthread_create_worker(0, dev_name(chip->dev));
+ if (IS_ERR(chip->kworker))
+ return PTR_ERR(chip->kworker);
+
+ kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+ msecs_to_jiffies(100));
+
+ return 0;
+}
+
+static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip)
+{
+ kthread_cancel_delayed_work_sync(&chip->irq_poll_work);
+ kthread_destroy_worker(chip->kworker);
+}
+
int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
{
int i;
return UINT64_MAX;
low = reg;
- if (s->sizeof_stat == 4) {
+ if (s->size == 4) {
err = mv88e6xxx_port_read(chip, port, s->reg + 1, ®);
if (err)
return UINT64_MAX;
case STATS_TYPE_BANK0:
reg |= s->reg | histogram;
mv88e6xxx_g1_stats_read(chip, reg, &low);
- if (s->sizeof_stat == 8)
+ if (s->size == 8)
mv88e6xxx_g1_stats_read(chip, reg + 1, &high);
break;
default:
return value;
}
-static void mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
- uint8_t *data, int types)
+static int mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
+ uint8_t *data, int types)
{
struct mv88e6xxx_hw_stat *stat;
int i, j;
j++;
}
}
+
+ return j;
}
-static void mv88e6095_stats_get_strings(struct mv88e6xxx_chip *chip,
- uint8_t *data)
+static int mv88e6095_stats_get_strings(struct mv88e6xxx_chip *chip,
+ uint8_t *data)
{
- mv88e6xxx_stats_get_strings(chip, data,
- STATS_TYPE_BANK0 | STATS_TYPE_PORT);
+ return mv88e6xxx_stats_get_strings(chip, data,
+ STATS_TYPE_BANK0 | STATS_TYPE_PORT);
}
-static void mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
- uint8_t *data)
+static int mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
+ uint8_t *data)
{
- mv88e6xxx_stats_get_strings(chip, data,
- STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
+ return mv88e6xxx_stats_get_strings(chip, data,
+ STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
}
static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
uint8_t *data)
{
struct mv88e6xxx_chip *chip = ds->priv;
+ int count = 0;
+
+ mutex_lock(&chip->reg_lock);
if (chip->info->ops->stats_get_strings)
- chip->info->ops->stats_get_strings(chip, data);
+ count = chip->info->ops->stats_get_strings(chip, data);
+
+ if (chip->info->ops->serdes_get_strings) {
+ data += count * ETH_GSTRING_LEN;
+ chip->info->ops->serdes_get_strings(chip, port, data);
+ }
+
+ mutex_unlock(&chip->reg_lock);
}
static int mv88e6xxx_stats_get_sset_count(struct mv88e6xxx_chip *chip,
STATS_TYPE_BANK1);
}
-static int mv88e6xxx_get_sset_count(struct dsa_switch *ds)
+static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port)
{
struct mv88e6xxx_chip *chip = ds->priv;
+ int serdes_count = 0;
+ int count = 0;
+ mutex_lock(&chip->reg_lock);
if (chip->info->ops->stats_get_sset_count)
- return chip->info->ops->stats_get_sset_count(chip);
+ count = chip->info->ops->stats_get_sset_count(chip);
+ if (count < 0)
+ goto out;
- return 0;
+ if (chip->info->ops->serdes_get_sset_count)
+ serdes_count = chip->info->ops->serdes_get_sset_count(chip,
+ port);
+ if (serdes_count < 0)
+ count = serdes_count;
+ else
+ count += serdes_count;
+out:
+ mutex_unlock(&chip->reg_lock);
+
+ return count;
}
-static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data, int types,
- u16 bank1_select, u16 histogram)
+static int mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data, int types,
+ u16 bank1_select, u16 histogram)
{
struct mv88e6xxx_hw_stat *stat;
int i, j;
for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) {
stat = &mv88e6xxx_hw_stats[i];
if (stat->type & types) {
+ mutex_lock(&chip->reg_lock);
data[j] = _mv88e6xxx_get_ethtool_stat(chip, stat, port,
bank1_select,
histogram);
+ mutex_unlock(&chip->reg_lock);
+
j++;
}
}
+ return j;
}
-static void mv88e6095_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data)
+static int mv88e6095_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data)
{
return mv88e6xxx_stats_get_stats(chip, port, data,
STATS_TYPE_BANK0 | STATS_TYPE_PORT,
0, MV88E6XXX_G1_STATS_OP_HIST_RX_TX);
}
-static void mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data)
+static int mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data)
{
return mv88e6xxx_stats_get_stats(chip, port, data,
STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
MV88E6XXX_G1_STATS_OP_HIST_RX_TX);
}
-static void mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data)
+static int mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data)
{
return mv88e6xxx_stats_get_stats(chip, port, data,
STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
static void mv88e6xxx_get_stats(struct mv88e6xxx_chip *chip, int port,
uint64_t *data)
{
+ int count = 0;
+
if (chip->info->ops->stats_get_stats)
- chip->info->ops->stats_get_stats(chip, port, data);
+ count = chip->info->ops->stats_get_stats(chip, port, data);
+
+ if (chip->info->ops->serdes_get_stats) {
+ data += count;
+ chip->info->ops->serdes_get_stats(chip, port, data);
+ }
}
static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
mutex_lock(&chip->reg_lock);
ret = mv88e6xxx_stats_snapshot(chip, port);
- if (ret < 0) {
- mutex_unlock(&chip->reg_lock);
+ mutex_unlock(&chip->reg_lock);
+
+ if (ret < 0)
return;
- }
mv88e6xxx_get_stats(chip, port, data);
- mutex_unlock(&chip->reg_lock);
}
static int mv88e6xxx_stats_set_histogram(struct mv88e6xxx_chip *chip)
eth_broadcast_addr(addr.mac);
do {
+ mutex_lock(&chip->reg_lock);
err = mv88e6xxx_g1_atu_getnext(chip, fid, &addr);
+ mutex_unlock(&chip->reg_lock);
if (err)
return err;
int err;
/* Dump port's default Filtering Information Database (VLAN ID 0) */
+ mutex_lock(&chip->reg_lock);
err = mv88e6xxx_port_get_fid(chip, port, &fid);
+ mutex_unlock(&chip->reg_lock);
+
if (err)
return err;
/* Dump VLANs' Filtering Information Databases */
do {
+ mutex_lock(&chip->reg_lock);
err = mv88e6xxx_vtu_getnext(chip, &vlan);
+ mutex_unlock(&chip->reg_lock);
if (err)
return err;
dsa_fdb_dump_cb_t *cb, void *data)
{
struct mv88e6xxx_chip *chip = ds->priv;
- int err;
- mutex_lock(&chip->reg_lock);
- err = mv88e6xxx_port_db_dump(chip, port, cb, data);
- mutex_unlock(&chip->reg_lock);
-
- return err;
+ return mv88e6xxx_port_db_dump(chip, port, cb, data);
}
static int mv88e6xxx_bridge_map(struct mv88e6xxx_chip *chip,
if (err)
goto unlock;
+ /* Setup PTP Hardware Clock and timestamping */
+ if (chip->info->ptp_support) {
+ err = mv88e6xxx_ptp_setup(chip);
+ if (err)
+ goto unlock;
+
+ err = mv88e6xxx_hwtstamp_setup(chip);
+ if (err)
+ goto unlock;
+ }
+
unlock:
mutex_unlock(&chip->reg_lock);
struct mii_bus *bus;
int err;
+ if (external) {
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_g2_scratch_gpio_set_smi(chip, true);
+ mutex_unlock(&chip->reg_lock);
+
+ if (err)
+ return err;
+ }
+
bus = devm_mdiobus_alloc_size(chip->dev, sizeof(*mdio_bus));
if (!bus)
return -ENOMEM;
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6161_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.serdes_power = mv88e6352_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6175_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.serdes_power = mv88e6352_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6185_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6190x_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6191_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.serdes_power = mv88e6352_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6352_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6290_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6390_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6320_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6352_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6321_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6352_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6341_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6390_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6350_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+ .avb_ops = &mv88e6352_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6352_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.serdes_power = mv88e6352_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6352_avb_ops,
+ .serdes_get_sset_count = mv88e6352_serdes_get_sset_count,
+ .serdes_get_strings = mv88e6352_serdes_get_strings,
+ .serdes_get_stats = mv88e6352_serdes_get_stats,
};
static const struct mv88e6xxx_ops mv88e6390_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6390_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6390x_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6390_avb_ops,
};
static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6341",
.num_databases = 4096,
.num_ports = 6,
+ .num_gpio = 11,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.name = "Marvell 88E6172",
.num_databases = 4096,
.num_ports = 7,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.name = "Marvell 88E6176",
.num_databases = 4096,
.num_ports = 7,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.name = "Marvell 88E6190",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
.name = "Marvell 88E6190X",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6191_ops,
},
.name = "Marvell 88E6240",
.num_databases = 4096,
.num_ports = 7,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6240_ops,
},
.name = "Marvell 88E6290",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6290_ops,
},
.name = "Marvell 88E6320",
.num_databases = 4096,
.num_ports = 7,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6320_ops,
},
.name = "Marvell 88E6321",
.num_databases = 4096,
.num_ports = 7,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.atu_move_port_mask = 0xf,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6321_ops,
},
.name = "Marvell 88E6341",
.num_databases = 4096,
.num_ports = 6,
+ .num_gpio = 11,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6341_ops,
},
.name = "Marvell 88E6352",
.num_databases = 4096,
.num_ports = 7,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6352_ops,
},
[MV88E6390] = {
.name = "Marvell 88E6390",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6390_ops,
},
[MV88E6390X] = {
.name = "Marvell 88E6390X",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6390x_ops,
},
};
.port_mdb_del = mv88e6xxx_port_mdb_del,
.crosschip_bridge_join = mv88e6xxx_crosschip_bridge_join,
.crosschip_bridge_leave = mv88e6xxx_crosschip_bridge_leave,
+ .port_hwtstamp_set = mv88e6xxx_port_hwtstamp_set,
+ .port_hwtstamp_get = mv88e6xxx_port_hwtstamp_get,
+ .port_txtstamp = mv88e6xxx_port_txtstamp,
+ .port_rxtstamp = mv88e6xxx_port_rxtstamp,
+ .get_ts_info = mv88e6xxx_get_ts_info,
};
static struct dsa_switch_driver mv88e6xxx_switch_drv = {
goto out;
}
- if (chip->irq > 0) {
- /* Has to be performed before the MDIO bus is created,
- * because the PHYs will link there interrupts to these
- * interrupt controllers
- */
- mutex_lock(&chip->reg_lock);
+ /* Has to be performed before the MDIO bus is created, because
+ * the PHYs will link there interrupts to these interrupt
+ * controllers
+ */
+ mutex_lock(&chip->reg_lock);
+ if (chip->irq > 0)
err = mv88e6xxx_g1_irq_setup(chip);
- mutex_unlock(&chip->reg_lock);
-
- if (err)
- goto out;
-
- if (chip->info->g2_irqs > 0) {
- err = mv88e6xxx_g2_irq_setup(chip);
- if (err)
- goto out_g1_irq;
- }
+ else
+ err = mv88e6xxx_irq_poll_setup(chip);
+ mutex_unlock(&chip->reg_lock);
- err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
- if (err)
- goto out_g2_irq;
+ if (err)
+ goto out;
- err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+ if (chip->info->g2_irqs > 0) {
+ err = mv88e6xxx_g2_irq_setup(chip);
if (err)
- goto out_g1_atu_prob_irq;
+ goto out_g1_irq;
}
+ err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
+ if (err)
+ goto out_g2_irq;
+
+ err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+ if (err)
+ goto out_g1_atu_prob_irq;
+
err = mv88e6xxx_mdios_register(chip, np);
if (err)
goto out_g1_vtu_prob_irq;
out_mdio:
mv88e6xxx_mdios_unregister(chip);
out_g1_vtu_prob_irq:
- if (chip->irq > 0)
- mv88e6xxx_g1_vtu_prob_irq_free(chip);
+ mv88e6xxx_g1_vtu_prob_irq_free(chip);
out_g1_atu_prob_irq:
- if (chip->irq > 0)
- mv88e6xxx_g1_atu_prob_irq_free(chip);
+ mv88e6xxx_g1_atu_prob_irq_free(chip);
out_g2_irq:
- if (chip->info->g2_irqs > 0 && chip->irq > 0)
+ if (chip->info->g2_irqs > 0)
mv88e6xxx_g2_irq_free(chip);
out_g1_irq:
- if (chip->irq > 0) {
- mutex_lock(&chip->reg_lock);
+ mutex_lock(&chip->reg_lock);
+ if (chip->irq > 0)
mv88e6xxx_g1_irq_free(chip);
- mutex_unlock(&chip->reg_lock);
- }
+ else
+ mv88e6xxx_irq_poll_free(chip);
+ mutex_unlock(&chip->reg_lock);
out:
return err;
}
struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
struct mv88e6xxx_chip *chip = ds->priv;
+ if (chip->info->ptp_support) {
+ mv88e6xxx_hwtstamp_free(chip);
+ mv88e6xxx_ptp_free(chip);
+ }
+
mv88e6xxx_phy_destroy(chip);
mv88e6xxx_unregister_switch(chip);
mv88e6xxx_mdios_unregister(chip);
#include <linux/if_vlan.h>
#include <linux/irq.h>
#include <linux/gpio/consumer.h>
+#include <linux/kthread.h>
#include <linux/phy.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/timecounter.h>
#include <net/dsa.h>
#ifndef UINT64_MAX
#define MV88E6XXX_MAX_PVT_SWITCHES 32
#define MV88E6XXX_MAX_PVT_PORTS 16
+#define MV88E6XXX_MAX_GPIO 16
+
enum mv88e6xxx_egress_mode {
MV88E6XXX_EGRESS_MODE_UNMODIFIED,
MV88E6XXX_EGRESS_MODE_UNTAGGED,
const char *name;
unsigned int num_databases;
unsigned int num_ports;
+ unsigned int num_gpio;
unsigned int max_vid;
unsigned int port_base_addr;
unsigned int global1_addr;
*/
u8 atu_move_port_mask;
const struct mv88e6xxx_ops *ops;
+
+ /* Supports PTP */
+ bool ptp_support;
};
struct mv88e6xxx_atu_entry {
struct mv88e6xxx_bus_ops;
struct mv88e6xxx_irq_ops;
+struct mv88e6xxx_gpio_ops;
+struct mv88e6xxx_avb_ops;
struct mv88e6xxx_irq {
u16 masked;
unsigned int nirqs;
};
+/* state flags for mv88e6xxx_port_hwtstamp::state */
+enum {
+ MV88E6XXX_HWTSTAMP_ENABLED,
+ MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
+};
+
+struct mv88e6xxx_port_hwtstamp {
+ /* Port index */
+ int port_id;
+
+ /* Timestamping state */
+ unsigned long state;
+
+ /* Resources for receive timestamping */
+ struct sk_buff_head rx_queue;
+ struct sk_buff_head rx_queue2;
+
+ /* Resources for transmit timestamping */
+ unsigned long tx_tstamp_start;
+ struct sk_buff *tx_skb;
+ u16 tx_seq_id;
+
+ /* Current timestamp configuration */
+ struct hwtstamp_config tstamp_config;
+};
+
+struct mv88e6xxx_port {
+ u64 serdes_stats[2];
+};
+
struct mv88e6xxx_chip {
const struct mv88e6xxx_info *info;
int irq;
int device_irq;
int watchdog_irq;
+
int atu_prob_irq;
int vtu_prob_irq;
+ struct kthread_worker *kworker;
+ struct kthread_delayed_work irq_poll_work;
+
+ /* GPIO resources */
+ u8 gpio_data[2];
+
+ /* This cyclecounter abstracts the switch PTP time.
+ * reg_lock must be held for any operation that read()s.
+ */
+ struct cyclecounter tstamp_cc;
+ struct timecounter tstamp_tc;
+ struct delayed_work overflow_work;
+
+ struct ptp_clock *ptp_clock;
+ struct ptp_clock_info ptp_clock_info;
+ struct delayed_work tai_event_work;
+ struct ptp_pin_desc pin_config[MV88E6XXX_MAX_GPIO];
+ u16 trig_config;
+ u16 evcap_config;
+
+ /* Per-port timestamping resources. */
+ struct mv88e6xxx_port_hwtstamp port_hwtstamp[DSA_MAX_PORTS];
+
+ /* Array of port structures. */
+ struct mv88e6xxx_port ports[DSA_MAX_PORTS];
};
struct mv88e6xxx_bus_ops {
/* Return the number of strings describing statistics */
int (*stats_get_sset_count)(struct mv88e6xxx_chip *chip);
- void (*stats_get_strings)(struct mv88e6xxx_chip *chip, uint8_t *data);
- void (*stats_get_stats)(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data);
+ int (*stats_get_strings)(struct mv88e6xxx_chip *chip, uint8_t *data);
+ int (*stats_get_stats)(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data);
int (*set_cpu_port)(struct mv88e6xxx_chip *chip, int port);
int (*set_egress_port)(struct mv88e6xxx_chip *chip, int port);
const struct mv88e6xxx_irq_ops *watchdog_ops;
/* Power on/off a SERDES interface */
int (*serdes_power)(struct mv88e6xxx_chip *chip, int port, bool on);
+ /* Statistics from the SERDES interface */
+ int (*serdes_get_sset_count)(struct mv88e6xxx_chip *chip, int port);
+ void (*serdes_get_strings)(struct mv88e6xxx_chip *chip, int port,
+ uint8_t *data);
+ void (*serdes_get_stats)(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data);
+
/* VLAN Translation Unit operations */
int (*vtu_getnext)(struct mv88e6xxx_chip *chip,
struct mv88e6xxx_vtu_entry *entry);
int (*vtu_loadpurge)(struct mv88e6xxx_chip *chip,
struct mv88e6xxx_vtu_entry *entry);
+
+ /* GPIO operations */
+ const struct mv88e6xxx_gpio_ops *gpio_ops;
+
+ /* Interface to the AVB/PTP registers */
+ const struct mv88e6xxx_avb_ops *avb_ops;
};
struct mv88e6xxx_irq_ops {
void (*irq_free)(struct mv88e6xxx_chip *chip);
};
+struct mv88e6xxx_gpio_ops {
+ /* Get/set data on GPIO pin */
+ int (*get_data)(struct mv88e6xxx_chip *chip, unsigned int pin);
+ int (*set_data)(struct mv88e6xxx_chip *chip, unsigned int pin,
+ int value);
+
+ /* get/set GPIO direction */
+ int (*get_dir)(struct mv88e6xxx_chip *chip, unsigned int pin);
+ int (*set_dir)(struct mv88e6xxx_chip *chip, unsigned int pin,
+ bool input);
+
+ /* get/set GPIO pin control */
+ int (*get_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin,
+ int *func);
+ int (*set_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin,
+ int func);
+};
+
+struct mv88e6xxx_avb_ops {
+ /* Access port-scoped Precision Time Protocol registers */
+ int (*port_ptp_read)(struct mv88e6xxx_chip *chip, int port, int addr,
+ u16 *data, int len);
+ int (*port_ptp_write)(struct mv88e6xxx_chip *chip, int port, int addr,
+ u16 data);
+
+ /* Access global Precision Time Protocol registers */
+ int (*ptp_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data,
+ int len);
+ int (*ptp_write)(struct mv88e6xxx_chip *chip, int addr, u16 data);
+
+ /* Access global Time Application Interface registers */
+ int (*tai_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data,
+ int len);
+ int (*tai_write)(struct mv88e6xxx_chip *chip, int addr, u16 data);
+};
+
#define STATS_TYPE_PORT BIT(0)
#define STATS_TYPE_BANK0 BIT(1)
#define STATS_TYPE_BANK1 BIT(2)
struct mv88e6xxx_hw_stat {
char string[ETH_GSTRING_LEN];
- int sizeof_stat;
+ size_t size;
int reg;
int type;
};
return GENMASK(mv88e6xxx_num_ports(chip) - 1, 0);
}
+static inline unsigned int mv88e6xxx_num_gpio(struct mv88e6xxx_chip *chip)
+{
+ return chip->info->num_gpio;
+}
+
int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
#include "global1.h" /* for MV88E6XXX_G1_STS_IRQ_DEVICE */
#include "global2.h"
-static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
{
return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val);
}
-static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
{
return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val);
}
-static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
+int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
{
return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update);
}
-static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
+int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
{
return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask);
}
val);
}
+/* Offset 0x1B: Watchdog Control */
static int mv88e6097_watchdog_action(struct mv88e6xxx_chip *chip, int irq)
{
u16 reg;
#define MV88E6390_G2_EEPROM_ADDR_MASK 0xffff
/* Offset 0x16: AVB Command Register */
-#define MV88E6352_G2_AVB_CMD 0x16
+#define MV88E6352_G2_AVB_CMD 0x16
+#define MV88E6352_G2_AVB_CMD_BUSY 0x8000
+#define MV88E6352_G2_AVB_CMD_OP_READ 0x4000
+#define MV88E6352_G2_AVB_CMD_OP_READ_INCR 0x6000
+#define MV88E6352_G2_AVB_CMD_OP_WRITE 0x3000
+#define MV88E6390_G2_AVB_CMD_OP_READ 0x0000
+#define MV88E6390_G2_AVB_CMD_OP_READ_INCR 0x4000
+#define MV88E6390_G2_AVB_CMD_OP_WRITE 0x6000
+#define MV88E6352_G2_AVB_CMD_PORT_MASK 0x0f00
+#define MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL 0xe
+#define MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL 0xf
+#define MV88E6390_G2_AVB_CMD_PORT_MASK 0x1f00
+#define MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL 0x1e
+#define MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL 0x1f
+#define MV88E6352_G2_AVB_CMD_BLOCK_PTP 0
+#define MV88E6352_G2_AVB_CMD_BLOCK_AVB 1
+#define MV88E6352_G2_AVB_CMD_BLOCK_QAV 2
+#define MV88E6352_G2_AVB_CMD_BLOCK_QVB 3
+#define MV88E6352_G2_AVB_CMD_BLOCK_MASK 0x00e0
+#define MV88E6352_G2_AVB_CMD_ADDR_MASK 0x001f
/* Offset 0x17: AVB Data Register */
#define MV88E6352_G2_AVB_DATA 0x17
#define MV88E6352_G2_NOEGR_POLICY 0x2000
#define MV88E6390_G2_LAG_ID_4 0x2000
+/* Scratch/Misc registers accessed through MV88E6XXX_G2_SCRATCH_MISC */
+/* Offset 0x02: Misc Configuration */
+#define MV88E6352_G2_SCRATCH_MISC_CFG 0x02
+#define MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI 0x80
+/* Offset 0x60-0x61: GPIO Configuration */
+#define MV88E6352_G2_SCRATCH_GPIO_CFG0 0x60
+#define MV88E6352_G2_SCRATCH_GPIO_CFG1 0x61
+/* Offset 0x62-0x63: GPIO Direction */
+#define MV88E6352_G2_SCRATCH_GPIO_DIR0 0x62
+#define MV88E6352_G2_SCRATCH_GPIO_DIR1 0x63
+#define MV88E6352_G2_SCRATCH_GPIO_DIR_OUT 0
+#define MV88E6352_G2_SCRATCH_GPIO_DIR_IN 1
+/* Offset 0x64-0x65: GPIO Data */
+#define MV88E6352_G2_SCRATCH_GPIO_DATA0 0x64
+#define MV88E6352_G2_SCRATCH_GPIO_DATA1 0x65
+/* Offset 0x68-0x6F: GPIO Pin Control */
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL0 0x68
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL1 0x69
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL2 0x6A
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL3 0x6B
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL4 0x6C
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL5 0x6D
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL6 0x6E
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL7 0x6F
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA0 0x70
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA1 0x71
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA1_NO_CPU BIT(2)
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA2 0x72
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA2_P0_MODE_MASK 0x3
+
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO 0
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_TRIG 1
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ 2
+
#ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2
static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
return 0;
}
+int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val);
+int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val);
+int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update);
+int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask);
+
int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
int mv88e6390_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
+extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops;
+extern const struct mv88e6xxx_avb_ops mv88e6390_avb_ops;
+
+extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops;
+
+int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+ bool external);
+
#else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
return 0;
}
+static inline int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip,
int port)
{
static const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops = {};
static const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {};
+static const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {};
+static const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {};
+
+static const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {};
+
+static inline int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+ bool external)
+{
+ return -EOPNOTSUPP;
+}
+
#endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
#endif /* _MV88E6XXX_GLOBAL2_H */
--- /dev/null
+/*
+ * Marvell 88E6xxx Switch Global 2 Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016-2017 Savoir-faire Linux Inc.
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * Copyright (c) 2017 National Instruments
+ * Brandon Streiff <brandon.streiff@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "global2.h"
+
+/* Offset 0x16: AVB Command Register
+ * Offset 0x17: AVB Data Register
+ *
+ * There are two different versions of this register interface:
+ * "6352": 3-bit "op" field, 4-bit "port" field.
+ * "6390": 2-bit "op" field, 5-bit "port" field.
+ *
+ * The "op" codes are different between the two, as well as the special
+ * port fields for global PTP and TAI configuration.
+ */
+
+/* mv88e6xxx_g2_avb_read -- Read one or multiple 16-bit words.
+ * The hardware supports snapshotting up to four contiguous registers.
+ */
+static int mv88e6xxx_g2_avb_read(struct mv88e6xxx_chip *chip, u16 readop,
+ u16 *data, int len)
+{
+ int err;
+ int i;
+
+ /* Hardware can only snapshot four words. */
+ if (len > 4)
+ return -E2BIG;
+
+ err = mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, readop);
+ if (err)
+ return err;
+
+ for (i = 0; i < len; ++i) {
+ err = mv88e6xxx_g2_read(chip, MV88E6352_G2_AVB_DATA,
+ &data[i]);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/* mv88e6xxx_g2_avb_write -- Write one 16-bit word. */
+static int mv88e6xxx_g2_avb_write(struct mv88e6xxx_chip *chip, u16 writeop,
+ u16 data)
+{
+ int err;
+
+ err = mv88e6xxx_g2_write(chip, MV88E6352_G2_AVB_DATA, data);
+ if (err)
+ return err;
+
+ return mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, writeop);
+}
+
+static int mv88e6352_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
+ int port, int addr, u16 *data,
+ int len)
+{
+ u16 readop = (len == 1 ? MV88E6352_G2_AVB_CMD_OP_READ :
+ MV88E6352_G2_AVB_CMD_OP_READ_INCR) |
+ (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
+ addr;
+
+ return mv88e6xxx_g2_avb_read(chip, readop, data, len);
+}
+
+static int mv88e6352_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip,
+ int port, int addr, u16 data)
+{
+ u16 writeop = MV88E6352_G2_AVB_CMD_OP_WRITE | (port << 8) |
+ (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr;
+
+ return mv88e6xxx_g2_avb_write(chip, writeop, data);
+}
+
+static int mv88e6352_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ return mv88e6352_g2_avb_port_ptp_read(chip,
+ MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL,
+ addr, data, len);
+}
+
+static int mv88e6352_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ return mv88e6352_g2_avb_port_ptp_write(chip,
+ MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL,
+ addr, data);
+}
+
+static int mv88e6352_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ return mv88e6352_g2_avb_port_ptp_read(chip,
+ MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL,
+ addr, data, len);
+}
+
+static int mv88e6352_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ return mv88e6352_g2_avb_port_ptp_write(chip,
+ MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL,
+ addr, data);
+}
+
+const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {
+ .port_ptp_read = mv88e6352_g2_avb_port_ptp_read,
+ .port_ptp_write = mv88e6352_g2_avb_port_ptp_write,
+ .ptp_read = mv88e6352_g2_avb_ptp_read,
+ .ptp_write = mv88e6352_g2_avb_ptp_write,
+ .tai_read = mv88e6352_g2_avb_tai_read,
+ .tai_write = mv88e6352_g2_avb_tai_write,
+};
+
+static int mv88e6390_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
+ int port, int addr, u16 *data,
+ int len)
+{
+ u16 readop = (len == 1 ? MV88E6390_G2_AVB_CMD_OP_READ :
+ MV88E6390_G2_AVB_CMD_OP_READ_INCR) |
+ (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
+ addr;
+
+ return mv88e6xxx_g2_avb_read(chip, readop, data, len);
+}
+
+static int mv88e6390_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip,
+ int port, int addr, u16 data)
+{
+ u16 writeop = MV88E6390_G2_AVB_CMD_OP_WRITE | (port << 8) |
+ (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr;
+
+ return mv88e6xxx_g2_avb_write(chip, writeop, data);
+}
+
+static int mv88e6390_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ return mv88e6390_g2_avb_port_ptp_read(chip,
+ MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL,
+ addr, data, len);
+}
+
+static int mv88e6390_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ return mv88e6390_g2_avb_port_ptp_write(chip,
+ MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL,
+ addr, data);
+}
+
+static int mv88e6390_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ return mv88e6390_g2_avb_port_ptp_read(chip,
+ MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL,
+ addr, data, len);
+}
+
+static int mv88e6390_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ return mv88e6390_g2_avb_port_ptp_write(chip,
+ MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL,
+ addr, data);
+}
+
+const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {
+ .port_ptp_read = mv88e6390_g2_avb_port_ptp_read,
+ .port_ptp_write = mv88e6390_g2_avb_port_ptp_write,
+ .ptp_read = mv88e6390_g2_avb_ptp_read,
+ .ptp_write = mv88e6390_g2_avb_ptp_write,
+ .tai_read = mv88e6390_g2_avb_tai_read,
+ .tai_write = mv88e6390_g2_avb_tai_write,
+};
--- /dev/null
+/*
+ * Marvell 88E6xxx Switch Global 2 Scratch & Misc Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Brandon Streiff <brandon.streiff@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+
+/* Offset 0x1A: Scratch and Misc. Register */
+static int mv88e6xxx_g2_scratch_read(struct mv88e6xxx_chip *chip, int reg,
+ u8 *data)
+{
+ u16 value;
+ int err;
+
+ err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC,
+ reg << 8);
+ if (err)
+ return err;
+
+ err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, &value);
+ if (err)
+ return err;
+
+ *data = (value & MV88E6XXX_G2_SCRATCH_MISC_DATA_MASK);
+
+ return 0;
+}
+
+static int mv88e6xxx_g2_scratch_write(struct mv88e6xxx_chip *chip, int reg,
+ u8 data)
+{
+ u16 value = (reg << 8) | data;
+
+ return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, value);
+}
+
+/**
+ * mv88e6xxx_g2_scratch_gpio_get_bit - get a bit
+ * @chip: chip private data
+ * @nr: bit index
+ * @set: is bit set?
+ */
+static int mv88e6xxx_g2_scratch_get_bit(struct mv88e6xxx_chip *chip,
+ int base_reg, unsigned int offset,
+ int *set)
+{
+ int reg = base_reg + (offset / 8);
+ u8 mask = (1 << (offset & 0x7));
+ u8 val;
+ int err;
+
+ err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+ if (err)
+ return err;
+
+ *set = !!(mask & val);
+
+ return 0;
+}
+
+/**
+ * mv88e6xxx_g2_scratch_gpio_set_bit - set (or clear) a bit
+ * @chip: chip private data
+ * @nr: bit index
+ * @set: set if true, clear if false
+ *
+ * Helper function for dealing with the direction and data registers.
+ */
+static int mv88e6xxx_g2_scratch_set_bit(struct mv88e6xxx_chip *chip,
+ int base_reg, unsigned int offset,
+ int set)
+{
+ int reg = base_reg + (offset / 8);
+ u8 mask = (1 << (offset & 0x7));
+ u8 val;
+ int err;
+
+ err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+ if (err)
+ return err;
+
+ if (set)
+ val |= mask;
+ else
+ val &= ~mask;
+
+ return mv88e6xxx_g2_scratch_write(chip, reg, val);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_data - get data on gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ *
+ * Return: 0 for low, 1 for high, negative error
+ */
+static int mv88e6352_g2_scratch_gpio_get_data(struct mv88e6xxx_chip *chip,
+ unsigned int pin)
+{
+ int val = 0;
+ int err;
+
+ err = mv88e6xxx_g2_scratch_get_bit(chip,
+ MV88E6352_G2_SCRATCH_GPIO_DATA0,
+ pin, &val);
+ if (err)
+ return err;
+
+ return val;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_data - set data on gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ * @value: value to set
+ */
+static int mv88e6352_g2_scratch_gpio_set_data(struct mv88e6xxx_chip *chip,
+ unsigned int pin, int value)
+{
+ u8 mask = (1 << (pin & 0x7));
+ int offset = (pin / 8);
+ int reg;
+
+ reg = MV88E6352_G2_SCRATCH_GPIO_DATA0 + offset;
+
+ if (value)
+ chip->gpio_data[offset] |= mask;
+ else
+ chip->gpio_data[offset] &= ~mask;
+
+ return mv88e6xxx_g2_scratch_write(chip, reg, chip->gpio_data[offset]);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_dir - get direction of gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ *
+ * Return: 0 for output, 1 for input (same as GPIOF_DIR_XXX).
+ */
+static int mv88e6352_g2_scratch_gpio_get_dir(struct mv88e6xxx_chip *chip,
+ unsigned int pin)
+{
+ int val = 0;
+ int err;
+
+ err = mv88e6xxx_g2_scratch_get_bit(chip,
+ MV88E6352_G2_SCRATCH_GPIO_DIR0,
+ pin, &val);
+ if (err)
+ return err;
+
+ return val;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_dir - set direction of gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ */
+static int mv88e6352_g2_scratch_gpio_set_dir(struct mv88e6xxx_chip *chip,
+ unsigned int pin, bool input)
+{
+ int value = (input ? MV88E6352_G2_SCRATCH_GPIO_DIR_IN :
+ MV88E6352_G2_SCRATCH_GPIO_DIR_OUT);
+
+ return mv88e6xxx_g2_scratch_set_bit(chip,
+ MV88E6352_G2_SCRATCH_GPIO_DIR0,
+ pin, value);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_pctl - get pin control setting
+ * @chip: chip private data
+ * @pin: gpio index
+ * @func: function number
+ *
+ * Note that the function numbers themselves may vary by chipset.
+ */
+static int mv88e6352_g2_scratch_gpio_get_pctl(struct mv88e6xxx_chip *chip,
+ unsigned int pin, int *func)
+{
+ int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2);
+ int offset = (pin & 0x1) ? 4 : 0;
+ u8 mask = (0x7 << offset);
+ int err;
+ u8 val;
+
+ err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+ if (err)
+ return err;
+
+ *func = (val & mask) >> offset;
+
+ return 0;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_pctl - set pin control setting
+ * @chip: chip private data
+ * @pin: gpio index
+ * @func: function number
+ */
+static int mv88e6352_g2_scratch_gpio_set_pctl(struct mv88e6xxx_chip *chip,
+ unsigned int pin, int func)
+{
+ int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2);
+ int offset = (pin & 0x1) ? 4 : 0;
+ u8 mask = (0x7 << offset);
+ int err;
+ u8 val;
+
+ err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+ if (err)
+ return err;
+
+ val = (val & ~mask) | ((func & mask) << offset);
+
+ return mv88e6xxx_g2_scratch_write(chip, reg, val);
+}
+
+const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {
+ .get_data = mv88e6352_g2_scratch_gpio_get_data,
+ .set_data = mv88e6352_g2_scratch_gpio_set_data,
+ .get_dir = mv88e6352_g2_scratch_gpio_get_dir,
+ .set_dir = mv88e6352_g2_scratch_gpio_set_dir,
+ .get_pctl = mv88e6352_g2_scratch_gpio_get_pctl,
+ .set_pctl = mv88e6352_g2_scratch_gpio_set_pctl,
+};
+
+/**
+ * mv88e6xxx_g2_gpio_set_smi - set gpio muxing for external smi
+ * @chip: chip private data
+ * @external: set mux for external smi, or free for gpio usage
+ *
+ * Some mv88e6xxx models have GPIO pins that may be configured as
+ * an external SMI interface, or they may be made free for other
+ * GPIO uses.
+ */
+int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+ bool external)
+{
+ int misc_cfg = MV88E6352_G2_SCRATCH_MISC_CFG;
+ int config_data1 = MV88E6352_G2_SCRATCH_CONFIG_DATA1;
+ int config_data2 = MV88E6352_G2_SCRATCH_CONFIG_DATA2;
+ bool no_cpu;
+ u8 p0_mode;
+ int err;
+ u8 val;
+
+ err = mv88e6xxx_g2_scratch_read(chip, config_data2, &val);
+ if (err)
+ return err;
+
+ p0_mode = val & MV88E6352_G2_SCRATCH_CONFIG_DATA2_P0_MODE_MASK;
+
+ if (p0_mode == 0x01 || p0_mode == 0x02)
+ return -EBUSY;
+
+ err = mv88e6xxx_g2_scratch_read(chip, config_data1, &val);
+ if (err)
+ return err;
+
+ no_cpu = !!(val & MV88E6352_G2_SCRATCH_CONFIG_DATA1_NO_CPU);
+
+ err = mv88e6xxx_g2_scratch_read(chip, misc_cfg, &val);
+ if (err)
+ return err;
+
+ /* NO_CPU being 0 inverts the meaning of the bit */
+ if (!no_cpu)
+ external = !external;
+
+ if (external)
+ val |= MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI;
+ else
+ val &= ~MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI;
+
+ return mv88e6xxx_g2_scratch_write(chip, misc_cfg, val);
+}
--- /dev/null
+/*
+ * Marvell 88E6xxx Switch hardware timestamping support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Erik Hons <erik.hons@ni.com>
+ * Brandon Streiff <brandon.streiff@ni.com>
+ * Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+#include "hwtstamp.h"
+#include "ptp.h"
+#include <linux/ptp_classify.h>
+
+#define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb))
+
+static int mv88e6xxx_port_ptp_read(struct mv88e6xxx_chip *chip, int port,
+ int addr, u16 *data, int len)
+{
+ if (!chip->info->ops->avb_ops->port_ptp_read)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->port_ptp_read(chip, port, addr,
+ data, len);
+}
+
+static int mv88e6xxx_port_ptp_write(struct mv88e6xxx_chip *chip, int port,
+ int addr, u16 data)
+{
+ if (!chip->info->ops->avb_ops->port_ptp_write)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->port_ptp_write(chip, port, addr,
+ data);
+}
+
+static int mv88e6xxx_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ if (!chip->info->ops->avb_ops->ptp_write)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->ptp_write(chip, addr, data);
+}
+
+/* TX_TSTAMP_TIMEOUT: This limits the time spent polling for a TX
+ * timestamp. When working properly, hardware will produce a timestamp
+ * within 1ms. Software may enounter delays due to MDIO contention, so
+ * the timeout is set accordingly.
+ */
+#define TX_TSTAMP_TIMEOUT msecs_to_jiffies(20)
+
+int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+ struct ethtool_ts_info *info)
+{
+ struct mv88e6xxx_chip *chip = ds->priv;
+
+ if (!chip->info->ptp_support)
+ return -EOPNOTSUPP;
+
+ info->so_timestamping =
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+ info->phc_index = ptp_clock_index(chip->ptp_clock);
+ info->tx_types =
+ (1 << HWTSTAMP_TX_OFF) |
+ (1 << HWTSTAMP_TX_ON);
+ info->rx_filters =
+ (1 << HWTSTAMP_FILTER_NONE) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ);
+
+ return 0;
+}
+
+static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port,
+ struct hwtstamp_config *config)
+{
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ bool tstamp_enable = false;
+ u16 port_config0;
+ int err;
+
+ /* Prevent the TX/RX paths from trying to interact with the
+ * timestamp hardware while we reconfigure it.
+ */
+ clear_bit_unlock(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
+
+ /* reserved for future extensions */
+ if (config->flags)
+ return -EINVAL;
+
+ switch (config->tx_type) {
+ case HWTSTAMP_TX_OFF:
+ tstamp_enable = false;
+ break;
+ case HWTSTAMP_TX_ON:
+ tstamp_enable = true;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /* The switch supports timestamping both L2 and L4; one cannot be
+ * disabled independently of the other.
+ */
+ switch (config->rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ tstamp_enable = false;
+ break;
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ default:
+ config->rx_filter = HWTSTAMP_FILTER_NONE;
+ return -ERANGE;
+ }
+
+ if (tstamp_enable) {
+ /* Disable transportSpecific value matching, so that packets
+ * with either 1588 (0) and 802.1AS (1) will be timestamped.
+ */
+ port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH;
+ } else {
+ /* Disable PTP. This disables both RX and TX timestamping. */
+ port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP;
+ }
+
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+ port_config0);
+ mutex_unlock(&chip->reg_lock);
+
+ if (err < 0)
+ return err;
+
+ /* Once hardware has been configured, enable timestamp checks
+ * in the RX/TX paths.
+ */
+ if (tstamp_enable)
+ set_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
+
+ return 0;
+}
+
+int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
+ struct ifreq *ifr)
+{
+ struct mv88e6xxx_chip *chip = ds->priv;
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ struct hwtstamp_config config;
+ int err;
+
+ if (!chip->info->ptp_support)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ err = mv88e6xxx_set_hwtstamp_config(chip, port, &config);
+ if (err)
+ return err;
+
+ /* Save the chosen configuration to be returned later. */
+ memcpy(&ps->tstamp_config, &config, sizeof(config));
+
+ return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+ -EFAULT : 0;
+}
+
+int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
+ struct ifreq *ifr)
+{
+ struct mv88e6xxx_chip *chip = ds->priv;
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ struct hwtstamp_config *config = &ps->tstamp_config;
+
+ if (!chip->info->ptp_support)
+ return -EOPNOTSUPP;
+
+ return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+ -EFAULT : 0;
+}
+
+/* Get the start of the PTP header in this skb */
+static u8 *parse_ptp_header(struct sk_buff *skb, unsigned int type)
+{
+ u8 *data = skb_mac_header(skb);
+ unsigned int offset = 0;
+
+ if (type & PTP_CLASS_VLAN)
+ offset += VLAN_HLEN;
+
+ switch (type & PTP_CLASS_PMASK) {
+ case PTP_CLASS_IPV4:
+ offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
+ break;
+ case PTP_CLASS_IPV6:
+ offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
+ break;
+ case PTP_CLASS_L2:
+ offset += ETH_HLEN;
+ break;
+ default:
+ return NULL;
+ }
+
+ /* Ensure that the entire header is present in this packet. */
+ if (skb->len + ETH_HLEN < offset + 34)
+ return NULL;
+
+ return data + offset;
+}
+
+/* Returns a pointer to the PTP header if the caller should time stamp,
+ * or NULL if the caller should not.
+ */
+static u8 *mv88e6xxx_should_tstamp(struct mv88e6xxx_chip *chip, int port,
+ struct sk_buff *skb, unsigned int type)
+{
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ u8 *hdr;
+
+ if (!chip->info->ptp_support)
+ return NULL;
+
+ hdr = parse_ptp_header(skb, type);
+ if (!hdr)
+ return NULL;
+
+ if (!test_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state))
+ return NULL;
+
+ return hdr;
+}
+
+static int mv88e6xxx_ts_valid(u16 status)
+{
+ if (!(status & MV88E6XXX_PTP_TS_VALID))
+ return 0;
+ if (status & MV88E6XXX_PTP_TS_STATUS_MASK)
+ return 0;
+ return 1;
+}
+
+static int seq_match(struct sk_buff *skb, u16 ts_seqid)
+{
+ unsigned int type = SKB_PTP_TYPE(skb);
+ u8 *hdr = parse_ptp_header(skb, type);
+ __be16 *seqid;
+
+ seqid = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
+
+ return ts_seqid == ntohs(*seqid);
+}
+
+static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip,
+ struct mv88e6xxx_port_hwtstamp *ps,
+ struct sk_buff *skb, u16 reg,
+ struct sk_buff_head *rxq)
+{
+ u16 buf[4] = { 0 }, status, seq_id;
+ u64 ns, timelo, timehi;
+ struct skb_shared_hwtstamps *shwt;
+ int err;
+
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
+ reg, buf, ARRAY_SIZE(buf));
+ mutex_unlock(&chip->reg_lock);
+ if (err)
+ pr_err("failed to get the receive time stamp\n");
+
+ status = buf[0];
+ timelo = buf[1];
+ timehi = buf[2];
+ seq_id = buf[3];
+
+ if (status & MV88E6XXX_PTP_TS_VALID) {
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_port_ptp_write(chip, ps->port_id, reg, 0);
+ mutex_unlock(&chip->reg_lock);
+ if (err)
+ pr_err("failed to clear the receive status\n");
+ }
+ /* Since the device can only handle one time stamp at a time,
+ * we purge any extra frames from the queue.
+ */
+ for ( ; skb; skb = skb_dequeue(rxq)) {
+ if (mv88e6xxx_ts_valid(status) && seq_match(skb, seq_id)) {
+ ns = timehi << 16 | timelo;
+
+ mutex_lock(&chip->reg_lock);
+ ns = timecounter_cyc2time(&chip->tstamp_tc, ns);
+ mutex_unlock(&chip->reg_lock);
+ shwt = skb_hwtstamps(skb);
+ memset(shwt, 0, sizeof(*shwt));
+ shwt->hwtstamp = ns_to_ktime(ns);
+ status &= ~MV88E6XXX_PTP_TS_VALID;
+ }
+ netif_rx_ni(skb);
+ }
+}
+
+static void mv88e6xxx_rxtstamp_work(struct mv88e6xxx_chip *chip,
+ struct mv88e6xxx_port_hwtstamp *ps)
+{
+ struct sk_buff *skb;
+
+ skb = skb_dequeue(&ps->rx_queue);
+
+ if (skb)
+ mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR0_STS,
+ &ps->rx_queue);
+
+ skb = skb_dequeue(&ps->rx_queue2);
+ if (skb)
+ mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR1_STS,
+ &ps->rx_queue2);
+}
+
+static int is_pdelay_resp(u8 *msgtype)
+{
+ return (*msgtype & 0xf) == 3;
+}
+
+bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *skb, unsigned int type)
+{
+ struct mv88e6xxx_port_hwtstamp *ps;
+ struct mv88e6xxx_chip *chip;
+ u8 *hdr;
+
+ chip = ds->priv;
+ ps = &chip->port_hwtstamp[port];
+
+ if (ps->tstamp_config.rx_filter != HWTSTAMP_FILTER_PTP_V2_EVENT)
+ return false;
+
+ hdr = mv88e6xxx_should_tstamp(chip, port, skb, type);
+ if (!hdr)
+ return false;
+
+ SKB_PTP_TYPE(skb) = type;
+
+ if (is_pdelay_resp(hdr))
+ skb_queue_tail(&ps->rx_queue2, skb);
+ else
+ skb_queue_tail(&ps->rx_queue, skb);
+
+ ptp_schedule_worker(chip->ptp_clock, 0);
+
+ return true;
+}
+
+static int mv88e6xxx_txtstamp_work(struct mv88e6xxx_chip *chip,
+ struct mv88e6xxx_port_hwtstamp *ps)
+{
+ struct skb_shared_hwtstamps shhwtstamps;
+ u16 departure_block[4], status;
+ struct sk_buff *tmp_skb;
+ u32 time_raw;
+ int err;
+ u64 ns;
+
+ if (!ps->tx_skb)
+ return 0;
+
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
+ MV88E6XXX_PORT_PTP_DEP_STS,
+ departure_block,
+ ARRAY_SIZE(departure_block));
+ mutex_unlock(&chip->reg_lock);
+
+ if (err)
+ goto free_and_clear_skb;
+
+ if (!(departure_block[0] & MV88E6XXX_PTP_TS_VALID)) {
+ if (time_is_before_jiffies(ps->tx_tstamp_start +
+ TX_TSTAMP_TIMEOUT)) {
+ dev_warn(chip->dev, "p%d: clearing tx timestamp hang\n",
+ ps->port_id);
+ goto free_and_clear_skb;
+ }
+ /* The timestamp should be available quickly, while getting it
+ * is high priority and time bounded to only 10ms. A poll is
+ * warranted so restart the work.
+ */
+ return 1;
+ }
+
+ /* We have the timestamp; go ahead and clear valid now */
+ mutex_lock(&chip->reg_lock);
+ mv88e6xxx_port_ptp_write(chip, ps->port_id,
+ MV88E6XXX_PORT_PTP_DEP_STS, 0);
+ mutex_unlock(&chip->reg_lock);
+
+ status = departure_block[0] & MV88E6XXX_PTP_TS_STATUS_MASK;
+ if (status != MV88E6XXX_PTP_TS_STATUS_NORMAL) {
+ dev_warn(chip->dev, "p%d: tx timestamp overrun\n", ps->port_id);
+ goto free_and_clear_skb;
+ }
+
+ if (departure_block[3] != ps->tx_seq_id) {
+ dev_warn(chip->dev, "p%d: unexpected seq. id\n", ps->port_id);
+ goto free_and_clear_skb;
+ }
+
+ memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+ time_raw = ((u32)departure_block[2] << 16) | departure_block[1];
+ mutex_lock(&chip->reg_lock);
+ ns = timecounter_cyc2time(&chip->tstamp_tc, time_raw);
+ mutex_unlock(&chip->reg_lock);
+ shhwtstamps.hwtstamp = ns_to_ktime(ns);
+
+ dev_dbg(chip->dev,
+ "p%d: txtstamp %llx status 0x%04x skb ID 0x%04x hw ID 0x%04x\n",
+ ps->port_id, ktime_to_ns(shhwtstamps.hwtstamp),
+ departure_block[0], ps->tx_seq_id, departure_block[3]);
+
+ /* skb_complete_tx_timestamp() will free up the client to make
+ * another timestamp-able transmit. We have to be ready for it
+ * -- by clearing the ps->tx_skb "flag" -- beforehand.
+ */
+
+ tmp_skb = ps->tx_skb;
+ ps->tx_skb = NULL;
+ clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+ skb_complete_tx_timestamp(tmp_skb, &shhwtstamps);
+
+ return 0;
+
+free_and_clear_skb:
+ dev_kfree_skb_any(ps->tx_skb);
+ ps->tx_skb = NULL;
+ clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+
+ return 0;
+}
+
+long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+ struct dsa_switch *ds = chip->ds;
+ struct mv88e6xxx_port_hwtstamp *ps;
+ int i, restart = 0;
+
+ for (i = 0; i < ds->num_ports; i++) {
+ if (!dsa_is_user_port(ds, i))
+ continue;
+
+ ps = &chip->port_hwtstamp[i];
+ if (test_bit(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state))
+ restart |= mv88e6xxx_txtstamp_work(chip, ps);
+
+ mv88e6xxx_rxtstamp_work(chip, ps);
+ }
+
+ return restart ? 1 : -1;
+}
+
+bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone, unsigned int type)
+{
+ struct mv88e6xxx_chip *chip = ds->priv;
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ __be16 *seq_ptr;
+ u8 *hdr;
+
+ if (!(skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP))
+ return false;
+
+ hdr = mv88e6xxx_should_tstamp(chip, port, clone, type);
+ if (!hdr)
+ return false;
+
+ seq_ptr = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
+
+ if (test_and_set_bit_lock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
+ &ps->state))
+ return false;
+
+ ps->tx_skb = clone;
+ ps->tx_tstamp_start = jiffies;
+ ps->tx_seq_id = be16_to_cpup(seq_ptr);
+
+ ptp_schedule_worker(chip->ptp_clock, 0);
+ return true;
+}
+
+static int mv88e6xxx_hwtstamp_port_setup(struct mv88e6xxx_chip *chip, int port)
+{
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+
+ ps->port_id = port;
+
+ skb_queue_head_init(&ps->rx_queue);
+ skb_queue_head_init(&ps->rx_queue2);
+
+ return mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+ MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP);
+}
+
+int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
+{
+ int err;
+ int i;
+
+ /* Disable timestamping on all ports. */
+ for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
+ err = mv88e6xxx_hwtstamp_port_setup(chip, i);
+ if (err)
+ return err;
+ }
+
+ /* MV88E6XXX_PTP_MSG_TYPE is a mask of PTP message types to
+ * timestamp. This affects all ports that have timestamping enabled,
+ * but the timestamp config is per-port; thus we configure all events
+ * here and only support the HWTSTAMP_FILTER_*_EVENT filter types.
+ */
+ err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_MSGTYPE,
+ MV88E6XXX_PTP_MSGTYPE_ALL_EVENT);
+ if (err)
+ return err;
+
+ /* Use ARRIVAL1 for peer delay response messages. */
+ err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_TS_ARRIVAL_PTR,
+ MV88E6XXX_PTP_MSGTYPE_PDLAY_RES);
+ if (err)
+ return err;
+
+ /* 88E6341 devices default to timestamping at the PHY, but this has
+ * a hardware issue that results in unreliable timestamps. Force
+ * these devices to timestamp at the MAC.
+ */
+ if (chip->info->family == MV88E6XXX_FAMILY_6341) {
+ u16 val = MV88E6341_PTP_CFG_UPDATE |
+ MV88E6341_PTP_CFG_MODE_IDX |
+ MV88E6341_PTP_CFG_MODE_TS_AT_MAC;
+ err = mv88e6xxx_ptp_write(chip, MV88E6341_PTP_CFG, val);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
+{
+}
--- /dev/null
+/*
+ * Marvell 88E6xxx Switch hardware timestamping support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Erik Hons <erik.hons@ni.com>
+ * Brandon Streiff <brandon.streiff@ni.com>
+ * Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_HWTSTAMP_H
+#define _MV88E6XXX_HWTSTAMP_H
+
+#include "chip.h"
+
+/* Global PTP registers */
+/* Offset 0x00: PTP EtherType */
+#define MV88E6XXX_PTP_ETHERTYPE 0x00
+
+/* Offset 0x01: Message Type Timestamp Enables */
+#define MV88E6XXX_PTP_MSGTYPE 0x01
+#define MV88E6XXX_PTP_MSGTYPE_SYNC 0x0001
+#define MV88E6XXX_PTP_MSGTYPE_DELAY_REQ 0x0002
+#define MV88E6XXX_PTP_MSGTYPE_PDLAY_REQ 0x0004
+#define MV88E6XXX_PTP_MSGTYPE_PDLAY_RES 0x0008
+#define MV88E6XXX_PTP_MSGTYPE_ALL_EVENT 0x000f
+
+/* Offset 0x02: Timestamp Arrival Capture Pointers */
+#define MV88E6XXX_PTP_TS_ARRIVAL_PTR 0x02
+
+/* Offset 0x07: PTP Global Configuration */
+#define MV88E6341_PTP_CFG 0x07
+#define MV88E6341_PTP_CFG_UPDATE 0x8000
+#define MV88E6341_PTP_CFG_IDX_MASK 0x7f00
+#define MV88E6341_PTP_CFG_DATA_MASK 0x00ff
+#define MV88E6341_PTP_CFG_MODE_IDX 0x0
+#define MV88E6341_PTP_CFG_MODE_TS_AT_PHY 0x00
+#define MV88E6341_PTP_CFG_MODE_TS_AT_MAC 0x80
+
+/* Offset 0x08: PTP Interrupt Status */
+#define MV88E6XXX_PTP_IRQ_STATUS 0x08
+
+/* Per-Port PTP Registers */
+/* Offset 0x00: PTP Configuration 0 */
+#define MV88E6XXX_PORT_PTP_CFG0 0x00
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_SHIFT 12
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_MASK 0xf000
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_1588 0x0000
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_8021AS 0x1000
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH 0x0800
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_OVERWRITE 0x0002
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP 0x0001
+
+/* Offset 0x01: PTP Configuration 1 */
+#define MV88E6XXX_PORT_PTP_CFG1 0x01
+
+/* Offset 0x02: PTP Configuration 2 */
+#define MV88E6XXX_PORT_PTP_CFG2 0x02
+#define MV88E6XXX_PORT_PTP_CFG2_EMBED_ARRIVAL 0x1000
+#define MV88E6XXX_PORT_PTP_CFG2_DEP_IRQ_EN 0x0002
+#define MV88E6XXX_PORT_PTP_CFG2_ARR_IRQ_EN 0x0001
+
+/* Offset 0x03: PTP LED Configuration */
+#define MV88E6XXX_PORT_PTP_LED_CFG 0x03
+
+/* Offset 0x08: PTP Arrival 0 Status */
+#define MV88E6XXX_PORT_PTP_ARR0_STS 0x08
+
+/* Offset 0x09/0x0A: PTP Arrival 0 Time */
+#define MV88E6XXX_PORT_PTP_ARR0_TIME_LO 0x09
+#define MV88E6XXX_PORT_PTP_ARR0_TIME_HI 0x0a
+
+/* Offset 0x0B: PTP Arrival 0 Sequence ID */
+#define MV88E6XXX_PORT_PTP_ARR0_SEQID 0x0b
+
+/* Offset 0x0C: PTP Arrival 1 Status */
+#define MV88E6XXX_PORT_PTP_ARR1_STS 0x0c
+
+/* Offset 0x0D/0x0E: PTP Arrival 1 Time */
+#define MV88E6XXX_PORT_PTP_ARR1_TIME_LO 0x0d
+#define MV88E6XXX_PORT_PTP_ARR1_TIME_HI 0x0e
+
+/* Offset 0x0F: PTP Arrival 1 Sequence ID */
+#define MV88E6XXX_PORT_PTP_ARR1_SEQID 0x0f
+
+/* Offset 0x10: PTP Departure Status */
+#define MV88E6XXX_PORT_PTP_DEP_STS 0x10
+
+/* Offset 0x11/0x12: PTP Deperture Time */
+#define MV88E6XXX_PORT_PTP_DEP_TIME_LO 0x11
+#define MV88E6XXX_PORT_PTP_DEP_TIME_HI 0x12
+
+/* Offset 0x13: PTP Departure Sequence ID */
+#define MV88E6XXX_PORT_PTP_DEP_SEQID 0x13
+
+/* Status fields for arrival and depature timestamp status registers */
+#define MV88E6XXX_PTP_TS_STATUS_MASK 0x0006
+#define MV88E6XXX_PTP_TS_STATUS_NORMAL 0x0000
+#define MV88E6XXX_PTP_TS_STATUS_OVERWITTEN 0x0002
+#define MV88E6XXX_PTP_TS_STATUS_DISCARDED 0x0004
+#define MV88E6XXX_PTP_TS_VALID 0x0001
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
+
+int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
+ struct ifreq *ifr);
+int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
+ struct ifreq *ifr);
+
+bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone, unsigned int type);
+bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone, unsigned int type);
+
+int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+ struct ethtool_ts_info *info);
+
+int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip);
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+static inline int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds,
+ int port, struct ifreq *ifr)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds,
+ int port, struct ifreq *ifr)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone,
+ unsigned int type)
+{
+ return false;
+}
+
+static inline bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone,
+ unsigned int type)
+{
+ return false;
+}
+
+static inline int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+ struct ethtool_ts_info *info)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
+{
+ return 0;
+}
+
+static inline void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
+{
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+#endif /* _MV88E6XXX_HWTSTAMP_H */
--- /dev/null
+/*
+ * Marvell 88E6xxx Switch PTP support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Erik Hons <erik.hons@ni.com>
+ * Brandon Streiff <brandon.streiff@ni.com>
+ * Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+#include "ptp.h"
+
+/* Raw timestamps are in units of 8-ns clock periods. */
+#define CC_SHIFT 28
+#define CC_MULT (8 << CC_SHIFT)
+#define CC_MULT_NUM (1 << 9)
+#define CC_MULT_DEM 15625ULL
+
+#define TAI_EVENT_WORK_INTERVAL msecs_to_jiffies(100)
+
+#define cc_to_chip(cc) container_of(cc, struct mv88e6xxx_chip, tstamp_cc)
+#define dw_overflow_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
+ overflow_work)
+#define dw_tai_event_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
+ tai_event_work)
+
+static int mv88e6xxx_tai_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ if (!chip->info->ops->avb_ops->tai_read)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->tai_read(chip, addr, data, len);
+}
+
+static int mv88e6xxx_tai_write(struct mv88e6xxx_chip *chip, int addr, u16 data)
+{
+ if (!chip->info->ops->avb_ops->tai_write)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->tai_write(chip, addr, data);
+}
+
+/* TODO: places where this are called should be using pinctrl */
+static int mv88e6xxx_set_gpio_func(struct mv88e6xxx_chip *chip, int pin,
+ int func, int input)
+{
+ int err;
+
+ if (!chip->info->ops->gpio_ops)
+ return -EOPNOTSUPP;
+
+ err = chip->info->ops->gpio_ops->set_dir(chip, pin, input);
+ if (err)
+ return err;
+
+ return chip->info->ops->gpio_ops->set_pctl(chip, pin, func);
+}
+
+static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc)
+{
+ struct mv88e6xxx_chip *chip = cc_to_chip(cc);
+ u16 phc_time[2];
+ int err;
+
+ err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_TIME_LO, phc_time,
+ ARRAY_SIZE(phc_time));
+ if (err)
+ return 0;
+ else
+ return ((u32)phc_time[1] << 16) | phc_time[0];
+}
+
+/* mv88e6xxx_config_eventcap - configure TAI event capture
+ * @event: PTP_CLOCK_PPS (internal) or PTP_CLOCK_EXTTS (external)
+ * @rising: zero for falling-edge trigger, else rising-edge trigger
+ *
+ * This will also reset the capture sequence counter.
+ */
+static int mv88e6xxx_config_eventcap(struct mv88e6xxx_chip *chip, int event,
+ int rising)
+{
+ u16 global_config;
+ u16 cap_config;
+ int err;
+
+ chip->evcap_config = MV88E6XXX_TAI_CFG_CAP_OVERWRITE |
+ MV88E6XXX_TAI_CFG_CAP_CTR_START;
+ if (!rising)
+ chip->evcap_config |= MV88E6XXX_TAI_CFG_EVREQ_FALLING;
+
+ global_config = (chip->evcap_config | chip->trig_config);
+ err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_CFG, global_config);
+ if (err)
+ return err;
+
+ if (event == PTP_CLOCK_PPS) {
+ cap_config = MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG;
+ } else if (event == PTP_CLOCK_EXTTS) {
+ /* if STATUS_CAP_TRIG is unset we capture PTP_EVREQ events */
+ cap_config = 0;
+ } else {
+ return -EINVAL;
+ }
+
+ /* Write the capture config; this also clears the capture counter */
+ err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS,
+ cap_config);
+
+ return err;
+}
+
+static void mv88e6xxx_tai_event_work(struct work_struct *ugly)
+{
+ struct delayed_work *dw = to_delayed_work(ugly);
+ struct mv88e6xxx_chip *chip = dw_tai_event_to_chip(dw);
+ struct ptp_clock_event ev;
+ u16 status[4];
+ u32 raw_ts;
+ int err;
+
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_EVENT_STATUS,
+ status, ARRAY_SIZE(status));
+ mutex_unlock(&chip->reg_lock);
+
+ if (err) {
+ dev_err(chip->dev, "failed to read TAI status register\n");
+ return;
+ }
+ if (status[0] & MV88E6XXX_TAI_EVENT_STATUS_ERROR) {
+ dev_warn(chip->dev, "missed event capture\n");
+ return;
+ }
+ if (!(status[0] & MV88E6XXX_TAI_EVENT_STATUS_VALID))
+ goto out;
+
+ raw_ts = ((u32)status[2] << 16) | status[1];
+
+ /* Clear the valid bit so the next timestamp can come in */
+ status[0] &= ~MV88E6XXX_TAI_EVENT_STATUS_VALID;
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS, status[0]);
+ mutex_unlock(&chip->reg_lock);
+
+ /* This is an external timestamp */
+ ev.type = PTP_CLOCK_EXTTS;
+
+ /* We only have one timestamping channel. */
+ ev.index = 0;
+ mutex_lock(&chip->reg_lock);
+ ev.timestamp = timecounter_cyc2time(&chip->tstamp_tc, raw_ts);
+ mutex_unlock(&chip->reg_lock);
+
+ ptp_clock_event(chip->ptp_clock, &ev);
+out:
+ schedule_delayed_work(&chip->tai_event_work, TAI_EVENT_WORK_INTERVAL);
+}
+
+static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+ int neg_adj = 0;
+ u32 diff, mult;
+ u64 adj;
+
+ if (scaled_ppm < 0) {
+ neg_adj = 1;
+ scaled_ppm = -scaled_ppm;
+ }
+ mult = CC_MULT;
+ adj = CC_MULT_NUM;
+ adj *= scaled_ppm;
+ diff = div_u64(adj, CC_MULT_DEM);
+
+ mutex_lock(&chip->reg_lock);
+
+ timecounter_read(&chip->tstamp_tc);
+ chip->tstamp_cc.mult = neg_adj ? mult - diff : mult + diff;
+
+ mutex_unlock(&chip->reg_lock);
+
+ return 0;
+}
+
+static int mv88e6xxx_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+
+ mutex_lock(&chip->reg_lock);
+ timecounter_adjtime(&chip->tstamp_tc, delta);
+ mutex_unlock(&chip->reg_lock);
+
+ return 0;
+}
+
+static int mv88e6xxx_ptp_gettime(struct ptp_clock_info *ptp,
+ struct timespec64 *ts)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+ u64 ns;
+
+ mutex_lock(&chip->reg_lock);
+ ns = timecounter_read(&chip->tstamp_tc);
+ mutex_unlock(&chip->reg_lock);
+
+ *ts = ns_to_timespec64(ns);
+
+ return 0;
+}
+
+static int mv88e6xxx_ptp_settime(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+ u64 ns;
+
+ ns = timespec64_to_ns(ts);
+
+ mutex_lock(&chip->reg_lock);
+ timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc, ns);
+ mutex_unlock(&chip->reg_lock);
+
+ return 0;
+}
+
+static int mv88e6xxx_ptp_enable_extts(struct mv88e6xxx_chip *chip,
+ struct ptp_clock_request *rq, int on)
+{
+ int rising = (rq->extts.flags & PTP_RISING_EDGE);
+ int func;
+ int pin;
+ int err;
+
+ pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index);
+
+ if (pin < 0)
+ return -EBUSY;
+
+ mutex_lock(&chip->reg_lock);
+
+ if (on) {
+ func = MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ;
+
+ err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
+ if (err)
+ goto out;
+
+ schedule_delayed_work(&chip->tai_event_work,
+ TAI_EVENT_WORK_INTERVAL);
+
+ err = mv88e6xxx_config_eventcap(chip, PTP_CLOCK_EXTTS, rising);
+ } else {
+ func = MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO;
+
+ err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
+
+ cancel_delayed_work_sync(&chip->tai_event_work);
+ }
+
+out:
+ mutex_unlock(&chip->reg_lock);
+
+ return err;
+}
+
+static int mv88e6xxx_ptp_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq, int on)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+
+ switch (rq->type) {
+ case PTP_CLK_REQ_EXTTS:
+ return mv88e6xxx_ptp_enable_extts(chip, rq, on);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mv88e6xxx_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+ enum ptp_pin_function func, unsigned int chan)
+{
+ switch (func) {
+ case PTP_PF_NONE:
+ case PTP_PF_EXTTS:
+ break;
+ case PTP_PF_PEROUT:
+ case PTP_PF_PHYSYNC:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+/* With a 125MHz input clock, the 32-bit timestamp counter overflows in ~34.3
+ * seconds; this task forces periodic reads so that we don't miss any.
+ */
+#define MV88E6XXX_TAI_OVERFLOW_PERIOD (HZ * 16)
+static void mv88e6xxx_ptp_overflow_check(struct work_struct *work)
+{
+ struct delayed_work *dw = to_delayed_work(work);
+ struct mv88e6xxx_chip *chip = dw_overflow_to_chip(dw);
+ struct timespec64 ts;
+
+ mv88e6xxx_ptp_gettime(&chip->ptp_clock_info, &ts);
+
+ schedule_delayed_work(&chip->overflow_work,
+ MV88E6XXX_TAI_OVERFLOW_PERIOD);
+}
+
+int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
+{
+ int i;
+
+ /* Set up the cycle counter */
+ memset(&chip->tstamp_cc, 0, sizeof(chip->tstamp_cc));
+ chip->tstamp_cc.read = mv88e6xxx_ptp_clock_read;
+ chip->tstamp_cc.mask = CYCLECOUNTER_MASK(32);
+ chip->tstamp_cc.mult = CC_MULT;
+ chip->tstamp_cc.shift = CC_SHIFT;
+
+ timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc,
+ ktime_to_ns(ktime_get_real()));
+
+ INIT_DELAYED_WORK(&chip->overflow_work, mv88e6xxx_ptp_overflow_check);
+ INIT_DELAYED_WORK(&chip->tai_event_work, mv88e6xxx_tai_event_work);
+
+ chip->ptp_clock_info.owner = THIS_MODULE;
+ snprintf(chip->ptp_clock_info.name, sizeof(chip->ptp_clock_info.name),
+ dev_name(chip->dev));
+ chip->ptp_clock_info.max_adj = 1000000;
+
+ chip->ptp_clock_info.n_ext_ts = 1;
+ chip->ptp_clock_info.n_per_out = 0;
+ chip->ptp_clock_info.n_pins = mv88e6xxx_num_gpio(chip);
+ chip->ptp_clock_info.pps = 0;
+
+ for (i = 0; i < chip->ptp_clock_info.n_pins; ++i) {
+ struct ptp_pin_desc *ppd = &chip->pin_config[i];
+
+ snprintf(ppd->name, sizeof(ppd->name), "mv88e6xxx_gpio%d", i);
+ ppd->index = i;
+ ppd->func = PTP_PF_NONE;
+ }
+ chip->ptp_clock_info.pin_config = chip->pin_config;
+
+ chip->ptp_clock_info.adjfine = mv88e6xxx_ptp_adjfine;
+ chip->ptp_clock_info.adjtime = mv88e6xxx_ptp_adjtime;
+ chip->ptp_clock_info.gettime64 = mv88e6xxx_ptp_gettime;
+ chip->ptp_clock_info.settime64 = mv88e6xxx_ptp_settime;
+ chip->ptp_clock_info.enable = mv88e6xxx_ptp_enable;
+ chip->ptp_clock_info.verify = mv88e6xxx_ptp_verify;
+ chip->ptp_clock_info.do_aux_work = mv88e6xxx_hwtstamp_work;
+
+ chip->ptp_clock = ptp_clock_register(&chip->ptp_clock_info, chip->dev);
+ if (IS_ERR(chip->ptp_clock))
+ return PTR_ERR(chip->ptp_clock);
+
+ schedule_delayed_work(&chip->overflow_work,
+ MV88E6XXX_TAI_OVERFLOW_PERIOD);
+
+ return 0;
+}
+
+void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
+{
+ if (chip->ptp_clock) {
+ cancel_delayed_work_sync(&chip->overflow_work);
+ cancel_delayed_work_sync(&chip->tai_event_work);
+
+ ptp_clock_unregister(chip->ptp_clock);
+ chip->ptp_clock = NULL;
+ }
+}
--- /dev/null
+/*
+ * Marvell 88E6xxx Switch PTP support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Erik Hons <erik.hons@ni.com>
+ * Brandon Streiff <brandon.streiff@ni.com>
+ * Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_PTP_H
+#define _MV88E6XXX_PTP_H
+
+#include "chip.h"
+
+/* Offset 0x00: TAI Global Config */
+#define MV88E6XXX_TAI_CFG 0x00
+#define MV88E6XXX_TAI_CFG_CAP_OVERWRITE 0x8000
+#define MV88E6XXX_TAI_CFG_CAP_CTR_START 0x4000
+#define MV88E6XXX_TAI_CFG_EVREQ_FALLING 0x2000
+#define MV88E6XXX_TAI_CFG_TRIG_ACTIVE_LO 0x1000
+#define MV88E6XXX_TAI_CFG_IRL_ENABLE 0x0400
+#define MV88E6XXX_TAI_CFG_TRIG_IRQ_EN 0x0200
+#define MV88E6XXX_TAI_CFG_EVREQ_IRQ_EN 0x0100
+#define MV88E6XXX_TAI_CFG_TRIG_LOCK 0x0080
+#define MV88E6XXX_TAI_CFG_BLOCK_UPDATE 0x0008
+#define MV88E6XXX_TAI_CFG_MULTI_PTP 0x0004
+#define MV88E6XXX_TAI_CFG_TRIG_MODE_ONESHOT 0x0002
+#define MV88E6XXX_TAI_CFG_TRIG_ENABLE 0x0001
+
+/* Offset 0x01: Timestamp Clock Period (ps) */
+#define MV88E6XXX_TAI_CLOCK_PERIOD 0x01
+
+/* Offset 0x02/0x03: Trigger Generation Amount */
+#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_LO 0x02
+#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_HI 0x03
+
+/* Offset 0x04: Clock Compensation */
+#define MV88E6XXX_TAI_TRIG_CLOCK_COMP 0x04
+
+/* Offset 0x05: Trigger Configuration */
+#define MV88E6XXX_TAI_TRIG_CFG 0x05
+
+/* Offset 0x06: Ingress Rate Limiter Clock Generation Amount */
+#define MV88E6XXX_TAI_IRL_AMOUNT 0x06
+
+/* Offset 0x07: Ingress Rate Limiter Compensation */
+#define MV88E6XXX_TAI_IRL_COMP 0x07
+
+/* Offset 0x08: Ingress Rate Limiter Compensation */
+#define MV88E6XXX_TAI_IRL_COMP_PS 0x08
+
+/* Offset 0x09: Event Status */
+#define MV88E6XXX_TAI_EVENT_STATUS 0x09
+#define MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG 0x4000
+#define MV88E6XXX_TAI_EVENT_STATUS_ERROR 0x0200
+#define MV88E6XXX_TAI_EVENT_STATUS_VALID 0x0100
+#define MV88E6XXX_TAI_EVENT_STATUS_CTR_MASK 0x00ff
+
+/* Offset 0x0A/0x0B: Event Time */
+#define MV88E6XXX_TAI_EVENT_TIME_LO 0x0a
+#define MV88E6XXX_TAI_EVENT_TYPE_HI 0x0b
+
+/* Offset 0x0E/0x0F: PTP Global Time */
+#define MV88E6XXX_TAI_TIME_LO 0x0e
+#define MV88E6XXX_TAI_TIME_HI 0x0f
+
+/* Offset 0x10/0x11: Trig Generation Time */
+#define MV88E6XXX_TAI_TRIG_TIME_LO 0x10
+#define MV88E6XXX_TAI_TRIG_TIME_HI 0x11
+
+/* Offset 0x12: Lock Status */
+#define MV88E6XXX_TAI_LOCK_STATUS 0x12
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
+
+long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp);
+int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip);
+
+#define ptp_to_chip(ptp) container_of(ptp, struct mv88e6xxx_chip, \
+ ptp_clock_info)
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+static inline long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
+{
+ return -1;
+}
+
+static inline int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
+{
+ return 0;
+}
+
+static inline void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
+{
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+#endif /* _MV88E6XXX_PTP_H */
return err;
}
-int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
+static bool mv88e6352_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
{
- int err;
u8 cmode;
+ int err;
err = mv88e6xxx_port_get_cmode(chip, port, &cmode);
- if (err)
- return err;
+ if (err) {
+ dev_err(chip->dev, "failed to read cmode\n");
+ return 0;
+ }
if ((cmode == MV88E6XXX_PORT_STS_CMODE_100BASE_X) ||
(cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X) ||
- (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII)) {
+ (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII))
+ return 1;
+
+ return 0;
+}
+
+int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
+{
+ int err;
+
+ if (mv88e6352_port_has_serdes(chip, port)) {
err = mv88e6352_serdes_power_set(chip, on);
if (err < 0)
return err;
return 0;
}
+struct mv88e6352_serdes_hw_stat {
+ char string[ETH_GSTRING_LEN];
+ int sizeof_stat;
+ int reg;
+};
+
+static struct mv88e6352_serdes_hw_stat mv88e6352_serdes_hw_stats[] = {
+ { "serdes_fibre_rx_error", 16, 21 },
+ { "serdes_PRBS_error", 32, 24 },
+};
+
+int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port)
+{
+ if (mv88e6352_port_has_serdes(chip, port))
+ return ARRAY_SIZE(mv88e6352_serdes_hw_stats);
+
+ return 0;
+}
+
+void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+ int port, uint8_t *data)
+{
+ struct mv88e6352_serdes_hw_stat *stat;
+ int i;
+
+ if (!mv88e6352_port_has_serdes(chip, port))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) {
+ stat = &mv88e6352_serdes_hw_stats[i];
+ memcpy(data + i * ETH_GSTRING_LEN, stat->string,
+ ETH_GSTRING_LEN);
+ }
+}
+
+static uint64_t mv88e6352_serdes_get_stat(struct mv88e6xxx_chip *chip,
+ struct mv88e6352_serdes_hw_stat *stat)
+{
+ u64 val = 0;
+ u16 reg;
+ int err;
+
+ err = mv88e6352_serdes_read(chip, stat->reg, ®);
+ if (err) {
+ dev_err(chip->dev, "failed to read statistic\n");
+ return 0;
+ }
+
+ val = reg;
+
+ if (stat->sizeof_stat == 32) {
+ err = mv88e6352_serdes_read(chip, stat->reg + 1, ®);
+ if (err) {
+ dev_err(chip->dev, "failed to read statistic\n");
+ return 0;
+ }
+ val = val << 16 | reg;
+ }
+
+ return val;
+}
+
+void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data)
+{
+ struct mv88e6xxx_port *mv88e6xxx_port = &chip->ports[port];
+ struct mv88e6352_serdes_hw_stat *stat;
+ u64 value;
+ int i;
+
+ if (!mv88e6352_port_has_serdes(chip, port))
+ return;
+
+ BUILD_BUG_ON(ARRAY_SIZE(mv88e6352_serdes_hw_stats) >
+ ARRAY_SIZE(mv88e6xxx_port->serdes_stats));
+
+ for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) {
+ stat = &mv88e6352_serdes_hw_stats[i];
+ value = mv88e6352_serdes_get_stat(chip, stat);
+ mv88e6xxx_port->serdes_stats[i] += value;
+ data[i] = mv88e6xxx_port->serdes_stats[i];
+ }
+}
+
/* Set the power on/off for 10GBASE-R and 10GBASE-X4/X2 */
static int mv88e6390_serdes_10g(struct mv88e6xxx_chip *chip, int addr, bool on)
{
int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
-
+int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port);
+void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+ int port, uint8_t *data);
+void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data);
#endif
}
static int
-qca8k_get_sset_count(struct dsa_switch *ds)
+qca8k_get_sset_count(struct dsa_switch *ds, int port)
{
return ARRAY_SIZE(ar8327_mib);
}
obj-$(CONFIG_APNE) += apne.o 8390.o
obj-$(CONFIG_ARM_ETHERH) += etherh.o
obj-$(CONFIG_AX88796) += ax88796.o
-obj-$(CONFIG_HYDRA) += hydra.o 8390.o
-obj-$(CONFIG_MCF8390) += mcf8390.o 8390.o
+obj-$(CONFIG_HYDRA) += hydra.o
+obj-$(CONFIG_MCF8390) += mcf8390.o
obj-$(CONFIG_NE2000) += ne.o 8390p.o
obj-$(CONFIG_NE2K_PCI) += ne2k-pci.o 8390.o
obj-$(CONFIG_PCMCIA_AXNET) += axnet_cs.o 8390.o
obj-$(CONFIG_STNIC) += stnic.o 8390.o
obj-$(CONFIG_ULTRA) += smc-ultra.o 8390.o
obj-$(CONFIG_WD80x3) += wd.o 8390.o
-obj-$(CONFIG_ZORRO8390) += zorro8390.o 8390.o
+obj-$(CONFIG_ZORRO8390) += zorro8390.o
#define AX_GPOC_PPDSET BIT(6)
-static u32 ax_msg_enable;
-
/* device private data */
struct ax_device {
ei_local->block_output = &ax_block_output;
ei_local->get_8390_hdr = &ax_get_8390_hdr;
ei_local->priv = 0;
- ei_local->msg_enable = ax_msg_enable;
dev->netdev_ops = &ax_netdev_ops;
dev->ethtool_ops = &ax_ethtool_ops;
static int ax_open(struct net_device *dev);
static int ax_close(struct net_device *dev);
static irqreturn_t ax_interrupt(int irq, void *dev_id);
-static u32 axnet_msg_enable;
/*====================================================================*/
return -ENOMEM;
ei_local = netdev_priv(dev);
- ei_local->msg_enable = axnet_msg_enable;
spin_lock_init(&ei_local->page_lock);
info = PRIV(dev);
#include "lib8390.c"
-static u32 etherh_msg_enable;
-
struct etherh_priv {
void __iomem *ioc_fast;
void __iomem *memc;
return 0;
}
-/*
- * Initialisation
- */
-
-static void __init etherh_banner(void)
-{
- static int version_printed;
-
- if ((etherh_msg_enable & NETIF_MSG_DRV) && (version_printed++ == 0))
- pr_info("%s", version);
-}
-
/*
* Read the ethernet address string from the on board rom.
* This is an ascii string...
struct etherh_priv *eh;
int ret;
- etherh_banner();
-
ret = ecard_request_resources(ec);
if (ret)
goto out;
ei_local->block_output = etherh_block_output;
ei_local->get_8390_hdr = etherh_get_header;
ei_local->interface_num = 0;
- ei_local->msg_enable = etherh_msg_enable;
etherh_reset(dev);
__NS8390_init(dev, 0);
static void hydra_block_output(struct net_device *dev, int count,
const unsigned char *buf, int start_page);
static void hydra_remove_one(struct zorro_dev *z);
-static u32 hydra_msg_enable;
static struct zorro_device_id hydra_zorro_tbl[] = {
{ ZORRO_PROD_HYDRA_SYSTEMS_AMIGANET },
int start_page, stop_page;
int j;
int err;
- struct ei_device *ei_local;
static u32 hydra_offsets[16] = {
0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
start_page = NESM_START_PG;
stop_page = NESM_STOP_PG;
- ei_local = netdev_priv(dev);
- ei_local->msg_enable = hydra_msg_enable;
dev->base_addr = ioaddr;
dev->irq = IRQ_AMIGA_PORTS;
ether_setup(dev);
spin_lock_init(&ei_local->page_lock);
+
+ ei_local->msg_enable = msg_enable;
}
/**
};
extern int mac8390_memtest(struct net_device *dev);
-static int mac8390_initdev(struct net_device *dev,
- struct nubus_rsrc *ndev,
+static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
enum mac8390_type type);
static int mac8390_open(struct net_device *dev);
const unsigned char *buf, int start_page);
static void word_memcpy_tocard(unsigned long tp, const void *fp, int count);
static void word_memcpy_fromcard(void *tp, unsigned long fp, int count);
-static u32 mac8390_msg_enable;
-static enum mac8390_type __init mac8390_ident(struct nubus_rsrc *fres)
+static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres)
{
switch (fres->dr_sw) {
case NUBUS_DRSW_3COM:
return MAC8390_NONE;
}
-static enum mac8390_access __init mac8390_testio(volatile unsigned long membase)
+static enum mac8390_access mac8390_testio(unsigned long membase)
{
unsigned long outdata = 0xA5A0B5B0;
unsigned long indata = 0x00000000;
return ACCESS_UNKNOWN;
}
-static int __init mac8390_memsize(unsigned long membase)
+static int mac8390_memsize(unsigned long membase)
{
unsigned long flags;
int i, j;
return i * 0x1000;
}
-static bool __init mac8390_init(struct net_device *dev,
- struct nubus_rsrc *ndev,
- enum mac8390_type cardtype)
+static bool mac8390_rsrc_init(struct net_device *dev,
+ struct nubus_rsrc *fres,
+ enum mac8390_type cardtype)
{
+ struct nubus_board *board = fres->board;
struct nubus_dir dir;
struct nubus_dirent ent;
int offset;
volatile unsigned short *i;
- printk_once(KERN_INFO pr_fmt("%s"), version);
-
- dev->irq = SLOT2IRQ(ndev->board->slot);
+ dev->irq = SLOT2IRQ(board->slot);
/* This is getting to be a habit */
- dev->base_addr = (ndev->board->slot_addr |
- ((ndev->board->slot & 0xf) << 20));
+ dev->base_addr = board->slot_addr | ((board->slot & 0xf) << 20);
/*
* Get some Nubus info - we will trust the card's idea
* of where its memory and registers are.
*/
- if (nubus_get_func_dir(ndev, &dir) == -1) {
- pr_err("%s: Unable to get Nubus functional directory for slot %X!\n",
- dev->name, ndev->board->slot);
+ if (nubus_get_func_dir(fres, &dir) == -1) {
+ dev_err(&board->dev,
+ "Unable to get Nubus functional directory\n");
return false;
}
/* Get the MAC address */
if (nubus_find_rsrc(&dir, NUBUS_RESID_MAC_ADDRESS, &ent) == -1) {
- pr_info("%s: Couldn't get MAC address!\n", dev->name);
+ dev_info(&board->dev, "MAC address resource not found\n");
return false;
}
nubus_rewinddir(&dir);
if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_BASEOS,
&ent) == -1) {
- pr_err("%s: Memory offset resource for slot %X not found!\n",
- dev->name, ndev->board->slot);
+ dev_err(&board->dev,
+ "Memory offset resource not found\n");
return false;
}
nubus_get_rsrc_mem(&offset, &ent, 4);
nubus_rewinddir(&dir);
if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_LENGTH,
&ent) == -1) {
- pr_info("%s: Memory length resource for slot %X not found, probing\n",
- dev->name, ndev->board->slot);
+ dev_info(&board->dev,
+ "Memory length resource not found, probing\n");
offset = mac8390_memsize(dev->mem_start);
} else {
nubus_get_rsrc_mem(&offset, &ent, 4);
switch (cardtype) {
case MAC8390_KINETICS:
case MAC8390_DAYNA: /* it's the same */
- dev->base_addr = (int)(ndev->board->slot_addr +
+ dev->base_addr = (int)(board->slot_addr +
DAYNA_8390_BASE);
- dev->mem_start = (int)(ndev->board->slot_addr +
+ dev->mem_start = (int)(board->slot_addr +
DAYNA_8390_MEM);
dev->mem_end = dev->mem_start +
mac8390_memsize(dev->mem_start);
break;
case MAC8390_INTERLAN:
- dev->base_addr = (int)(ndev->board->slot_addr +
+ dev->base_addr = (int)(board->slot_addr +
INTERLAN_8390_BASE);
- dev->mem_start = (int)(ndev->board->slot_addr +
+ dev->mem_start = (int)(board->slot_addr +
INTERLAN_8390_MEM);
dev->mem_end = dev->mem_start +
mac8390_memsize(dev->mem_start);
break;
case MAC8390_CABLETRON:
- dev->base_addr = (int)(ndev->board->slot_addr +
+ dev->base_addr = (int)(board->slot_addr +
CABLETRON_8390_BASE);
- dev->mem_start = (int)(ndev->board->slot_addr +
+ dev->mem_start = (int)(board->slot_addr +
CABLETRON_8390_MEM);
/* The base address is unreadable if 0x00
* has been written to the command register
break;
default:
- pr_err("Card type %s is unsupported, sorry\n",
- ndev->board->name);
+ dev_err(&board->dev,
+ "No known base address for card type\n");
return false;
}
}
return true;
}
-struct net_device * __init mac8390_probe(int unit)
+static int mac8390_device_probe(struct nubus_board *board)
{
struct net_device *dev;
- struct nubus_rsrc *ndev = NULL;
int err = -ENODEV;
- struct ei_device *ei_local;
-
- static unsigned int slots;
-
- enum mac8390_type cardtype;
-
- /* probably should check for Nubus instead */
-
- if (!MACH_IS_MAC)
- return ERR_PTR(-ENODEV);
+ struct nubus_rsrc *fres;
+ enum mac8390_type cardtype = MAC8390_NONE;
dev = ____alloc_ei_netdev(0);
if (!dev)
- return ERR_PTR(-ENOMEM);
-
- if (unit >= 0)
- sprintf(dev->name, "eth%d", unit);
+ return -ENOMEM;
- for_each_func_rsrc(ndev) {
- if (ndev->category != NUBUS_CAT_NETWORK ||
- ndev->type != NUBUS_TYPE_ETHERNET)
- continue;
+ SET_NETDEV_DEV(dev, &board->dev);
- /* Have we seen it already? */
- if (slots & (1 << ndev->board->slot))
+ for_each_board_func_rsrc(board, fres) {
+ if (fres->category != NUBUS_CAT_NETWORK ||
+ fres->type != NUBUS_TYPE_ETHERNET)
continue;
- slots |= 1 << ndev->board->slot;
- cardtype = mac8390_ident(ndev);
+ cardtype = mac8390_ident(fres);
if (cardtype == MAC8390_NONE)
continue;
- if (!mac8390_init(dev, ndev, cardtype))
- continue;
-
- /* Do the nasty 8390 stuff */
- if (!mac8390_initdev(dev, ndev, cardtype))
+ if (mac8390_rsrc_init(dev, fres, cardtype))
break;
}
-
- if (!ndev)
+ if (!fres)
goto out;
- ei_local = netdev_priv(dev);
- ei_local->msg_enable = mac8390_msg_enable;
+ err = mac8390_initdev(dev, board, cardtype);
+ if (err)
+ goto out;
err = register_netdev(dev);
if (err)
goto out;
- return dev;
+
+ nubus_set_drvdata(board, dev);
+ return 0;
out:
free_netdev(dev);
- return ERR_PTR(err);
+ return err;
+}
+
+static int mac8390_device_remove(struct nubus_board *board)
+{
+ struct net_device *dev = nubus_get_drvdata(board);
+
+ unregister_netdev(dev);
+ free_netdev(dev);
+ return 0;
}
-#ifdef MODULE
+static struct nubus_driver mac8390_driver = {
+ .probe = mac8390_device_probe,
+ .remove = mac8390_device_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .owner = THIS_MODULE,
+ }
+};
+
MODULE_AUTHOR("David Huggins-Daines <dhd@debian.org> and others");
MODULE_DESCRIPTION("Macintosh NS8390-based Nubus Ethernet driver");
MODULE_LICENSE("GPL");
-static struct net_device *dev_mac8390;
-
-int __init init_module(void)
+static int __init mac8390_init(void)
{
- dev_mac8390 = mac8390_probe(-1);
- if (IS_ERR(dev_mac8390)) {
- pr_warn("mac8390: No card found\n");
- return PTR_ERR(dev_mac8390);
- }
- return 0;
+ return nubus_driver_register(&mac8390_driver);
}
+module_init(mac8390_init);
-void __exit cleanup_module(void)
+static void __exit mac8390_exit(void)
{
- unregister_netdev(dev_mac8390);
- free_netdev(dev_mac8390);
+ nubus_driver_unregister(&mac8390_driver);
}
-
-#endif /* MODULE */
+module_exit(mac8390_exit);
static const struct net_device_ops mac8390_netdev_ops = {
.ndo_open = mac8390_open,
#endif
};
-static int __init mac8390_initdev(struct net_device *dev,
- struct nubus_rsrc *ndev,
- enum mac8390_type type)
+static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
+ enum mac8390_type type)
{
static u32 fwrd4_offsets[16] = {
0, 4, 8, 12,
case MAC8390_APPLE:
switch (mac8390_testio(dev->mem_start)) {
case ACCESS_UNKNOWN:
- pr_err("Don't know how to access card memory!\n");
+ dev_err(&board->dev,
+ "Don't know how to access card memory\n");
return -ENODEV;
case ACCESS_16:
break;
default:
- pr_err("Card type %s is unsupported, sorry\n",
- ndev->board->name);
+ dev_err(&board->dev, "Unsupported card type\n");
return -ENODEV;
}
__NS8390_init(dev, 0);
/* Good, done, now spit out some messages */
- pr_info("%s: %s in slot %X (type %s)\n",
- dev->name, ndev->board->name, ndev->board->slot,
- cardname[type]);
- pr_info("MAC %pM IRQ %d, %d KB shared memory at %#lx, %d-bit access.\n",
- dev->dev_addr, dev->irq,
- (unsigned int)(dev->mem_end - dev->mem_start) >> 10,
- dev->mem_start, access_bitmode ? 32 : 16);
+ dev_info(&board->dev, "%s (type %s)\n", board->name, cardname[type]);
+ dev_info(&board->dev, "MAC %pM, IRQ %d, %d KB shared memory at %#lx, %d-bit access.\n",
+ dev->dev_addr, dev->irq,
+ (unsigned int)(dev->mem_end - dev->mem_start) >> 10,
+ dev->mem_start, access_bitmode ? 32 : 16);
return 0;
}
#define NESM_START_PG 0x40 /* First page of TX buffer */
#define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */
-static u32 mcf8390_msg_enable;
#ifdef NE2000_ODDOFFSET
/*
static int mcf8390_probe(struct platform_device *pdev)
{
struct net_device *dev;
- struct ei_device *ei_local;
struct resource *mem, *irq;
resource_size_t msize;
int ret;
SET_NETDEV_DEV(dev, &pdev->dev);
platform_set_drvdata(pdev, dev);
- ei_local = netdev_priv(dev);
- ei_local->msg_enable = mcf8390_msg_enable;
dev->irq = irq->start;
dev->base_addr = mem->start;
mdelay(10); /* wait 10ms for interrupt to propagate */
outb_p(0x00, ioaddr + EN0_IMR); /* Mask it again. */
dev->irq = probe_irq_off(cookie);
- if (netif_msg_probe(ei_local))
+ if (ne_msg_enable & NETIF_MSG_PROBE)
pr_cont(" autoirq is %d", dev->irq);
} else if (dev->irq == 2)
/* Fixup for users that don't know that IRQ 2 is really IRQ 9,
#define PCNET_RDC_TIMEOUT (2*HZ/100) /* Max wait in jiffies for Tx RDC */
static const char *if_names[] = { "auto", "10baseT", "10base2"};
-static u32 pcnet_msg_enable;
/*====================================================================*/
int start_pg, stop_pg, cm_offset;
int has_shmem = 0;
struct hw_info *local_hw_info;
- struct ei_device *ei_local;
dev_dbg(&link->dev, "pcnet_config\n");
mii_phy_probe(dev);
SET_NETDEV_DEV(dev, &link->dev);
- ei_local = netdev_priv(dev);
- ei_local->msg_enable = pcnet_msg_enable;
if (register_netdev(dev) != 0) {
pr_notice("register_netdev() failed\n");
outb_p(0x00, nic_addr+EN0_IMR); /* Mask all intrs. again. */
- if (netif_msg_drv(ei_local))
+ if (wd_msg_enable & NETIF_MSG_PROBE)
pr_cont(" autoirq is %d", dev->irq);
if (dev->irq < 2)
dev->irq = word16 ? 10 : 5;
static const char version[] =
"8390.c:v1.10cvs 9/23/94 Donald Becker (becker@cesdis.gsfc.nasa.gov)\n";
-static u32 zorro8390_msg_enable;
-
#include "lib8390.c"
#define DRV_NAME "zorro8390"
int err;
unsigned char SA_prom[32];
int start_page, stop_page;
- struct ei_device *ei_local = netdev_priv(dev);
static u32 zorro8390_offsets[16] = {
0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
dev->netdev_ops = &zorro8390_netdev_ops;
__NS8390_init(dev, 0);
- ei_local->msg_enable = zorro8390_msg_enable;
-
err = register_netdev(dev);
if (err) {
free_irq(IRQ_AMIGA_PORTS, dev);
static int amd8111e_tx(struct net_device *dev)
{
struct amd8111e_priv *lp = netdev_priv(dev);
- int tx_index = lp->tx_complete_idx & TX_RING_DR_MOD_MASK;
+ int tx_index;
int status;
/* Complete all the transmit packet */
while (lp->tx_complete_idx != lp->tx_idx){
struct net_device *netdev = pdata->netdev;
int ret = 0;
+ XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff);
+
pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER;
XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
dev->netdev_ops = &mace_netdev_ops;
dev->watchdog_timeo = TX_TIMEOUT;
- printk(KERN_INFO "%s: 68K MACE, hardware address %pM\n",
- dev->name, dev->dev_addr);
+ pr_info("Onboard MACE, hardware address %pM, chip revision 0x%04X\n",
+ dev->dev_addr, mp->chipid);
err = register_netdev(dev);
if (!err)
else if (fs & (UFLO|LCOL|RTRY)) {
++dev->stats.tx_aborted_errors;
if (mb->xmtfs & UFLO) {
- printk(KERN_ERR "%s: DMA underrun.\n", dev->name);
dev->stats.tx_fifo_errors++;
mace_txdma_reset(dev);
}
if (frame_status & (RS_OFLO | RS_CLSN | RS_FRAMERR | RS_FCSERR)) {
dev->stats.rx_errors++;
- if (frame_status & RS_OFLO) {
- printk(KERN_DEBUG "%s: fifo overflow.\n", dev->name);
+ if (frame_status & RS_OFLO)
dev->stats.rx_fifo_errors++;
- }
if (frame_status & RS_CLSN)
dev->stats.collisions++;
if (frame_status & RS_FRAMERR)
},
};
-static int __init mac_mace_init_module(void)
-{
- if (!MACH_IS_MAC)
- return -ENODEV;
-
- return platform_driver_register(&mac_mace_driver);
-}
-
-static void __exit mac_mace_cleanup_module(void)
-{
- platform_driver_unregister(&mac_mace_driver);
-}
-
-module_init(mac_mace_init_module);
-module_exit(mac_mace_cleanup_module);
+module_platform_driver(mac_mace_driver);
goto err_ioremap;
self->aq_hw = kzalloc(sizeof(*self->aq_hw), GFP_KERNEL);
+ if (!self->aq_hw) {
+ err = -ENOMEM;
+ goto err_ioremap;
+ }
self->aq_hw->aq_nic_cfg = aq_nic_get_cfg(self);
for (bar = 0; bar < 4; ++bar) {
mmio_pa = pci_resource_start(pdev, bar);
if (mmio_pa == 0U) {
err = -EIO;
- goto err_ioremap;
+ goto err_free_aq_hw;
}
reg_sz = pci_resource_len(pdev, bar);
if ((reg_sz <= 24 /*ATL_REGS_SIZE*/)) {
err = -EIO;
- goto err_ioremap;
+ goto err_free_aq_hw;
}
self->aq_hw->mmio = ioremap_nocache(mmio_pa, reg_sz);
if (!self->aq_hw->mmio) {
err = -EIO;
- goto err_ioremap;
+ goto err_free_aq_hw;
}
break;
}
if (bar == 4) {
err = -EIO;
- goto err_ioremap;
+ goto err_free_aq_hw;
}
numvecs = min((u8)AQ_CFG_VECS_DEF,
aq_pci_free_irq_vectors(self);
err_hwinit:
iounmap(self->aq_hw->mmio);
+err_free_aq_hw:
+ kfree(self->aq_hw);
err_ioremap:
free_netdev(ndev);
err_pci_func:
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
tg3_ape_unlock(tp, TG3_APE_LOCK_MEM);
- udelay(10);
+ usleep_range(10, 20);
timeout_us -= (timeout_us > 10) ? 10 : timeout_us;
}
if (!(apedata & APE_FW_STATUS_READY))
return -EAGAIN;
- /* Wait for up to 1 millisecond for APE to service previous event. */
- err = tg3_ape_event_lock(tp, 1000);
+ /* Wait for up to 20 millisecond for APE to service previous event. */
+ err = tg3_ape_event_lock(tp, 20000);
if (err)
return err;
switch (kind) {
case RESET_KIND_INIT:
+ tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_COUNT, tp->ape_hb++);
tg3_ape_write32(tp, TG3_APE_HOST_SEG_SIG,
APE_HOST_SEG_SIG_MAGIC);
tg3_ape_write32(tp, TG3_APE_HOST_SEG_LEN,
event = APE_EVENT_STATUS_STATE_START;
break;
case RESET_KIND_SHUTDOWN:
- /* With the interface we are currently using,
- * APE does not track driver state. Wiping
- * out the HOST SEGMENT SIGNATURE forces
- * the APE to assume OS absent status.
- */
- tg3_ape_write32(tp, TG3_APE_HOST_SEG_SIG, 0x0);
-
if (device_may_wakeup(&tp->pdev->dev) &&
tg3_flag(tp, WOL_ENABLE)) {
tg3_ape_write32(tp, TG3_APE_HOST_WOL_SPEED,
tg3_ape_send_event(tp, event);
}
+static void tg3_send_ape_heartbeat(struct tg3 *tp,
+ unsigned long interval)
+{
+ /* Check if hb interval has exceeded */
+ if (!tg3_flag(tp, ENABLE_APE) ||
+ time_before(jiffies, tp->ape_hb_jiffies + interval))
+ return;
+
+ tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_COUNT, tp->ape_hb++);
+ tp->ape_hb_jiffies = jiffies;
+}
+
static void tg3_disable_ints(struct tg3 *tp)
{
int i;
}
}
+ tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL << 1);
return work_done;
tx_recovery:
}
}
+ tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL << 1);
return work_done;
tx_recovery:
if (tg3_flag(tp, ENABLE_APE))
/* Write our heartbeat update interval to APE. */
tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_INT_MS,
- APE_HOST_HEARTBEAT_INT_DISABLE);
+ APE_HOST_HEARTBEAT_INT_5SEC);
tg3_write_sig_post_reset(tp, RESET_KIND_INIT);
tp->asf_counter = tp->asf_multiplier;
}
+ /* Update the APE heartbeat every 5 seconds.*/
+ tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL);
+
spin_unlock(&tp->lock);
restart_timer:
pci_state_reg);
tg3_ape_lock_init(tp);
+ tp->ape_hb_interval =
+ msecs_to_jiffies(APE_HOST_HEARTBEAT_INT_5SEC);
}
/* Set up tp->grc_local_ctrl before calling
#define TG3_APE_LOCK_PHY3 5
#define TG3_APE_LOCK_GPIO 7
+#define TG3_APE_HB_INTERVAL (tp->ape_hb_interval)
#define TG3_EEPROM_SB_F1R2_MBA_OFF 0x10
struct device *hwmon_dev;
bool link_up;
bool pcierr_recovery;
+
+ u32 ape_hb;
+ unsigned long ape_hb_interval;
+ unsigned long ape_hb_jiffies;
};
/* Accessor macros for chip and asic attributes
void cavium_ptp_put(struct cavium_ptp *ptp)
{
+ if (!ptp)
+ return;
pci_dev_put(ptp->pdev);
}
EXPORT_SYMBOL(cavium_ptp_put);
MODULE_PARM_DESC(cpi_alg,
"PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
-struct nicvf_xdp_tx {
- u64 dma_addr;
- u8 qidx;
-};
-
static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
{
if (nic->sqs_mode)
return 0;
}
-static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
-{
- /* Check if it's a recycled page, if not unmap the DMA mapping.
- * Recycled page holds an extra reference.
- */
- if (page_ref_count(page) == 1) {
- dma_addr &= PAGE_MASK;
- dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
- RCV_FRAG_LEN + XDP_HEADROOM,
- DMA_FROM_DEVICE,
- DMA_ATTR_SKIP_CPU_SYNC);
- }
-}
-
static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
struct rcv_queue *rq, struct sk_buff **skb)
{
struct xdp_buff xdp;
struct page *page;
- struct nicvf_xdp_tx *xdp_tx = NULL;
u32 action;
- u16 len, err, offset = 0;
+ u16 len, offset = 0;
u64 dma_addr, cpu_addr;
void *orig_data;
cpu_addr = (u64)phys_to_virt(cpu_addr);
page = virt_to_page((void *)cpu_addr);
- xdp.data_hard_start = page_address(page) + RCV_BUF_HEADROOM;
+ xdp.data_hard_start = page_address(page);
xdp.data = (void *)cpu_addr;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
switch (action) {
case XDP_PASS:
- nicvf_unmap_page(nic, page, dma_addr);
+ /* Check if it's a recycled page, if not
+ * unmap the DMA mapping.
+ *
+ * Recycled page holds an extra reference.
+ */
+ if (page_ref_count(page) == 1) {
+ dma_addr &= PAGE_MASK;
+ dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
+ RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
+ DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
/* Build SKB and pass on packet to network stack */
*skb = build_skb(xdp.data,
case XDP_TX:
nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
return true;
- case XDP_REDIRECT:
- /* Save DMA address for use while transmitting */
- xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
- xdp_tx->dma_addr = dma_addr;
- xdp_tx->qidx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);
-
- err = xdp_do_redirect(nic->pnicvf->netdev, &xdp, prog);
- if (!err)
- return true;
-
- /* Free the page on error */
- nicvf_unmap_page(nic, page, dma_addr);
- put_page(page);
- break;
default:
bpf_warn_invalid_xdp_action(action);
/* fall through */
trace_xdp_exception(nic->netdev, prog, action);
/* fall through */
case XDP_DROP:
- nicvf_unmap_page(nic, page, dma_addr);
+ /* Check if it's a recycled page, if not
+ * unmap the DMA mapping.
+ *
+ * Recycled page holds an extra reference.
+ */
+ if (page_ref_count(page) == 1) {
+ dma_addr &= PAGE_MASK;
+ dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
+ RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
+ DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
put_page(page);
return true;
}
}
}
-static int nicvf_xdp_xmit(struct net_device *netdev, struct xdp_buff *xdp)
-{
- struct nicvf *nic = netdev_priv(netdev);
- struct nicvf *snic = nic;
- struct nicvf_xdp_tx *xdp_tx;
- struct snd_queue *sq;
- struct page *page;
- int err, qidx;
-
- if (!netif_running(netdev) || !nic->xdp_prog)
- return -EINVAL;
-
- page = virt_to_page(xdp->data);
- xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
- qidx = xdp_tx->qidx;
-
- if (xdp_tx->qidx >= nic->xdp_tx_queues)
- return -EINVAL;
-
- /* Get secondary Qset's info */
- if (xdp_tx->qidx >= MAX_SND_QUEUES_PER_QS) {
- qidx = xdp_tx->qidx / MAX_SND_QUEUES_PER_QS;
- snic = (struct nicvf *)nic->snicvf[qidx - 1];
- if (!snic)
- return -EINVAL;
- qidx = xdp_tx->qidx % MAX_SND_QUEUES_PER_QS;
- }
-
- sq = &snic->qs->sq[qidx];
- err = nicvf_xdp_sq_append_pkt(snic, sq, (u64)xdp->data,
- xdp_tx->dma_addr,
- xdp->data_end - xdp->data);
- if (err)
- return -ENOMEM;
-
- nicvf_xdp_sq_doorbell(snic, sq, qidx);
- return 0;
-}
-
-static void nicvf_xdp_flush(struct net_device *dev)
-{
- return;
-}
-
static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr)
{
struct hwtstamp_config config;
.ndo_fix_features = nicvf_fix_features,
.ndo_set_features = nicvf_set_features,
.ndo_bpf = nicvf_xdp,
- .ndo_xdp_xmit = nicvf_xdp_xmit,
- .ndo_xdp_flush = nicvf_xdp_flush,
.ndo_do_ioctl = nicvf_ioctl,
};
/* Reserve space for header modifications by BPF program */
if (rbdr->is_xdp)
- buf_len += XDP_HEADROOM;
+ buf_len += XDP_PACKET_HEADROOM;
/* Check if it's recycled */
if (pgcache)
nic->rb_page = NULL;
return -ENOMEM;
}
-
if (pgcache)
- pgcache->dma_addr = *rbuf + XDP_HEADROOM;
+ pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM;
nic->rb_page_offset += buf_len;
}
int qentry;
if (subdesc_cnt > sq->xdp_free_cnt)
- return -1;
+ return 0;
qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
sq->xdp_desc_cnt += subdesc_cnt;
- return 0;
+ return 1;
}
/* Calculate no of SQ subdescriptors needed to transmit all
if (page_ref_count(page) != 1)
return;
- len += XDP_HEADROOM;
+ len += XDP_PACKET_HEADROOM;
/* Receive buffers in XDP mode are mapped from page start */
dma_addr &= PAGE_MASK;
}
#include <linux/netdevice.h>
#include <linux/iommu.h>
-#include <linux/bpf.h>
#include <net/xdp.h>
#include "q_struct.h"
#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
-#define RCV_BUF_HEADROOM 128 /* To store dma address for XDP redirect */
-#define XDP_HEADROOM (XDP_PACKET_HEADROOM + RCV_BUF_HEADROOM)
-
#define MAX_CQES_FOR_TX ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \
MAX_CQE_PER_PKT_XMIT)
if (is_t6(padap->params.chip)) {
size = padap->params.cim_la_size / 10 + 1;
- size *= 11 * sizeof(u32);
+ size *= 10 * sizeof(u32);
} else {
size = padap->params.cim_la_size / 8;
size *= 8 * sizeof(u32);
&payload->start, &payload->end);
}
+static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
+ int mtype, u32 addr, u32 len, void *hbuf)
+{
+ u32 win_pf, memoffset, mem_aperture, mem_base;
+ struct adapter *adap = pdbg_init->adap;
+ u32 pos, offset, resid;
+ u32 *res_buf;
+ u64 *buf;
+ int ret;
+
+ /* Argument sanity checks ...
+ */
+ if (addr & 0x3 || (uintptr_t)hbuf & 0x3)
+ return -EINVAL;
+
+ buf = (u64 *)hbuf;
+
+ /* Try to do 64-bit reads. Residual will be handled later. */
+ resid = len & 0x7;
+ len -= resid;
+
+ ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
+ &mem_aperture);
+ if (ret)
+ return ret;
+
+ addr = addr + memoffset;
+ win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
+
+ pos = addr & ~(mem_aperture - 1);
+ offset = addr - pos;
+
+ /* Set up initial PCI-E Memory Window to cover the start of our
+ * transfer.
+ */
+ t4_memory_update_win(adap, win, pos | win_pf);
+
+ /* Transfer data from the adapter */
+ while (len > 0) {
+ *buf++ = le64_to_cpu((__force __le64)
+ t4_read_reg64(adap, mem_base + offset));
+ offset += sizeof(u64);
+ len -= sizeof(u64);
+
+ /* If we've reached the end of our current window aperture,
+ * move the PCI-E Memory Window on to the next.
+ */
+ if (offset == mem_aperture) {
+ pos += mem_aperture;
+ offset = 0;
+ t4_memory_update_win(adap, win, pos | win_pf);
+ }
+ }
+
+ res_buf = (u32 *)buf;
+ /* Read residual in 32-bit multiples */
+ while (resid > sizeof(u32)) {
+ *res_buf++ = le32_to_cpu((__force __le32)
+ t4_read_reg(adap, mem_base + offset));
+ offset += sizeof(u32);
+ resid -= sizeof(u32);
+
+ /* If we've reached the end of our current window aperture,
+ * move the PCI-E Memory Window on to the next.
+ */
+ if (offset == mem_aperture) {
+ pos += mem_aperture;
+ offset = 0;
+ t4_memory_update_win(adap, win, pos | win_pf);
+ }
+ }
+
+ /* Transfer residual < 32-bits */
+ if (resid)
+ t4_memory_rw_residual(adap, resid, mem_base + offset,
+ (u8 *)res_buf, T4_MEMORY_READ);
+
+ return 0;
+}
+
#define CUDBG_YIELD_ITERATION 256
static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
goto skip_read;
spin_lock(&padap->win0_lock);
- rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type,
- bytes_read, bytes,
- (__be32 *)temp_buff.data,
- 1);
+ rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type,
+ bytes_read, bytes, temp_buff.data);
spin_unlock(&padap->win0_lock);
if (rc) {
cudbg_err->sys_err = rc;
u32 t4_get_util_window(struct adapter *adap);
void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window);
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+ u32 *mem_base, u32 *mem_aperture);
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr);
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+ int dir);
#define T4_MEMORY_WRITE 0
#define T4_MEMORY_READ 1
int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len,
case CUDBG_CIM_LA:
if (is_t6(adap->params.chip)) {
len = adap->params.cim_la_size / 10 + 1;
- len *= 11 * sizeof(u32);
+ len *= 10 * sizeof(u32);
} else {
len = adap->params.cim_la_size / 8;
len *= 8 * sizeof(u32);
return ret;
}
- /* Clear out any old resources being used by the filter before
- * we start constructing the new filter.
- */
- if (f->valid)
- clear_filter(adapter, f);
-
if (is_t6(adapter->params.chip) && fs->type &&
ipv6_addr_type((const struct in6_addr *)fs->val.lip) !=
IPV6_ADDR_ANY) {
pcie_fw = readl(adap->regs + PCIE_FW_A);
/* Check if cxgb4 is the MASTER and fw is initialized */
- if (!(pcie_fw & PCIE_FW_INIT_F) ||
+ if (num_vfs &&
+ (!(pcie_fw & PCIE_FW_INIT_F) ||
!(pcie_fw & PCIE_FW_MASTER_VLD_F) ||
- PCIE_FW_MASTER_G(pcie_fw) != CXGB4_UNIFIED_PF) {
+ PCIE_FW_MASTER_G(pcie_fw) != CXGB4_UNIFIED_PF)) {
dev_warn(&pdev->dev,
"cxgb4 driver needs to be MASTER to support SRIOV\n");
return -EOPNOTSUPP;
#if IS_ENABLED(CONFIG_IPV6)
t4_cleanup_clip_tbl(adapter);
#endif
- iounmap(adapter->regs);
if (!is_t4(adapter->params.chip))
iounmap(adapter->bar2);
- pci_disable_pcie_error_reporting(pdev);
- if ((adapter->flags & DEV_ENABLED)) {
- pci_disable_device(pdev);
- adapter->flags &= ~DEV_ENABLED;
- }
- pci_release_regions(pdev);
- kfree(adapter->mbox_log);
- synchronize_rcu();
- kfree(adapter);
}
#ifdef CONFIG_PCI_IOV
else {
cxgb4_iov_configure(adapter->pdev, 0);
}
#endif
+ iounmap(adapter->regs);
+ pci_disable_pcie_error_reporting(pdev);
+ if ((adapter->flags & DEV_ENABLED)) {
+ pci_disable_device(pdev);
+ adapter->flags &= ~DEV_ENABLED;
+ }
+ pci_release_regions(pdev);
+ kfree(adapter->mbox_log);
+ synchronize_rcu();
+ kfree(adapter);
}
/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt
return 0;
}
+/**
+ * t4_memory_rw_init - Get memory window relative offset, base, and size.
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to use
+ * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC
+ * @mem_off: memory relative offset with respect to @mtype.
+ * @mem_base: configured memory base address.
+ * @mem_aperture: configured memory window aperture.
+ *
+ * Get the configured memory window's relative offset, base, and size.
+ */
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+ u32 *mem_base, u32 *mem_aperture)
+{
+ u32 edc_size, mc_size, mem_reg;
+
+ /* Offset into the region of memory which is being accessed
+ * MEM_EDC0 = 0
+ * MEM_EDC1 = 1
+ * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller
+ * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5)
+ * MEM_HMA = 4
+ */
+ edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
+ if (mtype == MEM_HMA) {
+ *mem_off = 2 * (edc_size * 1024 * 1024);
+ } else if (mtype != MEM_MC1) {
+ *mem_off = (mtype * (edc_size * 1024 * 1024));
+ } else {
+ mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
+ MA_EXT_MEMORY0_BAR_A));
+ *mem_off = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
+ }
+
+ /* Each PCI-E Memory Window is programmed with a window size -- or
+ * "aperture" -- which controls the granularity of its mapping onto
+ * adapter memory. We need to grab that aperture in order to know
+ * how to use the specified window. The window is also programmed
+ * with the base address of the Memory Window in BAR0's address
+ * space. For T4 this is an absolute PCI-E Bus Address. For T5
+ * the address is relative to BAR0.
+ */
+ mem_reg = t4_read_reg(adap,
+ PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
+ win));
+ /* a dead adapter will return 0xffffffff for PIO reads */
+ if (mem_reg == 0xffffffff)
+ return -ENXIO;
+
+ *mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
+ *mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
+ if (is_t4(adap->params.chip))
+ *mem_base -= adap->t4_bar0;
+
+ return 0;
+}
+
+/**
+ * t4_memory_update_win - Move memory window to specified address.
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to use
+ * @addr: location to move.
+ *
+ * Move memory window to specified address.
+ */
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr)
+{
+ t4_write_reg(adap,
+ PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
+ addr);
+ /* Read it back to ensure that changes propagate before we
+ * attempt to use the new value.
+ */
+ t4_read_reg(adap,
+ PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
+}
+
+/**
+ * t4_memory_rw_residual - Read/Write residual data.
+ * @adap: the adapter
+ * @off: relative offset within residual to start read/write.
+ * @addr: address within indicated memory type.
+ * @buf: host memory buffer
+ * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0)
+ *
+ * Read/Write residual data less than 32-bits.
+ */
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+ int dir)
+{
+ union {
+ u32 word;
+ char byte[4];
+ } last;
+ unsigned char *bp;
+ int i;
+
+ if (dir == T4_MEMORY_READ) {
+ last.word = le32_to_cpu((__force __le32)
+ t4_read_reg(adap, addr));
+ for (bp = (unsigned char *)buf, i = off; i < 4; i++)
+ bp[i] = last.byte[i];
+ } else {
+ last.word = *buf;
+ for (i = off; i < 4; i++)
+ last.byte[i] = 0;
+ t4_write_reg(adap, addr,
+ (__force u32)cpu_to_le32(last.word));
+ }
+}
+
/**
* t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
* @adap: the adapter
u32 len, void *hbuf, int dir)
{
u32 pos, offset, resid, memoffset;
- u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base;
+ u32 win_pf, mem_aperture, mem_base;
u32 *buf;
+ int ret;
/* Argument sanity checks ...
*/
resid = len & 0x3;
len -= resid;
- /* Offset into the region of memory which is being accessed
- * MEM_EDC0 = 0
- * MEM_EDC1 = 1
- * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller
- * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5)
- * MEM_HMA = 4
- */
- edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
- if (mtype == MEM_HMA) {
- memoffset = 2 * (edc_size * 1024 * 1024);
- } else if (mtype != MEM_MC1) {
- memoffset = (mtype * (edc_size * 1024 * 1024));
- } else {
- mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
- MA_EXT_MEMORY0_BAR_A));
- memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
- }
+ ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
+ &mem_aperture);
+ if (ret)
+ return ret;
/* Determine the PCIE_MEM_ACCESS_OFFSET */
addr = addr + memoffset;
- /* Each PCI-E Memory Window is programmed with a window size -- or
- * "aperture" -- which controls the granularity of its mapping onto
- * adapter memory. We need to grab that aperture in order to know
- * how to use the specified window. The window is also programmed
- * with the base address of the Memory Window in BAR0's address
- * space. For T4 this is an absolute PCI-E Bus Address. For T5
- * the address is relative to BAR0.
- */
- mem_reg = t4_read_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
- win));
- mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
- mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
- if (is_t4(adap->params.chip))
- mem_base -= adap->t4_bar0;
win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
/* Calculate our initial PCI-E Memory Window Position and Offset into
* that Window.
*/
- pos = addr & ~(mem_aperture-1);
+ pos = addr & ~(mem_aperture - 1);
offset = addr - pos;
/* Set up initial PCI-E Memory Window to cover the start of our
- * transfer. (Read it back to ensure that changes propagate before we
- * attempt to use the new value.)
+ * transfer.
*/
- t4_write_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
- pos | win_pf);
- t4_read_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
+ t4_memory_update_win(adap, win, pos | win_pf);
/* Transfer data to/from the adapter as long as there's an integral
* number of 32-bit transfers to complete.
if (offset == mem_aperture) {
pos += mem_aperture;
offset = 0;
- t4_write_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
- win), pos | win_pf);
- t4_read_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
- win));
+ t4_memory_update_win(adap, win, pos | win_pf);
}
}
* residual amount. The PCI-E Memory Window has already been moved
* above (if necessary) to cover this final transfer.
*/
- if (resid) {
- union {
- u32 word;
- char byte[4];
- } last;
- unsigned char *bp;
- int i;
-
- if (dir == T4_MEMORY_READ) {
- last.word = le32_to_cpu(
- (__force __le32)t4_read_reg(adap,
- mem_base + offset));
- for (bp = (unsigned char *)buf, i = resid; i < 4; i++)
- bp[i] = last.byte[i];
- } else {
- last.word = *buf;
- for (i = resid; i < 4; i++)
- last.byte[i] = 0;
- t4_write_reg(adap, mem_base + offset,
- (__force u32)cpu_to_le32(last.word));
- }
- }
+ if (resid)
+ t4_memory_rw_residual(adap, resid, mem_base + offset,
+ (u8 *)buf, dir);
return 0;
}
}
#define EEPROM_STAT_ADDR 0x7bfc
-#define VPD_SIZE 0x800
#define VPD_BASE 0x400
#define VPD_BASE_OLD 0
#define VPD_LEN 1024
if (!vpd)
return -ENOMEM;
- /* We have two VPD data structures stored in the adapter VPD area.
- * By default, Linux calculates the size of the VPD area by traversing
- * the first VPD area at offset 0x0, so we need to tell the OS what
- * our real VPD size is.
- */
- ret = pci_set_vpd_size(adapter->pdev, VPD_SIZE);
- if (ret < 0)
- goto out;
-
/* Card information normally starts at VPD_BASE but early cards had
* it at 0.
*/
case CHELSIO_T6:
switch (nports) {
+ case 1:
case 2: return 1 << pidx;
}
break;
const char *fc;
const struct port_info *pi = netdev_priv(dev);
- netif_carrier_on(dev);
-
switch (pi->link_cfg.speed) {
case 100:
s = "100Mbps";
netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
} else {
- netif_carrier_off(dev);
netdev_info(dev, "link down\n");
}
}
*/
if (ret == 0)
ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
+
+ /* The Virtual Interfaces are connected to an internal switch on the
+ * chip which allows VIs attached to the same port to talk to each
+ * other even when the port link is down. As a result, we generally
+ * want to always report a VI's link as being "up", provided there are
+ * no errors in enabling vi.
+ */
+
+ if (ret == 0)
+ netif_carrier_on(dev);
+
return ret;
}
local_irq_{dis,en}able()
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
static const char version[] =
"cs89x0.c:v1.02 11/26/96 Russell Nelson <nelson@crynwr.com>\n";
-/* ======================= configure the driver here ======================= */
-
-/* use 0 for production, 1 for verification, >2 for debug */
-#ifndef NET_DEBUG
-#define NET_DEBUG 0
-#endif
-
-/* ======================= end of configuration ======================= */
-
-
-/* Always include 'config.h' first in case the user wants to turn on
- or override something. */
#include <linux/module.h>
/*
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/netdevice.h>
+#include <linux/platform_device.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/delay.h>
#include "cs89x0.h"
-static unsigned int net_debug = NET_DEBUG;
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "debug message level");
/* Information that need to be kept for each board. */
struct net_local {
+ int msg_enable;
int chip_type; /* one of: CS8900, CS8920, CS8920M */
char chip_revision; /* revision letter of the chip ('A'...) */
int send_cmd; /* the propercommand used to send a packet. */
int rx_mode;
int curr_rx_cfg;
int send_underrun; /* keep track of how many underruns in a row we get */
- struct sk_buff *skb;
};
/* Index to functions, as function prototypes. */
-
-#if 0
-extern void reset_chip(struct net_device *dev);
-#endif
static int net_open(struct net_device *dev);
static int net_send_packet(struct sk_buff *skb, struct net_device *dev);
static irqreturn_t net_interrupt(int irq, void *dev_id);
static struct net_device_stats *net_get_stats(struct net_device *dev);
static int set_mac_address(struct net_device *dev, void *addr);
-
-/* Example routines you must write ;->. */
-#define tx_done(dev) 1
-
/* For reading/writing registers ISA-style */
static inline int
readreg_io(struct net_device *dev, int portno)
/* Probe for the CS8900 card in slot E. We won't bother looking
anywhere else until we have a really good reason to do so. */
-struct net_device * __init mac89x0_probe(int unit)
+static int mac89x0_device_probe(struct platform_device *pdev)
{
struct net_device *dev;
- static int once_is_enough;
struct net_local *lp;
- static unsigned version_printed;
int i, slot;
unsigned rev_type = 0;
unsigned long ioaddr;
int err = -ENODEV;
struct nubus_rsrc *fres;
- if (!MACH_IS_MAC)
- return ERR_PTR(-ENODEV);
-
dev = alloc_etherdev(sizeof(struct net_local));
if (!dev)
- return ERR_PTR(-ENOMEM);
-
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- }
-
- if (once_is_enough)
- goto out;
- once_is_enough = 1;
+ return -ENOMEM;
/* We might have to parameterize this later */
slot = 0xE;
if (sig != swab16(CHIP_EISA_ID_SIG))
goto out;
+ SET_NETDEV_DEV(dev, &pdev->dev);
+
/* Initialize the net_device structure. */
lp = netdev_priv(dev);
+ lp->msg_enable = netif_msg_init(debug, 0);
+
/* Fill in the 'dev' fields. */
dev->base_addr = ioaddr;
dev->mem_start = (unsigned long)
if (lp->chip_type != CS8900 && lp->chip_revision >= 'C')
lp->send_cmd = TX_NOW;
- if (net_debug && version_printed++ == 0)
- printk(version);
+ netif_dbg(lp, drv, dev, "%s", version);
- printk(KERN_INFO "%s: cs89%c0%s rev %c found at %#8lx",
- dev->name,
- lp->chip_type==CS8900?'0':'2',
- lp->chip_type==CS8920M?"M":"",
- lp->chip_revision,
- dev->base_addr);
+ pr_info("cs89%c0%s rev %c found at %#8lx\n",
+ lp->chip_type == CS8900 ? '0' : '2',
+ lp->chip_type == CS8920M ? "M" : "",
+ lp->chip_revision, dev->base_addr);
/* Try to read the MAC address */
if ((readreg(dev, PP_SelfST) & (EEPROM_PRESENT | EEPROM_OK)) == 0) {
- printk("\nmac89x0: No EEPROM, giving up now.\n");
+ pr_info("No EEPROM, giving up now.\n");
goto out1;
} else {
for (i = 0; i < ETH_ALEN; i += 2) {
/* print the IRQ and ethernet address. */
- printk(" IRQ %d ADDR %pM\n", dev->irq, dev->dev_addr);
+ pr_info("MAC %pM, IRQ %d\n", dev->dev_addr, dev->irq);
dev->netdev_ops = &mac89x0_netdev_ops;
err = register_netdev(dev);
if (err)
goto out1;
- return NULL;
+
+ platform_set_drvdata(pdev, dev);
+ return 0;
out1:
nubus_writew(0, dev->base_addr + ADD_PORT);
out:
free_netdev(dev);
- return ERR_PTR(err);
+ return err;
}
-#if 0
-/* This is useful for something, but I don't know what yet. */
-void __init reset_chip(struct net_device *dev)
-{
- int reset_start_time;
-
- writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
-
- /* wait 30 ms */
- msleep_interruptible(30);
-
- /* Wait until the chip is reset */
- reset_start_time = jiffies;
- while( (readreg(dev, PP_SelfST) & INIT_DONE) == 0 && jiffies - reset_start_time < 2)
- ;
-}
-#endif
-
/* Open/initialize the board. This is called (in the current kernel)
sometime after booting when the 'ifconfig' program is run.
struct net_local *lp = netdev_priv(dev);
unsigned long flags;
- if (net_debug > 3)
- printk("%s: sent %d byte packet of type %x\n",
- dev->name, skb->len,
- (skb->data[ETH_ALEN+ETH_ALEN] << 8)
- | skb->data[ETH_ALEN+ETH_ALEN+1]);
+ netif_dbg(lp, tx_queued, dev, "sent %d byte packet of type %x\n",
+ skb->len, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+ skb->data[ETH_ALEN + ETH_ALEN + 1]);
/* keep the upload from being interrupted, since we
ask the chip to start transmitting before the
struct net_local *lp;
int ioaddr, status;
- if (dev == NULL) {
- printk ("net_interrupt(): irq %d for unknown device.\n", irq);
- return IRQ_NONE;
- }
-
ioaddr = dev->base_addr;
lp = netdev_priv(dev);
faster than you can read them off, you're screwed. Hasta la
vista, baby! */
while ((status = swab16(nubus_readw(dev->base_addr + ISQ_PORT)))) {
- if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
+ netif_dbg(lp, intr, dev, "status=%04x\n", status);
switch(status & ISQ_EVENT_MASK) {
case ISQ_RECEIVER_EVENT:
/* Got a packet(s). */
netif_wake_queue(dev);
}
if (status & TX_UNDERRUN) {
- if (net_debug > 0) printk("%s: transmit underrun\n", dev->name);
+ netif_dbg(lp, tx_err, dev, "transmit underrun\n");
lp->send_underrun++;
if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381;
else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL;
static void
net_rx(struct net_device *dev)
{
+ struct net_local *lp = netdev_priv(dev);
struct sk_buff *skb;
int status, length;
/* Malloc up new buffer. */
skb = alloc_skb(length, GFP_ATOMIC);
if (skb == NULL) {
- printk("%s: Memory squeeze, dropping packet.\n", dev->name);
dev->stats.rx_dropped++;
return;
}
skb_copy_to_linear_data(skb, (void *)(dev->mem_start + PP_RxFrame),
length);
- if (net_debug > 3)printk("%s: received %d byte packet of type %x\n",
- dev->name, length,
- (skb->data[ETH_ALEN+ETH_ALEN] << 8)
- | skb->data[ETH_ALEN+ETH_ALEN+1]);
+ netif_dbg(lp, rx_status, dev, "received %d byte packet of type %x\n",
+ length, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+ skb->data[ETH_ALEN + ETH_ALEN + 1]);
skb->protocol=eth_type_trans(skb,dev);
netif_rx(skb);
return -EADDRNOTAVAIL;
memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
- printk("%s: Setting MAC address to %pM\n", dev->name, dev->dev_addr);
+ netdev_info(dev, "Setting MAC address to %pM\n", dev->dev_addr);
/* set the Ethernet address */
for (i=0; i < ETH_ALEN/2; i++)
return 0;
}
-#ifdef MODULE
-
-static struct net_device *dev_cs89x0;
-static int debug;
-
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)");
MODULE_LICENSE("GPL");
-int __init
-init_module(void)
+static int mac89x0_device_remove(struct platform_device *pdev)
{
- net_debug = debug;
- dev_cs89x0 = mac89x0_probe(-1);
- if (IS_ERR(dev_cs89x0)) {
- printk(KERN_WARNING "mac89x0.c: No card found\n");
- return PTR_ERR(dev_cs89x0);
- }
+ struct net_device *dev = platform_get_drvdata(pdev);
+
+ unregister_netdev(dev);
+ nubus_writew(0, dev->base_addr + ADD_PORT);
+ free_netdev(dev);
return 0;
}
-void
-cleanup_module(void)
-{
- unregister_netdev(dev_cs89x0);
- nubus_writew(0, dev_cs89x0->base_addr + ADD_PORT);
- free_netdev(dev_cs89x0);
-}
-#endif /* MODULE */
+static struct platform_driver mac89x0_platform_driver = {
+ .probe = mac89x0_device_probe,
+ .remove = mac89x0_device_remove,
+ .driver = {
+ .name = "mac89x0",
+ },
+};
+
+module_platform_driver(mac89x0_platform_driver);
#define DRV_NAME "enic"
#define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION "2.3.0.45"
+#define DRV_VERSION "2.3.0.53"
#define DRV_COPYRIGHT "Copyright 2008-2013 Cisco Systems, Inc"
#define ENIC_BARS_MAX 6
struct vxlan_offload {
u16 vxlan_udp_port_number;
u8 patch_level;
+ u8 flags;
};
/* Per-instance private data structure */
return 0;
}
+static int enic_get_rx_flow_hash(struct enic *enic, struct ethtool_rxnfc *cmd)
+{
+ cmd->data = 0;
+
+ switch (cmd->flow_type) {
+ case TCP_V6_FLOW:
+ case TCP_V4_FLOW:
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ /* Fall through */
+ case UDP_V6_FLOW:
+ case UDP_V4_FLOW:
+ if (vnic_dev_capable_udp_rss(enic->vdev))
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ /* Fall through */
+ case SCTP_V4_FLOW:
+ case AH_ESP_V4_FLOW:
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ case SCTP_V6_FLOW:
+ case AH_ESP_V6_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ case IPV4_FLOW:
+ case IPV6_FLOW:
+ cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
u32 *rule_locs)
{
ret = enic_grxclsrule(enic, cmd);
spin_unlock_bh(&enic->rfs_h.lock);
break;
+ case ETHTOOL_GRXFH:
+ ret = enic_get_rx_flow_hash(enic, cmd);
+ break;
default:
ret = -EOPNOTSUPP;
break;
goto error;
}
- if (ti->sa_family != AF_INET) {
- netdev_info(netdev, "vxlan: only IPv4 offload supported");
+ switch (ti->sa_family) {
+ case AF_INET6:
+ if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6)) {
+ netdev_info(netdev, "vxlan: only IPv4 offload supported");
+ goto error;
+ }
+ /* Fall through */
+ case AF_INET:
+ break;
+ default:
goto error;
}
goto error;
}
+ if ((vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ) != 1) &&
+ !(enic->vxlan.flags & ENIC_VXLAN_MULTI_WQ)) {
+ netdev_info(netdev, "vxlan: vxlan offload with multi wq not supported on this adapter");
+ goto error;
+ }
err = vnic_dev_overlay_offload_cfg(enic->vdev,
OVERLAY_CFG_VXLAN_PORT_UPDATE,
struct enic *enic = netdev_priv(dev);
struct udphdr *udph;
u16 port = 0;
- u16 proto;
+ u8 proto;
if (!skb->encapsulation)
return features;
features = vxlan_features_check(skb, features);
- /* hardware only supports IPv4 vxlan tunnel */
- if (vlan_get_protocol(skb) != htons(ETH_P_IP))
+ switch (vlan_get_protocol(skb)) {
+ case htons(ETH_P_IPV6):
+ if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6))
+ goto out;
+ proto = ipv6_hdr(skb)->nexthdr;
+ break;
+ case htons(ETH_P_IP):
+ proto = ip_hdr(skb)->protocol;
+ break;
+ default:
goto out;
+ }
- /* hardware does not support offload of ipv6 inner pkt */
- if (eth->h_proto != ntohs(ETH_P_IP))
+ switch (eth->h_proto) {
+ case ntohs(ETH_P_IPV6):
+ if (!(enic->vxlan.flags & ENIC_VXLAN_INNER_IPV6))
+ goto out;
+ /* Fall through */
+ case ntohs(ETH_P_IP):
+ break;
+ default:
goto out;
+ }
- proto = ip_hdr(skb)->protocol;
if (proto == IPPROTO_UDP) {
udph = udp_hdr(skb);
static void enic_preload_tcp_csum_encap(struct sk_buff *skb)
{
- if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
+ const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
+
+ switch (eth->h_proto) {
+ case ntohs(ETH_P_IP):
inner_ip_hdr(skb)->check = 0;
inner_tcp_hdr(skb)->check =
~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
inner_ip_hdr(skb)->daddr, 0,
IPPROTO_TCP, 0);
+ break;
+ case ntohs(ETH_P_IPV6):
+ inner_tcp_hdr(skb)->check =
+ ~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr,
+ &inner_ipv6_hdr(skb)->daddr, 0,
+ IPPROTO_TCP, 0);
+ break;
+ default:
+ WARN_ONCE(1, "Non ipv4/ipv6 inner pkt for encap offload");
+ break;
}
}
}
for (i = 0; i < enic->rq_count; i++) {
+ /* enable rq before updating rq desc */
+ vnic_rq_enable(&enic->rq[i]);
vnic_rq_fill(&enic->rq[i], enic_rq_alloc_buf);
/* Need at least one buffer on ring to get going */
if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
for (i = 0; i < enic->wq_count; i++)
vnic_wq_enable(&enic->wq[i]);
- for (i = 0; i < enic->rq_count; i++)
- vnic_rq_enable(&enic->rq[i]);
if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
enic_dev_add_station_addr(enic);
return 0;
err_out_free_rq:
- for (i = 0; i < enic->rq_count; i++)
+ for (i = 0; i < enic->rq_count; i++) {
+ err = vnic_rq_disable(&enic->rq[i]);
+ if (err)
+ return err;
vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
+ }
enic_dev_notify_unset(enic);
err_out_free_intr:
enic_unset_affinity_hint(enic);
static int enic_dev_open(struct enic *enic)
{
int err;
+ u32 flags = CMD_OPENF_IG_DESCCACHE;
err = enic_dev_wait(enic->vdev, vnic_dev_open,
- vnic_dev_open_done, 0);
+ vnic_dev_open_done, flags);
if (err)
dev_err(enic_get_dev(enic), "vNIC device open failed, err %d\n",
err);
{
struct device *dev = enic_get_dev(enic);
const u8 rss_default_cpu = 0;
- const u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
+ u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 |
NIC_CFG_RSS_HASH_TYPE_IPV6 |
NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
const u8 rss_base_cpu = 0;
u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1);
+ if (vnic_dev_capable_udp_rss(enic->vdev))
+ rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP;
if (rss_enable) {
if (!enic_set_rsskey(enic)) {
if (enic_set_rsscpu(enic, rss_hash_bits)) {
netdev->hw_features |= NETIF_F_RXCSUM;
if (ENIC_SETTING(enic, VXLAN)) {
u64 patch_level;
+ u64 a1 = 0;
netdev->hw_enc_features |= NETIF_F_RXCSUM |
NETIF_F_TSO |
+ NETIF_F_TSO6 |
NETIF_F_TSO_ECN |
NETIF_F_GSO_UDP_TUNNEL |
NETIF_F_HW_CSUM |
*/
err = vnic_dev_get_supported_feature_ver(enic->vdev,
VIC_FEATURE_VXLAN,
- &patch_level);
+ &patch_level, &a1);
if (err)
patch_level = 0;
+ enic->vxlan.flags = (u8)a1;
/* mask bits that are supported by driver
*/
patch_level &= BIT_ULL(0) | BIT_ULL(2);
}
int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
- u64 *supported_versions)
+ u64 *supported_versions, u64 *a1)
{
u64 a0 = feature;
int wait = 1000;
- u64 a1 = 0;
int ret;
- ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, &a1, wait);
+ ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, a1, wait);
if (!ret)
*supported_versions = a0;
return ret;
}
+
+bool vnic_dev_capable_udp_rss(struct vnic_dev *vdev)
+{
+ u64 a0 = CMD_NIC_CFG, a1 = 0;
+ u64 rss_hash_type;
+ int wait = 1000;
+ int err;
+
+ err = vnic_dev_cmd(vdev, CMD_CAPABILITY, &a0, &a1, wait);
+ if (err || !a0)
+ return 0;
+
+ rss_hash_type = (a1 >> NIC_CFG_RSS_HASH_TYPE_SHIFT) &
+ NIC_CFG_RSS_HASH_TYPE_MASK_FIELD;
+
+ return (rss_hash_type & NIC_CFG_RSS_HASH_TYPE_UDP);
+}
int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
u16 vxlan_udp_port_number);
int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
- u64 *supported_versions);
+ u64 *supported_versions, u64 *a1);
+bool vnic_dev_capable_udp_rss(struct vnic_dev *vdev);
#endif /* _VNIC_DEV_H_ */
/* flags for CMD_OPEN */
#define CMD_OPENF_OPROM 0x1 /* open coming from option rom */
+#define CMD_OPENF_IG_DESCCACHE 0x2 /* Do not flush IG DESC cache */
/* flags for CMD_INIT */
#define CMD_INITF_DEFAULT_MAC 0x1 /* init with default mac addr */
#define OVERLAY_CFG_VXLAN_PORT_UPDATE 0
+#define ENIC_VXLAN_INNER_IPV6 BIT(0)
+#define ENIC_VXLAN_OUTER_IPV6 BIT(1)
+#define ENIC_VXLAN_MULTI_WQ BIT(2)
+
/* Use this enum to get the supported versions for each of these features
* If you need to use the devcmd_get_supported_feature_version(), add
* the new feature into this enum and install function handler in devcmd.c
#define NIC_CFG_RSS_HASH_TYPE_TCP_IPV6 (1 << 4)
#define NIC_CFG_RSS_HASH_TYPE_IPV6_EX (1 << 5)
#define NIC_CFG_RSS_HASH_TYPE_TCP_IPV6_EX (1 << 6)
+#define NIC_CFG_RSS_HASH_TYPE_UDP (1 << 7)
static inline void vnic_set_nic_cfg(u32 *nic_cfg,
u8 rss_default_cpu, u8 rss_hash_type,
#include "be.h"
#include "be_cmds.h"
-char *be_misconfig_evt_port_state[] = {
+const char * const be_misconfig_evt_port_state[] = {
"Physical Link is functional",
"Optics faulted/incorrectly installed/not installed - Reseat optics. If issue not resolved, replace.",
"Optics of two types installed – Remove one optic or install matching pair of optics.",
phy_state == BE_PHY_UNQUALIFIED || \
phy_state == BE_PHY_UNCERTIFIED)
-extern char *be_misconfig_evt_port_state[];
+extern const char * const be_misconfig_evt_port_state[];
/* async event indicating misconfigured port */
struct be_async_event_misconfig_port {
err);
}
+ if (!!(net_dev->flags & IFF_ALLMULTI) != priv->mac_dev->allmulti) {
+ priv->mac_dev->allmulti = !priv->mac_dev->allmulti;
+ err = priv->mac_dev->set_allmulti(priv->mac_dev->fman_mac,
+ priv->mac_dev->allmulti);
+ if (err < 0)
+ netif_err(priv, drv, net_dev,
+ "mac_dev->set_allmulti() = %d\n",
+ err);
+ }
+
err = priv->mac_dev->set_multi(net_dev, priv->mac_dev);
if (err < 0)
netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
goto csum_failed;
}
+ /* SGT[0] is used by the linear part */
sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
- qm_sg_entry_set_len(&sgt[0], skb_headlen(skb));
+ frag_len = skb_headlen(skb);
+ qm_sg_entry_set_len(&sgt[0], frag_len);
sgt[0].bpid = FSL_DPAA_BPID_INV;
sgt[0].offset = 0;
addr = dma_map_single(dev, skb->data,
qm_sg_entry_set64(&sgt[0], addr);
/* populate the rest of SGT entries */
- frag = &skb_shinfo(skb)->frags[0];
- frag_len = frag->size;
- for (i = 1; i <= nr_frags; i++, frag++) {
+ for (i = 0; i < nr_frags; i++) {
+ frag = &skb_shinfo(skb)->frags[i];
+ frag_len = frag->size;
WARN_ON(!skb_frag_page(frag));
addr = skb_frag_dma_map(dev, frag, 0,
frag_len, dma_dir);
goto sg_map_failed;
}
- qm_sg_entry_set_len(&sgt[i], frag_len);
- sgt[i].bpid = FSL_DPAA_BPID_INV;
- sgt[i].offset = 0;
+ qm_sg_entry_set_len(&sgt[i + 1], frag_len);
+ sgt[i + 1].bpid = FSL_DPAA_BPID_INV;
+ sgt[i + 1].offset = 0;
/* keep the offset in the address */
- qm_sg_entry_set64(&sgt[i], addr);
- frag_len = frag->size;
+ qm_sg_entry_set64(&sgt[i + 1], addr);
}
- qm_sg_entry_set_f(&sgt[i - 1], frag_len);
+
+ /* Set the final bit in the last used entry of the SGT */
+ qm_sg_entry_set_f(&sgt[nr_frags], frag_len);
qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
/* MAX_SKB_FRAGS is equal or larger than our dpaa_SGT_MAX_ENTRIES;
* make sure we don't feed FMan with more fragments than it supports.
*/
- if (nonlinear &&
- likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) {
- /* Just create a S/G fd based on the skb */
- err = skb_to_sg_fd(priv, skb, &fd);
- percpu_priv->tx_frag_skbuffs++;
- } else {
+ if (unlikely(nonlinear &&
+ (skb_shinfo(skb)->nr_frags >= DPAA_SGT_MAX_ENTRIES))) {
/* If the egress skb contains more fragments than we support
* we have no choice but to linearize it ourselves.
*/
- if (unlikely(nonlinear) && __skb_linearize(skb))
+ if (__skb_linearize(skb))
goto enomem;
- /* Finally, create a contig FD from this skb */
+ nonlinear = skb_is_nonlinear(skb);
+ }
+
+ if (nonlinear) {
+ /* Just create a S/G fd based on the skb */
+ err = skb_to_sg_fd(priv, skb, &fd);
+ percpu_priv->tx_frag_skbuffs++;
+ } else {
+ /* Create a contig FD from this skb */
err = skb_to_contig_fd(priv, skb, &fd, &offset);
}
if (unlikely(err < 0))
if (dpaa_eth_napi_schedule(percpu_priv, portal))
return qman_cb_dqrr_stop;
- if (dpaa_eth_refill_bpools(priv))
- /* Unable to refill the buffer pool due to insufficient
- * system memory. Just release the frame back into the pool,
- * otherwise we'll soon end up with an empty buffer pool.
- */
- dpaa_fd_release(net_dev, &dq->fd);
- else
- dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+ dpaa_eth_refill_bpools(priv);
+ dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
return qman_cb_dqrr_consume;
}
priv->channel = (u16)channel;
- /* Start a thread that will walk the CPUs with affine portals
+ /* Walk the CPUs with affine portals
* and add this pool channel to each's dequeue mask.
*/
dpaa_eth_add_channel(priv->channel);
if (epause->rx_pause)
newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause;
if (epause->tx_pause)
- newadv |= ADVERTISED_Asym_Pause;
+ newadv ^= ADVERTISED_Asym_Pause;
oldadv = phydev->advertising &
(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
return 0;
}
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable)
+{
+ u32 tmp;
+ struct dtsec_regs __iomem *regs = dtsec->regs;
+
+ if (!is_init_done(dtsec->dtsec_drv_param))
+ return -EINVAL;
+
+ tmp = ioread32be(®s->rctrl);
+ if (enable)
+ tmp |= RCTRL_MPROM;
+ else
+ tmp &= ~RCTRL_MPROM;
+
+ iowrite32be(tmp, ®s->rctrl);
+
+ return 0;
+}
+
int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
int dtsec_get_version(struct fman_mac *dtsec, u32 *mac_version);
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable);
#endif /* __DTSEC_H */
struct fman_rev_info fm_rev_info;
bool basex_if;
struct phy_device *pcsphy;
+ bool allmulti_enabled;
};
static void add_addr_in_paddr(struct memac_regs __iomem *regs, u8 *adr,
return 0;
}
+int memac_set_allmulti(struct fman_mac *memac, bool enable)
+{
+ u32 entry;
+ struct memac_regs __iomem *regs = memac->regs;
+
+ if (!is_init_done(memac->memac_drv_param))
+ return -EINVAL;
+
+ if (enable) {
+ for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry | HASH_CTRL_MCAST_EN,
+ ®s->hashtable_ctrl);
+ } else {
+ for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry & ~HASH_CTRL_MCAST_EN,
+ ®s->hashtable_ctrl);
+ }
+
+ memac->allmulti_enabled = enable;
+
+ return 0;
+}
+
int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
{
struct memac_regs __iomem *regs = memac->regs;
break;
}
}
- if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
- iowrite32be(hash & ~HASH_CTRL_MCAST_EN, ®s->hashtable_ctrl);
+
+ if (!memac->allmulti_enabled) {
+ if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
+ iowrite32be(hash & ~HASH_CTRL_MCAST_EN,
+ ®s->hashtable_ctrl);
+ }
return 0;
}
enum fman_mac_exceptions exception, bool enable);
int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
+int memac_set_allmulti(struct fman_mac *memac, bool enable);
#endif /* __MEMAC_H */
struct tgec_cfg *cfg;
void *fm;
struct fman_rev_info fm_rev_info;
+ bool allmulti_enabled;
};
static void set_mac_address(struct tgec_regs __iomem *regs, u8 *adr)
return 0;
}
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable)
+{
+ u32 entry;
+ struct tgec_regs __iomem *regs = tgec->regs;
+
+ if (!is_init_done(tgec->cfg))
+ return -EINVAL;
+
+ if (enable) {
+ for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry | TGEC_HASH_MCAST_EN,
+ ®s->hashtable_ctrl);
+ } else {
+ for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry & ~TGEC_HASH_MCAST_EN,
+ ®s->hashtable_ctrl);
+ }
+
+ tgec->allmulti_enabled = enable;
+
+ return 0;
+}
+
int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
{
struct tgec_regs __iomem *regs = tgec->regs;
break;
}
}
- if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
- iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
- ®s->hashtable_ctrl);
+
+ if (!tgec->allmulti_enabled) {
+ if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
+ iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
+ ®s->hashtable_ctrl);
+ }
return 0;
}
int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
int tgec_get_version(struct fman_mac *tgec, u32 *mac_version);
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable);
#endif /* __TGEC_H */
mac_dev->set_tx_pause = dtsec_set_tx_pause_frames;
mac_dev->set_rx_pause = dtsec_accept_rx_pause_frames;
mac_dev->set_exception = dtsec_set_exception;
+ mac_dev->set_allmulti = dtsec_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
mac_dev->set_tx_pause = tgec_set_tx_pause_frames;
mac_dev->set_rx_pause = tgec_accept_rx_pause_frames;
mac_dev->set_exception = tgec_set_exception;
+ mac_dev->set_allmulti = tgec_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
mac_dev->set_tx_pause = memac_set_tx_pause_frames;
mac_dev->set_rx_pause = memac_accept_rx_pause_frames;
mac_dev->set_exception = memac_set_exception;
+ mac_dev->set_allmulti = memac_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
bool rx_pause_active;
bool tx_pause_active;
bool promisc;
+ bool allmulti;
int (*init)(struct mac_device *mac_dev);
int (*start)(struct mac_device *mac_dev);
void (*adjust_link)(struct mac_device *mac_dev);
int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr);
+ int (*set_allmulti)(struct fman_mac *mac_dev, bool enable);
int (*set_multi)(struct net_device *net_dev,
struct mac_device *mac_dev);
int (*set_rx_pause)(struct fman_mac *mac_dev, bool en);
{
int size = lstatus & BD_LENGTH_MASK;
struct page *page = rxb->page;
- bool last = !!(lstatus & BD_LFLAG(RXBD_LAST));
-
- /* Remove the FCS from the packet length */
- if (last)
- size -= ETH_FCS_LEN;
if (likely(first)) {
skb_put(skb, size);
} else {
/* the last fragments' length contains the full frame length */
- if (last)
+ if (lstatus & BD_LFLAG(RXBD_LAST))
size -= skb->len;
- /* Add the last fragment if it contains something other than
- * the FCS, otherwise drop it and trim off any part of the FCS
- * that was already received.
- */
- if (size > 0)
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
- rxb->page_offset + RXBUF_ALIGNMENT,
- size, GFAR_RXB_TRUESIZE);
- else if (size < 0)
- pskb_trim(skb, skb->len + size);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+ rxb->page_offset + RXBUF_ALIGNMENT,
+ size, GFAR_RXB_TRUESIZE);
}
/* try reuse page */
if (priv->padding)
skb_pull(skb, priv->padding);
+ /* Trim off the FCS */
+ pskb_trim(skb, skb->len - ETH_FCS_LEN);
+
if (ndev->features & NETIF_F_RXCSUM)
gfar_rx_checksum(skb, fcb);
static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
static int ibmvnic_remove(struct vio_dev *);
-static void release_sub_crqs(struct ibmvnic_adapter *);
+static void release_sub_crqs(struct ibmvnic_adapter *, bool);
static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *);
static void send_map_query(struct ibmvnic_adapter *adapter);
static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
static void send_request_unmap(struct ibmvnic_adapter *, u8);
-static void send_login(struct ibmvnic_adapter *adapter);
+static int send_login(struct ibmvnic_adapter *adapter);
static void send_cap_queries(struct ibmvnic_adapter *adapter);
static int init_sub_crqs(struct ibmvnic_adapter *);
static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
static int init_stats_buffers(struct ibmvnic_adapter *adapter)
{
adapter->tx_stats_buffers =
- kcalloc(adapter->req_tx_queues,
+ kcalloc(IBMVNIC_MAX_QUEUES,
sizeof(struct ibmvnic_tx_queue_stats),
GFP_KERNEL);
if (!adapter->tx_stats_buffers)
return -ENOMEM;
adapter->rx_stats_buffers =
- kcalloc(adapter->req_rx_queues,
+ kcalloc(IBMVNIC_MAX_QUEUES,
sizeof(struct ibmvnic_rx_queue_stats),
GFP_KERNEL);
if (!adapter->rx_stats_buffers)
return -1;
}
- adapter->num_active_rx_pools = 0;
+ adapter->num_active_rx_pools = rxadd_subcrqs;
for (i = 0; i < rxadd_subcrqs; i++) {
rx_pool = &adapter->rx_pool[i];
rx_pool->next_free = 0;
}
- adapter->num_active_rx_pools = rxadd_subcrqs;
-
return 0;
}
if (!adapter->tx_pool)
return -1;
- adapter->num_active_tx_pools = 0;
+ adapter->num_active_tx_pools = tx_subcrqs;
for (i = 0; i < tx_subcrqs; i++) {
tx_pool = &adapter->tx_pool[i];
tx_pool->producer_index = 0;
}
- adapter->num_active_tx_pools = tx_subcrqs;
-
return 0;
}
adapter->napi_enabled = false;
}
+static int init_napi(struct ibmvnic_adapter *adapter)
+{
+ int i;
+
+ adapter->napi = kcalloc(adapter->req_rx_queues,
+ sizeof(struct napi_struct), GFP_KERNEL);
+ if (!adapter->napi)
+ return -ENOMEM;
+
+ for (i = 0; i < adapter->req_rx_queues; i++) {
+ netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i);
+ netif_napi_add(adapter->netdev, &adapter->napi[i],
+ ibmvnic_poll, NAPI_POLL_WEIGHT);
+ }
+
+ adapter->num_active_rx_napi = adapter->req_rx_queues;
+ return 0;
+}
+
+static void release_napi(struct ibmvnic_adapter *adapter)
+{
+ int i;
+
+ if (!adapter->napi)
+ return;
+
+ for (i = 0; i < adapter->num_active_rx_napi; i++) {
+ if (&adapter->napi[i]) {
+ netdev_dbg(adapter->netdev,
+ "Releasing napi[%d]\n", i);
+ netif_napi_del(&adapter->napi[i]);
+ }
+ }
+
+ kfree(adapter->napi);
+ adapter->napi = NULL;
+ adapter->num_active_rx_napi = 0;
+}
+
static int ibmvnic_login(struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
do {
if (adapter->renegotiate) {
adapter->renegotiate = false;
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
reinit_completion(&adapter->init_done);
send_cap_queries(adapter);
}
reinit_completion(&adapter->init_done);
- send_login(adapter);
- if (!wait_for_completion_timeout(&adapter->init_done,
+ rc = send_login(adapter);
+ if (rc) {
+ dev_err(dev, "Unable to attempt device login\n");
+ return rc;
+ } else if (!wait_for_completion_timeout(&adapter->init_done,
timeout)) {
dev_err(dev, "Login timeout\n");
return -1;
return 0;
}
-static void release_resources(struct ibmvnic_adapter *adapter)
+static void release_login_buffer(struct ibmvnic_adapter *adapter)
{
- int i;
+ kfree(adapter->login_buf);
+ adapter->login_buf = NULL;
+}
+static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
+{
+ kfree(adapter->login_rsp_buf);
+ adapter->login_rsp_buf = NULL;
+}
+
+static void release_resources(struct ibmvnic_adapter *adapter)
+{
release_vpd_data(adapter);
release_tx_pools(adapter);
release_rx_pools(adapter);
- release_stats_token(adapter);
- release_stats_buffers(adapter);
release_error_buffers(adapter);
-
- if (adapter->napi) {
- for (i = 0; i < adapter->req_rx_queues; i++) {
- if (&adapter->napi[i]) {
- netdev_dbg(adapter->netdev,
- "Releasing napi[%d]\n", i);
- netif_napi_del(&adapter->napi[i]);
- }
- }
- }
+ release_napi(adapter);
+ release_login_rsp_buffer(adapter);
}
static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
static int init_resources(struct ibmvnic_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
- int i, rc;
+ int rc;
rc = set_real_num_queues(netdev);
if (rc)
return rc;
- rc = init_stats_buffers(adapter);
- if (rc)
- return rc;
-
- rc = init_stats_token(adapter);
- if (rc)
- return rc;
-
adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
if (!adapter->vpd)
return -ENOMEM;
}
adapter->map_id = 1;
- adapter->napi = kcalloc(adapter->req_rx_queues,
- sizeof(struct napi_struct), GFP_KERNEL);
- if (!adapter->napi)
- return -ENOMEM;
- for (i = 0; i < adapter->req_rx_queues; i++) {
- netdev_dbg(netdev, "Adding napi[%d]\n", i);
- netif_napi_add(netdev, &adapter->napi[i], ibmvnic_poll,
- NAPI_POLL_WEIGHT);
- }
+ rc = init_napi(adapter);
+ if (rc)
+ return rc;
send_map_query(adapter);
return rc;
}
+static void clean_rx_pools(struct ibmvnic_adapter *adapter)
+{
+ struct ibmvnic_rx_pool *rx_pool;
+ struct ibmvnic_rx_buff *rx_buff;
+ u64 rx_entries;
+ int rx_scrqs;
+ int i, j;
+
+ if (!adapter->rx_pool)
+ return;
+
+ rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
+ rx_entries = adapter->req_rx_add_entries_per_subcrq;
+
+ /* Free any remaining skbs in the rx buffer pools */
+ for (i = 0; i < rx_scrqs; i++) {
+ rx_pool = &adapter->rx_pool[i];
+ if (!rx_pool || !rx_pool->rx_buff)
+ continue;
+
+ netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i);
+ for (j = 0; j < rx_entries; j++) {
+ rx_buff = &rx_pool->rx_buff[j];
+ if (rx_buff && rx_buff->skb) {
+ dev_kfree_skb_any(rx_buff->skb);
+ rx_buff->skb = NULL;
+ }
+ }
+ }
+}
+
static void clean_tx_pools(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_tx_pool *tx_pool;
+ struct ibmvnic_tx_buff *tx_buff;
u64 tx_entries;
int tx_scrqs;
int i, j;
/* Free any remaining skbs in the tx buffer pools */
for (i = 0; i < tx_scrqs; i++) {
tx_pool = &adapter->tx_pool[i];
- if (!tx_pool)
+ if (!tx_pool && !tx_pool->tx_buff)
continue;
netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
for (j = 0; j < tx_entries; j++) {
- if (tx_pool->tx_buff[j].skb) {
- dev_kfree_skb_any(tx_pool->tx_buff[j].skb);
- tx_pool->tx_buff[j].skb = NULL;
+ tx_buff = &tx_pool->tx_buff[j];
+ if (tx_buff && tx_buff->skb) {
+ dev_kfree_skb_any(tx_buff->skb);
+ tx_buff->skb = NULL;
}
}
}
}
}
}
-
+ clean_rx_pools(adapter);
clean_tx_pools(adapter);
adapter->state = VNIC_CLOSED;
return rc;
if ((*hdrs >> 7) & 1) {
build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
tx_crq.v1.n_crq_elem = num_entries;
+ tx_buff->num_entries = num_entries;
tx_buff->indir_arr[0] = tx_crq;
tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
sizeof(tx_buff->indir_arr),
(u64)tx_buff->indir_dma,
(u64)num_entries);
} else {
+ tx_buff->num_entries = num_entries;
lpar_rc = send_subcrq(adapter, handle_array[queue_num],
&tx_crq);
}
goto out;
}
- if (atomic_inc_return(&tx_scrq->used)
+ if (atomic_add_return(num_entries, &tx_scrq->used)
>= adapter->req_tx_entries_per_subcrq) {
- netdev_info(netdev, "Stopping queue %d\n", queue_num);
+ netdev_dbg(netdev, "Stopping queue %d\n", queue_num);
netif_stop_subqueue(netdev, queue_num);
}
if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
adapter->wait_for_reset) {
release_resources(adapter);
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
release_crq_queue(adapter);
}
release_tx_pools(adapter);
init_rx_pools(netdev);
init_tx_pools(netdev);
+
+ release_napi(adapter);
+ init_napi(adapter);
} else {
rc = reset_tx_pools(adapter);
if (rc)
return 0;
}
- netif_carrier_on(netdev);
-
/* kick napi */
for (i = 0; i < adapter->req_rx_queues; i++)
napi_schedule(&adapter->napi[i]);
if (adapter->reset_reason != VNIC_RESET_FAILOVER)
netdev_notify_peers(netdev);
+ netif_carrier_on(netdev);
+
return 0;
}
be16_to_cpu(next->rx_comp.rc));
/* free the entry */
next->rx_comp.first = 0;
+ dev_kfree_skb_any(rx_buff->skb);
+ remove_buff_from_pool(adapter, rx_buff);
+ continue;
+ } else if (!rx_buff->skb) {
+ /* free the entry */
+ next->rx_comp.first = 0;
remove_buff_from_pool(adapter, rx_buff);
continue;
}
}
static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
- struct ibmvnic_sub_crq_queue *scrq)
+ struct ibmvnic_sub_crq_queue *scrq,
+ bool do_h_free)
{
struct device *dev = &adapter->vdev->dev;
long rc;
netdev_dbg(adapter->netdev, "Releasing sub-CRQ\n");
- /* Close the sub-crqs */
- do {
- rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
- adapter->vdev->unit_address,
- scrq->crq_num);
- } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
+ if (do_h_free) {
+ /* Close the sub-crqs */
+ do {
+ rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
+ adapter->vdev->unit_address,
+ scrq->crq_num);
+ } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
- if (rc) {
- netdev_err(adapter->netdev,
- "Failed to release sub-CRQ %16lx, rc = %ld\n",
- scrq->crq_num, rc);
+ if (rc) {
+ netdev_err(adapter->netdev,
+ "Failed to release sub-CRQ %16lx, rc = %ld\n",
+ scrq->crq_num, rc);
+ }
}
dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
return NULL;
}
-static void release_sub_crqs(struct ibmvnic_adapter *adapter)
+static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
{
int i;
if (adapter->tx_scrq) {
- for (i = 0; i < adapter->req_tx_queues; i++) {
+ for (i = 0; i < adapter->num_active_tx_scrqs; i++) {
if (!adapter->tx_scrq[i])
continue;
adapter->tx_scrq[i]->irq = 0;
}
- release_sub_crq_queue(adapter, adapter->tx_scrq[i]);
+ release_sub_crq_queue(adapter, adapter->tx_scrq[i],
+ do_h_free);
}
kfree(adapter->tx_scrq);
adapter->tx_scrq = NULL;
+ adapter->num_active_tx_scrqs = 0;
}
if (adapter->rx_scrq) {
- for (i = 0; i < adapter->req_rx_queues; i++) {
+ for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
if (!adapter->rx_scrq[i])
continue;
adapter->rx_scrq[i]->irq = 0;
}
- release_sub_crq_queue(adapter, adapter->rx_scrq[i]);
+ release_sub_crq_queue(adapter, adapter->rx_scrq[i],
+ do_h_free);
}
kfree(adapter->rx_scrq);
adapter->rx_scrq = NULL;
+ adapter->num_active_rx_scrqs = 0;
}
}
restart_loop:
while (pending_scrq(adapter, scrq)) {
unsigned int pool = scrq->pool_index;
+ int num_entries = 0;
next = ibmvnic_next_scrq(adapter, scrq);
for (i = 0; i < next->tx_comp.num_comps; i++) {
txbuff->skb = NULL;
}
+ num_entries += txbuff->num_entries;
+
adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
producer_index] = index;
adapter->tx_pool[pool].producer_index =
/* remove tx_comp scrq*/
next->tx_comp.first = 0;
- if (atomic_sub_return(next->tx_comp.num_comps, &scrq->used) <=
+ if (atomic_sub_return(num_entries, &scrq->used) <=
(adapter->req_tx_entries_per_subcrq / 2) &&
__netif_subqueue_stopped(adapter->netdev,
scrq->pool_index)) {
netif_wake_subqueue(adapter->netdev, scrq->pool_index);
- netdev_info(adapter->netdev, "Started queue %d\n",
- scrq->pool_index);
+ netdev_dbg(adapter->netdev, "Started queue %d\n",
+ scrq->pool_index);
}
}
dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n",
scrq->irq, rc);
irq_dispose_mapping(scrq->irq);
- goto req_rx_irq_failed;
+ goto req_tx_irq_failed;
}
}
free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
irq_dispose_mapping(adapter->rx_scrq[j]->irq);
}
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
return rc;
}
for (i = 0; i < adapter->req_tx_queues; i++) {
adapter->tx_scrq[i] = allqueues[i];
adapter->tx_scrq[i]->pool_index = i;
+ adapter->num_active_tx_scrqs++;
}
adapter->rx_scrq = kcalloc(adapter->req_rx_queues,
for (i = 0; i < adapter->req_rx_queues; i++) {
adapter->rx_scrq[i] = allqueues[i + adapter->req_tx_queues];
adapter->rx_scrq[i]->scrq_num = i;
+ adapter->num_active_rx_scrqs++;
}
kfree(allqueues);
adapter->tx_scrq = NULL;
tx_failed:
for (i = 0; i < registered_queues; i++)
- release_sub_crq_queue(adapter, allqueues[i]);
+ release_sub_crq_queue(adapter, allqueues[i], 1);
kfree(allqueues);
return -1;
}
strncpy(&vlcd->name, adapter->netdev->name, len);
}
-static void send_login(struct ibmvnic_adapter *adapter)
+static int send_login(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
struct ibmvnic_login_buffer *login_buffer;
struct vnic_login_client_data *vlcd;
int i;
+ if (!adapter->tx_scrq || !adapter->rx_scrq) {
+ netdev_err(adapter->netdev,
+ "RX or TX queues are not allocated, device login failed\n");
+ return -1;
+ }
+
+ release_login_rsp_buffer(adapter);
client_data_len = vnic_client_data_len(adapter);
buffer_size =
crq.login.len = cpu_to_be32(buffer_size);
ibmvnic_send_crq(adapter, &crq);
- return;
+ return 0;
buf_rsp_map_failed:
kfree(login_rsp_buffer);
buf_map_failed:
kfree(login_buffer);
buf_alloc_failed:
- return;
+ return -1;
}
static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
ibmvnic_remove(adapter->vdev);
return -EIO;
}
+ release_login_buffer(adapter);
complete(&adapter->init_done);
return 0;
{
struct device *dev = &adapter->vdev->dev;
unsigned long timeout = msecs_to_jiffies(30000);
+ u64 old_num_rx_queues, old_num_tx_queues;
int rc;
if (adapter->resetting && !adapter->wait_for_reset) {
adapter->from_passive_init = false;
+ old_num_rx_queues = adapter->req_rx_queues;
+ old_num_tx_queues = adapter->req_tx_queues;
+
init_completion(&adapter->init_done);
adapter->init_done_rc = 0;
ibmvnic_send_crq_init(adapter);
return -1;
}
- if (adapter->resetting && !adapter->wait_for_reset)
- rc = reset_sub_crq_queues(adapter);
- else
+ if (adapter->resetting && !adapter->wait_for_reset) {
+ if (adapter->req_rx_queues != old_num_rx_queues ||
+ adapter->req_tx_queues != old_num_tx_queues) {
+ release_sub_crqs(adapter, 0);
+ rc = init_sub_crqs(adapter);
+ } else {
+ rc = reset_sub_crq_queues(adapter);
+ }
+ } else {
rc = init_sub_crqs(adapter);
+ }
+
if (rc) {
dev_err(dev, "Initialization of sub crqs failed\n");
release_crq_queue(adapter);
release_crq_queue(adapter);
}
+ rc = init_stats_buffers(adapter);
+ if (rc)
+ return rc;
+
+ rc = init_stats_token(adapter);
+ if (rc)
+ return rc;
+
return rc;
}
device_remove_file(&dev->dev, &dev_attr_failover);
ibmvnic_init_fail:
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
release_crq_queue(adapter);
free_netdev(netdev);
mutex_lock(&adapter->reset_lock);
release_resources(adapter);
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
release_crq_queue(adapter);
+ release_stats_token(adapter);
+ release_stats_buffers(adapter);
+
adapter->state = VNIC_REMOVED;
mutex_unlock(&adapter->reset_lock);
union sub_crq indir_arr[6];
u8 hdr_data[140];
dma_addr_t indir_dma;
+ int num_entries;
};
struct ibmvnic_tx_pool {
u64 opt_rxba_entries_per_subcrq;
__be64 tx_rx_desc_req;
u8 map_id;
- u64 num_active_rx_pools;
- u64 num_active_tx_pools;
+ u32 num_active_rx_scrqs;
+ u32 num_active_rx_pools;
+ u32 num_active_rx_napi;
+ u32 num_active_tx_scrqs;
+ u32 num_active_tx_pools;
struct tasklet_struct tasklet;
enum vnic_state state;
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* fm10k_read_hw_stats_32b - Reads value of 32-bit registers
* @hw: pointer to the hardware structure
* @addr: address of register containing a 32-bit value
+ * @stat: pointer to structure holding hw stat information
*
* Function reads the content of the register and returns the delta
* between the base and the current value.
* fm10k_read_hw_stats_48b - Reads value of 48-bit registers
* @hw: pointer to the hardware structure
* @addr: address of register containing the lower 32-bit value
+ * @stat: pointer to structure holding hw stat information
*
* Function reads the content of 2 registers, combined to represent a 48-bit
* statistical value. Extra processing is required to handle overflowing.
/**
* fm10k_unbind_hw_stats_q - Unbind the queue counters from their queues
- * @hw: pointer to the hardware structure
* @q: pointer to the ring of hardware statistics queue
* @idx: index pointing to the start of the ring iteration
* @count: number of queues to iterate over
#include "fm10k.h"
-#define DRV_VERSION "0.22.1-k"
+#define DRV_VERSION "0.23.4-k"
#define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver"
const char fm10k_driver_version[] = DRV_VERSION;
char fm10k_driver_name[] = "fm10k";
static const char fm10k_driver_string[] = DRV_SUMMARY;
static const char fm10k_copyright[] =
- "Copyright(c) 2013 - 2017 Intel Corporation.";
+ "Copyright(c) 2013 - 2018 Intel Corporation.";
MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
MODULE_DESCRIPTION(DRV_SUMMARY);
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
/**
* fm10k_udp_tunnel_add
- * @netdev: network interface device structure
+ * @dev: network interface device structure
* @ti: Tunnel endpoint information
*
* This function is called when a new UDP tunnel port has been added.
/**
* fm10k_udp_tunnel_del
- * @netdev: network interface device structure
- * @ti: Tunnel endpoint information
+ * @dev: network interface device structure
+ * @ti: Tunnel end point information
*
* This function is called when a new UDP tunnel port is deleted. The freed
* port will be removed from the list, then we reprogram the offloaded port
* @glort: the target glort for this update
* @addr: the address to update
* @vid: the vid to update
- * @sync: whether to add or remove
+ * @set: whether to add or remove
*
* This function queues up a MAC request for sending to the switch manager.
* A separate thread monitors the queue and sends updates to the switch
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
[fm10k_device_vf] = &fm10k_vf_info,
};
-/**
+/*
* fm10k_pci_tbl - PCI Device ID Table
*
* Wildcard entries (PCI_ANY_ID) should come last
/**
* fm10k_service_timer - Timer Call-back
- * @data: pointer to interface cast into an unsigned long
+ * @t: pointer to timer data
**/
static void fm10k_service_timer(struct timer_list *t)
{
/**
* fm10k_watchdog_flush_tx - flush queues on host not ready
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
**/
static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
{
/**
* fm10k_watchdog_subtask - check and bring link up
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
**/
static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
{
/**
* fm10k_check_hang_subtask - check for hung queues and dropped interrupts
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
*
* This function serves two purposes. First it strobes the interrupt lines
* in order to make certain interrupts are occurring. Secondly it sets the
/**
* fm10k_sw_init - Initialize general software structures
* @interface: host interface private structure to initialize
+ * @ent: PCI device ID entry
*
* fm10k_sw_init initializes the interface private data structure.
* Fields are initialized based on PCI device information and
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
/**
* fm10k_iov_select_vid - Select correct default VLAN ID
- * @hw: Pointer to hardware structure
+ * @vf_info: pointer to VF information structure
* @vid: VLAN ID to correct
*
* Will report an error if the VLAN ID is out of range. For VID = 0, it will
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* @msg: Pointer to message block
* @attr_id: Attribute ID
* @mac_addr: MAC address to be stored
+ * @vlan: VLAN to be stored
*
* This function will reorder a MAC address to be CPU endian and store it
* in the attribute buffer. It will return success if provided with a
/**
* fm10k_tlv_attr_get_mac_vlan - Get MAC/VLAN stored in attribute
* @attr: Pointer to attribute
- * @attr_id: Attribute ID
* @mac_addr: location of buffer to store MAC address
+ * @vlan: location of buffer to store VLAN
*
* This function pulls the MAC address back out of the attribute and will
* place it in the array pointed by by mac_addr. It will return success
* @hw: Pointer to hardware structure
* @msg: Pointer to message
* @mbx: Pointer to mailbox information structure
- * @func: Function array containing list of message handling functions
+ * @data: Pointer to message handler data structure
*
* This function should be the first function called upon receiving a
* message. The handler will identify the message type and call the correct
#define I40E_HW_STOP_FW_LLDP BIT(16)
#define I40E_HW_PORT_ID_VALID BIT(17)
#define I40E_HW_RESTART_AUTONEG BIT(18)
+#define I40E_HW_STOPPABLE_FW_LLDP BIT(19)
u64 flags;
#define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(0)
struct i40e_ring_container rx;
struct i40e_ring_container tx;
+ u8 itr_countdown; /* when 0 should adjust adaptive ITR */
u8 num_ringpairs; /* total number of ring pairs in vector */
cpumask_t affinity_mask;
struct rcu_head rcu; /* to avoid race with update stats on free */
char name[I40E_INT_NAME_STR_LEN];
bool arm_wb_state;
-#define ITR_COUNTDOWN_START 100
- u8 itr_countdown; /* when 0 should adjust ITR */
} ____cacheline_internodealigned_in_smp;
/* lan device */
int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
+int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter,
+ bool add);
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter,
+ bool add);
#endif /* _I40E_H_ */
I40E_PHY_TYPE_DEFAULT = 0xFF,
};
+#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \
+ BIT_ULL(I40E_PHY_TYPE_XAUI) | \
+ BIT_ULL(I40E_PHY_TYPE_XFI) | \
+ BIT_ULL(I40E_PHY_TYPE_SFI) | \
+ BIT_ULL(I40E_PHY_TYPE_XLAUI) | \
+ BIT_ULL(I40E_PHY_TYPE_XLPPI) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \
+ BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \
+ BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \
+ BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC))
+
#define I40E_LINK_SPEED_100MB_SHIFT 0x1
#define I40E_LINK_SPEED_1000MB_SHIFT 0x2
#define I40E_LINK_SPEED_10GB_SHIFT 0x3
dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n",
(unsigned long int)nd->vlan_features);
}
- dev_info(&pf->pdev->dev,
- " vlgrp: & = %p\n", vsi->active_vlans);
+ dev_info(&pf->pdev->dev, " active_vlans is %s\n",
+ vsi->active_vlans ? "<valid>" : "<null>");
dev_info(&pf->pdev->dev,
" flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n",
vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags);
if (!rx_ring)
continue;
- dev_info(&pf->pdev->dev,
- " rx_rings[%i]: desc = %p\n",
- i, rx_ring->desc);
- dev_info(&pf->pdev->dev,
- " rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n",
- i, rx_ring->dev,
- rx_ring->netdev,
- rx_ring->rx_bi);
dev_info(&pf->pdev->dev,
" rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
i, *rx_ring->state,
rx_ring->rx_stats.realloc_count,
rx_ring->rx_stats.page_reuse_count);
dev_info(&pf->pdev->dev,
- " rx_rings[%i]: size = %i, dma = 0x%08lx\n",
- i, rx_ring->size,
- (unsigned long int)rx_ring->dma);
- dev_info(&pf->pdev->dev,
- " rx_rings[%i]: vsi = %p, q_vector = %p\n",
- i, rx_ring->vsi,
- rx_ring->q_vector);
+ " rx_rings[%i]: size = %i\n",
+ i, rx_ring->size);
dev_info(&pf->pdev->dev,
- " rx_rings[%i]: rx_itr_setting = %d (%s)\n",
- i, rx_ring->rx_itr_setting,
- ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed");
+ " rx_rings[%i]: itr_setting = %d (%s)\n",
+ i, rx_ring->itr_setting,
+ ITR_IS_DYNAMIC(rx_ring->itr_setting) ? "dynamic" : "fixed");
}
for (i = 0; i < vsi->num_queue_pairs; i++) {
struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]);
if (!tx_ring)
continue;
- dev_info(&pf->pdev->dev,
- " tx_rings[%i]: desc = %p\n",
- i, tx_ring->desc);
- dev_info(&pf->pdev->dev,
- " tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n",
- i, tx_ring->dev,
- tx_ring->netdev,
- tx_ring->tx_bi);
dev_info(&pf->pdev->dev,
" tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
i, *tx_ring->state,
tx_ring->tx_stats.tx_busy,
tx_ring->tx_stats.tx_done_old);
dev_info(&pf->pdev->dev,
- " tx_rings[%i]: size = %i, dma = 0x%08lx\n",
- i, tx_ring->size,
- (unsigned long int)tx_ring->dma);
- dev_info(&pf->pdev->dev,
- " tx_rings[%i]: vsi = %p, q_vector = %p\n",
- i, tx_ring->vsi,
- tx_ring->q_vector);
+ " tx_rings[%i]: size = %i\n",
+ i, tx_ring->size);
dev_info(&pf->pdev->dev,
" tx_rings[%i]: DCB tc = %d\n",
i, tx_ring->dcb_tc);
dev_info(&pf->pdev->dev,
- " tx_rings[%i]: tx_itr_setting = %d (%s)\n",
- i, tx_ring->tx_itr_setting,
- ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed");
+ " tx_rings[%i]: itr_setting = %d (%s)\n",
+ i, tx_ring->itr_setting,
+ ITR_IS_DYNAMIC(tx_ring->itr_setting) ? "dynamic" : "fixed");
}
rcu_read_unlock();
dev_info(&pf->pdev->dev,
vsi->info.resp_reserved[6], vsi->info.resp_reserved[7],
vsi->info.resp_reserved[8], vsi->info.resp_reserved[9],
vsi->info.resp_reserved[10], vsi->info.resp_reserved[11]);
- if (vsi->back)
- dev_info(&pf->pdev->dev, " PF = %p\n", vsi->back);
dev_info(&pf->pdev->dev, " idx = %d\n", vsi->idx);
dev_info(&pf->pdev->dev,
" tc_config: numtc = %d, enabled_tc = 0x%x\n",
I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
+ I40E_PRIV_FLAG("link-down-on-close",
+ I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0),
I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
I40E_PRIV_FLAG("disable-source-pruning",
I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
rx_ring = vsi->rx_rings[queue];
tx_ring = vsi->tx_rings[queue];
- if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
+ if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
ec->use_adaptive_rx_coalesce = 1;
- if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
+ if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
ec->use_adaptive_tx_coalesce = 1;
- ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
- ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
/* we use the _usecs_high to store/set the interrupt rate limit
* that the hardware supports, that almost but not quite
struct i40e_pf *pf = vsi->back;
struct i40e_hw *hw = &pf->hw;
struct i40e_q_vector *q_vector;
- u16 vector, intrl;
+ u16 intrl;
intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit);
- rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
- tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
+ rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+ tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
if (ec->use_adaptive_rx_coalesce)
- rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
+ rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
else
- rx_ring->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+ rx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
if (ec->use_adaptive_tx_coalesce)
- tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
+ tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
else
- tx_ring->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
+ tx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
q_vector = rx_ring->q_vector;
- q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
- vector = vsi->base_vector + q_vector->v_idx;
- wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+ q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
q_vector = tx_ring->q_vector;
- q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
- vector = vsi->base_vector + q_vector->v_idx;
- wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
+ q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
- wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl);
+ /* The interrupt handler itself will take care of programming
+ * the Tx and Rx ITR values based on the values we have entered
+ * into the q_vector, no need to write the values now.
+ */
+
+ wr32(hw, I40E_PFINT_RATEN(q_vector->reg_idx), intrl);
i40e_flush(hw);
}
vsi->work_limit = ec->tx_max_coalesced_frames_irq;
if (queue < 0) {
- cur_rx_itr = vsi->rx_rings[0]->rx_itr_setting;
- cur_tx_itr = vsi->tx_rings[0]->tx_itr_setting;
+ cur_rx_itr = vsi->rx_rings[0]->itr_setting;
+ cur_tx_itr = vsi->tx_rings[0]->itr_setting;
} else if (queue < vsi->num_queue_pairs) {
- cur_rx_itr = vsi->rx_rings[queue]->rx_itr_setting;
- cur_tx_itr = vsi->tx_rings[queue]->tx_itr_setting;
+ cur_rx_itr = vsi->rx_rings[queue]->itr_setting;
+ cur_tx_itr = vsi->tx_rings[queue]->itr_setting;
} else {
netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
vsi->num_queue_pairs - 1);
return -EINVAL;
}
- if (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+ if (ec->rx_coalesce_usecs > I40E_MAX_ITR) {
netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
return -EINVAL;
}
return -EINVAL;
}
- if (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+ if (ec->tx_coalesce_usecs > I40E_MAX_ITR) {
netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
return -EINVAL;
}
if (ec->use_adaptive_rx_coalesce && !cur_rx_itr)
- ec->rx_coalesce_usecs = I40E_MIN_ITR << 1;
+ ec->rx_coalesce_usecs = I40E_MIN_ITR;
if (ec->use_adaptive_tx_coalesce && !cur_tx_itr)
- ec->tx_coalesce_usecs = I40E_MIN_ITR << 1;
+ ec->tx_coalesce_usecs = I40E_MIN_ITR;
intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high);
vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg);
}
flags_complete:
+ changed_flags = orig_flags ^ new_flags;
+
/* Before we finalize any flag changes, we need to perform some
* checks to ensure that the changes are supported and safe.
*/
!(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE))
return -EOPNOTSUPP;
- /* Disable FW LLDP not supported if NPAR active or if FW
- * API version < 1.7
+ /* If the driver detected FW LLDP was disabled on init, this flag could
+ * be set, however we do not support _changing_ the flag if NPAR is
+ * enabled or FW API version < 1.7. There are situations where older
+ * FW versions/NPAR enabled PFs could disable LLDP, however we _must_
+ * not allow the user to enable/disable LLDP with this flag on
+ * unsupported FW versions.
*/
- if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) {
- if (pf->hw.func_caps.npar_enable) {
- dev_warn(&pf->pdev->dev,
- "Unable to stop FW LLDP if NPAR active\n");
- return -EOPNOTSUPP;
- }
-
- if (pf->hw.aq.api_maj_ver < 1 ||
- (pf->hw.aq.api_maj_ver == 1 &&
- pf->hw.aq.api_min_ver < 7)) {
+ if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
+ if (!(pf->hw_features & I40E_HW_STOPPABLE_FW_LLDP)) {
dev_warn(&pf->pdev->dev,
- "FW ver does not support stopping FW LLDP\n");
+ "Device does not support changing FW LLDP\n");
return -EOPNOTSUPP;
}
}
* something else has modified the flags variable since we copied it
* originally. We'll just punt with an error and log something in the
* message buffer.
+ *
+ * This is the point of no return for this function. We need to have
+ * checked any discrepancies or misconfigurations and returned
+ * EOPNOTSUPP before updating pf->flags here.
*/
if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
dev_warn(&pf->pdev->dev,
return -EAGAIN;
}
- changed_flags = orig_flags ^ new_flags;
-
/* Process any additional changes needed as a result of flag changes.
* The changed_flags value reflects the list of bits that were
* changed in the code above.
}
}
+ if ((changed_flags & pf->flags &
+ I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
+ (pf->flags & I40E_FLAG_MFP_ENABLED))
+ dev_warn(&pf->pdev->dev,
+ "Turning on link-down-on-close flag may affect other partitions\n");
+
if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) {
struct i40e_dcbx_config *dcbcfg;
static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
static void i40e_fdir_sb_setup(struct i40e_pf *pf);
static int i40e_veb_get_bw_info(struct i40e_veb *veb);
-static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
- struct i40e_cloud_filter *filter,
- bool add);
-static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
- struct i40e_cloud_filter *filter,
- bool add);
static int i40e_get_capabilities(struct i40e_pf *pf,
enum i40e_admin_queue_opc list_type);
if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) {
dev_info(&pf->pdev->dev,
- "param err: pile=%p needed=%d id=0x%04x\n",
- pile, needed, id);
+ "param err: pile=%s needed=%d id=0x%04x\n",
+ pile ? "<valid>" : "<null>", needed, id);
return -EINVAL;
}
ether_addr_copy(f->macaddr, macaddr);
f->vlan = vlan;
- /* If we're in overflow promisc mode, set the state directly
- * to failed, so we don't bother to try sending the filter
- * to the hardware.
- */
- if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state))
- f->state = I40E_FILTER_FAILED;
- else
- f->state = I40E_FILTER_NEW;
+ f->state = I40E_FILTER_NEW;
INIT_HLIST_NODE(&f->hlist);
key = i40e_addr_to_hkey(macaddr);
* @list: the list of filters to send to firmware
* @add_head: Position in the add hlist
* @num_add: the number of filters to add
- * @promisc_change: set to true on exit if promiscuous mode was forced on
*
* Send a request to firmware via AdminQ to add a chunk of filters. Will set
- * promisc_changed to true if the firmware has run out of space for more
- * filters.
+ * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of
+ * space for more filters.
*/
static
void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
struct i40e_aqc_add_macvlan_element_data *list,
struct i40e_new_mac_filter *add_head,
- int num_add, bool *promisc_changed)
+ int num_add)
{
struct i40e_hw *hw = &vsi->back->hw;
int aq_err, fcnt;
fcnt = i40e_update_filter_state(num_add, list, add_head);
if (fcnt != num_add) {
- *promisc_changed = true;
set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
dev_warn(&vsi->back->pdev->dev,
"Error %s adding RX filters on %s, promiscuous mode forced on\n",
NULL);
}
- if (aq_ret)
+ if (aq_ret) {
+ set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
dev_warn(&vsi->back->pdev->dev,
- "Error %s setting broadcast promiscuous mode on %s\n",
+ "Error %s, forcing overflow promiscuous on %s\n",
i40e_aq_str(hw, hw->aq.asq_last_status),
vsi_name);
+ }
return aq_ret;
}
struct i40e_mac_filter *f;
struct i40e_new_mac_filter *new, *add_head = NULL;
struct i40e_hw *hw = &vsi->back->hw;
+ bool old_overflow, new_overflow;
unsigned int failed_filters = 0;
unsigned int vlan_filters = 0;
- bool promisc_changed = false;
char vsi_name[16] = "PF";
int filter_list_len = 0;
i40e_status aq_ret = 0;
usleep_range(1000, 2000);
pf = vsi->back;
+ old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
if (vsi->netdev) {
changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
vsi->current_netdev_flags = vsi->netdev->flags;
num_add = 0;
hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) {
- if (test_bit(__I40E_VSI_OVERFLOW_PROMISC,
- vsi->state)) {
- new->state = I40E_FILTER_FAILED;
- continue;
- }
-
/* handle broadcast filters by updating the broadcast
* promiscuous flag instead of adding a MAC filter.
*/
/* flush a full buffer */
if (num_add == filter_list_len) {
i40e_aqc_add_filters(vsi, vsi_name, add_list,
- add_head, num_add,
- &promisc_changed);
+ add_head, num_add);
memset(add_list, 0, list_size);
num_add = 0;
}
}
if (num_add) {
i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head,
- num_add, &promisc_changed);
+ num_add);
}
/* Now move all of the filters from the temp add list back to
* the VSI's list.
}
spin_unlock_bh(&vsi->mac_filter_hash_lock);
- /* If promiscuous mode has changed, we need to calculate a new
- * threshold for when we are safe to exit
- */
- if (promisc_changed)
- vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
-
/* Check if we are able to exit overflow promiscuous mode. We can
* safely exit if we didn't just enter, we no longer have any failed
* filters, and we have reduced filters below the threshold value.
*/
- if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) &&
- !promisc_changed && !failed_filters &&
- (vsi->active_filters < vsi->promisc_threshold)) {
+ if (old_overflow && !failed_filters &&
+ vsi->active_filters < vsi->promisc_threshold) {
dev_info(&pf->pdev->dev,
"filter logjam cleared on %s, leaving overflow promiscuous mode\n",
vsi_name);
clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
- promisc_changed = true;
vsi->promisc_threshold = 0;
}
goto out;
}
+ new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
+ /* If we are entering overflow promiscuous, we need to calculate a new
+ * threshold for when we are safe to exit
+ */
+ if (!old_overflow && new_overflow)
+ vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
+
/* check for changes in promiscuous modes */
if (changed_flags & IFF_ALLMULTI) {
bool cur_multipromisc;
}
}
- if ((changed_flags & IFF_PROMISC) || promisc_changed) {
+ if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) {
bool cur_promisc;
cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
- test_bit(__I40E_VSI_OVERFLOW_PROMISC,
- vsi->state));
+ new_overflow);
aq_ret = i40e_set_promiscuous(pf, cur_promisc);
if (aq_ret) {
retval = i40e_aq_rc_to_posix(aq_ret,
for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
struct i40e_q_vector *q_vector = vsi->q_vectors[i];
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
- q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting);
- q_vector->rx.latency_range = I40E_LOW_LATENCY;
+ q_vector->rx.next_update = jiffies + 1;
+ q_vector->rx.target_itr =
+ ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
- q_vector->rx.itr);
- q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting);
- q_vector->tx.latency_range = I40E_LOW_LATENCY;
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+
+ q_vector->tx.next_update = jiffies + 1;
+ q_vector->tx.target_itr =
+ ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
- q_vector->tx.itr);
+ q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
+
wr32(hw, I40E_PFINT_RATEN(vector - 1),
i40e_intrl_usec_to_reg(vsi->int_rate_limit));
u32 val;
/* set the ITR configuration */
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
- q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting);
- q_vector->rx.latency_range = I40E_LOW_LATENCY;
- wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr);
- q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting);
- q_vector->tx.latency_range = I40E_LOW_LATENCY;
- wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr);
+ q_vector->rx.next_update = jiffies + 1;
+ q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
+ wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->tx.next_update = jiffies + 1;
+ q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
+ wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
i40e_enable_misc_int_causes(pf);
* @vsi: VSI to be configured
*
**/
-int i40e_get_link_speed(struct i40e_vsi *vsi)
+static int i40e_get_link_speed(struct i40e_vsi *vsi)
{
struct i40e_pf *pf = vsi->back;
return err;
}
+/**
+ * i40e_force_link_state - Force the link status
+ * @pf: board private structure
+ * @is_up: whether the link state should be forced up or down
+ **/
+static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+{
+ struct i40e_aq_get_phy_abilities_resp abilities;
+ struct i40e_aq_set_phy_config config = {0};
+ struct i40e_hw *hw = &pf->hw;
+ i40e_status err;
+ u64 mask;
+
+ /* Get the current phy config */
+ err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+ NULL);
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "failed to get phy cap., ret = %s last_status = %s\n",
+ i40e_stat_str(hw, err),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return err;
+ }
+
+ /* If link needs to go up, but was not forced to go down,
+ * no need for a flap
+ */
+ if (is_up && abilities.phy_type != 0)
+ return I40E_SUCCESS;
+
+ /* To force link we need to set bits for all supported PHY types,
+ * but there are now more than 32, so we need to split the bitmap
+ * across two fields.
+ */
+ mask = I40E_PHY_TYPES_BITMASK;
+ config.phy_type = is_up ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0;
+ config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0;
+ /* Copy the old settings, except of phy_type */
+ config.abilities = abilities.abilities;
+ config.link_speed = abilities.link_speed;
+ config.eee_capability = abilities.eee_capability;
+ config.eeer = abilities.eeer_val;
+ config.low_power_ctrl = abilities.d3_lpan;
+ err = i40e_aq_set_phy_config(hw, &config, NULL);
+
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "set phy config ret = %s last_status = %s\n",
+ i40e_stat_str(&pf->hw, err),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return err;
+ }
+
+ /* Update the link info */
+ err = i40e_update_link_info(hw);
+ if (err) {
+ /* Wait a little bit (on 40G cards it sometimes takes a really
+ * long time for link to come back from the atomic reset)
+ * and try once more
+ */
+ msleep(1000);
+ i40e_update_link_info(hw);
+ }
+
+ i40e_aq_set_link_restart_an(hw, true, NULL);
+
+ return I40E_SUCCESS;
+}
+
/**
* i40e_down - Shutdown the connection processing
* @vsi: the VSI being stopped
}
i40e_vsi_disable_irq(vsi);
i40e_vsi_stop_rings(vsi);
+ if (vsi->type == I40E_VSI_MAIN &&
+ vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED)
+ i40e_force_link_state(vsi->back, false);
i40e_napi_disable_all(vsi);
for (i = 0; i < vsi->num_queue_pairs; i++) {
* Add or delete a cloud filter for a specific flow spec.
* Returns 0 if the filter were successfully added.
**/
-static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
- struct i40e_cloud_filter *filter, bool add)
+int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter, bool add)
{
struct i40e_aqc_cloud_filters_element_data cld_filter;
struct i40e_pf *pf = vsi->back;
* Add or delete a cloud filter for a specific flow spec using big buffer.
* Returns 0 if the filter were successfully added.
**/
-static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
- struct i40e_cloud_filter *filter,
- bool add)
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter,
+ bool add)
{
struct i40e_aqc_cloud_filters_element_bb cld_filter;
struct i40e_pf *pf = vsi->back;
netif_carrier_off(netdev);
+ if (i40e_force_link_state(pf, true))
+ return -EAGAIN;
+
err = i40e_vsi_open(vsi);
if (err)
return err;
}
i40e_get_oem_version(&pf->hw);
+ if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
+ ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) ||
+ hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) {
+ /* The following delay is necessary for 4.33 firmware and older
+ * to recover after EMP reset. 200 ms should suffice but we
+ * put here 300 ms to be sure that FW is ready to operate
+ * after reset.
+ */
+ mdelay(300);
+ }
+
/* re-verify the eeprom if we just had an EMP reset */
if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state))
i40e_verify_eeprom(pf);
mutex_lock(&pf->switch_mutex);
if (!pf->vsi[vsi->idx]) {
- dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](%p,type %d)\n",
- vsi->idx, vsi->idx, vsi, vsi->type);
+ dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n",
+ vsi->idx, vsi->idx, vsi->type);
goto unlock_vsi;
}
if (pf->vsi[vsi->idx] != vsi) {
dev_err(&pf->pdev->dev,
- "pf->vsi[%d](%p, type %d) != vsi[%d](%p,type %d): no free!\n",
+ "pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n",
pf->vsi[vsi->idx]->idx,
- pf->vsi[vsi->idx],
pf->vsi[vsi->idx]->type,
- vsi->idx, vsi, vsi->type);
+ vsi->idx, vsi->type);
goto unlock_vsi;
}
ring->dcb_tc = 0;
if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
- ring->tx_itr_setting = pf->tx_itr_default;
+ ring->itr_setting = pf->tx_itr_default;
vsi->tx_rings[i] = ring++;
if (!i40e_enabled_xdp_vsi(vsi))
if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
set_ring_xdp(ring);
- ring->tx_itr_setting = pf->tx_itr_default;
+ ring->itr_setting = pf->tx_itr_default;
vsi->xdp_rings[i] = ring++;
setup_rx:
ring->count = vsi->num_desc;
ring->size = 0;
ring->dcb_tc = 0;
- ring->rx_itr_setting = pf->rx_itr_default;
+ ring->itr_setting = pf->rx_itr_default;
vsi->rx_rings[i] = ring;
}
netif_napi_add(vsi->netdev, &q_vector->napi,
i40e_napi_poll, NAPI_POLL_WEIGHT);
- q_vector->rx.latency_range = I40E_LOW_LATENCY;
- q_vector->tx.latency_range = I40E_LOW_LATENCY;
-
/* tie q_vector and vsi together */
vsi->q_vectors[v_idx] = q_vector;
/* IWARP needs one extra vector for CQP just like MISC.*/
pf->num_iwarp_msix = (int)num_online_cpus() + 1;
}
+ /* Stopping the FW LLDP engine is only supported on the
+ * XL710 with a FW ver >= 1.7. Also, stopping FW LLDP
+ * engine is not supported if NPAR is functioning on this
+ * part
+ */
+ if (pf->hw.mac.type == I40E_MAC_XL710 &&
+ !pf->hw.func_caps.npar_enable &&
+ (pf->hw.aq.api_maj_ver > 1 ||
+ (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver > 6)))
+ pf->hw_features |= I40E_HW_STOPPABLE_FW_LLDP;
#ifdef CONFIG_PCI_IOV
if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) {
/**
* i40e_get_tx_pending - how many tx descriptors not processed
* @tx_ring: the ring of descriptors
+ * @in_sw: use SW variables
*
* Since there is no access to the ring head register
* in XL710, we need to use our local copies
**/
-u32 i40e_get_tx_pending(struct i40e_ring *ring)
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
{
u32 head, tail;
- head = i40e_get_head(ring);
- tail = readl(ring->tail);
+ if (!in_sw) {
+ head = i40e_get_head(ring);
+ tail = readl(ring->tail);
+ } else {
+ head = ring->next_to_clean;
+ tail = ring->next_to_use;
+ }
if (head != tail)
return (head < tail) ?
*/
smp_rmb();
tx_ring->tx_stats.prev_pkt_ctr =
- i40e_get_tx_pending(tx_ring) ? packets : -1;
+ i40e_get_tx_pending(tx_ring, true) ? packets : -1;
}
}
}
* them to be written back in case we stay in NAPI.
* In this mode on X722 we do not enable Interrupt.
*/
- unsigned int j = i40e_get_tx_pending(tx_ring);
+ unsigned int j = i40e_get_tx_pending(tx_ring, false);
if (budget &&
((j / WB_STRIDE) == 0) && (j > 0) &&
}
}
+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
+ struct i40e_ring_container *rc)
+{
+ return &q_vector->rx == rc;
+}
+
+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+{
+ unsigned int divisor;
+
+ switch (q_vector->vsi->back->hw.phy.link_info.link_speed) {
+ case I40E_LINK_SPEED_40GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+ break;
+ case I40E_LINK_SPEED_25GB:
+ case I40E_LINK_SPEED_20GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+ break;
+ default:
+ case I40E_LINK_SPEED_10GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+ break;
+ case I40E_LINK_SPEED_1GB:
+ case I40E_LINK_SPEED_100MB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+ break;
+ }
+
+ return divisor;
+}
+
/**
- * i40e_set_new_dynamic_itr - Find new ITR level
+ * i40e_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
* @rc: structure containing ring performance data
*
- * Returns true if ITR changed, false if not
- *
- * Stores a new ITR value based on packets and byte counts during
- * the last interrupt. The advantage of per interrupt computation
- * is faster updates and more accurate ITR for the current traffic
- * pattern. Constants in this function were computed based on
- * theoretical maximum wire speed and thresholds were set based on
- * testing data as well as attempting to minimize response time
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
* while increasing bulk throughput.
**/
-static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+static void i40e_update_itr(struct i40e_q_vector *q_vector,
+ struct i40e_ring_container *rc)
{
- enum i40e_latency_range new_latency_range = rc->latency_range;
- u32 new_itr = rc->itr;
- int bytes_per_usec;
- unsigned int usecs, estimated_usecs;
+ unsigned int avg_wire_size, packets, bytes, itr;
+ unsigned long next_update = jiffies;
- if (rc->total_packets == 0 || !rc->itr)
- return false;
+ /* If we don't have any rings just leave ourselves set for maximum
+ * possible latency so we take ourselves out of the equation.
+ */
+ if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
+ return;
+
+ /* For Rx we want to push the delay up and default to low latency.
+ * for Tx we want to pull the delay down and default to high latency.
+ */
+ itr = i40e_container_is_rx(q_vector, rc) ?
+ I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
+ I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
+
+ /* If we didn't update within up to 1 - 2 jiffies we can assume
+ * that either packets are coming in so slow there hasn't been
+ * any work, or that there is so much work that NAPI is dealing
+ * with interrupt moderation and we don't need to do anything.
+ */
+ if (time_after(next_update, rc->next_update))
+ goto clear_counts;
+
+ /* If itr_countdown is set it means we programmed an ITR within
+ * the last 4 interrupt cycles. This has a side effect of us
+ * potentially firing an early interrupt. In order to work around
+ * this we need to throw out any data received for a few
+ * interrupts following the update.
+ */
+ if (q_vector->itr_countdown) {
+ itr = rc->target_itr;
+ goto clear_counts;
+ }
+
+ packets = rc->total_packets;
+ bytes = rc->total_bytes;
- usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
- bytes_per_usec = rc->total_bytes / usecs;
+ if (i40e_container_is_rx(q_vector, rc)) {
+ /* If Rx there are 1 to 4 packets and bytes are less than
+ * 9000 assume insufficient data to use bulk rate limiting
+ * approach unless Tx is already in bulk rate limiting. We
+ * are likely latency driven.
+ */
+ if (packets && packets < 4 && bytes < 9000 &&
+ (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
+ itr = I40E_ITR_ADAPTIVE_LATENCY;
+ goto adjust_by_size;
+ }
+ } else if (packets < 4) {
+ /* If we have Tx and Rx ITR maxed and Tx ITR is running in
+ * bulk mode and we are receiving 4 or fewer packets just
+ * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+ * that the Rx can relax.
+ */
+ if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
+ (q_vector->rx.target_itr & I40E_ITR_MASK) ==
+ I40E_ITR_ADAPTIVE_MAX_USECS)
+ goto clear_counts;
+ } else if (packets > 32) {
+ /* If we have processed over 32 packets in a single interrupt
+ * for Tx assume we need to switch over to "bulk" mode.
+ */
+ rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
+ }
- /* The calculations in this algorithm depend on interrupts actually
- * firing at the ITR rate. This may not happen if the packet rate is
- * really low, or if we've been napi polling. Check to make sure
- * that's not the case before we continue.
+ /* We have no packets to actually measure against. This means
+ * either one of the other queues on this vector is active or
+ * we are a Tx queue doing TSO with too high of an interrupt rate.
+ *
+ * Between 4 and 56 we can assume that our current interrupt delay
+ * is only slightly too low. As such we should increase it by a small
+ * fixed amount.
*/
- estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);
- if (estimated_usecs > usecs) {
- new_latency_range = I40E_LOW_LATENCY;
- goto reset_latency;
+ if (packets < 56) {
+ itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
+ if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= I40E_ITR_ADAPTIVE_LATENCY;
+ itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+ }
+ goto clear_counts;
+ }
+
+ if (packets <= 256) {
+ itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+ itr &= I40E_ITR_MASK;
+
+ /* Between 56 and 112 is our "goldilocks" zone where we are
+ * working out "just right". Just report that our current
+ * ITR is good for us.
+ */
+ if (packets <= 112)
+ goto clear_counts;
+
+ /* If packet count is 128 or greater we are likely looking
+ * at a slight overrun of the delay we want. Try halving
+ * our delay to see if that will cut the number of packets
+ * in half per interrupt.
+ */
+ itr /= 2;
+ itr &= I40E_ITR_MASK;
+ if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
+ itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+
+ goto clear_counts;
}
- /* simple throttlerate management
- * 0-10MB/s lowest (50000 ints/s)
- * 10-20MB/s low (20000 ints/s)
- * 20-1249MB/s bulk (18000 ints/s)
+ /* The paths below assume we are dealing with a bulk ITR since
+ * number of packets is greater than 256. We are just going to have
+ * to compute a value and try to bring the count under control,
+ * though for smaller packet sizes there isn't much we can do as
+ * NAPI polling will likely be kicking in sooner rather than later.
+ */
+ itr = I40E_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+ /* If packet counts are 256 or greater we can assume we have a gross
+ * overestimation of what the rate should be. Instead of trying to fine
+ * tune it just use the formula below to try and dial in an exact value
+ * give the current packet size of the frame.
+ */
+ avg_wire_size = bytes / packets;
+
+ /* The following is a crude approximation of:
+ * wmem_default / (size + overhead) = desired_pkts_per_int
+ * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+ * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+ *
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+ * formula down to
*
- * The math works out because the divisor is in 10^(-6) which
- * turns the bytes/us input value into MB/s values, but
- * make sure to use usecs, as the register values written
- * are in 2 usec increments in the ITR registers, and make sure
- * to use the smoothed values that the countdown timer gives us.
+ * (170 * (size + 24)) / (size + 640) = ITR
+ *
+ * We first do some math on the packet size and then finally bitshift
+ * by 8 after rounding up. We also have to account for PCIe link speed
+ * difference as ITR scales based on this.
*/
- switch (new_latency_range) {
- case I40E_LOWEST_LATENCY:
- if (bytes_per_usec > 10)
- new_latency_range = I40E_LOW_LATENCY;
- break;
- case I40E_LOW_LATENCY:
- if (bytes_per_usec > 20)
- new_latency_range = I40E_BULK_LATENCY;
- else if (bytes_per_usec <= 10)
- new_latency_range = I40E_LOWEST_LATENCY;
- break;
- case I40E_BULK_LATENCY:
- default:
- if (bytes_per_usec <= 20)
- new_latency_range = I40E_LOW_LATENCY;
- break;
+ if (avg_wire_size <= 60) {
+ /* Start at 250k ints/sec */
+ avg_wire_size = 4096;
+ } else if (avg_wire_size <= 380) {
+ /* 250K ints/sec to 60K ints/sec */
+ avg_wire_size *= 40;
+ avg_wire_size += 1696;
+ } else if (avg_wire_size <= 1084) {
+ /* 60K ints/sec to 36K ints/sec */
+ avg_wire_size *= 15;
+ avg_wire_size += 11452;
+ } else if (avg_wire_size <= 1980) {
+ /* 36K ints/sec to 30K ints/sec */
+ avg_wire_size *= 5;
+ avg_wire_size += 22420;
+ } else {
+ /* plateau at a limit of 30K ints/sec */
+ avg_wire_size = 32256;
}
-reset_latency:
- rc->latency_range = new_latency_range;
+ /* If we are in low latency mode halve our delay which doubles the
+ * rate to somewhere between 100K to 16K ints/sec
+ */
+ if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+ avg_wire_size /= 2;
- switch (new_latency_range) {
- case I40E_LOWEST_LATENCY:
- new_itr = I40E_ITR_50K;
- break;
- case I40E_LOW_LATENCY:
- new_itr = I40E_ITR_20K;
- break;
- case I40E_BULK_LATENCY:
- new_itr = I40E_ITR_18K;
- break;
- default:
- break;
+ /* Resultant value is 256 times larger than it needs to be. This
+ * gives us room to adjust the value as needed to either increase
+ * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+ *
+ * Use addition as we have already recorded the new latency flag
+ * for the ITR value.
+ */
+ itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
+ I40E_ITR_ADAPTIVE_MIN_INC;
+
+ if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= I40E_ITR_ADAPTIVE_LATENCY;
+ itr += I40E_ITR_ADAPTIVE_MAX_USECS;
}
+clear_counts:
+ /* write back value */
+ rc->target_itr = itr;
+
+ /* next update should occur within next jiffy */
+ rc->next_update = next_update + 1;
+
rc->total_bytes = 0;
rc->total_packets = 0;
- rc->last_itr_update = jiffies;
-
- if (new_itr != rc->itr) {
- rc->itr = new_itr;
- return true;
- }
- return false;
}
/**
* @rx_buffer: rx buffer to pull data from
*
* This function will clean up the contents of the rx_buffer. It will
- * either recycle the bufer or unmap it and free the associated resources.
+ * either recycle the buffer or unmap it and free the associated resources.
*/
static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer)
return failure ? budget : (int)total_rx_packets;
}
-static u32 i40e_buildreg_itr(const int type, const u16 itr)
+static inline u32 i40e_buildreg_itr(const int type, u16 itr)
{
u32 val;
+ /* We don't bother with setting the CLEARPBA bit as the data sheet
+ * points out doing so is "meaningless since it was already
+ * auto-cleared". The auto-clearing happens when the interrupt is
+ * asserted.
+ *
+ * Hardware errata 28 for also indicates that writing to a
+ * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+ * an event in the PBA anyway so we need to rely on the automask
+ * to hold pending events for us until the interrupt is re-enabled
+ *
+ * The itr value is reported in microseconds, and the register
+ * value is recorded in 2 microsecond units. For this reason we
+ * only need to shift by the interval shift - 1 instead of the
+ * full value.
+ */
+ itr &= I40E_ITR_MASK;
+
val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
(type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
- (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
+ (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
return val;
}
/* a small macro to shorten up some long lines */
#define INTREG I40E_PFINT_DYN_CTLN
-static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
-{
- return vsi->rx_rings[idx]->rx_itr_setting;
-}
-static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
-{
- return vsi->tx_rings[idx]->tx_itr_setting;
-}
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
/**
* i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
struct i40e_q_vector *q_vector)
{
struct i40e_hw *hw = &vsi->back->hw;
- bool rx = false, tx = false;
- u32 rxval, txval;
- int idx = q_vector->v_idx;
- int rx_itr_setting, tx_itr_setting;
+ u32 intval;
/* If we don't have MSIX, then we only need to re-enable icr0 */
if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
return;
}
- /* avoid dynamic calculation if in countdown mode OR if
- * all dynamic is disabled
- */
- rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
-
- rx_itr_setting = get_rx_itr(vsi, idx);
- tx_itr_setting = get_tx_itr(vsi, idx);
-
- if (q_vector->itr_countdown > 0 ||
- (!ITR_IS_DYNAMIC(rx_itr_setting) &&
- !ITR_IS_DYNAMIC(tx_itr_setting))) {
- goto enable_int;
- }
-
- if (ITR_IS_DYNAMIC(rx_itr_setting)) {
- rx = i40e_set_new_dynamic_itr(&q_vector->rx);
- rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
- }
-
- if (ITR_IS_DYNAMIC(tx_itr_setting)) {
- tx = i40e_set_new_dynamic_itr(&q_vector->tx);
- txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
- }
+ /* These will do nothing if dynamic updates are not enabled */
+ i40e_update_itr(q_vector, &q_vector->tx);
+ i40e_update_itr(q_vector, &q_vector->rx);
- if (rx || tx) {
- /* get the higher of the two ITR adjustments and
- * use the same value for both ITR registers
- * when in adaptive mode (Rx and/or Tx)
- */
- u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
-
- q_vector->tx.itr = q_vector->rx.itr = itr;
- txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
- tx = true;
- rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
- rx = true;
- }
-
- /* only need to enable the interrupt once, but need
- * to possibly update both ITR values
+ /* This block of logic allows us to get away with only updating
+ * one ITR value with each interrupt. The idea is to perform a
+ * pseudo-lazy update with the following criteria.
+ *
+ * 1. Rx is given higher priority than Tx if both are in same state
+ * 2. If we must reduce an ITR that is given highest priority.
+ * 3. We then give priority to increasing ITR based on amount.
*/
- if (rx) {
- /* set the INTENA_MSK_MASK so that this first write
- * won't actually enable the interrupt, instead just
- * updating the ITR (it's bit 31 PF and VF)
+ if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+ /* Rx ITR needs to be reduced, this is highest priority */
+ intval = i40e_buildreg_itr(I40E_RX_ITR,
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+ ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+ (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+ /* Tx ITR needs to be reduced, this is second priority
+ * Tx ITR needs to be increased more than Rx, fourth priority
*/
- rxval |= BIT(31);
- /* don't check _DOWN because interrupt isn't being enabled */
- wr32(hw, INTREG(q_vector->reg_idx), rxval);
+ intval = i40e_buildreg_itr(I40E_TX_ITR,
+ q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+ /* Rx ITR needs to be increased, third priority */
+ intval = i40e_buildreg_itr(I40E_RX_ITR,
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else {
+ /* No ITR update, lowest priority */
+ intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+ if (q_vector->itr_countdown)
+ q_vector->itr_countdown--;
}
-enable_int:
if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- wr32(hw, INTREG(q_vector->reg_idx), txval);
-
- if (q_vector->itr_countdown)
- q_vector->itr_countdown--;
- else
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ wr32(hw, INTREG(q_vector->reg_idx), intval);
}
/**
#include <net/xdp.h>
/* Interrupt Throttling and Rate Limiting Goodies */
-
-#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */
-#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */
-#define I40E_ITR_100K 0x0005
-#define I40E_ITR_50K 0x000A
-#define I40E_ITR_20K 0x0019
-#define I40E_ITR_18K 0x001B
-#define I40E_ITR_8K 0x003E
-#define I40E_ITR_4K 0x007A
-#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
- I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
- I40E_ITR_DYNAMIC)
-#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
-#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */
-#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */
#define I40E_DEFAULT_IRQ_WORK 256
-#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1)
-#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC))
-#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1)
+
+/* The datasheet for the X710 and XL710 indicate that the maximum value for
+ * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec
+ * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value which is divided by 2 lets use the actual values and
+ * avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
+#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */
+#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */
+#define I40E_ITR_100K 10 /* all values below must be even */
+#define I40E_ITR_50K 20
+#define I40E_ITR_20K 50
+#define I40E_ITR_18K 60
+#define I40E_ITR_8K 122
+#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+
/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
* the value of the rate limit is non-zero
*/
#define INTRL_ENA BIT(6)
+#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
+
/**
* i40e_intrl_usec_to_reg - convert interrupt rate limit to register
* @intrl: interrupt rate limit to convert
* these values always store the USER setting, and must be converted
* before programming to a register.
*/
- u16 rx_itr_setting;
- u16 tx_itr_setting;
+ u16 itr_setting;
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
ring->flags |= I40E_TXR_FLAGS_XDP;
}
-enum i40e_latency_range {
- I40E_LOWEST_LATENCY = 0,
- I40E_LOW_LATENCY = 1,
- I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY 0x8000
+#define I40E_ITR_ADAPTIVE_BULK 0x0000
+#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
struct i40e_ring_container {
- /* array of pointers to rings */
- struct i40e_ring *ring;
+ struct i40e_ring *ring; /* pointer to linked list of ring(s) */
+ unsigned long next_update; /* jiffies value of next update */
unsigned int total_bytes; /* total bytes processed this int */
unsigned int total_packets; /* total packets processed this int */
- unsigned long last_itr_update; /* jiffies of last ITR update */
u16 count;
- enum i40e_latency_range latency_range;
- u16 itr;
+ u16 target_itr; /* target ITR setting for ring(s) */
+ u16 current_itr; /* current ITR setting for ring(s) */
};
/* iterator for handling rings in ring container */
void i40e_free_rx_resources(struct i40e_ring *rx_ring);
int i40e_napi_poll(struct napi_struct *napi, int budget);
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
-u32 i40e_get_tx_pending(struct i40e_ring *ring);
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
#define I40E_MASK(mask, shift) ((u32)(mask) << (shift))
#define I40E_MAX_VSI_QP 16
-#define I40E_MAX_VF_VSI 3
+#define I40E_MAX_VF_VSI 4
#define I40E_MAX_CHAINED_RX_BUFFERS 5
#define I40E_MAX_PF_UDP_OFFLOAD_PORTS 16
return pf_queue_id;
}
+/**
+ * i40e_get_real_pf_qid
+ * @vf: pointer to the VF info
+ * @vsi_id: vsi id
+ * @queue_id: queue number
+ *
+ * wrapper function to get pf_queue_id handling ADq code as well
+ **/
+static u16 i40e_get_real_pf_qid(struct i40e_vf *vf, u16 vsi_id, u16 queue_id)
+{
+ int i;
+
+ if (vf->adq_enabled) {
+ /* Although VF considers all the queues(can be 1 to 16) as its
+ * own but they may actually belong to different VSIs(up to 4).
+ * We need to find which queues belongs to which VSI.
+ */
+ for (i = 0; i < vf->num_tc; i++) {
+ if (queue_id < vf->ch[i].num_qps) {
+ vsi_id = vf->ch[i].vsi_id;
+ break;
+ }
+ /* find right queue id which is relative to a
+ * given VSI.
+ */
+ queue_id -= vf->ch[i].num_qps;
+ }
+ }
+
+ return i40e_vc_get_pf_queue_id(vf, vsi_id, queue_id);
+}
+
/**
* i40e_config_irq_link_list
* @vf: pointer to the VF info
vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
- pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
+ pf_queue_id = i40e_get_real_pf_qid(vf, vsi_id, vsi_queue_id);
reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id);
wr32(hw, reg_idx, reg);
if (next_q < size) {
vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
- pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id,
- vsi_queue_id);
+ pf_queue_id = i40e_get_real_pf_qid(vf,
+ vsi_id,
+ vsi_queue_id);
} else {
pf_queue_id = I40E_QUEUE_END_OF_LIST;
qtype = 0;
/**
* i40e_alloc_vsi_res
* @vf: pointer to the VF info
- * @type: type of VSI to allocate
+ * @idx: VSI index, applies only for ADq mode, zero otherwise
*
* alloc VF vsi context & resources
**/
-static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
+static int i40e_alloc_vsi_res(struct i40e_vf *vf, u8 idx)
{
struct i40e_mac_filter *f = NULL;
struct i40e_pf *pf = vf->pf;
struct i40e_vsi *vsi;
+ u64 max_tx_rate = 0;
int ret = 0;
- vsi = i40e_vsi_setup(pf, type, pf->vsi[pf->lan_vsi]->seid, vf->vf_id);
+ vsi = i40e_vsi_setup(pf, I40E_VSI_SRIOV, pf->vsi[pf->lan_vsi]->seid,
+ vf->vf_id);
if (!vsi) {
dev_err(&pf->pdev->dev,
ret = -ENOENT;
goto error_alloc_vsi_res;
}
- if (type == I40E_VSI_SRIOV) {
+
+ if (!idx) {
u64 hena = i40e_pf_get_default_rss_hena(pf);
u8 broadcast[ETH_ALEN];
spin_unlock_bh(&vsi->mac_filter_hash_lock);
wr32(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)hena);
wr32(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id), (u32)(hena >> 32));
+ /* program mac filter only for VF VSI */
+ ret = i40e_sync_vsi_filters(vsi);
+ if (ret)
+ dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
}
- /* program mac filter */
- ret = i40e_sync_vsi_filters(vsi);
- if (ret)
- dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
+ /* storing VSI index and id for ADq and don't apply the mac filter */
+ if (vf->adq_enabled) {
+ vf->ch[idx].vsi_idx = vsi->idx;
+ vf->ch[idx].vsi_id = vsi->id;
+ }
/* Set VF bandwidth if specified */
if (vf->tx_rate) {
+ max_tx_rate = vf->tx_rate;
+ } else if (vf->ch[idx].max_tx_rate) {
+ max_tx_rate = vf->ch[idx].max_tx_rate;
+ }
+
+ if (max_tx_rate) {
+ max_tx_rate = div_u64(max_tx_rate, I40E_BW_CREDIT_DIVISOR);
ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid,
- vf->tx_rate / 50, 0, NULL);
+ max_tx_rate, 0, NULL);
if (ret)
dev_err(&pf->pdev->dev, "Unable to set tx rate, VF %d, error code %d.\n",
vf->vf_id, ret);
return ret;
}
+/**
+ * i40e_map_pf_queues_to_vsi
+ * @vf: pointer to the VF info
+ *
+ * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
+ * function takes care of first part VSILAN_QTABLE, mapping pf queues to VSI.
+ **/
+static void i40e_map_pf_queues_to_vsi(struct i40e_vf *vf)
+{
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_hw *hw = &pf->hw;
+ u32 reg, num_tc = 1; /* VF has at least one traffic class */
+ u16 vsi_id, qps;
+ int i, j;
+
+ if (vf->adq_enabled)
+ num_tc = vf->num_tc;
+
+ for (i = 0; i < num_tc; i++) {
+ if (vf->adq_enabled) {
+ qps = vf->ch[i].num_qps;
+ vsi_id = vf->ch[i].vsi_id;
+ } else {
+ qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+ vsi_id = vf->lan_vsi_id;
+ }
+
+ for (j = 0; j < 7; j++) {
+ if (j * 2 >= qps) {
+ /* end of list */
+ reg = 0x07FF07FF;
+ } else {
+ u16 qid = i40e_vc_get_pf_queue_id(vf,
+ vsi_id,
+ j * 2);
+ reg = qid;
+ qid = i40e_vc_get_pf_queue_id(vf, vsi_id,
+ (j * 2) + 1);
+ reg |= qid << 16;
+ }
+ i40e_write_rx_ctl(hw,
+ I40E_VSILAN_QTABLE(j, vsi_id),
+ reg);
+ }
+ }
+}
+
+/**
+ * i40e_map_pf_to_vf_queues
+ * @vf: pointer to the VF info
+ *
+ * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
+ * function takes care of the second part VPLAN_QTABLE & completes VF mappings.
+ **/
+static void i40e_map_pf_to_vf_queues(struct i40e_vf *vf)
+{
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_hw *hw = &pf->hw;
+ u32 reg, total_qps = 0;
+ u32 qps, num_tc = 1; /* VF has at least one traffic class */
+ u16 vsi_id, qid;
+ int i, j;
+
+ if (vf->adq_enabled)
+ num_tc = vf->num_tc;
+
+ for (i = 0; i < num_tc; i++) {
+ if (vf->adq_enabled) {
+ qps = vf->ch[i].num_qps;
+ vsi_id = vf->ch[i].vsi_id;
+ } else {
+ qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+ vsi_id = vf->lan_vsi_id;
+ }
+
+ for (j = 0; j < qps; j++) {
+ qid = i40e_vc_get_pf_queue_id(vf, vsi_id, j);
+
+ reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
+ wr32(hw, I40E_VPLAN_QTABLE(total_qps, vf->vf_id),
+ reg);
+ total_qps++;
+ }
+ }
+}
+
/**
* i40e_enable_vf_mappings
* @vf: pointer to the VF info
{
struct i40e_pf *pf = vf->pf;
struct i40e_hw *hw = &pf->hw;
- u32 reg, total_queue_pairs = 0;
- int j;
+ u32 reg;
/* Tell the hardware we're using noncontiguous mapping. HW requires
* that VF queues be mapped using this method, even when they are
reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK;
wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), reg);
- /* map PF queues to VF queues */
- for (j = 0; j < pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; j++) {
- u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, j);
-
- reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
- wr32(hw, I40E_VPLAN_QTABLE(total_queue_pairs, vf->vf_id), reg);
- total_queue_pairs++;
- }
-
- /* map PF queues to VSI */
- for (j = 0; j < 7; j++) {
- if (j * 2 >= pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs) {
- reg = 0x07FF07FF; /* unused */
- } else {
- u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id,
- j * 2);
- reg = qid;
- qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id,
- (j * 2) + 1);
- reg |= qid << 16;
- }
- i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id),
- reg);
- }
+ i40e_map_pf_to_vf_queues(vf);
+ i40e_map_pf_queues_to_vsi(vf);
i40e_flush(hw);
}
struct i40e_pf *pf = vf->pf;
struct i40e_hw *hw = &pf->hw;
u32 reg_idx, reg;
- int i, msix_vf;
+ int i, j, msix_vf;
/* Start by disabling VF's configuration API to prevent the OS from
* accessing the VF's VSI after it's freed / invalidated.
vf->lan_vsi_id = 0;
vf->num_mac = 0;
}
+
+ /* do the accounting and remove additional ADq VSI's */
+ if (vf->adq_enabled && vf->ch[0].vsi_idx) {
+ for (j = 0; j < vf->num_tc; j++) {
+ /* At this point VSI0 is already released so don't
+ * release it again and only clear their values in
+ * structure variables
+ */
+ if (j)
+ i40e_vsi_release(pf->vsi[vf->ch[j].vsi_idx]);
+ vf->ch[j].vsi_idx = 0;
+ vf->ch[j].vsi_id = 0;
+ }
+ }
msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
/* disable interrupts so the VF starts in a known state */
{
struct i40e_pf *pf = vf->pf;
int total_queue_pairs = 0;
- int ret;
+ int ret, idx;
if (vf->num_req_queues &&
vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF)
pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
/* allocate hw vsi context & associated resources */
- ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV);
+ ret = i40e_alloc_vsi_res(vf, 0);
if (ret)
goto error_alloc;
total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+ /* allocate additional VSIs based on tc information for ADq */
+ if (vf->adq_enabled) {
+ if (pf->queues_left >=
+ (I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF)) {
+ /* TC 0 always belongs to VF VSI */
+ for (idx = 1; idx < vf->num_tc; idx++) {
+ ret = i40e_alloc_vsi_res(vf, idx);
+ if (ret)
+ goto error_alloc;
+ }
+ /* send correct number of queues */
+ total_queue_pairs = I40E_MAX_VF_QUEUES;
+ } else {
+ dev_info(&pf->pdev->dev, "VF %d: Not enough queues to allocate, disabling ADq\n",
+ vf->vf_id);
+ vf->adq_enabled = false;
+ }
+ }
+
/* We account for each VF to get a default number of queue pairs. If
* the VF has now requested more, we need to account for that to make
* certain we never request more queues than we actually have left in
sizeof(struct virtchnl_version_info));
}
+/**
+ * i40e_del_qch - delete all the additional VSIs created as a part of ADq
+ * @vf: pointer to VF structure
+ **/
+static void i40e_del_qch(struct i40e_vf *vf)
+{
+ struct i40e_pf *pf = vf->pf;
+ int i;
+
+ /* first element in the array belongs to primary VF VSI and we shouldn't
+ * delete it. We should however delete the rest of the VSIs created
+ */
+ for (i = 1; i < vf->num_tc; i++) {
+ if (vf->ch[i].vsi_idx) {
+ i40e_vsi_release(pf->vsi[vf->ch[i].vsi_idx]);
+ vf->ch[i].vsi_idx = 0;
+ vf->ch[i].vsi_id = 0;
+ }
+ }
+}
+
/**
* i40e_vc_get_vf_resources_msg
* @vf: pointer to the VF info
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADQ;
+
vfres->num_vsis = num_vsis;
vfres->num_queue_pairs = vf->num_queue_pairs;
vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf;
(struct virtchnl_vsi_queue_config_info *)msg;
struct virtchnl_queue_pair_info *qpi;
struct i40e_pf *pf = vf->pf;
- u16 vsi_id, vsi_queue_id;
+ u16 vsi_id, vsi_queue_id = 0;
i40e_status aq_ret = 0;
- int i;
+ int i, j = 0, idx = 0;
+
+ vsi_id = qci->vsi_id;
if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
- vsi_id = qci->vsi_id;
if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
+
for (i = 0; i < qci->num_queue_pairs; i++) {
qpi = &qci->qpair[i];
- vsi_queue_id = qpi->txq.queue_id;
- if ((qpi->txq.vsi_id != vsi_id) ||
- (qpi->rxq.vsi_id != vsi_id) ||
- (qpi->rxq.queue_id != vsi_queue_id) ||
- !i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) {
+
+ if (!vf->adq_enabled) {
+ vsi_queue_id = qpi->txq.queue_id;
+
+ if (qpi->txq.vsi_id != qci->vsi_id ||
+ qpi->rxq.vsi_id != qci->vsi_id ||
+ qpi->rxq.queue_id != vsi_queue_id) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
+ }
+ }
+
+ if (!i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) {
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
+
+ /* For ADq there can be up to 4 VSIs with max 4 queues each.
+ * VF does not know about these additional VSIs and all
+ * it cares is about its own queues. PF configures these queues
+ * to its appropriate VSIs based on TC mapping
+ **/
+ if (vf->adq_enabled) {
+ if (j == (vf->ch[idx].num_qps - 1)) {
+ idx++;
+ j = 0; /* resetting the queue count */
+ vsi_queue_id = 0;
+ } else {
+ j++;
+ vsi_queue_id++;
+ }
+ vsi_id = vf->ch[idx].vsi_id;
+ }
}
/* set vsi num_queue_pairs in use to num configured by VF */
- pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = qci->num_queue_pairs;
+ if (!vf->adq_enabled) {
+ pf->vsi[vf->lan_vsi_idx]->num_queue_pairs =
+ qci->num_queue_pairs;
+ } else {
+ for (i = 0; i < vf->num_tc; i++)
+ pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs =
+ vf->ch[i].num_qps;
+ }
error_param:
/* send the response to the VF */
aq_ret);
}
+/**
+ * i40e_validate_queue_map
+ * @vsi_id: vsi id
+ * @queuemap: Tx or Rx queue map
+ *
+ * check if Tx or Rx queue map is valid
+ **/
+static int i40e_validate_queue_map(struct i40e_vf *vf, u16 vsi_id,
+ unsigned long queuemap)
+{
+ u16 vsi_queue_id, queue_id;
+
+ for_each_set_bit(vsi_queue_id, &queuemap, I40E_MAX_VSI_QP) {
+ if (vf->adq_enabled) {
+ vsi_id = vf->ch[vsi_queue_id / I40E_MAX_VF_VSI].vsi_id;
+ queue_id = (vsi_queue_id % I40E_DEFAULT_QUEUES_PER_VF);
+ } else {
+ queue_id = vsi_queue_id;
+ }
+
+ if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/**
* i40e_vc_config_irq_map_msg
* @vf: pointer to the VF info
struct virtchnl_irq_map_info *irqmap_info =
(struct virtchnl_irq_map_info *)msg;
struct virtchnl_vector_map *map;
- u16 vsi_id, vsi_queue_id, vector_id;
+ u16 vsi_id, vector_id;
i40e_status aq_ret = 0;
- unsigned long tempmap;
int i;
if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
for (i = 0; i < irqmap_info->num_vectors; i++) {
map = &irqmap_info->vecmap[i];
-
vector_id = map->vector_id;
vsi_id = map->vsi_id;
/* validate msg params */
goto error_param;
}
- /* lookout for the invalid queue index */
- tempmap = map->rxq_map;
- for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) {
- if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
- vsi_queue_id)) {
- aq_ret = I40E_ERR_PARAM;
- goto error_param;
- }
+ if (i40e_validate_queue_map(vf, vsi_id, map->rxq_map)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
}
- tempmap = map->txq_map;
- for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) {
- if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
- vsi_queue_id)) {
- aq_ret = I40E_ERR_PARAM;
- goto error_param;
- }
+ if (i40e_validate_queue_map(vf, vsi_id, map->txq_map)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
}
i40e_config_irq_link_list(vf, vsi_id, map);
struct i40e_pf *pf = vf->pf;
u16 vsi_id = vqs->vsi_id;
i40e_status aq_ret = 0;
+ int i;
if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
aq_ret = I40E_ERR_PARAM;
if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx]))
aq_ret = I40E_ERR_TIMEOUT;
+
+ /* need to start the rings for additional ADq VSI's as well */
+ if (vf->adq_enabled) {
+ /* zero belongs to LAN VSI */
+ for (i = 1; i < vf->num_tc; i++) {
+ if (i40e_vsi_start_rings(pf->vsi[vf->ch[i].vsi_idx]))
+ aq_ret = I40E_ERR_TIMEOUT;
+ }
+ }
+
error_param:
/* send the response to the VF */
return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES,
aq_ret);
}
+/**
+ * i40e_validate_cloud_filter
+ * @mask: mask for TC filter
+ * @data: data for TC filter
+ *
+ * This function validates cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_validate_cloud_filter(struct i40e_vf *vf,
+ struct virtchnl_filter *tc_filter)
+{
+ struct virtchnl_l4_spec mask = tc_filter->mask.tcp_spec;
+ struct virtchnl_l4_spec data = tc_filter->data.tcp_spec;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ struct i40e_mac_filter *f;
+ struct hlist_node *h;
+ bool found = false;
+ int bkt;
+
+ if (!tc_filter->action) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: Currently ADq doesn't support Drop Action\n",
+ vf->vf_id);
+ goto err;
+ }
+
+ /* action_meta is TC number here to which the filter is applied */
+ if (!tc_filter->action_meta ||
+ tc_filter->action_meta > I40E_MAX_VF_VSI) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n",
+ vf->vf_id, tc_filter->action_meta);
+ goto err;
+ }
+
+ /* Check filter if it's programmed for advanced mode or basic mode.
+ * There are two ADq modes (for VF only),
+ * 1. Basic mode: intended to allow as many filter options as possible
+ * to be added to a VF in Non-trusted mode. Main goal is
+ * to add filters to its own MAC and VLAN id.
+ * 2. Advanced mode: is for allowing filters to be applied other than
+ * its own MAC or VLAN. This mode requires the VF to be
+ * Trusted.
+ */
+ if (mask.dst_mac[0] && !mask.dst_ip[0]) {
+ vsi = pf->vsi[vf->lan_vsi_idx];
+ f = i40e_find_mac(vsi, data.dst_mac);
+
+ if (!f) {
+ dev_info(&pf->pdev->dev,
+ "Destination MAC %pM doesn't belong to VF %d\n",
+ data.dst_mac, vf->vf_id);
+ goto err;
+ }
+
+ if (mask.vlan_id) {
+ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f,
+ hlist) {
+ if (f->vlan == ntohs(data.vlan_id)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ dev_info(&pf->pdev->dev,
+ "VF %d doesn't have any VLAN id %u\n",
+ vf->vf_id, ntohs(data.vlan_id));
+ goto err;
+ }
+ }
+ } else {
+ /* Check if VF is trusted */
+ if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
+ dev_err(&pf->pdev->dev,
+ "VF %d not trusted, make VF trusted to add advanced mode ADq cloud filters\n",
+ vf->vf_id);
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (mask.dst_mac[0] & data.dst_mac[0]) {
+ if (is_broadcast_ether_addr(data.dst_mac) ||
+ is_zero_ether_addr(data.dst_mac)) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Dest MAC addr %pM\n",
+ vf->vf_id, data.dst_mac);
+ goto err;
+ }
+ }
+
+ if (mask.src_mac[0] & data.src_mac[0]) {
+ if (is_broadcast_ether_addr(data.src_mac) ||
+ is_zero_ether_addr(data.src_mac)) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Source MAC addr %pM\n",
+ vf->vf_id, data.src_mac);
+ goto err;
+ }
+ }
+
+ if (mask.dst_port & data.dst_port) {
+ if (!data.dst_port || be16_to_cpu(data.dst_port) > 0xFFFF) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Dest port\n",
+ vf->vf_id);
+ goto err;
+ }
+ }
+
+ if (mask.src_port & data.src_port) {
+ if (!data.src_port || be16_to_cpu(data.src_port) > 0xFFFF) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Source port\n",
+ vf->vf_id);
+ goto err;
+ }
+ }
+
+ if (tc_filter->flow_type != VIRTCHNL_TCP_V6_FLOW &&
+ tc_filter->flow_type != VIRTCHNL_TCP_V4_FLOW) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Flow type\n",
+ vf->vf_id);
+ goto err;
+ }
+
+ if (mask.vlan_id & data.vlan_id) {
+ if (ntohs(data.vlan_id) > I40E_MAX_VLANID) {
+ dev_info(&pf->pdev->dev, "VF %d: invalid VLAN ID\n",
+ vf->vf_id);
+ goto err;
+ }
+ }
+
+ return I40E_SUCCESS;
+err:
+ return I40E_ERR_CONFIG;
+}
+
+/**
+ * i40e_find_vsi_from_seid - searches for the vsi with the given seid
+ * @vf: pointer to the VF info
+ * @seid - seid of the vsi it is searching for
+ **/
+static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid)
+{
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ int i;
+
+ for (i = 0; i < vf->num_tc ; i++) {
+ vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id);
+ if (vsi && vsi->seid == seid)
+ return vsi;
+ }
+ return NULL;
+}
+
+/**
+ * i40e_del_all_cloud_filters
+ * @vf: pointer to the VF info
+ *
+ * This function deletes all cloud filters
+ **/
+static void i40e_del_all_cloud_filters(struct i40e_vf *vf)
+{
+ struct i40e_cloud_filter *cfilter = NULL;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ struct hlist_node *node;
+ int ret;
+
+ hlist_for_each_entry_safe(cfilter, node,
+ &vf->cloud_filter_list, cloud_node) {
+ vsi = i40e_find_vsi_from_seid(vf, cfilter->seid);
+
+ if (!vsi) {
+ dev_err(&pf->pdev->dev, "VF %d: no VSI found for matching %u seid, can't delete cloud filter\n",
+ vf->vf_id, cfilter->seid);
+ continue;
+ }
+
+ if (cfilter->dst_port)
+ ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
+ false);
+ else
+ ret = i40e_add_del_cloud_filter(vsi, cfilter, false);
+ if (ret)
+ dev_err(&pf->pdev->dev,
+ "VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
+ vf->vf_id, i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+
+ hlist_del(&cfilter->cloud_node);
+ kfree(cfilter);
+ vf->num_cloud_filters--;
+ }
+}
+
+/**
+ * i40e_vc_del_cloud_filter
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * This function deletes a cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
+{
+ struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
+ struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
+ struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
+ struct i40e_cloud_filter cfilter, *cf = NULL;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ struct hlist_node *node;
+ i40e_status aq_ret = 0;
+ int i, ret;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (!vf->adq_enabled) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: ADq not enabled, can't apply cloud filter\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (i40e_validate_cloud_filter(vf, vcf)) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: Invalid input, can't apply cloud filter\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ memset(&cfilter, 0, sizeof(cfilter));
+ /* parse destination mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ cfilter.dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
+
+ /* parse source mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ cfilter.src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
+
+ cfilter.vlan_id = mask.vlan_id & tcf.vlan_id;
+ cfilter.dst_port = mask.dst_port & tcf.dst_port;
+ cfilter.src_port = mask.src_port & tcf.src_port;
+
+ switch (vcf->flow_type) {
+ case VIRTCHNL_TCP_V4_FLOW:
+ cfilter.n_proto = ETH_P_IP;
+ if (mask.dst_ip[0] & tcf.dst_ip[0])
+ memcpy(&cfilter.ip.v4.dst_ip, tcf.dst_ip,
+ ARRAY_SIZE(tcf.dst_ip));
+ else if (mask.src_ip[0] & tcf.dst_ip[0])
+ memcpy(&cfilter.ip.v4.src_ip, tcf.src_ip,
+ ARRAY_SIZE(tcf.dst_ip));
+ break;
+ case VIRTCHNL_TCP_V6_FLOW:
+ cfilter.n_proto = ETH_P_IPV6;
+ if (mask.dst_ip[3] & tcf.dst_ip[3])
+ memcpy(&cfilter.ip.v6.dst_ip6, tcf.dst_ip,
+ sizeof(cfilter.ip.v6.dst_ip6));
+ if (mask.src_ip[3] & tcf.src_ip[3])
+ memcpy(&cfilter.ip.v6.src_ip6, tcf.src_ip,
+ sizeof(cfilter.ip.v6.src_ip6));
+ break;
+ default:
+ /* TC filter can be configured based on different combinations
+ * and in this case IP is not a part of filter config
+ */
+ dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
+ vf->vf_id);
+ }
+
+ /* get the vsi to which the tc belongs to */
+ vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
+ cfilter.seid = vsi->seid;
+ cfilter.flags = vcf->field_flags;
+
+ /* Deleting TC filter */
+ if (tcf.dst_port)
+ ret = i40e_add_del_cloud_filter_big_buf(vsi, &cfilter, false);
+ else
+ ret = i40e_add_del_cloud_filter(vsi, &cfilter, false);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+ "VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
+ vf->vf_id, i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto err;
+ }
+
+ hlist_for_each_entry_safe(cf, node,
+ &vf->cloud_filter_list, cloud_node) {
+ if (cf->seid != cfilter.seid)
+ continue;
+ if (mask.dst_port)
+ if (cfilter.dst_port != cf->dst_port)
+ continue;
+ if (mask.dst_mac[0])
+ if (!ether_addr_equal(cf->src_mac, cfilter.src_mac))
+ continue;
+ /* for ipv4 data to be valid, only first byte of mask is set */
+ if (cfilter.n_proto == ETH_P_IP && mask.dst_ip[0])
+ if (memcmp(&cfilter.ip.v4.dst_ip, &cf->ip.v4.dst_ip,
+ ARRAY_SIZE(tcf.dst_ip)))
+ continue;
+ /* for ipv6, mask is set for all sixteen bytes (4 words) */
+ if (cfilter.n_proto == ETH_P_IPV6 && mask.dst_ip[3])
+ if (memcmp(&cfilter.ip.v6.dst_ip6, &cf->ip.v6.dst_ip6,
+ sizeof(cfilter.ip.v6.src_ip6)))
+ continue;
+ if (mask.vlan_id)
+ if (cfilter.vlan_id != cf->vlan_id)
+ continue;
+
+ hlist_del(&cf->cloud_node);
+ kfree(cf);
+ vf->num_cloud_filters--;
+ }
+
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_CLOUD_FILTER,
+ aq_ret);
+}
+
+/**
+ * i40e_vc_add_cloud_filter
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * This function adds a cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
+{
+ struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
+ struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
+ struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
+ struct i40e_cloud_filter *cfilter = NULL;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ i40e_status aq_ret = 0;
+ int i, ret;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (!vf->adq_enabled) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: ADq is not enabled, can't apply cloud filter\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (i40e_validate_cloud_filter(vf, vcf)) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: Invalid input/s, can't apply cloud filter\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL);
+ if (!cfilter)
+ return -ENOMEM;
+
+ /* parse destination mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ cfilter->dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
+
+ /* parse source mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ cfilter->src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
+
+ cfilter->vlan_id = mask.vlan_id & tcf.vlan_id;
+ cfilter->dst_port = mask.dst_port & tcf.dst_port;
+ cfilter->src_port = mask.src_port & tcf.src_port;
+
+ switch (vcf->flow_type) {
+ case VIRTCHNL_TCP_V4_FLOW:
+ cfilter->n_proto = ETH_P_IP;
+ if (mask.dst_ip[0] & tcf.dst_ip[0])
+ memcpy(&cfilter->ip.v4.dst_ip, tcf.dst_ip,
+ ARRAY_SIZE(tcf.dst_ip));
+ else if (mask.src_ip[0] & tcf.dst_ip[0])
+ memcpy(&cfilter->ip.v4.src_ip, tcf.src_ip,
+ ARRAY_SIZE(tcf.dst_ip));
+ break;
+ case VIRTCHNL_TCP_V6_FLOW:
+ cfilter->n_proto = ETH_P_IPV6;
+ if (mask.dst_ip[3] & tcf.dst_ip[3])
+ memcpy(&cfilter->ip.v6.dst_ip6, tcf.dst_ip,
+ sizeof(cfilter->ip.v6.dst_ip6));
+ if (mask.src_ip[3] & tcf.src_ip[3])
+ memcpy(&cfilter->ip.v6.src_ip6, tcf.src_ip,
+ sizeof(cfilter->ip.v6.src_ip6));
+ break;
+ default:
+ /* TC filter can be configured based on different combinations
+ * and in this case IP is not a part of filter config
+ */
+ dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
+ vf->vf_id);
+ }
+
+ /* get the VSI to which the TC belongs to */
+ vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
+ cfilter->seid = vsi->seid;
+ cfilter->flags = vcf->field_flags;
+
+ /* Adding cloud filter programmed as TC filter */
+ if (tcf.dst_port)
+ ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true);
+ else
+ ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+ "VF %d: Failed to add cloud filter, err %s aq_err %s\n",
+ vf->vf_id, i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto err;
+ }
+
+ INIT_HLIST_NODE(&cfilter->cloud_node);
+ hlist_add_head(&cfilter->cloud_node, &vf->cloud_filter_list);
+ vf->num_cloud_filters++;
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_CLOUD_FILTER,
+ aq_ret);
+}
+
+/**
+ * i40e_vc_add_qch_msg: Add queue channel and enable ADq
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ **/
+static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
+{
+ struct virtchnl_tc_info *tci =
+ (struct virtchnl_tc_info *)msg;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_link_status *ls = &pf->hw.phy.link_info;
+ int i, adq_request_qps = 0, speed = 0;
+ i40e_status aq_ret = 0;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* ADq cannot be applied if spoof check is ON */
+ if (vf->spoofchk) {
+ dev_err(&pf->pdev->dev,
+ "Spoof check is ON, turn it OFF to enable ADq\n");
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+ dev_err(&pf->pdev->dev,
+ "VF %d attempting to enable ADq, but hasn't properly negotiated that capability\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* max number of traffic classes for VF currently capped at 4 */
+ if (!tci->num_tc || tci->num_tc > I40E_MAX_VF_VSI) {
+ dev_err(&pf->pdev->dev,
+ "VF %d trying to set %u TCs, valid range 1-4 TCs per VF\n",
+ vf->vf_id, tci->num_tc);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* validate queues for each TC */
+ for (i = 0; i < tci->num_tc; i++)
+ if (!tci->list[i].count ||
+ tci->list[i].count > I40E_DEFAULT_QUEUES_PER_VF) {
+ dev_err(&pf->pdev->dev,
+ "VF %d: TC %d trying to set %u queues, valid range 1-4 queues per TC\n",
+ vf->vf_id, i, tci->list[i].count);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* need Max VF queues but already have default number of queues */
+ adq_request_qps = I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF;
+
+ if (pf->queues_left < adq_request_qps) {
+ dev_err(&pf->pdev->dev,
+ "No queues left to allocate to VF %d\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ } else {
+ /* we need to allocate max VF queues to enable ADq so as to
+ * make sure ADq enabled VF always gets back queues when it
+ * goes through a reset.
+ */
+ vf->num_queue_pairs = I40E_MAX_VF_QUEUES;
+ }
+
+ /* get link speed in MB to validate rate limit */
+ switch (ls->link_speed) {
+ case VIRTCHNL_LINK_SPEED_100MB:
+ speed = SPEED_100;
+ break;
+ case VIRTCHNL_LINK_SPEED_1GB:
+ speed = SPEED_1000;
+ break;
+ case VIRTCHNL_LINK_SPEED_10GB:
+ speed = SPEED_10000;
+ break;
+ case VIRTCHNL_LINK_SPEED_20GB:
+ speed = SPEED_20000;
+ break;
+ case VIRTCHNL_LINK_SPEED_25GB:
+ speed = SPEED_25000;
+ break;
+ case VIRTCHNL_LINK_SPEED_40GB:
+ speed = SPEED_40000;
+ break;
+ default:
+ dev_err(&pf->pdev->dev,
+ "Cannot detect link speed\n");
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* parse data from the queue channel info */
+ vf->num_tc = tci->num_tc;
+ for (i = 0; i < vf->num_tc; i++) {
+ if (tci->list[i].max_tx_rate) {
+ if (tci->list[i].max_tx_rate > speed) {
+ dev_err(&pf->pdev->dev,
+ "Invalid max tx rate %llu specified for VF %d.",
+ tci->list[i].max_tx_rate,
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ } else {
+ vf->ch[i].max_tx_rate =
+ tci->list[i].max_tx_rate;
+ }
+ }
+ vf->ch[i].num_qps = tci->list[i].count;
+ }
+
+ /* set this flag only after making sure all inputs are sane */
+ vf->adq_enabled = true;
+ /* num_req_queues is set when user changes number of queues via ethtool
+ * and this causes issue for default VSI(which depends on this variable)
+ * when ADq is enabled, hence reset it.
+ */
+ vf->num_req_queues = 0;
+
+ /* reset the VF in order to allocate resources */
+ i40e_vc_notify_vf_reset(vf);
+ i40e_reset_vf(vf, false);
+
+ return I40E_SUCCESS;
+
+ /* send the response to the VF */
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_CHANNELS,
+ aq_ret);
+}
+
+/**
+ * i40e_vc_del_qch_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ **/
+static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg)
+{
+ struct i40e_pf *pf = vf->pf;
+ i40e_status aq_ret = 0;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (vf->adq_enabled) {
+ i40e_del_all_cloud_filters(vf);
+ i40e_del_qch(vf);
+ vf->adq_enabled = false;
+ vf->num_tc = 0;
+ dev_info(&pf->pdev->dev,
+ "Deleting Queue Channels and cloud filters for ADq on VF %d\n",
+ vf->vf_id);
+ } else {
+ dev_info(&pf->pdev->dev, "VF %d trying to delete queue channels but ADq isn't enabled\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ }
+
+ /* reset the VF in order to allocate resources */
+ i40e_vc_notify_vf_reset(vf);
+ i40e_reset_vf(vf, false);
+
+ return I40E_SUCCESS;
+
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_CHANNELS,
+ aq_ret);
+}
+
/**
* i40e_vc_process_vf_msg
* @pf: pointer to the PF structure
case VIRTCHNL_OP_REQUEST_QUEUES:
ret = i40e_vc_request_queues_msg(vf, msg, msglen);
break;
-
+ case VIRTCHNL_OP_ENABLE_CHANNELS:
+ ret = i40e_vc_add_qch_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_DISABLE_CHANNELS:
+ ret = i40e_vc_del_qch_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_ADD_CLOUD_FILTER:
+ ret = i40e_vc_add_cloud_filter(vf, msg);
+ break;
+ case VIRTCHNL_OP_DEL_CLOUD_FILTER:
+ ret = i40e_vc_del_cloud_filter(vf, msg);
+ break;
case VIRTCHNL_OP_UNKNOWN:
default:
dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
i40e_vc_disable_vf(vf);
dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
vf_id, setting ? "" : "un");
+
+ if (vf->adq_enabled) {
+ if (!vf->trusted) {
+ dev_info(&pf->pdev->dev,
+ "VF %u no longer Trusted, deleting all cloud filters\n",
+ vf_id);
+ i40e_del_all_cloud_filters(vf);
+ }
+ }
+
out:
return ret;
}
I40E_VIRTCHNL_VF_CAP_IWARP,
};
+/* In ADq, max 4 VSI's can be allocated per VF including primary VF VSI.
+ * These variables are used to store indices, id's and number of queues
+ * for each VSI including that of primary VF VSI. Each Traffic class is
+ * termed as channel and each channel can in-turn have 4 queues which
+ * means max 16 queues overall per VF.
+ */
+struct i40evf_channel {
+ u16 vsi_idx; /* index in PF struct for all channel VSIs */
+ u16 vsi_id; /* VSI ID used by firmware */
+ u16 num_qps; /* number of queue pairs requested by user */
+ u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
+};
+
/* VF information structure */
struct i40e_vf {
struct i40e_pf *pf;
u16 num_mac;
u16 num_vlan;
+ /* ADq related variables */
+ bool adq_enabled; /* flag to enable adq */
+ u8 num_tc;
+ struct i40evf_channel ch[I40E_MAX_VF_VSI];
+ struct hlist_head cloud_filter_list;
+ u16 num_cloud_filters;
+
/* RDMA Client */
struct virtchnl_iwarp_qvlist_info *qvlist_info;
};
*/
smp_rmb();
tx_ring->tx_stats.prev_pkt_ctr =
- i40evf_get_tx_pending(tx_ring, false) ? packets : -1;
+ i40evf_get_tx_pending(tx_ring, true) ? packets : -1;
}
}
}
val);
}
+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
+ struct i40e_ring_container *rc)
+{
+ return &q_vector->rx == rc;
+}
+
+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+{
+ unsigned int divisor;
+
+ switch (q_vector->adapter->link_speed) {
+ case I40E_LINK_SPEED_40GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+ break;
+ case I40E_LINK_SPEED_25GB:
+ case I40E_LINK_SPEED_20GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+ break;
+ default:
+ case I40E_LINK_SPEED_10GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+ break;
+ case I40E_LINK_SPEED_1GB:
+ case I40E_LINK_SPEED_100MB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+ break;
+ }
+
+ return divisor;
+}
+
/**
- * i40e_set_new_dynamic_itr - Find new ITR level
+ * i40e_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
* @rc: structure containing ring performance data
*
- * Returns true if ITR changed, false if not
- *
- * Stores a new ITR value based on packets and byte counts during
- * the last interrupt. The advantage of per interrupt computation
- * is faster updates and more accurate ITR for the current traffic
- * pattern. Constants in this function were computed based on
- * theoretical maximum wire speed and thresholds were set based on
- * testing data as well as attempting to minimize response time
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
* while increasing bulk throughput.
**/
-static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+static void i40e_update_itr(struct i40e_q_vector *q_vector,
+ struct i40e_ring_container *rc)
{
- enum i40e_latency_range new_latency_range = rc->latency_range;
- u32 new_itr = rc->itr;
- int bytes_per_usec;
- unsigned int usecs, estimated_usecs;
+ unsigned int avg_wire_size, packets, bytes, itr;
+ unsigned long next_update = jiffies;
- if (rc->total_packets == 0 || !rc->itr)
- return false;
+ /* If we don't have any rings just leave ourselves set for maximum
+ * possible latency so we take ourselves out of the equation.
+ */
+ if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
+ return;
+
+ /* For Rx we want to push the delay up and default to low latency.
+ * for Tx we want to pull the delay down and default to high latency.
+ */
+ itr = i40e_container_is_rx(q_vector, rc) ?
+ I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
+ I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
+
+ /* If we didn't update within up to 1 - 2 jiffies we can assume
+ * that either packets are coming in so slow there hasn't been
+ * any work, or that there is so much work that NAPI is dealing
+ * with interrupt moderation and we don't need to do anything.
+ */
+ if (time_after(next_update, rc->next_update))
+ goto clear_counts;
+
+ /* If itr_countdown is set it means we programmed an ITR within
+ * the last 4 interrupt cycles. This has a side effect of us
+ * potentially firing an early interrupt. In order to work around
+ * this we need to throw out any data received for a few
+ * interrupts following the update.
+ */
+ if (q_vector->itr_countdown) {
+ itr = rc->target_itr;
+ goto clear_counts;
+ }
+
+ packets = rc->total_packets;
+ bytes = rc->total_bytes;
- usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
- bytes_per_usec = rc->total_bytes / usecs;
+ if (i40e_container_is_rx(q_vector, rc)) {
+ /* If Rx there are 1 to 4 packets and bytes are less than
+ * 9000 assume insufficient data to use bulk rate limiting
+ * approach unless Tx is already in bulk rate limiting. We
+ * are likely latency driven.
+ */
+ if (packets && packets < 4 && bytes < 9000 &&
+ (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
+ itr = I40E_ITR_ADAPTIVE_LATENCY;
+ goto adjust_by_size;
+ }
+ } else if (packets < 4) {
+ /* If we have Tx and Rx ITR maxed and Tx ITR is running in
+ * bulk mode and we are receiving 4 or fewer packets just
+ * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+ * that the Rx can relax.
+ */
+ if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
+ (q_vector->rx.target_itr & I40E_ITR_MASK) ==
+ I40E_ITR_ADAPTIVE_MAX_USECS)
+ goto clear_counts;
+ } else if (packets > 32) {
+ /* If we have processed over 32 packets in a single interrupt
+ * for Tx assume we need to switch over to "bulk" mode.
+ */
+ rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
+ }
- /* The calculations in this algorithm depend on interrupts actually
- * firing at the ITR rate. This may not happen if the packet rate is
- * really low, or if we've been napi polling. Check to make sure
- * that's not the case before we continue.
+ /* We have no packets to actually measure against. This means
+ * either one of the other queues on this vector is active or
+ * we are a Tx queue doing TSO with too high of an interrupt rate.
+ *
+ * Between 4 and 56 we can assume that our current interrupt delay
+ * is only slightly too low. As such we should increase it by a small
+ * fixed amount.
*/
- estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);
- if (estimated_usecs > usecs) {
- new_latency_range = I40E_LOW_LATENCY;
- goto reset_latency;
+ if (packets < 56) {
+ itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
+ if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= I40E_ITR_ADAPTIVE_LATENCY;
+ itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+ }
+ goto clear_counts;
+ }
+
+ if (packets <= 256) {
+ itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+ itr &= I40E_ITR_MASK;
+
+ /* Between 56 and 112 is our "goldilocks" zone where we are
+ * working out "just right". Just report that our current
+ * ITR is good for us.
+ */
+ if (packets <= 112)
+ goto clear_counts;
+
+ /* If packet count is 128 or greater we are likely looking
+ * at a slight overrun of the delay we want. Try halving
+ * our delay to see if that will cut the number of packets
+ * in half per interrupt.
+ */
+ itr /= 2;
+ itr &= I40E_ITR_MASK;
+ if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
+ itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+
+ goto clear_counts;
}
- /* simple throttlerate management
- * 0-10MB/s lowest (50000 ints/s)
- * 10-20MB/s low (20000 ints/s)
- * 20-1249MB/s bulk (18000 ints/s)
+ /* The paths below assume we are dealing with a bulk ITR since
+ * number of packets is greater than 256. We are just going to have
+ * to compute a value and try to bring the count under control,
+ * though for smaller packet sizes there isn't much we can do as
+ * NAPI polling will likely be kicking in sooner rather than later.
+ */
+ itr = I40E_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+ /* If packet counts are 256 or greater we can assume we have a gross
+ * overestimation of what the rate should be. Instead of trying to fine
+ * tune it just use the formula below to try and dial in an exact value
+ * give the current packet size of the frame.
+ */
+ avg_wire_size = bytes / packets;
+
+ /* The following is a crude approximation of:
+ * wmem_default / (size + overhead) = desired_pkts_per_int
+ * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+ * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
*
- * The math works out because the divisor is in 10^(-6) which
- * turns the bytes/us input value into MB/s values, but
- * make sure to use usecs, as the register values written
- * are in 2 usec increments in the ITR registers, and make sure
- * to use the smoothed values that the countdown timer gives us.
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+ * formula down to
+ *
+ * (170 * (size + 24)) / (size + 640) = ITR
+ *
+ * We first do some math on the packet size and then finally bitshift
+ * by 8 after rounding up. We also have to account for PCIe link speed
+ * difference as ITR scales based on this.
*/
- switch (new_latency_range) {
- case I40E_LOWEST_LATENCY:
- if (bytes_per_usec > 10)
- new_latency_range = I40E_LOW_LATENCY;
- break;
- case I40E_LOW_LATENCY:
- if (bytes_per_usec > 20)
- new_latency_range = I40E_BULK_LATENCY;
- else if (bytes_per_usec <= 10)
- new_latency_range = I40E_LOWEST_LATENCY;
- break;
- case I40E_BULK_LATENCY:
- default:
- if (bytes_per_usec <= 20)
- new_latency_range = I40E_LOW_LATENCY;
- break;
+ if (avg_wire_size <= 60) {
+ /* Start at 250k ints/sec */
+ avg_wire_size = 4096;
+ } else if (avg_wire_size <= 380) {
+ /* 250K ints/sec to 60K ints/sec */
+ avg_wire_size *= 40;
+ avg_wire_size += 1696;
+ } else if (avg_wire_size <= 1084) {
+ /* 60K ints/sec to 36K ints/sec */
+ avg_wire_size *= 15;
+ avg_wire_size += 11452;
+ } else if (avg_wire_size <= 1980) {
+ /* 36K ints/sec to 30K ints/sec */
+ avg_wire_size *= 5;
+ avg_wire_size += 22420;
+ } else {
+ /* plateau at a limit of 30K ints/sec */
+ avg_wire_size = 32256;
}
-reset_latency:
- rc->latency_range = new_latency_range;
+ /* If we are in low latency mode halve our delay which doubles the
+ * rate to somewhere between 100K to 16K ints/sec
+ */
+ if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+ avg_wire_size /= 2;
- switch (new_latency_range) {
- case I40E_LOWEST_LATENCY:
- new_itr = I40E_ITR_50K;
- break;
- case I40E_LOW_LATENCY:
- new_itr = I40E_ITR_20K;
- break;
- case I40E_BULK_LATENCY:
- new_itr = I40E_ITR_18K;
- break;
- default:
- break;
+ /* Resultant value is 256 times larger than it needs to be. This
+ * gives us room to adjust the value as needed to either increase
+ * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+ *
+ * Use addition as we have already recorded the new latency flag
+ * for the ITR value.
+ */
+ itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
+ I40E_ITR_ADAPTIVE_MIN_INC;
+
+ if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= I40E_ITR_ADAPTIVE_LATENCY;
+ itr += I40E_ITR_ADAPTIVE_MAX_USECS;
}
+clear_counts:
+ /* write back value */
+ rc->target_itr = itr;
+
+ /* next update should occur within next jiffy */
+ rc->next_update = next_update + 1;
+
rc->total_bytes = 0;
rc->total_packets = 0;
- rc->last_itr_update = jiffies;
-
- if (new_itr != rc->itr) {
- rc->itr = new_itr;
- return true;
- }
- return false;
}
/**
* @rx_buffer: rx buffer to pull data from
*
* This function will clean up the contents of the rx_buffer. It will
- * either recycle the bufer or unmap it and free the associated resources.
+ * either recycle the buffer or unmap it and free the associated resources.
*/
static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer)
return failure ? budget : (int)total_rx_packets;
}
-static u32 i40e_buildreg_itr(const int type, const u16 itr)
+static inline u32 i40e_buildreg_itr(const int type, u16 itr)
{
u32 val;
+ /* We don't bother with setting the CLEARPBA bit as the data sheet
+ * points out doing so is "meaningless since it was already
+ * auto-cleared". The auto-clearing happens when the interrupt is
+ * asserted.
+ *
+ * Hardware errata 28 for also indicates that writing to a
+ * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+ * an event in the PBA anyway so we need to rely on the automask
+ * to hold pending events for us until the interrupt is re-enabled
+ *
+ * The itr value is reported in microseconds, and the register
+ * value is recorded in 2 microsecond units. For this reason we
+ * only need to shift by the interval shift - 1 instead of the
+ * full value.
+ */
+ itr &= I40E_ITR_MASK;
+
val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
- I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |
(type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
- (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT);
+ (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1));
return val;
}
/* a small macro to shorten up some long lines */
#define INTREG I40E_VFINT_DYN_CTLN1
-static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
-{
- struct i40evf_adapter *adapter = vsi->back;
- return adapter->rx_rings[idx].rx_itr_setting;
-}
-
-static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
-{
- struct i40evf_adapter *adapter = vsi->back;
-
- return adapter->tx_rings[idx].tx_itr_setting;
-}
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
/**
* i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
struct i40e_q_vector *q_vector)
{
struct i40e_hw *hw = &vsi->back->hw;
- bool rx = false, tx = false;
- u32 rxval, txval;
- int idx = q_vector->v_idx;
- int rx_itr_setting, tx_itr_setting;
-
- /* avoid dynamic calculation if in countdown mode OR if
- * all dynamic is disabled
- */
- rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
-
- rx_itr_setting = get_rx_itr(vsi, idx);
- tx_itr_setting = get_tx_itr(vsi, idx);
+ u32 intval;
- if (q_vector->itr_countdown > 0 ||
- (!ITR_IS_DYNAMIC(rx_itr_setting) &&
- !ITR_IS_DYNAMIC(tx_itr_setting))) {
- goto enable_int;
- }
-
- if (ITR_IS_DYNAMIC(rx_itr_setting)) {
- rx = i40e_set_new_dynamic_itr(&q_vector->rx);
- rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
- }
+ /* These will do nothing if dynamic updates are not enabled */
+ i40e_update_itr(q_vector, &q_vector->tx);
+ i40e_update_itr(q_vector, &q_vector->rx);
- if (ITR_IS_DYNAMIC(tx_itr_setting)) {
- tx = i40e_set_new_dynamic_itr(&q_vector->tx);
- txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
- }
-
- if (rx || tx) {
- /* get the higher of the two ITR adjustments and
- * use the same value for both ITR registers
- * when in adaptive mode (Rx and/or Tx)
- */
- u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
-
- q_vector->tx.itr = q_vector->rx.itr = itr;
- txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
- tx = true;
- rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
- rx = true;
- }
-
- /* only need to enable the interrupt once, but need
- * to possibly update both ITR values
+ /* This block of logic allows us to get away with only updating
+ * one ITR value with each interrupt. The idea is to perform a
+ * pseudo-lazy update with the following criteria.
+ *
+ * 1. Rx is given higher priority than Tx if both are in same state
+ * 2. If we must reduce an ITR that is given highest priority.
+ * 3. We then give priority to increasing ITR based on amount.
*/
- if (rx) {
- /* set the INTENA_MSK_MASK so that this first write
- * won't actually enable the interrupt, instead just
- * updating the ITR (it's bit 31 PF and VF)
+ if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+ /* Rx ITR needs to be reduced, this is highest priority */
+ intval = i40e_buildreg_itr(I40E_RX_ITR,
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+ ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+ (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+ /* Tx ITR needs to be reduced, this is second priority
+ * Tx ITR needs to be increased more than Rx, fourth priority
*/
- rxval |= BIT(31);
- /* don't check _DOWN because interrupt isn't being enabled */
- wr32(hw, INTREG(q_vector->reg_idx), rxval);
+ intval = i40e_buildreg_itr(I40E_TX_ITR,
+ q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+ /* Rx ITR needs to be increased, third priority */
+ intval = i40e_buildreg_itr(I40E_RX_ITR,
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else {
+ /* No ITR update, lowest priority */
+ intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+ if (q_vector->itr_countdown)
+ q_vector->itr_countdown--;
}
-enable_int:
if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- wr32(hw, INTREG(q_vector->reg_idx), txval);
-
- if (q_vector->itr_countdown)
- q_vector->itr_countdown--;
- else
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ wr32(hw, INTREG(q_vector->reg_idx), intval);
}
/**
#define _I40E_TXRX_H_
/* Interrupt Throttling and Rate Limiting Goodies */
-
-#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */
-#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */
-#define I40E_ITR_100K 0x0005
-#define I40E_ITR_50K 0x000A
-#define I40E_ITR_20K 0x0019
-#define I40E_ITR_18K 0x001B
-#define I40E_ITR_8K 0x003E
-#define I40E_ITR_4K 0x007A
-#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
- I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
- I40E_ITR_DYNAMIC)
-#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
-#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */
-#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */
#define I40E_DEFAULT_IRQ_WORK 256
-#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1)
-#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC))
-#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1)
+
+/* The datasheet for the X710 and XL710 indicate that the maximum value for
+ * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec
+ * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value which is divided by 2 lets use the actual values and
+ * avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
+#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */
+#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */
+#define I40E_ITR_100K 10 /* all values below must be even */
+#define I40E_ITR_50K 20
+#define I40E_ITR_20K 50
+#define I40E_ITR_18K 60
+#define I40E_ITR_8K 122
+#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+
/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
* the value of the rate limit is non-zero
*/
#define INTRL_ENA BIT(6)
+#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
#define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0)
#define I40E_INTRL_8K 125 /* 8000 ints/sec */
* these values always store the USER setting, and must be converted
* before programming to a register.
*/
- u16 rx_itr_setting;
- u16 tx_itr_setting;
+ u16 itr_setting;
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
-enum i40e_latency_range {
- I40E_LOWEST_LATENCY = 0,
- I40E_LOW_LATENCY = 1,
- I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY 0x8000
+#define I40E_ITR_ADAPTIVE_BULK 0x0000
+#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
struct i40e_ring_container {
- /* array of pointers to rings */
- struct i40e_ring *ring;
+ struct i40e_ring *ring; /* pointer to linked list of ring(s) */
+ unsigned long next_update; /* jiffies value of next update */
unsigned int total_bytes; /* total bytes processed this int */
unsigned int total_packets; /* total packets processed this int */
- unsigned long last_itr_update; /* jiffies of last ITR update */
u16 count;
- enum i40e_latency_range latency_range;
- u16 itr;
+ u16 target_itr; /* target ITR setting for ring(s) */
+ u16 current_itr; /* current ITR setting for ring(s) */
};
/* iterator for handling rings in ring container */
#include <linux/socket.h>
#include <linux/jiffies.h>
#include <net/ip6_checksum.h>
+#include <net/pkt_cls.h>
#include <net/udp.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
#include "i40e_type.h"
#include <linux/avf/virtchnl.h>
#define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4)
#define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4)
+#define I40EVF_MBPS_DIVISOR 125000 /* divisor to convert to Mbps */
/* MAX_MSIX_Q_VECTORS of these are allocated,
* but we only use one per queue-specific vector.
struct i40e_ring_container rx;
struct i40e_ring_container tx;
u32 ring_mask;
+ u8 itr_countdown; /* when 0 should adjust adaptive ITR */
u8 num_ringpairs; /* total number of ring pairs in vector */
-#define ITR_COUNTDOWN_START 100
- u8 itr_countdown; /* when 0 or 1 update ITR */
u16 v_idx; /* index in the vsi->q_vector array. */
u16 reg_idx; /* register index of the interrupt */
char name[IFNAMSIZ + 15];
bool add; /* filter needs to be added */
};
+#define I40EVF_MAX_TRAFFIC_CLASS 4
+/* State of traffic class creation */
+enum i40evf_tc_state_t {
+ __I40EVF_TC_INVALID, /* no traffic class, default state */
+ __I40EVF_TC_RUNNING, /* traffic classes have been created */
+};
+
+/* channel info */
+struct i40evf_channel_config {
+ struct virtchnl_channel_info ch_info[I40EVF_MAX_TRAFFIC_CLASS];
+ enum i40evf_tc_state_t state;
+ u8 total_qps;
+};
+
+/* State of cloud filter */
+enum i40evf_cloud_filter_state_t {
+ __I40EVF_CF_INVALID, /* cloud filter not added */
+ __I40EVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */
+ __I40EVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */
+ __I40EVF_CF_ACTIVE, /* cloud filter is active */
+};
+
/* Driver state. The order of these is important! */
enum i40evf_state_t {
__I40EVF_STARTUP, /* driver loaded, probe complete */
__I40EVF_IN_REMOVE_TASK, /* device being removed */
};
+#define I40EVF_CLOUD_FIELD_OMAC 0x01
+#define I40EVF_CLOUD_FIELD_IMAC 0x02
+#define I40EVF_CLOUD_FIELD_IVLAN 0x04
+#define I40EVF_CLOUD_FIELD_TEN_ID 0x08
+#define I40EVF_CLOUD_FIELD_IIP 0x10
+
+#define I40EVF_CF_FLAGS_OMAC I40EVF_CLOUD_FIELD_OMAC
+#define I40EVF_CF_FLAGS_IMAC I40EVF_CLOUD_FIELD_IMAC
+#define I40EVF_CF_FLAGS_IMAC_IVLAN (I40EVF_CLOUD_FIELD_IMAC |\
+ I40EVF_CLOUD_FIELD_IVLAN)
+#define I40EVF_CF_FLAGS_IMAC_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\
+ I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_OMAC_TEN_ID_IMAC (I40EVF_CLOUD_FIELD_OMAC |\
+ I40EVF_CLOUD_FIELD_IMAC |\
+ I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_IMAC_IVLAN_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\
+ I40EVF_CLOUD_FIELD_IVLAN |\
+ I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_IIP I40E_CLOUD_FIELD_IIP
+
+/* bookkeeping of cloud filters */
+struct i40evf_cloud_filter {
+ enum i40evf_cloud_filter_state_t state;
+ struct list_head list;
+ struct virtchnl_filter f;
+ unsigned long cookie;
+ bool del; /* filter needs to be deleted */
+ bool add; /* filter needs to be added */
+};
+
/* board specific private data structure */
struct i40evf_adapter {
struct timer_list watchdog_timer;
#define I40EVF_FLAG_ALLMULTI_ON BIT(14)
#define I40EVF_FLAG_LEGACY_RX BIT(15)
#define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(16)
+#define I40EVF_FLAG_QUEUES_DISABLED BIT(17)
/* duplicates for common code */
#define I40E_FLAG_DCB_ENABLED 0
#define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED
#define I40EVF_FLAG_AQ_RELEASE_ALLMULTI BIT(18)
#define I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT(19)
#define I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT(20)
+#define I40EVF_FLAG_AQ_ENABLE_CHANNELS BIT(21)
+#define I40EVF_FLAG_AQ_DISABLE_CHANNELS BIT(22)
+#define I40EVF_FLAG_AQ_ADD_CLOUD_FILTER BIT(23)
+#define I40EVF_FLAG_AQ_DEL_CLOUD_FILTER BIT(24)
/* OS defined structs */
struct net_device *netdev;
u16 rss_lut_size;
u8 *rss_key;
u8 *rss_lut;
+ /* ADQ related members */
+ struct i40evf_channel_config ch_config;
+ u8 num_tc;
+ struct list_head cloud_filter_list;
+ /* lock to protest access to the cloud filter list */
+ spinlock_t cloud_filter_list_lock;
+ u16 num_cloud_filters;
};
void i40evf_notify_client_l2_params(struct i40e_vsi *vsi);
void i40evf_notify_client_open(struct i40e_vsi *vsi);
void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset);
+void i40evf_enable_channels(struct i40evf_adapter *adapter);
+void i40evf_disable_channels(struct i40evf_adapter *adapter);
+void i40evf_add_cloud_filter(struct i40evf_adapter *adapter);
+void i40evf_del_cloud_filter(struct i40evf_adapter *adapter);
#endif /* _I40EVF_H_ */
rx_ring = &adapter->rx_rings[queue];
tx_ring = &adapter->tx_rings[queue];
- if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
+ if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
ec->use_adaptive_rx_coalesce = 1;
- if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
+ if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
ec->use_adaptive_tx_coalesce = 1;
- ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
- ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
return 0;
}
/**
* i40evf_set_itr_per_queue - set ITR values for specific queue
- * @vsi: the VSI to set values for
+ * @adapter: the VF adapter struct to set values for
* @ec: coalesce settings from ethtool
* @queue: the queue to modify
*
{
struct i40e_ring *rx_ring = &adapter->rx_rings[queue];
struct i40e_ring *tx_ring = &adapter->tx_rings[queue];
- struct i40e_vsi *vsi = &adapter->vsi;
- struct i40e_hw *hw = &adapter->hw;
struct i40e_q_vector *q_vector;
- u16 vector;
- rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
- tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
+ rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+ tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
- rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
+ rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
if (!ec->use_adaptive_rx_coalesce)
- rx_ring->rx_itr_setting ^= I40E_ITR_DYNAMIC;
+ rx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
- tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
+ tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
if (!ec->use_adaptive_tx_coalesce)
- tx_ring->tx_itr_setting ^= I40E_ITR_DYNAMIC;
+ tx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
q_vector = rx_ring->q_vector;
- q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
- vector = vsi->base_vector + q_vector->v_idx;
- wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+ q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
q_vector = tx_ring->q_vector;
- q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
- vector = vsi->base_vector + q_vector->v_idx;
- wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
+ q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
- i40e_flush(hw);
+ /* The interrupt handler itself will take care of programming
+ * the Tx and Rx ITR values based on the values we have entered
+ * into the q_vector, no need to write the values now.
+ */
}
/**
if (ec->rx_coalesce_usecs == 0) {
if (ec->use_adaptive_rx_coalesce)
netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
- } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
- (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+ } else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) ||
+ (ec->rx_coalesce_usecs > I40E_MAX_ITR)) {
netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
return -EINVAL;
}
if (ec->tx_coalesce_usecs == 0) {
if (ec->use_adaptive_tx_coalesce)
netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
- } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
- (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+ } else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) ||
+ (ec->tx_coalesce_usecs > I40E_MAX_ITR)) {
netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
return -EINVAL;
}
return -EINVAL;
}
+ if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc) {
+ dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n");
+ return -EINVAL;
+ }
+
/* All of these should have already been checked by ethtool before this
* even gets to us, but just to be sure.
*/
rx_ring->vsi = &adapter->vsi;
q_vector->rx.ring = rx_ring;
q_vector->rx.count++;
- q_vector->rx.latency_range = I40E_LOW_LATENCY;
- q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
+ q_vector->rx.next_update = jiffies + 1;
+ q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
q_vector->ring_mask |= BIT(r_idx);
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
- wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr);
+ wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),
+ q_vector->rx.current_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
}
/**
tx_ring->vsi = &adapter->vsi;
q_vector->tx.ring = tx_ring;
q_vector->tx.count++;
- q_vector->tx.latency_range = I40E_LOW_LATENCY;
- q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ q_vector->tx.next_update = jiffies + 1;
+ q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
q_vector->num_ringpairs++;
- wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr);
+ wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),
+ q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
}
/**
**/
static struct
i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter,
- u8 *macaddr)
+ const u8 *macaddr)
{
struct i40evf_mac_filter *f;
**/
static struct
i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
- u8 *macaddr)
+ const u8 *macaddr)
{
struct i40evf_mac_filter *f;
if (!macaddr)
return NULL;
- spin_lock_bh(&adapter->mac_vlan_list_lock);
-
f = i40evf_find_filter(adapter, macaddr);
if (!f) {
f = kzalloc(sizeof(*f), GFP_ATOMIC);
if (!f)
- goto clearout;
+ return f;
ether_addr_copy(f->macaddr, macaddr);
f->remove = false;
}
-clearout:
- spin_unlock_bh(&adapter->mac_vlan_list_lock);
return f;
}
adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
}
+ f = i40evf_add_filter(adapter, addr->sa_data);
+
spin_unlock_bh(&adapter->mac_vlan_list_lock);
- f = i40evf_add_filter(adapter, addr->sa_data);
if (f) {
ether_addr_copy(hw->mac.addr, addr->sa_data);
ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
}
/**
- * i40evf_set_rx_mode - NDO callback to set the netdev filters
- * @netdev: network interface device structure
- **/
-static void i40evf_set_rx_mode(struct net_device *netdev)
+ * i40evf_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int i40evf_addr_sync(struct net_device *netdev, const u8 *addr)
{
struct i40evf_adapter *adapter = netdev_priv(netdev);
- struct i40evf_mac_filter *f, *ftmp;
- struct netdev_hw_addr *uca;
- struct netdev_hw_addr *mca;
- struct netdev_hw_addr *ha;
-
- /* add addr if not already in the filter list */
- netdev_for_each_uc_addr(uca, netdev) {
- i40evf_add_filter(adapter, uca->addr);
- }
- netdev_for_each_mc_addr(mca, netdev) {
- i40evf_add_filter(adapter, mca->addr);
- }
- spin_lock_bh(&adapter->mac_vlan_list_lock);
-
- list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
- netdev_for_each_mc_addr(mca, netdev)
- if (ether_addr_equal(mca->addr, f->macaddr))
- goto bottom_of_search_loop;
-
- netdev_for_each_uc_addr(uca, netdev)
- if (ether_addr_equal(uca->addr, f->macaddr))
- goto bottom_of_search_loop;
+ if (i40evf_add_filter(adapter, addr))
+ return 0;
+ else
+ return -ENOMEM;
+}
- for_each_dev_addr(netdev, ha)
- if (ether_addr_equal(ha->addr, f->macaddr))
- goto bottom_of_search_loop;
+/**
+ * i40evf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int i40evf_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+ struct i40evf_adapter *adapter = netdev_priv(netdev);
+ struct i40evf_mac_filter *f;
- if (ether_addr_equal(f->macaddr, adapter->hw.mac.addr))
- goto bottom_of_search_loop;
+ /* Under some circumstances, we might receive a request to delete
+ * our own device address from our uc list. Because we store the
+ * device address in the VSI's MAC/VLAN filter list, we need to ignore
+ * such requests and not delete our device address from this list.
+ */
+ if (ether_addr_equal(addr, netdev->dev_addr))
+ return 0;
- /* f->macaddr wasn't found in uc, mc, or ha list so delete it */
+ f = i40evf_find_filter(adapter, addr);
+ if (f) {
f->remove = true;
adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
-
-bottom_of_search_loop:
- continue;
}
+ return 0;
+}
+
+/**
+ * i40evf_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ **/
+static void i40evf_set_rx_mode(struct net_device *netdev)
+{
+ struct i40evf_adapter *adapter = netdev_priv(netdev);
+
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+ __dev_uc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
+ __dev_mc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
if (netdev->flags & IFF_PROMISC &&
!(adapter->flags & I40EVF_FLAG_PROMISC_ON))
else if (!(netdev->flags & IFF_ALLMULTI) &&
adapter->flags & I40EVF_FLAG_ALLMULTI_ON)
adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_ALLMULTI;
-
- spin_unlock_bh(&adapter->mac_vlan_list_lock);
}
/**
void i40evf_down(struct i40evf_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
+ struct i40evf_vlan_filter *vlf;
struct i40evf_mac_filter *f;
+ struct i40evf_cloud_filter *cf;
if (adapter->state <= __I40EVF_DOWN_PENDING)
return;
spin_lock_bh(&adapter->mac_vlan_list_lock);
+ /* clear the sync flag on all filters */
+ __dev_uc_unsync(adapter->netdev, NULL);
+ __dev_mc_unsync(adapter->netdev, NULL);
+
/* remove all MAC filters */
list_for_each_entry(f, &adapter->mac_filter_list, list) {
f->remove = true;
}
+
/* remove all VLAN filters */
- list_for_each_entry(f, &adapter->vlan_filter_list, list) {
- f->remove = true;
+ list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
+ vlf->remove = true;
}
spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ /* remove all cloud filters */
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ cf->del = true;
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) &&
adapter->state != __I40EVF_RESETTING) {
/* cancel any current operation */
*/
adapter->aq_required = I40EVF_FLAG_AQ_DEL_MAC_FILTER;
adapter->aq_required |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
+ adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
}
*/
if (adapter->num_req_queues)
num_active_queues = adapter->num_req_queues;
+ else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc)
+ num_active_queues = adapter->ch_config.total_qps;
else
num_active_queues = min_t(int,
adapter->vsi_res->num_queue_pairs,
tx_ring->netdev = adapter->netdev;
tx_ring->dev = &adapter->pdev->dev;
tx_ring->count = adapter->tx_desc_count;
- tx_ring->tx_itr_setting = I40E_ITR_TX_DEF;
+ tx_ring->itr_setting = I40E_ITR_TX_DEF;
if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE)
tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
rx_ring->netdev = adapter->netdev;
rx_ring->dev = &adapter->pdev->dev;
rx_ring->count = adapter->rx_desc_count;
- rx_ring->rx_itr_setting = I40E_ITR_RX_DEF;
+ rx_ring->itr_setting = I40E_ITR_RX_DEF;
}
adapter->num_active_queues = num_active_queues;
goto err_alloc_q_vectors;
}
+ /* If we've made it so far while ADq flag being ON, then we haven't
+ * bailed out anywhere in middle. And ADq isn't just enabled but actual
+ * resources have been allocated in the reset path.
+ * Now we can truly claim that ADq is enabled.
+ */
+ if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc)
+ dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created",
+ adapter->num_tc);
+
dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u",
(adapter->num_active_queues > 1) ? "Enabled" : "Disabled",
adapter->num_active_queues);
i40evf_set_promiscuous(adapter, 0);
goto watchdog_done;
}
+
+ if (adapter->aq_required & I40EVF_FLAG_AQ_ENABLE_CHANNELS) {
+ i40evf_enable_channels(adapter);
+ goto watchdog_done;
+ }
+
+ if (adapter->aq_required & I40EVF_FLAG_AQ_DISABLE_CHANNELS) {
+ i40evf_disable_channels(adapter);
+ goto watchdog_done;
+ }
+
+ if (adapter->aq_required & I40EVF_FLAG_AQ_ADD_CLOUD_FILTER) {
+ i40evf_add_cloud_filter(adapter);
+ goto watchdog_done;
+ }
+
+ if (adapter->aq_required & I40EVF_FLAG_AQ_DEL_CLOUD_FILTER) {
+ i40evf_del_cloud_filter(adapter);
+ goto watchdog_done;
+ }
+
schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
if (adapter->state == __I40EVF_RUNNING)
{
struct i40evf_mac_filter *f, *ftmp;
struct i40evf_vlan_filter *fv, *fvtmp;
+ struct i40evf_cloud_filter *cf, *cftmp;
adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
spin_lock_bh(&adapter->mac_vlan_list_lock);
- /* Delete all of the filters, both MAC and VLAN. */
+ /* Delete all of the filters */
list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
list_del(&f->list);
kfree(f);
spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+ list_del(&cf->list);
+ kfree(cf);
+ adapter->num_cloud_filters--;
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
i40evf_free_misc_irq(adapter);
i40evf_reset_interrupt_capability(adapter);
i40evf_free_queues(adapter);
struct i40evf_adapter *adapter = container_of(work,
struct i40evf_adapter,
reset_task);
+ struct virtchnl_vf_resource *vfres = adapter->vf_res;
struct net_device *netdev = adapter->netdev;
struct i40e_hw *hw = &adapter->hw;
struct i40evf_vlan_filter *vlf;
+ struct i40evf_cloud_filter *cf;
struct i40evf_mac_filter *f;
u32 reg_val;
int i = 0, err;
i40evf_free_all_rx_resources(adapter);
i40evf_free_all_tx_resources(adapter);
+ adapter->flags |= I40EVF_FLAG_QUEUES_DISABLED;
/* kill and reinit the admin queue */
i40evf_shutdown_adminq(hw);
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ /* check if TCs are running and re-add all cloud filters */
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc) {
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ cf->add = true;
+ }
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
+ adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
i40evf_misc_irq_enable(adapter);
mod_timer(&adapter->watchdog_timer, jiffies + 2);
i40evf_free_rx_resources(&adapter->rx_rings[i]);
}
+/**
+ * i40evf_validate_tx_bandwidth - validate the max Tx bandwidth
+ * @adapter: board private structure
+ * @max_tx_rate: max Tx bw for a tc
+ **/
+static int i40evf_validate_tx_bandwidth(struct i40evf_adapter *adapter,
+ u64 max_tx_rate)
+{
+ int speed = 0, ret = 0;
+
+ switch (adapter->link_speed) {
+ case I40E_LINK_SPEED_40GB:
+ speed = 40000;
+ break;
+ case I40E_LINK_SPEED_25GB:
+ speed = 25000;
+ break;
+ case I40E_LINK_SPEED_20GB:
+ speed = 20000;
+ break;
+ case I40E_LINK_SPEED_10GB:
+ speed = 10000;
+ break;
+ case I40E_LINK_SPEED_1GB:
+ speed = 1000;
+ break;
+ case I40E_LINK_SPEED_100MB:
+ speed = 100;
+ break;
+ default:
+ break;
+ }
+
+ if (max_tx_rate > speed) {
+ dev_err(&adapter->pdev->dev,
+ "Invalid tx rate specified\n");
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/**
+ * i40evf_validate_channel_config - validate queue mapping info
+ * @adapter: board private structure
+ * @mqprio_qopt: queue parameters
+ *
+ * This function validates if the config provided by the user to
+ * configure queue channels is valid or not. Returns 0 on a valid
+ * config.
+ **/
+static int i40evf_validate_ch_config(struct i40evf_adapter *adapter,
+ struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+ u64 total_max_rate = 0;
+ int i, num_qps = 0;
+ u64 tx_rate = 0;
+ int ret = 0;
+
+ if (mqprio_qopt->qopt.num_tc > I40EVF_MAX_TRAFFIC_CLASS ||
+ mqprio_qopt->qopt.num_tc < 1)
+ return -EINVAL;
+
+ for (i = 0; i <= mqprio_qopt->qopt.num_tc - 1; i++) {
+ if (!mqprio_qopt->qopt.count[i] ||
+ mqprio_qopt->qopt.offset[i] != num_qps)
+ return -EINVAL;
+ if (mqprio_qopt->min_rate[i]) {
+ dev_err(&adapter->pdev->dev,
+ "Invalid min tx rate (greater than 0) specified\n");
+ return -EINVAL;
+ }
+ /*convert to Mbps */
+ tx_rate = div_u64(mqprio_qopt->max_rate[i],
+ I40EVF_MBPS_DIVISOR);
+ total_max_rate += tx_rate;
+ num_qps += mqprio_qopt->qopt.count[i];
+ }
+ if (num_qps > MAX_QUEUES)
+ return -EINVAL;
+
+ ret = i40evf_validate_tx_bandwidth(adapter, total_max_rate);
+ return ret;
+}
+
+/**
+ * i40evf_del_all_cloud_filters - delete all cloud filters
+ * on the traffic classes
+ **/
+static void i40evf_del_all_cloud_filters(struct i40evf_adapter *adapter)
+{
+ struct i40evf_cloud_filter *cf, *cftmp;
+
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+ list) {
+ list_del(&cf->list);
+ kfree(cf);
+ adapter->num_cloud_filters--;
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+}
+
+/**
+ * __i40evf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type_date: tc offload data
+ *
+ * This function processes the config information provided by the
+ * user to configure traffic classes/queue channels and packages the
+ * information to request the PF to setup traffic classes.
+ *
+ * Returns 0 on success.
+ **/
+static int __i40evf_setup_tc(struct net_device *netdev, void *type_data)
+{
+ struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+ struct i40evf_adapter *adapter = netdev_priv(netdev);
+ struct virtchnl_vf_resource *vfres = adapter->vf_res;
+ u8 num_tc = 0, total_qps = 0;
+ int ret = 0, netdev_tc = 0;
+ u64 max_tx_rate;
+ u16 mode;
+ int i;
+
+ num_tc = mqprio_qopt->qopt.num_tc;
+ mode = mqprio_qopt->mode;
+
+ /* delete queue_channel */
+ if (!mqprio_qopt->qopt.hw) {
+ if (adapter->ch_config.state == __I40EVF_TC_RUNNING) {
+ /* reset the tc configuration */
+ netdev_reset_tc(netdev);
+ adapter->num_tc = 0;
+ netif_tx_stop_all_queues(netdev);
+ netif_tx_disable(netdev);
+ i40evf_del_all_cloud_filters(adapter);
+ adapter->aq_required = I40EVF_FLAG_AQ_DISABLE_CHANNELS;
+ goto exit;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ /* add queue channel */
+ if (mode == TC_MQPRIO_MODE_CHANNEL) {
+ if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+ dev_err(&adapter->pdev->dev, "ADq not supported\n");
+ return -EOPNOTSUPP;
+ }
+ if (adapter->ch_config.state != __I40EVF_TC_INVALID) {
+ dev_err(&adapter->pdev->dev, "TC configuration already exists\n");
+ return -EINVAL;
+ }
+
+ ret = i40evf_validate_ch_config(adapter, mqprio_qopt);
+ if (ret)
+ return ret;
+ /* Return if same TC config is requested */
+ if (adapter->num_tc == num_tc)
+ return 0;
+ adapter->num_tc = num_tc;
+
+ for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
+ if (i < num_tc) {
+ adapter->ch_config.ch_info[i].count =
+ mqprio_qopt->qopt.count[i];
+ adapter->ch_config.ch_info[i].offset =
+ mqprio_qopt->qopt.offset[i];
+ total_qps += mqprio_qopt->qopt.count[i];
+ max_tx_rate = mqprio_qopt->max_rate[i];
+ /* convert to Mbps */
+ max_tx_rate = div_u64(max_tx_rate,
+ I40EVF_MBPS_DIVISOR);
+ adapter->ch_config.ch_info[i].max_tx_rate =
+ max_tx_rate;
+ } else {
+ adapter->ch_config.ch_info[i].count = 1;
+ adapter->ch_config.ch_info[i].offset = 0;
+ }
+ }
+ adapter->ch_config.total_qps = total_qps;
+ netif_tx_stop_all_queues(netdev);
+ netif_tx_disable(netdev);
+ adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_CHANNELS;
+ netdev_reset_tc(netdev);
+ /* Report the tc mapping up the stack */
+ netdev_set_num_tc(adapter->netdev, num_tc);
+ for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
+ u16 qcount = mqprio_qopt->qopt.count[i];
+ u16 qoffset = mqprio_qopt->qopt.offset[i];
+
+ if (i < num_tc)
+ netdev_set_tc_queue(netdev, netdev_tc++, qcount,
+ qoffset);
+ }
+ }
+exit:
+ return ret;
+}
+
+/**
+ * i40evf_parse_cls_flower - Parse tc flower filters provided by kernel
+ * @adapter: board private structure
+ * @cls_flower: pointer to struct tc_cls_flower_offload
+ * @filter: pointer to cloud filter structure
+ */
+static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
+ struct tc_cls_flower_offload *f,
+ struct i40evf_cloud_filter *filter)
+{
+ u16 n_proto_mask = 0;
+ u16 n_proto_key = 0;
+ u8 field_flags = 0;
+ u16 addr_type = 0;
+ u16 n_proto = 0;
+ int i = 0;
+ struct virtchnl_filter *vf = &filter->f;
+
+ if (f->dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_VLAN) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
+ dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n",
+ f->dissector->used_keys);
+ return -EOPNOTSUPP;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->mask);
+
+ if (mask->keyid != 0)
+ field_flags |= I40EVF_CLOUD_FIELD_TEN_ID;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+
+ struct flow_dissector_key_basic *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+ n_proto_key = ntohs(key->n_proto);
+ n_proto_mask = ntohs(mask->n_proto);
+
+ if (n_proto_key == ETH_P_ALL) {
+ n_proto_key = 0;
+ n_proto_mask = 0;
+ }
+ n_proto = n_proto_key & n_proto_mask;
+ if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6)
+ return -EINVAL;
+ if (n_proto == ETH_P_IPV6) {
+ /* specify flow type as TCP IPv6 */
+ vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
+ }
+
+ if (key->ip_proto != IPPROTO_TCP) {
+ dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n");
+ return -EINVAL;
+ }
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_dissector_key_eth_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->key);
+
+ struct flow_dissector_key_eth_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->mask);
+ /* use is_broadcast and is_zero to check for all 0xf or 0 */
+ if (!is_zero_ether_addr(mask->dst)) {
+ if (is_broadcast_ether_addr(mask->dst)) {
+ field_flags |= I40EVF_CLOUD_FIELD_OMAC;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n",
+ mask->dst);
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (!is_zero_ether_addr(mask->src)) {
+ if (is_broadcast_ether_addr(mask->src)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IMAC;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n",
+ mask->src);
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (!is_zero_ether_addr(key->dst))
+ if (is_valid_ether_addr(key->dst) ||
+ is_multicast_ether_addr(key->dst)) {
+ /* set the mask if a valid dst_mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ vf->mask.tcp_spec.dst_mac[i] |= 0xff;
+ ether_addr_copy(vf->data.tcp_spec.dst_mac,
+ key->dst);
+ }
+
+ if (!is_zero_ether_addr(key->src))
+ if (is_valid_ether_addr(key->src) ||
+ is_multicast_ether_addr(key->src)) {
+ /* set the mask if a valid dst_mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ vf->mask.tcp_spec.src_mac[i] |= 0xff;
+ ether_addr_copy(vf->data.tcp_spec.src_mac,
+ key->src);
+ }
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_dissector_key_vlan *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->key);
+ struct flow_dissector_key_vlan *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->mask);
+
+ if (mask->vlan_id) {
+ if (mask->vlan_id == VLAN_VID_MASK) {
+ field_flags |= I40EVF_CLOUD_FIELD_IVLAN;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n",
+ mask->vlan_id);
+ return I40E_ERR_CONFIG;
+ }
+ }
+ vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_dissector_key_control *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ f->key);
+
+ addr_type = key->addr_type;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_dissector_key_ipv4_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv4_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->mask);
+
+ if (mask->dst) {
+ if (mask->dst == cpu_to_be32(0xffffffff)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n",
+ be32_to_cpu(mask->dst));
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (mask->src) {
+ if (mask->src == cpu_to_be32(0xffffffff)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n",
+ be32_to_cpu(mask->dst));
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (field_flags & I40EVF_CLOUD_FIELD_TEN_ID) {
+ dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n");
+ return I40E_ERR_CONFIG;
+ }
+ if (key->dst) {
+ vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
+ vf->data.tcp_spec.dst_ip[0] = key->dst;
+ }
+ if (key->src) {
+ vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
+ vf->data.tcp_spec.src_ip[0] = key->src;
+ }
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_dissector_key_ipv6_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv6_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->mask);
+
+ /* validate mask, make sure it is not IPV6_ADDR_ANY */
+ if (ipv6_addr_any(&mask->dst)) {
+ dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n",
+ IPV6_ADDR_ANY);
+ return I40E_ERR_CONFIG;
+ }
+
+ /* src and dest IPv6 address should not be LOOPBACK
+ * (0:0:0:0:0:0:0:1) which can be represented as ::1
+ */
+ if (ipv6_addr_loopback(&key->dst) ||
+ ipv6_addr_loopback(&key->src)) {
+ dev_err(&adapter->pdev->dev,
+ "ipv6 addr should not be loopback\n");
+ return I40E_ERR_CONFIG;
+ }
+ if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+
+ for (i = 0; i < 4; i++)
+ vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff);
+ memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32,
+ sizeof(vf->data.tcp_spec.dst_ip));
+ for (i = 0; i < 4; i++)
+ vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff);
+ memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32,
+ sizeof(vf->data.tcp_spec.src_ip));
+ }
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_dissector_key_ports *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->key);
+ struct flow_dissector_key_ports *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->mask);
+
+ if (mask->src) {
+ if (mask->src == cpu_to_be16(0xffff)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad src port mask %u\n",
+ be16_to_cpu(mask->src));
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (mask->dst) {
+ if (mask->dst == cpu_to_be16(0xffff)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n",
+ be16_to_cpu(mask->dst));
+ return I40E_ERR_CONFIG;
+ }
+ }
+ if (key->dst) {
+ vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.dst_port = key->dst;
+ }
+
+ if (key->src) {
+ vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.src_port = key->dst;
+ }
+ }
+ vf->field_flags = field_flags;
+
+ return 0;
+}
+
+/**
+ * i40evf_handle_tclass - Forward to a traffic class on the device
+ * @adapter: board private structure
+ * @tc: traffic class index on the device
+ * @filter: pointer to cloud filter structure
+ */
+static int i40evf_handle_tclass(struct i40evf_adapter *adapter, u32 tc,
+ struct i40evf_cloud_filter *filter)
+{
+ if (tc == 0)
+ return 0;
+ if (tc < adapter->num_tc) {
+ if (!filter->f.data.tcp_spec.dst_port) {
+ dev_err(&adapter->pdev->dev,
+ "Specify destination port to redirect to traffic class other than TC0\n");
+ return -EINVAL;
+ }
+ }
+ /* redirect to a traffic class on the same device */
+ filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT;
+ filter->f.action_meta = tc;
+ return 0;
+}
+
+/**
+ * i40evf_configure_clsflower - Add tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ */
+static int i40evf_configure_clsflower(struct i40evf_adapter *adapter,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
+ struct i40evf_cloud_filter *filter = NULL;
+ int err = 0, count = 50;
+
+ while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
+ &adapter->crit_section)) {
+ udelay(1);
+ if (--count == 0)
+ return -EINVAL;
+ }
+
+ if (tc < 0) {
+ dev_err(&adapter->pdev->dev, "Invalid traffic class\n");
+ return -EINVAL;
+ }
+
+ filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ if (!filter) {
+ err = -ENOMEM;
+ goto clearout;
+ }
+ filter->cookie = cls_flower->cookie;
+
+ /* set the mask to all zeroes to begin with */
+ memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec));
+ /* start out with flow type and eth type IPv4 to begin with */
+ filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW;
+ err = i40evf_parse_cls_flower(adapter, cls_flower, filter);
+ if (err < 0)
+ goto err;
+
+ err = i40evf_handle_tclass(adapter, tc, filter);
+ if (err < 0)
+ goto err;
+
+ /* add filter to the list */
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_add_tail(&filter->list, &adapter->cloud_filter_list);
+ adapter->num_cloud_filters++;
+ filter->add = true;
+ adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+err:
+ if (err)
+ kfree(filter);
+clearout:
+ clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ return err;
+}
+
+/* i40evf_find_cf - Find the cloud filter in the list
+ * @adapter: Board private structure
+ * @cookie: filter specific cookie
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * cloud_filter_list_lock.
+ */
+static struct i40evf_cloud_filter *i40evf_find_cf(struct i40evf_adapter *adapter,
+ unsigned long *cookie)
+{
+ struct i40evf_cloud_filter *filter = NULL;
+
+ if (!cookie)
+ return NULL;
+
+ list_for_each_entry(filter, &adapter->cloud_filter_list, list) {
+ if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+ return filter;
+ }
+ return NULL;
+}
+
+/**
+ * i40evf_delete_clsflower - Remove tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ */
+static int i40evf_delete_clsflower(struct i40evf_adapter *adapter,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ struct i40evf_cloud_filter *filter = NULL;
+ int err = 0;
+
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ filter = i40evf_find_cf(adapter, &cls_flower->cookie);
+ if (filter) {
+ filter->del = true;
+ adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
+ } else {
+ err = -EINVAL;
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+ return err;
+}
+
+/**
+ * i40evf_setup_tc_cls_flower - flower classifier offloads
+ * @netdev: net device to configure
+ * @type_data: offload data
+ */
+static int i40evf_setup_tc_cls_flower(struct i40evf_adapter *adapter,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ if (cls_flower->common.chain_index)
+ return -EOPNOTSUPP;
+
+ switch (cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return i40evf_configure_clsflower(adapter, cls_flower);
+ case TC_CLSFLOWER_DESTROY:
+ return i40evf_delete_clsflower(adapter, cls_flower);
+ case TC_CLSFLOWER_STATS:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
+ * i40evf_setup_tc_block_cb - block callback for tc
+ * @type: type of offload
+ * @type_data: offload data
+ * @cb_priv:
+ *
+ * This function is the block callback for traffic classes
+ **/
+static int i40evf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return i40evf_setup_tc_cls_flower(cb_priv, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * i40evf_setup_tc_block - register callbacks for tc
+ * @netdev: network interface device structure
+ * @f: tc offload data
+ *
+ * This function registers block callbacks for tc
+ * offloads
+ **/
+static int i40evf_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ struct i40evf_adapter *adapter = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, i40evf_setup_tc_block_cb,
+ adapter, adapter);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, i40evf_setup_tc_block_cb,
+ adapter);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * i40evf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type: type of offload
+ * @type_date: tc offload data
+ *
+ * This function is the callback to ndo_setup_tc in the
+ * netdev_ops.
+ *
+ * Returns 0 on success
+ **/
+static int i40evf_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_QDISC_MQPRIO:
+ return __i40evf_setup_tc(netdev, type_data);
+ case TC_SETUP_BLOCK:
+ return i40evf_setup_tc_block(netdev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
/**
* i40evf_open - Called when a network interface is made active
* @netdev: network interface device structure
if (err)
goto err_req_irq;
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+
i40evf_add_filter(adapter, adapter->hw.mac.addr);
+
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
i40evf_configure(adapter);
i40evf_up_complete(adapter);
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = i40evf_netpoll,
#endif
+ .ndo_setup_tc = i40evf_setup_tc,
};
/**
if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
hw_features |= (NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX);
+ /* Enable cloud filter if ADQ is supported */
+ if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)
+ hw_features |= NETIF_F_HW_TC;
netdev->hw_features |= hw_features;
mutex_init(&hw->aq.arq_mutex);
spin_lock_init(&adapter->mac_vlan_list_lock);
+ spin_lock_init(&adapter->cloud_filter_list_lock);
INIT_LIST_HEAD(&adapter->mac_filter_list);
INIT_LIST_HEAD(&adapter->vlan_filter_list);
+ INIT_LIST_HEAD(&adapter->cloud_filter_list);
INIT_WORK(&adapter->reset_task, i40evf_reset_task);
INIT_WORK(&adapter->adminq_task, i40evf_adminq_task);
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct i40evf_adapter *adapter = netdev_priv(netdev);
+ struct i40evf_vlan_filter *vlf, *vlftmp;
struct i40evf_mac_filter *f, *ftmp;
+ struct i40evf_cloud_filter *cf, *cftmp;
struct i40e_hw *hw = &adapter->hw;
int err;
/* Indicate we are in remove and not to run reset_task */
/* Shut down all the garbage mashers on the detention level */
adapter->state = __I40EVF_REMOVE;
adapter->aq_required = 0;
+ adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
i40evf_request_reset(adapter);
msleep(50);
/* If the FW isn't responding, kick it once, but only once. */
list_del(&f->list);
kfree(f);
}
- list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
- list_del(&f->list);
- kfree(f);
+ list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
+ list) {
+ list_del(&vlf->list);
+ kfree(vlf);
}
spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+ list_del(&cf->list);
+ kfree(cf);
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
free_netdev(netdev);
pci_disable_pcie_error_reporting(pdev);
VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 |
VIRTCHNL_VF_OFFLOAD_ENCAP |
VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
- VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+ VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
+ VIRTCHNL_VF_OFFLOAD_ADQ;
adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG;
void i40evf_map_queues(struct i40evf_adapter *adapter)
{
struct virtchnl_irq_map_info *vimi;
+ struct virtchnl_vector_map *vecmap;
int v_idx, q_vectors, len;
struct i40e_q_vector *q_vector;
vimi->num_vectors = adapter->num_msix_vectors;
/* Queue vectors first */
for (v_idx = 0; v_idx < q_vectors; v_idx++) {
- q_vector = adapter->q_vectors + v_idx;
- vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
- vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS;
- vimi->vecmap[v_idx].txq_map = q_vector->ring_mask;
- vimi->vecmap[v_idx].rxq_map = q_vector->ring_mask;
+ q_vector = &adapter->q_vectors[v_idx];
+ vecmap = &vimi->vecmap[v_idx];
+
+ vecmap->vsi_id = adapter->vsi_res->vsi_id;
+ vecmap->vector_id = v_idx + NONQ_VECS;
+ vecmap->txq_map = q_vector->ring_mask;
+ vecmap->rxq_map = q_vector->ring_mask;
+ vecmap->rxitr_idx = I40E_RX_ITR;
+ vecmap->txitr_idx = I40E_TX_ITR;
}
/* Misc vector last - this is only for AdminQ messages */
- vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
- vimi->vecmap[v_idx].vector_id = 0;
- vimi->vecmap[v_idx].txq_map = 0;
- vimi->vecmap[v_idx].rxq_map = 0;
+ vecmap = &vimi->vecmap[v_idx];
+ vecmap->vsi_id = adapter->vsi_res->vsi_id;
+ vecmap->vector_id = 0;
+ vecmap->txq_map = 0;
+ vecmap->rxq_map = 0;
adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS;
i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP,
more = true;
}
- veal = kzalloc(len, GFP_KERNEL);
+ veal = kzalloc(len, GFP_ATOMIC);
if (!veal) {
spin_unlock_bh(&adapter->mac_vlan_list_lock);
return;
(count * sizeof(struct virtchnl_ether_addr));
more = true;
}
- veal = kzalloc(len, GFP_KERNEL);
+ veal = kzalloc(len, GFP_ATOMIC);
if (!veal) {
spin_unlock_bh(&adapter->mac_vlan_list_lock);
return;
(count * sizeof(u16));
more = true;
}
- vvfl = kzalloc(len, GFP_KERNEL);
+ vvfl = kzalloc(len, GFP_ATOMIC);
if (!vvfl) {
spin_unlock_bh(&adapter->mac_vlan_list_lock);
return;
(count * sizeof(u16));
more = true;
}
- vvfl = kzalloc(len, GFP_KERNEL);
+ vvfl = kzalloc(len, GFP_ATOMIC);
if (!vvfl) {
spin_unlock_bh(&adapter->mac_vlan_list_lock);
return;
netdev_info(netdev, "NIC Link is Up %sbps Full Duplex\n", speed);
}
+/**
+ * i40evf_enable_channel
+ * @adapter: adapter structure
+ *
+ * Request that the PF enable channels as specified by
+ * the user via tc tool.
+ **/
+void i40evf_enable_channels(struct i40evf_adapter *adapter)
+{
+ struct virtchnl_tc_info *vti = NULL;
+ u16 len;
+ int i;
+
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+
+ len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) +
+ sizeof(struct virtchnl_tc_info);
+
+ vti = kzalloc(len, GFP_KERNEL);
+ if (!vti)
+ return;
+ vti->num_tc = adapter->num_tc;
+ for (i = 0; i < vti->num_tc; i++) {
+ vti->list[i].count = adapter->ch_config.ch_info[i].count;
+ vti->list[i].offset = adapter->ch_config.ch_info[i].offset;
+ vti->list[i].pad = 0;
+ vti->list[i].max_tx_rate =
+ adapter->ch_config.ch_info[i].max_tx_rate;
+ }
+
+ adapter->ch_config.state = __I40EVF_TC_RUNNING;
+ adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->current_op = VIRTCHNL_OP_ENABLE_CHANNELS;
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_CHANNELS;
+ i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS,
+ (u8 *)vti, len);
+ kfree(vti);
+}
+
+/**
+ * i40evf_disable_channel
+ * @adapter: adapter structure
+ *
+ * Request that the PF disable channels that are configured
+ **/
+void i40evf_disable_channels(struct i40evf_adapter *adapter)
+{
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+
+ adapter->ch_config.state = __I40EVF_TC_INVALID;
+ adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->current_op = VIRTCHNL_OP_DISABLE_CHANNELS;
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_CHANNELS;
+ i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS,
+ NULL, 0);
+}
+
+/**
+ * i40evf_print_cloud_filter
+ * @adapter: adapter structure
+ * @f: cloud filter to print
+ *
+ * Print the cloud filter
+ **/
+static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter,
+ struct virtchnl_filter *f)
+{
+ switch (f->flow_type) {
+ case VIRTCHNL_TCP_V4_FLOW:
+ dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
+ &f->data.tcp_spec.dst_mac,
+ &f->data.tcp_spec.src_mac,
+ ntohs(f->data.tcp_spec.vlan_id),
+ &f->data.tcp_spec.dst_ip[0],
+ &f->data.tcp_spec.src_ip[0],
+ ntohs(f->data.tcp_spec.dst_port),
+ ntohs(f->data.tcp_spec.src_port));
+ break;
+ case VIRTCHNL_TCP_V6_FLOW:
+ dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
+ &f->data.tcp_spec.dst_mac,
+ &f->data.tcp_spec.src_mac,
+ ntohs(f->data.tcp_spec.vlan_id),
+ &f->data.tcp_spec.dst_ip,
+ &f->data.tcp_spec.src_ip,
+ ntohs(f->data.tcp_spec.dst_port),
+ ntohs(f->data.tcp_spec.src_port));
+ break;
+ }
+}
+
+/**
+ * i40evf_add_cloud_filter
+ * @adapter: adapter structure
+ *
+ * Request that the PF add cloud filters as specified
+ * by the user via tc tool.
+ **/
+void i40evf_add_cloud_filter(struct i40evf_adapter *adapter)
+{
+ struct i40evf_cloud_filter *cf;
+ struct virtchnl_filter *f;
+ int len = 0, count = 0;
+
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot add cloud filter, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ if (cf->add) {
+ count++;
+ break;
+ }
+ }
+ if (!count) {
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
+ return;
+ }
+ adapter->current_op = VIRTCHNL_OP_ADD_CLOUD_FILTER;
+
+ len = sizeof(struct virtchnl_filter);
+ f = kzalloc(len, GFP_KERNEL);
+ if (!f)
+ return;
+
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ if (cf->add) {
+ memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
+ cf->add = false;
+ cf->state = __I40EVF_CF_ADD_PENDING;
+ i40evf_send_pf_msg(adapter,
+ VIRTCHNL_OP_ADD_CLOUD_FILTER,
+ (u8 *)f, len);
+ }
+ }
+ kfree(f);
+}
+
+/**
+ * i40evf_del_cloud_filter
+ * @adapter: adapter structure
+ *
+ * Request that the PF delete cloud filters as specified
+ * by the user via tc tool.
+ **/
+void i40evf_del_cloud_filter(struct i40evf_adapter *adapter)
+{
+ struct i40evf_cloud_filter *cf, *cftmp;
+ struct virtchnl_filter *f;
+ int len = 0, count = 0;
+
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot remove cloud filter, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ if (cf->del) {
+ count++;
+ break;
+ }
+ }
+ if (!count) {
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
+ return;
+ }
+ adapter->current_op = VIRTCHNL_OP_DEL_CLOUD_FILTER;
+
+ len = sizeof(struct virtchnl_filter);
+ f = kzalloc(len, GFP_KERNEL);
+ if (!f)
+ return;
+
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+ if (cf->del) {
+ memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
+ cf->del = false;
+ cf->state = __I40EVF_CF_DEL_PENDING;
+ i40evf_send_pf_msg(adapter,
+ VIRTCHNL_OP_DEL_CLOUD_FILTER,
+ (u8 *)f, len);
+ }
+ }
+ kfree(f);
+}
+
/**
* i40evf_request_reset
* @adapter: adapter structure
if (adapter->link_up == link_up)
break;
- /* If we get link up message and start queues before
- * our queues are configured it will trigger a TX hang.
- * In that case, just ignore the link status message,
- * we'll get another one after we enable queues and
- * actually prepared to send traffic.
- */
- if (link_up && adapter->state != __I40EVF_RUNNING)
- break;
+ if (link_up) {
+ /* If we get link up message and start queues
+ * before our queues are configured it will
+ * trigger a TX hang. In that case, just ignore
+ * the link status message,we'll get another one
+ * after we enable queues and actually prepared
+ * to send traffic.
+ */
+ if (adapter->state != __I40EVF_RUNNING)
+ break;
+
+ /* For ADq enabled VF, we reconfigure VSIs and
+ * re-allocate queues. Hence wait till all
+ * queues are enabled.
+ */
+ if (adapter->flags &
+ I40EVF_FLAG_QUEUES_DISABLED)
+ break;
+ }
adapter->link_up = link_up;
if (link_up) {
i40evf_print_link_message(adapter);
break;
case VIRTCHNL_EVENT_RESET_IMPENDING:
- dev_info(&adapter->pdev->dev, "PF reset warning received\n");
+ dev_info(&adapter->pdev->dev, "Reset warning received from the PF\n");
if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
adapter->flags |= I40EVF_FLAG_RESET_PENDING;
dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n",
i40evf_stat_str(&adapter->hw, v_retval));
break;
+ case VIRTCHNL_OP_ENABLE_CHANNELS:
+ dev_err(&adapter->pdev->dev, "Failed to configure queue channels, error %s\n",
+ i40evf_stat_str(&adapter->hw, v_retval));
+ adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->ch_config.state = __I40EVF_TC_INVALID;
+ netdev_reset_tc(netdev);
+ netif_tx_start_all_queues(netdev);
+ break;
+ case VIRTCHNL_OP_DISABLE_CHANNELS:
+ dev_err(&adapter->pdev->dev, "Failed to disable queue channels, error %s\n",
+ i40evf_stat_str(&adapter->hw, v_retval));
+ adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->ch_config.state = __I40EVF_TC_RUNNING;
+ netif_tx_start_all_queues(netdev);
+ break;
+ case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
+ struct i40evf_cloud_filter *cf, *cftmp;
+
+ list_for_each_entry_safe(cf, cftmp,
+ &adapter->cloud_filter_list,
+ list) {
+ if (cf->state == __I40EVF_CF_ADD_PENDING) {
+ cf->state = __I40EVF_CF_INVALID;
+ dev_info(&adapter->pdev->dev, "Failed to add cloud filter, error %s\n",
+ i40evf_stat_str(&adapter->hw,
+ v_retval));
+ i40evf_print_cloud_filter(adapter,
+ &cf->f);
+ list_del(&cf->list);
+ kfree(cf);
+ adapter->num_cloud_filters--;
+ }
+ }
+ }
+ break;
+ case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
+ struct i40evf_cloud_filter *cf;
+
+ list_for_each_entry(cf, &adapter->cloud_filter_list,
+ list) {
+ if (cf->state == __I40EVF_CF_DEL_PENDING) {
+ cf->state = __I40EVF_CF_ACTIVE;
+ dev_info(&adapter->pdev->dev, "Failed to del cloud filter, error %s\n",
+ i40evf_stat_str(&adapter->hw,
+ v_retval));
+ i40evf_print_cloud_filter(adapter,
+ &cf->f);
+ }
+ }
+ }
+ break;
default:
dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
v_retval,
case VIRTCHNL_OP_ENABLE_QUEUES:
/* enable transmits */
i40evf_irq_enable(adapter, true);
+ adapter->flags &= ~I40EVF_FLAG_QUEUES_DISABLED;
break;
case VIRTCHNL_OP_DISABLE_QUEUES:
i40evf_free_all_tx_resources(adapter);
}
}
break;
+ case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
+ struct i40evf_cloud_filter *cf;
+
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ if (cf->state == __I40EVF_CF_ADD_PENDING)
+ cf->state = __I40EVF_CF_ACTIVE;
+ }
+ }
+ break;
+ case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
+ struct i40evf_cloud_filter *cf, *cftmp;
+
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+ list) {
+ if (cf->state == __I40EVF_CF_DEL_PENDING) {
+ cf->state = __I40EVF_CF_INVALID;
+ list_del(&cf->list);
+ kfree(cf);
+ adapter->num_cloud_filters--;
+ }
+ }
+ }
+ break;
default:
if (adapter->current_op && (v_opcode != adapter->current_op))
dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
for (i = 0; i < reta_entries; i++)
adapter->rss_indir_tbl[i] = indir[i];
+
+ ixgbe_store_reta(adapter);
}
/* Fill out the rss hash key */
ixgbe_store_key(adapter);
}
- ixgbe_store_reta(adapter);
-
return 0;
}
return false;
/* start at VMDq register offset for SR-IOV enabled setups */
- pool = 0;
reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
for (i = 0, pool = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
/* If we are greater than indices move to next pool */
if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) {
ixgbe_ptp_overflow_check(adapter);
- ixgbe_ptp_rx_hang(adapter);
+ if (adapter->flags & IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER)
+ ixgbe_ptp_rx_hang(adapter);
ixgbe_ptp_tx_hang(adapter);
}
#define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
+static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IXGBEVF_PRIV_FLAGS_LEGACY_RX BIT(0)
+ "legacy-rx",
+};
+
+#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings)
+
static int ixgbevf_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *cmd)
{
sizeof(drvinfo->version));
strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
sizeof(drvinfo->bus_info));
+
+ drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN;
}
static void ixgbevf_get_ringparam(struct net_device *netdev,
return IXGBEVF_TEST_LEN;
case ETH_SS_STATS:
return IXGBEVF_STATS_LEN;
+ case ETH_SS_PRIV_FLAGS:
+ return IXGBEVF_PRIV_FLAGS_STR_LEN;
default:
return -EINVAL;
}
p += ETH_GSTRING_LEN;
}
break;
+ case ETH_SS_PRIV_FLAGS:
+ memcpy(data, ixgbevf_priv_flags_strings,
+ IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+ break;
}
}
return err;
}
+static u32 ixgbevf_get_priv_flags(struct net_device *netdev)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ u32 priv_flags = 0;
+
+ if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+ priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX;
+
+ return priv_flags;
+}
+
+static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ unsigned int flags = adapter->flags;
+
+ flags &= ~IXGBEVF_FLAGS_LEGACY_RX;
+ if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX)
+ flags |= IXGBEVF_FLAGS_LEGACY_RX;
+
+ if (flags != adapter->flags) {
+ adapter->flags = flags;
+
+ /* reset interface to repopulate queues */
+ if (netif_running(netdev))
+ ixgbevf_reinit_locked(adapter);
+ }
+
+ return 0;
+}
+
static const struct ethtool_ops ixgbevf_ethtool_ops = {
.get_drvinfo = ixgbevf_get_drvinfo,
.get_regs_len = ixgbevf_get_regs_len,
.get_rxfh_key_size = ixgbevf_get_rxfh_key_size,
.get_rxfh = ixgbevf_get_rxfh,
.get_link_ksettings = ixgbevf_get_link_ksettings,
+ .get_priv_flags = ixgbevf_get_priv_flags,
+ .set_priv_flags = ixgbevf_set_priv_flags,
};
void ixgbevf_set_ethtool_ops(struct net_device *netdev)
};
enum ixgbevf_ring_state_t {
+ __IXGBEVF_RX_3K_BUFFER,
+ __IXGBEVF_RX_BUILD_SKB_ENABLED,
__IXGBEVF_TX_DETECT_HANG,
__IXGBEVF_HANG_CHECK_ARMED,
};
-#define check_for_tx_hang(ring) \
- test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define set_check_for_tx_hang(ring) \
- set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define clear_check_for_tx_hang(ring) \
- clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-
struct ixgbevf_ring {
struct ixgbevf_ring *next;
+ struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */
struct net_device *netdev;
struct device *dev;
void *desc; /* descriptor ring memory */
*/
u16 reg_idx;
int queue_index; /* needed for multiqueue queue management */
-};
+} ____cacheline_internodealigned_in_smp;
/* How many Rx Buffers do we bundle into one write to the hardware ? */
#define IXGBEVF_RX_BUFFER_WRITE 16 /* Must be power of 2 */
/* Supported Rx Buffer Sizes */
#define IXGBEVF_RXBUFFER_256 256 /* Used for packet split */
#define IXGBEVF_RXBUFFER_2048 2048
+#define IXGBEVF_RXBUFFER_3072 3072
#define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256
-#define IXGBEVF_RX_BUFSZ IXGBEVF_RXBUFFER_2048
#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+#define IXGBEVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#if (PAGE_SIZE < 8192)
+#define IXGBEVF_MAX_FRAME_BUILD_SKB \
+ (SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD)
+#else
+#define IXGBEVF_MAX_FRAME_BUILD_SKB IXGBEVF_RXBUFFER_2048
+#endif
+
#define IXGBE_TX_FLAGS_CSUM BIT(0)
#define IXGBE_TX_FLAGS_VLAN BIT(1)
#define IXGBE_TX_FLAGS_TSO BIT(2)
#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000
#define IXGBE_TX_FLAGS_VLAN_SHIFT 16
+#define ring_uses_large_buffer(ring) \
+ test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define set_ring_uses_large_buffer(ring) \
+ set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define clear_ring_uses_large_buffer(ring) \
+ clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+
+#define ring_uses_build_skb(ring) \
+ test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define set_ring_build_skb_enabled(ring) \
+ set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define clear_ring_build_skb_enabled(ring) \
+ clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+
+static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring_uses_large_buffer(ring))
+ return IXGBEVF_RXBUFFER_3072;
+
+ if (ring_uses_build_skb(ring))
+ return IXGBEVF_MAX_FRAME_BUILD_SKB;
+#endif
+ return IXGBEVF_RXBUFFER_2048;
+}
+
+static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring_uses_large_buffer(ring))
+ return 1;
+#endif
+ return 0;
+}
+
+#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring))
+
+#define check_for_tx_hang(ring) \
+ test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+ set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+ clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+
struct ixgbevf_ring_container {
struct ixgbevf_ring *ring; /* pointer to linked list of rings */
unsigned int total_bytes; /* total bytes processed this int */
u16 itr; /* Interrupt throttle rate written to EITR */
struct napi_struct napi;
struct ixgbevf_ring_container rx, tx;
+ struct rcu_head rcu; /* to avoid race with update stats on free */
char name[IFNAMSIZ + 9];
+
+ /* for dynamic allocation of rings associated with this q_vector */
+ struct ixgbevf_ring ring[0] ____cacheline_internodealigned_in_smp;
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int state;
#define IXGBEVF_QV_STATE_IDLE 0
u32 *rss_key;
u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE];
+ u32 flags;
+#define IXGBEVF_FLAGS_LEGACY_RX BIT(1)
};
enum ixbgevf_state_t {
static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter);
static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector);
static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter);
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer);
+static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *old_buff);
static void ixgbevf_remove_adapter(struct ixgbe_hw *hw)
{
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
}
+static
+struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring,
+ const unsigned int size)
+{
+ struct ixgbevf_rx_buffer *rx_buffer;
+
+ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ prefetchw(rx_buffer->page);
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ size,
+ DMA_FROM_DEVICE);
+
+ rx_buffer->pagecnt_bias--;
+
+ return rx_buffer;
+}
+
+static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer)
+{
+ if (ixgbevf_can_reuse_rx_page(rx_buffer)) {
+ /* hand second half of page back to the ring */
+ ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+ } else {
+ /* We are not reusing the buffer so unmap it and free
+ * any references we are holding to it
+ */
+ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+ ixgbevf_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE,
+ IXGBEVF_RX_DMA_ATTR);
+ __page_frag_cache_drain(rx_buffer->page,
+ rx_buffer->pagecnt_bias);
+ }
+
+ /* clear contents of rx_buffer */
+ rx_buffer->page = NULL;
+}
+
/**
* ixgbevf_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
return true;
}
+static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring)
+{
+ return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0;
+}
+
static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *bi)
{
struct page *page = bi->page;
- dma_addr_t dma = bi->dma;
+ dma_addr_t dma;
/* since we are recycling buffers we should seldom need to alloc */
if (likely(page))
return true;
/* alloc new page for storage */
- page = dev_alloc_page();
+ page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
}
/* map page for use */
- dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+ dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+ ixgbevf_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
/* if mapping failed free memory back to system since
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
- __free_page(page);
+ __free_pages(page, ixgbevf_rx_pg_order(rx_ring));
rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
bi->dma = dma;
bi->page = page;
- bi->page_offset = 0;
+ bi->page_offset = ixgbevf_rx_offset(rx_ring);
bi->pagecnt_bias = 1;
rx_ring->rx_stats.alloc_rx_page++;
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
bi->page_offset,
- IXGBEVF_RX_BUFSZ,
+ ixgbevf_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
/* Refresh the desc even if pkt_addr didn't change
return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
}
-static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
- struct page *page,
- const unsigned int truesize)
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer)
{
- unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
+ unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+ struct page *page = rx_buffer->page;
/* avoid re-using remote pages */
if (unlikely(ixgbevf_page_is_reserved(page)))
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
- if (unlikely(page_ref_count(page) != pagecnt_bias))
+ if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
return false;
-
- /* flip page offset to other buffer */
- rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
-
#else
- /* move offset up to the next cache line */
- rx_buffer->page_offset += truesize;
+#define IXGBEVF_LAST_OFFSET \
+ (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048)
- if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
+ if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET)
return false;
#endif
* the pagecnt_bias and page count so that we fully restock the
* number of references the driver holds.
*/
- if (unlikely(pagecnt_bias == 1)) {
+ if (unlikely(!pagecnt_bias)) {
page_ref_add(page, USHRT_MAX);
rx_buffer->pagecnt_bias = USHRT_MAX;
}
* ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff
* @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: buffer containing page to add
- * @rx_desc: descriptor containing length of buffer written by hardware
* @skb: sk_buff to place the data into
+ * @size: size of buffer to be added
*
* This function will add the data contained in rx_buffer->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
- *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
**/
-static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
+static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *rx_buffer,
- u16 size,
- union ixgbe_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ unsigned int size)
{
- struct page *page = rx_buffer->page;
- unsigned char *va = page_address(page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
- unsigned int truesize = IXGBEVF_RX_BUFSZ;
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
#else
- unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+ unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+ SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
+ SKB_DATA_ALIGN(size);
+#endif
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+ rx_buffer->page_offset, size, truesize);
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
#endif
- unsigned int pull_len;
-
- if (unlikely(skb_is_nonlinear(skb)))
- goto add_tail_frag;
-
- if (likely(size <= IXGBEVF_RX_HDR_SIZE)) {
- memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
-
- /* page is not reserved, we can reuse buffer as is */
- if (likely(!ixgbevf_page_is_reserved(page)))
- return true;
-
- /* this page cannot be reused so discard it */
- return false;
- }
-
- /* we need the header to contain the greater of either ETH_HLEN or
- * 60 bytes if the skb->len is less than 60 for skb_pad.
- */
- pull_len = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
-
- /* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
-
- /* update all of the pointers */
- va += pull_len;
- size -= pull_len;
-
-add_tail_frag:
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
- (unsigned long)va & ~PAGE_MASK, size, truesize);
-
- return ixgbevf_can_reuse_rx_page(rx_buffer, page, truesize);
}
-static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
- union ixgbe_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
+static
+struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer,
+ union ixgbe_adv_rx_desc *rx_desc,
+ unsigned int size)
{
- struct ixgbevf_rx_buffer *rx_buffer;
- struct page *page;
- u16 size = le16_to_cpu(rx_desc->wb.upper.length);
-
- rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
- page = rx_buffer->page;
- prefetchw(page);
-
- /* we are reusing so sync this buffer for CPU use */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_buffer->dma,
- rx_buffer->page_offset,
- size,
- DMA_FROM_DEVICE);
-
- if (likely(!skb)) {
- void *page_addr = page_address(page) +
- rx_buffer->page_offset;
+ void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(size);
+#endif
+ unsigned int headlen;
+ struct sk_buff *skb;
- /* prefetch first cache line of first page */
- prefetch(page_addr);
+ /* prefetch first cache line of first page */
+ prefetch(va);
#if L1_CACHE_BYTES < 128
- prefetch(page_addr + L1_CACHE_BYTES);
+ prefetch(va + L1_CACHE_BYTES);
#endif
- /* allocate a skb to store the frags */
- skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
- IXGBEVF_RX_HDR_SIZE);
- if (unlikely(!skb)) {
- rx_ring->rx_stats.alloc_rx_buff_failed++;
- return NULL;
- }
+ /* allocate a skb to store the frags */
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE);
+ if (unlikely(!skb))
+ return NULL;
- /* we will be copying header into skb->data in
- * pskb_may_pull so it is in our interest to prefetch
- * it now to avoid a possible cache miss
- */
- prefetchw(skb->data);
- }
+ /* Determine available headroom for copy */
+ headlen = size;
+ if (headlen > IXGBEVF_RX_HDR_SIZE)
+ headlen = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
- /* pull page into skb */
- if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
- /* hand second half of page back to the ring */
- ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+ /* align pull length to size of long to optimize memcpy performance */
+ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+ /* update all of the pointers */
+ size -= headlen;
+ if (size) {
+ skb_add_rx_frag(skb, 0, rx_buffer->page,
+ (va + headlen) - page_address(rx_buffer->page),
+ size, truesize);
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
} else {
- /* We are not reusing the buffer so unmap it and free
- * any references we are holding to it
- */
- dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
- PAGE_SIZE, DMA_FROM_DEVICE,
- IXGBEVF_RX_DMA_ATTR);
- __page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
+ rx_buffer->pagecnt_bias++;
}
- /* clear contents of buffer_info */
- rx_buffer->dma = 0;
- rx_buffer->page = NULL;
-
return skb;
}
IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask);
}
+static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer,
+ union ixgbe_adv_rx_desc *rx_desc,
+ unsigned int size)
+{
+ void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+ SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size);
+#endif
+ struct sk_buff *skb;
+
+ /* prefetch first cache line of first page */
+ prefetch(va);
+#if L1_CACHE_BYTES < 128
+ prefetch(va + L1_CACHE_BYTES);
+#endif
+
+ /* build an skb to around the page buffer */
+ skb = build_skb(va - IXGBEVF_SKB_PAD, truesize);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* update pointers within the skb to store the data */
+ skb_reserve(skb, IXGBEVF_SKB_PAD);
+ __skb_put(skb, size);
+
+ /* update buffer offset */
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
+
+ return skb;
+}
static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
struct ixgbevf_ring *rx_ring,
int budget)
while (likely(total_rx_packets < budget)) {
union ixgbe_adv_rx_desc *rx_desc;
+ struct ixgbevf_rx_buffer *rx_buffer;
+ unsigned int size;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) {
}
rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
-
- if (!rx_desc->wb.upper.length)
+ size = le16_to_cpu(rx_desc->wb.upper.length);
+ if (!size)
break;
/* This memory barrier is needed to keep us from reading
*/
rmb();
+ rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size);
+
/* retrieve a buffer from the ring */
- skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb);
+ if (skb)
+ ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size);
+ else if (ring_uses_build_skb(rx_ring))
+ skb = ixgbevf_build_skb(rx_ring, rx_buffer,
+ rx_desc, size);
+ else
+ skb = ixgbevf_construct_skb(rx_ring, rx_buffer,
+ rx_desc, size);
/* exit if we failed to retrieve a buffer */
if (!skb) {
rx_ring->rx_stats.alloc_rx_buff_failed++;
+ rx_buffer->pagecnt_bias++;
break;
}
+ ixgbevf_put_rx_buffer(rx_ring, rx_buffer);
cleaned_count++;
/* fetch next buffer in frame if non-eop */
return IRQ_HANDLED;
}
-static inline void map_vector_to_rxq(struct ixgbevf_adapter *a, int v_idx,
- int r_idx)
-{
- struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
- a->rx_ring[r_idx]->next = q_vector->rx.ring;
- q_vector->rx.ring = a->rx_ring[r_idx];
- q_vector->rx.count++;
-}
-
-static inline void map_vector_to_txq(struct ixgbevf_adapter *a, int v_idx,
- int t_idx)
-{
- struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
- a->tx_ring[t_idx]->next = q_vector->tx.ring;
- q_vector->tx.ring = a->tx_ring[t_idx];
- q_vector->tx.count++;
-}
-
-/**
- * ixgbevf_map_rings_to_vectors - Maps descriptor rings to vectors
- * @adapter: board private structure to initialize
- *
- * This function maps descriptor rings to the queue-specific vectors
- * we were allotted through the MSI-X enabling code. Ideally, we'd have
- * one vector per ring/queue, but on a constrained vector budget, we
- * group the rings as "efficiently" as possible. You would add new
- * mapping configurations in here.
- **/
-static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
-{
- int q_vectors;
- int v_start = 0;
- int rxr_idx = 0, txr_idx = 0;
- int rxr_remaining = adapter->num_rx_queues;
- int txr_remaining = adapter->num_tx_queues;
- int i, j;
- int rqpv, tqpv;
-
- q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
- /* The ideal configuration...
- * We have enough vectors to map one per queue.
- */
- if (q_vectors == adapter->num_rx_queues + adapter->num_tx_queues) {
- for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++)
- map_vector_to_rxq(adapter, v_start, rxr_idx);
-
- for (; txr_idx < txr_remaining; v_start++, txr_idx++)
- map_vector_to_txq(adapter, v_start, txr_idx);
- return 0;
- }
-
- /* If we don't have enough vectors for a 1-to-1
- * mapping, we'll have to group them so there are
- * multiple queues per vector.
- */
- /* Re-adjusting *qpv takes care of the remainder. */
- for (i = v_start; i < q_vectors; i++) {
- rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i);
- for (j = 0; j < rqpv; j++) {
- map_vector_to_rxq(adapter, i, rxr_idx);
- rxr_idx++;
- rxr_remaining--;
- }
- }
- for (i = v_start; i < q_vectors; i++) {
- tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i);
- for (j = 0; j < tqpv; j++) {
- map_vector_to_txq(adapter, i, txr_idx);
- txr_idx++;
- txr_remaining--;
- }
- }
-
- return 0;
-}
-
/**
* ixgbevf_request_msix_irqs - Initialize MSI-X interrupts
* @adapter: board private structure
return err;
}
-static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter)
-{
- int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
- for (i = 0; i < q_vectors; i++) {
- struct ixgbevf_q_vector *q_vector = adapter->q_vector[i];
-
- q_vector->rx.ring = NULL;
- q_vector->tx.ring = NULL;
- q_vector->rx.count = 0;
- q_vector->tx.count = 0;
- }
-}
-
/**
* ixgbevf_request_irq - initialize interrupts
* @adapter: board private structure
free_irq(adapter->msix_entries[i].vector,
adapter->q_vector[i]);
}
-
- ixgbevf_reset_q_vectors(adapter);
}
/**
#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
-static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
+static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *ring, int index)
{
struct ixgbe_hw *hw = &adapter->hw;
u32 srrctl;
srrctl = IXGBE_SRRCTL_DROP_EN;
srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
- srrctl |= IXGBEVF_RX_BUFSZ >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+ if (ring_uses_large_buffer(ring))
+ srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+ else
+ srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl);
ring->next_to_use = 0;
ring->next_to_alloc = 0;
- ixgbevf_configure_srrctl(adapter, reg_idx);
+ ixgbevf_configure_srrctl(adapter, ring, reg_idx);
+
+ /* RXDCTL.RLPML does not work on 82599 */
+ if (adapter->hw.mac.type != ixgbe_mac_82599_vf) {
+ rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
+ IXGBE_RXDCTL_RLPML_EN);
- /* allow any size packet since we can handle overflow */
- rxdctl &= ~IXGBE_RXDCTL_RLPML_EN;
+#if (PAGE_SIZE < 8192)
+ /* Limit the maximum frame size so we don't overrun the skb */
+ if (ring_uses_build_skb(ring) &&
+ !ring_uses_large_buffer(ring))
+ rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB |
+ IXGBE_RXDCTL_RLPML_EN;
+#endif
+ }
rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME;
IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl);
ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring));
}
+static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *rx_ring)
+{
+ struct net_device *netdev = adapter->netdev;
+ unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+ /* set build_skb and buffer size flags */
+ clear_ring_build_skb_enabled(rx_ring);
+ clear_ring_uses_large_buffer(rx_ring);
+
+ if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+ return;
+
+ set_ring_build_skb_enabled(rx_ring);
+
+#if (PAGE_SIZE < 8192)
+ if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
+ return;
+
+ set_ring_uses_large_buffer(rx_ring);
+#endif
+}
+
/**
* ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset
* @adapter: board private structure
/* Setup the HW Rx Head and Tail Descriptor Pointers and
* the Base and Length of the Rx Descriptor Ring
*/
- for (i = 0; i < adapter->num_rx_queues; i++)
- ixgbevf_configure_rx_ring(adapter, adapter->rx_ring[i]);
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct ixgbevf_ring *rx_ring = adapter->rx_ring[i];
+
+ ixgbevf_set_rx_buffer_len(adapter, rx_ring);
+ ixgbevf_configure_rx_ring(adapter, rx_ring);
+ }
}
static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev,
dma_sync_single_range_for_cpu(rx_ring->dev,
rx_buffer->dma,
rx_buffer->page_offset,
- IXGBEVF_RX_BUFSZ,
+ ixgbevf_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
/* free resources associated with mapping */
dma_unmap_page_attrs(rx_ring->dev,
rx_buffer->dma,
- PAGE_SIZE,
+ ixgbevf_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
IXGBEVF_RX_DMA_ATTR);
}
/**
- * ixgbevf_alloc_queues - Allocate memory for all rings
+ * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * @adapter: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
+{
+ int vector, v_budget;
+
+ /* It's easy to be greedy for MSI-X vectors, but it really
+ * doesn't do us much good if we have a lot more vectors
+ * than CPU's. So let's be conservative and only ask for
+ * (roughly) the same number of vectors as there are CPU's.
+ * The default is to use pairs of vectors.
+ */
+ v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
+ v_budget = min_t(int, v_budget, num_online_cpus());
+ v_budget += NON_Q_VECTORS;
+
+ adapter->msix_entries = kcalloc(v_budget,
+ sizeof(struct msix_entry), GFP_KERNEL);
+ if (!adapter->msix_entries)
+ return -ENOMEM;
+
+ for (vector = 0; vector < v_budget; vector++)
+ adapter->msix_entries[vector].entry = vector;
+
+ /* A failure in MSI-X entry allocation isn't fatal, but the VF driver
+ * does not support any other modes, so we will simply fail here. Note
+ * that we clean up the msix_entries pointer else-where.
+ */
+ return ixgbevf_acquire_msix_vectors(adapter, v_budget);
+}
+
+static void ixgbevf_add_ring(struct ixgbevf_ring *ring,
+ struct ixgbevf_ring_container *head)
+{
+ ring->next = head->ring;
+ head->ring = ring;
+ head->count++;
+}
+
+/**
+ * ixgbevf_alloc_q_vector - Allocate memory for a single interrupt vector
* @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: number of Tx rings for q vector
+ * @txr_idx: index of first Tx ring to assign
+ * @rxr_count: number of Rx rings for q vector
+ * @rxr_idx: index of first Rx ring to assign
*
- * We allocate one ring per queue at run-time since we don't know the
- * number of queues at compile-time. The polling_netdev array is
- * intended for Multiqueue, but should work fine with a single queue.
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
**/
-static int ixgbevf_alloc_queues(struct ixgbevf_adapter *adapter)
+static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx,
+ int txr_count, int txr_idx,
+ int rxr_count, int rxr_idx)
{
+ struct ixgbevf_q_vector *q_vector;
struct ixgbevf_ring *ring;
- int rx = 0, tx = 0;
+ int ring_count, size;
+
+ ring_count = txr_count + rxr_count;
+ size = sizeof(*q_vector) + (sizeof(*ring) * ring_count);
+
+ /* allocate q_vector and rings */
+ q_vector = kzalloc(size, GFP_KERNEL);
+ if (!q_vector)
+ return -ENOMEM;
+
+ /* initialize NAPI */
+ netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64);
+
+ /* tie q_vector and adapter together */
+ adapter->q_vector[v_idx] = q_vector;
+ q_vector->adapter = adapter;
+ q_vector->v_idx = v_idx;
- for (; tx < adapter->num_tx_queues; tx++) {
- ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (!ring)
- goto err_allocation;
+ /* initialize pointer to rings */
+ ring = q_vector->ring;
+ while (txr_count) {
+ /* assign generic ring traits */
ring->dev = &adapter->pdev->dev;
ring->netdev = adapter->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Tx values */
+ ixgbevf_add_ring(ring, &q_vector->tx);
+
+ /* apply Tx specific ring traits */
ring->count = adapter->tx_ring_count;
- ring->queue_index = tx;
- ring->reg_idx = tx;
+ ring->queue_index = txr_idx;
+ ring->reg_idx = txr_idx;
- adapter->tx_ring[tx] = ring;
- }
+ /* assign ring to adapter */
+ adapter->tx_ring[txr_idx] = ring;
+
+ /* update count and index */
+ txr_count--;
+ txr_idx++;
- for (; rx < adapter->num_rx_queues; rx++) {
- ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (!ring)
- goto err_allocation;
+ /* push pointer to next ring */
+ ring++;
+ }
+ while (rxr_count) {
+ /* assign generic ring traits */
ring->dev = &adapter->pdev->dev;
ring->netdev = adapter->netdev;
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Rx values */
+ ixgbevf_add_ring(ring, &q_vector->rx);
+
+ /* apply Rx specific ring traits */
ring->count = adapter->rx_ring_count;
- ring->queue_index = rx;
- ring->reg_idx = rx;
+ ring->queue_index = rxr_idx;
+ ring->reg_idx = rxr_idx;
- adapter->rx_ring[rx] = ring;
- }
+ /* assign ring to adapter */
+ adapter->rx_ring[rxr_idx] = ring;
- return 0;
+ /* update count and index */
+ rxr_count--;
+ rxr_idx++;
-err_allocation:
- while (tx) {
- kfree(adapter->tx_ring[--tx]);
- adapter->tx_ring[tx] = NULL;
+ /* push pointer to next ring */
+ ring++;
}
- while (rx) {
- kfree(adapter->rx_ring[--rx]);
- adapter->rx_ring[rx] = NULL;
- }
- return -ENOMEM;
+ return 0;
}
/**
- * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * ixgbevf_free_q_vector - Free memory allocated for specific interrupt vector
* @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
*
- * Attempt to configure the interrupts using the best available
- * capabilities of the hardware and the kernel.
+ * This function frees the memory allocated to the q_vector. In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
**/
-static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
+static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx)
{
- struct net_device *netdev = adapter->netdev;
- int err;
- int vector, v_budget;
-
- /* It's easy to be greedy for MSI-X vectors, but it really
- * doesn't do us much good if we have a lot more vectors
- * than CPU's. So let's be conservative and only ask for
- * (roughly) the same number of vectors as there are CPU's.
- * The default is to use pairs of vectors.
- */
- v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
- v_budget = min_t(int, v_budget, num_online_cpus());
- v_budget += NON_Q_VECTORS;
-
- /* A failure in MSI-X entry allocation isn't fatal, but it does
- * mean we disable MSI-X capabilities of the adapter.
- */
- adapter->msix_entries = kcalloc(v_budget,
- sizeof(struct msix_entry), GFP_KERNEL);
- if (!adapter->msix_entries)
- return -ENOMEM;
+ struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx];
+ struct ixgbevf_ring *ring;
- for (vector = 0; vector < v_budget; vector++)
- adapter->msix_entries[vector].entry = vector;
+ ixgbevf_for_each_ring(ring, q_vector->tx)
+ adapter->tx_ring[ring->queue_index] = NULL;
- err = ixgbevf_acquire_msix_vectors(adapter, v_budget);
- if (err)
- return err;
+ ixgbevf_for_each_ring(ring, q_vector->rx)
+ adapter->rx_ring[ring->queue_index] = NULL;
- err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
- if (err)
- return err;
+ adapter->q_vector[v_idx] = NULL;
+ netif_napi_del(&q_vector->napi);
- return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+ /* ixgbevf_get_stats() might access the rings on this vector,
+ * we must wait a grace period before freeing it.
+ */
+ kfree_rcu(q_vector, rcu);
}
/**
**/
static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
{
- int q_idx, num_q_vectors;
- struct ixgbevf_q_vector *q_vector;
+ int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+ int rxr_remaining = adapter->num_rx_queues;
+ int txr_remaining = adapter->num_tx_queues;
+ int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+ int err;
+
+ if (q_vectors >= (rxr_remaining + txr_remaining)) {
+ for (; rxr_remaining; v_idx++, q_vectors--) {
+ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+
+ err = ixgbevf_alloc_q_vector(adapter, v_idx,
+ 0, 0, rqpv, rxr_idx);
+ if (err)
+ goto err_out;
- num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+ /* update counts and index */
+ rxr_remaining -= rqpv;
+ rxr_idx += rqpv;
+ }
+ }
+
+ for (; q_vectors; v_idx++, q_vectors--) {
+ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+ int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors);
- for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
- q_vector = kzalloc(sizeof(struct ixgbevf_q_vector), GFP_KERNEL);
- if (!q_vector)
+ err = ixgbevf_alloc_q_vector(adapter, v_idx,
+ tqpv, txr_idx,
+ rqpv, rxr_idx);
+
+ if (err)
goto err_out;
- q_vector->adapter = adapter;
- q_vector->v_idx = q_idx;
- netif_napi_add(adapter->netdev, &q_vector->napi,
- ixgbevf_poll, 64);
- adapter->q_vector[q_idx] = q_vector;
+
+ /* update counts and index */
+ rxr_remaining -= rqpv;
+ rxr_idx += rqpv;
+ txr_remaining -= tqpv;
+ txr_idx += tqpv;
}
return 0;
err_out:
- while (q_idx) {
- q_idx--;
- q_vector = adapter->q_vector[q_idx];
-#ifdef CONFIG_NET_RX_BUSY_POLL
- napi_hash_del(&q_vector->napi);
-#endif
- netif_napi_del(&q_vector->napi);
- kfree(q_vector);
- adapter->q_vector[q_idx] = NULL;
+ while (v_idx) {
+ v_idx--;
+ ixgbevf_free_q_vector(adapter, v_idx);
}
+
return -ENOMEM;
}
**/
static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter)
{
- int q_idx, num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
- for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
- struct ixgbevf_q_vector *q_vector = adapter->q_vector[q_idx];
+ int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
- adapter->q_vector[q_idx] = NULL;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- napi_hash_del(&q_vector->napi);
-#endif
- netif_napi_del(&q_vector->napi);
- kfree(q_vector);
+ while (q_vectors) {
+ q_vectors--;
+ ixgbevf_free_q_vector(adapter, q_vectors);
}
}
goto err_alloc_q_vectors;
}
- err = ixgbevf_alloc_queues(adapter);
- if (err) {
- pr_err("Unable to allocate memory for queues\n");
- goto err_alloc_queues;
- }
-
hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
(adapter->num_rx_queues > 1) ? "Enabled" :
"Disabled", adapter->num_rx_queues, adapter->num_tx_queues);
set_bit(__IXGBEVF_DOWN, &adapter->state);
return 0;
-err_alloc_queues:
- ixgbevf_free_q_vectors(adapter);
err_alloc_q_vectors:
ixgbevf_reset_interrupt_capability(adapter);
err_set_interrupt:
**/
static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter)
{
- int i;
-
- for (i = 0; i < adapter->num_tx_queues; i++) {
- kfree(adapter->tx_ring[i]);
- adapter->tx_ring[i] = NULL;
- }
- for (i = 0; i < adapter->num_rx_queues; i++) {
- kfree(adapter->rx_ring[i]);
- adapter->rx_ring[i] = NULL;
- }
-
adapter->num_tx_queues = 0;
adapter->num_rx_queues = 0;
if (!err)
continue;
hw_dbg(&adapter->hw, "Allocation for Tx Queue %u failed\n", i);
- break;
+ goto err_setup_tx;
}
+ return 0;
+err_setup_tx:
+ /* rewind the index freeing the rings as we go */
+ while (i--)
+ ixgbevf_free_tx_resources(adapter->tx_ring[i]);
return err;
}
if (!err)
continue;
hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i);
- break;
+ goto err_setup_rx;
}
+
+ return 0;
+err_setup_rx:
+ /* rewind the index freeing the rings as we go */
+ while (i--)
+ ixgbevf_free_rx_resources(adapter->rx_ring[i]);
return err;
}
ixgbevf_configure(adapter);
- /* Map the Tx/Rx rings to the vectors we were allotted.
- * if request_irq will be called in this function map_rings
- * must be called *before* up_complete
- */
- ixgbevf_map_rings_to_vectors(adapter);
-
err = ixgbevf_request_irq(adapter);
if (err)
goto err_req_irq;
+ /* Notify the stack of the actual queue counts. */
+ err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+ if (err)
+ goto err_set_queues;
+
+ err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+ if (err)
+ goto err_set_queues;
+
ixgbevf_up_complete(adapter);
return 0;
+err_set_queues:
+ ixgbevf_free_irq(adapter);
err_req_irq:
- ixgbevf_down(adapter);
-err_setup_rx:
ixgbevf_free_all_rx_resources(adapter);
-err_setup_tx:
+err_setup_rx:
ixgbevf_free_all_tx_resources(adapter);
+err_setup_tx:
ixgbevf_reset(adapter);
-
err_setup_reset:
return err;
return __ixgbevf_maybe_stop_tx(tx_ring, size);
}
-static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
+ struct ixgbevf_ring *tx_ring)
{
- struct ixgbevf_adapter *adapter = netdev_priv(netdev);
struct ixgbevf_tx_buffer *first;
- struct ixgbevf_ring *tx_ring;
int tso;
u32 tx_flags = 0;
u16 count = TXD_USE_COUNT(skb_headlen(skb));
return NETDEV_TX_OK;
}
- tx_ring = adapter->tx_ring[skb->queue_mapping];
-
/* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
* + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
* + 2 desc gap to keep tail from touching head,
return NETDEV_TX_OK;
}
+static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ struct ixgbevf_ring *tx_ring;
+
+ if (skb->len <= 0) {
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+ /* The minimum packet size for olinfo paylen is 17 so pad the skb
+ * in order to meet this minimum size requirement.
+ */
+ if (skb->len < 17) {
+ if (skb_padto(skb, 17))
+ return NETDEV_TX_OK;
+ skb->len = 17;
+ }
+
+ tx_ring = adapter->tx_ring[skb->queue_mapping];
+ return ixgbevf_xmit_frame_ring(skb, tx_ring);
+}
+
/**
* ixgbevf_set_mac - Change the Ethernet Address of the NIC
* @netdev: network interface device structure
/* must set new MTU before calling down or up */
netdev->mtu = new_mtu;
+ if (netif_running(netdev))
+ ixgbevf_reinit_locked(adapter);
+
return 0;
}
rtnl_lock();
err = ixgbevf_init_interrupt_scheme(adapter);
+ if (!err && netif_running(netdev))
+ err = ixgbevf_open(netdev);
rtnl_unlock();
- if (err) {
- dev_err(&pdev->dev, "Cannot initialize interrupts\n");
+ if (err)
return err;
- }
-
- if (netif_running(netdev)) {
- err = ixgbevf_open(netdev);
- if (err)
- return err;
- }
netif_device_attach(netdev);
stats->multicast = adapter->stats.vfmprc - adapter->stats.base_vfmprc;
+ rcu_read_lock();
for (i = 0; i < adapter->num_rx_queues; i++) {
ring = adapter->rx_ring[i];
do {
stats->tx_bytes += bytes;
stats->tx_packets += packets;
}
+ rcu_read_unlock();
}
#define IXGBEVF_MAX_MAC_HDR_LEN 127
#define MVPP2_RXQ_PACKET_OFFSET_MASK 0x70000000
#define MVPP2_RXQ_DISABLE_MASK BIT(31)
+/* Top Registers */
+#define MVPP2_MH_REG(port) (0x5040 + 4 * (port))
+#define MVPP2_DSA_EXTENDED BIT(5)
+
/* Parser Registers */
#define MVPP2_PRS_INIT_LOOKUP_REG 0x1000
#define MVPP2_PRS_PORT_LU_MAX 0xf
#define MVPP2_ETH_TYPE_LEN 2
#define MVPP2_PPPOE_HDR_SIZE 8
#define MVPP2_VLAN_TAG_LEN 4
+#define MVPP2_VLAN_TAG_EDSA_LEN 8
/* Lbtd 802.3 type */
#define MVPP2_IP_LBDT_TYPE 0xfffa
#define MVPP2_PRS_TCAM_LU_BYTE 20
#define MVPP2_PRS_TCAM_EN_OFFS(offs) ((offs) + 2)
#define MVPP2_PRS_TCAM_INV_WORD 5
+
+#define MVPP2_PRS_VID_TCAM_BYTE 2
+
+/* There is a TCAM range reserved for VLAN filtering entries, range size is 33
+ * 10 VLAN ID filter entries per port
+ * 1 default VLAN filter entry per port
+ * It is assumed that there are 3 ports for filter, not including loopback port
+ */
+#define MVPP2_PRS_VLAN_FILT_MAX 11
+#define MVPP2_PRS_VLAN_FILT_RANGE_SIZE 33
+
+#define MVPP2_PRS_VLAN_FILT_MAX_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 2)
+#define MVPP2_PRS_VLAN_FILT_DFLT_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 1)
+
/* Tcam entries ID */
#define MVPP2_PE_DROP_ALL 0
#define MVPP2_PE_FIRST_FREE_TID 1
-#define MVPP2_PE_LAST_FREE_TID (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+
+/* VLAN filtering range */
+#define MVPP2_PE_VID_FILT_RANGE_END (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+#define MVPP2_PE_VID_FILT_RANGE_START (MVPP2_PE_VID_FILT_RANGE_END - \
+ MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1)
+#define MVPP2_PE_LAST_FREE_TID (MVPP2_PE_VID_FILT_RANGE_START - 1)
#define MVPP2_PE_IP6_EXT_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 30)
#define MVPP2_PE_MAC_MC_IP6 (MVPP2_PRS_TCAM_SRAM_SIZE - 29)
#define MVPP2_PE_IP6_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 28)
#define MVPP2_PE_IP4_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 27)
#define MVPP2_PE_LAST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 26)
-#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
-#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
-#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
-#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
-#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
-#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
-#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
-#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
-#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
-#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
-#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
-#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
-#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
-#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
+#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 21)
+#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 20)
+#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
+#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
+#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
+#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
+#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
+#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
+#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
+#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
+#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
+#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
+#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
+#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
+#define MVPP2_PE_VID_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
+#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
#define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 5)
#define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 4)
#define MVPP2_PE_MAC_MC_ALL (MVPP2_PRS_TCAM_SRAM_SIZE - 3)
#define MVPP2_PE_MAC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2)
#define MVPP2_PE_MAC_NON_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 1)
+#define MVPP2_PRS_VID_PORT_FIRST(port) (MVPP2_PE_VID_FILT_RANGE_START + \
+ ((port) * MVPP2_PRS_VLAN_FILT_MAX))
+#define MVPP2_PRS_VID_PORT_LAST(port) (MVPP2_PRS_VID_PORT_FIRST(port) \
+ + MVPP2_PRS_VLAN_FILT_MAX_ENTRY)
+/* Index of default vid filter for given port */
+#define MVPP2_PRS_VID_PORT_DFLT(port) (MVPP2_PRS_VID_PORT_FIRST(port) \
+ + MVPP2_PRS_VLAN_FILT_DFLT_ENTRY)
+
/* Sram structure
* The fields are represented by MVPP2_PRS_TCAM_DATA_REG(3)->(0).
*/
#define MVPP2_PRS_IPV6_EXT_AH_L4_AI_BIT BIT(4)
#define MVPP2_PRS_SINGLE_VLAN_AI 0
#define MVPP2_PRS_DBL_VLAN_AI_BIT BIT(7)
+#define MVPP2_PRS_EDSA_VID_AI_BIT BIT(0)
/* DSA/EDSA type */
#define MVPP2_PRS_TAGGED true
MVPP2_PRS_LU_MAC,
MVPP2_PRS_LU_DSA,
MVPP2_PRS_LU_VLAN,
+ MVPP2_PRS_LU_VID,
MVPP2_PRS_LU_L2,
MVPP2_PRS_LU_PPPOE,
MVPP2_PRS_LU_IP4,
mvpp2_prs_tcam_data_byte_set(pe, offset + 1, ethertype & 0xff, 0xff);
}
+/* Set vid in tcam sw entry */
+static void mvpp2_prs_match_vid(struct mvpp2_prs_entry *pe, int offset,
+ unsigned short vid)
+{
+ mvpp2_prs_tcam_data_byte_set(pe, offset + 0, (vid & 0xf00) >> 8, 0xf);
+ mvpp2_prs_tcam_data_byte_set(pe, offset + 1, vid & 0xff, 0xff);
+}
+
/* Set bits in sram sw entry */
static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num,
int val)
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA);
pe.index = tid;
- /* Shift 4 bytes if DSA tag or 8 bytes in case of EDSA tag*/
- mvpp2_prs_sram_shift_set(&pe, shift,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
/* Update shadow table */
mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA);
if (tagged) {
/* Set tagged bit in DSA tag */
mvpp2_prs_tcam_data_byte_set(&pe, 0,
- MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
- MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
- /* Clear all ai bits for next iteration */
- mvpp2_prs_sram_ai_update(&pe, 0,
- MVPP2_PRS_SRAM_AI_MASK);
- /* If packet is tagged continue check vlans */
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+ MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
+ MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
+
+ /* Set ai bits for next iteration */
+ if (extend)
+ mvpp2_prs_sram_ai_update(&pe, 1,
+ MVPP2_PRS_SRAM_AI_MASK);
+ else
+ mvpp2_prs_sram_ai_update(&pe, 0,
+ MVPP2_PRS_SRAM_AI_MASK);
+
+ /* If packet is tagged continue check vid filtering */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
} else {
+ /* Shift 4 bytes for DSA tag or 8 bytes for EDSA tag*/
+ mvpp2_prs_sram_shift_set(&pe, shift,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
/* Set result info bits to 'no vlans' */
mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE,
MVPP2_PRS_RI_VLAN_MASK);
mvpp2_prs_match_etype(pe, 0, tpid);
- mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_L2);
- /* Shift 4 bytes - skip 1 vlan tag */
- mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+ /* VLAN tag detected, proceed with VID filtering */
+ mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VID);
+
/* Clear all ai bits for next iteration */
mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK);
mvpp2_prs_match_etype(pe, 4, tpid2);
mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN);
- /* Shift 8 bytes - skip 2 vlan tags */
- mvpp2_prs_sram_shift_set(pe, 2 * MVPP2_VLAN_TAG_LEN,
+ /* Shift 4 bytes - skip outer vlan tag */
+ mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE,
MVPP2_PRS_RI_VLAN_MASK);
mvpp2_prs_hw_write(priv, &pe);
}
+/* Initialize parser entries for VID filtering */
+static void mvpp2_prs_vid_init(struct mvpp2 *priv)
+{
+ struct mvpp2_prs_entry pe;
+
+ memset(&pe, 0, sizeof(pe));
+
+ /* Set default vid entry */
+ pe.index = MVPP2_PE_VID_FLTR_DEFAULT;
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_EDSA_VID_AI_BIT);
+
+ /* Skip VLAN header - Set offset to 4 bytes */
+ mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_LEN,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ /* Set default vid entry for extended DSA*/
+ memset(&pe, 0, sizeof(pe));
+
+ /* Set default vid entry */
+ pe.index = MVPP2_PE_VID_EDSA_FLTR_DEFAULT;
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_EDSA_VID_AI_BIT,
+ MVPP2_PRS_EDSA_VID_AI_BIT);
+
+ /* Skip VLAN header - Set offset to 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_EDSA_LEN,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+}
+
/* Match basic ethertypes */
static int mvpp2_prs_etype_init(struct mvpp2 *priv)
{
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
pe.index = MVPP2_PE_VLAN_DBL;
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
+
/* Clear ai for next iterations */
mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE,
return 0;
}
+/* Find tcam entry with matched pair <vid,port> */
+static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
+ u16 mask)
+{
+ unsigned char byte[2], enable[2];
+ struct mvpp2_prs_entry pe;
+ u16 rvid, rmask;
+ int tid;
+
+ /* Go through the all entries with MVPP2_PRS_LU_VID */
+ for (tid = MVPP2_PE_VID_FILT_RANGE_START;
+ tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) {
+ if (!priv->prs_shadow[tid].valid ||
+ priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID)
+ continue;
+
+ pe.index = tid;
+
+ mvpp2_prs_hw_read(priv, &pe);
+ mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]);
+ mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]);
+
+ rvid = ((byte[0] & 0xf) << 8) + byte[1];
+ rmask = ((enable[0] & 0xf) << 8) + enable[1];
+
+ if (rvid != vid || rmask != mask)
+ continue;
+
+ return tid;
+ }
+
+ return 0;
+}
+
+/* Write parser entry for VID filtering */
+static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid)
+{
+ unsigned int vid_start = MVPP2_PE_VID_FILT_RANGE_START +
+ port->id * MVPP2_PRS_VLAN_FILT_MAX;
+ unsigned int mask = 0xfff, reg_val, shift;
+ struct mvpp2 *priv = port->priv;
+ struct mvpp2_prs_entry pe;
+ int tid;
+
+ /* Scan TCAM and see if entry with this <vid,port> already exist */
+ tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask);
+
+ reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+ if (reg_val & MVPP2_DSA_EXTENDED)
+ shift = MVPP2_VLAN_TAG_EDSA_LEN;
+ else
+ shift = MVPP2_VLAN_TAG_LEN;
+
+ /* No such entry */
+ if (!tid) {
+ memset(&pe, 0, sizeof(pe));
+
+ /* Go through all entries from first to last in vlan range */
+ tid = mvpp2_prs_tcam_first_free(priv, vid_start,
+ vid_start +
+ MVPP2_PRS_VLAN_FILT_MAX_ENTRY);
+
+ /* There isn't room for a new VID filter */
+ if (tid < 0)
+ return tid;
+
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+ pe.index = tid;
+
+ /* Mask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, 0);
+ } else {
+ mvpp2_prs_hw_read(priv, &pe);
+ }
+
+ /* Enable the current port */
+ mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+ /* Continue - set next lookup */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Skip VLAN header - Set offset to 4 or 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Set match on VID */
+ mvpp2_prs_match_vid(&pe, MVPP2_PRS_VID_TCAM_BYTE, vid);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ /* Update shadow table */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ return 0;
+}
+
+/* Write parser entry for VID filtering */
+static void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid)
+{
+ struct mvpp2 *priv = port->priv;
+ int tid;
+
+ /* Scan TCAM and see if entry with this <vid,port> already exist */
+ tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff);
+
+ /* No such entry */
+ if (tid)
+ return;
+
+ mvpp2_prs_hw_inv(priv, tid);
+ priv->prs_shadow[tid].valid = false;
+}
+
+/* Remove all existing VID filters on this port */
+static void mvpp2_prs_vid_remove_all(struct mvpp2_port *port)
+{
+ struct mvpp2 *priv = port->priv;
+ int tid;
+
+ for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id);
+ tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) {
+ if (priv->prs_shadow[tid].valid)
+ mvpp2_prs_vid_entry_remove(port, tid);
+ }
+}
+
+/* Remove VID filering entry for this port */
+static void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port)
+{
+ unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+ struct mvpp2 *priv = port->priv;
+
+ /* Invalidate the guard entry */
+ mvpp2_prs_hw_inv(priv, tid);
+
+ priv->prs_shadow[tid].valid = false;
+}
+
+/* Add guard entry that drops packets when no VID is matched on this port */
+static void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port)
+{
+ unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+ struct mvpp2 *priv = port->priv;
+ unsigned int reg_val, shift;
+ struct mvpp2_prs_entry pe;
+
+ if (priv->prs_shadow[tid].valid)
+ return;
+
+ memset(&pe, 0, sizeof(pe));
+
+ pe.index = tid;
+
+ reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+ if (reg_val & MVPP2_DSA_EXTENDED)
+ shift = MVPP2_VLAN_TAG_EDSA_LEN;
+ else
+ shift = MVPP2_VLAN_TAG_LEN;
+
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ /* Mask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, 0);
+
+ /* Update port mask */
+ mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+ /* Continue - set next lookup */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Skip VLAN header - Set offset to 4 or 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Drop VLAN packets that don't belong to any VIDs on this port */
+ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK,
+ MVPP2_PRS_RI_DROP_MASK);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ /* Update shadow table */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+}
+
/* Parser default initialization */
static int mvpp2_prs_default_init(struct platform_device *pdev,
struct mvpp2 *priv)
mvpp2_prs_dsa_init(priv);
+ mvpp2_prs_vid_init(priv);
+
err = mvpp2_prs_etype_init(priv);
if (err)
return err;
int id = port->id;
bool allmulti = dev->flags & IFF_ALLMULTI;
+retry:
mvpp2_prs_mac_promisc_set(priv, id, dev->flags & IFF_PROMISC);
mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, allmulti);
mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, allmulti);
/* Remove all port->id's mcast enries */
mvpp2_prs_mcast_del_all(priv, id);
- if (allmulti && !netdev_mc_empty(dev)) {
- netdev_for_each_mc_addr(ha, dev)
- mvpp2_prs_mac_da_accept(priv, id, ha->addr, true);
+ if (!allmulti) {
+ netdev_for_each_mc_addr(ha, dev) {
+ if (mvpp2_prs_mac_da_accept(priv, id, ha->addr, true)) {
+ allmulti = true;
+ goto retry;
+ }
+ }
}
+
+ /* Disable VLAN filtering in promiscuous mode */
+ if (dev->flags & IFF_PROMISC)
+ mvpp2_prs_vid_disable_filtering(port);
+ else
+ mvpp2_prs_vid_enable_filtering(port);
}
static int mvpp2_set_mac_address(struct net_device *dev, void *p)
return ret;
}
+static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ int ret;
+
+ ret = mvpp2_prs_vid_entry_add(port, vid);
+ if (ret)
+ netdev_err(dev, "rx-vlan-filter offloading cannot accept more than %d VIDs per port\n",
+ MVPP2_PRS_VLAN_FILT_MAX - 1);
+ return ret;
+}
+
+static int mvpp2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+
+ mvpp2_prs_vid_entry_remove(port, vid);
+ return 0;
+}
+
+static int mvpp2_set_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ netdev_features_t changed = dev->features ^ features;
+ struct mvpp2_port *port = netdev_priv(dev);
+
+ if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ mvpp2_prs_vid_enable_filtering(port);
+ } else {
+ /* Invalidate all registered VID filters for this
+ * port
+ */
+ mvpp2_prs_vid_remove_all(port);
+
+ mvpp2_prs_vid_disable_filtering(port);
+ }
+ }
+
+ return 0;
+}
+
/* Ethtool methods */
/* Set interrupt coalescing for ethtools */
.ndo_change_mtu = mvpp2_change_mtu,
.ndo_get_stats64 = mvpp2_get_stats64,
.ndo_do_ioctl = mvpp2_ioctl,
+ .ndo_vlan_rx_add_vid = mvpp2_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = mvpp2_vlan_rx_kill_vid,
+ .ndo_set_features = mvpp2_set_features,
};
static const struct ethtool_ops mvpp2_eth_tool_ops = {
features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
dev->features = features | NETIF_F_RXCSUM;
- dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
+ dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
+ NETIF_F_HW_VLAN_CTAG_FILTER;
dev->vlan_features |= features;
dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
"rx_xdp_drop",
"rx_xdp_tx",
"rx_xdp_tx_full",
+
+ /* phy statistics */
+ "rx_packets_phy", "rx_bytes_phy",
+ "tx_packets_phy", "tx_bytes_phy",
};
static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= {
if (bitmap_iterator_test(&it))
data[index++] = ((unsigned long *)&priv->xdp_stats)[i];
+ for (i = 0; i < NUM_PHY_STATS; i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] = ((unsigned long *)&priv->phy_stats)[i];
+
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
data[index++] = priv->tx_ring[TX][i]->packets;
data[index++] = priv->tx_ring[TX][i]->bytes;
strcpy(data + (index++) * ETH_GSTRING_LEN,
main_strings[strings]);
+ for (i = 0; i < NUM_PHY_STATS; i++, strings++,
+ bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ strcpy(data + (index++) * ETH_GSTRING_LEN,
+ main_strings[strings]);
+
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
sprintf(data + (index++) * ETH_GSTRING_LEN,
"tx%d_packets", i);
bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS);
last_i += NUM_XDP_STATS;
+
+ if (!mlx4_is_slave(dev))
+ bitmap_set(stats_bitmap->bitmap, last_i, NUM_PHY_STATS);
+ last_i += NUM_PHY_STATS;
}
int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
mlx4_en_stop_port(dev, 1);
}
- en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n",
- ts_config.rx_filter,
- !!(features & NETIF_F_HW_VLAN_CTAG_RX));
-
mlx4_en_safe_replace_resources(priv, tmp);
if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
priv->port_stats.xmit_more += READ_ONCE(ring->xmit_more);
}
- if (mlx4_is_master(mdev->dev)) {
- stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
- &mlx4_en_stats->RTOT_prio_1,
- NUM_PRIORITIES);
- stats->tx_packets = en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
- &mlx4_en_stats->TTOT_prio_1,
- NUM_PRIORITIES);
- stats->rx_bytes = en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
- &mlx4_en_stats->ROCT_prio_1,
- NUM_PRIORITIES);
- stats->tx_bytes = en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
- &mlx4_en_stats->TOCT_prio_1,
- NUM_PRIORITIES);
+ if (!mlx4_is_slave(mdev->dev)) {
+ struct mlx4_en_phy_stats *p_stats = &priv->phy_stats;
+
+ p_stats->rx_packets_phy =
+ en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
+ &mlx4_en_stats->RTOT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->tx_packets_phy =
+ en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
+ &mlx4_en_stats->TTOT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->rx_bytes_phy =
+ en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
+ &mlx4_en_stats->ROCT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->tx_bytes_phy =
+ en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
+ &mlx4_en_stats->TOCT_prio_1,
+ NUM_PRIORITIES);
+ if (mlx4_is_master(mdev->dev)) {
+ stats->rx_packets = p_stats->rx_packets_phy;
+ stats->tx_packets = p_stats->tx_packets_phy;
+ stats->rx_bytes = p_stats->rx_bytes_phy;
+ stats->tx_bytes = p_stats->tx_bytes_phy;
+ }
}
/* net device stats */
return get_fixed_ipv4_csum(hw_checksum, skb, hdr);
}
+#if IS_ENABLED(CONFIG_IPV6)
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6)
+#else
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4)
+#endif
+
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
int polled = 0;
int index;
- if (unlikely(!priv->port_up))
+ if (unlikely(!priv->port_up || budget <= 0))
return 0;
- if (unlikely(budget <= 0))
- return polled;
-
ring = priv->rx_ring[cq_ring];
/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
ring->csum_ok++;
} else {
if (!(priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
- (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
-#if IS_ENABLED(CONFIG_IPV6)
- MLX4_CQE_STATUS_IPV6))))
-#else
- 0))))
-#endif
+ (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IP_ANY))))
goto csum_none;
if (check_csum(cqe, skb, va, dev->features))
goto csum_none;
struct mlx4_en_flow_stats_tx tx_flowstats;
struct mlx4_en_port_stats port_stats;
struct mlx4_en_xdp_stats xdp_stats;
+ struct mlx4_en_phy_stats phy_stats;
struct mlx4_en_stats_bitmap stats_bitmap;
struct list_head mc_list;
struct list_head curr_list;
#define NUM_XDP_STATS 3
};
+struct mlx4_en_phy_stats {
+ unsigned long rx_packets_phy;
+ unsigned long rx_bytes_phy;
+ unsigned long tx_packets_phy;
+ unsigned long tx_bytes_phy;
+#define NUM_PHY_STATS 4
+};
+
#define NUM_MAIN_STATS 21
#define MLX4_NUM_PRIORITIES 8
#define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \
- NUM_XDP_STATS)
+ NUM_XDP_STATS + NUM_PHY_STATS)
#define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
sizeof(((struct net_device_stats *)0)->n))
}
int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
- struct mlx5_buf *buf, int node)
+ struct mlx5_frag_buf *buf, int node)
{
dma_addr_t t;
buf->size = size;
buf->npages = 1;
buf->page_shift = (u8)get_order(size) + PAGE_SHIFT;
- buf->direct.buf = mlx5_dma_zalloc_coherent_node(dev, size,
- &t, node);
- if (!buf->direct.buf)
+
+ buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL);
+ if (!buf->frags)
return -ENOMEM;
- buf->direct.map = t;
+ buf->frags->buf = mlx5_dma_zalloc_coherent_node(dev, size,
+ &t, node);
+ if (!buf->frags->buf)
+ goto err_out;
+
+ buf->frags->map = t;
while (t & ((1 << buf->page_shift) - 1)) {
--buf->page_shift;
}
return 0;
+err_out:
+ kfree(buf->frags);
+ return -ENOMEM;
}
-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf)
+int mlx5_buf_alloc(struct mlx5_core_dev *dev,
+ int size, struct mlx5_frag_buf *buf)
{
return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node);
}
-EXPORT_SYMBOL_GPL(mlx5_buf_alloc);
+EXPORT_SYMBOL(mlx5_buf_alloc);
-void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
{
- dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf,
- buf->direct.map);
+ dma_free_coherent(&dev->pdev->dev, buf->size, buf->frags->buf,
+ buf->frags->map);
+
+ kfree(buf->frags);
}
EXPORT_SYMBOL_GPL(mlx5_buf_free);
err_out:
return -ENOMEM;
}
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node);
void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
{
}
kfree(buf->frags);
}
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_free);
static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
int node)
}
EXPORT_SYMBOL_GPL(mlx5_db_free);
-void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
+void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas)
{
u64 addr;
int i;
for (i = 0; i < buf->npages; i++) {
- addr = buf->direct.map + (i << buf->page_shift);
+ addr = buf->frags->map + (i << buf->page_shift);
pas[i] = cpu_to_be64(addr);
}
tasklet_ctx.list) {
list_del_init(&mcq->tasklet_ctx.list);
mcq->tasklet_ctx.comp(mcq);
- if (refcount_dec_and_test(&mcq->refcount))
- complete(&mcq->free);
+ mlx5_cq_put(mcq);
if (time_after(jiffies, end))
break;
}
* still arrive.
*/
if (list_empty_careful(&cq->tasklet_ctx.list)) {
- refcount_inc(&cq->refcount);
+ mlx5_cq_hold(cq);
list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
}
spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
}
-void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
-{
- struct mlx5_core_cq *cq;
- struct mlx5_cq_table *table = &dev->priv.cq_table;
-
- spin_lock(&table->lock);
- cq = radix_tree_lookup(&table->tree, cqn);
- if (likely(cq))
- refcount_inc(&cq->refcount);
- spin_unlock(&table->lock);
-
- if (!cq) {
- mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn);
- return;
- }
-
- ++cq->arm_sn;
-
- cq->comp(cq);
-
- if (refcount_dec_and_test(&cq->refcount))
- complete(&cq->free);
-}
-
-void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
-{
- struct mlx5_cq_table *table = &dev->priv.cq_table;
- struct mlx5_core_cq *cq;
-
- spin_lock(&table->lock);
-
- cq = radix_tree_lookup(&table->tree, cqn);
- if (cq)
- refcount_inc(&cq->refcount);
-
- spin_unlock(&table->lock);
-
- if (!cq) {
- mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn);
- return;
- }
-
- cq->event(cq, event_type);
-
- if (refcount_dec_and_test(&cq->refcount))
- complete(&cq->free);
-}
-
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *in, int inlen)
{
- struct mlx5_cq_table *table = &dev->priv.cq_table;
+ int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
+ u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
- u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
- int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
- c_eqn);
struct mlx5_eq *eq;
int err;
cq->cqn = MLX5_GET(create_cq_out, out, cqn);
cq->cons_index = 0;
cq->arm_sn = 0;
- refcount_set(&cq->refcount, 1);
+ cq->eq = eq;
+ refcount_set(&cq->refcount, 0);
+ mlx5_cq_hold(cq);
init_completion(&cq->free);
if (!cq->comp)
cq->comp = mlx5_add_cq_to_tasklet;
cq->tasklet_ctx.priv = &eq->tasklet_ctx;
INIT_LIST_HEAD(&cq->tasklet_ctx.list);
- spin_lock_irq(&table->lock);
- err = radix_tree_insert(&table->tree, cq->cqn, cq);
- spin_unlock_irq(&table->lock);
+ /* Add to comp EQ CQ tree to recv comp events */
+ err = mlx5_eq_add_cq(eq, cq);
if (err)
goto err_cmd;
+ /* Add to async EQ CQ tree to recv async events */
+ err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
+ if (err)
+ goto err_cq_add;
+
cq->pid = current->pid;
err = mlx5_debug_cq_add(dev, cq);
if (err)
return 0;
+err_cq_add:
+ mlx5_eq_del_cq(eq, cq);
err_cmd:
memset(din, 0, sizeof(din));
memset(dout, 0, sizeof(dout));
int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
{
- struct mlx5_cq_table *table = &dev->priv.cq_table;
u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
- struct mlx5_core_cq *tmp;
int err;
- spin_lock_irq(&table->lock);
- tmp = radix_tree_delete(&table->tree, cq->cqn);
- spin_unlock_irq(&table->lock);
- if (!tmp) {
- mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn);
- return -EINVAL;
- }
- if (tmp != cq) {
- mlx5_core_warn(dev, "corruption on srqn 0x%x\n", cq->cqn);
- return -EINVAL;
- }
+ err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
+ if (err)
+ return err;
+
+ err = mlx5_eq_del_cq(cq->eq, cq);
+ if (err)
+ return err;
MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
synchronize_irq(cq->irqn);
mlx5_debug_cq_remove(dev, cq);
- if (refcount_dec_and_test(&cq->refcount))
- complete(&cq->free);
+ mlx5_cq_put(cq);
wait_for_completion(&cq->free);
return 0;
return mlx5_core_modify_cq(dev, cq, in, sizeof(in));
}
EXPORT_SYMBOL(mlx5_core_modify_cq_moderation);
-
-int mlx5_init_cq_table(struct mlx5_core_dev *dev)
-{
- struct mlx5_cq_table *table = &dev->priv.cq_table;
- int err;
-
- memset(table, 0, sizeof(*table));
- spin_lock_init(&table->lock);
- INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
- err = mlx5_cq_debugfs_init(dev);
-
- return err;
-}
-
-void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev)
-{
- mlx5_cq_debugfs_cleanup(dev);
-}
}
EXPORT_SYMBOL(mlx5_unregister_interface);
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
+{
+ mutex_lock(&mlx5_intf_mutex);
+ mlx5_remove_dev_by_protocol(mdev, protocol);
+ mlx5_add_dev_by_protocol(mdev, protocol);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
struct mlx5_priv *priv = &mdev->priv;
"%pI4");
} else if (ethertype.v == ETH_P_IPV6) {
static const struct in6_addr full_ones = {
- .in6_u.u6_addr32 = {htonl(0xffffffff),
- htonl(0xffffffff),
- htonl(0xffffffff),
- htonl(0xffffffff)},
+ .in6_u.u6_addr32 = {__constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff)},
};
DECLARE_MASK_VAL(struct in6_addr, src_ipv6);
DECLARE_MASK_VAL(struct in6_addr, dst_ipv6);
param->wq.linear = 1;
}
-static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param)
+static void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_rq_param *param)
{
void *rqc = param->rqc;
void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
+
+ param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
}
static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
struct mlx5e_cq *cq,
struct mlx5e_cq_param *param)
{
+ param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
+ param->wq.db_numa_node = dev_to_node(&mdev->pdev->dev);
+
return mlx5e_alloc_cq_common(mdev, param, cq);
}
struct mlx5e_cq *cq = &drop_rq->cq;
int err;
- mlx5e_build_drop_rq_param(&rq_param);
+ mlx5e_build_drop_rq_param(mdev, &rq_param);
err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param);
if (err)
}
#endif
-int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
- void *type_data)
+static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
{
switch (type) {
#ifdef CONFIG_MLX5_ESWITCH
kfree(ppriv); /* mlx5e_rep_priv */
}
+static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+
+ return rpriv->netdev;
+}
+
static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
rep_if.load = mlx5e_vport_rep_load;
rep_if.unload = mlx5e_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
}
}
rep_if.load = mlx5e_nic_rep_load;
rep_if.unload = mlx5e_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
rep_if.priv = rpriv;
INIT_LIST_HEAD(&rpriv->vport_sqs_list);
mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
#include <linux/tcp.h>
#include <linux/bpf_trace.h>
#include <net/busy_poll.h>
+#include <net/ip6_checksum.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
void *data)
{
- u32 ci = cqcc & cq->wq.sz_m1;
+ u32 ci = cqcc & cq->wq.fbc.sz_m1;
memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
}
static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
{
- u8 op_own = (cqcc >> cq->wq.log_sz) & 1;
- u32 wq_sz = 1 << cq->wq.log_sz;
- u32 ci = cqcc & cq->wq.sz_m1;
+ struct mlx5_frag_buf_ctrl *fbc = &cq->wq.fbc;
+ u8 op_own = (cqcc >> fbc->log_sz) & 1;
+ u32 wq_sz = 1 << fbc->log_sz;
+ u32 ci = cqcc & fbc->sz_m1;
u32 ci_top = min_t(u32, wq_sz, ci + n);
for (; ci < ci_top; ci++, n--) {
cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum;
cq->title.op_own &= 0xf0;
- cq->title.op_own |= 0x01 & (cqcc >> cq->wq.log_sz);
+ cq->title.op_own |= 0x01 & (cqcc >> cq->wq.fbc.log_sz);
cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter);
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
return true;
}
+static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
+{
+ u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
+ u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
+ (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
+
+ tcp->check = 0;
+ tcp->psh = get_cqe_lro_tcppsh(cqe);
+
+ if (tcp_ack) {
+ tcp->ack = 1;
+ tcp->ack_seq = cqe->lro_ack_seq_num;
+ tcp->window = cqe->lro_tcp_win;
+ }
+}
+
static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
u32 cqe_bcnt)
{
struct ethhdr *eth = (struct ethhdr *)(skb->data);
struct tcphdr *tcp;
int network_depth = 0;
+ __wsum check;
__be16 proto;
u16 tot_len;
void *ip_p;
- u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
- u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
- (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
-
proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
tot_len = cqe_bcnt - network_depth;
ipv4->check = 0;
ipv4->check = ip_fast_csum((unsigned char *)ipv4,
ipv4->ihl);
+
+ mlx5e_lro_update_tcp_hdr(cqe, tcp);
+ check = csum_partial(tcp, tcp->doff * 4,
+ csum_unfold((__force __sum16)cqe->check_sum));
+ /* Almost done, don't forget the pseudo header */
+ tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr,
+ tot_len - sizeof(struct iphdr),
+ IPPROTO_TCP, check);
} else {
+ u16 payload_len = tot_len - sizeof(struct ipv6hdr);
struct ipv6hdr *ipv6 = ip_p;
tcp = ip_p + sizeof(struct ipv6hdr);
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
ipv6->hop_limit = cqe->lro_min_ttl;
- ipv6->payload_len = cpu_to_be16(tot_len -
- sizeof(struct ipv6hdr));
- }
-
- tcp->psh = get_cqe_lro_tcppsh(cqe);
-
- if (tcp_ack) {
- tcp->ack = 1;
- tcp->ack_seq = cqe->lro_ack_seq_num;
- tcp->window = cqe->lro_tcp_win;
+ ipv6->payload_len = cpu_to_be16(payload_len);
+
+ mlx5e_lro_update_tcp_hdr(cqe, tcp);
+ check = csum_partial(tcp, tcp->doff * 4,
+ csum_unfold((__force __sum16)cqe->check_sum));
+ /* Almost done, don't forget the pseudo header */
+ tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len,
+ IPPROTO_TCP, check);
}
}
if (iph->protocol != IPPROTO_UDP)
goto out;
- udph = udp_hdr(skb);
+ /* Don't assume skb_transport_header() was set */
+ udph = (struct udphdr *)((u8 *)iph + 4 * iph->ihl);
if (udph->dest != htons(9))
goto out;
if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
} else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
- if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
+ if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) ||
+ tcf_vlan_push_prio(a))
return -EOPNOTSUPP;
attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
default:
hlen = mlx5e_skb_l2_header_offset(skb);
}
- return min_t(u16, hlen, skb->len);
+ return min_t(u16, hlen, skb_headlen(skb));
}
static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
}
}
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *cq = NULL;
+
+ spin_lock(&table->lock);
+ cq = radix_tree_lookup(&table->tree, cqn);
+ if (likely(cq))
+ mlx5_cq_hold(cq);
+ spin_unlock(&table->lock);
+
+ return cq;
+}
+
+static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
+{
+ struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
+ return;
+ }
+
+ ++cq->arm_sn;
+
+ cq->comp(cq);
+
+ mlx5_cq_put(cq);
+}
+
+static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
+{
+ struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+ return;
+ }
+
+ cq->event(cq, event_type);
+
+ mlx5_cq_put(cq);
+}
+
static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
{
struct mlx5_eq *eq = eq_ptr;
switch (eqe->type) {
case MLX5_EVENT_TYPE_COMP:
cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
- mlx5_cq_completion(dev, cqn);
+ mlx5_eq_cq_completion(eq, cqn);
break;
case MLX5_EVENT_TYPE_DCT_DRAINED:
rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
cqn, eqe->data.cq_err.syndrome);
- mlx5_cq_event(dev, cqn, eqe->type);
+ mlx5_eq_cq_event(eq, cqn, eqe->type);
break;
case MLX5_EVENT_TYPE_PAGE_REQUEST:
int nent, u64 mask, const char *name,
enum mlx5_eq_type type)
{
+ struct mlx5_cq_table *cq_table = &eq->cq_table;
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
struct mlx5_priv *priv = &dev->priv;
irq_handler_t handler;
u32 *in;
int err;
+ /* Init CQ table */
+ memset(cq_table, 0, sizeof(*cq_table));
+ spin_lock_init(&cq_table->lock);
+ INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+
eq->type = type;
eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
eq->cons_index = 0;
mlx5_buf_free(dev, &eq->buf);
return err;
}
-EXPORT_SYMBOL_GPL(mlx5_create_map_eq);
int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
return err;
}
-EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ int err;
+
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree, cq->cqn, cq);
+ spin_unlock_irq(&table->lock);
+
+ return err;
+}
+
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *tmp;
+
+ spin_lock_irq(&table->lock);
+ tmp = radix_tree_delete(&table->tree, cq->cqn);
+ spin_unlock_irq(&table->lock);
+
+ if (!tmp) {
+ mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn);
+ return -ENOENT;
+ }
+
+ if (tmp != cq) {
+ mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn);
+ return -EINVAL;
+ }
+
+ return 0;
+}
int mlx5_eq_init(struct mlx5_core_dev *dev)
{
MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
}
-EXPORT_SYMBOL_GPL(mlx5_core_eq_query);
esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
+ /* Create steering drop counters for ingress and egress ACLs */
+ if (vport_num && esw->mode == SRIOV_LEGACY)
+ esw_vport_create_drop_counters(vport);
+
/* Restore old vport configuration */
esw_apply_vport_conf(esw, vport);
if (!vport_num)
vport->info.trusted = true;
- /* create steering drop counters for ingress and egress ACLs */
- if (vport_num && esw->mode == SRIOV_LEGACY)
- esw_vport_create_drop_counters(vport);
-
esw_vport_change_handle_locked(vport);
esw->enabled_vports++;
esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
esw->mode = mode;
- if (mode == SRIOV_LEGACY)
+ if (mode == SRIOV_LEGACY) {
err = esw_create_legacy_fdb_table(esw, nvfs + 1);
- else
+ } else {
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
err = esw_offloads_init(esw, nvfs + 1);
+ }
+
if (err)
goto abort;
abort:
esw->mode = SRIOV_NONE;
+
+ if (mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
return err;
}
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
{
struct esw_mc_addr *mc_promisc;
+ int old_mode;
int nvports;
int i;
else if (esw->mode == SRIOV_OFFLOADS)
esw_offloads_cleanup(esw, nvports);
+ old_mode = esw->mode;
esw->mode = SRIOV_NONE;
+
+ if (old_mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
}
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
kvfree(out);
return err;
}
+
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return esw->mode;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
#include <linux/if_link.h>
#include <net/devlink.h>
#include <linux/mlx5/device.h>
+#include <linux/mlx5/eswitch.h>
#include "lib/mpfs.h"
-enum {
- SRIOV_NONE,
- SRIOV_LEGACY,
- SRIOV_OFFLOADS
-};
-
-enum {
- REP_ETH,
- NUM_REP_TYPES,
-};
-
#ifdef CONFIG_MLX5_ESWITCH
#define MLX5_MAX_UC_PER_VPORT(dev) \
struct mlx5_flow_table *fdb;
struct mlx5_flow_group *send_to_vport_grp;
struct mlx5_flow_group *miss_grp;
- struct mlx5_flow_handle *miss_rule;
+ struct mlx5_flow_handle *miss_rule_uni;
+ struct mlx5_flow_handle *miss_rule_multi;
int vlan_push_pop_refcount;
} offloads;
};
};
-struct mlx5_eswitch_rep;
-struct mlx5_eswitch_rep_if {
- int (*load)(struct mlx5_core_dev *dev,
- struct mlx5_eswitch_rep *rep);
- void (*unload)(struct mlx5_eswitch_rep *rep);
- void *priv;
- bool valid;
-};
-
-struct mlx5_eswitch_rep {
- struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
- u16 vport;
- u8 hw_id[ETH_ALEN];
- u16 vlan;
- u32 vlan_refcount;
-};
-
struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
int vport,
struct ifla_vf_stats *vf_stats);
-struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport,
- u32 sqn);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
struct mlx5_flow_spec;
int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
- int vport_index,
- struct mlx5_eswitch_rep_if *rep_if,
- u8 rep_type);
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
- int vport_index,
- u8 rep_type);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
kvfree(spec);
return flow_rule;
}
+EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
{
struct mlx5_flow_destination dest = {};
struct mlx5_flow_handle *flow_rule = NULL;
struct mlx5_flow_spec *spec;
+ void *headers_c;
+ void *headers_v;
int err = 0;
+ u8 *dmac_c;
+ u8 *dmac_v;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec) {
goto out;
}
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
+ outer_headers.dmac_47_16);
+ dmac_c[0] = 0x01;
+
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest.vport_num = 0;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
&flow_act, &dest, 1);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
- esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err);
+ esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err);
goto out;
}
- esw->fdb_table.offloads.miss_rule = flow_rule;
+ esw->fdb_table.offloads.miss_rule_uni = flow_rule;
+
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
+ outer_headers.dmac_47_16);
+ dmac_v[0] = 0x01;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+ goto out;
+ }
+
+ esw->fdb_table.offloads.miss_rule_multi = flow_rule;
+
out:
kvfree(spec);
return err;
}
#define MAX_PF_SQ 256
+#define MAX_SQ_NVPORTS 32
static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
{
struct mlx5_flow_group *g;
void *match_criteria;
u32 *flow_group_in;
+ u8 *dmac;
esw_debug(esw->dev, "Create offloads FDB Tables\n");
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
if (err)
goto fast_fdb_err;
- table_size = nvports + MAX_PF_SQ + 1;
+ table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
ft_attr.max_fte = table_size;
ft_attr.prio = FDB_SLOW_PATH;
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
- ix = nvports + MAX_PF_SQ;
+ ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
/* create miss group */
memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers.dmac_47_16);
+ dmac[0] = 0x01;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
g = mlx5_create_flow_group(fdb, flow_group_in);
if (IS_ERR(g)) {
return;
esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
- mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
{
int err;
- /* disable PF RoCE so missed packets don't go through RoCE steering */
- mlx5_dev_list_lock();
- mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
-
err = esw_create_offloads_fdb_tables(esw, nvports);
if (err)
- goto create_fdb_err;
+ return err;
err = esw_create_offloads_table(esw);
if (err)
create_ft_err:
esw_destroy_offloads_fdb_tables(esw);
-create_fdb_err:
- /* enable back PF RoCE */
- mlx5_dev_list_lock();
- mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
-
return err;
}
}
/* enable back PF RoCE */
- mlx5_dev_list_lock();
- mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
return err;
}
rep_if->load = __rep_if->load;
rep_if->unload = __rep_if->unload;
+ rep_if->get_proto_dev = __rep_if->get_proto_dev;
rep_if->priv = __rep_if->priv;
rep_if->valid = true;
}
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
int vport_index, u8 rep_type)
rep->rep_if[rep_type].valid = false;
}
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
{
rep = &offloads->vport_reps[UPLINK_REP_INDEX];
return rep->rep_if[rep_type].priv;
}
+
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+ int vport,
+ u8 rep_type)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+ struct mlx5_eswitch_rep *rep;
+
+ if (vport == FDB_UPLINK_VPORT)
+ vport = UPLINK_REP_INDEX;
+
+ rep = &offloads->vport_reps[vport];
+
+ if (rep->rep_if[rep_type].valid &&
+ rep->rep_if[rep_type].get_proto_dev)
+ return rep->rep_if[rep_type].get_proto_dev(rep);
+ return NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
+
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+}
+EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
+
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+ int vport)
+{
+ return &esw->offloads.vport_reps[vport];
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP |
MLX5_FLOW_CONTEXT_ACTION_ENCAP |
- MLX5_FLOW_CONTEXT_ACTION_DECAP))
+ MLX5_FLOW_CONTEXT_ACTION_DECAP |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR))
return true;
return false;
/* Collect all fgs which has a matching match_criteria */
err = build_match_list(&match_head, ft, spec);
- if (err)
+ if (err) {
+ if (take_write)
+ up_write_ref_node(&ft->node);
return ERR_PTR(err);
+ }
if (!take_write)
up_read_ref_node(&ft->node);
dest_num, version);
free_match_list(&match_head);
if (!IS_ERR(rule) ||
- (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN))
+ (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) {
+ if (take_write)
+ up_write_ref_node(&ft->node);
return rule;
+ }
if (!take_write) {
nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
#include <linux/highmem.h>
#include <rdma/mlx5-abi.h>
#include "en.h"
+#include "clock.h"
enum {
MLX5_CYCLES_SHIFT = 23
MLX5_SET(cmd_hca_cap,
set_hca_cap,
cache_line_128byte,
- cache_line_size() == 128 ? 1 : 0);
+ cache_line_size() >= 128 ? 1 : 0);
if (MLX5_CAP_GEN_MAX(dev, dct))
MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1);
goto out;
}
- err = mlx5_init_cq_table(dev);
+ err = mlx5_cq_debugfs_init(dev);
if (err) {
- dev_err(&pdev->dev, "failed to initialize cq table\n");
+ dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
goto err_eq_cleanup;
}
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
- mlx5_cleanup_cq_table(dev);
+ mlx5_cq_debugfs_cleanup(dev);
err_eq_cleanup:
mlx5_eq_cleanup(dev);
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
- mlx5_cleanup_cq_table(dev);
+ mlx5_cq_debugfs_cleanup(dev);
mlx5_eq_cleanup(dev);
}
#include <linux/sched.h>
#include <linux/if_link.h>
#include <linux/firmware.h>
+#include <linux/mlx5/cq.h>
#define DRIVER_NAME "mlx5_core"
#define DRIVER_VERSION "5.0-0"
-#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev))
-#define MLX5_VPORT_MANAGER(mdev) \
- (MLX5_CAP_GEN(mdev, vport_group_manager) && \
- (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
- mlx5_core_is_pf(mdev))
-
extern uint mlx5_core_debug_mask;
#define mlx5_core_dbg(__dev, format, ...) \
u32 element_id);
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
+
+int mlx5_eq_init(struct mlx5_core_dev *dev);
+void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
+int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+ int nent, u64 mask, const char *name,
+ enum mlx5_eq_type type);
+int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ u32 *out, int outlen);
+int mlx5_start_eqs(struct mlx5_core_dev *dev);
+void mlx5_stop_eqs(struct mlx5_core_dev *dev);
struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
void mlx5_cq_tasklet_cb(unsigned long data);
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
u8 access_reg_group);
int mlx5_lag_allow(struct mlx5_core_dev *dev);
int mlx5_lag_forbid(struct mlx5_core_dev *dev);
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
#endif /* __MLX5_CORE_H__ */
u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
{
- return wq->sz_m1 + 1;
+ return wq->fbc.sz_m1 + 1;
}
u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
{
- return mlx5_cqwq_get_size(wq) << wq->log_stride;
+ return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride;
}
static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq)
goto err_db_free;
}
- wq->buf = wq_ctrl->buf.direct.buf;
+ wq->buf = wq_ctrl->buf.frags->buf;
wq->db = wq_ctrl->db.db;
wq_ctrl->mdev = mdev;
goto err_db_free;
}
- wq->rq.buf = wq_ctrl->buf.direct.buf;
+ wq->rq.buf = wq_ctrl->buf.frags->buf;
wq->sq.buf = wq->rq.buf + mlx5_wq_cyc_get_byte_size(&wq->rq);
wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR];
wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR];
{
int err;
- wq->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
- wq->log_sz = MLX5_GET(cqc, cqc, log_cq_size);
- wq->sz_m1 = (1 << wq->log_sz) - 1;
- wq->log_frag_strides = PAGE_SHIFT - wq->log_stride;
- wq->frag_sz_m1 = (1 << wq->log_frag_strides) - 1;
+ mlx5_core_init_cq_frag_buf(&wq->fbc, cqc);
err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
goto err_db_free;
}
- wq->frag_buf = wq_ctrl->frag_buf;
+ wq->fbc.frag_buf = wq_ctrl->frag_buf;
wq->db = wq_ctrl->db.db;
wq_ctrl->mdev = mdev;
goto err_db_free;
}
- wq->buf = wq_ctrl->buf.direct.buf;
+ wq->buf = wq_ctrl->buf.frags->buf;
wq->db = wq_ctrl->db.db;
for (i = 0; i < wq->sz_m1; i++) {
struct mlx5_wq_ctrl {
struct mlx5_core_dev *mdev;
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
struct mlx5_db db;
};
};
struct mlx5_cqwq {
- struct mlx5_frag_buf frag_buf;
- __be32 *db;
- u32 sz_m1;
- u32 frag_sz_m1;
- u32 cc; /* consumer counter */
- u8 log_sz;
- u8 log_stride;
- u8 log_frag_strides;
+ struct mlx5_frag_buf_ctrl fbc;
+ __be32 *db;
+ u32 cc; /* consumer counter */
};
struct mlx5_wq_ll {
static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
{
- return wq->cc & wq->sz_m1;
+ return wq->cc & wq->fbc.sz_m1;
}
static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
{
- unsigned int frag = (ix >> wq->log_frag_strides);
-
- return wq->frag_buf.frags[frag].buf +
- ((wq->frag_sz_m1 & ix) << wq->log_stride);
+ return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
}
static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq)
{
- return wq->cc >> wq->log_sz;
+ return wq->cc >> wq->fbc.log_sz;
}
static inline void mlx5_cqwq_pop(struct mlx5_cqwq *wq)
depends on IPV6 || IPV6=n
select PARMAN
select MLXFW
+ depends on NET_IPGRE
+ depends on !(MLXSW_CORE=y && NET_IPGRE=m)
+ depends on IPV6_GRE
+ depends on !(MLXSW_CORE=y && IPV6_GRE=m)
default m
---help---
This driver supports Mellanox Technologies Spectrum Ethernet
spectrum_cnt.o spectrum_fid.o \
spectrum_ipip.o spectrum_acl_flex_actions.o \
spectrum_mr.o spectrum_mr_tcam.o \
- spectrum_qdisc.o
+ spectrum_qdisc.o spectrum_span.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
/*
* drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
struct mlxsw_afa_resource resource;
int span_id;
u8 local_in_port;
- u8 local_out_port;
bool ingress;
};
{
block->afa->ops->mirror_del(block->afa->ops_priv,
mirror->local_in_port,
- mirror->local_out_port,
+ mirror->span_id,
mirror->ingress);
kfree(mirror);
}
}
static struct mlxsw_afa_mirror *
-mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port,
- bool ingress)
+mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress)
{
struct mlxsw_afa_mirror *mirror;
int err;
return ERR_PTR(-ENOMEM);
err = block->afa->ops->mirror_add(block->afa->ops_priv,
- local_in_port, local_out_port,
+ local_in_port, out_dev,
ingress, &mirror->span_id);
if (err)
goto err_mirror_add;
mirror->ingress = ingress;
- mirror->local_out_port = local_out_port;
mirror->local_in_port = local_in_port;
mirror->resource.destructor = mlxsw_afa_mirror_destructor;
mlxsw_afa_resource_add(block, &mirror->resource);
}
int
-mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port, bool ingress)
+mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress)
{
struct mlxsw_afa_mirror *mirror;
int err;
- mirror = mlxsw_afa_mirror_create(block, local_in_port, local_out_port,
+ mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev,
ingress);
if (IS_ERR(mirror))
return PTR_ERR(mirror);
#define _MLXSW_CORE_ACL_FLEX_ACTIONS_H
#include <linux/types.h>
+#include <linux/netdevice.h>
struct mlxsw_afa;
struct mlxsw_afa_block;
void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index);
int (*counter_index_get)(void *priv, unsigned int *p_counter_index);
void (*counter_index_put)(void *priv, unsigned int counter_index);
- int (*mirror_add)(void *priv, u8 locol_in_port, u8 local_out_port,
+ int (*mirror_add)(void *priv, u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress, int *p_span_id);
- void (*mirror_del)(void *priv, u8 locol_in_port, u8 local_out_port,
+ void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
bool ingress);
};
int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
u16 trap_id);
int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port,
+ u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress);
int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
u8 local_port, bool in_port);
/*
* drivers/net/ethernet/mellanox/mlxsw/reg.h
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com>
* Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
* Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*/
MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1);
+enum mlxsw_reg_mpat_span_type {
+ /* Local SPAN Ethernet.
+ * The original packet is not encapsulated.
+ */
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH = 0x0,
+
+ /* Encapsulated Remote SPAN Ethernet L3 GRE.
+ * The packet is encapsulated with GRE header.
+ */
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3 = 0x3,
+};
+
+/* reg_mpat_span_type
+ * SPAN type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, span_type, 0x04, 0, 4);
+
+/* Remote SPAN - Ethernet VLAN
+ * - - - - - - - - - - - - - -
+ */
+
+/* reg_mpat_eth_rspan_vid
+ * Encapsulation header VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_vid, 0x18, 0, 12);
+
+/* Encapsulated Remote SPAN - Ethernet L2
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_version {
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER = 15,
+};
+
+/* reg_mpat_eth_rspan_version
+ * RSPAN mirror header version.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_version, 0x10, 18, 4);
+
+/* reg_mpat_eth_rspan_mac
+ * Destination MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_mac, 0x12, 6);
+
+/* reg_mpat_eth_rspan_tp
+ * Tag Packet. Indicates whether the mirroring header should be VLAN tagged.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_tp, 0x18, 16, 1);
+
+/* Encapsulated Remote SPAN - Ethernet L3
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_protocol {
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6,
+};
+
+/* reg_mpat_eth_rspan_protocol
+ * SPAN encapsulation protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_protocol, 0x18, 24, 4);
+
+/* reg_mpat_eth_rspan_ttl
+ * Encapsulation header Time-to-Live/HopLimit.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_ttl, 0x1C, 4, 8);
+
+/* reg_mpat_eth_rspan_smac
+ * Source MAC address
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_smac, 0x22, 6);
+
+/* reg_mpat_eth_rspan_dip*
+ * Destination IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_dip4, 0x4C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_dip6, 0x40, 16);
+
+/* reg_mpat_eth_rspan_sip*
+ * Source IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_sip4, 0x5C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_sip6, 0x50, 16);
+
static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
- u16 system_port, bool e)
+ u16 system_port, bool e,
+ enum mlxsw_reg_mpat_span_type span_type)
{
MLXSW_REG_ZERO(mpat, payload);
mlxsw_reg_mpat_pa_id_set(payload, pa_id);
mlxsw_reg_mpat_e_set(payload, e);
mlxsw_reg_mpat_qos_set(payload, 1);
mlxsw_reg_mpat_be_set(payload, 1);
+ mlxsw_reg_mpat_span_type_set(payload, span_type);
+}
+
+static inline void mlxsw_reg_mpat_eth_rspan_pack(char *payload, u16 vid)
+{
+ mlxsw_reg_mpat_eth_rspan_vid_set(payload, vid);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l2_pack(char *payload,
+ enum mlxsw_reg_mpat_eth_rspan_version version,
+ const char *mac,
+ bool tp)
+{
+ mlxsw_reg_mpat_eth_rspan_version_set(payload, version);
+ mlxsw_reg_mpat_eth_rspan_mac_memcpy_to(payload, mac);
+ mlxsw_reg_mpat_eth_rspan_tp_set(payload, tp);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(char *payload, u8 ttl,
+ const char *smac,
+ u32 sip, u32 dip)
+{
+ mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+ mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+ mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4);
+ mlxsw_reg_mpat_eth_rspan_sip4_set(payload, sip);
+ mlxsw_reg_mpat_eth_rspan_dip4_set(payload, dip);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl,
+ const char *smac,
+ struct in6_addr sip, struct in6_addr dip)
+{
+ mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+ mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+ mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6);
+ mlxsw_reg_mpat_eth_rspan_sip6_memcpy_to(payload, (void *)&sip);
+ mlxsw_reg_mpat_eth_rspan_dip6_memcpy_to(payload, (void *)&dip);
}
/* MPAR - Monitoring Port Analyzer Register
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum.c
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com>
* Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_acl_flex_actions.h"
+#include "spectrum_span.h"
#include "../mlxfw/mlxfw.h"
#define MLXSW_FWREV_MAJOR 13
return 0;
}
-static int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
-{
- int i;
-
- if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
- return -EIO;
-
- mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
- MAX_SPAN);
- mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
- sizeof(struct mlxsw_sp_span_entry),
- GFP_KERNEL);
- if (!mlxsw_sp->span.entries)
- return -ENOMEM;
-
- for (i = 0; i < mlxsw_sp->span.entries_count; i++)
- INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list);
-
- return 0;
-}
-
-static void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
-{
- int i;
-
- for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
- struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
- WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
- }
- kfree(mlxsw_sp->span.entries);
-}
-
-static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
-{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- struct mlxsw_sp_span_entry *span_entry;
- char mpat_pl[MLXSW_REG_MPAT_LEN];
- u8 local_port = port->local_port;
- int index;
- int i;
- int err;
-
- /* find a free entry to use */
- index = -1;
- for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
- if (!mlxsw_sp->span.entries[i].used) {
- index = i;
- span_entry = &mlxsw_sp->span.entries[i];
- break;
- }
- }
- if (index < 0)
- return NULL;
-
- /* create a new port analayzer entry for local_port */
- mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
- if (err)
- return NULL;
-
- span_entry->used = true;
- span_entry->id = index;
- span_entry->ref_count = 1;
- span_entry->local_port = local_port;
- return span_entry;
-}
-
-static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_span_entry *span_entry)
-{
- u8 local_port = span_entry->local_port;
- char mpat_pl[MLXSW_REG_MPAT_LEN];
- int pa_id = span_entry->id;
-
- mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
- mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
- span_entry->used = false;
-}
-
-struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
-{
- int i;
-
- for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
- struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
- if (curr->used && curr->local_port == local_port)
- return curr;
- }
- return NULL;
-}
-
-static struct mlxsw_sp_span_entry
-*mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
-{
- struct mlxsw_sp_span_entry *span_entry;
-
- span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
- port->local_port);
- if (span_entry) {
- /* Already exists, just take a reference */
- span_entry->ref_count++;
- return span_entry;
- }
-
- return mlxsw_sp_span_entry_create(port);
-}
-
-static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_span_entry *span_entry)
-{
- WARN_ON(!span_entry->ref_count);
- if (--span_entry->ref_count == 0)
- mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
- return 0;
-}
-
-static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
-{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- struct mlxsw_sp_span_inspected_port *p;
- int i;
-
- for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
- struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
- list_for_each_entry(p, &curr->bound_ports_list, list)
- if (p->local_port == port->local_port &&
- p->type == MLXSW_SP_SPAN_EGRESS)
- return true;
- }
-
- return false;
-}
-
-static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
- int mtu)
-{
- return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
-}
-
-static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
-{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- char sbib_pl[MLXSW_REG_SBIB_LEN];
- int err;
-
- /* If port is egress mirrored, the shared buffer size should be
- * updated according to the mtu value
- */
- if (mlxsw_sp_span_is_egress_mirror(port)) {
- u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
-
- mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
- if (err) {
- netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
- return err;
- }
- }
-
- return 0;
-}
-
-static struct mlxsw_sp_span_inspected_port *
-mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port,
- struct mlxsw_sp_span_entry *span_entry)
-{
- struct mlxsw_sp_span_inspected_port *p;
-
- list_for_each_entry(p, &span_entry->bound_ports_list, list)
- if (port->local_port == p->local_port)
- return p;
- return NULL;
-}
-
-static int
-mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
- struct mlxsw_sp_span_entry *span_entry,
- enum mlxsw_sp_span_type type,
- bool bind)
-{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- char mpar_pl[MLXSW_REG_MPAR_LEN];
- int pa_id = span_entry->id;
-
- /* bind the port to the SPAN entry */
- mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
- (enum mlxsw_reg_mpar_i_e) type, bind, pa_id);
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
-}
-
-static int
-mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
- struct mlxsw_sp_span_entry *span_entry,
- enum mlxsw_sp_span_type type,
- bool bind)
-{
- struct mlxsw_sp_span_inspected_port *inspected_port;
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- char sbib_pl[MLXSW_REG_SBIB_LEN];
- int err;
-
- /* if it is an egress SPAN, bind a shared buffer to it */
- if (type == MLXSW_SP_SPAN_EGRESS) {
- u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
- port->dev->mtu);
-
- mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
- if (err) {
- netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
- return err;
- }
- }
-
- if (bind) {
- err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
- true);
- if (err)
- goto err_port_bind;
- }
-
- inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
- if (!inspected_port) {
- err = -ENOMEM;
- goto err_inspected_port_alloc;
- }
- inspected_port->local_port = port->local_port;
- inspected_port->type = type;
- list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
-
- return 0;
-
-err_inspected_port_alloc:
- if (bind)
- mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
- false);
-err_port_bind:
- if (type == MLXSW_SP_SPAN_EGRESS) {
- mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
- mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
- }
- return err;
-}
-
-static void
-mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
- struct mlxsw_sp_span_entry *span_entry,
- enum mlxsw_sp_span_type type,
- bool bind)
-{
- struct mlxsw_sp_span_inspected_port *inspected_port;
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- char sbib_pl[MLXSW_REG_SBIB_LEN];
-
- inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry);
- if (!inspected_port)
- return;
-
- if (bind)
- mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
- false);
- /* remove the SBIB buffer if it was egress SPAN */
- if (type == MLXSW_SP_SPAN_EGRESS) {
- mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
- mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
- }
-
- mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
-
- list_del(&inspected_port->list);
- kfree(inspected_port);
-}
-
-int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
- struct mlxsw_sp_port *to,
- enum mlxsw_sp_span_type type, bool bind)
-{
- struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
- struct mlxsw_sp_span_entry *span_entry;
- int err;
-
- span_entry = mlxsw_sp_span_entry_get(to);
- if (!span_entry)
- return -ENOENT;
-
- netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
- span_entry->id);
-
- err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
- if (err)
- goto err_port_bind;
-
- return 0;
-
-err_port_bind:
- mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
- return err;
-}
-
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
- enum mlxsw_sp_span_type type, bool bind)
-{
- struct mlxsw_sp_span_entry *span_entry;
-
- span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
- destination_port);
- if (!span_entry) {
- netdev_err(from->dev, "no span entry found\n");
- return;
- }
-
- netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
- span_entry->id);
- mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
-}
-
static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
bool enable, u32 rate)
{
xstats->tail_drop[i] =
mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl);
}
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT,
+ i, ppcnt_pl);
+ if (err)
+ continue;
+
+ xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl);
+ xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl);
+ }
}
static void update_stats_cache(struct work_struct *work)
bool ingress)
{
enum mlxsw_sp_span_type span_type;
- struct mlxsw_sp_port *to_port;
struct net_device *to_dev;
to_dev = tcf_mirred_dev(a);
return -EINVAL;
}
- if (!mlxsw_sp_port_dev_check(to_dev)) {
- netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port");
- return -EOPNOTSUPP;
- }
- to_port = netdev_priv(to_dev);
-
- mirror->to_local_port = to_port->local_port;
mirror->ingress = ingress;
span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type,
- true);
+ return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type,
+ true, &mirror->span_id);
}
static void
span_type = mirror->ingress ?
MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->to_local_port,
+ mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id,
span_type, true);
}
goto err_afa_init;
}
+ err = mlxsw_sp_span_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
+ goto err_span_init;
+ }
+
+ /* Initialize router after SPAN is initialized, so that the FIB and
+ * neighbor event handlers can issue SPAN respin.
+ */
err = mlxsw_sp_router_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
goto err_router_init;
}
- /* Initialize netdevice notifier after router is initialized, so that
- * the event handler can use router structures.
+ /* Initialize netdevice notifier after router and SPAN is initialized,
+ * so that the event handler can use router structures and call SPAN
+ * respin.
*/
mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
goto err_netdev_notifier;
}
- err = mlxsw_sp_span_init(mlxsw_sp);
- if (err) {
- dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
- goto err_span_init;
- }
-
err = mlxsw_sp_acl_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
err_dpipe_init:
mlxsw_sp_acl_fini(mlxsw_sp);
err_acl_init:
- mlxsw_sp_span_fini(mlxsw_sp);
-err_span_init:
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
err_netdev_notifier:
mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
+ mlxsw_sp_span_fini(mlxsw_sp);
+err_span_init:
mlxsw_sp_afa_fini(mlxsw_sp);
err_afa_init:
mlxsw_sp_counter_pool_fini(mlxsw_sp);
mlxsw_sp_ports_remove(mlxsw_sp);
mlxsw_sp_dpipe_fini(mlxsw_sp);
mlxsw_sp_acl_fini(mlxsw_sp);
- mlxsw_sp_span_fini(mlxsw_sp);
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
mlxsw_sp_router_fini(mlxsw_sp);
+ mlxsw_sp_span_fini(mlxsw_sp);
mlxsw_sp_afa_fini(mlxsw_sp);
mlxsw_sp_counter_pool_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
.resource_query_enable = 1,
};
-static bool
-mlxsw_sp_resource_kvd_granularity_validate(struct netlink_ext_ack *extack,
- u64 size)
-{
- const struct mlxsw_config_profile *profile;
-
- profile = &mlxsw_sp_config_profile;
- if (size % profile->kvd_hash_granularity) {
- NL_SET_ERR_MSG_MOD(extack, "resource set with wrong granularity");
- return false;
- }
- return true;
-}
-
-static int
-mlxsw_sp_resource_kvd_size_validate(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack)
-{
- NL_SET_ERR_MSG_MOD(extack, "kvd size cannot be changed");
- return -EINVAL;
-}
-
-static int
-mlxsw_sp_resource_kvd_linear_size_validate(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack)
-{
- if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
- return -EINVAL;
-
- return 0;
-}
-
-static int
-mlxsw_sp_resource_kvd_hash_single_size_validate(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack)
-{
- struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-
- if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
- return -EINVAL;
-
- if (size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE)) {
- NL_SET_ERR_MSG_MOD(extack, "hash single size is smaller than minimum");
- return -EINVAL;
- }
- return 0;
-}
-
-static int
-mlxsw_sp_resource_kvd_hash_double_size_validate(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack)
-{
- struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-
- if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
- return -EINVAL;
-
- if (size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE)) {
- NL_SET_ERR_MSG_MOD(extack, "hash double size is smaller than minimum");
- return -EINVAL;
- }
- return 0;
-}
-
static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
{
struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
return mlxsw_sp_kvdl_occ_get(mlxsw_sp);
}
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_ops = {
- .size_validate = mlxsw_sp_resource_kvd_size_validate,
-};
-
static struct devlink_resource_ops mlxsw_sp_resource_kvd_linear_ops = {
- .size_validate = mlxsw_sp_resource_kvd_linear_size_validate,
.occ_get = mlxsw_sp_resource_kvd_linear_occ_get,
};
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_hash_single_ops = {
- .size_validate = mlxsw_sp_resource_kvd_hash_single_size_validate,
-};
-
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_hash_double_ops = {
- .size_validate = mlxsw_sp_resource_kvd_hash_double_size_validate,
-};
-
static struct devlink_resource_size_params mlxsw_sp_kvd_size_params;
static struct devlink_resource_size_params mlxsw_sp_linear_size_params;
static struct devlink_resource_size_params mlxsw_sp_hash_single_size_params;
MLXSW_SP_RESOURCE_KVD,
DEVLINK_RESOURCE_ID_PARENT_TOP,
&mlxsw_sp_kvd_size_params,
- &mlxsw_sp_resource_kvd_ops);
+ NULL);
if (err)
return err;
if (err)
return err;
+ err = mlxsw_sp_kvdl_resources_register(devlink);
+ if (err)
+ return err;
+
double_size = kvd_size - linear_size;
double_size *= profile->kvd_hash_double_parts;
double_size /= profile->kvd_hash_double_parts +
MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
MLXSW_SP_RESOURCE_KVD,
&mlxsw_sp_hash_double_size_params,
- &mlxsw_sp_resource_kvd_hash_double_ops);
+ NULL);
if (err)
return err;
MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
MLXSW_SP_RESOURCE_KVD,
&mlxsw_sp_hash_single_size_params,
- &mlxsw_sp_resource_kvd_hash_single_ops);
+ NULL);
if (err)
return err;
u16 lag_id;
if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Exceeded number of supported LAG devices");
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported LAG devices");
return false;
}
if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
- NL_SET_ERR_MSG(extack,
- "spectrum: LAG device using unsupported Tx type");
+ NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type");
return false;
}
return true;
!netif_is_lag_master(upper_dev) &&
!netif_is_bridge_master(upper_dev) &&
!netif_is_ovs_master(upper_dev)) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Unknown upper device type");
+ NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
return -EINVAL;
}
if (!info->linking)
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
upper_dev))) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Enslaving a port to a device that already has an upper device is not supported");
+ NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
return -EINVAL;
}
if (netif_is_lag_master(upper_dev) &&
info->upper_info, extack))
return -EINVAL;
if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Master device is a LAG master and this device has a VLAN");
+ NL_SET_ERR_MSG_MOD(extack, "Master device is a LAG master and this device has a VLAN");
return -EINVAL;
}
if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) &&
!netif_is_lag_master(vlan_dev_real_dev(upper_dev))) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Can not put a VLAN on a LAG port");
+ NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port");
return -EINVAL;
}
if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Master device is an OVS master and this device has a VLAN");
+ NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN");
return -EINVAL;
}
if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Can not put a VLAN on an OVS port");
+ NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port");
return -EINVAL;
}
break;
case NETDEV_PRECHANGEUPPER:
upper_dev = info->upper_dev;
if (!netif_is_bridge_master(upper_dev)) {
- NL_SET_ERR_MSG(extack, "spectrum: VLAN devices only support bridge and VRF uppers");
+ NL_SET_ERR_MSG_MOD(extack, "VLAN devices only support bridge and VRF uppers");
return -EINVAL;
}
if (!info->linking)
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
upper_dev))) {
- NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported");
+ NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
return -EINVAL;
}
break;
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct mlxsw_sp_span_entry *span_entry;
struct mlxsw_sp *mlxsw_sp;
int err = 0;
mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+ if (event == NETDEV_UNREGISTER) {
+ span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev);
+ if (span_entry)
+ mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
+ }
+ mlxsw_sp_span_respin(mlxsw_sp);
+
if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
event, ptr);
#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR "linear"
#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE "hash_single"
#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE "hash_double"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES "singles"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS "chunks"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks"
enum mlxsw_sp_resource_id {
MLXSW_SP_RESOURCE_KVD,
MLXSW_SP_RESOURCE_KVD_LINEAR,
MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
};
struct mlxsw_sp_port;
struct mlxsw_sp_rif;
+struct mlxsw_sp_span_entry;
struct mlxsw_sp_upper {
struct net_device *dev;
unsigned long *ports_in_mid; /* bits array */
};
-enum mlxsw_sp_span_type {
- MLXSW_SP_SPAN_EGRESS,
- MLXSW_SP_SPAN_INGRESS
-};
-
-struct mlxsw_sp_span_inspected_port {
- struct list_head list;
- enum mlxsw_sp_span_type type;
- u8 local_port;
-};
-
-struct mlxsw_sp_span_entry {
- u8 local_port;
- bool used;
- struct list_head bound_ports_list;
- int ref_count;
- int id;
-};
-
enum mlxsw_sp_port_mall_action_type {
MLXSW_SP_PORT_MALL_MIRROR,
MLXSW_SP_PORT_MALL_SAMPLE,
};
struct mlxsw_sp_port_mall_mirror_tc_entry {
- u8 to_local_port;
+ int span_id;
bool ingress;
};
u64 wred_drop[TC_MAX_QUEUE];
u64 tail_drop[TC_MAX_QUEUE];
u64 backlog[TC_MAX_QUEUE];
+ u64 tx_bytes[IEEE_8021QAZ_MAX_TCS];
+ u64 tx_packets[IEEE_8021QAZ_MAX_TCS];
};
struct mlxsw_sp_port {
struct mlxsw_sp_port_sample *sample;
struct list_head vlans_list;
struct mlxsw_sp_qdisc *root_qdisc;
+ struct mlxsw_sp_qdisc *tclass_qdiscs;
unsigned acl_rule_count;
struct mlxsw_sp_acl_block *ing_acl_block;
struct mlxsw_sp_acl_block *eg_acl_block;
struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port);
struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev);
-int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
- struct mlxsw_sp_port *to,
- enum mlxsw_sp_span_type type,
- bool bind);
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from,
- u8 destination_port,
- enum mlxsw_sp_span_type type,
- bool bind);
-struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
/* spectrum_dcb.c */
#ifdef CONFIG_MLXSW_SPECTRUM_DCB
unsigned int entry_count,
unsigned int *p_alloc_size);
u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_kvdl_resources_register(struct devlink *devlink);
struct mlxsw_sp_acl_rule_info {
unsigned int priority;
struct net_device *out_dev)
{
struct mlxsw_sp_acl_block_binding *binding;
- struct mlxsw_sp_port *out_port;
struct mlxsw_sp_port *in_port;
if (!list_is_singular(&block->binding_list))
binding = list_first_entry(&block->binding_list,
struct mlxsw_sp_acl_block_binding, list);
in_port = binding->mlxsw_sp_port;
- if (!mlxsw_sp_port_dev_check(out_dev))
- return -EINVAL;
-
- out_port = netdev_priv(out_dev);
- if (out_port->mlxsw_sp != mlxsw_sp)
- return -EINVAL;
return mlxsw_afa_block_append_mirror(rulei->act_block,
in_port->local_port,
- out_port->local_port,
+ out_dev,
binding->ingress);
}
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
*
#include "spectrum_acl_flex_actions.h"
#include "core_acl_flex_actions.h"
+#include "spectrum_span.h"
#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1
}
static int
-mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port, u8 local_out_port,
+mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress, int *p_span_id)
{
- struct mlxsw_sp_port *in_port, *out_port;
- struct mlxsw_sp_span_entry *span_entry;
+ struct mlxsw_sp_port *in_port;
struct mlxsw_sp *mlxsw_sp = priv;
enum mlxsw_sp_span_type type;
- int err;
type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- out_port = mlxsw_sp->ports[local_out_port];
in_port = mlxsw_sp->ports[local_in_port];
- err = mlxsw_sp_span_mirror_add(in_port, out_port, type, false);
- if (err)
- return err;
-
- span_entry = mlxsw_sp_span_entry_find(mlxsw_sp, local_out_port);
- if (!span_entry) {
- err = -ENOENT;
- goto err_span_entry_find;
- }
-
- *p_span_id = span_entry->id;
- return 0;
-
-err_span_entry_find:
- mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
- return err;
+ return mlxsw_sp_span_mirror_add(in_port, out_dev, type,
+ false, p_span_id);
}
static void
-mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
- bool ingress)
+mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress)
{
struct mlxsw_sp *mlxsw_sp = priv;
struct mlxsw_sp_port *in_port;
type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
in_port = mlxsw_sp->ports[local_in_port];
- mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
+ mlxsw_sp_span_mirror_del(in_port, span_id, type, false);
}
static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*/
#include <net/ip_tunnels.h>
+#include <net/ip6_tunnel.h>
#include "spectrum_ipip.h"
struct ip_tunnel_parm
-mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev)
+mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return tun->parms;
}
-static bool mlxsw_sp_ipip_parms_has_ikey(struct ip_tunnel_parm parms)
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev)
+{
+ struct ip6_tnl *tun = netdev_priv(ol_dev);
+
+ return tun->parms;
+}
+
+static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
{
return !!(parms.i_flags & TUNNEL_KEY);
}
-static bool mlxsw_sp_ipip_parms_has_okey(struct ip_tunnel_parm parms)
+static bool mlxsw_sp_ipip_parms4_has_okey(struct ip_tunnel_parm parms)
{
return !!(parms.o_flags & TUNNEL_KEY);
}
-static u32 mlxsw_sp_ipip_parms_ikey(struct ip_tunnel_parm parms)
+static u32 mlxsw_sp_ipip_parms4_ikey(struct ip_tunnel_parm parms)
{
- return mlxsw_sp_ipip_parms_has_ikey(parms) ?
+ return mlxsw_sp_ipip_parms4_has_ikey(parms) ?
be32_to_cpu(parms.i_key) : 0;
}
-static u32 mlxsw_sp_ipip_parms_okey(struct ip_tunnel_parm parms)
+static u32 mlxsw_sp_ipip_parms4_okey(struct ip_tunnel_parm parms)
{
- return mlxsw_sp_ipip_parms_has_okey(parms) ?
+ return mlxsw_sp_ipip_parms4_has_okey(parms) ?
be32_to_cpu(parms.o_key) : 0;
}
-static __be32 mlxsw_sp_ipip_parms_saddr4(struct ip_tunnel_parm parms)
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
{
- return parms.iph.saddr;
+ return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr };
}
static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_parms_saddr(enum mlxsw_sp_l3proto proto,
- struct ip_tunnel_parm parms)
+mlxsw_sp_ipip_parms6_saddr(struct __ip6_tnl_parm parms)
{
- switch (proto) {
- case MLXSW_SP_L3_PROTO_IPV4:
- return (union mlxsw_sp_l3addr) {
- .addr4 = mlxsw_sp_ipip_parms_saddr4(parms),
- };
- case MLXSW_SP_L3_PROTO_IPV6:
- break;
- }
-
- WARN_ON(1);
- return (union mlxsw_sp_l3addr) {
- .addr4 = 0,
- };
+ return (union mlxsw_sp_l3addr) { .addr6 = parms.laddr };
}
-static __be32 mlxsw_sp_ipip_parms_daddr4(struct ip_tunnel_parm parms)
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
{
- return parms.iph.daddr;
+ return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
}
static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_parms_daddr(enum mlxsw_sp_l3proto proto,
- struct ip_tunnel_parm parms)
+mlxsw_sp_ipip_parms6_daddr(struct __ip6_tnl_parm parms)
+{
+ return (union mlxsw_sp_l3addr) { .addr6 = parms.raddr };
+}
+
+union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+ const struct net_device *ol_dev)
{
+ struct ip_tunnel_parm parms4;
+ struct __ip6_tnl_parm parms6;
+
switch (proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- return (union mlxsw_sp_l3addr) {
- .addr4 = mlxsw_sp_ipip_parms_daddr4(parms),
- };
+ parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+ return mlxsw_sp_ipip_parms4_saddr(parms4);
case MLXSW_SP_L3_PROTO_IPV6:
- break;
+ parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+ return mlxsw_sp_ipip_parms6_saddr(parms6);
}
WARN_ON(1);
- return (union mlxsw_sp_l3addr) {
- .addr4 = 0,
- };
-}
-
-static bool mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
-{
- return mlxsw_sp_ipip_parms_has_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
+ return (union mlxsw_sp_l3addr) {0};
}
-static bool mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
+static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
{
- return mlxsw_sp_ipip_parms_has_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
-static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
-{
- return mlxsw_sp_ipip_parms_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
+ struct ip_tunnel_parm parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
-static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev)
-{
- return mlxsw_sp_ipip_parms_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
+ return mlxsw_sp_ipip_parms4_daddr(parms4).addr4;
}
-union mlxsw_sp_l3addr
-mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev)
{
- return mlxsw_sp_ipip_parms_saddr(proto,
- mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
+ struct ip_tunnel_parm parms4;
+ struct __ip6_tnl_parm parms6;
-static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
-{
- return mlxsw_sp_ipip_parms_daddr4(mlxsw_sp_ipip_netdev_parms(ol_dev));
+ switch (proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+ return mlxsw_sp_ipip_parms4_daddr(parms4);
+ case MLXSW_SP_L3_PROTO_IPV6:
+ parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+ return mlxsw_sp_ipip_parms6_daddr(parms6);
+ }
+
+ WARN_ON(1);
+ return (union mlxsw_sp_l3addr) {0};
}
-static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
- const struct net_device *ol_dev)
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
{
- return mlxsw_sp_ipip_parms_daddr(proto,
- mlxsw_sp_ipip_netdev_parms(ol_dev));
+ union mlxsw_sp_l3addr naddr = {0};
+
+ return !memcmp(&addr, &naddr, sizeof(naddr));
}
static int
u32 tunnel_index,
struct mlxsw_sp_ipip_entry *ipip_entry)
{
- bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev);
u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
- u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev);
char rtdp_pl[MLXSW_REG_RTDP_LEN];
+ struct ip_tunnel_parm parms;
unsigned int type_check;
+ bool has_ikey;
u32 daddr4;
+ u32 ikey;
+
+ parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
+ has_ikey = mlxsw_sp_ipip_parms4_has_ikey(parms);
+ ikey = mlxsw_sp_ipip_parms4_ikey(parms);
mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
{
union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
- union mlxsw_sp_l3addr naddr = {0};
/* Tunnels with unset local or remote address are valid in Linux and
* used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
* (NBMA) tunnels. In principle these can be offloaded, but the driver
* currently doesn't support this. So punt.
*/
- return memcmp(&saddr, &naddr, sizeof(naddr)) &&
- memcmp(&daddr, &naddr, sizeof(naddr));
+ return !mlxsw_sp_l3addr_is_zero(saddr) &&
+ !mlxsw_sp_l3addr_is_zero(daddr);
}
static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev)
{
+ struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev);
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
- lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
+ lb_ipipt = mlxsw_sp_ipip_parms4_has_okey(parms) ?
MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP :
MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP;
return (struct mlxsw_sp_rif_ipip_lb_config){
.lb_ipipt = lb_ipipt,
- .okey = mlxsw_sp_ipip_netdev_okey(ol_dev),
+ .okey = mlxsw_sp_ipip_parms4_okey(parms),
.ul_protocol = MLXSW_SP_L3_PROTO_IPV4,
.saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
ol_dev),
bool update_nhs = false;
int err = 0;
- new_parms = mlxsw_sp_ipip_netdev_parms(ipip_entry->ol_dev);
+ new_parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
- new_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
- new_parms);
- old_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
- ipip_entry->parms);
- new_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
- new_parms);
- old_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
- ipip_entry->parms);
+ new_saddr = mlxsw_sp_ipip_parms4_saddr(new_parms);
+ old_saddr = mlxsw_sp_ipip_parms4_saddr(ipip_entry->parms4);
+ new_daddr = mlxsw_sp_ipip_parms4_daddr(new_parms);
+ old_daddr = mlxsw_sp_ipip_parms4_daddr(ipip_entry->parms4);
if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) {
u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
}
update_tunnel = true;
- } else if ((mlxsw_sp_ipip_parms_okey(ipip_entry->parms) !=
- mlxsw_sp_ipip_parms_okey(new_parms)) ||
- ipip_entry->parms.link != new_parms.link) {
+ } else if ((mlxsw_sp_ipip_parms4_okey(ipip_entry->parms4) !=
+ mlxsw_sp_ipip_parms4_okey(new_parms)) ||
+ ipip_entry->parms4.link != new_parms.link) {
update_tunnel = true;
} else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) {
update_nhs = true;
- } else if (mlxsw_sp_ipip_parms_ikey(ipip_entry->parms) !=
- mlxsw_sp_ipip_parms_ikey(new_parms)) {
+ } else if (mlxsw_sp_ipip_parms4_ikey(ipip_entry->parms4) !=
+ mlxsw_sp_ipip_parms4_ikey(new_parms)) {
update_decap = true;
}
false, false, false,
extack);
- ipip_entry->parms = new_parms;
+ ipip_entry->parms4 = new_parms;
return err;
}
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "spectrum_router.h"
#include <net/ip_fib.h>
+#include <linux/if_tunnel.h>
struct ip_tunnel_parm
-mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev);
+mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev);
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev);
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev);
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr);
+
enum mlxsw_sp_ipip_type {
MLXSW_SP_IPIP_TYPE_GRE4,
MLXSW_SP_IPIP_TYPE_MAX,
struct mlxsw_sp_rif_ipip_lb *ol_lb;
struct mlxsw_sp_fib_entry *decap_fib_entry;
struct list_head ipip_list_node;
- struct ip_tunnel_parm parms;
+ union {
+ struct ip_tunnel_parm parms4;
+ };
};
struct mlxsw_sp_ipip_ops {
struct mlxsw_sp_kvdl_part {
struct list_head list;
- const struct mlxsw_sp_kvdl_part_info *info;
+ struct mlxsw_sp_kvdl_part_info *info;
unsigned long usage[0]; /* Entries */
};
return 0;
}
+enum mlxsw_sp_kvdl_part_id {
+ MLXSW_SP_KVDL_PART_SINGLE,
+ MLXSW_SP_KVDL_PART_CHUNKS,
+ MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
+};
+
static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = {
{
- .part_index = 0,
+ .part_index = MLXSW_SP_KVDL_PART_SINGLE,
.start_index = MLXSW_SP_KVDL_SINGLE_BASE,
.end_index = MLXSW_SP_KVDL_SINGLE_END,
.alloc_size = 1,
},
{
- .part_index = 1,
+ .part_index = MLXSW_SP_KVDL_PART_CHUNKS,
.start_index = MLXSW_SP_KVDL_CHUNKS_BASE,
.end_index = MLXSW_SP_KVDL_CHUNKS_END,
.alloc_size = MLXSW_SP_CHUNK_MAX,
},
{
- .part_index = 2,
+ .part_index = MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
.start_index = MLXSW_SP_KVDL_LARGE_CHUNKS_BASE,
.end_index = MLXSW_SP_KVDL_LARGE_CHUNKS_END,
.alloc_size = MLXSW_SP_LARGE_CHUNK_MAX,
return NULL;
}
+static void
+mlxsw_sp_kvdl_part_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_kvdl_part *part, unsigned int size)
+{
+ struct mlxsw_sp_kvdl_part_info *info = part->info;
+
+ if (list_is_last(&part->list, &mlxsw_sp->kvdl->parts_list)) {
+ info->end_index = size - 1;
+ } else {
+ struct mlxsw_sp_kvdl_part *last_part;
+
+ last_part = list_next_entry(part, list);
+ info->start_index = last_part->info->end_index + 1;
+ info->end_index = info->start_index + size - 1;
+ }
+}
+
static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
unsigned int part_index)
{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
const struct mlxsw_sp_kvdl_part_info *info;
+ enum mlxsw_sp_resource_id resource_id;
struct mlxsw_sp_kvdl_part *part;
+ bool need_update = true;
unsigned int nr_entries;
size_t usage_size;
+ u64 resource_size;
+ int err;
info = &kvdl_parts_info[part_index];
- nr_entries = (info->end_index - info->start_index + 1) /
- info->alloc_size;
+ switch (part_index) {
+ case MLXSW_SP_KVDL_PART_SINGLE:
+ resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE;
+ break;
+ case MLXSW_SP_KVDL_PART_CHUNKS:
+ resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS;
+ break;
+ case MLXSW_SP_KVDL_PART_LARGE_CHUNKS:
+ resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ err = devlink_resource_size_get(devlink, resource_id, &resource_size);
+ if (err) {
+ need_update = false;
+ resource_size = info->end_index - info->start_index + 1;
+ }
+
+ nr_entries = div_u64(resource_size, info->alloc_size);
usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
if (!part)
return -ENOMEM;
- part->info = info;
- list_add(&part->list, &mlxsw_sp->kvdl->parts_list);
+ part->info = kmemdup(info, sizeof(*part->info), GFP_KERNEL);
+ if (!part->info)
+ goto err_part_info_alloc;
+ list_add(&part->list, &mlxsw_sp->kvdl->parts_list);
+ if (need_update)
+ mlxsw_sp_kvdl_part_update(mlxsw_sp, part, resource_size);
return 0;
+
+err_part_info_alloc:
+ kfree(part);
+ return -ENOMEM;
}
static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
return;
list_del(&part->list);
+ kfree(part->info);
kfree(part);
}
return occ;
}
+u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_sp_kvdl_part *part;
+
+ part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_SINGLE);
+ if (!part)
+ return -EINVAL;
+
+ return mlxsw_sp_kvdl_part_occ(part);
+}
+
+u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_sp_kvdl_part *part;
+
+ part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_CHUNKS);
+ if (!part)
+ return -EINVAL;
+
+ return mlxsw_sp_kvdl_part_occ(part);
+}
+
+u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_sp_kvdl_part *part;
+
+ part = mlxsw_sp_kvdl_part_find(mlxsw_sp,
+ MLXSW_SP_KVDL_PART_LARGE_CHUNKS);
+ if (!part)
+ return -EINVAL;
+
+ return mlxsw_sp_kvdl_part_occ(part);
+}
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_single_ops = {
+ .occ_get = mlxsw_sp_kvdl_single_occ_get,
+};
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_ops = {
+ .occ_get = mlxsw_sp_kvdl_chunks_occ_get,
+};
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_large_ops = {
+ .occ_get = mlxsw_sp_kvdl_large_chunks_occ_get,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_single_size_params = {
+ .size_min = 0,
+ .size_granularity = 1,
+ .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_chunks_size_params = {
+ .size_min = 0,
+ .size_granularity = MLXSW_SP_CHUNK_MAX,
+ .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_large_chunks_size_params = {
+ .size_min = 0,
+ .size_granularity = MLXSW_SP_LARGE_CHUNK_MAX,
+ .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static void
+mlxsw_sp_kvdl_resource_size_params_prepare(struct devlink *devlink)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ u32 kvdl_max_size;
+
+ kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
+ MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
+ MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
+
+ mlxsw_sp_kvdl_single_size_params.size_max = kvdl_max_size;
+ mlxsw_sp_kvdl_chunks_size_params.size_max = kvdl_max_size;
+ mlxsw_sp_kvdl_large_chunks_size_params.size_max = kvdl_max_size;
+}
+
+int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
+{
+ int err;
+
+ mlxsw_sp_kvdl_resource_size_params_prepare(devlink);
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
+ false, MLXSW_SP_KVDL_SINGLE_SIZE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ &mlxsw_sp_kvdl_single_size_params,
+ &mlxsw_sp_kvdl_single_ops);
+ if (err)
+ return err;
+
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
+ false, MLXSW_SP_KVDL_CHUNKS_SIZE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ &mlxsw_sp_kvdl_chunks_size_params,
+ &mlxsw_sp_kvdl_chunks_ops);
+ if (err)
+ return err;
+
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
+ false, MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ &mlxsw_sp_kvdl_large_chunks_size_params,
+ &mlxsw_sp_kvdl_chunks_large_ops);
+ return err;
+}
+
int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
{
struct mlxsw_sp_kvdl *kvdl;
switch (mr_route->mr_table->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- ivif = mr_route->mfc4->mfc_parent;
- return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255;
+ ivif = mr_route->mfc4->_c.mfc_parent;
+ return mr_route->mfc4->_c.mfc_un.res.ttls[ivif] != 255;
case MLXSW_SP_L3_PROTO_IPV6:
/* fall through */
default:
mr_route->mfc4 = mfc;
mr_route->mr_table = mr_table;
for (i = 0; i < MAXVIFS; i++) {
- if (mfc->mfc_un.res.ttls[i] != 255) {
+ if (mfc->_c.mfc_un.res.ttls[i] != 255) {
err = mlxsw_sp_mr_route_evif_link(mr_route,
&mr_table->vifs[i]);
if (err)
mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
}
}
- mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]);
+ mlxsw_sp_mr_route_ivif_link(mr_route,
+ &mr_table->vifs[mfc->_c.mfc_parent]);
mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
return mr_route;
switch (mr_route->mr_table->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
if (offload)
- mr_route->mfc4->mfc_flags |= MFC_OFFLOAD;
+ mr_route->mfc4->_c.mfc_flags |= MFC_OFFLOAD;
else
- mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD;
+ mr_route->mfc4->_c.mfc_flags &= ~MFC_OFFLOAD;
break;
case MLXSW_SP_L3_PROTO_IPV6:
/* fall through */
switch (mr_route->mr_table->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- if (mr_route->mfc4->mfc_un.res.pkt != packets)
- mr_route->mfc4->mfc_un.res.lastuse = jiffies;
- mr_route->mfc4->mfc_un.res.pkt = packets;
- mr_route->mfc4->mfc_un.res.bytes = bytes;
+ if (mr_route->mfc4->_c.mfc_un.res.pkt != packets)
+ mr_route->mfc4->_c.mfc_un.res.lastuse = jiffies;
+ mr_route->mfc4->_c.mfc_un.res.pkt = packets;
+ mr_route->mfc4->_c.mfc_un.res.bytes = bytes;
break;
case MLXSW_SP_L3_PROTO_IPV6:
/* fall through */
#include "reg.h"
#define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1)
+#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \
+ MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1))
enum mlxsw_sp_qdisc_type {
MLXSW_SP_QDISC_NO_QDISC,
struct mlxsw_sp_qdisc {
u32 handle;
u8 tclass_num;
+ u8 prio_bitmap;
union {
struct red_stats red;
} xstats_base;
mlxsw_sp_qdisc->handle == handle;
}
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
+ bool root_only)
+{
+ int tclass, child_index;
+
+ if (parent == TC_H_ROOT)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (root_only || !mlxsw_sp_port->root_qdisc ||
+ !mlxsw_sp_port->root_qdisc->ops ||
+ TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle ||
+ TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS)
+ return NULL;
+
+ child_index = TC_H_MIN(parent);
+ tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
+ return &mlxsw_sp_port->tclass_qdiscs[tclass];
+}
+
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
+{
+ int i;
+
+ if (mlxsw_sp_port->root_qdisc->handle == handle)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC)
+ return NULL;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle)
+ return &mlxsw_sp_port->tclass_qdiscs[i];
+
+ return NULL;
+}
+
static int
mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
return -EOPNOTSUPP;
}
+static void
+mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats,
+ u8 prio_bitmap, u64 *tx_packets,
+ u64 *tx_bytes)
+{
+ int i;
+
+ *tx_packets = 0;
+ *tx_bytes = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (prio_bitmap & BIT(i)) {
+ *tx_packets += xstats->tx_packets[i];
+ *tx_bytes += xstats->tx_bytes[i];
+ }
+ }
+}
+
static int
mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
int tclass_num, u32 min, u32 max,
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
- struct rtnl_link_stats64 *stats;
struct red_stats *red_base;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
- stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
red_base = &mlxsw_sp_qdisc->xstats_base.red;
- stats_base->tx_packets = stats->tx_packets;
- stats_base->tx_bytes = stats->tx_bytes;
-
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &stats_base->tx_packets,
+ &stats_base->tx_bytes);
red_base->prob_mark = xstats->ecn;
red_base->prob_drop = xstats->wred_drop[tclass_num];
red_base->pdrop = xstats->tail_drop[tclass_num];
mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
+ struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc;
+
+ if (root_qdisc != mlxsw_sp_qdisc)
+ root_qdisc->stats_base.backlog -=
+ mlxsw_sp_qdisc->stats_base.backlog;
+
return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
mlxsw_sp_qdisc->tclass_num);
}
backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
mlxsw_sp_qdisc->stats_base.backlog);
p->qstats->backlog -= backlog;
+ mlxsw_sp_qdisc->stats_base.backlog = 0;
}
static int
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
- struct rtnl_link_stats64 *stats;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
- stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
- tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
- tx_packets = stats->tx_packets - stats_base->tx_packets;
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &tx_packets, &tx_bytes);
+ tx_bytes = tx_bytes - stats_base->tx_bytes;
+ tx_packets = tx_packets - stats_base->tx_packets;
+
overlimits = xstats->wred_drop[tclass_num] + xstats->ecn -
stats_base->overlimits;
drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] -
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
- if (p->parent != TC_H_ROOT)
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
+ if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
- mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
-
if (p->command == TC_RED_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
{
int i;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
MLXSW_SP_PORT_DEFAULT_TCLASS);
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[i]);
+ mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
+ }
return 0;
}
void *params)
{
struct tc_prio_qopt_offload_params *p = params;
- int tclass, i;
+ struct mlxsw_sp_qdisc *child_qdisc;
+ int tclass, i, band, backlog;
+ u8 old_priomap;
int err;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
- tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]);
- err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass);
- if (err)
- return err;
+ for (band = 0; band < p->bands; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ old_priomap = child_qdisc->prio_bitmap;
+ child_qdisc->prio_bitmap = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (p->priomap[i] == band) {
+ child_qdisc->prio_bitmap |= BIT(i);
+ if (BIT(i) & old_priomap)
+ continue;
+ err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port,
+ i, tclass);
+ if (err)
+ return err;
+ }
+ }
+ if (old_priomap != child_qdisc->prio_bitmap &&
+ child_qdisc->ops && child_qdisc->ops->clean_stats) {
+ backlog = child_qdisc->stats_base.backlog;
+ child_qdisc->ops->clean_stats(mlxsw_sp_port,
+ child_qdisc);
+ child_qdisc->stats_base.backlog = backlog;
+ }
+ }
+ for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ child_qdisc->prio_bitmap = 0;
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
}
-
return 0;
}
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
drops += xstats->tail_drop[i];
+ drops += xstats->wred_drop[i];
backlog += xstats->backlog[i];
}
drops = drops - stats_base->drops;
stats_base->tx_bytes = stats->tx_bytes;
stats_base->drops = 0;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
stats_base->drops += xstats->tail_drop[i];
+ stats_base->drops += xstats->wred_drop[i];
+ }
mlxsw_sp_qdisc->stats_base.backlog = 0;
}
.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
};
+/* Grafting is not supported in mlxsw. It will result in un-offloading of the
+ * grafted qdisc as well as the qdisc in the qdisc new location.
+ * (However, if the graft is to the location where the qdisc is already at, it
+ * will be ignored completely and won't cause un-offloading).
+ */
+static int
+mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct tc_prio_qopt_offload_graft_params *p)
+{
+ int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band);
+ struct mlxsw_sp_qdisc *old_qdisc;
+
+ /* Check if the grafted qdisc is already in its "new" location. If so -
+ * nothing needs to be done.
+ */
+ if (p->band < IEEE_8021QAZ_MAX_TCS &&
+ mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle)
+ return 0;
+
+ /* See if the grafted qdisc is already offloaded on any tclass. If so,
+ * unoffload it.
+ */
+ old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port,
+ p->child_handle);
+ if (old_qdisc)
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
+
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[tclass_num]);
+ return -EOPNOTSUPP;
+}
+
int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p)
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
- if (p->parent != TC_H_ROOT)
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
+ if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
- mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
if (p->command == TC_PRIO_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
case TC_PRIO_STATS:
return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&p->stats);
+ case TC_PRIO_GRAFT:
+ return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
+ &p->graft_params);
default:
return -EOPNOTSUPP;
}
int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
- mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc),
- GFP_KERNEL);
- if (!mlxsw_sp_port->root_qdisc)
- return -ENOMEM;
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+ int i;
+ mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_root_qdisc_init;
+
+ mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc;
+ mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff;
mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
+ mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS,
+ GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_tclass_qdiscs_init;
+
+ mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i;
+
return 0;
+
+err_tclass_qdiscs_init:
+ kfree(mlxsw_sp_port->root_qdisc);
+err_root_qdisc_init:
+ return -ENOMEM;
}
void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
{
+ kfree(mlxsw_sp_port->tclass_qdiscs);
kfree(mlxsw_sp_port->root_qdisc);
}
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
- * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
* Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "spectrum_mr.h"
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"
+#include "spectrum_span.h"
struct mlxsw_sp_fib;
struct mlxsw_sp_vr;
u32 tb_id,
struct netlink_ext_ack *extack)
{
+ struct mlxsw_sp_mr_table *mr4_table;
+ struct mlxsw_sp_fib *fib4;
+ struct mlxsw_sp_fib *fib6;
struct mlxsw_sp_vr *vr;
int err;
vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
if (!vr) {
- NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
return ERR_PTR(-EBUSY);
}
- vr->fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
- if (IS_ERR(vr->fib4))
- return ERR_CAST(vr->fib4);
- vr->fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
- if (IS_ERR(vr->fib6)) {
- err = PTR_ERR(vr->fib6);
+ fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
+ if (IS_ERR(fib4))
+ return ERR_CAST(fib4);
+ fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
+ if (IS_ERR(fib6)) {
+ err = PTR_ERR(fib6);
goto err_fib6_create;
}
- vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
- MLXSW_SP_L3_PROTO_IPV4);
- if (IS_ERR(vr->mr4_table)) {
- err = PTR_ERR(vr->mr4_table);
+ mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
+ MLXSW_SP_L3_PROTO_IPV4);
+ if (IS_ERR(mr4_table)) {
+ err = PTR_ERR(mr4_table);
goto err_mr_table_create;
}
+ vr->fib4 = fib4;
+ vr->fib6 = fib6;
+ vr->mr4_table = mr4_table;
vr->tb_id = tb_id;
return vr;
err_mr_table_create:
- mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
- vr->fib6 = NULL;
+ mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
err_fib6_create:
- mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
- vr->fib4 = NULL;
+ mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
return ERR_PTR(err);
}
enum mlxsw_sp_ipip_type ipipt,
struct net_device *ol_dev)
{
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
struct mlxsw_sp_ipip_entry *ipip_entry;
struct mlxsw_sp_ipip_entry *ret = NULL;
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
if (!ipip_entry)
return ERR_PTR(-ENOMEM);
ipip_entry->ipipt = ipipt;
ipip_entry->ol_dev = ol_dev;
- ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);
+
+ switch (ipip_ops->ul_proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ WARN_ON(1);
+ break;
+ }
return ipip_entry;
read_unlock_bh(&n->lock);
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
entry_connected = nud_state & NUD_VALID && !dead;
neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
if (!entry_connected && !neigh_entry)
mlxsw_core_schedule_work(&net_work->work);
mlxsw_sp_port_dev_put(mlxsw_sp_port);
break;
- case NETEVENT_MULTIPATH_HASH_UPDATE:
+ case NETEVENT_IPV4_MPATH_HASH_UPDATE:
+ case NETEVENT_IPV6_MPATH_HASH_UPDATE:
net = ptr;
if (!net_eq(net, &init_net))
struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
int i;
+ if (!list_is_singular(&nh_grp->fib_list))
+ return;
+
for (i = 0; i < nh_grp->count; i++) {
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
/* Protect internal structures from changes */
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_APPEND: /* fall through */
int err;
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_ADD:
}
if (err < 0)
- NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload");
+ NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported. Aborting offload");
return err;
}
err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
if (err) {
- NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
goto err_rif_index_alloc;
}
static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
{
+ bool only_l3 = !init_net.ipv6.sysctl.multipath_hash_policy;
+
mlxsw_sp_mp_hash_header_set(recr2_pl,
MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
- mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
+ if (only_l3) {
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
+ } else {
+ mlxsw_sp_mp_hash_header_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_SPORT);
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_DPORT);
+ }
}
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
--- /dev/null
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.c
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Petr Machata <petrm@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+#include <net/arp.h>
+#include <net/gre.h>
+#include <net/ndisc.h>
+#include <net/ip6_tunnel.h>
+
+#include "spectrum.h"
+#include "spectrum_span.h"
+#include "spectrum_ipip.h"
+
+int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
+ return -EIO;
+
+ mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ MAX_SPAN);
+ mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
+ sizeof(struct mlxsw_sp_span_entry),
+ GFP_KERNEL);
+ if (!mlxsw_sp->span.entries)
+ return -ENOMEM;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ INIT_LIST_HEAD(&curr->bound_ports_list);
+ curr->id = i;
+ }
+
+ return 0;
+}
+
+void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
+ }
+ kfree(mlxsw_sp->span.entries);
+}
+
+static int
+mlxsw_sp_span_entry_phys_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ sparmsp->dest_port = netdev_priv(to_dev);
+ return 0;
+}
+
+static int
+mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analayzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_reg_mpat_span_type span_type)
+{
+ struct mlxsw_sp_port *dest_port = span_entry->parms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
+ .can_handle = mlxsw_sp_port_dev_check,
+ .parms = mlxsw_sp_span_entry_phys_parms,
+ .configure = mlxsw_sp_span_entry_phys_configure,
+ .deconfigure = mlxsw_sp_span_entry_phys_deconfigure,
+};
+
+static struct net_device *
+mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
+ __be32 *saddrp, __be32 *daddrp)
+{
+ struct ip_tunnel *tun = netdev_priv(to_dev);
+ struct net_device *dev = NULL;
+ struct ip_tunnel_parm parms;
+ struct rtable *rt = NULL;
+ struct flowi4 fl4;
+
+ /* We assume "dev" stays valid after rt is put. */
+ ASSERT_RTNL();
+
+ parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
+ ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
+ 0, 0, parms.link, tun->fwmark);
+
+ rt = ip_route_output_key(tun->net, &fl4);
+ if (IS_ERR(rt))
+ return NULL;
+
+ if (rt->rt_type != RTN_UNICAST)
+ goto out;
+
+ dev = rt->dst.dev;
+ *saddrp = fl4.saddr;
+ *daddrp = rt->rt_gateway;
+
+out:
+ ip_rt_put(rt);
+ return dev;
+}
+
+static int mlxsw_sp_span_dmac(struct neigh_table *tbl,
+ const void *pkey,
+ struct net_device *l3edev,
+ unsigned char dmac[ETH_ALEN])
+{
+ struct neighbour *neigh = neigh_lookup(tbl, pkey, l3edev);
+ int err = 0;
+
+ if (!neigh) {
+ neigh = neigh_create(tbl, pkey, l3edev);
+ if (IS_ERR(neigh))
+ return PTR_ERR(neigh);
+ }
+
+ neigh_event_send(neigh, NULL);
+
+ read_lock_bh(&neigh->lock);
+ if ((neigh->nud_state & NUD_VALID) && !neigh->dead)
+ memcpy(dmac, neigh->ha, ETH_ALEN);
+ else
+ err = -ENOENT;
+ read_unlock_bh(&neigh->lock);
+
+ neigh_release(neigh);
+ return err;
+}
+
+static int
+mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp)
+{
+ sparmsp->dest_port = NULL;
+ return 0;
+}
+
+static int
+mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
+ union mlxsw_sp_l3addr saddr,
+ union mlxsw_sp_l3addr daddr,
+ union mlxsw_sp_l3addr gw,
+ __u8 ttl,
+ struct neigh_table *tbl,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ unsigned char dmac[ETH_ALEN];
+
+ if (mlxsw_sp_l3addr_is_zero(gw))
+ gw = daddr;
+
+ if (!l3edev || !mlxsw_sp_port_dev_check(l3edev) ||
+ mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ sparmsp->dest_port = netdev_priv(l3edev);
+ sparmsp->ttl = ttl;
+ memcpy(sparmsp->dmac, dmac, ETH_ALEN);
+ memcpy(sparmsp->smac, l3edev->dev_addr, ETH_ALEN);
+ sparmsp->saddr = saddr;
+ sparmsp->daddr = daddr;
+ return 0;
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ struct ip_tunnel_parm tparm = mlxsw_sp_ipip_netdev_parms4(to_dev);
+ union mlxsw_sp_l3addr saddr = { .addr4 = tparm.iph.saddr };
+ union mlxsw_sp_l3addr daddr = { .addr4 = tparm.iph.daddr };
+ bool inherit_tos = tparm.iph.tos & 0x1;
+ bool inherit_ttl = !tparm.iph.ttl;
+ union mlxsw_sp_l3addr gw = daddr;
+ struct net_device *l3edev;
+
+ if (!(to_dev->flags & IFF_UP) ||
+ /* Reject tunnels with GRE keys, checksums, etc. */
+ tparm.i_flags || tparm.o_flags ||
+ /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+ inherit_ttl || !inherit_tos ||
+ /* A destination address may not be "any". */
+ mlxsw_sp_l3addr_is_zero(daddr))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ l3edev = mlxsw_sp_span_gretap4_route(to_dev, &saddr.addr4, &gw.addr4);
+ return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+ tparm.iph.ttl,
+ &arp_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analayzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+ sparms.dmac, false);
+ mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl,
+ sparms.ttl, sparms.smac,
+ be32_to_cpu(sparms.saddr.addr4),
+ be32_to_cpu(sparms.daddr.addr4));
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
+ .can_handle = is_gretap_dev,
+ .parms = mlxsw_sp_span_entry_gretap4_parms,
+ .configure = mlxsw_sp_span_entry_gretap4_configure,
+ .deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
+};
+
+static struct net_device *
+mlxsw_sp_span_gretap6_route(const struct net_device *to_dev,
+ struct in6_addr *saddrp,
+ struct in6_addr *daddrp)
+{
+ struct ip6_tnl *t = netdev_priv(to_dev);
+ struct flowi6 fl6 = t->fl.u.ip6;
+ struct net_device *dev = NULL;
+ struct dst_entry *dst;
+ struct rt6_info *rt6;
+
+ /* We assume "dev" stays valid after dst is released. */
+ ASSERT_RTNL();
+
+ fl6.flowi6_mark = t->parms.fwmark;
+ if (!ip6_tnl_xmit_ctl(t, &fl6.saddr, &fl6.daddr))
+ return NULL;
+
+ dst = ip6_route_output(t->net, NULL, &fl6);
+ if (!dst || dst->error)
+ goto out;
+
+ rt6 = container_of(dst, struct rt6_info, dst);
+
+ dev = dst->dev;
+ *saddrp = fl6.saddr;
+ *daddrp = rt6->rt6i_gateway;
+
+out:
+ dst_release(dst);
+ return dev;
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(to_dev);
+ bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS;
+ union mlxsw_sp_l3addr saddr = { .addr6 = tparm.laddr };
+ union mlxsw_sp_l3addr daddr = { .addr6 = tparm.raddr };
+ bool inherit_ttl = !tparm.hop_limit;
+ union mlxsw_sp_l3addr gw = daddr;
+ struct net_device *l3edev;
+
+ if (!(to_dev->flags & IFF_UP) ||
+ /* Reject tunnels with GRE keys, checksums, etc. */
+ tparm.i_flags || tparm.o_flags ||
+ /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+ inherit_ttl || !inherit_tos ||
+ /* A destination address may not be "any". */
+ mlxsw_sp_l3addr_is_zero(daddr))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ l3edev = mlxsw_sp_span_gretap6_route(to_dev, &saddr.addr6, &gw.addr6);
+ return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+ tparm.hop_limit,
+ &nd_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analayzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+ sparms.dmac, false);
+ mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac,
+ sparms.saddr.addr6,
+ sparms.daddr.addr6);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
+ .can_handle = is_ip6gretap_dev,
+ .parms = mlxsw_sp_span_entry_gretap6_parms,
+ .configure = mlxsw_sp_span_entry_gretap6_configure,
+ .deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure,
+};
+
+static const
+struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
+ &mlxsw_sp_span_entry_ops_phys,
+ &mlxsw_sp_span_entry_ops_gretap4,
+ &mlxsw_sp_span_entry_ops_gretap6,
+};
+
+static int
+mlxsw_sp_span_entry_nop_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_nop_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ return 0;
+}
+
+static void
+mlxsw_sp_span_entry_nop_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_nop = {
+ .parms = mlxsw_sp_span_entry_nop_parms,
+ .configure = mlxsw_sp_span_entry_nop_configure,
+ .deconfigure = mlxsw_sp_span_entry_nop_deconfigure,
+};
+
+static void
+mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ if (sparms.dest_port) {
+ if (sparms.dest_port->mlxsw_sp != mlxsw_sp) {
+ netdev_err(span_entry->to_dev, "Cannot mirror to %s, which belongs to a different mlxsw instance",
+ sparms.dest_port->dev->name);
+ sparms.dest_port = NULL;
+ } else if (span_entry->ops->configure(span_entry, sparms)) {
+ netdev_err(span_entry->to_dev, "Failed to offload mirror to %s",
+ sparms.dest_port->dev->name);
+ sparms.dest_port = NULL;
+ }
+ }
+
+ span_entry->parms = sparms;
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ if (span_entry->parms.dest_port)
+ span_entry->ops->deconfigure(span_entry);
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev,
+ const struct mlxsw_sp_span_entry_ops *ops,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_span_entry *span_entry = NULL;
+ int i;
+
+ /* find a free entry to use */
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ if (!mlxsw_sp->span.entries[i].ref_count) {
+ span_entry = &mlxsw_sp->span.entries[i];
+ break;
+ }
+ }
+ if (!span_entry)
+ return NULL;
+
+ span_entry->ops = ops;
+ span_entry->ref_count = 1;
+ span_entry->to_dev = to_dev;
+ mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, sparms);
+
+ return span_entry;
+}
+
+static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure(span_entry);
+}
+
+struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ if (curr->ref_count && curr->to_dev == to_dev)
+ return curr;
+ }
+ return NULL;
+}
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure(span_entry);
+ span_entry->ops = &mlxsw_sp_span_entry_ops_nop;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ if (curr->ref_count && curr->id == span_id)
+ return curr;
+ }
+ return NULL;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev,
+ const struct mlxsw_sp_span_entry_ops *ops,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_span_entry *span_entry;
+
+ span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, to_dev);
+ if (span_entry) {
+ /* Already exists, just take a reference */
+ span_entry->ref_count++;
+ return span_entry;
+ }
+
+ return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev, ops, sparms);
+}
+
+static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry)
+{
+ WARN_ON(!span_entry->ref_count);
+ if (--span_entry->ref_count == 0)
+ mlxsw_sp_span_entry_destroy(span_entry);
+ return 0;
+}
+
+static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
+{
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ struct mlxsw_sp_span_inspected_port *p;
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ list_for_each_entry(p, &curr->bound_ports_list, list)
+ if (p->local_port == port->local_port &&
+ p->type == MLXSW_SP_SPAN_EGRESS)
+ return true;
+ }
+
+ return false;
+}
+
+static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
+ int mtu)
+{
+ return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
+}
+
+int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
+{
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char sbib_pl[MLXSW_REG_SBIB_LEN];
+ int err;
+
+ /* If port is egress mirrored, the shared buffer size should be
+ * updated according to the mtu value
+ */
+ if (mlxsw_sp_span_is_egress_mirror(port)) {
+ u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
+
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ if (err) {
+ netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static struct mlxsw_sp_span_inspected_port *
+mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry)
+{
+ struct mlxsw_sp_span_inspected_port *p;
+
+ list_for_each_entry(p, &span_entry->bound_ports_list, list)
+ if (port->local_port == p->local_port)
+ return p;
+ return NULL;
+}
+
+static int
+mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ bool bind)
+{
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char mpar_pl[MLXSW_REG_MPAR_LEN];
+ int pa_id = span_entry->id;
+
+ /* bind the port to the SPAN entry */
+ mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
+ (enum mlxsw_reg_mpar_i_e)type, bind, pa_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
+}
+
+static int
+mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ bool bind)
+{
+ struct mlxsw_sp_span_inspected_port *inspected_port;
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char sbib_pl[MLXSW_REG_SBIB_LEN];
+ int err;
+
+ /* if it is an egress SPAN, bind a shared buffer to it */
+ if (type == MLXSW_SP_SPAN_EGRESS) {
+ u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
+ port->dev->mtu);
+
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ if (err) {
+ netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
+ return err;
+ }
+ }
+
+ if (bind) {
+ err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+ true);
+ if (err)
+ goto err_port_bind;
+ }
+
+ inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
+ if (!inspected_port) {
+ err = -ENOMEM;
+ goto err_inspected_port_alloc;
+ }
+ inspected_port->local_port = port->local_port;
+ inspected_port->type = type;
+ list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
+
+ return 0;
+
+err_inspected_port_alloc:
+ if (bind)
+ mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+ false);
+err_port_bind:
+ if (type == MLXSW_SP_SPAN_EGRESS) {
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ }
+ return err;
+}
+
+static void
+mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ bool bind)
+{
+ struct mlxsw_sp_span_inspected_port *inspected_port;
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char sbib_pl[MLXSW_REG_SBIB_LEN];
+
+ inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry);
+ if (!inspected_port)
+ return;
+
+ if (bind)
+ mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+ false);
+ /* remove the SBIB buffer if it was egress SPAN */
+ if (type == MLXSW_SP_SPAN_EGRESS) {
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ }
+
+ mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
+
+ list_del(&inspected_port->list);
+ kfree(inspected_port);
+}
+
+static const struct mlxsw_sp_span_entry_ops *
+mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sp_span_entry_types); ++i)
+ if (mlxsw_sp_span_entry_types[i]->can_handle(to_dev))
+ return mlxsw_sp_span_entry_types[i];
+
+ return NULL;
+}
+
+int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
+ const struct net_device *to_dev,
+ enum mlxsw_sp_span_type type, bool bind,
+ int *p_span_id)
+{
+ struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
+ const struct mlxsw_sp_span_entry_ops *ops;
+ struct mlxsw_sp_span_parms sparms = {0};
+ struct mlxsw_sp_span_entry *span_entry;
+ int err;
+
+ ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev);
+ if (!ops) {
+ netdev_err(to_dev, "Cannot mirror to %s", to_dev->name);
+ return -EOPNOTSUPP;
+ }
+
+ err = ops->parms(to_dev, &sparms);
+ if (err)
+ return err;
+
+ span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
+ if (!span_entry)
+ return -ENOENT;
+
+ netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
+ span_entry->id);
+
+ err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
+ if (err)
+ goto err_port_bind;
+
+ *p_span_id = span_entry->id;
+ return 0;
+
+err_port_bind:
+ mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
+ return err;
+}
+
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
+ enum mlxsw_sp_span_type type, bool bind)
+{
+ struct mlxsw_sp_span_entry *span_entry;
+
+ span_entry = mlxsw_sp_span_entry_find_by_id(from->mlxsw_sp, span_id);
+ if (!span_entry) {
+ netdev_err(from->dev, "no span entry found\n");
+ return;
+ }
+
+ netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
+ span_entry->id);
+ mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
+}
+
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+ int err;
+
+ ASSERT_RTNL();
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+ struct mlxsw_sp_span_parms sparms = {0};
+
+ if (!curr->ref_count)
+ continue;
+
+ err = curr->ops->parms(curr->to_dev, &sparms);
+ if (err)
+ continue;
+
+ if (memcmp(&sparms, &curr->parms, sizeof(sparms))) {
+ mlxsw_sp_span_entry_deconfigure(curr);
+ mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms);
+ }
+ }
+}
--- /dev/null
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.h
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_SPAN_H
+#define _MLXSW_SPECTRUM_SPAN_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "spectrum_router.h"
+
+struct mlxsw_sp;
+struct mlxsw_sp_port;
+
+enum mlxsw_sp_span_type {
+ MLXSW_SP_SPAN_EGRESS,
+ MLXSW_SP_SPAN_INGRESS
+};
+
+struct mlxsw_sp_span_inspected_port {
+ struct list_head list;
+ enum mlxsw_sp_span_type type;
+ u8 local_port;
+};
+
+struct mlxsw_sp_span_parms {
+ struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
+ unsigned int ttl;
+ unsigned char dmac[ETH_ALEN];
+ unsigned char smac[ETH_ALEN];
+ union mlxsw_sp_l3addr daddr;
+ union mlxsw_sp_l3addr saddr;
+};
+
+struct mlxsw_sp_span_entry_ops;
+
+struct mlxsw_sp_span_entry {
+ const struct net_device *to_dev;
+ const struct mlxsw_sp_span_entry_ops *ops;
+ struct mlxsw_sp_span_parms parms;
+ struct list_head bound_ports_list;
+ int ref_count;
+ int id;
+};
+
+struct mlxsw_sp_span_entry_ops {
+ bool (*can_handle)(const struct net_device *to_dev);
+ int (*parms)(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp);
+ int (*configure)(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms);
+ void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry);
+};
+
+int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp);
+
+int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
+ const struct net_device *to_dev,
+ enum mlxsw_sp_span_type type,
+ bool bind, int *p_span_id);
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
+ enum mlxsw_sp_span_type type, bool bind);
+struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev);
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry);
+
+int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
+
+#endif
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
if (is_vlan_dev(bridge_port->dev)) {
- NL_SET_ERR_MSG(extack, "spectrum: Can not enslave a VLAN device to a VLAN-aware bridge");
+ NL_SET_ERR_MSG_MOD(extack, "Can not enslave a VLAN device to a VLAN-aware bridge");
return -EINVAL;
}
struct netlink_ext_ack *extack)
{
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ struct net_device *dev = bridge_port->dev;
u16 vid;
- if (!is_vlan_dev(bridge_port->dev)) {
- NL_SET_ERR_MSG(extack, "spectrum: Only VLAN devices can be enslaved to a VLAN-unaware bridge");
- return -EINVAL;
- }
- vid = vlan_dev_vlan_id(bridge_port->dev);
-
+ vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
if (WARN_ON(!mlxsw_sp_port_vlan))
return -EINVAL;
if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) {
- NL_SET_ERR_MSG(extack, "spectrum: Can not bridge VLAN uppers of the same port");
+ NL_SET_ERR_MSG_MOD(extack, "Can not bridge VLAN uppers of the same port");
return -EINVAL;
}
struct mlxsw_sp_port *mlxsw_sp_port)
{
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
- u16 vid = vlan_dev_vlan_id(bridge_port->dev);
+ struct net_device *dev = bridge_port->dev;
+ u16 vid;
+ vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
if (WARN_ON(!mlxsw_sp_port_vlan))
return;
*((volatile unsigned int *)dev->base_addr+(reg)) = (val); \
} while (0)
-
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
/*
* We cannot use station (ethernet) address prefixes to detect the
* sonic controller since these are board manufacturer depended.
static int sonic_probe1(struct net_device *dev)
{
- static unsigned version_printed;
unsigned int silicon_revision;
unsigned int val;
struct sonic_local *lp = netdev_priv(dev);
* the expected location.
*/
silicon_revision = SONIC_READ(SONIC_SR);
- if (sonic_debug > 1)
- printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
i = 0;
while (known_revisions[i] != 0xffff &&
known_revisions[i] != silicon_revision)
i++;
if (known_revisions[i] == 0xffff) {
- printk("SONIC ethernet controller not found (0x%4x)\n",
- silicon_revision);
+ pr_info("SONIC ethernet controller not found (0x%4x)\n",
+ silicon_revision);
goto out;
}
- if (sonic_debug && version_printed++ == 0)
- printk(version);
-
- printk(KERN_INFO "%s: Sonic ethernet found at 0x%08lx, ",
- dev_name(lp->device), dev->base_addr);
-
/*
* Put the sonic into software reset, then
* retrieve and print the ethernet address.
err = sonic_probe1(dev);
if (err)
goto out;
+
+ pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
+ sonic_msg_init(dev);
+
err = register_netdev(dev);
if (err)
goto out1;
- printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
return 0;
out1:
}
MODULE_DESCRIPTION("Jazz SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "jazzsonic debug level (1-4)");
MODULE_ALIAS("platform:jazzsonic");
#include "sonic.c"
#include <asm/macints.h>
#include <asm/mac_via.h>
-static char mac_sonic_string[] = "macsonic";
-
#include "sonic.h"
/* These should basically be bus-size and endian independent (since
#define SONIC_WRITE(reg,val) (nubus_writew(val, dev->base_addr + (reg * 4) \
+ lp->reg_offset))
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
-static int sonic_version_printed;
-
/* For onboard SONIC */
#define ONBOARD_SONIC_REGISTERS 0x50F0A000
#define ONBOARD_SONIC_PROM_BASE 0x50f08000
int sr;
bool commslot = macintosh_config->expansion_type == MAC_EXP_PDS_COMM;
- if (!MACH_IS_MAC)
- return -ENODEV;
-
- printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. ");
-
/* Bogus probing, on the models which may or may not have
Ethernet (BTW, the Ethernet *is* always at the same
address, and nothing else lives there, at least if Apple's
card_present = hwreg_present((void*)ONBOARD_SONIC_REGISTERS);
if (!card_present) {
- printk("none.\n");
+ pr_info("Onboard/comm-slot SONIC not found\n");
return -ENODEV;
}
}
- printk("yes\n");
-
/* Danger! My arms are flailing wildly! You *must* set lp->reg_offset
* and dev->base_addr before using SONIC_READ() or SONIC_WRITE() */
dev->base_addr = ONBOARD_SONIC_REGISTERS;
else
dev->irq = IRQ_NUBUS_9;
- if (!sonic_version_printed) {
- printk(KERN_INFO "%s", version);
- sonic_version_printed = 1;
- }
- printk(KERN_INFO "%s: onboard / comm-slot SONIC at 0x%08lx\n",
- dev_name(lp->device), dev->base_addr);
-
/* The PowerBook's SONIC is 16 bit always. */
if (macintosh_config->ident == MAC_MODEL_PB520) {
lp->reg_offset = 0;
lp->dma_bitmode = SONIC_BITMODE16;
- sr = SONIC_READ(SONIC_SR);
} else if (commslot) {
/* Some of the comm-slot cards are 16 bit. But some
of them are not. The 32-bit cards use offset 2 and
else {
lp->dma_bitmode = SONIC_BITMODE16;
lp->reg_offset = 0;
- sr = SONIC_READ(SONIC_SR);
}
} else {
/* All onboard cards are at offset 2 with 32 bit DMA. */
lp->reg_offset = 2;
lp->dma_bitmode = SONIC_BITMODE32;
- sr = SONIC_READ(SONIC_SR);
}
- printk(KERN_INFO
- "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
- dev_name(lp->device), sr, lp->dma_bitmode?32:16, lp->reg_offset);
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
- printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
- SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+ pr_info("Onboard/comm-slot SONIC, revision 0x%04x, %d bit DMA, register offset %d\n",
+ SONIC_READ(SONIC_SR), lp->dma_bitmode ? 32 : 16,
+ lp->reg_offset);
+
+ /* This is sometimes useful to find out how MacOS configured the card */
+ pr_debug("%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+ SONIC_READ(SONIC_DCR) & 0xffff,
+ SONIC_READ(SONIC_DCR2) & 0xffff);
/* Software reset, then initialize control registers. */
SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
/* Now look for the MAC address. */
mac_onboard_sonic_ethernet_addr(dev);
+ pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
/* Shared init code */
return macsonic_init(dev);
}
-static int mac_nubus_sonic_ethernet_addr(struct net_device *dev,
+static int mac_sonic_nubus_ethernet_addr(struct net_device *dev,
unsigned long prom_addr, int id)
{
int i;
return -1;
}
-static int mac_nubus_sonic_probe(struct net_device *dev)
+static int mac_sonic_nubus_probe_board(struct nubus_board *board, int id,
+ struct net_device *dev)
{
- static int slots;
- struct nubus_rsrc *ndev = NULL;
struct sonic_local* lp = netdev_priv(dev);
unsigned long base_addr, prom_addr;
u16 sonic_dcr;
- int id = -1;
int reg_offset, dma_bitmode;
- /* Find the first SONIC that hasn't been initialized already */
- for_each_func_rsrc(ndev) {
- if (ndev->category != NUBUS_CAT_NETWORK ||
- ndev->type != NUBUS_TYPE_ETHERNET)
- continue;
-
- /* Have we seen it already? */
- if (slots & (1<<ndev->board->slot))
- continue;
- slots |= 1<<ndev->board->slot;
-
- /* Is it one of ours? */
- if ((id = macsonic_ident(ndev)) != -1)
- break;
- }
-
- if (ndev == NULL)
- return -ENODEV;
-
switch (id) {
case MACSONIC_DUODOCK:
- base_addr = ndev->board->slot_addr + DUODOCK_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + DUODOCK_SONIC_PROM_BASE;
+ base_addr = board->slot_addr + DUODOCK_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + DUODOCK_SONIC_PROM_BASE;
sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT0 | SONIC_DCR_RFT1 |
SONIC_DCR_TFT0;
reg_offset = 2;
dma_bitmode = SONIC_BITMODE32;
break;
case MACSONIC_APPLE:
- base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+ base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
sonic_dcr = SONIC_DCR_BMS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0;
reg_offset = 0;
dma_bitmode = SONIC_BITMODE32;
break;
case MACSONIC_APPLE16:
- base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+ base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
SONIC_DCR_PO1 | SONIC_DCR_BMS;
reg_offset = 0;
dma_bitmode = SONIC_BITMODE16;
break;
case MACSONIC_DAYNALINK:
- base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + DAYNALINK_PROM_BASE;
+ base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + DAYNALINK_PROM_BASE;
sonic_dcr = SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
SONIC_DCR_PO1 | SONIC_DCR_BMS;
reg_offset = 0;
dma_bitmode = SONIC_BITMODE16;
break;
case MACSONIC_DAYNA:
- base_addr = ndev->board->slot_addr + DAYNA_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + DAYNA_SONIC_MAC_ADDR;
+ base_addr = board->slot_addr + DAYNA_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + DAYNA_SONIC_MAC_ADDR;
sonic_dcr = SONIC_DCR_BMS |
SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1;
reg_offset = 0;
dev->base_addr = base_addr;
lp->reg_offset = reg_offset;
lp->dma_bitmode = dma_bitmode;
- dev->irq = SLOT2IRQ(ndev->board->slot);
+ dev->irq = SLOT2IRQ(board->slot);
- if (!sonic_version_printed) {
- printk(KERN_INFO "%s", version);
- sonic_version_printed = 1;
- }
- printk(KERN_INFO "%s: %s in slot %X\n",
- dev_name(lp->device), ndev->board->name, ndev->board->slot);
- printk(KERN_INFO "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
- dev_name(lp->device), SONIC_READ(SONIC_SR), dma_bitmode?32:16, reg_offset);
+ dev_info(&board->dev, "%s, revision 0x%04x, %d bit DMA, register offset %d\n",
+ board->name, SONIC_READ(SONIC_SR),
+ lp->dma_bitmode ? 32 : 16, lp->reg_offset);
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
- printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
- SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+ /* This is sometimes useful to find out how MacOS configured the card */
+ dev_dbg(&board->dev, "%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+ SONIC_READ(SONIC_DCR) & 0xffff,
+ SONIC_READ(SONIC_DCR2) & 0xffff);
/* Software reset, then initialize control registers. */
SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
SONIC_WRITE(SONIC_ISR, 0x7fff);
/* Now look for the MAC address. */
- if (mac_nubus_sonic_ethernet_addr(dev, prom_addr, id) != 0)
+ if (mac_sonic_nubus_ethernet_addr(dev, prom_addr, id) != 0)
return -ENODEV;
+ dev_info(&board->dev, "SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
/* Shared init code */
return macsonic_init(dev);
}
-static int mac_sonic_probe(struct platform_device *pdev)
+static int mac_sonic_platform_probe(struct platform_device *pdev)
{
struct net_device *dev;
struct sonic_local *lp;
SET_NETDEV_DEV(dev, &pdev->dev);
platform_set_drvdata(pdev, dev);
- /* This will catch fatal stuff like -ENOMEM as well as success */
err = mac_onboard_sonic_probe(dev);
- if (err == 0)
- goto found;
- if (err != -ENODEV)
- goto out;
- err = mac_nubus_sonic_probe(dev);
if (err)
goto out;
-found:
+
+ sonic_msg_init(dev);
+
err = register_netdev(dev);
if (err)
goto out;
- printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
return 0;
out:
}
MODULE_DESCRIPTION("Macintosh SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "macsonic debug level (1-4)");
MODULE_ALIAS("platform:macsonic");
#include "sonic.c"
-static int mac_sonic_device_remove(struct platform_device *pdev)
+static int mac_sonic_platform_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct sonic_local* lp = netdev_priv(dev);
return 0;
}
-static struct platform_driver mac_sonic_driver = {
- .probe = mac_sonic_probe,
- .remove = mac_sonic_device_remove,
- .driver = {
- .name = mac_sonic_string,
+static struct platform_driver mac_sonic_platform_driver = {
+ .probe = mac_sonic_platform_probe,
+ .remove = mac_sonic_platform_remove,
+ .driver = {
+ .name = "macsonic",
+ },
+};
+
+static int mac_sonic_nubus_probe(struct nubus_board *board)
+{
+ struct net_device *ndev;
+ struct sonic_local *lp;
+ struct nubus_rsrc *fres;
+ int id = -1;
+ int err;
+
+ /* The platform driver will handle a PDS or Comm Slot card (even if
+ * it has a pseudoslot declaration ROM).
+ */
+ if (macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+ return -ENODEV;
+
+ for_each_board_func_rsrc(board, fres) {
+ if (fres->category != NUBUS_CAT_NETWORK ||
+ fres->type != NUBUS_TYPE_ETHERNET)
+ continue;
+
+ id = macsonic_ident(fres);
+ if (id != -1)
+ break;
+ }
+ if (!fres)
+ return -ENODEV;
+
+ ndev = alloc_etherdev(sizeof(struct sonic_local));
+ if (!ndev)
+ return -ENOMEM;
+
+ lp = netdev_priv(ndev);
+ lp->device = &board->dev;
+ SET_NETDEV_DEV(ndev, &board->dev);
+
+ err = mac_sonic_nubus_probe_board(board, id, ndev);
+ if (err)
+ goto out;
+
+ sonic_msg_init(ndev);
+
+ err = register_netdev(ndev);
+ if (err)
+ goto out;
+
+ nubus_set_drvdata(board, ndev);
+
+ return 0;
+
+out:
+ free_netdev(ndev);
+ return err;
+}
+
+static int mac_sonic_nubus_remove(struct nubus_board *board)
+{
+ struct net_device *ndev = nubus_get_drvdata(board);
+ struct sonic_local *lp = netdev_priv(ndev);
+
+ unregister_netdev(ndev);
+ dma_free_coherent(lp->device,
+ SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+ lp->descriptors, lp->descriptors_laddr);
+ free_netdev(ndev);
+
+ return 0;
+}
+
+static struct nubus_driver mac_sonic_nubus_driver = {
+ .probe = mac_sonic_nubus_probe,
+ .remove = mac_sonic_nubus_remove,
+ .driver = {
+ .name = "macsonic-nubus",
+ .owner = THIS_MODULE,
},
};
-module_platform_driver(mac_sonic_driver);
+static int perr, nerr;
+
+static int __init mac_sonic_init(void)
+{
+ perr = platform_driver_register(&mac_sonic_platform_driver);
+ nerr = nubus_driver_register(&mac_sonic_nubus_driver);
+ return 0;
+}
+module_init(mac_sonic_init);
+
+static void __exit mac_sonic_exit(void)
+{
+ if (!perr)
+ platform_driver_unregister(&mac_sonic_platform_driver);
+ if (!nerr)
+ nubus_driver_unregister(&mac_sonic_nubus_driver);
+}
+module_exit(mac_sonic_exit);
* the NetBSD file "sys/arch/mac68k/dev/if_sn.c".
*/
+static unsigned int version_printed;
+static int sonic_debug = -1;
+module_param(sonic_debug, int, 0);
+MODULE_PARM_DESC(sonic_debug, "debug message level");
+
+static void sonic_msg_init(struct net_device *dev)
+{
+ struct sonic_local *lp = netdev_priv(dev);
+
+ lp->msg_enable = netif_msg_init(sonic_debug, 0);
+
+ if (version_printed++ == 0)
+ netif_dbg(lp, drv, dev, "%s", version);
+}
/*
* Open/initialize the SONIC controller.
struct sonic_local *lp = netdev_priv(dev);
int i;
- if (sonic_debug > 2)
- printk("sonic_open: initializing sonic driver.\n");
+ netif_dbg(lp, ifup, dev, "%s: initializing sonic driver\n", __func__);
for (i = 0; i < SONIC_NUM_RRS; i++) {
struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2);
netif_start_queue(dev);
- if (sonic_debug > 2)
- printk("sonic_open: Initialization done.\n");
+ netif_dbg(lp, ifup, dev, "%s: Initialization done\n", __func__);
return 0;
}
struct sonic_local *lp = netdev_priv(dev);
int i;
- if (sonic_debug > 2)
- printk("sonic_close\n");
+ netif_dbg(lp, ifdown, dev, "%s\n", __func__);
netif_stop_queue(dev);
int length;
int entry = lp->next_tx;
- if (sonic_debug > 2)
- printk("sonic_send_packet: skb=%p, dev=%p\n", skb, dev);
+ netif_dbg(lp, tx_queued, dev, "%s: skb=%p\n", __func__, skb);
length = skb->len;
if (length < ETH_ZLEN) {
lp->next_tx = (entry + 1) & SONIC_TDS_MASK;
if (lp->tx_skb[lp->next_tx] != NULL) {
/* The ring is full, the ISR has yet to process the next TD. */
- if (sonic_debug > 3)
- printk("%s: stopping queue\n", dev->name);
+ netif_dbg(lp, tx_queued, dev, "%s: stopping queue\n", __func__);
netif_stop_queue(dev);
/* after this packet, wait for ISR to free up some TDAs */
} else netif_start_queue(dev);
- if (sonic_debug > 2)
- printk("sonic_send_packet: issuing Tx command\n");
+ netif_dbg(lp, tx_queued, dev, "%s: issuing Tx command\n", __func__);
SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP);
do {
if (status & SONIC_INT_PKTRX) {
- if (sonic_debug > 2)
- printk("%s: packet rx\n", dev->name);
+ netif_dbg(lp, intr, dev, "%s: packet rx\n", __func__);
sonic_rx(dev); /* got packet(s) */
SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */
}
* still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear)
*/
- if (sonic_debug > 2)
- printk("%s: tx done\n", dev->name);
+ netif_dbg(lp, intr, dev, "%s: tx done\n", __func__);
while (lp->tx_skb[entry] != NULL) {
if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0)
* check error conditions
*/
if (status & SONIC_INT_RFO) {
- if (sonic_debug > 1)
- printk("%s: rx fifo overrun\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n",
+ __func__);
lp->stats.rx_fifo_errors++;
SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */
}
if (status & SONIC_INT_RDE) {
- if (sonic_debug > 1)
- printk("%s: rx descriptors exhausted\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n",
+ __func__);
lp->stats.rx_dropped++;
SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */
}
if (status & SONIC_INT_RBAE) {
- if (sonic_debug > 1)
- printk("%s: rx buffer area exceeded\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n",
+ __func__);
lp->stats.rx_dropped++;
SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */
}
/* transmit error */
if (status & SONIC_INT_TXER) {
- if ((SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) && (sonic_debug > 2))
- printk(KERN_ERR "%s: tx fifo underrun\n", dev->name);
+ if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU)
+ netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n",
+ __func__);
SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */
}
if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff;
SONIC_WRITE(SONIC_RWP, lp->cur_rwp);
if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) {
- if (sonic_debug > 2)
- printk("%s: rx buffer exhausted\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx buffer exhausted\n",
+ __func__);
SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */
}
} else
(netdev_mc_count(dev) > 15)) {
rcr |= SONIC_RCR_AMC;
} else {
- if (sonic_debug > 2)
- printk("sonic_multicast_list: mc_count %d\n",
- netdev_mc_count(dev));
+ netif_dbg(lp, ifup, dev, "%s: mc_count %d\n", __func__,
+ netdev_mc_count(dev));
sonic_set_cam_enable(dev, 1); /* always enable our own address */
i = 1;
netdev_for_each_mc_addr(ha, dev) {
}
}
- if (sonic_debug > 2)
- printk("sonic_multicast_list: setting RCR=%x\n", rcr);
+ netif_dbg(lp, ifup, dev, "%s: setting RCR=%x\n", __func__, rcr);
SONIC_WRITE(SONIC_RCR, rcr);
}
/*
* initialize the receive resource area
*/
- if (sonic_debug > 2)
- printk("sonic_init: initialize receive resource area\n");
+ netif_dbg(lp, ifup, dev, "%s: initialize receive resource area\n",
+ __func__);
for (i = 0; i < SONIC_NUM_RRS; i++) {
u16 bufadr_l = (unsigned long)lp->rx_laddr[i] & 0xffff;
SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 2 : 1));
/* load the resource pointers */
- if (sonic_debug > 3)
- printk("sonic_init: issuing RRRA command\n");
+ netif_dbg(lp, ifup, dev, "%s: issuing RRRA command\n", __func__);
SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA);
i = 0;
break;
}
- if (sonic_debug > 2)
- printk("sonic_init: status=%x i=%d\n", SONIC_READ(SONIC_CMD), i);
+ netif_dbg(lp, ifup, dev, "%s: status=%x, i=%d\n", __func__,
+ SONIC_READ(SONIC_CMD), i);
/*
* Initialize the receive descriptors so that they
* become a circular linked list, ie. let the last
* descriptor point to the first again.
*/
- if (sonic_debug > 2)
- printk("sonic_init: initialize receive descriptors\n");
+ netif_dbg(lp, ifup, dev, "%s: initialize receive descriptors\n",
+ __func__);
+
for (i=0; i<SONIC_NUM_RDS; i++) {
sonic_rda_put(dev, i, SONIC_RD_STATUS, 0);
sonic_rda_put(dev, i, SONIC_RD_PKTLEN, 0);
/*
* initialize transmit descriptors
*/
- if (sonic_debug > 2)
- printk("sonic_init: initialize transmit descriptors\n");
+ netif_dbg(lp, ifup, dev, "%s: initialize transmit descriptors\n",
+ __func__);
+
for (i = 0; i < SONIC_NUM_TDS; i++) {
sonic_tda_put(dev, i, SONIC_TD_STATUS, 0);
sonic_tda_put(dev, i, SONIC_TD_CONFIG, 0);
if (SONIC_READ(SONIC_ISR) & SONIC_INT_LCD)
break;
}
- if (sonic_debug > 2) {
- printk("sonic_init: CMD=%x, ISR=%x\n, i=%d",
- SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
- }
+ netif_dbg(lp, ifup, dev, "%s: CMD=%x, ISR=%x, i=%d\n", __func__,
+ SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
/*
* enable receiver, disable loopback
if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0)
printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd);
- if (sonic_debug > 2)
- printk("sonic_init: new status=%x\n",
- SONIC_READ(SONIC_CMD));
+ netif_dbg(lp, ifup, dev, "%s: new status=%x\n", __func__,
+ SONIC_READ(SONIC_CMD));
return 0;
}
unsigned int eol_rx;
unsigned int eol_tx; /* last unacked transmit packet */
unsigned int next_tx; /* next free TD */
+ int msg_enable;
struct device *device; /* generic device */
struct net_device_stats stats;
};
static void sonic_multicast_list(struct net_device *dev);
static int sonic_init(struct net_device *dev);
static void sonic_tx_timeout(struct net_device *dev);
+static void sonic_msg_init(struct net_device *dev);
/* Internal inlines for reading/writing DMA buffers. Note that bus
size and endianness matter here, whereas they don't for registers,
#define SONIC_WRITE(reg,val) \
*((volatile unsigned int *)dev->base_addr+reg) = val
-
-/* Use 0 for production, 1 for verification, and >2 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
/*
* We cannot use station (ethernet) address prefixes to detect the
* sonic controller since these are board manufacturer depended.
static int __init sonic_probe1(struct net_device *dev)
{
- static unsigned version_printed = 0;
unsigned int silicon_revision;
struct sonic_local *lp = netdev_priv(dev);
unsigned int base_addr = dev->base_addr;
* the expected location.
*/
silicon_revision = SONIC_READ(SONIC_SR);
- if (sonic_debug > 1)
- printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
i = 0;
while ((known_revisions[i] != 0xffff) &&
(known_revisions[i] != silicon_revision))
i++;
if (known_revisions[i] == 0xffff) {
- printk("SONIC ethernet controller not found (0x%4x)\n",
- silicon_revision);
+ pr_info("SONIC ethernet controller not found (0x%4x)\n",
+ silicon_revision);
return -ENODEV;
}
- if (sonic_debug && version_printed++ == 0)
- printk(version);
-
/*
* Put the sonic into software reset, then retrieve ethernet address.
* Note: we are assuming that the boot-loader has initialized the cam.
if ((err = sonic_probe1(dev)))
goto out;
+
+ pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
+ sonic_msg_init(dev);
+
if ((err = register_netdev(dev)))
goto out1;
- printk("%s: SONIC ethernet @%08lx, MAC %pM, IRQ %d\n", dev->name,
- dev->base_addr, dev->dev_addr, dev->irq);
-
return 0;
out1:
}
MODULE_DESCRIPTION("Xtensa XT2000 SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "xtsonic debug level (1-4)");
#include "sonic.c"
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
#define NFP_FLOWER_MASK_MPLS_BOS BIT(8)
#define NFP_FLOWER_MASK_MPLS_Q BIT(0)
+/* Compressed HW representation of TCP Flags */
+#define NFP_FL_TCP_FLAG_URG BIT(4)
+#define NFP_FL_TCP_FLAG_PSH BIT(3)
+#define NFP_FL_TCP_FLAG_RST BIT(2)
+#define NFP_FL_TCP_FLAG_SYN BIT(1)
+#define NFP_FL_TCP_FLAG_FIN BIT(0)
+
#define NFP_FL_SC_ACT_DROP 0x80000000
#define NFP_FL_SC_ACT_USER 0x7D000000
#define NFP_FL_SC_ACT_POPV 0x6A000000
* 3 2 1
* 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | DSCP |ECN| protocol | reserved |
+ * | DSCP |ECN| protocol | ttl | flags |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | ipv4_addr_src |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
u8 tos;
u8 proto;
u8 ttl;
- u8 reserved;
+ u8 flags;
__be32 ipv4_src;
__be32 ipv4_dst;
};
#include <linux/time64.h>
#include <linux/types.h>
#include <net/pkt_cls.h>
+#include <net/tcp.h>
#include <linux/workqueue.h>
struct net_device;
frame->tos = flow_ip->tos;
frame->ttl = flow_ip->ttl;
}
+
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_dissector_key_tcp *tcp;
+ u32 tcp_flags;
+
+ tcp = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_TCP, target);
+ tcp_flags = be16_to_cpu(tcp->flags);
+
+ if (tcp_flags & TCPHDR_FIN)
+ frame->flags |= NFP_FL_TCP_FLAG_FIN;
+ if (tcp_flags & TCPHDR_SYN)
+ frame->flags |= NFP_FL_TCP_FLAG_SYN;
+ if (tcp_flags & TCPHDR_RST)
+ frame->flags |= NFP_FL_TCP_FLAG_RST;
+ if (tcp_flags & TCPHDR_PSH)
+ frame->flags |= NFP_FL_TCP_FLAG_PSH;
+ if (tcp_flags & TCPHDR_URG)
+ frame->flags |= NFP_FL_TCP_FLAG_URG;
+ }
}
static void
#include "../nfp_net.h"
#include "../nfp_port.h"
+#define NFP_FLOWER_SUPPORTED_TCPFLAGS \
+ (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \
+ TCPHDR_PSH | TCPHDR_URG)
+
#define NFP_FLOWER_WHITELIST_DISSECTOR \
(BIT(FLOW_DISSECTOR_KEY_CONTROL) | \
BIT(FLOW_DISSECTOR_KEY_BASIC) | \
BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | \
+ BIT(FLOW_DISSECTOR_KEY_TCP) | \
BIT(FLOW_DISSECTOR_KEY_PORTS) | \
BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_VLAN) | \
}
}
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_dissector_key_tcp *tcp;
+ u32 tcp_flags;
+
+ tcp = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_TCP,
+ flow->key);
+ tcp_flags = be16_to_cpu(tcp->flags);
+
+ if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS)
+ return -EOPNOTSUPP;
+
+ /* We only support PSH and URG flags when either
+ * FIN, SYN or RST is present as well.
+ */
+ if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) &&
+ !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST)))
+ return -EOPNOTSUPP;
+
+ /* We need to store TCP flags in the IPv4 key space, thus
+ * we need to ensure we include a IPv4 key layer if we have
+ * not done so already.
+ */
+ if (!(key_layer & NFP_FLOWER_LAYER_IPV4)) {
+ key_layer |= NFP_FLOWER_LAYER_IPV4;
+ key_size += sizeof(struct nfp_flower_ipv4);
+ }
+ }
+
ret_key_ls->key_layer = key_layer;
ret_key_ls->key_layer_two = key_layer_two;
ret_key_ls->key_size = key_size;
MODULE_FIRMWARE("netronome/nic_AMDA0097-0001_8x10.nffw");
MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x10.nffw");
MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x25.nffw");
+MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_1x10_1x25.nffw");
MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>");
MODULE_LICENSE("GPL");
/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
+ * Copyright (C) 2015-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
* The configuration BAR is 8K in size, but due to
* THB-350, 32k needs to be reserved.
*/
-#define NFP_NET_CFG_BAR_SZ (32 * 1024)
+#define NFP_NET_CFG_BAR_SZ (32 * 1024)
/**
* Offset in Freelist buffer where packet starts on RX
*/
-#define NFP_NET_RX_OFFSET 32
+#define NFP_NET_RX_OFFSET 32
/**
* LSO parameters
#define NFP_NET_META_PORTID 5
#define NFP_NET_META_CSUM 6 /* checksum complete type */
-#define NFP_META_PORT_ID_CTRL ~0U
+#define NFP_META_PORT_ID_CTRL ~0U
/**
* Hash type pre-pended when a RSS hash was computed
*/
-#define NFP_NET_RSS_NONE 0
-#define NFP_NET_RSS_IPV4 1
-#define NFP_NET_RSS_IPV6 2
-#define NFP_NET_RSS_IPV6_EX 3
-#define NFP_NET_RSS_IPV4_TCP 4
-#define NFP_NET_RSS_IPV6_TCP 5
-#define NFP_NET_RSS_IPV6_EX_TCP 6
-#define NFP_NET_RSS_IPV4_UDP 7
-#define NFP_NET_RSS_IPV6_UDP 8
-#define NFP_NET_RSS_IPV6_EX_UDP 9
+#define NFP_NET_RSS_NONE 0
+#define NFP_NET_RSS_IPV4 1
+#define NFP_NET_RSS_IPV6 2
+#define NFP_NET_RSS_IPV6_EX 3
+#define NFP_NET_RSS_IPV4_TCP 4
+#define NFP_NET_RSS_IPV6_TCP 5
+#define NFP_NET_RSS_IPV6_EX_TCP 6
+#define NFP_NET_RSS_IPV4_UDP 7
+#define NFP_NET_RSS_IPV6_UDP 8
+#define NFP_NET_RSS_IPV6_EX_UDP 9
/**
* Ring counts
- * %NFP_NET_TXR_MAX: Maximum number of TX rings
- * %NFP_NET_RXR_MAX: Maximum number of RX rings
+ * %NFP_NET_TXR_MAX: Maximum number of TX rings
+ * %NFP_NET_RXR_MAX: Maximum number of RX rings
*/
-#define NFP_NET_TXR_MAX 64
-#define NFP_NET_RXR_MAX 64
+#define NFP_NET_TXR_MAX 64
+#define NFP_NET_RXR_MAX 64
/**
* Read/Write config words (0x0000 - 0x002c)
- * %NFP_NET_CFG_CTRL: Global control
+ * %NFP_NET_CFG_CTRL: Global control
* %NFP_NET_CFG_UPDATE: Indicate which fields are updated
* %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
* %NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings
- * %NFP_NET_CFG_MTU: Set MTU size
+ * %NFP_NET_CFG_MTU: Set MTU size
* %NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU)
- * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions
- * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes
+ * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions
+ * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes
* %NFP_NET_CFG_MACADDR: MAC address
*
* TODO:
* - define Error details in UPDATE
*/
-#define NFP_NET_CFG_CTRL 0x0000
-#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */
-#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */
-#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */
-#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */
-#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */
-#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */
-#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */
-#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */
-#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */
-#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */
-#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */
+#define NFP_NET_CFG_CTRL 0x0000
+#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */
+#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */
+#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */
+#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */
+#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */
+#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */
+#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */
+#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */
+#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */
+#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */
+#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */
#define NFP_NET_CFG_CTRL_CTAG_FILTER (0x1 << 11) /* VLAN CTAG filtering */
-#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */
+#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */
#define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS (version 1) */
-#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */
-#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */
-#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */
-#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/
-#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */
+#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */
+#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */
+#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */
+#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/
+#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */
#define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
#define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */
#define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */
#define NFP_NET_CFG_CTRL_CHAIN_META (NFP_NET_CFG_CTRL_RSS2 | \
NFP_NET_CFG_CTRL_CSUM_COMPLETE)
-#define NFP_NET_CFG_UPDATE 0x0004
-#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */
-#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */
-#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */
-#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */
-#define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */
-#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */
-#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */
-#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */
-#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */
+#define NFP_NET_CFG_UPDATE 0x0004
+#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */
+#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */
+#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */
+#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */
+#define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */
+#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */
+#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */
+#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */
+#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */
#define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */
#define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */
#define NFP_NET_CFG_UPDATE_MACADDR (0x1 << 11) /* MAC address change */
#define NFP_NET_CFG_UPDATE_MBOX (0x1 << 12) /* Mailbox update */
#define NFP_NET_CFG_UPDATE_VF (0x1 << 13) /* VF settings change */
-#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */
-#define NFP_NET_CFG_TXRS_ENABLE 0x0008
-#define NFP_NET_CFG_RXRS_ENABLE 0x0010
-#define NFP_NET_CFG_MTU 0x0018
-#define NFP_NET_CFG_FLBUFSZ 0x001c
-#define NFP_NET_CFG_EXN 0x001f
-#define NFP_NET_CFG_LSC 0x0020
-#define NFP_NET_CFG_MACADDR 0x0024
+#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */
+#define NFP_NET_CFG_TXRS_ENABLE 0x0008
+#define NFP_NET_CFG_RXRS_ENABLE 0x0010
+#define NFP_NET_CFG_MTU 0x0018
+#define NFP_NET_CFG_FLBUFSZ 0x001c
+#define NFP_NET_CFG_EXN 0x001f
+#define NFP_NET_CFG_LSC 0x0020
+#define NFP_NET_CFG_MACADDR 0x0024
/**
* Read-only words (0x0030 - 0x0050):
* %NFP_NET_CFG_VERSION: Firmware version number
- * %NFP_NET_CFG_STS: Status
- * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL)
+ * %NFP_NET_CFG_STS: Status
+ * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL)
* %NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings
* %NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings
* %NFP_NET_CFG_MAX_MTU: Maximum support MTU
* TODO:
* - define more STS bits
*/
-#define NFP_NET_CFG_VERSION 0x0030
+#define NFP_NET_CFG_VERSION 0x0030
#define NFP_NET_CFG_VERSION_RESERVED_MASK (0xff << 24)
#define NFP_NET_CFG_VERSION_CLASS_MASK (0xff << 16)
-#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16)
+#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16)
#define NFP_NET_CFG_VERSION_CLASS_GENERIC 0
#define NFP_NET_CFG_VERSION_MAJOR_MASK (0xff << 8)
-#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8)
+#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8)
#define NFP_NET_CFG_VERSION_MINOR_MASK (0xff << 0)
-#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0)
-#define NFP_NET_CFG_STS 0x0034
-#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */
+#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0)
+#define NFP_NET_CFG_STS 0x0034
+#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */
/* Link rate */
#define NFP_NET_CFG_STS_LINK_RATE_SHIFT 1
#define NFP_NET_CFG_STS_LINK_RATE_MASK 0xF
-#define NFP_NET_CFG_STS_LINK_RATE \
+#define NFP_NET_CFG_STS_LINK_RATE \
(NFP_NET_CFG_STS_LINK_RATE_MASK << NFP_NET_CFG_STS_LINK_RATE_SHIFT)
#define NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED 0
-#define NFP_NET_CFG_STS_LINK_RATE_UNKNOWN 1
-#define NFP_NET_CFG_STS_LINK_RATE_1G 2
-#define NFP_NET_CFG_STS_LINK_RATE_10G 3
-#define NFP_NET_CFG_STS_LINK_RATE_25G 4
-#define NFP_NET_CFG_STS_LINK_RATE_40G 5
-#define NFP_NET_CFG_STS_LINK_RATE_50G 6
-#define NFP_NET_CFG_STS_LINK_RATE_100G 7
-#define NFP_NET_CFG_CAP 0x0038
-#define NFP_NET_CFG_MAX_TXRINGS 0x003c
-#define NFP_NET_CFG_MAX_RXRINGS 0x0040
-#define NFP_NET_CFG_MAX_MTU 0x0044
+#define NFP_NET_CFG_STS_LINK_RATE_UNKNOWN 1
+#define NFP_NET_CFG_STS_LINK_RATE_1G 2
+#define NFP_NET_CFG_STS_LINK_RATE_10G 3
+#define NFP_NET_CFG_STS_LINK_RATE_25G 4
+#define NFP_NET_CFG_STS_LINK_RATE_40G 5
+#define NFP_NET_CFG_STS_LINK_RATE_50G 6
+#define NFP_NET_CFG_STS_LINK_RATE_100G 7
+#define NFP_NET_CFG_CAP 0x0038
+#define NFP_NET_CFG_MAX_TXRINGS 0x003c
+#define NFP_NET_CFG_MAX_RXRINGS 0x0040
+#define NFP_NET_CFG_MAX_MTU 0x0044
/* Next two words are being used by VFs for solving THB350 issue */
-#define NFP_NET_CFG_START_TXQ 0x0048
-#define NFP_NET_CFG_START_RXQ 0x004c
+#define NFP_NET_CFG_START_TXQ 0x0048
+#define NFP_NET_CFG_START_RXQ 0x004c
/**
* Prepend configuration
/**
* 40B reserved for future use (0x0098 - 0x00c0)
*/
-#define NFP_NET_CFG_RESERVED 0x0098
-#define NFP_NET_CFG_RESERVED_SZ 0x0028
+#define NFP_NET_CFG_RESERVED 0x0098
+#define NFP_NET_CFG_RESERVED_SZ 0x0028
/**
* RSS configuration (0x0100 - 0x01ac):
* %NFP_NET_CFG_RSS_KEY: RSS "secret" key
* %NFP_NET_CFG_RSS_ITBL: RSS indirection table
*/
-#define NFP_NET_CFG_RSS_BASE 0x0100
-#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE
-#define NFP_NET_CFG_RSS_MASK (0x7f)
-#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f)
-#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */
-#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */
-#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */
-#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */
-#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */
-#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */
+#define NFP_NET_CFG_RSS_BASE 0x0100
+#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE
+#define NFP_NET_CFG_RSS_MASK (0x7f)
+#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f)
+#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */
+#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */
+#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */
+#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */
+#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */
+#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */
#define NFP_NET_CFG_RSS_HFUNC 0xff000000
-#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */
+#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */
#define NFP_NET_CFG_RSS_XOR (1 << 25) /* Use XOR as hash */
#define NFP_NET_CFG_RSS_CRC32 (1 << 26) /* Use CRC32 as hash */
#define NFP_NET_CFG_RSS_HFUNCS 3
-#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4)
-#define NFP_NET_CFG_RSS_KEY_SZ 0x28
-#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \
+#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4)
+#define NFP_NET_CFG_RSS_KEY_SZ 0x28
+#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \
NFP_NET_CFG_RSS_KEY_SZ)
-#define NFP_NET_CFG_RSS_ITBL_SZ 0x80
+#define NFP_NET_CFG_RSS_ITBL_SZ 0x80
/**
* TX ring configuration (0x200 - 0x800)
* %NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries)
* %NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet
*/
-#define NFP_NET_CFG_TXR_BASE 0x0200
-#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8))
-#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \
+#define NFP_NET_CFG_TXR_BASE 0x0200
+#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8))
+#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \
((_x) * 0x8))
-#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x))
-#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x))
-#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x))
+#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x))
+#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x))
+#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x))
#define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \
((_x) * 0x4))
* %NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries)
* %NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries)
*/
-#define NFP_NET_CFG_RXR_BASE 0x0800
-#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8))
-#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x))
-#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x))
-#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x))
+#define NFP_NET_CFG_RXR_BASE 0x0800
+#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8))
+#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x))
+#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x))
+#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x))
#define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \
((_x) * 0x4))
* the MSI-X entry and the host driver must clear the register to
* re-enable the interrupt.
*/
-#define NFP_NET_CFG_ICR_BASE 0x0c00
-#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x))
-#define NFP_NET_CFG_ICR_UNMASKED 0x0
-#define NFP_NET_CFG_ICR_RXTX 0x1
-#define NFP_NET_CFG_ICR_LSC 0x2
+#define NFP_NET_CFG_ICR_BASE 0x0c00
+#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x))
+#define NFP_NET_CFG_ICR_UNMASKED 0x0
+#define NFP_NET_CFG_ICR_RXTX 0x1
+#define NFP_NET_CFG_ICR_LSC 0x2
/**
* General device stats (0x0d00 - 0x0d90)
* all counters are 64bit.
*/
-#define NFP_NET_CFG_STATS_BASE 0x0d00
-#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00)
-#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08)
-#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10)
-#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18)
-#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20)
-#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28)
-#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30)
-#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38)
-#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40)
-
-#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48)
-#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50)
-#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58)
-#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60)
-#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68)
-#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70)
-#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78)
-#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80)
-#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88)
+#define NFP_NET_CFG_STATS_BASE 0x0d00
+#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00)
+#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08)
+#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10)
+#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18)
+#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20)
+#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28)
+#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30)
+#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38)
+#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40)
+
+#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48)
+#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50)
+#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58)
+#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60)
+#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68)
+#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70)
+#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78)
+#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80)
+#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88)
#define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90)
#define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98)
* %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count)
* %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count)
*/
-#define NFP_NET_CFG_TXR_STATS_BASE 0x1000
-#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \
+#define NFP_NET_CFG_TXR_STATS_BASE 0x1000
+#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \
((_x) * 0x10))
-#define NFP_NET_CFG_RXR_STATS_BASE 0x1400
-#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \
+#define NFP_NET_CFG_RXR_STATS_BASE 0x1400
+#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \
((_x) * 0x10))
/**
* %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV
* %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
* %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV
- * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments
+ * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments
* %NFP_NET_CFG_TLV_VALUE: Offset of value with the TLV
*
* List of simple TLV structures, first one starts at %NFP_NET_CFG_TLV_BASE.
* Note that the 4 byte TLV header is not counted in %NFP_NET_CFG_TLV_LENGTH.
*/
#define NFP_NET_CFG_TLV_TYPE 0x00
-#define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000
+#define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000
#define NFP_NET_CFG_TLV_LENGTH 0x02
#define NFP_NET_CFG_TLV_LENGTH_INC 4
#define NFP_NET_CFG_TLV_VALUE 0x04
-#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000
+#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000
#define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000
#define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
#define QED_IWARP_MAX_BDS_PER_FPDU 3
-char *pkt_type_str[] = {
+static const char * const pkt_type_str[] = {
"QED_IWARP_MPA_PKT_PACKED",
"QED_IWARP_MPA_PKT_PARTIAL",
"QED_IWARP_MPA_PKT_UNALIGNED"
/* Local Definitions and Declarations */
-struct rmnet_walk_data {
- struct net_device *real_dev;
- struct list_head *head;
- struct rmnet_port *port;
-};
-
static int rmnet_is_real_dev_registered(const struct net_device *real_dev)
{
return rcu_access_pointer(real_dev->rx_handler) == rmnet_rx_handler;
static void rmnet_unregister_bridge(struct net_device *dev,
struct rmnet_port *port)
{
- struct net_device *rmnet_dev, *bridge_dev;
struct rmnet_port *bridge_port;
+ struct net_device *bridge_dev;
if (port->rmnet_mode != RMNET_EPMODE_BRIDGE)
return;
/* bridge slave handling */
if (!port->nr_rmnet_devs) {
- rmnet_dev = netdev_master_upper_dev_get_rcu(dev);
- netdev_upper_dev_unlink(dev, rmnet_dev);
-
bridge_dev = port->bridge_ep;
bridge_port = rmnet_get_port_rtnl(bridge_dev);
bridge_dev = port->bridge_ep;
bridge_port = rmnet_get_port_rtnl(bridge_dev);
- rmnet_dev = netdev_master_upper_dev_get_rcu(bridge_dev);
- netdev_upper_dev_unlink(bridge_dev, rmnet_dev);
-
rmnet_unregister_real_device(bridge_dev, bridge_port);
}
}
if (err)
goto err1;
- err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL, extack);
- if (err)
- goto err2;
-
port->rmnet_mode = mode;
hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
return 0;
-err2:
- rmnet_vnd_dellink(mux_id, port, ep);
err1:
rmnet_unregister_real_device(real_dev, port);
err0:
static void rmnet_dellink(struct net_device *dev, struct list_head *head)
{
+ struct rmnet_priv *priv = netdev_priv(dev);
struct net_device *real_dev;
struct rmnet_endpoint *ep;
struct rmnet_port *port;
u8 mux_id;
- rcu_read_lock();
- real_dev = netdev_master_upper_dev_get_rcu(dev);
- rcu_read_unlock();
+ real_dev = priv->real_dev;
if (!real_dev || !rmnet_is_real_dev_registered(real_dev))
return;
port = rmnet_get_port_rtnl(real_dev);
mux_id = rmnet_vnd_get_mux(dev);
- netdev_upper_dev_unlink(dev, real_dev);
ep = rmnet_get_endpoint(port, mux_id);
if (ep) {
unregister_netdevice_queue(dev, head);
}
-static int rmnet_dev_walk_unreg(struct net_device *rmnet_dev, void *data)
-{
- struct rmnet_walk_data *d = data;
- struct rmnet_endpoint *ep;
- u8 mux_id;
-
- mux_id = rmnet_vnd_get_mux(rmnet_dev);
- ep = rmnet_get_endpoint(d->port, mux_id);
- if (ep) {
- hlist_del_init_rcu(&ep->hlnode);
- rmnet_vnd_dellink(mux_id, d->port, ep);
- kfree(ep);
- }
- netdev_upper_dev_unlink(rmnet_dev, d->real_dev);
- unregister_netdevice_queue(rmnet_dev, d->head);
-
- return 0;
-}
-
static void rmnet_force_unassociate_device(struct net_device *dev)
{
struct net_device *real_dev = dev;
- struct rmnet_walk_data d;
+ struct hlist_node *tmp_ep;
+ struct rmnet_endpoint *ep;
struct rmnet_port *port;
+ unsigned long bkt_ep;
LIST_HEAD(list);
if (!rmnet_is_real_dev_registered(real_dev))
ASSERT_RTNL();
- d.real_dev = real_dev;
- d.head = &list;
-
port = rmnet_get_port_rtnl(dev);
- d.port = port;
rcu_read_lock();
rmnet_unregister_bridge(dev, port);
- netdev_walk_all_lower_dev_rcu(real_dev, rmnet_dev_walk_unreg, &d);
+ hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) {
+ unregister_netdevice_queue(ep->egress_dev, &list);
+ rmnet_vnd_dellink(ep->mux_id, port, ep);
+
+ hlist_del_init_rcu(&ep->hlnode);
+ kfree(ep);
+ }
+
rcu_read_unlock();
unregister_netdevice_many(&list);
if (err)
return -EBUSY;
- err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL,
- extack);
- if (err)
- return -EINVAL;
-
slave_port = rmnet_get_port(slave_dev);
slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE;
slave_port->bridge_ep = real_dev;
port->rmnet_mode = RMNET_EPMODE_VND;
port->bridge_ep = NULL;
- netdev_upper_dev_unlink(slave_dev, rmnet_dev);
slave_port = rmnet_get_port(slave_dev);
rmnet_unregister_real_device(slave_dev, slave_port);
}
ep = rmnet_get_endpoint(port, mux_id);
+ if (!ep) {
+ kfree_skb(skb);
+ return RX_HANDLER_CONSUMED;
+ }
+
vnd = ep->egress_dev;
ip_family = cmd->flow_control.ip_family;
memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats));
for_each_possible_cpu(cpu) {
- pcpu_ptr = this_cpu_ptr(priv->pcpu_stats);
+ pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu);
do {
start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp);
#define RTL8169_PHY_TIMEOUT (10*HZ)
/* write/read MMIO register */
-#define RTL_W8(reg, val8) writeb ((val8), ioaddr + (reg))
-#define RTL_W16(reg, val16) writew ((val16), ioaddr + (reg))
-#define RTL_W32(reg, val32) writel ((val32), ioaddr + (reg))
-#define RTL_R8(reg) readb (ioaddr + (reg))
-#define RTL_R16(reg) readw (ioaddr + (reg))
-#define RTL_R32(reg) readl (ioaddr + (reg))
+#define RTL_W8(tp, reg, val8) writeb((val8), tp->mmio_addr + (reg))
+#define RTL_W16(tp, reg, val16) writew((val16), tp->mmio_addr + (reg))
+#define RTL_W32(tp, reg, val32) writel((val32), tp->mmio_addr + (reg))
+#define RTL_R8(tp, reg) readb(tp->mmio_addr + (reg))
+#define RTL_R16(tp, reg) readw(tp->mmio_addr + (reg))
+#define RTL_R32(tp, reg) readl(tp->mmio_addr + (reg))
enum mac_version {
RTL_GIGA_MAC_VER_01 = 0,
u8 __pad[sizeof(void *) - sizeof(u32)];
};
-enum features {
- RTL_FEATURE_WOL = (1 << 0),
- RTL_FEATURE_MSI = (1 << 1),
- RTL_FEATURE_GMII = (1 << 2),
-};
-
struct rtl8169_counters {
__le64 tx_packets;
__le64 rx_packets;
void (*phy_reset_enable)(struct rtl8169_private *tp);
void (*hw_start)(struct net_device *);
unsigned int (*phy_reset_pending)(struct rtl8169_private *tp);
- unsigned int (*link_ok)(void __iomem *);
+ unsigned int (*link_ok)(struct rtl8169_private *tp);
int (*do_ioctl)(struct rtl8169_private *tp, struct mii_ioctl_data *data, int cmd);
bool (*tso_csum)(struct rtl8169_private *, struct sk_buff *, u32 *);
DECLARE_RTL_COND(rtl_ocp_gphy_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(GPHY_OCP) & OCPAR_FLAG;
+ return RTL_R32(tp, GPHY_OCP) & OCPAR_FLAG;
}
static void r8168_phy_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_ocp_reg_failure(tp, reg))
return;
- RTL_W32(GPHY_OCP, OCPAR_FLAG | (reg << 15) | data);
+ RTL_W32(tp, GPHY_OCP, OCPAR_FLAG | (reg << 15) | data);
rtl_udelay_loop_wait_low(tp, &rtl_ocp_gphy_cond, 25, 10);
}
static u16 r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_ocp_reg_failure(tp, reg))
return 0;
- RTL_W32(GPHY_OCP, reg << 15);
+ RTL_W32(tp, GPHY_OCP, reg << 15);
return rtl_udelay_loop_wait_high(tp, &rtl_ocp_gphy_cond, 25, 10) ?
- (RTL_R32(GPHY_OCP) & 0xffff) : ~0;
+ (RTL_R32(tp, GPHY_OCP) & 0xffff) : ~0;
}
static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_ocp_reg_failure(tp, reg))
return;
- RTL_W32(OCPDR, OCPAR_FLAG | (reg << 15) | data);
+ RTL_W32(tp, OCPDR, OCPAR_FLAG | (reg << 15) | data);
}
static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_ocp_reg_failure(tp, reg))
return 0;
- RTL_W32(OCPDR, reg << 15);
+ RTL_W32(tp, OCPDR, reg << 15);
- return RTL_R32(OCPDR);
+ return RTL_R32(tp, OCPDR);
}
#define OCP_STD_PHY_BASE 0xa400
DECLARE_RTL_COND(rtl_phyar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(PHYAR) & 0x80000000;
+ return RTL_R32(tp, PHYAR) & 0x80000000;
}
static void r8169_mdio_write(struct rtl8169_private *tp, int reg, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(PHYAR, 0x80000000 | (reg & 0x1f) << 16 | (value & 0xffff));
+ RTL_W32(tp, PHYAR, 0x80000000 | (reg & 0x1f) << 16 | (value & 0xffff));
rtl_udelay_loop_wait_low(tp, &rtl_phyar_cond, 25, 20);
/*
static int r8169_mdio_read(struct rtl8169_private *tp, int reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
int value;
- RTL_W32(PHYAR, 0x0 | (reg & 0x1f) << 16);
+ RTL_W32(tp, PHYAR, 0x0 | (reg & 0x1f) << 16);
value = rtl_udelay_loop_wait_high(tp, &rtl_phyar_cond, 25, 20) ?
- RTL_R32(PHYAR) & 0xffff : ~0;
+ RTL_R32(tp, PHYAR) & 0xffff : ~0;
/*
* According to hardware specs a 20us delay is required after read
DECLARE_RTL_COND(rtl_ocpar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(OCPAR) & OCPAR_FLAG;
+ return RTL_R32(tp, OCPAR) & OCPAR_FLAG;
}
static void r8168dp_1_mdio_access(struct rtl8169_private *tp, int reg, u32 data)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(OCPDR, data | ((reg & OCPDR_REG_MASK) << OCPDR_GPHY_REG_SHIFT));
- RTL_W32(OCPAR, OCPAR_GPHY_WRITE_CMD);
- RTL_W32(EPHY_RXER_NUM, 0);
+ RTL_W32(tp, OCPDR, data | ((reg & OCPDR_REG_MASK) << OCPDR_GPHY_REG_SHIFT));
+ RTL_W32(tp, OCPAR, OCPAR_GPHY_WRITE_CMD);
+ RTL_W32(tp, EPHY_RXER_NUM, 0);
rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 1000, 100);
}
static int r8168dp_1_mdio_read(struct rtl8169_private *tp, int reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
r8168dp_1_mdio_access(tp, reg, OCPDR_READ_CMD);
mdelay(1);
- RTL_W32(OCPAR, OCPAR_GPHY_READ_CMD);
- RTL_W32(EPHY_RXER_NUM, 0);
+ RTL_W32(tp, OCPAR, OCPAR_GPHY_READ_CMD);
+ RTL_W32(tp, EPHY_RXER_NUM, 0);
return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 1000, 100) ?
- RTL_R32(OCPDR) & OCPDR_DATA_MASK : ~0;
+ RTL_R32(tp, OCPDR) & OCPDR_DATA_MASK : ~0;
}
#define R8168DP_1_MDIO_ACCESS_BIT 0x00020000
-static void r8168dp_2_mdio_start(void __iomem *ioaddr)
+static void r8168dp_2_mdio_start(struct rtl8169_private *tp)
{
- RTL_W32(0xd0, RTL_R32(0xd0) & ~R8168DP_1_MDIO_ACCESS_BIT);
+ RTL_W32(tp, 0xd0, RTL_R32(tp, 0xd0) & ~R8168DP_1_MDIO_ACCESS_BIT);
}
-static void r8168dp_2_mdio_stop(void __iomem *ioaddr)
+static void r8168dp_2_mdio_stop(struct rtl8169_private *tp)
{
- RTL_W32(0xd0, RTL_R32(0xd0) | R8168DP_1_MDIO_ACCESS_BIT);
+ RTL_W32(tp, 0xd0, RTL_R32(tp, 0xd0) | R8168DP_1_MDIO_ACCESS_BIT);
}
static void r8168dp_2_mdio_write(struct rtl8169_private *tp, int reg, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- r8168dp_2_mdio_start(ioaddr);
+ r8168dp_2_mdio_start(tp);
r8169_mdio_write(tp, reg, value);
- r8168dp_2_mdio_stop(ioaddr);
+ r8168dp_2_mdio_stop(tp);
}
static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
int value;
- r8168dp_2_mdio_start(ioaddr);
+ r8168dp_2_mdio_start(tp);
value = r8169_mdio_read(tp, reg);
- r8168dp_2_mdio_stop(ioaddr);
+ r8168dp_2_mdio_stop(tp);
return value;
}
DECLARE_RTL_COND(rtl_ephyar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(EPHYAR) & EPHYAR_FLAG;
+ return RTL_R32(tp, EPHYAR) & EPHYAR_FLAG;
}
static void rtl_ephy_write(struct rtl8169_private *tp, int reg_addr, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) |
+ RTL_W32(tp, EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) |
(reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
rtl_udelay_loop_wait_low(tp, &rtl_ephyar_cond, 10, 100);
static u16 rtl_ephy_read(struct rtl8169_private *tp, int reg_addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
+ RTL_W32(tp, EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_ephyar_cond, 10, 100) ?
- RTL_R32(EPHYAR) & EPHYAR_DATA_MASK : ~0;
+ RTL_R32(tp, EPHYAR) & EPHYAR_DATA_MASK : ~0;
}
DECLARE_RTL_COND(rtl_eriar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(ERIAR) & ERIAR_FLAG;
+ return RTL_R32(tp, ERIAR) & ERIAR_FLAG;
}
static void rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask,
u32 val, int type)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
BUG_ON((addr & 3) || (mask == 0));
- RTL_W32(ERIDR, val);
- RTL_W32(ERIAR, ERIAR_WRITE_CMD | type | mask | addr);
+ RTL_W32(tp, ERIDR, val);
+ RTL_W32(tp, ERIAR, ERIAR_WRITE_CMD | type | mask | addr);
rtl_udelay_loop_wait_low(tp, &rtl_eriar_cond, 100, 100);
}
static u32 rtl_eri_read(struct rtl8169_private *tp, int addr, int type)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
+ RTL_W32(tp, ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
return rtl_udelay_loop_wait_high(tp, &rtl_eriar_cond, 100, 100) ?
- RTL_R32(ERIDR) : ~0;
+ RTL_R32(tp, ERIDR) : ~0;
}
static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 mask, u32 p,
static u32 r8168dp_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
+ RTL_W32(tp, OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 100, 20) ?
- RTL_R32(OCPDR) : ~0;
+ RTL_R32(tp, OCPDR) : ~0;
}
static u32 r8168ep_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
static void r8168dp_ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg,
u32 data)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(OCPDR, data);
- RTL_W32(OCPAR, OCPAR_FLAG | ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
+ RTL_W32(tp, OCPDR, data);
+ RTL_W32(tp, OCPAR, OCPAR_FLAG | ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 100, 20);
}
DECLARE_RTL_COND(rtl_ocp_tx_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R8(IBISR0) & 0x20;
+ return RTL_R8(tp, IBISR0) & 0x20;
}
static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01);
+ RTL_W8(tp, IBCR2, RTL_R8(tp, IBCR2) & ~0x01);
rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000);
- RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20);
- RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01);
+ RTL_W8(tp, IBISR0, RTL_R8(tp, IBISR0) | 0x20);
+ RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
}
static void rtl8168dp_driver_start(struct rtl8169_private *tp)
}
}
-static int r8168dp_check_dash(struct rtl8169_private *tp)
+static bool r8168dp_check_dash(struct rtl8169_private *tp)
{
u16 reg = rtl8168_get_ocp_reg(tp);
- return (ocp_read(tp, 0x0f, reg) & 0x00008000) ? 1 : 0;
+ return !!(ocp_read(tp, 0x0f, reg) & 0x00008000);
}
-static int r8168ep_check_dash(struct rtl8169_private *tp)
+static bool r8168ep_check_dash(struct rtl8169_private *tp)
{
- return (ocp_read(tp, 0x0f, 0x128) & 0x00000001) ? 1 : 0;
+ return !!(ocp_read(tp, 0x0f, 0x128) & 0x00000001);
}
-static int r8168_check_dash(struct rtl8169_private *tp)
+static bool r8168_check_dash(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_27:
case RTL_GIGA_MAC_VER_51:
return r8168ep_check_dash(tp);
default:
- return 0;
+ return false;
}
}
DECLARE_RTL_COND(rtl_efusear_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(EFUSEAR) & EFUSEAR_FLAG;
+ return RTL_R32(tp, EFUSEAR) & EFUSEAR_FLAG;
}
static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
+ RTL_W32(tp, EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_efusear_cond, 100, 300) ?
- RTL_R32(EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
+ RTL_R32(tp, EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
}
static u16 rtl_get_events(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R16(IntrStatus);
+ return RTL_R16(tp, IntrStatus);
}
static void rtl_ack_events(struct rtl8169_private *tp, u16 bits)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W16(IntrStatus, bits);
+ RTL_W16(tp, IntrStatus, bits);
mmiowb();
}
static void rtl_irq_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W16(IntrMask, 0);
+ RTL_W16(tp, IntrMask, 0);
mmiowb();
}
static void rtl_irq_enable(struct rtl8169_private *tp, u16 bits)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W16(IntrMask, bits);
+ RTL_W16(tp, IntrMask, bits);
}
#define RTL_EVENT_NAPI_RX (RxOK | RxErr)
static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
rtl_irq_disable(tp);
rtl_ack_events(tp, RTL_EVENT_NAPI | tp->event_slow);
- RTL_R8(ChipCmd);
+ RTL_R8(tp, ChipCmd);
}
static unsigned int rtl8169_tbi_reset_pending(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(TBICSR) & TBIReset;
+ return RTL_R32(tp, TBICSR) & TBIReset;
}
static unsigned int rtl8169_xmii_reset_pending(struct rtl8169_private *tp)
return rtl_readphy(tp, MII_BMCR) & BMCR_RESET;
}
-static unsigned int rtl8169_tbi_link_ok(void __iomem *ioaddr)
+static unsigned int rtl8169_tbi_link_ok(struct rtl8169_private *tp)
{
- return RTL_R32(TBICSR) & TBILinkOk;
+ return RTL_R32(tp, TBICSR) & TBILinkOk;
}
-static unsigned int rtl8169_xmii_link_ok(void __iomem *ioaddr)
+static unsigned int rtl8169_xmii_link_ok(struct rtl8169_private *tp)
{
- return RTL_R8(PHYstatus) & LinkStatus;
+ return RTL_R8(tp, PHYstatus) & LinkStatus;
}
static void rtl8169_tbi_reset_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(TBICSR, RTL_R32(TBICSR) | TBIReset);
+ RTL_W32(tp, TBICSR, RTL_R32(tp, TBICSR) | TBIReset);
}
static void rtl8169_xmii_reset_enable(struct rtl8169_private *tp)
static void rtl_link_chg_patch(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct net_device *dev = tp->dev;
if (!netif_running(dev))
if (tp->mac_version == RTL_GIGA_MAC_VER_34 ||
tp->mac_version == RTL_GIGA_MAC_VER_38) {
- if (RTL_R8(PHYstatus) & _1000bpsF) {
+ if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
ERIAR_EXGMAC);
rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
ERIAR_EXGMAC);
- } else if (RTL_R8(PHYstatus) & _100bps) {
+ } else if (RTL_R8(tp, PHYstatus) & _100bps) {
rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f,
ERIAR_EXGMAC);
rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
ERIAR_EXGMAC);
} else if (tp->mac_version == RTL_GIGA_MAC_VER_35 ||
tp->mac_version == RTL_GIGA_MAC_VER_36) {
- if (RTL_R8(PHYstatus) & _1000bpsF) {
+ if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
ERIAR_EXGMAC);
rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
ERIAR_EXGMAC);
}
} else if (tp->mac_version == RTL_GIGA_MAC_VER_37) {
- if (RTL_R8(PHYstatus) & _10bps) {
+ if (RTL_R8(tp, PHYstatus) & _10bps) {
rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x4d02,
ERIAR_EXGMAC);
rtl_eri_write(tp, 0x1dc, ERIAR_MASK_0011, 0x0060,
}
static void rtl8169_check_link_status(struct net_device *dev,
- struct rtl8169_private *tp,
- void __iomem *ioaddr)
+ struct rtl8169_private *tp)
{
- if (tp->link_ok(ioaddr)) {
+ if (tp->link_ok(tp)) {
rtl_link_chg_patch(tp);
/* This is to cancel a scheduled suspend if there's one. */
pm_request_resume(&tp->pci_dev->dev);
static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
u8 options;
u32 wolopts = 0;
- options = RTL_R8(Config1);
+ options = RTL_R8(tp, Config1);
if (!(options & PMEnable))
return 0;
- options = RTL_R8(Config3);
+ options = RTL_R8(tp, Config3);
if (options & LinkUp)
wolopts |= WAKE_PHY;
switch (tp->mac_version) {
break;
}
- options = RTL_R8(Config5);
+ options = RTL_R8(tp, Config5);
if (options & UWF)
wolopts |= WAKE_UCAST;
if (options & BWF)
static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
{
- void __iomem *ioaddr = tp->mmio_addr;
unsigned int i, tmp;
static const struct {
u32 opt;
};
u8 options;
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_34:
}
for (i = 0; i < tmp; i++) {
- options = RTL_R8(cfg[i].reg) & ~cfg[i].mask;
+ options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask;
if (wolopts & cfg[i].opt)
options |= cfg[i].mask;
- RTL_W8(cfg[i].reg, options);
+ RTL_W8(tp, cfg[i].reg, options);
}
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_17:
- options = RTL_R8(Config1) & ~PMEnable;
+ options = RTL_R8(tp, Config1) & ~PMEnable;
if (wolopts)
options |= PMEnable;
- RTL_W8(Config1, options);
+ RTL_W8(tp, Config1, options);
break;
default:
- options = RTL_R8(Config2) & ~PME_SIGNAL;
+ options = RTL_R8(tp, Config2) & ~PME_SIGNAL;
if (wolopts)
options |= PME_SIGNAL;
- RTL_W8(Config2, options);
+ RTL_W8(tp, Config2, options);
break;
}
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
}
static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
rtl_lock_work(tp);
- if (wol->wolopts)
- tp->features |= RTL_FEATURE_WOL;
- else
- tp->features &= ~RTL_FEATURE_WOL;
if (pm_runtime_active(d))
__rtl8169_set_wol(tp, wol->wolopts);
else
u8 autoneg, u16 speed, u8 duplex, u32 ignored)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
int ret = 0;
u32 reg;
- reg = RTL_R32(TBICSR);
+ reg = RTL_R32(tp, TBICSR);
if ((autoneg == AUTONEG_DISABLE) && (speed == SPEED_1000) &&
(duplex == DUPLEX_FULL)) {
- RTL_W32(TBICSR, reg & ~(TBINwEnable | TBINwRestart));
+ RTL_W32(tp, TBICSR, reg & ~(TBINwEnable | TBINwRestart));
} else if (autoneg == AUTONEG_ENABLE)
- RTL_W32(TBICSR, reg | TBINwEnable | TBINwRestart);
+ RTL_W32(tp, TBICSR, reg | TBINwEnable | TBINwRestart);
else {
netif_warn(tp, link, dev,
"incorrect speed setting refused in TBI mode\n");
netdev_features_t features)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
u32 rx_config;
- rx_config = RTL_R32(RxConfig);
+ rx_config = RTL_R32(tp, RxConfig);
if (features & NETIF_F_RXALL)
rx_config |= (AcceptErr | AcceptRunt);
else
rx_config &= ~(AcceptErr | AcceptRunt);
- RTL_W32(RxConfig, rx_config);
+ RTL_W32(tp, RxConfig, rx_config);
if (features & NETIF_F_RXCSUM)
tp->cp_cmd |= RxChkSum;
else
tp->cp_cmd &= ~RxVlan;
- tp->cp_cmd |= RTL_R16(CPlusCmd) & ~(RxVlan | RxChkSum);
+ tp->cp_cmd |= RTL_R16(tp, CPlusCmd) & ~(RxVlan | RxChkSum);
- RTL_W16(CPlusCmd, tp->cp_cmd);
- RTL_R16(CPlusCmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+ RTL_R16(tp, CPlusCmd);
}
static int rtl8169_set_features(struct net_device *dev,
struct ethtool_link_ksettings *cmd)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
u32 status;
u32 supported, advertising;
SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | SUPPORTED_FIBRE;
cmd->base.port = PORT_FIBRE;
- status = RTL_R32(TBICSR);
+ status = RTL_R32(tp, TBICSR);
advertising = (status & TBINwEnable) ? ADVERTISED_Autoneg : 0;
cmd->base.autoneg = !!(status & TBINwEnable);
DECLARE_RTL_COND(rtl_counters_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(CounterAddrLow) & (CounterReset | CounterDump);
+ return RTL_R32(tp, CounterAddrLow) & (CounterReset | CounterDump);
}
static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
dma_addr_t paddr = tp->counters_phys_addr;
u32 cmd;
- RTL_W32(CounterAddrHigh, (u64)paddr >> 32);
- RTL_R32(CounterAddrHigh);
+ RTL_W32(tp, CounterAddrHigh, (u64)paddr >> 32);
+ RTL_R32(tp, CounterAddrHigh);
cmd = (u64)paddr & DMA_BIT_MASK(32);
- RTL_W32(CounterAddrLow, cmd);
- RTL_W32(CounterAddrLow, cmd | counter_cmd);
+ RTL_W32(tp, CounterAddrLow, cmd);
+ RTL_W32(tp, CounterAddrLow, cmd | counter_cmd);
return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
}
static bool rtl8169_update_counters(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
/*
* Some chips are unable to dump tally counters when the receiver
* is disabled.
*/
- if ((RTL_R8(ChipCmd) & CmdRxEnb) == 0)
+ if ((RTL_R8(tp, ChipCmd) & CmdRxEnb) == 0)
return true;
return rtl8169_do_counters(dev, CounterDump);
static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
const struct rtl_coalesce_info *ci;
const struct rtl_coalesce_scale *scale;
struct {
if (IS_ERR(ci))
return PTR_ERR(ci);
- scale = &ci->scalev[RTL_R16(CPlusCmd) & 3];
+ scale = &ci->scalev[RTL_R16(tp, CPlusCmd) & 3];
/* read IntrMitigate and adjust according to scale */
- for (w = RTL_R16(IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
+ for (w = RTL_R16(tp, IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
*p->max_frames = (w & RTL_COALESCE_MASK) << 2;
w >>= RTL_COALESCE_SHIFT;
*p->usecs = w & RTL_COALESCE_MASK;
static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
const struct rtl_coalesce_scale *scale;
struct {
u32 frames;
rtl_lock_work(tp);
- RTL_W16(IntrMitigate, swab16(w));
+ RTL_W16(tp, IntrMitigate, swab16(w));
tp->cp_cmd = (tp->cp_cmd & ~3) | cp01;
- RTL_W16(CPlusCmd, tp->cp_cmd);
- RTL_R16(CPlusCmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+ RTL_R16(tp, CPlusCmd);
rtl_unlock_work(tp);
static void rtl8169_get_mac_version(struct rtl8169_private *tp,
struct net_device *dev, u8 default_version)
{
- void __iomem *ioaddr = tp->mmio_addr;
/*
* The driver currently handles the 8168Bf and the 8168Be identically
* but they can be identified more specifically through the test below
* if needed:
*
- * (RTL_R32(TxConfig) & 0x700000) == 0x500000 ? 8168Bf : 8168Be
+ * (RTL_R32(tp, TxConfig) & 0x700000) == 0x500000 ? 8168Bf : 8168Be
*
* Same thing for the 8101Eb and the 8101Ec:
*
- * (RTL_R32(TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec
+ * (RTL_R32(tp, TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec
*/
static const struct rtl_mac_info {
u32 mask;
const struct rtl_mac_info *p = mac_info;
u32 reg;
- reg = RTL_R32(TxConfig);
+ reg = RTL_R32(tp, TxConfig);
while ((reg & p->mask) != p->val)
p++;
tp->mac_version = p->mac_version;
rtl_writephy(tp, 0x1f, 0x0005);
rtl_w0w1_phy(tp, 0x01, 0x0100, 0x0000);
rtl_writephy(tp, 0x1f, 0x0000);
- /* soft-reset phy */
- rtl_writephy(tp, MII_BMCR, BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART);
/* Broken BIOS workaround: feed GigaMAC registers with MAC address. */
rtl_rar_exgmac_set(tp, tp->dev->dev_addr);
static void rtl_phy_work(struct rtl8169_private *tp)
{
struct timer_list *timer = &tp->timer;
- void __iomem *ioaddr = tp->mmio_addr;
unsigned long timeout = RTL8169_PHY_TIMEOUT;
assert(tp->mac_version > RTL_GIGA_MAC_VER_01);
goto out_mod_timer;
}
- if (tp->link_ok(ioaddr))
+ if (tp->link_ok(tp))
return;
netif_dbg(tp, link, tp->dev, "PHY reset until link up\n");
static bool rtl_tbi_enabled(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
return (tp->mac_version == RTL_GIGA_MAC_VER_01) &&
- (RTL_R8(PHYstatus) & TBI_Enable);
+ (RTL_R8(tp, PHYstatus) & TBI_Enable);
}
static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
rtl_hw_phy_config(dev);
if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
- RTL_W8(0x82, 0x01);
+ RTL_W8(tp, 0x82, 0x01);
}
pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40);
if (tp->mac_version == RTL_GIGA_MAC_VER_02) {
dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
- RTL_W8(0x82, 0x01);
+ RTL_W8(tp, 0x82, 0x01);
dprintk("Set PHY Reg 0x0bh = 0x00h\n");
rtl_writephy(tp, 0x0b, 0x0000); //w 0x0b 15 0 0
}
static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
rtl_lock_work(tp);
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- RTL_W32(MAC4, addr[4] | addr[5] << 8);
- RTL_R32(MAC4);
+ RTL_W32(tp, MAC4, addr[4] | addr[5] << 8);
+ RTL_R32(tp, MAC4);
- RTL_W32(MAC0, addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24);
- RTL_R32(MAC0);
+ RTL_W32(tp, MAC0, addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24);
+ RTL_R32(tp, MAC0);
if (tp->mac_version == RTL_GIGA_MAC_VER_34)
rtl_rar_exgmac_set(tp, addr);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
rtl_unlock_work(tp);
}
static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_25:
case RTL_GIGA_MAC_VER_26:
case RTL_GIGA_MAC_VER_49:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
- RTL_W32(RxConfig, RTL_R32(RxConfig) |
+ RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) |
AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
break;
default:
static void r810x_pll_power_down(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_wol_pll_power_down(tp))
return;
case RTL_GIGA_MAC_VER_16:
break;
default:
- RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
break;
}
}
static void r810x_pll_power_up(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
r810x_phy_power_up(tp);
switch (tp->mac_version) {
break;
case RTL_GIGA_MAC_VER_47:
case RTL_GIGA_MAC_VER_48:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
break;
default:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
break;
}
}
static void r8168_pll_power_down(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
- tp->mac_version == RTL_GIGA_MAC_VER_28 ||
- tp->mac_version == RTL_GIGA_MAC_VER_31 ||
- tp->mac_version == RTL_GIGA_MAC_VER_49 ||
- tp->mac_version == RTL_GIGA_MAC_VER_50 ||
- tp->mac_version == RTL_GIGA_MAC_VER_51) &&
- r8168_check_dash(tp)) {
+ if (r8168_check_dash(tp))
return;
- }
if ((tp->mac_version == RTL_GIGA_MAC_VER_23 ||
tp->mac_version == RTL_GIGA_MAC_VER_24) &&
- (RTL_R16(CPlusCmd) & ASF)) {
+ (RTL_R16(tp, CPlusCmd) & ASF)) {
return;
}
case RTL_GIGA_MAC_VER_46:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
- RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
break;
case RTL_GIGA_MAC_VER_40:
case RTL_GIGA_MAC_VER_41:
case RTL_GIGA_MAC_VER_49:
rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0x00000000,
0xfc000000, ERIAR_EXGMAC);
- RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
break;
}
}
static void r8168_pll_power_up(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_25:
case RTL_GIGA_MAC_VER_26:
case RTL_GIGA_MAC_VER_31:
case RTL_GIGA_MAC_VER_32:
case RTL_GIGA_MAC_VER_33:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
break;
case RTL_GIGA_MAC_VER_44:
case RTL_GIGA_MAC_VER_45:
case RTL_GIGA_MAC_VER_46:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
break;
case RTL_GIGA_MAC_VER_40:
case RTL_GIGA_MAC_VER_41:
case RTL_GIGA_MAC_VER_49:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000,
0x00000000, ERIAR_EXGMAC);
break;
static void rtl_init_rxcfg(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_01:
case RTL_GIGA_MAC_VER_02:
case RTL_GIGA_MAC_VER_15:
case RTL_GIGA_MAC_VER_16:
case RTL_GIGA_MAC_VER_17:
- RTL_W32(RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
+ RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
break;
case RTL_GIGA_MAC_VER_18:
case RTL_GIGA_MAC_VER_19:
case RTL_GIGA_MAC_VER_24:
case RTL_GIGA_MAC_VER_34:
case RTL_GIGA_MAC_VER_35:
- RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
+ RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
break;
case RTL_GIGA_MAC_VER_40:
case RTL_GIGA_MAC_VER_41:
case RTL_GIGA_MAC_VER_49:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
- RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
+ RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
break;
default:
- RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST);
+ RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST);
break;
}
}
static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
rtl_generic_op(tp, tp->jumbo_ops.enable);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
}
static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
rtl_generic_op(tp, tp->jumbo_ops.disable);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
}
static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
- RTL_W8(Config4, RTL_R8(Config4) | Jumbo_En1);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) | Jumbo_En1);
rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
}
static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
- RTL_W8(Config4, RTL_R8(Config4) & ~Jumbo_En1);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~Jumbo_En1);
rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
}
static void r8168dp_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
}
static void r8168dp_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
}
static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(MaxTxPacketSize, 0x3f);
- RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
- RTL_W8(Config4, RTL_R8(Config4) | 0x01);
+ RTL_W8(tp, MaxTxPacketSize, 0x3f);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) | 0x01);
rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
}
static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(MaxTxPacketSize, 0x0c);
- RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
- RTL_W8(Config4, RTL_R8(Config4) & ~0x01);
+ RTL_W8(tp, MaxTxPacketSize, 0x0c);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01);
rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
}
static void r8168b_1_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
r8168b_0_hw_jumbo_enable(tp);
- RTL_W8(Config4, RTL_R8(Config4) | (1 << 0));
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) | (1 << 0));
}
static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
r8168b_0_hw_jumbo_disable(tp);
- RTL_W8(Config4, RTL_R8(Config4) & ~(1 << 0));
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
}
static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
DECLARE_RTL_COND(rtl_chipcmd_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R8(ChipCmd) & CmdReset;
+ return RTL_R8(tp, ChipCmd) & CmdReset;
}
static void rtl_hw_reset(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(ChipCmd, CmdReset);
+ RTL_W8(tp, ChipCmd, CmdReset);
rtl_udelay_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
}
static void rtl_rx_close(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(RxConfig, RTL_R32(RxConfig) & ~RX_CONFIG_ACCEPT_MASK);
+ RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) & ~RX_CONFIG_ACCEPT_MASK);
}
DECLARE_RTL_COND(rtl_npq_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R8(TxPoll) & NPQ;
+ return RTL_R8(tp, TxPoll) & NPQ;
}
DECLARE_RTL_COND(rtl_txcfg_empty_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(TxConfig) & TXCFG_EMPTY;
+ return RTL_R32(tp, TxConfig) & TXCFG_EMPTY;
}
static void rtl8169_hw_reset(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
/* Disable interrupts */
rtl8169_irq_mask_and_ack(tp);
tp->mac_version == RTL_GIGA_MAC_VER_49 ||
tp->mac_version == RTL_GIGA_MAC_VER_50 ||
tp->mac_version == RTL_GIGA_MAC_VER_51) {
- RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
+ RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
} else {
- RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
+ RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
udelay(100);
}
static void rtl_set_rx_tx_config_registers(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
/* Set DMA burst size and Interframe Gap Time */
- RTL_W32(TxConfig, (TX_DMA_BURST << TxDMAShift) |
+ RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) |
(InterFrameGap << TxInterFrameGapShift));
}
rtl_irq_enable_all(tp);
}
-static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp,
- void __iomem *ioaddr)
+static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
{
/*
* Magic spell: some iop3xx ARM board needs the TxDescAddrHigh
* register to be written before TxDescAddrLow to work.
* Switching from MMIO to I/O access fixes the issue as well.
*/
- RTL_W32(TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
- RTL_W32(TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
- RTL_W32(RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
- RTL_W32(RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
+ RTL_W32(tp, TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
+ RTL_W32(tp, TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
+ RTL_W32(tp, RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
+ RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
}
-static u16 rtl_rw_cpluscmd(void __iomem *ioaddr)
+static u16 rtl_rw_cpluscmd(struct rtl8169_private *tp)
{
u16 cmd;
- cmd = RTL_R16(CPlusCmd);
- RTL_W16(CPlusCmd, cmd);
+ cmd = RTL_R16(tp, CPlusCmd);
+ RTL_W16(tp, CPlusCmd, cmd);
return cmd;
}
-static void rtl_set_rx_max_size(void __iomem *ioaddr, unsigned int rx_buf_sz)
+static void rtl_set_rx_max_size(struct rtl8169_private *tp, unsigned int rx_buf_sz)
{
/* Low hurts. Let's disable the filtering. */
- RTL_W16(RxMaxSize, rx_buf_sz + 1);
+ RTL_W16(tp, RxMaxSize, rx_buf_sz + 1);
}
-static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
+static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version)
{
static const struct rtl_cfg2_info {
u32 mac_version;
unsigned int i;
u32 clk;
- clk = RTL_R8(Config2) & PCI_Clock_66MHz;
+ clk = RTL_R8(tp, Config2) & PCI_Clock_66MHz;
for (i = 0; i < ARRAY_SIZE(cfg2_info); i++, p++) {
if ((p->mac_version == mac_version) && (p->clk == clk)) {
- RTL_W32(0x7c, p->val);
+ RTL_W32(tp, 0x7c, p->val);
break;
}
}
static void rtl_set_rx_mode(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
u32 mc_filter[2]; /* Multicast hash filter */
int rx_mode;
u32 tmp = 0;
if (dev->features & NETIF_F_RXALL)
rx_mode |= (AcceptErr | AcceptRunt);
- tmp = (RTL_R32(RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
+ tmp = (RTL_R32(tp, RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
if (tp->mac_version > RTL_GIGA_MAC_VER_06) {
u32 data = mc_filter[0];
if (tp->mac_version == RTL_GIGA_MAC_VER_35)
mc_filter[1] = mc_filter[0] = 0xffffffff;
- RTL_W32(MAR0 + 4, mc_filter[1]);
- RTL_W32(MAR0 + 0, mc_filter[0]);
+ RTL_W32(tp, MAR0 + 4, mc_filter[1]);
+ RTL_W32(tp, MAR0 + 0, mc_filter[0]);
- RTL_W32(RxConfig, tmp);
+ RTL_W32(tp, RxConfig, tmp);
}
static void rtl_hw_start_8169(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
if (tp->mac_version == RTL_GIGA_MAC_VER_05) {
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) | PCIMulRW);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) | PCIMulRW);
pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x08);
}
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
tp->mac_version == RTL_GIGA_MAC_VER_02 ||
tp->mac_version == RTL_GIGA_MAC_VER_03 ||
tp->mac_version == RTL_GIGA_MAC_VER_04)
- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_init_rxcfg(tp);
- RTL_W8(EarlyTxThres, NoEarlyTx);
+ RTL_W8(tp, EarlyTxThres, NoEarlyTx);
- rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+ rtl_set_rx_max_size(tp, rx_buf_sz);
if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
tp->mac_version == RTL_GIGA_MAC_VER_02 ||
tp->mac_version == RTL_GIGA_MAC_VER_04)
rtl_set_rx_tx_config_registers(tp);
- tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW;
+ tp->cp_cmd |= rtl_rw_cpluscmd(tp) | PCIMulRW;
if (tp->mac_version == RTL_GIGA_MAC_VER_02 ||
tp->mac_version == RTL_GIGA_MAC_VER_03) {
tp->cp_cmd |= (1 << 14);
}
- RTL_W16(CPlusCmd, tp->cp_cmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
- rtl8169_set_magic_reg(ioaddr, tp->mac_version);
+ rtl8169_set_magic_reg(tp, tp->mac_version);
/*
* Undocumented corner. Supposedly:
* (TxTimer << 12) | (TxPackets << 8) | (RxTimer << 4) | RxPackets
*/
- RTL_W16(IntrMitigate, 0x0000);
+ RTL_W16(tp, IntrMitigate, 0x0000);
- rtl_set_rx_tx_desc_registers(tp, ioaddr);
+ rtl_set_rx_tx_desc_registers(tp);
if (tp->mac_version != RTL_GIGA_MAC_VER_01 &&
tp->mac_version != RTL_GIGA_MAC_VER_02 &&
tp->mac_version != RTL_GIGA_MAC_VER_03 &&
tp->mac_version != RTL_GIGA_MAC_VER_04) {
- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_set_rx_tx_config_registers(tp);
}
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
/* Initially a 10 us delay. Turned it into a PCI commit. - FR */
- RTL_R8(IntrMask);
+ RTL_R8(tp, IntrMask);
- RTL_W32(RxMissed, 0);
+ RTL_W32(tp, RxMissed, 0);
rtl_set_rx_mode(dev);
/* no early-rx interrupts */
- RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+ RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
}
static void rtl_csi_write(struct rtl8169_private *tp, int addr, int value)
DECLARE_RTL_COND(rtl_csiar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(CSIAR) & CSIAR_FLAG;
+ return RTL_R32(tp, CSIAR) & CSIAR_FLAG;
}
static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIDR, value);
- RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+ RTL_W32(tp, CSIDR, value);
+ RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
static u32 r8169_csi_read(struct rtl8169_private *tp, int addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) |
+ RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
- RTL_R32(CSIDR) : ~0;
+ RTL_R32(tp, CSIDR) : ~0;
}
static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIDR, value);
- RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+ RTL_W32(tp, CSIDR, value);
+ RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
CSIAR_FUNC_NIC);
static u32 r8402_csi_read(struct rtl8169_private *tp, int addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
+ RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
- RTL_R32(CSIDR) : ~0;
+ RTL_R32(tp, CSIDR) : ~0;
}
static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIDR, value);
- RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+ RTL_W32(tp, CSIDR, value);
+ RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
CSIAR_FUNC_NIC2);
static u32 r8411_csi_read(struct rtl8169_private *tp, int addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
+ RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
- RTL_R32(CSIDR) : ~0;
+ RTL_R32(tp, CSIDR) : ~0;
}
static void rtl_init_csi_ops(struct rtl8169_private *tp)
static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
{
- void __iomem *ioaddr = tp->mmio_addr;
u8 data;
- data = RTL_R8(Config3);
+ data = RTL_R8(tp, Config3);
if (enable)
data |= Rdy_to_L23;
else
data &= ~Rdy_to_L23;
- RTL_W8(Config3, data);
+ RTL_W8(tp, Config3, data);
}
#define R8168_CPCMD_QUIRK_MASK (\
static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
if (tp->dev->mtu <= ETH_DATA_LEN) {
rtl_tx_performance_tweak(pdev, (0x5 << MAX_READ_REQUEST_SHIFT) |
static void rtl_hw_start_8168bef(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
rtl_hw_start_8168bb(tp);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- RTL_W8(Config4, RTL_R8(Config4) & ~(1 << 0));
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
}
static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
- RTL_W8(Config1, RTL_R8(Config1) | Speed_down);
+ RTL_W8(tp, Config1, RTL_R8(tp, Config1) | Speed_down);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
if (tp->dev->mtu <= ETH_DATA_LEN)
rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
rtl_disable_clock_request(pdev);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
}
static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
rtl_csi_access_enable_2(tp);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
if (tp->dev->mtu <= ETH_DATA_LEN)
rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
}
static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
rtl_csi_access_enable_2(tp);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
/* Magic. */
- RTL_W8(DBG_REG, 0x20);
+ RTL_W8(tp, DBG_REG, 0x20);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
if (tp->dev->mtu <= ETH_DATA_LEN)
rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
}
static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168c_1[] = {
{ 0x02, 0x0800, 0x1000 },
{ 0x03, 0, 0x0002 },
rtl_csi_access_enable_2(tp);
- RTL_W8(DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
+ RTL_W8(tp, DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
rtl_ephy_init(tp, e_info_8168c_1, ARRAY_SIZE(e_info_8168c_1));
static void rtl_hw_start_8168d(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
rtl_csi_access_enable_2(tp);
rtl_disable_clock_request(pdev);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
if (tp->dev->mtu <= ETH_DATA_LEN)
rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
}
static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
rtl_csi_access_enable_1(tp);
if (tp->dev->mtu <= ETH_DATA_LEN)
rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
rtl_disable_clock_request(pdev);
}
static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
static const struct ephy_info e_info_8168d_4[] = {
{ 0x0b, 0x0000, 0x0048 },
rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
rtl_ephy_init(tp, e_info_8168d_4, ARRAY_SIZE(e_info_8168d_4));
static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
static const struct ephy_info e_info_8168e_1[] = {
{ 0x00, 0x0200, 0x0100 },
if (tp->dev->mtu <= ETH_DATA_LEN)
rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
rtl_disable_clock_request(pdev);
/* Reset tx FIFO pointer */
- RTL_W32(MISC, RTL_R32(MISC) | TXPLA_RST);
- RTL_W32(MISC, RTL_R32(MISC) & ~TXPLA_RST);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) | TXPLA_RST);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~TXPLA_RST);
- RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
}
static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
static const struct ephy_info e_info_8168e_2[] = {
{ 0x09, 0x0000, 0x0080 },
rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0001, 0x10, 0x00, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
rtl_disable_clock_request(pdev);
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
- RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
- RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
- RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
- RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
}
static void rtl_hw_start_8168f(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
rtl_csi_access_enable_2(tp);
rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x00000060, ERIAR_EXGMAC);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
rtl_disable_clock_request(pdev);
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
- RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
- RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
- RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
- RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
}
static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168f_1[] = {
{ 0x06, 0x00c0, 0x0020 },
{ 0x08, 0x0001, 0x0002 },
rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
}
static void rtl_hw_start_8411(struct rtl8169_private *tp)
static void rtl_hw_start_8168g(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f, ERIAR_EXGMAC);
- RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168g_1[] = {
{ 0x00, 0x0000, 0x0008 },
{ 0x0c, 0x37d0, 0x0820 },
rtl_hw_start_8168g(tp);
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168g_1, ARRAY_SIZE(e_info_8168g_1));
}
static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168g_2[] = {
{ 0x00, 0x0000, 0x0008 },
{ 0x0c, 0x3df0, 0x0200 },
rtl_hw_start_8168g(tp);
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168g_2, ARRAY_SIZE(e_info_8168g_2));
}
static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8411_2[] = {
{ 0x00, 0x0000, 0x0008 },
{ 0x0c, 0x3df0, 0x0200 },
rtl_hw_start_8168g(tp);
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8411_2, ARRAY_SIZE(e_info_8411_2));
}
static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
int rg_saw_cnt;
u32 data;
};
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1));
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
- RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
- RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+ RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~TX_10M_PS_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
rtl8168ep_stop_cmac(tp);
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC);
rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
- RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~TX_10M_PS_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
rtl_pcie_state_l2l3_enable(tp, false);
}
static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168ep_1[] = {
{ 0x00, 0xffff, 0x10ab },
{ 0x06, 0xffff, 0xf030 },
};
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168ep_1, ARRAY_SIZE(e_info_8168ep_1));
rtl_hw_start_8168ep(tp);
static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168ep_2[] = {
{ 0x00, 0xffff, 0x10a3 },
{ 0x19, 0xffff, 0xfc00 },
};
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168ep_2, ARRAY_SIZE(e_info_8168ep_2));
rtl_hw_start_8168ep(tp);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
- RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+ RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
}
static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
u32 data;
static const struct ephy_info e_info_8168ep_3[] = {
{ 0x00, 0xffff, 0x10a3 },
};
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168ep_3, ARRAY_SIZE(e_info_8168ep_3));
rtl_hw_start_8168ep(tp);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
- RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+ RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
data = r8168_mac_ocp_read(tp, 0xd3e2);
data &= 0xf000;
static void rtl_hw_start_8168(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+ rtl_set_rx_max_size(tp, rx_buf_sz);
- tp->cp_cmd |= RTL_R16(CPlusCmd) | PktCntrDisable | INTT_1;
+ tp->cp_cmd |= RTL_R16(tp, CPlusCmd) | PktCntrDisable | INTT_1;
- RTL_W16(CPlusCmd, tp->cp_cmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
- RTL_W16(IntrMitigate, 0x5151);
+ RTL_W16(tp, IntrMitigate, 0x5151);
/* Work around for RxFIFO overflow. */
if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
tp->event_slow &= ~RxOverflow;
}
- rtl_set_rx_tx_desc_registers(tp, ioaddr);
+ rtl_set_rx_tx_desc_registers(tp);
rtl_set_rx_tx_config_registers(tp);
- RTL_R8(IntrMask);
+ RTL_R8(tp, IntrMask);
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_11:
break;
}
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_set_rx_mode(dev);
- RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+ RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
}
#define R810X_CPCMD_QUIRK_MASK (\
static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
static const struct ephy_info e_info_8102e_1[] = {
{ 0x01, 0, 0x6e65 },
rtl_csi_access_enable_2(tp);
- RTL_W8(DBG_REG, FIX_NAK_1);
+ RTL_W8(tp, DBG_REG, FIX_NAK_1);
rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(Config1,
+ RTL_W8(tp, Config1,
LEDS1 | LEDS0 | Speed_down | MEMMAP | IOMAP | VPD | PMEnable);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
- cfg1 = RTL_R8(Config1);
+ cfg1 = RTL_R8(tp, Config1);
if ((cfg1 & LEDS0) && (cfg1 & LEDS1))
- RTL_W8(Config1, cfg1 & ~LEDS0);
+ RTL_W8(tp, Config1, cfg1 & ~LEDS0);
rtl_ephy_init(tp, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1));
}
static void rtl_hw_start_8102e_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
rtl_csi_access_enable_2(tp);
rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(Config1, MEMMAP | IOMAP | VPD | PMEnable);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config1, MEMMAP | IOMAP | VPD | PMEnable);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
}
static void rtl_hw_start_8102e_3(struct rtl8169_private *tp)
static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8105e_1[] = {
{ 0x07, 0, 0x4000 },
{ 0x19, 0, 0x0200 },
};
/* Force LAN exit from ASPM if Rx/Tx are not idle */
- RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
/* Disable Early Tally Counter */
- RTL_W32(FuncEvent, RTL_R32(FuncEvent) & ~0x010000);
+ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) & ~0x010000);
- RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
- RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1));
static void rtl_hw_start_8402(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8402[] = {
{ 0x19, 0xffff, 0xff64 },
{ 0x1e, 0, 0x4000 }
rtl_csi_access_enable_2(tp);
/* Force LAN exit from ASPM if Rx/Tx are not idle */
- RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
- RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
static void rtl_hw_start_8106(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
/* Force LAN exit from ASPM if Rx/Tx are not idle */
- RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
- RTL_W32(MISC, (RTL_R32(MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
- RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
+ RTL_W32(tp, MISC, (RTL_R32(tp, MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
rtl_pcie_state_l2l3_enable(tp, false);
}
static void rtl_hw_start_8101(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
if (tp->mac_version >= RTL_GIGA_MAC_VER_30)
pcie_capability_set_word(pdev, PCI_EXP_DEVCTL,
PCI_EXP_DEVCTL_NOSNOOP_EN);
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+ rtl_set_rx_max_size(tp, rx_buf_sz);
tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK;
- RTL_W16(CPlusCmd, tp->cp_cmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
- rtl_set_rx_tx_desc_registers(tp, ioaddr);
+ rtl_set_rx_tx_desc_registers(tp);
rtl_set_rx_tx_config_registers(tp);
break;
}
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
- RTL_W16(IntrMitigate, 0x0000);
+ RTL_W16(tp, IntrMitigate, 0x0000);
- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_set_rx_mode(dev);
- RTL_R8(IntrMask);
+ RTL_R8(tp, IntrMask);
- RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+ RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
}
static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
napi_enable(&tp->napi);
rtl_hw_start(dev);
netif_wake_queue(dev);
- rtl8169_check_link_status(dev, tp, tp->mmio_addr);
+ rtl8169_check_link_status(dev, tp);
}
static void rtl8169_tx_timeout(struct net_device *dev)
struct rtl8169_private *tp = netdev_priv(dev);
unsigned int entry = tp->cur_tx % NUM_TX_DESC;
struct TxDesc *txd = tp->TxDescArray + entry;
- void __iomem *ioaddr = tp->mmio_addr;
struct device *d = &tp->pci_dev->dev;
dma_addr_t mapping;
u32 status, len;
tp->cur_tx += frags + 1;
- RTL_W8(TxPoll, NPQ);
+ RTL_W8(tp, TxPoll, NPQ);
mmiowb();
/* The infamous DAC f*ckup only happens at boot time */
if ((tp->cp_cmd & PCIDAC) && !tp->cur_rx) {
- void __iomem *ioaddr = tp->mmio_addr;
-
netif_info(tp, intr, dev, "disabling PCI DAC\n");
tp->cp_cmd &= ~PCIDAC;
- RTL_W16(CPlusCmd, tp->cp_cmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
dev->features &= ~NETIF_F_HIGHDMA;
}
* of start_xmit activity is detected (if it is not detected,
* it is slow enough). -- FR
*/
- if (tp->cur_tx != dirty_tx) {
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(TxPoll, NPQ);
- }
+ if (tp->cur_tx != dirty_tx)
+ RTL_W8(tp, TxPoll, NPQ);
}
}
rtl8169_pcierr_interrupt(dev);
if (status & LinkChg)
- rtl8169_check_link_status(dev, tp, tp->mmio_addr);
+ rtl8169_check_link_status(dev, tp);
rtl_irq_enable_all(tp);
}
return work_done;
}
-static void rtl8169_rx_missed(struct net_device *dev, void __iomem *ioaddr)
+static void rtl8169_rx_missed(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
if (tp->mac_version > RTL_GIGA_MAC_VER_06)
return;
- dev->stats.rx_missed_errors += (RTL_R32(RxMissed) & 0xffffff);
- RTL_W32(RxMissed, 0);
+ dev->stats.rx_missed_errors += RTL_R32(tp, RxMissed) & 0xffffff;
+ RTL_W32(tp, RxMissed, 0);
}
static void rtl8169_down(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
del_timer_sync(&tp->timer);
* as netif_running is not true (rtl8169_interrupt, rtl8169_reset_task)
* and napi is disabled (rtl8169_poll).
*/
- rtl8169_rx_missed(dev, ioaddr);
+ rtl8169_rx_missed(dev);
/* Give a racing hard_start_xmit a few cycles to complete. */
synchronize_sched();
cancel_work_sync(&tp->wk.work);
- free_irq(pdev->irq, dev);
+ pci_free_irq(pdev, 0, dev);
dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
tp->RxPhyAddr);
{
struct rtl8169_private *tp = netdev_priv(dev);
- rtl8169_interrupt(tp->pci_dev->irq, dev);
+ rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), dev);
}
#endif
static int rtl_open(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
int retval = -ENOMEM;
rtl_request_firmware(tp);
- retval = request_irq(pdev->irq, rtl8169_interrupt,
- (tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
- dev->name, dev);
+ retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev,
+ dev->name);
if (retval < 0)
goto err_release_fw_2;
tp->saved_wolopts = 0;
pm_runtime_put_sync(&pdev->dev);
- rtl8169_check_link_status(dev, tp, ioaddr);
+ rtl8169_check_link_status(dev, tp);
out:
return retval;
rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
struct rtl8169_counters *counters = tp->counters;
unsigned int start;
pm_runtime_get_noresume(&pdev->dev);
if (netif_running(dev) && pm_runtime_active(&pdev->dev))
- rtl8169_rx_missed(dev, ioaddr);
+ rtl8169_rx_missed(dev);
do {
start = u64_stats_fetch_begin_irq(&tp->rx_stats.syncp);
rtl8169_net_suspend(dev);
/* Update counters before going runtime suspend */
- rtl8169_rx_missed(dev, tp->mmio_addr);
+ rtl8169_rx_missed(dev);
rtl8169_update_counters(dev);
return 0;
static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
/* WoL fails with 8168b when the receiver is disabled. */
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_11:
case RTL_GIGA_MAC_VER_17:
pci_clear_master(tp->pci_dev);
- RTL_W8(ChipCmd, CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdRxEnb);
/* PCI commit */
- RTL_R8(ChipCmd);
+ RTL_R8(tp, ChipCmd);
break;
default:
break;
struct net_device *dev = pci_get_drvdata(pdev);
struct rtl8169_private *tp = netdev_priv(dev);
- if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
- tp->mac_version == RTL_GIGA_MAC_VER_28 ||
- tp->mac_version == RTL_GIGA_MAC_VER_31 ||
- tp->mac_version == RTL_GIGA_MAC_VER_49 ||
- tp->mac_version == RTL_GIGA_MAC_VER_50 ||
- tp->mac_version == RTL_GIGA_MAC_VER_51) &&
- r8168_check_dash(tp)) {
+ if (r8168_check_dash(tp))
rtl8168_driver_stop(tp);
- }
netif_napi_del(&tp->napi);
unsigned int region;
unsigned int align;
u16 event_slow;
- unsigned features;
+ unsigned int has_gmii:1;
const struct rtl_coalesce_info *coalesce_info;
u8 default_ver;
} rtl_cfg_infos [] = {
.region = 1,
.align = 0,
.event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver,
- .features = RTL_FEATURE_GMII,
+ .has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8169,
.default_ver = RTL_GIGA_MAC_VER_01,
},
.region = 2,
.align = 8,
.event_slow = SYSErr | LinkChg | RxOverflow,
- .features = RTL_FEATURE_GMII | RTL_FEATURE_MSI,
+ .has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8168_8136,
.default_ver = RTL_GIGA_MAC_VER_11,
},
.align = 8,
.event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver |
PCSTimeout,
- .features = RTL_FEATURE_MSI,
.coalesce_info = rtl_coalesce_info_8168_8136,
.default_ver = RTL_GIGA_MAC_VER_13,
}
};
-/* Cfg9346_Unlock assumed. */
-static unsigned rtl_try_msi(struct rtl8169_private *tp,
- const struct rtl_cfg_info *cfg)
+static int rtl_alloc_irq(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- unsigned msi = 0;
- u8 cfg2;
+ unsigned int flags;
- cfg2 = RTL_R8(Config2) & ~MSIEnable;
- if (cfg->features & RTL_FEATURE_MSI) {
- if (pci_enable_msi(tp->pci_dev)) {
- netif_info(tp, hw, tp->dev, "no MSI. Back to INTx.\n");
- } else {
- cfg2 |= MSIEnable;
- msi = RTL_FEATURE_MSI;
- }
+ if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+ flags = PCI_IRQ_LEGACY;
+ } else {
+ flags = PCI_IRQ_ALL_TYPES;
}
- if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
- RTL_W8(Config2, cfg2);
- return msi;
+
+ return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
}
DECLARE_RTL_COND(rtl_link_list_ready_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R8(MCU) & LINK_LIST_RDY;
+ return RTL_R8(tp, MCU) & LINK_LIST_RDY;
}
DECLARE_RTL_COND(rtl_rxtx_empty_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return (RTL_R8(MCU) & RXTX_EMPTY) == RXTX_EMPTY;
+ return (RTL_R8(tp, MCU) & RXTX_EMPTY) == RXTX_EMPTY;
}
static void rtl_hw_init_8168g(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
u32 data;
tp->ocp_base = OCP_STD_PHY_BASE;
- RTL_W32(MISC, RTL_R32(MISC) | RXDV_GATED_EN);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
if (!rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42))
return;
if (!rtl_udelay_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
return;
- RTL_W8(ChipCmd, RTL_R8(ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
+ RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
msleep(1);
- RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
data = r8168_mac_ocp_read(tp, 0xe8de);
data &= ~(1 << 14);
struct rtl8169_private *tp;
struct mii_if_info *mii;
struct net_device *dev;
- void __iomem *ioaddr;
int chipset, i;
int rc;
mii->mdio_write = rtl_mdio_write;
mii->phy_id_mask = 0x1f;
mii->reg_num_mask = 0x1f;
- mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
+ mii->supports_gmii = cfg->has_gmii;
/* disable ASPM completely as that cause random device stop working
* problems as well as full system hangs for some PCIe devices users */
return -ENODEV;
}
- rc = pci_request_regions(pdev, MODULENAME);
+ rc = pcim_iomap_regions(pdev, BIT(region), MODULENAME);
if (rc < 0) {
- netif_err(tp, probe, dev, "could not request regions\n");
+ netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
return rc;
}
- /* ioremap MMIO region */
- ioaddr = devm_ioremap(&pdev->dev, pci_resource_start(pdev, region),
- R8169_REGS_SIZE);
- if (!ioaddr) {
- netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
- return -EIO;
- }
- tp->mmio_addr = ioaddr;
+ tp->mmio_addr = pcim_iomap_table(pdev)[region];
if (!pci_is_pcie(pdev))
netif_info(tp, probe, dev, "not PCI Express\n");
chipset = tp->mac_version;
tp->txd_version = rtl_chip_infos[chipset].txd_version;
- RTL_W8(Cfg9346, Cfg9346_Unlock);
- RTL_W8(Config1, RTL_R8(Config1) | PMEnable);
- RTL_W8(Config5, RTL_R8(Config5) & (BWF | MWF | UWF | LanWake | PMEStatus));
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_34:
- case RTL_GIGA_MAC_VER_35:
- case RTL_GIGA_MAC_VER_36:
- case RTL_GIGA_MAC_VER_37:
- case RTL_GIGA_MAC_VER_38:
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_49:
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
- if (rtl_eri_read(tp, 0xdc, ERIAR_EXGMAC) & MagicPacket_v2)
- tp->features |= RTL_FEATURE_WOL;
- if ((RTL_R8(Config3) & LinkUp) != 0)
- tp->features |= RTL_FEATURE_WOL;
- break;
- default:
- if ((RTL_R8(Config3) & (LinkUp | MagicPacket)) != 0)
- tp->features |= RTL_FEATURE_WOL;
- break;
+ rc = rtl_alloc_irq(tp);
+ if (rc < 0) {
+ netif_err(tp, probe, dev, "Can't allocate interrupt\n");
+ return rc;
}
- if ((RTL_R8(Config5) & (UWF | BWF | MWF)) != 0)
- tp->features |= RTL_FEATURE_WOL;
- tp->features |= rtl_try_msi(tp, cfg);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+
+ /* override BIOS settings, use userspace tools to enable WOL */
+ __rtl8169_set_wol(tp, 0);
if (rtl_tbi_enabled(tp)) {
tp->set_speed = rtl8169_set_speed_tbi;
rtl_rar_set(tp, (u8 *)mac_addr);
}
for (i = 0; i < ETH_ALEN; i++)
- dev->dev_addr[i] = RTL_R8(MAC0 + i);
+ dev->dev_addr[i] = RTL_R8(tp, MAC0 + i);
dev->ethtool_ops = &rtl8169_ethtool_ops;
dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
pci_set_drvdata(pdev, dev);
netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
- rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr,
- (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), pdev->irq);
+ rtl_chip_infos[chipset].name, tp->mmio_addr, dev->dev_addr,
+ (u32)(RTL_R32(tp, TxConfig) & 0x9cf0f8ff),
+ pci_irq_vector(pdev, 0));
if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {
netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, "
"tx checksumming: %s]\n",
rtl_chip_infos[chipset].jumbo_tx_csum ? "ok" : "ko");
}
- if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
- tp->mac_version == RTL_GIGA_MAC_VER_28 ||
- tp->mac_version == RTL_GIGA_MAC_VER_31 ||
- tp->mac_version == RTL_GIGA_MAC_VER_49 ||
- tp->mac_version == RTL_GIGA_MAC_VER_50 ||
- tp->mac_version == RTL_GIGA_MAC_VER_51) &&
- r8168_check_dash(tp)) {
+ if (r8168_check_dash(tp))
rtl8168_driver_start(tp);
- }
netif_carrier_off(dev);
u32 dirty_rx[NUM_RX_QUEUE]; /* Producer ring indices */
u32 cur_tx[NUM_TX_QUEUE];
u32 dirty_tx[NUM_TX_QUEUE];
+ u32 rx_buf_sz; /* Based on MTU+slack. */
struct napi_struct napi[NUM_RX_QUEUE];
struct work_struct work;
/* MII transceiver section. */
le32_to_cpu(desc->dptr)))
dma_unmap_single(ndev->dev.parent,
le32_to_cpu(desc->dptr),
- PKT_BUF_SZ,
+ priv->rx_buf_sz,
DMA_FROM_DEVICE);
}
ring_size = sizeof(struct ravb_ex_rx_desc) *
for (i = 0; i < priv->num_rx_ring[q]; i++) {
/* RX descriptor */
rx_desc = &priv->rx_ring[q][i];
- rx_desc->ds_cc = cpu_to_le16(PKT_BUF_SZ);
+ rx_desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data,
- PKT_BUF_SZ,
+ priv->rx_buf_sz,
DMA_FROM_DEVICE);
/* We just set the data size to 0 for a failed mapping which
* should prevent DMA from happening...
int ring_size;
int i;
+ /* +16 gets room from the status from the card. */
+ priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
+ ETH_HLEN + VLAN_HLEN;
+
/* Allocate RX and TX skb rings */
priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
sizeof(*priv->rx_skb[q]), GFP_KERNEL);
goto error;
for (i = 0; i < priv->num_rx_ring[q]; i++) {
- skb = netdev_alloc_skb(ndev, PKT_BUF_SZ + RAVB_ALIGN - 1);
+ skb = netdev_alloc_skb(ndev, priv->rx_buf_sz + RAVB_ALIGN - 1);
if (!skb)
goto error;
ravb_set_buffer_align(skb);
skb = priv->rx_skb[q][entry];
priv->rx_skb[q][entry] = NULL;
dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr),
- PKT_BUF_SZ,
+ priv->rx_buf_sz,
DMA_FROM_DEVICE);
get_ts &= (q == RAVB_NC) ?
RAVB_RXTSTAMP_TYPE_V2_L2_EVENT :
for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) {
entry = priv->dirty_rx[q] % priv->num_rx_ring[q];
desc = &priv->rx_ring[q][entry];
- desc->ds_cc = cpu_to_le16(PKT_BUF_SZ);
+ desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
if (!priv->rx_skb[q][entry]) {
skb = netdev_alloc_skb(ndev,
- PKT_BUF_SZ + RAVB_ALIGN - 1);
+ priv->rx_buf_sz +
+ RAVB_ALIGN - 1);
if (!skb)
break; /* Better luck next round. */
ravb_set_buffer_align(skb);
return phy_mii_ioctl(phydev, req, cmd);
}
+static int ravb_change_mtu(struct net_device *ndev, int new_mtu)
+{
+ if (netif_running(ndev))
+ return -EBUSY;
+
+ ndev->mtu = new_mtu;
+ netdev_update_features(ndev);
+
+ return 0;
+}
+
static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
{
struct ravb_private *priv = netdev_priv(ndev);
.ndo_set_rx_mode = ravb_set_rx_mode,
.ndo_tx_timeout = ravb_tx_timeout,
.ndo_do_ioctl = ravb_do_ioctl,
+ .ndo_change_mtu = ravb_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
.ndo_set_features = ravb_set_features,
goto out_release;
}
+ ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
+ ndev->min_mtu = ETH_MIN_MTU;
+
/* Set function */
ndev->netdev_ops = &ravb_netdev_ops;
ndev->ethtool_ops = &ravb_ethtool_ops;
/* Enable MagicPacket */
ravb_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE);
- /* Increased clock usage so device won't be suspended */
- clk_enable(priv->clk);
-
return enable_irq_wake(priv->emac_irq);
}
if (ret < 0)
return ret;
- /* Restore clock usage count */
- clk_disable(priv->clk);
-
return disable_irq_wake(priv->emac_irq);
}
#include <linux/slab.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
-#include <linux/clk.h>
#include <linux/sh_eth.h>
#include <linux/of_mdio.h>
[TSU_FWSL0] = 0x0030,
[TSU_FWSL1] = 0x0034,
[TSU_FWSLC] = 0x0038,
- [TSU_QTAG0] = 0x0040,
- [TSU_QTAG1] = 0x0044,
+ [TSU_QTAGM0] = 0x0040,
+ [TSU_QTAGM1] = 0x0044,
[TSU_FWSR] = 0x0050,
[TSU_FWINMK] = 0x0054,
[TSU_ADQT0] = 0x0048,
.rpadir = 1,
.rpadir_value = 2 << 16,
.rtrate = 1,
+ .dual_port = 1,
};
#define SH_GIGA_ETH_BASE 0xfee00000UL
.no_trimd = 1,
.no_ade = 1,
.tsu = 1,
+ .dual_port = 1,
};
/* SH7734 */
.tsu = 1,
.irq_flags = IRQF_SHARED,
.magic = 1,
+ .dual_port = 1,
};
static struct sh_eth_cpu_data sh7619_data = {
EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP |
EESIPR_PREIP | EESIPR_CERFIP,
.tsu = 1,
+ .dual_port = 1,
};
static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
static int sh_eth_check_reset(struct net_device *ndev)
{
- int ret = 0;
- int cnt = 100;
+ int cnt;
- while (cnt > 0) {
+ for (cnt = 100; cnt > 0; cnt--) {
if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER))
- break;
+ return 0;
mdelay(1);
- cnt--;
}
- if (cnt <= 0) {
- netdev_err(ndev, "Device reset failed\n");
- ret = -ETIMEDOUT;
- }
- return ret;
+
+ netdev_err(ndev, "Device reset failed\n");
+ return -ETIMEDOUT;
}
static int sh_eth_reset(struct net_device *ndev)
add_tsu_reg(TSU_FWSL0);
add_tsu_reg(TSU_FWSL1);
add_tsu_reg(TSU_FWSLC);
- add_tsu_reg(TSU_QTAG0);
- add_tsu_reg(TSU_QTAG1);
add_tsu_reg(TSU_QTAGM0);
add_tsu_reg(TSU_QTAGM1);
add_tsu_reg(TSU_FWSR);
wol->supported = 0;
wol->wolopts = 0;
- if (mdp->cd->magic && mdp->clk) {
+ if (mdp->cd->magic) {
wol->supported = WAKE_MAGIC;
wol->wolopts = mdp->wol_enabled ? WAKE_MAGIC : 0;
}
{
struct sh_eth_private *mdp = netdev_priv(ndev);
- if (!mdp->cd->magic || !mdp->clk || wol->wolopts & ~WAKE_MAGIC)
+ if (!mdp->cd->magic || wol->wolopts & ~WAKE_MAGIC)
return -EOPNOTSUPP;
mdp->wol_enabled = !!(wol->wolopts & WAKE_MAGIC);
/* SuperH's TSU register init function */
static void sh_eth_tsu_init(struct sh_eth_private *mdp)
{
- if (sh_eth_is_rz_fast_ether(mdp)) {
+ if (!mdp->cd->dual_port) {
sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL,
TSU_FWSLC); /* Enable POST registers */
sh_eth_tsu_write(mdp, 0, TSU_FWSL0);
sh_eth_tsu_write(mdp, 0, TSU_FWSL1);
sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL, TSU_FWSLC);
- if (sh_eth_is_gether(mdp)) {
- sh_eth_tsu_write(mdp, 0, TSU_QTAG0); /* Disable QTAG(0->1) */
- sh_eth_tsu_write(mdp, 0, TSU_QTAG1); /* Disable QTAG(1->0) */
- } else {
- sh_eth_tsu_write(mdp, 0, TSU_QTAGM0); /* Disable QTAG(0->1) */
- sh_eth_tsu_write(mdp, 0, TSU_QTAGM1); /* Disable QTAG(1->0) */
- }
+ sh_eth_tsu_write(mdp, 0, TSU_QTAGM0); /* Disable QTAG(0->1) */
+ sh_eth_tsu_write(mdp, 0, TSU_QTAGM1); /* Disable QTAG(1->0) */
sh_eth_tsu_write(mdp, 0, TSU_FWSR); /* all interrupt status clear */
sh_eth_tsu_write(mdp, 0, TSU_FWINMK); /* Disable all interrupt */
sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
goto out_release;
}
- /* Get clock, if not found that's OK but Wake-On-Lan is unavailable */
- mdp->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(mdp->clk))
- mdp->clk = NULL;
-
ndev->base_addr = res->start;
spin_lock_init(&mdp->lock);
if (ret)
goto out_napi_del;
- if (mdp->cd->magic && mdp->clk)
+ if (mdp->cd->magic)
device_set_wakeup_capable(&pdev->dev, 1);
/* print device information */
/* Enable MagicPacket */
sh_eth_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE);
- /* Increased clock usage so device won't be suspended */
- clk_enable(mdp->clk);
-
return enable_irq_wake(ndev->irq);
}
if (ret < 0)
return ret;
- /* Restore clock usage count */
- clk_disable(mdp->clk);
-
return disable_irq_wake(ndev->irq);
}
TSU_FWSL0,
TSU_FWSL1,
TSU_FWSLC,
- TSU_QTAG0,
- TSU_QTAG1,
+ TSU_QTAG0, /* Same as TSU_QTAGM0 */
+ TSU_QTAG1, /* Same as TSU_QTAGM1 */
TSU_QTAGM0,
TSU_QTAGM1,
TSU_FWSR,
unsigned rmiimode:1; /* EtherC has RMIIMODE register */
unsigned rtrate:1; /* EtherC has RTRATE register */
unsigned magic:1; /* EtherC has ECMR.MPDE and ECSR.MPD */
+ unsigned dual_port:1; /* Dual EtherC/E-DMAC */
};
struct sh_eth_private {
(1 << LOOPBACK_XAUI) | \
(1 << LOOPBACK_GMII) | \
(1 << LOOPBACK_SGMII) | \
- (1 << LOOPBACK_SGMII) | \
(1 << LOOPBACK_XGBR) | \
(1 << LOOPBACK_XFI) | \
(1 << LOOPBACK_XAUI_FAR) | \
config SMC9194
tristate "SMC 9194 support"
- depends on (ISA || MAC && BROKEN)
+ depends on ISA
select CRC32
---help---
This is support for the SMC9xxx based Ethernet cards. Choose this
#define MUX_CLK_NUM_PARENTS 2
struct meson8b_dwmac {
- struct platform_device *pdev;
-
+ struct device *dev;
void __iomem *regs;
-
phy_interface_t phy_mode;
+ struct clk *rgmii_tx_clk;
+ u32 tx_delay_ns;
+};
+struct meson8b_dwmac_clk_configs {
struct clk_mux m250_mux;
- struct clk *m250_mux_clk;
- struct clk *m250_mux_parent[MUX_CLK_NUM_PARENTS];
-
struct clk_divider m250_div;
- struct clk *m250_div_clk;
-
struct clk_fixed_factor fixed_div2;
- struct clk *fixed_div2_clk;
-
struct clk_gate rgmii_tx_en;
- struct clk *rgmii_tx_en_clk;
-
- u32 tx_delay_ns;
};
static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg,
writel(data, dwmac->regs + reg);
}
-static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
+static struct clk *meson8b_dwmac_register_clk(struct meson8b_dwmac *dwmac,
+ const char *name_suffix,
+ const char **parent_names,
+ int num_parents,
+ const struct clk_ops *ops,
+ struct clk_hw *hw)
{
struct clk_init_data init;
- int i, ret;
- struct device *dev = &dwmac->pdev->dev;
char clk_name[32];
- const char *clk_div_parents[1];
- const char *mux_parent_names[MUX_CLK_NUM_PARENTS];
+
+ snprintf(clk_name, sizeof(clk_name), "%s#%s", dev_name(dwmac->dev),
+ name_suffix);
+
+ init.name = clk_name;
+ init.ops = ops;
+ init.flags = CLK_SET_RATE_PARENT;
+ init.parent_names = parent_names;
+ init.num_parents = num_parents;
+
+ hw->init = &init;
+
+ return devm_clk_register(dwmac->dev, hw);
+}
+
+static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
+{
+ int i, ret;
+ struct clk *clk;
+ struct device *dev = dwmac->dev;
+ const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
+ struct meson8b_dwmac_clk_configs *clk_configs;
+
+ clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL);
+ if (!clk_configs)
+ return -ENOMEM;
/* get the mux parents from DT */
for (i = 0; i < MUX_CLK_NUM_PARENTS; i++) {
char name[16];
snprintf(name, sizeof(name), "clkin%d", i);
- dwmac->m250_mux_parent[i] = devm_clk_get(dev, name);
- if (IS_ERR(dwmac->m250_mux_parent[i])) {
- ret = PTR_ERR(dwmac->m250_mux_parent[i]);
+ clk = devm_clk_get(dev, name);
+ if (IS_ERR(clk)) {
+ ret = PTR_ERR(clk);
if (ret != -EPROBE_DEFER)
dev_err(dev, "Missing clock %s\n", name);
return ret;
}
- mux_parent_names[i] =
- __clk_get_name(dwmac->m250_mux_parent[i]);
+ mux_parent_names[i] = __clk_get_name(clk);
}
- /* create the m250_mux */
- snprintf(clk_name, sizeof(clk_name), "%s#m250_sel", dev_name(dev));
- init.name = clk_name;
- init.ops = &clk_mux_ops;
- init.flags = CLK_SET_RATE_PARENT;
- init.parent_names = mux_parent_names;
- init.num_parents = MUX_CLK_NUM_PARENTS;
-
- dwmac->m250_mux.reg = dwmac->regs + PRG_ETH0;
- dwmac->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT;
- dwmac->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK;
- dwmac->m250_mux.flags = 0;
- dwmac->m250_mux.table = NULL;
- dwmac->m250_mux.hw.init = &init;
-
- dwmac->m250_mux_clk = devm_clk_register(dev, &dwmac->m250_mux.hw);
- if (WARN_ON(IS_ERR(dwmac->m250_mux_clk)))
- return PTR_ERR(dwmac->m250_mux_clk);
-
- /* create the m250_div */
- snprintf(clk_name, sizeof(clk_name), "%s#m250_div", dev_name(dev));
- init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
- init.ops = &clk_divider_ops;
- init.flags = CLK_SET_RATE_PARENT;
- clk_div_parents[0] = __clk_get_name(dwmac->m250_mux_clk);
- init.parent_names = clk_div_parents;
- init.num_parents = ARRAY_SIZE(clk_div_parents);
-
- dwmac->m250_div.reg = dwmac->regs + PRG_ETH0;
- dwmac->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
- dwmac->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
- dwmac->m250_div.hw.init = &init;
- dwmac->m250_div.flags = CLK_DIVIDER_ONE_BASED |
+ clk_configs->m250_mux.reg = dwmac->regs + PRG_ETH0;
+ clk_configs->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT;
+ clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK;
+ clk = meson8b_dwmac_register_clk(dwmac, "m250_sel", mux_parent_names,
+ MUX_CLK_NUM_PARENTS, &clk_mux_ops,
+ &clk_configs->m250_mux.hw);
+ if (WARN_ON(IS_ERR(clk)))
+ return PTR_ERR(clk);
+
+ parent_name = __clk_get_name(clk);
+ clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0;
+ clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
+ clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
+ clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED |
CLK_DIVIDER_ALLOW_ZERO |
CLK_DIVIDER_ROUND_CLOSEST;
-
- dwmac->m250_div_clk = devm_clk_register(dev, &dwmac->m250_div.hw);
- if (WARN_ON(IS_ERR(dwmac->m250_div_clk)))
- return PTR_ERR(dwmac->m250_div_clk);
-
- /* create the fixed_div2 */
- snprintf(clk_name, sizeof(clk_name), "%s#fixed_div2", dev_name(dev));
- init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
- init.ops = &clk_fixed_factor_ops;
- init.flags = CLK_SET_RATE_PARENT;
- clk_div_parents[0] = __clk_get_name(dwmac->m250_div_clk);
- init.parent_names = clk_div_parents;
- init.num_parents = ARRAY_SIZE(clk_div_parents);
-
- dwmac->fixed_div2.mult = 1;
- dwmac->fixed_div2.div = 2;
- dwmac->fixed_div2.hw.init = &init;
-
- dwmac->fixed_div2_clk = devm_clk_register(dev, &dwmac->fixed_div2.hw);
- if (WARN_ON(IS_ERR(dwmac->fixed_div2_clk)))
- return PTR_ERR(dwmac->fixed_div2_clk);
-
- /* create the rgmii_tx_en */
- init.name = devm_kasprintf(dev, GFP_KERNEL, "%s#rgmii_tx_en",
- dev_name(dev));
- init.ops = &clk_gate_ops;
- init.flags = CLK_SET_RATE_PARENT;
- clk_div_parents[0] = __clk_get_name(dwmac->fixed_div2_clk);
- init.parent_names = clk_div_parents;
- init.num_parents = ARRAY_SIZE(clk_div_parents);
-
- dwmac->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0;
- dwmac->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN;
- dwmac->rgmii_tx_en.hw.init = &init;
-
- dwmac->rgmii_tx_en_clk = devm_clk_register(dev,
- &dwmac->rgmii_tx_en.hw);
- if (WARN_ON(IS_ERR(dwmac->rgmii_tx_en_clk)))
- return PTR_ERR(dwmac->rgmii_tx_en_clk);
+ clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1,
+ &clk_divider_ops,
+ &clk_configs->m250_div.hw);
+ if (WARN_ON(IS_ERR(clk)))
+ return PTR_ERR(clk);
+
+ parent_name = __clk_get_name(clk);
+ clk_configs->fixed_div2.mult = 1;
+ clk_configs->fixed_div2.div = 2;
+ clk = meson8b_dwmac_register_clk(dwmac, "fixed_div2", &parent_name, 1,
+ &clk_fixed_factor_ops,
+ &clk_configs->fixed_div2.hw);
+ if (WARN_ON(IS_ERR(clk)))
+ return PTR_ERR(clk);
+
+ parent_name = __clk_get_name(clk);
+ clk_configs->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0;
+ clk_configs->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN;
+ clk = meson8b_dwmac_register_clk(dwmac, "rgmii_tx_en", &parent_name, 1,
+ &clk_gate_ops,
+ &clk_configs->rgmii_tx_en.hw);
+ if (WARN_ON(IS_ERR(clk)))
+ return PTR_ERR(clk);
+
+ dwmac->rgmii_tx_clk = clk;
return 0;
}
* a register) based on the line-speed (125MHz for Gbit speeds,
* 25MHz for 100Mbit/s and 2.5MHz for 10Mbit/s).
*/
- ret = clk_set_rate(dwmac->rgmii_tx_en_clk, 125 * 1000 * 1000);
+ ret = clk_set_rate(dwmac->rgmii_tx_clk, 125 * 1000 * 1000);
if (ret) {
- dev_err(&dwmac->pdev->dev,
+ dev_err(dwmac->dev,
"failed to set RGMII TX clock\n");
return ret;
}
- ret = clk_prepare_enable(dwmac->rgmii_tx_en_clk);
+ ret = clk_prepare_enable(dwmac->rgmii_tx_clk);
if (ret) {
- dev_err(&dwmac->pdev->dev,
+ dev_err(dwmac->dev,
"failed to enable the RGMII TX clock\n");
return ret;
}
+
+ devm_add_action_or_reset(dwmac->dev,
+ (void(*)(void *))clk_disable_unprepare,
+ dwmac->rgmii_tx_clk);
break;
case PHY_INTERFACE_MODE_RMII:
break;
default:
- dev_err(&dwmac->pdev->dev, "unsupported phy-mode %s\n",
+ dev_err(dwmac->dev, "unsupported phy-mode %s\n",
phy_modes(dwmac->phy_mode));
return -EINVAL;
}
goto err_remove_config_dt;
}
- dwmac->pdev = pdev;
+ dwmac->dev = &pdev->dev;
dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node);
if (dwmac->phy_mode < 0) {
dev_err(&pdev->dev, "missing phy-mode property\n");
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
if (ret)
- goto err_clk_disable;
+ goto err_remove_config_dt;
return 0;
-err_clk_disable:
- if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
- clk_disable_unprepare(dwmac->rgmii_tx_en_clk);
err_remove_config_dt:
stmmac_remove_config_dt(pdev, plat_dat);
return ret;
}
-static int meson8b_dwmac_remove(struct platform_device *pdev)
-{
- struct meson8b_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
-
- if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
- clk_disable_unprepare(dwmac->rgmii_tx_en_clk);
-
- return stmmac_pltfr_remove(pdev);
-}
-
static const struct of_device_id meson8b_dwmac_match[] = {
{ .compatible = "amlogic,meson8b-dwmac" },
{ .compatible = "amlogic,meson-gxbb-dwmac" },
static struct platform_driver meson8b_dwmac_driver = {
.probe = meson8b_dwmac_probe,
- .remove = meson8b_dwmac_remove,
+ .remove = stmmac_pltfr_remove,
.driver = {
.name = "meson8b-dwmac",
.pm = &stmmac_pltfr_pm_ops,
writel(value, ioaddr + base_register);
}
-static void dwmac4_tx_queue_routing(struct mac_device_info *hw,
+static void dwmac4_rx_queue_routing(struct mac_device_info *hw,
u8 packet, u32 queue)
{
void __iomem *ioaddr = hw->pcsr;
.rx_queue_enable = dwmac4_rx_queue_enable,
.rx_queue_prio = dwmac4_rx_queue_priority,
.tx_queue_prio = dwmac4_tx_queue_priority,
- .rx_queue_routing = dwmac4_tx_queue_routing,
+ .rx_queue_routing = dwmac4_rx_queue_routing,
.prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
.prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
.set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
.rx_queue_enable = dwmac4_rx_queue_enable,
.rx_queue_prio = dwmac4_rx_queue_priority,
.tx_queue_prio = dwmac4_tx_queue_priority,
- .rx_queue_routing = dwmac4_tx_queue_routing,
+ .rx_queue_routing = dwmac4_rx_queue_routing,
.prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
.prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
.set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
static void dwmac4_release_tx_desc(struct dma_desc *p, int mode)
{
+ p->des0 = 0;
+ p->des1 = 0;
p->des2 = 0;
p->des3 = 0;
}
unsigned int dirty_tx;
dma_addr_t dma_tx_phy;
u32 tx_tail_addr;
+ u32 mss;
};
struct stmmac_rx_queue {
spinlock_t ptp_lock;
void __iomem *mmcaddr;
void __iomem *ptpaddr;
- u32 mss;
#ifdef CONFIG_DEBUG_FS
struct dentry *dbgfs_dir;
tx_q->dirty_tx = 0;
tx_q->cur_tx = 0;
+ tx_q->mss = 0;
netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
}
if (unlikely(status & tx_dma_own))
break;
+ /* Make sure descriptor fields are read after reading
+ * the own bit.
+ */
+ dma_rmb();
+
/* Just consider the last segment and ...*/
if (likely(!(status & tx_not_ls))) {
/* ... verify the status error condition */
(i == DMA_TX_SIZE - 1));
tx_q->dirty_tx = 0;
tx_q->cur_tx = 0;
+ tx_q->mss = 0;
netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, chan));
stmmac_start_tx_dma(priv, chan);
continue;
packet = priv->plat->rx_queues_cfg[queue].pkt_route;
- priv->hw->mac->rx_queue_prio(priv->hw, packet, queue);
+ priv->hw->mac->rx_queue_routing(priv->hw, packet, queue);
}
}
priv->dma_buf_sz = STMMAC_ALIGN(buf_sz);
priv->rx_copybreak = STMMAC_RX_COPYBREAK;
- priv->mss = 0;
ret = alloc_dma_desc_resources(priv);
if (ret < 0) {
while (tmp_len > 0) {
tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+ WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
desc = tx_q->dma_tx + tx_q->cur_tx;
desc->des0 = cpu_to_le32(des + (total_len - tmp_len));
mss = skb_shinfo(skb)->gso_size;
/* set new MSS value if needed */
- if (mss != priv->mss) {
+ if (mss != tx_q->mss) {
mss_desc = tx_q->dma_tx + tx_q->cur_tx;
priv->hw->desc->set_mss(mss_desc, mss);
- priv->mss = mss;
+ tx_q->mss = mss;
tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+ WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
}
if (netif_msg_tx_queued(priv)) {
}
first_entry = tx_q->cur_tx;
+ WARN_ON(tx_q->tx_skbuff[first_entry]);
desc = tx_q->dma_tx + first_entry;
first = desc;
tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag);
- tx_q->tx_skbuff[tx_q->cur_tx] = NULL;
tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true;
}
tcp_hdrlen(skb) / 4, (skb->len - proto_hdr_len));
/* If context desc is used to change MSS */
- if (mss_desc)
+ if (mss_desc) {
+ /* Make sure that first descriptor has been completely
+ * written, including its own bit. This is because MSS is
+ * actually before first descriptor, so we need to make
+ * sure that MSS's own bit is the last thing written.
+ */
+ dma_wmb();
priv->hw->desc->set_tx_owner(mss_desc);
+ }
/* The own bit must be the latest setting done when prepare the
* descriptor and then barrier is needed to make sure that
* all is coherent before granting the DMA engine.
*/
- dma_wmb();
+ wmb();
if (netif_msg_pktdata(priv)) {
pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n",
entry = tx_q->cur_tx;
first_entry = entry;
+ WARN_ON(tx_q->tx_skbuff[first_entry]);
csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
bool last_segment = (i == (nfrags - 1));
entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+ WARN_ON(tx_q->tx_skbuff[entry]);
if (likely(priv->extend_desc))
desc = (struct dma_desc *)(tx_q->dma_etx + entry);
if (dma_mapping_error(priv->device, des))
goto dma_map_err; /* should reuse desc w/o issues */
- tx_q->tx_skbuff[entry] = NULL;
-
tx_q->tx_skbuff_dma[entry].buf = des;
if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
desc->des0 = cpu_to_le32(des);
* descriptor and then barrier is needed to make sure that
* all is coherent before granting the DMA engine.
*/
- dma_wmb();
+ wmb();
}
netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
tx_q->cur_tx = 0;
tx_q->dirty_tx = 0;
+ tx_q->mss = 0;
}
}
stmmac_reset_queues_param(priv);
- /* reset private mss value to force mss context settings at
- * next tso xmit (only used for gmac4).
- */
- priv->mss = 0;
-
stmmac_clear_descriptors(priv);
stmmac_hw_setup(ndev, false);
* stmmac_mtl_setup - parse DT parameters for multiple queues configuration
* @pdev: platform device
*/
-static void stmmac_mtl_setup(struct platform_device *pdev,
- struct plat_stmmacenet_data *plat)
+static int stmmac_mtl_setup(struct platform_device *pdev,
+ struct plat_stmmacenet_data *plat)
{
struct device_node *q_node;
struct device_node *rx_node;
struct device_node *tx_node;
u8 queue = 0;
+ int ret = 0;
/* For backwards-compatibility with device trees that don't have any
* snps,mtl-rx-config or snps,mtl-tx-config properties, we fall back
rx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-rx-config", 0);
if (!rx_node)
- return;
+ return ret;
tx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-tx-config", 0);
if (!tx_node) {
of_node_put(rx_node);
- return;
+ return ret;
}
/* Processing RX queues common config */
queue++;
}
+ if (queue != plat->rx_queues_to_use) {
+ ret = -EINVAL;
+ dev_err(&pdev->dev, "Not all RX queues were configured\n");
+ goto out;
+ }
/* Processing TX queues common config */
if (of_property_read_u32(tx_node, "snps,tx-queues-to-use",
queue++;
}
+ if (queue != plat->tx_queues_to_use) {
+ ret = -EINVAL;
+ dev_err(&pdev->dev, "Not all TX queues were configured\n");
+ goto out;
+ }
+out:
of_node_put(rx_node);
of_node_put(tx_node);
of_node_put(q_node);
+
+ return ret;
}
/**
struct device_node *np = pdev->dev.of_node;
struct plat_stmmacenet_data *plat;
struct stmmac_dma_cfg *dma_cfg;
+ int rc;
plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
if (!plat)
dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n");
/* To Configure PHY by using all device-tree supported properties */
- if (stmmac_dt_phy(plat, np, &pdev->dev))
- return ERR_PTR(-ENODEV);
+ rc = stmmac_dt_phy(plat, np, &pdev->dev);
+ if (rc)
+ return ERR_PTR(rc);
of_property_read_u32(np, "tx-fifo-depth", &plat->tx_fifo_size);
plat->axi = stmmac_axi_setup(pdev);
- stmmac_mtl_setup(pdev, plat);
+ rc = stmmac_mtl_setup(pdev, plat);
+ if (rc) {
+ stmmac_remove_config_dt(pdev, plat);
+ return ERR_PTR(rc);
+ }
/* clock setup */
plat->stmmac_clk = devm_clk_get(&pdev->dev,
.exit_batch = geneve_exit_batch_net,
.id = &geneve_net_id,
.size = sizeof(struct geneve_net),
+ .async = true,
};
static int __init geneve_init_module(void)
.exit = gtp_net_exit,
.id = >p_net_id,
.size = sizeof(struct gtp_net),
+ .async = true,
};
static int __init gtp_init(void)
exposes a debugfs node for each CA8210 instance which allows
direct use of the Cascoda API, exposing the 802.15.4 MAC
management entities.
+
+config IEEE802154_MCR20A
+ tristate "MCR20A transceiver driver"
+ depends on IEEE802154_DRIVERS && MAC802154
+ depends on SPI
+ ---help---
+ Say Y here to enable the MCR20A SPI 802.15.4 wireless
+ controller.
+
+ This driver can also be built as a module. To do so, say M here.
+ the module will be called 'mcr20a'.
obj-$(CONFIG_IEEE802154_ATUSB) += atusb.o
obj-$(CONFIG_IEEE802154_ADF7242) += adf7242.o
obj-$(CONFIG_IEEE802154_CA8210) += ca8210.o
+obj-$(CONFIG_IEEE802154_MCR20A) += mcr20a.o
--- /dev/null
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/skbuff.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/ieee802154.h>
+#include <linux/debugfs.h>
+
+#include <net/mac802154.h>
+#include <net/cfg802154.h>
+
+#include <linux/device.h>
+
+#include "mcr20a.h"
+
+#define SPI_COMMAND_BUFFER 3
+
+#define REGISTER_READ BIT(7)
+#define REGISTER_WRITE (0 << 7)
+#define REGISTER_ACCESS (0 << 6)
+#define PACKET_BUFF_BURST_ACCESS BIT(6)
+#define PACKET_BUFF_BYTE_ACCESS BIT(5)
+
+#define MCR20A_WRITE_REG(x) (x)
+#define MCR20A_READ_REG(x) (REGISTER_READ | (x))
+#define MCR20A_BURST_READ_PACKET_BUF (0xC0)
+#define MCR20A_BURST_WRITE_PACKET_BUF (0x40)
+
+#define MCR20A_CMD_REG 0x80
+#define MCR20A_CMD_REG_MASK 0x3f
+#define MCR20A_CMD_WRITE 0x40
+#define MCR20A_CMD_FB 0x20
+
+/* Number of Interrupt Request Status Register */
+#define MCR20A_IRQSTS_NUM 2 /* only IRQ_STS1 and IRQ_STS2 */
+
+/* MCR20A CCA Type */
+enum {
+ MCR20A_CCA_ED, // energy detect - CCA bit not active,
+ // not to be used for T and CCCA sequences
+ MCR20A_CCA_MODE1, // energy detect - CCA bit ACTIVE
+ MCR20A_CCA_MODE2, // 802.15.4 compliant signal detect - CCA bit ACTIVE
+ MCR20A_CCA_MODE3
+};
+
+enum {
+ MCR20A_XCVSEQ_IDLE = 0x00,
+ MCR20A_XCVSEQ_RX = 0x01,
+ MCR20A_XCVSEQ_TX = 0x02,
+ MCR20A_XCVSEQ_CCA = 0x03,
+ MCR20A_XCVSEQ_TR = 0x04,
+ MCR20A_XCVSEQ_CCCA = 0x05,
+};
+
+/* IEEE-802.15.4 defined constants (2.4 GHz logical channels) */
+#define MCR20A_MIN_CHANNEL (11)
+#define MCR20A_MAX_CHANNEL (26)
+#define MCR20A_CHANNEL_SPACING (5)
+
+/* MCR20A CCA Threshold constans */
+#define MCR20A_MIN_CCA_THRESHOLD (0x6EU)
+#define MCR20A_MAX_CCA_THRESHOLD (0x00U)
+
+/* version 0C */
+#define MCR20A_OVERWRITE_VERSION (0x0C)
+
+/* MCR20A PLL configurations */
+static const u8 PLL_INT[16] = {
+ /* 2405 */ 0x0B, /* 2410 */ 0x0B, /* 2415 */ 0x0B,
+ /* 2420 */ 0x0B, /* 2425 */ 0x0B, /* 2430 */ 0x0B,
+ /* 2435 */ 0x0C, /* 2440 */ 0x0C, /* 2445 */ 0x0C,
+ /* 2450 */ 0x0C, /* 2455 */ 0x0C, /* 2460 */ 0x0C,
+ /* 2465 */ 0x0D, /* 2470 */ 0x0D, /* 2475 */ 0x0D,
+ /* 2480 */ 0x0D
+};
+
+static const u8 PLL_FRAC[16] = {
+ /* 2405 */ 0x28, /* 2410 */ 0x50, /* 2415 */ 0x78,
+ /* 2420 */ 0xA0, /* 2425 */ 0xC8, /* 2430 */ 0xF0,
+ /* 2435 */ 0x18, /* 2440 */ 0x40, /* 2445 */ 0x68,
+ /* 2450 */ 0x90, /* 2455 */ 0xB8, /* 2460 */ 0xE0,
+ /* 2465 */ 0x08, /* 2470 */ 0x30, /* 2475 */ 0x58,
+ /* 2480 */ 0x80
+};
+
+static const struct reg_sequence mar20a_iar_overwrites[] = {
+ { IAR_MISC_PAD_CTRL, 0x02 },
+ { IAR_VCO_CTRL1, 0xB3 },
+ { IAR_VCO_CTRL2, 0x07 },
+ { IAR_PA_TUNING, 0x71 },
+ { IAR_CHF_IBUF, 0x2F },
+ { IAR_CHF_QBUF, 0x2F },
+ { IAR_CHF_IRIN, 0x24 },
+ { IAR_CHF_QRIN, 0x24 },
+ { IAR_CHF_IL, 0x24 },
+ { IAR_CHF_QL, 0x24 },
+ { IAR_CHF_CC1, 0x32 },
+ { IAR_CHF_CCL, 0x1D },
+ { IAR_CHF_CC2, 0x2D },
+ { IAR_CHF_IROUT, 0x24 },
+ { IAR_CHF_QROUT, 0x24 },
+ { IAR_PA_CAL, 0x28 },
+ { IAR_AGC_THR1, 0x55 },
+ { IAR_AGC_THR2, 0x2D },
+ { IAR_ATT_RSSI1, 0x5F },
+ { IAR_ATT_RSSI2, 0x8F },
+ { IAR_RSSI_OFFSET, 0x61 },
+ { IAR_CHF_PMA_GAIN, 0x03 },
+ { IAR_CCA1_THRESH, 0x50 },
+ { IAR_CORR_NVAL, 0x13 },
+ { IAR_ACKDELAY, 0x3D },
+};
+
+#define MCR20A_VALID_CHANNELS (0x07FFF800)
+
+struct mcr20a_platform_data {
+ int rst_gpio;
+};
+
+#define MCR20A_MAX_BUF (127)
+
+#define printdev(X) (&X->spi->dev)
+
+/* regmap information for Direct Access Register (DAR) access */
+#define MCR20A_DAR_WRITE 0x01
+#define MCR20A_DAR_READ 0x00
+#define MCR20A_DAR_NUMREGS 0x3F
+
+/* regmap information for Indirect Access Register (IAR) access */
+#define MCR20A_IAR_ACCESS 0x80
+#define MCR20A_IAR_NUMREGS 0xBEFF
+
+/* Read/Write SPI Commands for DAR and IAR registers. */
+#define MCR20A_READSHORT(reg) ((reg) << 1)
+#define MCR20A_WRITESHORT(reg) ((reg) << 1 | 1)
+#define MCR20A_READLONG(reg) (1 << 15 | (reg) << 5)
+#define MCR20A_WRITELONG(reg) (1 << 15 | (reg) << 5 | 1 << 4)
+
+/* Type definitions for link configuration of instantiable layers */
+#define MCR20A_PHY_INDIRECT_QUEUE_SIZE (12)
+
+static bool
+mcr20a_dar_writeable(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ case DAR_PHY_CTRL1:
+ case DAR_PHY_CTRL2:
+ case DAR_PHY_CTRL3:
+ case DAR_PHY_CTRL4:
+ case DAR_SRC_CTRL:
+ case DAR_SRC_ADDRS_SUM_LSB:
+ case DAR_SRC_ADDRS_SUM_MSB:
+ case DAR_T3CMP_LSB:
+ case DAR_T3CMP_MSB:
+ case DAR_T3CMP_USB:
+ case DAR_T2PRIMECMP_LSB:
+ case DAR_T2PRIMECMP_MSB:
+ case DAR_T1CMP_LSB:
+ case DAR_T1CMP_MSB:
+ case DAR_T1CMP_USB:
+ case DAR_T2CMP_LSB:
+ case DAR_T2CMP_MSB:
+ case DAR_T2CMP_USB:
+ case DAR_T4CMP_LSB:
+ case DAR_T4CMP_MSB:
+ case DAR_T4CMP_USB:
+ case DAR_PLL_INT0:
+ case DAR_PLL_FRAC0_LSB:
+ case DAR_PLL_FRAC0_MSB:
+ case DAR_PA_PWR:
+ /* no DAR_ACM */
+ case DAR_OVERWRITE_VER:
+ case DAR_CLK_OUT_CTRL:
+ case DAR_PWR_MODES:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_readable(struct device *dev, unsigned int reg)
+{
+ bool rc;
+
+ /* all writeable are also readable */
+ rc = mcr20a_dar_writeable(dev, reg);
+ if (rc)
+ return rc;
+
+ /* readonly regs */
+ switch (reg) {
+ case DAR_RX_FRM_LEN:
+ case DAR_CCA1_ED_FNL:
+ case DAR_EVENT_TMR_LSB:
+ case DAR_EVENT_TMR_MSB:
+ case DAR_EVENT_TMR_USB:
+ case DAR_TIMESTAMP_LSB:
+ case DAR_TIMESTAMP_MSB:
+ case DAR_TIMESTAMP_USB:
+ case DAR_SEQ_STATE:
+ case DAR_LQI_VALUE:
+ case DAR_RSSI_CCA_CONT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_volatile(struct device *dev, unsigned int reg)
+{
+ /* can be changed during runtime */
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ /* use them in spi_async and regmap so it's volatile */
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_precious(struct device *dev, unsigned int reg)
+{
+ /* don't clear irq line on read */
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct regmap_config mcr20a_dar_regmap = {
+ .name = "mcr20a_dar",
+ .reg_bits = 8,
+ .val_bits = 8,
+ .write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE,
+ .read_flag_mask = REGISTER_ACCESS | REGISTER_READ,
+ .cache_type = REGCACHE_RBTREE,
+ .writeable_reg = mcr20a_dar_writeable,
+ .readable_reg = mcr20a_dar_readable,
+ .volatile_reg = mcr20a_dar_volatile,
+ .precious_reg = mcr20a_dar_precious,
+ .fast_io = true,
+ .can_multi_write = true,
+};
+
+static bool
+mcr20a_iar_writeable(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case IAR_XTAL_TRIM:
+ case IAR_PMC_LP_TRIM:
+ case IAR_MACPANID0_LSB:
+ case IAR_MACPANID0_MSB:
+ case IAR_MACSHORTADDRS0_LSB:
+ case IAR_MACSHORTADDRS0_MSB:
+ case IAR_MACLONGADDRS0_0:
+ case IAR_MACLONGADDRS0_8:
+ case IAR_MACLONGADDRS0_16:
+ case IAR_MACLONGADDRS0_24:
+ case IAR_MACLONGADDRS0_32:
+ case IAR_MACLONGADDRS0_40:
+ case IAR_MACLONGADDRS0_48:
+ case IAR_MACLONGADDRS0_56:
+ case IAR_RX_FRAME_FILTER:
+ case IAR_PLL_INT1:
+ case IAR_PLL_FRAC1_LSB:
+ case IAR_PLL_FRAC1_MSB:
+ case IAR_MACPANID1_LSB:
+ case IAR_MACPANID1_MSB:
+ case IAR_MACSHORTADDRS1_LSB:
+ case IAR_MACSHORTADDRS1_MSB:
+ case IAR_MACLONGADDRS1_0:
+ case IAR_MACLONGADDRS1_8:
+ case IAR_MACLONGADDRS1_16:
+ case IAR_MACLONGADDRS1_24:
+ case IAR_MACLONGADDRS1_32:
+ case IAR_MACLONGADDRS1_40:
+ case IAR_MACLONGADDRS1_48:
+ case IAR_MACLONGADDRS1_56:
+ case IAR_DUAL_PAN_CTRL:
+ case IAR_DUAL_PAN_DWELL:
+ case IAR_CCA1_THRESH:
+ case IAR_CCA1_ED_OFFSET_COMP:
+ case IAR_LQI_OFFSET_COMP:
+ case IAR_CCA_CTRL:
+ case IAR_CCA2_CORR_PEAKS:
+ case IAR_CCA2_CORR_THRESH:
+ case IAR_TMR_PRESCALE:
+ case IAR_ANT_PAD_CTRL:
+ case IAR_MISC_PAD_CTRL:
+ case IAR_BSM_CTRL:
+ case IAR_RNG:
+ case IAR_RX_WTR_MARK:
+ case IAR_SOFT_RESET:
+ case IAR_TXDELAY:
+ case IAR_ACKDELAY:
+ case IAR_CORR_NVAL:
+ case IAR_ANT_AGC_CTRL:
+ case IAR_AGC_THR1:
+ case IAR_AGC_THR2:
+ case IAR_PA_CAL:
+ case IAR_ATT_RSSI1:
+ case IAR_ATT_RSSI2:
+ case IAR_RSSI_OFFSET:
+ case IAR_XTAL_CTRL:
+ case IAR_CHF_PMA_GAIN:
+ case IAR_CHF_IBUF:
+ case IAR_CHF_QBUF:
+ case IAR_CHF_IRIN:
+ case IAR_CHF_QRIN:
+ case IAR_CHF_IL:
+ case IAR_CHF_QL:
+ case IAR_CHF_CC1:
+ case IAR_CHF_CCL:
+ case IAR_CHF_CC2:
+ case IAR_CHF_IROUT:
+ case IAR_CHF_QROUT:
+ case IAR_PA_TUNING:
+ case IAR_VCO_CTRL1:
+ case IAR_VCO_CTRL2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_iar_readable(struct device *dev, unsigned int reg)
+{
+ bool rc;
+
+ /* all writeable are also readable */
+ rc = mcr20a_iar_writeable(dev, reg);
+ if (rc)
+ return rc;
+
+ /* readonly regs */
+ switch (reg) {
+ case IAR_PART_ID:
+ case IAR_DUAL_PAN_STS:
+ case IAR_RX_BYTE_COUNT:
+ case IAR_FILTERFAIL_CODE1:
+ case IAR_FILTERFAIL_CODE2:
+ case IAR_RSSI:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_iar_volatile(struct device *dev, unsigned int reg)
+{
+/* can be changed during runtime */
+ switch (reg) {
+ case IAR_DUAL_PAN_STS:
+ case IAR_RX_BYTE_COUNT:
+ case IAR_FILTERFAIL_CODE1:
+ case IAR_FILTERFAIL_CODE2:
+ case IAR_RSSI:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct regmap_config mcr20a_iar_regmap = {
+ .name = "mcr20a_iar",
+ .reg_bits = 16,
+ .val_bits = 8,
+ .write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE | IAR_INDEX,
+ .read_flag_mask = REGISTER_ACCESS | REGISTER_READ | IAR_INDEX,
+ .cache_type = REGCACHE_RBTREE,
+ .writeable_reg = mcr20a_iar_writeable,
+ .readable_reg = mcr20a_iar_readable,
+ .volatile_reg = mcr20a_iar_volatile,
+ .fast_io = true,
+};
+
+struct mcr20a_local {
+ struct spi_device *spi;
+
+ struct ieee802154_hw *hw;
+ struct mcr20a_platform_data *pdata;
+ struct regmap *regmap_dar;
+ struct regmap *regmap_iar;
+
+ u8 *buf;
+
+ bool is_tx;
+
+ /* for writing tx buffer */
+ struct spi_message tx_buf_msg;
+ u8 tx_header[1];
+ /* burst buffer write command */
+ struct spi_transfer tx_xfer_header;
+ u8 tx_len[1];
+ /* len of tx packet */
+ struct spi_transfer tx_xfer_len;
+ /* data of tx packet */
+ struct spi_transfer tx_xfer_buf;
+ struct sk_buff *tx_skb;
+
+ /* for read length rxfifo */
+ struct spi_message reg_msg;
+ u8 reg_cmd[1];
+ u8 reg_data[MCR20A_IRQSTS_NUM];
+ struct spi_transfer reg_xfer_cmd;
+ struct spi_transfer reg_xfer_data;
+
+ /* receive handling */
+ struct spi_message rx_buf_msg;
+ u8 rx_header[1];
+ struct spi_transfer rx_xfer_header;
+ u8 rx_lqi[1];
+ struct spi_transfer rx_xfer_lqi;
+ u8 rx_buf[MCR20A_MAX_BUF];
+ struct spi_transfer rx_xfer_buf;
+
+ /* isr handling for reading intstat */
+ struct spi_message irq_msg;
+ u8 irq_header[1];
+ u8 irq_data[MCR20A_IRQSTS_NUM];
+ struct spi_transfer irq_xfer_data;
+ struct spi_transfer irq_xfer_header;
+};
+
+static void
+mcr20a_write_tx_buf_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ lp->reg_msg.complete = NULL;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+ lp->reg_data[0] = MCR20A_XCVSEQ_TX;
+ lp->reg_xfer_data.len = 1;
+
+ ret = spi_async(lp->spi, &lp->reg_msg);
+ if (ret)
+ dev_err(printdev(lp), "failed to set SEQ TX\n");
+}
+
+static int
+mcr20a_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ lp->tx_skb = skb;
+
+ print_hex_dump_debug("mcr20a tx: ", DUMP_PREFIX_OFFSET, 16, 1,
+ skb->data, skb->len, 0);
+
+ lp->is_tx = 1;
+
+ lp->reg_msg.complete = NULL;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+ lp->reg_data[0] = MCR20A_XCVSEQ_IDLE;
+ lp->reg_xfer_data.len = 1;
+
+ return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_ed(struct ieee802154_hw *hw, u8 *level)
+{
+ WARN_ON(!level);
+ *level = 0xbe;
+ return 0;
+}
+
+static int
+mcr20a_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* freqency = ((PLL_INT+64) + (PLL_FRAC/65536)) * 32 MHz */
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_INT0, PLL_INT[channel - 11]);
+ if (ret)
+ return ret;
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_LSB, 0x00);
+ if (ret)
+ return ret;
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_MSB,
+ PLL_FRAC[channel - 11]);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int
+mcr20a_start(struct ieee802154_hw *hw)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* No slotted operation */
+ dev_dbg(printdev(lp), "no slotted operation\n");
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_SLOTTED, 0x0);
+
+ /* enable irq */
+ enable_irq(lp->spi->irq);
+
+ /* Unmask SEQ interrupt */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL2,
+ DAR_PHY_CTRL2_SEQMSK, 0x0);
+
+ /* Start the RX sequence */
+ dev_dbg(printdev(lp), "start the RX sequence\n");
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+ return 0;
+}
+
+static void
+mcr20a_stop(struct ieee802154_hw *hw)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* stop all running sequence */
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+ /* disable irq */
+ disable_irq(lp->spi->irq);
+}
+
+static int
+mcr20a_set_hw_addr_filt(struct ieee802154_hw *hw,
+ struct ieee802154_hw_addr_filt *filt,
+ unsigned long changed)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ if (changed & IEEE802154_AFILT_SADDR_CHANGED) {
+ u16 addr = le16_to_cpu(filt->short_addr);
+
+ regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_LSB, addr);
+ regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_MSB, addr >> 8);
+ }
+
+ if (changed & IEEE802154_AFILT_PANID_CHANGED) {
+ u16 pan = le16_to_cpu(filt->pan_id);
+
+ regmap_write(lp->regmap_iar, IAR_MACPANID0_LSB, pan);
+ regmap_write(lp->regmap_iar, IAR_MACPANID0_MSB, pan >> 8);
+ }
+
+ if (changed & IEEE802154_AFILT_IEEEADDR_CHANGED) {
+ u8 addr[8], i;
+
+ memcpy(addr, &filt->ieee_addr, 8);
+ for (i = 0; i < 8; i++)
+ regmap_write(lp->regmap_iar,
+ IAR_MACLONGADDRS0_0 + i, addr[i]);
+ }
+
+ if (changed & IEEE802154_AFILT_PANC_CHANGED) {
+ if (filt->pan_coord) {
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PANCORDNTR0, 0x10);
+ } else {
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PANCORDNTR0, 0x00);
+ }
+ }
+
+ return 0;
+}
+
+/* -30 dBm to 10 dBm */
+#define MCR20A_MAX_TX_POWERS 0x14
+static const s32 mcr20a_powers[MCR20A_MAX_TX_POWERS + 1] = {
+ -3000, -2800, -2600, -2400, -2200, -2000, -1800, -1600, -1400,
+ -1200, -1000, -800, -600, -400, -200, 0, 200, 400, 600, 800, 1000
+};
+
+static int
+mcr20a_set_txpower(struct ieee802154_hw *hw, s32 mbm)
+{
+ struct mcr20a_local *lp = hw->priv;
+ u32 i;
+
+ dev_dbg(printdev(lp), "%s(%d)\n", __func__, mbm);
+
+ for (i = 0; i < lp->hw->phy->supported.tx_powers_size; i++) {
+ if (lp->hw->phy->supported.tx_powers[i] == mbm)
+ return regmap_write(lp->regmap_dar, DAR_PA_PWR,
+ ((i + 8) & 0x1F));
+ }
+
+ return -EINVAL;
+}
+
+#define MCR20A_MAX_ED_LEVELS MCR20A_MIN_CCA_THRESHOLD
+static s32 mcr20a_ed_levels[MCR20A_MAX_ED_LEVELS + 1];
+
+static int
+mcr20a_set_cca_mode(struct ieee802154_hw *hw,
+ const struct wpan_phy_cca *cca)
+{
+ struct mcr20a_local *lp = hw->priv;
+ unsigned int cca_mode = 0xff;
+ bool cca_mode_and = false;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* mapping 802.15.4 to driver spec */
+ switch (cca->mode) {
+ case NL802154_CCA_ENERGY:
+ cca_mode = MCR20A_CCA_MODE1;
+ break;
+ case NL802154_CCA_CARRIER:
+ cca_mode = MCR20A_CCA_MODE2;
+ break;
+ case NL802154_CCA_ENERGY_CARRIER:
+ switch (cca->opt) {
+ case NL802154_CCA_OPT_ENERGY_CARRIER_AND:
+ cca_mode = MCR20A_CCA_MODE3;
+ cca_mode_and = true;
+ break;
+ case NL802154_CCA_OPT_ENERGY_CARRIER_OR:
+ cca_mode = MCR20A_CCA_MODE3;
+ cca_mode_and = false;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_CCATYPE_MASK,
+ cca_mode << DAR_PHY_CTRL4_CCATYPE_SHIFT);
+ if (ret < 0)
+ return ret;
+
+ if (cca_mode == MCR20A_CCA_MODE3) {
+ if (cca_mode_and) {
+ ret = regmap_update_bits(lp->regmap_iar, IAR_CCA_CTRL,
+ IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+ 0x08);
+ } else {
+ ret = regmap_update_bits(lp->regmap_iar,
+ IAR_CCA_CTRL,
+ IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+ 0x00);
+ }
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+mcr20a_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm)
+{
+ struct mcr20a_local *lp = hw->priv;
+ u32 i;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ for (i = 0; i < hw->phy->supported.cca_ed_levels_size; i++) {
+ if (hw->phy->supported.cca_ed_levels[i] == mbm)
+ return regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, i);
+ }
+
+ return 0;
+}
+
+static int
+mcr20a_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+ u8 rx_frame_filter_reg = 0x0;
+ u8 val;
+
+ dev_dbg(printdev(lp), "%s(%d)\n", __func__, on);
+
+ if (on) {
+ /* All frame types accepted*/
+ val |= DAR_PHY_CTRL4_PROMISCUOUS;
+ rx_frame_filter_reg &= ~(IAR_RX_FRAME_FLT_FRM_VER);
+ rx_frame_filter_reg |= (IAR_RX_FRAME_FLT_ACK_FT |
+ IAR_RX_FRAME_FLT_NS_FT);
+
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PROMISCUOUS,
+ DAR_PHY_CTRL4_PROMISCUOUS);
+ if (ret < 0)
+ return ret;
+
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ rx_frame_filter_reg);
+ if (ret < 0)
+ return ret;
+ } else {
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PROMISCUOUS, 0x0);
+ if (ret < 0)
+ return ret;
+
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ IAR_RX_FRAME_FLT_FRM_VER |
+ IAR_RX_FRAME_FLT_BEACON_FT |
+ IAR_RX_FRAME_FLT_DATA_FT |
+ IAR_RX_FRAME_FLT_CMD_FT);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct ieee802154_ops mcr20a_hw_ops = {
+ .owner = THIS_MODULE,
+ .xmit_async = mcr20a_xmit,
+ .ed = mcr20a_ed,
+ .set_channel = mcr20a_set_channel,
+ .start = mcr20a_start,
+ .stop = mcr20a_stop,
+ .set_hw_addr_filt = mcr20a_set_hw_addr_filt,
+ .set_txpower = mcr20a_set_txpower,
+ .set_cca_mode = mcr20a_set_cca_mode,
+ .set_cca_ed_level = mcr20a_set_cca_ed_level,
+ .set_promiscuous_mode = mcr20a_set_promiscuous_mode,
+};
+
+static int
+mcr20a_request_rx(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* Start the RX sequence */
+ regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+ return 0;
+}
+
+static void
+mcr20a_handle_rx_read_buf_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+ struct sk_buff *skb;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ dev_dbg(printdev(lp), "RX is done\n");
+
+ if (!ieee802154_is_valid_psdu_len(len)) {
+ dev_vdbg(&lp->spi->dev, "corrupted frame received\n");
+ len = IEEE802154_MTU;
+ }
+
+ len = len - 2; /* get rid of frame check field */
+
+ skb = dev_alloc_skb(len);
+ if (!skb)
+ return;
+
+ memcpy(skb_put(skb, len), lp->rx_buf, len);
+ ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]);
+
+ print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1,
+ lp->rx_buf, len, 0);
+ pr_debug("mcr20a rx: lqi: %02hhx\n", lp->rx_lqi[0]);
+
+ /* start RX sequence */
+ mcr20a_request_rx(lp);
+}
+
+static void
+mcr20a_handle_rx_read_len_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 len;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* get the length of received frame */
+ len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+ dev_dbg(printdev(lp), "frame len : %d\n", len);
+
+ /* prepare to read the rx buf */
+ lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+ lp->rx_header[0] = MCR20A_BURST_READ_PACKET_BUF;
+ lp->rx_xfer_buf.len = len;
+
+ ret = spi_async(lp->spi, &lp->rx_buf_msg);
+ if (ret)
+ dev_err(printdev(lp), "failed to read rx buffer length\n");
+}
+
+static int
+mcr20a_handle_rx(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+ lp->reg_msg.complete = mcr20a_handle_rx_read_len_complete;
+ lp->reg_cmd[0] = MCR20A_READ_REG(DAR_RX_FRM_LEN);
+ lp->reg_xfer_data.len = 1;
+
+ return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_handle_tx_complete(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ ieee802154_xmit_complete(lp->hw, lp->tx_skb, false);
+
+ return mcr20a_request_rx(lp);
+}
+
+static int
+mcr20a_handle_tx(struct mcr20a_local *lp)
+{
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* write tx buffer */
+ lp->tx_header[0] = MCR20A_BURST_WRITE_PACKET_BUF;
+ /* add 2 bytes of FCS */
+ lp->tx_len[0] = lp->tx_skb->len + 2;
+ lp->tx_xfer_buf.tx_buf = lp->tx_skb->data;
+ /* add 1 byte psduLength */
+ lp->tx_xfer_buf.len = lp->tx_skb->len + 1;
+
+ ret = spi_async(lp->spi, &lp->tx_buf_msg);
+ if (ret) {
+ dev_err(printdev(lp), "SPI write Failed for TX buf\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void
+mcr20a_irq_clean_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 seq_state = lp->irq_data[DAR_IRQ_STS1] & DAR_PHY_CTRL1_XCVSEQ_MASK;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ enable_irq(lp->spi->irq);
+
+ dev_dbg(printdev(lp), "IRQ STA1 (%02x) STA2 (%02x)\n",
+ lp->irq_data[DAR_IRQ_STS1], lp->irq_data[DAR_IRQ_STS2]);
+
+ switch (seq_state) {
+ /* TX IRQ, RX IRQ and SEQ IRQ */
+ case (0x03):
+ if (lp->is_tx) {
+ lp->is_tx = 0;
+ dev_dbg(printdev(lp), "TX is done. No ACK\n");
+ mcr20a_handle_tx_complete(lp);
+ }
+ break;
+ case (0x05):
+ /* rx is starting */
+ dev_dbg(printdev(lp), "RX is starting\n");
+ mcr20a_handle_rx(lp);
+ break;
+ case (0x07):
+ if (lp->is_tx) {
+ /* tx is done */
+ lp->is_tx = 0;
+ dev_dbg(printdev(lp), "TX is done. Get ACK\n");
+ mcr20a_handle_tx_complete(lp);
+ } else {
+ /* rx is starting */
+ dev_dbg(printdev(lp), "RX is starting\n");
+ mcr20a_handle_rx(lp);
+ }
+ break;
+ case (0x01):
+ if (lp->is_tx) {
+ dev_dbg(printdev(lp), "TX is starting\n");
+ mcr20a_handle_tx(lp);
+ } else {
+ dev_dbg(printdev(lp), "MCR20A is stop\n");
+ }
+ break;
+ }
+}
+
+static void mcr20a_irq_status_complete(void *context)
+{
+ int ret;
+ struct mcr20a_local *lp = context;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+ regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+ lp->reg_msg.complete = mcr20a_irq_clean_complete;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_IRQ_STS1);
+ memcpy(lp->reg_data, lp->irq_data, MCR20A_IRQSTS_NUM);
+ lp->reg_xfer_data.len = MCR20A_IRQSTS_NUM;
+
+ ret = spi_async(lp->spi, &lp->reg_msg);
+
+ if (ret)
+ dev_err(printdev(lp), "failed to clean irq status\n");
+}
+
+static irqreturn_t mcr20a_irq_isr(int irq, void *data)
+{
+ struct mcr20a_local *lp = data;
+ int ret;
+
+ disable_irq_nosync(irq);
+
+ lp->irq_header[0] = MCR20A_READ_REG(DAR_IRQ_STS1);
+ /* read IRQSTSx */
+ ret = spi_async(lp->spi, &lp->irq_msg);
+ if (ret) {
+ enable_irq(irq);
+ return IRQ_NONE;
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int mcr20a_get_platform_data(struct spi_device *spi,
+ struct mcr20a_platform_data *pdata)
+{
+ int ret = 0;
+
+ if (!spi->dev.of_node)
+ return -EINVAL;
+
+ pdata->rst_gpio = of_get_named_gpio(spi->dev.of_node, "rst_b-gpio", 0);
+ dev_dbg(&spi->dev, "rst_b-gpio: %d\n", pdata->rst_gpio);
+
+ return ret;
+}
+
+static void mcr20a_hw_setup(struct mcr20a_local *lp)
+{
+ u8 i;
+ struct ieee802154_hw *hw = lp->hw;
+ struct wpan_phy *phy = lp->hw->phy;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ phy->symbol_duration = 16;
+ phy->lifs_period = 40;
+ phy->sifs_period = 12;
+
+ hw->flags = IEEE802154_HW_TX_OMIT_CKSUM |
+ IEEE802154_HW_AFILT |
+ IEEE802154_HW_PROMISCUOUS;
+
+ phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL |
+ WPAN_PHY_FLAG_CCA_MODE;
+
+ phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) |
+ BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER);
+ phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) |
+ BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR);
+
+ /* initiating cca_ed_levels */
+ for (i = MCR20A_MAX_CCA_THRESHOLD; i < MCR20A_MIN_CCA_THRESHOLD + 1;
+ ++i) {
+ mcr20a_ed_levels[i] = -i * 100;
+ }
+
+ phy->supported.cca_ed_levels = mcr20a_ed_levels;
+ phy->supported.cca_ed_levels_size = ARRAY_SIZE(mcr20a_ed_levels);
+
+ phy->cca.mode = NL802154_CCA_ENERGY;
+
+ phy->supported.channels[0] = MCR20A_VALID_CHANNELS;
+ phy->current_page = 0;
+ /* MCR20A default reset value */
+ phy->current_channel = 20;
+ phy->symbol_duration = 16;
+ phy->supported.tx_powers = mcr20a_powers;
+ phy->supported.tx_powers_size = ARRAY_SIZE(mcr20a_powers);
+ phy->cca_ed_level = phy->supported.cca_ed_levels[75];
+ phy->transmit_power = phy->supported.tx_powers[0x0F];
+}
+
+static void
+mcr20a_setup_tx_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->tx_buf_msg);
+ lp->tx_buf_msg.context = lp;
+ lp->tx_buf_msg.complete = mcr20a_write_tx_buf_complete;
+
+ lp->tx_xfer_header.len = 1;
+ lp->tx_xfer_header.tx_buf = lp->tx_header;
+
+ lp->tx_xfer_len.len = 1;
+ lp->tx_xfer_len.tx_buf = lp->tx_len;
+
+ spi_message_add_tail(&lp->tx_xfer_header, &lp->tx_buf_msg);
+ spi_message_add_tail(&lp->tx_xfer_len, &lp->tx_buf_msg);
+ spi_message_add_tail(&lp->tx_xfer_buf, &lp->tx_buf_msg);
+}
+
+static void
+mcr20a_setup_rx_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->reg_msg);
+ lp->reg_msg.context = lp;
+
+ lp->reg_xfer_cmd.len = 1;
+ lp->reg_xfer_cmd.tx_buf = lp->reg_cmd;
+ lp->reg_xfer_cmd.rx_buf = lp->reg_cmd;
+
+ lp->reg_xfer_data.rx_buf = lp->reg_data;
+ lp->reg_xfer_data.tx_buf = lp->reg_data;
+
+ spi_message_add_tail(&lp->reg_xfer_cmd, &lp->reg_msg);
+ spi_message_add_tail(&lp->reg_xfer_data, &lp->reg_msg);
+
+ spi_message_init(&lp->rx_buf_msg);
+ lp->rx_buf_msg.context = lp;
+ lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+ lp->rx_xfer_header.len = 1;
+ lp->rx_xfer_header.tx_buf = lp->rx_header;
+ lp->rx_xfer_header.rx_buf = lp->rx_header;
+
+ lp->rx_xfer_buf.rx_buf = lp->rx_buf;
+
+ lp->rx_xfer_lqi.len = 1;
+ lp->rx_xfer_lqi.rx_buf = lp->rx_lqi;
+
+ spi_message_add_tail(&lp->rx_xfer_header, &lp->rx_buf_msg);
+ spi_message_add_tail(&lp->rx_xfer_buf, &lp->rx_buf_msg);
+ spi_message_add_tail(&lp->rx_xfer_lqi, &lp->rx_buf_msg);
+}
+
+static void
+mcr20a_setup_irq_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->irq_msg);
+ lp->irq_msg.context = lp;
+ lp->irq_msg.complete = mcr20a_irq_status_complete;
+ lp->irq_xfer_header.len = 1;
+ lp->irq_xfer_header.tx_buf = lp->irq_header;
+ lp->irq_xfer_header.rx_buf = lp->irq_header;
+
+ lp->irq_xfer_data.len = MCR20A_IRQSTS_NUM;
+ lp->irq_xfer_data.rx_buf = lp->irq_data;
+
+ spi_message_add_tail(&lp->irq_xfer_header, &lp->irq_msg);
+ spi_message_add_tail(&lp->irq_xfer_data, &lp->irq_msg);
+}
+
+static int
+mcr20a_phy_init(struct mcr20a_local *lp)
+{
+ u8 index;
+ unsigned int phy_reg = 0;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* Disable Tristate on COCO MISO for SPI reads */
+ ret = regmap_write(lp->regmap_iar, IAR_MISC_PAD_CTRL, 0x02);
+ if (ret)
+ goto err_ret;
+
+ /* Clear all PP IRQ bits in IRQSTS1 to avoid unexpected interrupts
+ * immediately after init
+ */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS1, 0xEF);
+ if (ret)
+ goto err_ret;
+
+ /* Clear all PP IRQ bits in IRQSTS2 */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS2,
+ DAR_IRQSTS2_ASM_IRQ | DAR_IRQSTS2_PB_ERR_IRQ |
+ DAR_IRQSTS2_WAKE_IRQ);
+ if (ret)
+ goto err_ret;
+
+ /* Disable all timer interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS3, 0xFF);
+ if (ret)
+ goto err_ret;
+
+ /* PHY_CTRL1 : default HW settings + AUTOACK enabled */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_AUTOACK, DAR_PHY_CTRL1_AUTOACK);
+
+ /* PHY_CTRL2 : disable all interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL2, 0xFF);
+ if (ret)
+ goto err_ret;
+
+ /* PHY_CTRL3 : disable all timers and remaining interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL3,
+ DAR_PHY_CTRL3_ASM_MSK | DAR_PHY_CTRL3_PB_ERR_MSK |
+ DAR_PHY_CTRL3_WAKE_MSK);
+ if (ret)
+ goto err_ret;
+
+ /* SRC_CTRL : enable Acknowledge Frame Pending and
+ * Source Address Matching Enable
+ */
+ ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL,
+ DAR_SRC_CTRL_ACK_FRM_PND |
+ (DAR_SRC_CTRL_INDEX << DAR_SRC_CTRL_INDEX_SHIFT));
+ if (ret)
+ goto err_ret;
+
+ /* RX_FRAME_FILTER */
+ /* FRM_VER[1:0] = b11. Accept FrameVersion 0 and 1 packets */
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ IAR_RX_FRAME_FLT_FRM_VER |
+ IAR_RX_FRAME_FLT_BEACON_FT |
+ IAR_RX_FRAME_FLT_DATA_FT |
+ IAR_RX_FRAME_FLT_CMD_FT);
+ if (ret)
+ goto err_ret;
+
+ dev_info(printdev(lp), "MCR20A DAR overwrites version: 0x%02x\n",
+ MCR20A_OVERWRITE_VERSION);
+
+ /* Overwrites direct registers */
+ ret = regmap_write(lp->regmap_dar, DAR_OVERWRITE_VER,
+ MCR20A_OVERWRITE_VERSION);
+ if (ret)
+ goto err_ret;
+
+ /* Overwrites indirect registers */
+ ret = regmap_multi_reg_write(lp->regmap_iar, mar20a_iar_overwrites,
+ ARRAY_SIZE(mar20a_iar_overwrites));
+ if (ret)
+ goto err_ret;
+
+ /* Clear HW indirect queue */
+ dev_dbg(printdev(lp), "clear HW indirect queue\n");
+ for (index = 0; index < MCR20A_PHY_INDIRECT_QUEUE_SIZE; index++) {
+ phy_reg = (u8)(((index & DAR_SRC_CTRL_INDEX) <<
+ DAR_SRC_CTRL_INDEX_SHIFT)
+ | (DAR_SRC_CTRL_SRCADDR_EN)
+ | (DAR_SRC_CTRL_INDEX_DISABLE));
+ ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL, phy_reg);
+ if (ret)
+ goto err_ret;
+ phy_reg = 0;
+ }
+
+ /* Assign HW Indirect hash table to PAN0 */
+ ret = regmap_read(lp->regmap_iar, IAR_DUAL_PAN_CTRL, &phy_reg);
+ if (ret)
+ goto err_ret;
+
+ /* Clear current lvl */
+ phy_reg &= ~IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK;
+
+ /* Set new lvl */
+ phy_reg |= MCR20A_PHY_INDIRECT_QUEUE_SIZE <<
+ IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT;
+ ret = regmap_write(lp->regmap_iar, IAR_DUAL_PAN_CTRL, phy_reg);
+ if (ret)
+ goto err_ret;
+
+ /* Set CCA threshold to -75 dBm */
+ ret = regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, 0x4B);
+ if (ret)
+ goto err_ret;
+
+ /* Set prescaller to obtain 1 symbol (16us) timebase */
+ ret = regmap_write(lp->regmap_iar, IAR_TMR_PRESCALE, 0x05);
+ if (ret)
+ goto err_ret;
+
+ /* Enable autodoze mode. */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PWR_MODES,
+ DAR_PWR_MODES_AUTODOZE,
+ DAR_PWR_MODES_AUTODOZE);
+ if (ret)
+ goto err_ret;
+
+ /* Disable clk_out */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_CLK_OUT_CTRL,
+ DAR_CLK_OUT_CTRL_EN, 0x0);
+ if (ret)
+ goto err_ret;
+
+ return 0;
+
+err_ret:
+ return ret;
+}
+
+static int
+mcr20a_probe(struct spi_device *spi)
+{
+ struct ieee802154_hw *hw;
+ struct mcr20a_local *lp;
+ struct mcr20a_platform_data *pdata;
+ int irq_type;
+ int ret = -ENOMEM;
+
+ dev_dbg(&spi->dev, "%s\n", __func__);
+
+ if (!spi->irq) {
+ dev_err(&spi->dev, "no IRQ specified\n");
+ return -EINVAL;
+ }
+
+ pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ /* set mcr20a platform data */
+ ret = mcr20a_get_platform_data(spi, pdata);
+ if (ret < 0) {
+ dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n");
+ return ret;
+ }
+
+ /* init reset gpio */
+ if (gpio_is_valid(pdata->rst_gpio)) {
+ ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio,
+ GPIOF_OUT_INIT_HIGH, "reset");
+ if (ret)
+ return ret;
+ }
+
+ /* reset mcr20a */
+ if (gpio_is_valid(pdata->rst_gpio)) {
+ usleep_range(10, 20);
+ gpio_set_value_cansleep(pdata->rst_gpio, 0);
+ usleep_range(10, 20);
+ gpio_set_value_cansleep(pdata->rst_gpio, 1);
+ usleep_range(120, 240);
+ }
+
+ /* allocate ieee802154_hw and private data */
+ hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops);
+ if (!hw) {
+ dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n");
+ return -ENOMEM;
+ }
+
+ /* init mcr20a local data */
+ lp = hw->priv;
+ lp->hw = hw;
+ lp->spi = spi;
+ lp->spi->dev.platform_data = pdata;
+ lp->pdata = pdata;
+
+ /* init ieee802154_hw */
+ hw->parent = &spi->dev;
+ ieee802154_random_extended_addr(&hw->phy->perm_extended_addr);
+
+ /* init buf */
+ lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL);
+
+ if (!lp->buf)
+ return -ENOMEM;
+
+ mcr20a_setup_tx_spi_messages(lp);
+ mcr20a_setup_rx_spi_messages(lp);
+ mcr20a_setup_irq_spi_messages(lp);
+
+ /* setup regmap */
+ lp->regmap_dar = devm_regmap_init_spi(spi, &mcr20a_dar_regmap);
+ if (IS_ERR(lp->regmap_dar)) {
+ ret = PTR_ERR(lp->regmap_dar);
+ dev_err(&spi->dev, "Failed to allocate dar map: %d\n",
+ ret);
+ goto free_dev;
+ }
+
+ lp->regmap_iar = devm_regmap_init_spi(spi, &mcr20a_iar_regmap);
+ if (IS_ERR(lp->regmap_iar)) {
+ ret = PTR_ERR(lp->regmap_iar);
+ dev_err(&spi->dev, "Failed to allocate iar map: %d\n", ret);
+ goto free_dev;
+ }
+
+ mcr20a_hw_setup(lp);
+
+ spi_set_drvdata(spi, lp);
+
+ ret = mcr20a_phy_init(lp);
+ if (ret < 0) {
+ dev_crit(&spi->dev, "mcr20a_phy_init failed\n");
+ goto free_dev;
+ }
+
+ irq_type = irq_get_trigger_type(spi->irq);
+ if (!irq_type)
+ irq_type = IRQF_TRIGGER_FALLING;
+
+ ret = devm_request_irq(&spi->dev, spi->irq, mcr20a_irq_isr,
+ irq_type, dev_name(&spi->dev), lp);
+ if (ret) {
+ dev_err(&spi->dev, "could not request_irq for mcr20a\n");
+ ret = -ENODEV;
+ goto free_dev;
+ }
+
+ /* disable_irq by default and wait for starting hardware */
+ disable_irq(spi->irq);
+
+ ret = ieee802154_register_hw(hw);
+ if (ret) {
+ dev_crit(&spi->dev, "ieee802154_register_hw failed\n");
+ goto free_dev;
+ }
+
+ return ret;
+
+free_dev:
+ ieee802154_free_hw(lp->hw);
+
+ return ret;
+}
+
+static int mcr20a_remove(struct spi_device *spi)
+{
+ struct mcr20a_local *lp = spi_get_drvdata(spi);
+
+ dev_dbg(&spi->dev, "%s\n", __func__);
+
+ ieee802154_unregister_hw(lp->hw);
+ ieee802154_free_hw(lp->hw);
+
+ return 0;
+}
+
+static const struct of_device_id mcr20a_of_match[] = {
+ { .compatible = "nxp,mcr20a", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, mcr20a_of_match);
+
+static const struct spi_device_id mcr20a_device_id[] = {
+ { .name = "mcr20a", },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, mcr20a_device_id);
+
+static struct spi_driver mcr20a_driver = {
+ .id_table = mcr20a_device_id,
+ .driver = {
+ .of_match_table = of_match_ptr(mcr20a_of_match),
+ .name = "mcr20a",
+ },
+ .probe = mcr20a_probe,
+ .remove = mcr20a_remove,
+};
+
+module_spi_driver(mcr20a_driver);
+
+MODULE_DESCRIPTION("MCR20A Transceiver Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Xue Liu <liuxuenetmail@gmail>");
--- /dev/null
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef _MCR20A_H
+#define _MCR20A_H
+
+/* Direct Accress Register */
+#define DAR_IRQ_STS1 0x00
+#define DAR_IRQ_STS2 0x01
+#define DAR_IRQ_STS3 0x02
+#define DAR_PHY_CTRL1 0x03
+#define DAR_PHY_CTRL2 0x04
+#define DAR_PHY_CTRL3 0x05
+#define DAR_RX_FRM_LEN 0x06
+#define DAR_PHY_CTRL4 0x07
+#define DAR_SRC_CTRL 0x08
+#define DAR_SRC_ADDRS_SUM_LSB 0x09
+#define DAR_SRC_ADDRS_SUM_MSB 0x0A
+#define DAR_CCA1_ED_FNL 0x0B
+#define DAR_EVENT_TMR_LSB 0x0C
+#define DAR_EVENT_TMR_MSB 0x0D
+#define DAR_EVENT_TMR_USB 0x0E
+#define DAR_TIMESTAMP_LSB 0x0F
+#define DAR_TIMESTAMP_MSB 0x10
+#define DAR_TIMESTAMP_USB 0x11
+#define DAR_T3CMP_LSB 0x12
+#define DAR_T3CMP_MSB 0x13
+#define DAR_T3CMP_USB 0x14
+#define DAR_T2PRIMECMP_LSB 0x15
+#define DAR_T2PRIMECMP_MSB 0x16
+#define DAR_T1CMP_LSB 0x17
+#define DAR_T1CMP_MSB 0x18
+#define DAR_T1CMP_USB 0x19
+#define DAR_T2CMP_LSB 0x1A
+#define DAR_T2CMP_MSB 0x1B
+#define DAR_T2CMP_USB 0x1C
+#define DAR_T4CMP_LSB 0x1D
+#define DAR_T4CMP_MSB 0x1E
+#define DAR_T4CMP_USB 0x1F
+#define DAR_PLL_INT0 0x20
+#define DAR_PLL_FRAC0_LSB 0x21
+#define DAR_PLL_FRAC0_MSB 0x22
+#define DAR_PA_PWR 0x23
+#define DAR_SEQ_STATE 0x24
+#define DAR_LQI_VALUE 0x25
+#define DAR_RSSI_CCA_CONT 0x26
+/*------------------ 0x27 */
+#define DAR_ASM_CTRL1 0x28
+#define DAR_ASM_CTRL2 0x29
+#define DAR_ASM_DATA_0 0x2A
+#define DAR_ASM_DATA_1 0x2B
+#define DAR_ASM_DATA_2 0x2C
+#define DAR_ASM_DATA_3 0x2D
+#define DAR_ASM_DATA_4 0x2E
+#define DAR_ASM_DATA_5 0x2F
+#define DAR_ASM_DATA_6 0x30
+#define DAR_ASM_DATA_7 0x31
+#define DAR_ASM_DATA_8 0x32
+#define DAR_ASM_DATA_9 0x33
+#define DAR_ASM_DATA_A 0x34
+#define DAR_ASM_DATA_B 0x35
+#define DAR_ASM_DATA_C 0x36
+#define DAR_ASM_DATA_D 0x37
+#define DAR_ASM_DATA_E 0x38
+#define DAR_ASM_DATA_F 0x39
+/*----------------------- 0x3A */
+#define DAR_OVERWRITE_VER 0x3B
+#define DAR_CLK_OUT_CTRL 0x3C
+#define DAR_PWR_MODES 0x3D
+#define IAR_INDEX 0x3E
+#define IAR_DATA 0x3F
+
+/* Indirect Resgister Memory */
+#define IAR_PART_ID 0x00
+#define IAR_XTAL_TRIM 0x01
+#define IAR_PMC_LP_TRIM 0x02
+#define IAR_MACPANID0_LSB 0x03
+#define IAR_MACPANID0_MSB 0x04
+#define IAR_MACSHORTADDRS0_LSB 0x05
+#define IAR_MACSHORTADDRS0_MSB 0x06
+#define IAR_MACLONGADDRS0_0 0x07
+#define IAR_MACLONGADDRS0_8 0x08
+#define IAR_MACLONGADDRS0_16 0x09
+#define IAR_MACLONGADDRS0_24 0x0A
+#define IAR_MACLONGADDRS0_32 0x0B
+#define IAR_MACLONGADDRS0_40 0x0C
+#define IAR_MACLONGADDRS0_48 0x0D
+#define IAR_MACLONGADDRS0_56 0x0E
+#define IAR_RX_FRAME_FILTER 0x0F
+#define IAR_PLL_INT1 0x10
+#define IAR_PLL_FRAC1_LSB 0x11
+#define IAR_PLL_FRAC1_MSB 0x12
+#define IAR_MACPANID1_LSB 0x13
+#define IAR_MACPANID1_MSB 0x14
+#define IAR_MACSHORTADDRS1_LSB 0x15
+#define IAR_MACSHORTADDRS1_MSB 0x16
+#define IAR_MACLONGADDRS1_0 0x17
+#define IAR_MACLONGADDRS1_8 0x18
+#define IAR_MACLONGADDRS1_16 0x19
+#define IAR_MACLONGADDRS1_24 0x1A
+#define IAR_MACLONGADDRS1_32 0x1B
+#define IAR_MACLONGADDRS1_40 0x1C
+#define IAR_MACLONGADDRS1_48 0x1D
+#define IAR_MACLONGADDRS1_56 0x1E
+#define IAR_DUAL_PAN_CTRL 0x1F
+#define IAR_DUAL_PAN_DWELL 0x20
+#define IAR_DUAL_PAN_STS 0x21
+#define IAR_CCA1_THRESH 0x22
+#define IAR_CCA1_ED_OFFSET_COMP 0x23
+#define IAR_LQI_OFFSET_COMP 0x24
+#define IAR_CCA_CTRL 0x25
+#define IAR_CCA2_CORR_PEAKS 0x26
+#define IAR_CCA2_CORR_THRESH 0x27
+#define IAR_TMR_PRESCALE 0x28
+/*-------------------- 0x29 */
+#define IAR_GPIO_DATA 0x2A
+#define IAR_GPIO_DIR 0x2B
+#define IAR_GPIO_PUL_EN 0x2C
+#define IAR_GPIO_PUL_SEL 0x2D
+#define IAR_GPIO_DS 0x2E
+/*------------------ 0x2F */
+#define IAR_ANT_PAD_CTRL 0x30
+#define IAR_MISC_PAD_CTRL 0x31
+#define IAR_BSM_CTRL 0x32
+/*------------------- 0x33 */
+#define IAR_RNG 0x34
+#define IAR_RX_BYTE_COUNT 0x35
+#define IAR_RX_WTR_MARK 0x36
+#define IAR_SOFT_RESET 0x37
+#define IAR_TXDELAY 0x38
+#define IAR_ACKDELAY 0x39
+#define IAR_SEQ_MGR_CTRL 0x3A
+#define IAR_SEQ_MGR_STS 0x3B
+#define IAR_SEQ_T_STS 0x3C
+#define IAR_ABORT_STS 0x3D
+#define IAR_CCCA_BUSY_CNT 0x3E
+#define IAR_SRC_ADDR_CHECKSUM1 0x3F
+#define IAR_SRC_ADDR_CHECKSUM2 0x40
+#define IAR_SRC_TBL_VALID1 0x41
+#define IAR_SRC_TBL_VALID2 0x42
+#define IAR_FILTERFAIL_CODE1 0x43
+#define IAR_FILTERFAIL_CODE2 0x44
+#define IAR_SLOT_PRELOAD 0x45
+/*-------------------- 0x46 */
+#define IAR_CORR_VT 0x47
+#define IAR_SYNC_CTRL 0x48
+#define IAR_PN_LSB_0 0x49
+#define IAR_PN_LSB_1 0x4A
+#define IAR_PN_MSB_0 0x4B
+#define IAR_PN_MSB_1 0x4C
+#define IAR_CORR_NVAL 0x4D
+#define IAR_TX_MODE_CTRL 0x4E
+#define IAR_SNF_THR 0x4F
+#define IAR_FAD_THR 0x50
+#define IAR_ANT_AGC_CTRL 0x51
+#define IAR_AGC_THR1 0x52
+#define IAR_AGC_THR2 0x53
+#define IAR_AGC_HYS 0x54
+#define IAR_AFC 0x55
+/*------------------- 0x56 */
+/*------------------- 0x57 */
+#define IAR_PHY_STS 0x58
+#define IAR_RX_MAX_CORR 0x59
+#define IAR_RX_MAX_PREAMBLE 0x5A
+#define IAR_RSSI 0x5B
+/*------------------- 0x5C */
+/*------------------- 0x5D */
+#define IAR_PLL_DIG_CTRL 0x5E
+#define IAR_VCO_CAL 0x5F
+#define IAR_VCO_BEST_DIFF 0x60
+#define IAR_VCO_BIAS 0x61
+#define IAR_KMOD_CTRL 0x62
+#define IAR_KMOD_CAL 0x63
+#define IAR_PA_CAL 0x64
+#define IAR_PA_PWRCAL 0x65
+#define IAR_ATT_RSSI1 0x66
+#define IAR_ATT_RSSI2 0x67
+#define IAR_RSSI_OFFSET 0x68
+#define IAR_RSSI_SLOPE 0x69
+#define IAR_RSSI_CAL1 0x6A
+#define IAR_RSSI_CAL2 0x6B
+/*------------------- 0x6C */
+/*------------------- 0x6D */
+#define IAR_XTAL_CTRL 0x6E
+#define IAR_XTAL_COMP_MIN 0x6F
+#define IAR_XTAL_COMP_MAX 0x70
+#define IAR_XTAL_GM 0x71
+/*------------------- 0x72 */
+/*------------------- 0x73 */
+#define IAR_LNA_TUNE 0x74
+#define IAR_LNA_AGCGAIN 0x75
+/*------------------- 0x76 */
+/*------------------- 0x77 */
+#define IAR_CHF_PMA_GAIN 0x78
+#define IAR_CHF_IBUF 0x79
+#define IAR_CHF_QBUF 0x7A
+#define IAR_CHF_IRIN 0x7B
+#define IAR_CHF_QRIN 0x7C
+#define IAR_CHF_IL 0x7D
+#define IAR_CHF_QL 0x7E
+#define IAR_CHF_CC1 0x7F
+#define IAR_CHF_CCL 0x80
+#define IAR_CHF_CC2 0x81
+#define IAR_CHF_IROUT 0x82
+#define IAR_CHF_QROUT 0x83
+/*------------------- 0x84 */
+/*------------------- 0x85 */
+#define IAR_RSSI_CTRL 0x86
+/*------------------- 0x87 */
+/*------------------- 0x88 */
+#define IAR_PA_BIAS 0x89
+#define IAR_PA_TUNING 0x8A
+/*------------------- 0x8B */
+/*------------------- 0x8C */
+#define IAR_PMC_HP_TRIM 0x8D
+#define IAR_VREGA_TRIM 0x8E
+/*------------------- 0x8F */
+/*------------------- 0x90 */
+#define IAR_VCO_CTRL1 0x91
+#define IAR_VCO_CTRL2 0x92
+/*------------------- 0x93 */
+/*------------------- 0x94 */
+#define IAR_ANA_SPARE_OUT1 0x95
+#define IAR_ANA_SPARE_OUT2 0x96
+#define IAR_ANA_SPARE_IN 0x97
+#define IAR_MISCELLANEOUS 0x98
+/*------------------- 0x99 */
+#define IAR_SEQ_MGR_OVRD0 0x9A
+#define IAR_SEQ_MGR_OVRD1 0x9B
+#define IAR_SEQ_MGR_OVRD2 0x9C
+#define IAR_SEQ_MGR_OVRD3 0x9D
+#define IAR_SEQ_MGR_OVRD4 0x9E
+#define IAR_SEQ_MGR_OVRD5 0x9F
+#define IAR_SEQ_MGR_OVRD6 0xA0
+#define IAR_SEQ_MGR_OVRD7 0xA1
+/*------------------- 0xA2 */
+#define IAR_TESTMODE_CTRL 0xA3
+#define IAR_DTM_CTRL1 0xA4
+#define IAR_DTM_CTRL2 0xA5
+#define IAR_ATM_CTRL1 0xA6
+#define IAR_ATM_CTRL2 0xA7
+#define IAR_ATM_CTRL3 0xA8
+/*------------------- 0xA9 */
+#define IAR_LIM_FE_TEST_CTRL 0xAA
+#define IAR_CHF_TEST_CTRL 0xAB
+#define IAR_VCO_TEST_CTRL 0xAC
+#define IAR_PLL_TEST_CTRL 0xAD
+#define IAR_PA_TEST_CTRL 0xAE
+#define IAR_PMC_TEST_CTRL 0xAF
+#define IAR_SCAN_DTM_PROTECT_1 0xFE
+#define IAR_SCAN_DTM_PROTECT_0 0xFF
+
+/* IRQSTS1 bits */
+#define DAR_IRQSTS1_RX_FRM_PEND BIT(7)
+#define DAR_IRQSTS1_PLL_UNLOCK_IRQ BIT(6)
+#define DAR_IRQSTS1_FILTERFAIL_IRQ BIT(5)
+#define DAR_IRQSTS1_RXWTRMRKIRQ BIT(4)
+#define DAR_IRQSTS1_CCAIRQ BIT(3)
+#define DAR_IRQSTS1_RXIRQ BIT(2)
+#define DAR_IRQSTS1_TXIRQ BIT(1)
+#define DAR_IRQSTS1_SEQIRQ BIT(0)
+
+/* IRQSTS2 bits */
+#define DAR_IRQSTS2_CRCVALID BIT(7)
+#define DAR_IRQSTS2_CCA BIT(6)
+#define DAR_IRQSTS2_SRCADDR BIT(5)
+#define DAR_IRQSTS2_PI BIT(4)
+#define DAR_IRQSTS2_TMRSTATUS BIT(3)
+#define DAR_IRQSTS2_ASM_IRQ BIT(2)
+#define DAR_IRQSTS2_PB_ERR_IRQ BIT(1)
+#define DAR_IRQSTS2_WAKE_IRQ BIT(0)
+
+/* IRQSTS3 bits */
+#define DAR_IRQSTS3_TMR4MSK BIT(7)
+#define DAR_IRQSTS3_TMR3MSK BIT(6)
+#define DAR_IRQSTS3_TMR2MSK BIT(5)
+#define DAR_IRQSTS3_TMR1MSK BIT(4)
+#define DAR_IRQSTS3_TMR4IRQ BIT(3)
+#define DAR_IRQSTS3_TMR3IRQ BIT(2)
+#define DAR_IRQSTS3_TMR2IRQ BIT(1)
+#define DAR_IRQSTS3_TMR1IRQ BIT(0)
+
+/* PHY_CTRL1 bits */
+#define DAR_PHY_CTRL1_TMRTRIGEN BIT(7)
+#define DAR_PHY_CTRL1_SLOTTED BIT(6)
+#define DAR_PHY_CTRL1_CCABFRTX BIT(5)
+#define DAR_PHY_CTRL1_CCABFRTX_SHIFT 5
+#define DAR_PHY_CTRL1_RXACKRQD BIT(4)
+#define DAR_PHY_CTRL1_AUTOACK BIT(3)
+#define DAR_PHY_CTRL1_XCVSEQ_MASK 0x07
+
+/* PHY_CTRL2 bits */
+#define DAR_PHY_CTRL2_CRC_MSK BIT(7)
+#define DAR_PHY_CTRL2_PLL_UNLOCK_MSK BIT(6)
+#define DAR_PHY_CTRL2_FILTERFAIL_MSK BIT(5)
+#define DAR_PHY_CTRL2_RX_WMRK_MSK BIT(4)
+#define DAR_PHY_CTRL2_CCAMSK BIT(3)
+#define DAR_PHY_CTRL2_RXMSK BIT(2)
+#define DAR_PHY_CTRL2_TXMSK BIT(1)
+#define DAR_PHY_CTRL2_SEQMSK BIT(0)
+
+/* PHY_CTRL3 bits */
+#define DAR_PHY_CTRL3_TMR4CMP_EN BIT(7)
+#define DAR_PHY_CTRL3_TMR3CMP_EN BIT(6)
+#define DAR_PHY_CTRL3_TMR2CMP_EN BIT(5)
+#define DAR_PHY_CTRL3_TMR1CMP_EN BIT(4)
+#define DAR_PHY_CTRL3_ASM_MSK BIT(2)
+#define DAR_PHY_CTRL3_PB_ERR_MSK BIT(1)
+#define DAR_PHY_CTRL3_WAKE_MSK BIT(0)
+
+/* RX_FRM_LEN bits */
+#define DAR_RX_FRAME_LENGTH_MASK (0x7F)
+
+/* PHY_CTRL4 bits */
+#define DAR_PHY_CTRL4_TRCV_MSK BIT(7)
+#define DAR_PHY_CTRL4_TC3TMOUT BIT(6)
+#define DAR_PHY_CTRL4_PANCORDNTR0 BIT(5)
+#define DAR_PHY_CTRL4_CCATYPE (3)
+#define DAR_PHY_CTRL4_CCATYPE_SHIFT (3)
+#define DAR_PHY_CTRL4_CCATYPE_MASK (0x18)
+#define DAR_PHY_CTRL4_TMRLOAD BIT(2)
+#define DAR_PHY_CTRL4_PROMISCUOUS BIT(1)
+#define DAR_PHY_CTRL4_TC2PRIME_EN BIT(0)
+
+/* SRC_CTRL bits */
+#define DAR_SRC_CTRL_INDEX (0x0F)
+#define DAR_SRC_CTRL_INDEX_SHIFT (4)
+#define DAR_SRC_CTRL_ACK_FRM_PND BIT(3)
+#define DAR_SRC_CTRL_SRCADDR_EN BIT(2)
+#define DAR_SRC_CTRL_INDEX_EN BIT(1)
+#define DAR_SRC_CTRL_INDEX_DISABLE BIT(0)
+
+/* DAR_ASM_CTRL1 bits */
+#define DAR_ASM_CTRL1_CLEAR BIT(7)
+#define DAR_ASM_CTRL1_START BIT(6)
+#define DAR_ASM_CTRL1_SELFTST BIT(5)
+#define DAR_ASM_CTRL1_CTR BIT(4)
+#define DAR_ASM_CTRL1_CBC BIT(3)
+#define DAR_ASM_CTRL1_AES BIT(2)
+#define DAR_ASM_CTRL1_LOAD_MAC BIT(1)
+
+/* DAR_ASM_CTRL2 bits */
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL (7)
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL_SHIFT (5)
+#define DAR_ASM_CTRL2_TSTPAS BIT(1)
+
+/* DAR_CLK_OUT_CTRL bits */
+#define DAR_CLK_OUT_CTRL_EXTEND BIT(7)
+#define DAR_CLK_OUT_CTRL_HIZ BIT(6)
+#define DAR_CLK_OUT_CTRL_SR BIT(5)
+#define DAR_CLK_OUT_CTRL_DS BIT(4)
+#define DAR_CLK_OUT_CTRL_EN BIT(3)
+#define DAR_CLK_OUT_CTRL_DIV (7)
+
+/* DAR_PWR_MODES bits */
+#define DAR_PWR_MODES_XTAL_READY BIT(5)
+#define DAR_PWR_MODES_XTALEN BIT(4)
+#define DAR_PWR_MODES_ASM_CLK_EN BIT(3)
+#define DAR_PWR_MODES_AUTODOZE BIT(1)
+#define DAR_PWR_MODES_PMC_MODE BIT(0)
+
+/* RX_FRAME_FILTER bits */
+#define IAR_RX_FRAME_FLT_FRM_VER (0xC0)
+#define IAR_RX_FRAME_FLT_FRM_VER_SHIFT (6)
+#define IAR_RX_FRAME_FLT_ACTIVE_PROMISCUOUS BIT(5)
+#define IAR_RX_FRAME_FLT_NS_FT BIT(4)
+#define IAR_RX_FRAME_FLT_CMD_FT BIT(3)
+#define IAR_RX_FRAME_FLT_ACK_FT BIT(2)
+#define IAR_RX_FRAME_FLT_DATA_FT BIT(1)
+#define IAR_RX_FRAME_FLT_BEACON_FT BIT(0)
+
+/* DUAL_PAN_CTRL bits */
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK (0xF0)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT (4)
+#define IAR_DUAL_PAN_CTRL_CURRENT_NETWORK BIT(3)
+#define IAR_DUAL_PAN_CTRL_PANCORDNTR1 BIT(2)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_AUTO BIT(1)
+#define IAR_DUAL_PAN_CTRL_ACTIVE_NETWORK BIT(0)
+
+/* DUAL_PAN_STS bits */
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN1 BIT(7)
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN0 BIT(6)
+#define IAR_DUAL_PAN_STS_DUAL_PAN_REMAIN (0x3F)
+
+/* CCA_CTRL bits */
+#define IAR_CCA_CTRL_AGC_FRZ_EN BIT(6)
+#define IAR_CCA_CTRL_CONT_RSSI_EN BIT(5)
+#define IAR_CCA_CTRL_LQI_RSSI_NOT_CORR BIT(4)
+#define IAR_CCA_CTRL_CCA3_AND_NOT_OR BIT(3)
+#define IAR_CCA_CTRL_POWER_COMP_EN_LQI BIT(2)
+#define IAR_CCA_CTRL_POWER_COMP_EN_ED BIT(1)
+#define IAR_CCA_CTRL_POWER_COMP_EN_CCA1 BIT(0)
+
+/* ANT_PAD_CTRL bits */
+#define IAR_ANT_PAD_CTRL_ANTX_POL (0x0F)
+#define IAR_ANT_PAD_CTRL_ANTX_POL_SHIFT (4)
+#define IAR_ANT_PAD_CTRL_ANTX_CTRLMODE BIT(3)
+#define IAR_ANT_PAD_CTRL_ANTX_HZ BIT(2)
+#define IAR_ANT_PAD_CTRL_ANTX_EN (3)
+
+/* MISC_PAD_CTRL bits */
+#define IAR_MISC_PAD_CTRL_MISO_HIZ_EN BIT(3)
+#define IAR_MISC_PAD_CTRL_IRQ_B_OD BIT(2)
+#define IAR_MISC_PAD_CTRL_NON_GPIO_DS BIT(1)
+#define IAR_MISC_PAD_CTRL_ANTX_CURR (1)
+
+/* ANT_AGC_CTRL bits */
+#define IAR_ANT_AGC_CTRL_FAD_EN_SHIFT (0)
+#define IAR_ANT_AGC_CTRL_FAD_EN_MASK (1)
+#define IAR_ANT_AGC_CTRL_ANTX_SHIFT (1)
+#define IAR_ANT_AGC_CTRL_ANTX_MASK BIT(AR_ANT_AGC_CTRL_ANTX_SHIFT)
+
+/* BSM_CTRL bits */
+#define BSM_CTRL_BSM_EN (1)
+
+/* SOFT_RESET bits */
+#define IAR_SOFT_RESET_SOG_RST BIT(7)
+#define IAR_SOFT_RESET_REGS_RST BIT(4)
+#define IAR_SOFT_RESET_PLL_RST BIT(3)
+#define IAR_SOFT_RESET_TX_RST BIT(2)
+#define IAR_SOFT_RESET_RX_RST BIT(1)
+#define IAR_SOFT_RESET_SEQ_MGR_RST BIT(0)
+
+/* SEQ_MGR_CTRL bits */
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL (3)
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL_SHIFT (6)
+#define IAR_SEQ_MGR_CTRL_NO_RX_RECYCLE BIT(5)
+#define IAR_SEQ_MGR_CTRL_LATCH_PREAMBLE BIT(4)
+#define IAR_SEQ_MGR_CTRL_EVENT_TMR_DO_NOT_LATCH BIT(3)
+#define IAR_SEQ_MGR_CTRL_CLR_NEW_SEQ_INHIBIT BIT(2)
+#define IAR_SEQ_MGR_CTRL_PSM_LOCK_DIS BIT(1)
+#define IAR_SEQ_MGR_CTRL_PLL_ABORT_OVRD BIT(0)
+
+/* SEQ_MGR_STS bits */
+#define IAR_SEQ_MGR_STS_TMR2_SEQ_TRIG_ARMED BIT(7)
+#define IAR_SEQ_MGR_STS_RX_MODE BIT(6)
+#define IAR_SEQ_MGR_STS_RX_TIMEOUT_PENDING BIT(5)
+#define IAR_SEQ_MGR_STS_NEW_SEQ_INHIBIT BIT(4)
+#define IAR_SEQ_MGR_STS_SEQ_IDLE BIT(3)
+#define IAR_SEQ_MGR_STS_XCVSEQ_ACTUAL (7)
+
+/* ABORT_STS bits */
+#define IAR_ABORT_STS_PLL_ABORTED BIT(2)
+#define IAR_ABORT_STS_TC3_ABORTED BIT(1)
+#define IAR_ABORT_STS_SW_ABORTED BIT(0)
+
+/* IAR_FILTERFAIL_CODE2 bits */
+#define IAR_FILTERFAIL_CODE2_PAN_SEL BIT(7)
+#define IAR_FILTERFAIL_CODE2_9_8 (3)
+
+/* PHY_STS bits */
+#define IAR_PHY_STS_PLL_UNLOCK BIT(7)
+#define IAR_PHY_STS_PLL_LOCK_ERR BIT(6)
+#define IAR_PHY_STS_PLL_LOCK BIT(5)
+#define IAR_PHY_STS_CRCVALID BIT(3)
+#define IAR_PHY_STS_FILTERFAIL_FLAG_SEL BIT(2)
+#define IAR_PHY_STS_SFD_DET BIT(1)
+#define IAR_PHY_STS_PREAMBLE_DET BIT(0)
+
+/* TESTMODE_CTRL bits */
+#define IAR_TEST_MODE_CTRL_HOT_ANT BIT(4)
+#define IAR_TEST_MODE_CTRL_IDEAL_RSSI_EN BIT(3)
+#define IAR_TEST_MODE_CTRL_IDEAL_PFC_EN BIT(2)
+#define IAR_TEST_MODE_CTRL_CONTINUOUS_EN BIT(1)
+#define IAR_TEST_MODE_CTRL_FPGA_EN BIT(0)
+
+/* DTM_CTRL1 bits */
+#define IAR_DTM_CTRL1_ATM_LOCKED BIT(7)
+#define IAR_DTM_CTRL1_DTM_EN BIT(6)
+#define IAR_DTM_CTRL1_PAGE5 BIT(5)
+#define IAR_DTM_CTRL1_PAGE4 BIT(4)
+#define IAR_DTM_CTRL1_PAGE3 BIT(3)
+#define IAR_DTM_CTRL1_PAGE2 BIT(2)
+#define IAR_DTM_CTRL1_PAGE1 BIT(1)
+#define IAR_DTM_CTRL1_PAGE0 BIT(0)
+
+/* TX_MODE_CTRL */
+#define IAR_TX_MODE_CTRL_TX_INV BIT(4)
+#define IAR_TX_MODE_CTRL_BT_EN BIT(3)
+#define IAR_TX_MODE_CTRL_DTS2 BIT(2)
+#define IAR_TX_MODE_CTRL_DTS1 BIT(1)
+#define IAR_TX_MODE_CTRL_DTS0 BIT(0)
+
+#define TX_MODE_CTRL_DTS_MASK (7)
+
+#endif /* _MCR20A_H */
DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
netdev_features_t sfeatures;
u32 msg_enable;
+ spinlock_t addrs_lock;
};
struct ipvl_addr {
}
EXPORT_SYMBOL_GPL(ipvlan_count_rx);
+#if IS_ENABLED(CONFIG_IPV6)
static u8 ipvlan_get_v6_hash(const void *iaddr)
{
const struct in6_addr *ip6_addr = iaddr;
return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
IPVLAN_HASH_MASK;
}
+#else
+static u8 ipvlan_get_v6_hash(const void *iaddr)
+{
+ return 0;
+}
+#endif
static u8 ipvlan_get_v4_hash(const void *iaddr)
{
IPVLAN_HASH_MASK;
}
+static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr)
+{
+ if (!is_v6 && addr->atype == IPVL_IPV4) {
+ struct in_addr *i4addr = (struct in_addr *)iaddr;
+
+ return addr->ip4addr.s_addr == i4addr->s_addr;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (is_v6 && addr->atype == IPVL_IPV6) {
+ struct in6_addr *i6addr = (struct in6_addr *)iaddr;
+
+ return ipv6_addr_equal(&addr->ip6addr, i6addr);
+#endif
+ }
+
+ return false;
+}
+
static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
const void *iaddr, bool is_v6)
{
hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
ipvlan_get_v4_hash(iaddr);
- hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) {
- if (is_v6 && addr->atype == IPVL_IPV6 &&
- ipv6_addr_equal(&addr->ip6addr, iaddr))
- return addr;
- else if (!is_v6 && addr->atype == IPVL_IPV4 &&
- addr->ip4addr.s_addr ==
- ((struct in_addr *)iaddr)->s_addr)
+ hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode)
+ if (addr_equal(is_v6, addr, iaddr))
return addr;
- }
return NULL;
}
struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
const void *iaddr, bool is_v6)
{
- struct ipvl_addr *addr;
+ struct ipvl_addr *addr, *ret = NULL;
- list_for_each_entry(addr, &ipvlan->addrs, anode) {
- if ((is_v6 && addr->atype == IPVL_IPV6 &&
- ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
- (!is_v6 && addr->atype == IPVL_IPV4 &&
- addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
- return addr;
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
+ if (addr_equal(is_v6, addr, iaddr)) {
+ ret = addr;
+ break;
+ }
}
- return NULL;
+ rcu_read_unlock();
+ return ret;
}
bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
{
struct ipvl_dev *ipvlan;
+ bool ret = false;
- ASSERT_RTNL();
-
- list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
- if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
- return true;
+ rcu_read_lock();
+ list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+ if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
+ ret = true;
+ break;
+ }
}
- return false;
+ rcu_read_unlock();
+ return ret;
}
static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
lyr3h = ip4h;
break;
}
+#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6): {
struct ipv6hdr *ip6h;
}
break;
}
+#endif
default:
return NULL;
}
{
struct ipvl_addr *addr = NULL;
- if (addr_type == IPVL_IPV6) {
+ switch (addr_type) {
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPVL_IPV6: {
struct ipv6hdr *ip6h;
struct in6_addr *i6addr;
ip6h = (struct ipv6hdr *)lyr3h;
i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
addr = ipvlan_ht_addr_lookup(port, i6addr, true);
- } else if (addr_type == IPVL_ICMPV6) {
+ break;
+ }
+ case IPVL_ICMPV6: {
struct nd_msg *ndmh;
struct in6_addr *i6addr;
i6addr = &ndmh->target;
addr = ipvlan_ht_addr_lookup(port, i6addr, true);
}
- } else if (addr_type == IPVL_IPV4) {
+ break;
+ }
+#endif
+ case IPVL_IPV4: {
struct iphdr *ip4h;
__be32 *i4addr;
ip4h = (struct iphdr *)lyr3h;
i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
addr = ipvlan_ht_addr_lookup(port, i4addr, false);
- } else if (addr_type == IPVL_ARP) {
+ break;
+ }
+ case IPVL_ARP: {
struct arphdr *arph;
unsigned char *arp_ptr;
__be32 dip;
memcpy(&dip, arp_ptr, 4);
addr = ipvlan_ht_addr_lookup(port, &dip, false);
+ break;
+ }
}
return addr;
return ret;
}
+#if IS_ENABLED(CONFIG_IPV6)
static int ipvlan_process_v6_outbound(struct sk_buff *skb)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
out:
return ret;
}
+#else
+static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+{
+ return NET_XMIT_DROP;
+}
+#endif
static int ipvlan_process_outbound(struct sk_buff *skb)
{
/* In this mode we dont care about multicast and broadcast traffic */
if (is_multicast_ether_addr(ethh->h_dest)) {
- pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n",
- ntohs(skb->protocol));
+ pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
+ ntohs(skb->protocol));
kfree_skb(skb);
goto out;
}
goto out;
break;
}
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
{
struct dst_entry *dst;
};
skb_dst_drop(skb);
- dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
+ dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
+ skb, flags);
skb_dst_set(skb, dst);
break;
}
+#endif
default:
break;
}
.hooknum = NF_INET_LOCAL_IN,
.priority = INT_MAX,
},
+#if IS_ENABLED(CONFIG_IPV6)
{
.hook = ipvlan_nf_input,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = INT_MAX,
},
+#endif
};
static const struct l3mdev_ops ipvl_l3mdev_ops = {
dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
(phy_dev->state & IPVLAN_STATE_MASK);
dev->features = phy_dev->features & IPVLAN_FEATURES;
- dev->features |= NETIF_F_LLTX;
+ dev->features |= NETIF_F_LLTX | NETIF_F_VLAN_CHALLENGED;
dev->gso_max_size = phy_dev->gso_max_size;
dev->gso_max_segs = phy_dev->gso_max_segs;
dev->hard_header_len = phy_dev->hard_header_len;
else
dev->flags &= ~IFF_NOARP;
- list_for_each_entry(addr, &ipvlan->addrs, anode)
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_add(ipvlan, addr);
+ rcu_read_unlock();
return dev_uc_add(phy_dev, phy_dev->dev_addr);
}
dev_uc_del(phy_dev, phy_dev->dev_addr);
- list_for_each_entry(addr, &ipvlan->addrs, anode)
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_del(addr);
+ rcu_read_unlock();
return 0;
}
ipvlan->sfeatures = IPVLAN_FEATURES;
ipvlan_adjust_mtu(ipvlan, phy_dev);
INIT_LIST_HEAD(&ipvlan->addrs);
+ spin_lock_init(&ipvlan->addrs_lock);
/* TODO Probably put random address here to be presented to the
* world but keep using the physical-dev address for the outgoing
struct ipvl_dev *ipvlan = netdev_priv(dev);
struct ipvl_addr *addr, *next;
+ spin_lock_bh(&ipvlan->addrs_lock);
list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
ipvlan_ht_addr_del(addr);
- list_del(&addr->anode);
+ list_del_rcu(&addr->anode);
kfree_rcu(addr, rcu);
}
+ spin_unlock_bh(&ipvlan->addrs_lock);
ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
list_del_rcu(&ipvlan->pnode);
if (dev->reg_state != NETREG_UNREGISTERING)
break;
- list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
- pnode)
+ list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
&lst_kill);
unregister_netdevice_many(&lst_kill);
return NOTIFY_DONE;
}
+/* the caller must held the addrs lock */
static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
struct ipvl_addr *addr;
return -ENOMEM;
addr->master = ipvlan;
- if (is_v6) {
- memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
- addr->atype = IPVL_IPV6;
- } else {
+ if (!is_v6) {
memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr));
addr->atype = IPVL_IPV4;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
+ addr->atype = IPVL_IPV6;
+#endif
}
- list_add_tail(&addr->anode, &ipvlan->addrs);
+
+ list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
/* If the interface is not up, the address will be added to the hash
* list by ipvlan_open.
{
struct ipvl_addr *addr;
+ spin_lock_bh(&ipvlan->addrs_lock);
addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
- if (!addr)
+ if (!addr) {
+ spin_unlock_bh(&ipvlan->addrs_lock);
return;
+ }
ipvlan_ht_addr_del(addr);
- list_del(&addr->anode);
+ list_del_rcu(&addr->anode);
+ spin_unlock_bh(&ipvlan->addrs_lock);
kfree_rcu(addr, rcu);
-
- return;
-}
-
-static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
-{
- if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
- netif_err(ipvlan, ifup, ipvlan->dev,
- "Failed to add IPv6=%pI6c addr for %s intf\n",
- ip6_addr, ipvlan->dev->name);
- return -EINVAL;
- }
-
- return ipvlan_add_addr(ipvlan, ip6_addr, true);
-}
-
-static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
-{
- return ipvlan_del_addr(ipvlan, ip6_addr, true);
}
static bool ipvlan_is_valid_dev(const struct net_device *dev)
return true;
}
+#if IS_ENABLED(CONFIG_IPV6)
+static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+{
+ int ret = -EINVAL;
+
+ spin_lock_bh(&ipvlan->addrs_lock);
+ if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
+ netif_err(ipvlan, ifup, ipvlan->dev,
+ "Failed to add IPv6=%pI6c addr for %s intf\n",
+ ip6_addr, ipvlan->dev->name);
+ else
+ ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
+ spin_unlock_bh(&ipvlan->addrs_lock);
+ return ret;
+}
+
+static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+{
+ return ipvlan_del_addr(ipvlan, ip6_addr, true);
+}
+
static int ipvlan_addr6_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
struct ipvl_dev *ipvlan = netdev_priv(dev);
- /* FIXME IPv6 autoconf calls us from bh without RTNL */
- if (in_softirq())
- return NOTIFY_DONE;
-
if (!ipvlan_is_valid_dev(dev))
return NOTIFY_DONE;
return NOTIFY_OK;
}
+#endif
static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
{
- if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
+ int ret = -EINVAL;
+
+ spin_lock_bh(&ipvlan->addrs_lock);
+ if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
netif_err(ipvlan, ifup, ipvlan->dev,
"Failed to add IPv4=%pI4 on %s intf.\n",
ip4_addr, ipvlan->dev->name);
- return -EINVAL;
- }
-
- return ipvlan_add_addr(ipvlan, ip4_addr, false);
+ else
+ ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
+ spin_unlock_bh(&ipvlan->addrs_lock);
+ return ret;
}
static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
.notifier_call = ipvlan_device_event,
};
+#if IS_ENABLED(CONFIG_IPV6)
static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
.notifier_call = ipvlan_addr6_event,
};
static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = {
.notifier_call = ipvlan_addr6_validator_event,
};
+#endif
static void ipvlan_ns_exit(struct net *net)
{
.id = &ipvlan_netid,
.size = sizeof(struct ipvlan_netns),
.exit = ipvlan_ns_exit,
+ .async = true,
};
static int __init ipvlan_init_module(void)
ipvlan_init_secret();
register_netdevice_notifier(&ipvlan_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
register_inet6addr_validator_notifier(
&ipvlan_addr6_vtor_notifier_block);
+#endif
register_inetaddr_notifier(&ipvlan_addr4_notifier_block);
register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block);
unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
unregister_inetaddr_validator_notifier(
&ipvlan_addr4_vtor_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
unregister_inet6addr_validator_notifier(
&ipvlan_addr6_vtor_notifier_block);
+#endif
unregister_netdevice_notifier(&ipvlan_notifier_block);
return err;
}
unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
unregister_inetaddr_validator_notifier(
&ipvlan_addr4_vtor_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
unregister_inet6addr_validator_notifier(
&ipvlan_addr6_vtor_notifier_block);
+#endif
}
module_init(ipvlan_init_module);
/* Registered in net/core/dev.c */
struct pernet_operations __net_initdata loopback_net_ops = {
.init = loopback_net_init,
+ .async = true,
};
/* the macvlan port may be freed by macvlan_uninit when fail to register.
* so we destroy the macvlan port only when it's valid.
*/
- if (create && macvlan_port_get_rtnl(dev))
+ if (create && macvlan_port_get_rtnl(lowerdev))
macvlan_port_destroy(port->dev);
return err;
}
return 0;
}
-static int aquantia_aneg_done(struct phy_device *phydev)
-{
- int reg;
-
- reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
- return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
-}
-
static int aquantia_config_intr(struct phy_device *phydev)
{
int err;
.name = "Aquantia AQ1202",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
.name = "Aquantia AQ2104",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
.name = "Aquantia AQR105",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
.name = "Aquantia AQR106",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
.name = "Aquantia AQR107",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
.name = "Aquantia AQR405",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
if (ret)
return ret;
- /* Disable EEE advertisment since this prevents the PHY
+ /* Disable EEE advertisement since this prevents the PHY
* from successfully linking up, trigger auto-negotiation restart
* to let the MAC decide what to do.
*/
MII_ADDR_C45 | regnum);
}
-static int cortina_config_aneg(struct phy_device *phydev)
-{
- phydev->supported = SUPPORTED_10000baseT_Full;
- phydev->advertising = SUPPORTED_10000baseT_Full;
-
- return 0;
-}
-
static int cortina_read_status(struct phy_device *phydev)
{
int gpio_int_status, ret = 0;
return ret;
}
-static int cortina_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int cortina_probe(struct phy_device *phydev)
{
u32 phy_id = 0;
.phy_id = PHY_ID_CS4340,
.phy_id_mask = 0xffffffff,
.name = "Cortina CS4340",
- .config_aneg = cortina_config_aneg,
+ .config_init = gen10g_config_init,
+ .config_aneg = gen10g_config_aneg,
.read_status = cortina_read_status,
- .soft_reset = cortina_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.probe = cortina_probe,
},
};
#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX 0x0
#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN 0x1f
+#define DP83867_IO_MUX_CFG_CLK_O_SEL_MASK (0x1f << 8)
+#define DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT 8
/* CFG4 bits */
#define DP83867_CFG4_PORT_MIRROR_EN BIT(0)
int io_impedance;
int port_mirroring;
bool rxctrl_strap_quirk;
+ int clk_output_sel;
};
static int dp83867_ack_interrupt(struct phy_device *phydev)
dp83867->io_impedance = -EINVAL;
/* Optional configuration */
+ ret = of_property_read_u32(of_node, "ti,clk-output-sel",
+ &dp83867->clk_output_sel);
+ if (ret || dp83867->clk_output_sel > DP83867_CLK_O_SEL_REF_CLK)
+ /* Keep the default value if ti,clk-output-sel is not set
+ * or too high
+ */
+ dp83867->clk_output_sel = DP83867_CLK_O_SEL_REF_CLK;
+
if (of_property_read_bool(of_node, "ti,max-output-impedance"))
dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX;
else if (of_property_read_bool(of_node, "ti,min-output-impedance"))
if (dp83867->port_mirroring != DP83867_PORT_MIRROING_KEEP)
dp83867_config_port_mirroring(phydev);
+ /* Clock output selection if muxing property is set */
+ if (dp83867->clk_output_sel != DP83867_CLK_O_SEL_REF_CLK) {
+ val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG);
+ val &= ~DP83867_IO_MUX_CFG_CLK_O_SEL_MASK;
+ val |= (dp83867->clk_output_sel << DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT);
+ phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG, val);
+ }
+
return 0;
}
return err;
/* There appears to be a bug in the 88e1512 when used in
- * SGMII to copper mode, where the AN advertisment register
+ * SGMII to copper mode, where the AN advertisement register
* clears the pause bits each time a negotiation occurs.
* This means we can never be truely sure what was advertised,
* so disable Pause support.
return 0;
}
-/*
- * Resetting the MV88X3310 causes it to become non-responsive. Avoid
- * setting the reset bit(s).
- */
-static int mv3310_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int mv3310_config_init(struct phy_device *phydev)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
if (val < 0)
return val;
- /* Read the link partner's 1G advertisment */
+ /* Read the link partner's 1G advertisement */
val = phy_read_mmd(phydev, MDIO_MMD_AN, MV_AN_STAT1000);
if (val < 0)
return val;
SUPPORTED_10000baseT_Full |
SUPPORTED_Backplane,
.probe = mv3310_probe,
- .soft_reset = mv3310_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.config_init = mv3310_config_init,
.config_aneg = mv3310_config_aneg,
.aneg_done = mv3310_aneg_done,
EXPORT_SYMBOL_GPL(genphy_c45_read_link);
/**
- * genphy_c45_read_lpa - read the link partner advertisment and pause
+ * genphy_c45_read_lpa - read the link partner advertisement and pause
* @phydev: target phy_device struct
*
* Read the Clause 45 defined base (7.19) and 10G (7.33) status registers,
- * filling in the link partner advertisment, pause and asym_pause members
+ * filling in the link partner advertisement, pause and asym_pause members
* in @phydev. This assumes that the auto-negotiation MMD is present, and
* the backplane bit (7.48.0) is clear. Clause 45 PHY drivers are expected
* to fill in the remainder of the link partner advert from vendor registers.
{
int val;
- /* Read the link partner's base page advertisment */
+ /* Read the link partner's base page advertisement */
val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA);
if (val < 0)
return val;
phydev->pause = val & LPA_PAUSE_CAP ? 1 : 0;
phydev->asym_pause = val & LPA_PAUSE_ASYM ? 1 : 0;
- /* Read the link partner's 10G advertisment */
+ /* Read the link partner's 10G advertisement */
val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_STAT);
if (val < 0)
return val;
/* The gen10g_* functions are the old Clause 45 stub */
-static int gen10g_config_aneg(struct phy_device *phydev)
+int gen10g_config_aneg(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_config_aneg);
-static int gen10g_read_status(struct phy_device *phydev)
+int gen10g_read_status(struct phy_device *phydev)
{
u32 mmd_mask = phydev->c45_ids.devices_in_package;
int ret;
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_read_status);
-static int gen10g_soft_reset(struct phy_device *phydev)
+int gen10g_no_soft_reset(struct phy_device *phydev)
{
/* Do nothing for now */
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_no_soft_reset);
-static int gen10g_config_init(struct phy_device *phydev)
+int gen10g_config_init(struct phy_device *phydev)
{
/* Temporarily just say we support everything */
phydev->supported = SUPPORTED_10000baseT_Full;
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_config_init);
-static int gen10g_suspend(struct phy_device *phydev)
+int gen10g_suspend(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_suspend);
-static int gen10g_resume(struct phy_device *phydev)
+int gen10g_resume(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_resume);
struct phy_driver genphy_10g_driver = {
.phy_id = 0xffffffff,
.phy_id_mask = 0xffffffff,
.name = "Generic 10G PHY",
- .soft_reset = gen10g_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.config_init = gen10g_config_init,
.features = 0,
.config_aneg = gen10g_config_aneg,
}
/**
- * phy_resolve_aneg_linkmode - resolve the advertisments into phy settings
+ * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings
* @phydev: The phy_device struct
*
- * Resolve our and the link partner advertisments into their corresponding
+ * Resolve our and the link partner advertisements into their corresponding
* speed and duplex. If full duplex was negotiated, extract the pause mode
* from the link partner mask.
*/
ctl |= BMCR_FULLDPLX;
return phy_modify(phydev, MII_BMCR,
- BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN, ctl);
+ ~(BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN), ctl);
}
EXPORT_SYMBOL(genphy_setup_forced);
}
/* Flow control is resolved according to our and the link partners
- * advertisments using the following drawn from the 802.3 specs:
+ * advertisements using the following drawn from the 802.3 specs:
* Local device Link partner
* Pause AsymDir Pause AsymDir Result
* 1 X 1 X TX+RX
mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
- pl->netdev->phydev = phy;
pl->phydev = phy;
linkmode_copy(pl->supported, supported);
linkmode_copy(pl->link_config.advertising, config.advertising);
- /* Restrict the phy advertisment according to the MAC support. */
+ /* Restrict the phy advertisement according to the MAC support. */
ethtool_convert_link_mode_to_legacy_u32(&advertising, config.advertising);
phy->advertising = advertising;
mutex_unlock(&pl->state_mutex);
if (phy) {
mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
- pl->netdev->phydev = NULL;
pl->phydev = NULL;
mutex_unlock(&pl->state_mutex);
mutex_unlock(&phy->lock);
/* Apply the link configuration to the MAC when starting. This allows
* a fixed-link to start with the correct parameters, and also
- * ensures that we set the appropriate advertisment for Serdes links.
+ * ensures that we set the appropriate advertisement for Serdes links.
*/
phylink_resolve_flow(pl, &pl->link_config);
phylink_mac_config(pl, &pl->link_config);
config = pl->link_config;
- /* Mask out unsupported advertisments */
+ /* Mask out unsupported advertisements */
linkmode_and(config.advertising, kset->link_modes.advertising,
pl->supported);
if (phylink_validate(pl, pl->supported, &config))
return -EINVAL;
- /* If autonegotiation is enabled, we must have an advertisment */
+ /* If autonegotiation is enabled, we must have an advertisement */
if (config.an_enabled && phylink_is_empty_linkmode(config.advertising))
return -EINVAL;
bool changed;
u8 port;
- sfp_parse_support(pl->sfp_bus, id, support);
- port = sfp_parse_port(pl->sfp_bus, id, support);
- iface = sfp_parse_interface(pl->sfp_bus, id);
-
ASSERT_RTNL();
- switch (iface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- case PHY_INTERFACE_MODE_2500BASEX:
- case PHY_INTERFACE_MODE_10GKR:
- break;
- default:
- return -EINVAL;
- }
+ sfp_parse_support(pl->sfp_bus, id, support);
+ port = sfp_parse_port(pl->sfp_bus, id, support);
memset(&config, 0, sizeof(config));
linkmode_copy(config.advertising, support);
- config.interface = iface;
+ config.interface = PHY_INTERFACE_MODE_NA;
config.speed = SPEED_UNKNOWN;
config.duplex = DUPLEX_UNKNOWN;
config.pause = MLO_PAUSE_AN;
/* Ignore errors if we're expecting a PHY to attach later */
ret = phylink_validate(pl, support, &config);
+ if (ret) {
+ netdev_err(pl->netdev, "validation with support %*pb failed: %d\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+ return ret;
+ }
+
+ iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
+ if (iface == PHY_INTERFACE_MODE_NA) {
+ netdev_err(pl->netdev,
+ "selection of interface failed, advertisement %*pb\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
+ return -EINVAL;
+ }
+
+ config.interface = iface;
+ ret = phylink_validate(pl, support, &config);
if (ret) {
netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
phylink_an_mode_str(MLO_AN_INBAND),
}
EXPORT_SYMBOL_GPL(sfp_parse_port);
-/**
- * sfp_parse_interface() - Parse the phy_interface_t
- * @bus: a pointer to the &struct sfp_bus structure for the sfp module
- * @id: a pointer to the module's &struct sfp_eeprom_id
- *
- * Derive the phy_interface_t mode for the information found in the
- * module's identifying EEPROM. There is no standard or defined way
- * to derive this information, so we use some heuristics.
- *
- * If the encoding is 64b66b, then the module must be >= 10G, so
- * return %PHY_INTERFACE_MODE_10GKR.
- *
- * If it's 8b10b, then it's 1G or slower. If it's definitely a fibre
- * module, return %PHY_INTERFACE_MODE_1000BASEX mode, otherwise return
- * %PHY_INTERFACE_MODE_SGMII mode.
- *
- * If the encoding is not known, return %PHY_INTERFACE_MODE_NA.
- */
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id)
-{
- phy_interface_t iface;
-
- /* Setting the serdes link mode is guesswork: there's no field in
- * the EEPROM which indicates what mode should be used.
- *
- * If the module wants 64b66b, then it must be >= 10G.
- *
- * If it's a gigabit-only fiber module, it probably does not have
- * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch
- * to SGMII mode (which is required to support non-gigabit speeds).
- */
- switch (id->base.encoding) {
- case SFP_ENCODING_8472_64B66B:
- iface = PHY_INTERFACE_MODE_10GKR;
- break;
-
- case SFP_ENCODING_8B10B:
- if (!id->base.e1000_base_t &&
- !id->base.e100_base_lx &&
- !id->base.e100_base_fx)
- iface = PHY_INTERFACE_MODE_1000BASEX;
- else
- iface = PHY_INTERFACE_MODE_SGMII;
- break;
-
- default:
- if (id->base.e1000_base_cx) {
- iface = PHY_INTERFACE_MODE_1000BASEX;
- break;
- }
-
- iface = PHY_INTERFACE_MODE_NA;
- dev_err(bus->sfp_dev,
- "SFP module encoding does not support 8b10b nor 64b66b\n");
- break;
- }
-
- return iface;
-}
-EXPORT_SYMBOL_GPL(sfp_parse_interface);
-
/**
* sfp_parse_support() - Parse the eeprom id for supported link modes
* @bus: a pointer to the &struct sfp_bus structure for the sfp module
unsigned long *support)
{
unsigned int br_min, br_nom, br_max;
-
- phylink_set(support, Autoneg);
- phylink_set(support, Pause);
- phylink_set(support, Asym_Pause);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, };
/* Decode the bitrate information to MBd */
br_min = br_nom = br_max = 0;
/* Set ethtool support from the compliance fields. */
if (id->base.e10g_base_sr)
- phylink_set(support, 10000baseSR_Full);
+ phylink_set(modes, 10000baseSR_Full);
if (id->base.e10g_base_lr)
- phylink_set(support, 10000baseLR_Full);
+ phylink_set(modes, 10000baseLR_Full);
if (id->base.e10g_base_lrm)
- phylink_set(support, 10000baseLRM_Full);
+ phylink_set(modes, 10000baseLRM_Full);
if (id->base.e10g_base_er)
- phylink_set(support, 10000baseER_Full);
+ phylink_set(modes, 10000baseER_Full);
if (id->base.e1000_base_sx ||
id->base.e1000_base_lx ||
id->base.e1000_base_cx)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
if (id->base.e1000_base_t) {
- phylink_set(support, 1000baseT_Half);
- phylink_set(support, 1000baseT_Full);
+ phylink_set(modes, 1000baseT_Half);
+ phylink_set(modes, 1000baseT_Full);
}
/* 1000Base-PX or 1000Base-BX10 */
if ((id->base.sfp_ct_passive || id->base.sfp_ct_active) && br_nom) {
/* This may look odd, but some manufacturers use 12000MBd */
if (br_min <= 12000 && br_max >= 10300)
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
if (br_min <= 3200 && br_max >= 3100)
- phylink_set(support, 2500baseX_Full);
+ phylink_set(modes, 2500baseX_Full);
if (br_min <= 1300 && br_max >= 1200)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
}
if (id->base.sfp_ct_passive) {
if (id->base.passive.sff8431_app_e)
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
}
if (id->base.sfp_ct_active) {
if (id->base.active.sff8431_app_e ||
id->base.active.sff8431_lim) {
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
}
}
case 0x00: /* Unspecified */
break;
case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */
- phylink_set(support, 100000baseSR4_Full);
- phylink_set(support, 25000baseSR_Full);
+ phylink_set(modes, 100000baseSR4_Full);
+ phylink_set(modes, 25000baseSR_Full);
break;
case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */
case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */
- phylink_set(support, 100000baseLR4_ER4_Full);
+ phylink_set(modes, 100000baseLR4_ER4_Full);
break;
case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */
case 0x0c: /* 25Gbase-CR CA-S */
case 0x0d: /* 25Gbase-CR CA-N */
- phylink_set(support, 100000baseCR4_Full);
- phylink_set(support, 25000baseCR_Full);
+ phylink_set(modes, 100000baseCR4_Full);
+ phylink_set(modes, 25000baseCR_Full);
break;
default:
dev_warn(bus->sfp_dev,
id->base.fc_speed_200 ||
id->base.fc_speed_400) {
if (id->base.br_nominal >= 31)
- phylink_set(support, 2500baseX_Full);
+ phylink_set(modes, 2500baseX_Full);
if (id->base.br_nominal >= 12)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
}
+
+ /* If we haven't discovered any modes that this module supports, try
+ * the encoding and bitrate to determine supported modes. Some BiDi
+ * modules (eg, 1310nm/1550nm) are not 1000BASE-BX compliant due to
+ * the differing wavelengths, so do not set any transceiver bits.
+ */
+ if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+ /* If the encoding and bit rate allows 1000baseX */
+ if (id->base.encoding == SFP_ENCODING_8B10B && br_nom &&
+ br_min <= 1300 && br_max >= 1200)
+ phylink_set(modes, 1000baseX_Full);
+ }
+
+ bitmap_or(support, support, modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ phylink_set(support, Autoneg);
+ phylink_set(support, Pause);
+ phylink_set(support, Asym_Pause);
}
EXPORT_SYMBOL_GPL(sfp_parse_support);
+/**
+ * sfp_select_interface() - Select appropriate phy_interface_t mode
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ * @link_modes: ethtool link modes mask
+ *
+ * Derive the phy_interface_t mode for the information found in the
+ * module's identifying EEPROM and the link modes mask. There is no
+ * standard or defined way to derive this information, so we decide
+ * based upon the link mode mask.
+ */
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes)
+{
+ if (phylink_test(link_modes, 10000baseCR_Full) ||
+ phylink_test(link_modes, 10000baseSR_Full) ||
+ phylink_test(link_modes, 10000baseLR_Full) ||
+ phylink_test(link_modes, 10000baseLRM_Full) ||
+ phylink_test(link_modes, 10000baseER_Full))
+ return PHY_INTERFACE_MODE_10GKR;
+
+ if (phylink_test(link_modes, 2500baseX_Full))
+ return PHY_INTERFACE_MODE_2500BASEX;
+
+ if (id->base.e1000_base_t ||
+ id->base.e100_base_lx ||
+ id->base.e100_base_fx)
+ return PHY_INTERFACE_MODE_SGMII;
+
+ if (phylink_test(link_modes, 1000baseX_Full))
+ return PHY_INTERFACE_MODE_1000BASEX;
+
+ dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n");
+
+ return PHY_INTERFACE_MODE_NA;
+}
+EXPORT_SYMBOL_GPL(sfp_select_interface);
+
static LIST_HEAD(sfp_buses);
static DEFINE_MUTEX(sfp_mutex);
SFP_MOD_EMPTY = 0,
SFP_MOD_PROBE,
+ SFP_MOD_HPOWER,
SFP_MOD_PRESENT,
SFP_MOD_ERROR,
* access the I2C EEPROM. However, Avago modules require 300ms.
*/
#define T_PROBE_INIT msecs_to_jiffies(300)
+#define T_HPOWER_LEVEL msecs_to_jiffies(300)
#define T_PROBE_RETRY msecs_to_jiffies(100)
/* SFP modules appear to always have their PHY configured for bus address
struct sfp_bus *sfp_bus;
struct phy_device *mod_phy;
const struct sff_data *type;
+ u32 max_power_mW;
unsigned int (*get_state)(struct sfp *);
void (*set_state)(struct sfp *, unsigned int);
int (*read)(struct sfp *, bool, u8, void *, size_t);
+ int (*write)(struct sfp *, bool, u8, void *, size_t);
struct gpio_desc *gpio[GPIO_MAX];
}
}
-static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
- void *buf, size_t len)
+static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+ size_t len)
{
struct i2c_msg msgs[2];
+ u8 bus_addr = a2 ? 0x51 : 0x50;
int ret;
msgs[0].addr = bus_addr;
msgs[1].len = len;
msgs[1].buf = buf;
- ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+ ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
if (ret < 0)
return ret;
return ret == ARRAY_SIZE(msgs) ? len : 0;
}
-static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf,
- size_t len)
+static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+ size_t len)
{
- return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len);
+ struct i2c_msg msgs[1];
+ u8 bus_addr = a2 ? 0x51 : 0x50;
+ int ret;
+
+ msgs[0].addr = bus_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = 1 + len;
+ msgs[0].buf = kmalloc(1 + len, GFP_KERNEL);
+ if (!msgs[0].buf)
+ return -ENOMEM;
+
+ msgs[0].buf[0] = dev_addr;
+ memcpy(&msgs[0].buf[1], buf, len);
+
+ ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
+
+ kfree(msgs[0].buf);
+
+ if (ret < 0)
+ return ret;
+
+ return ret == ARRAY_SIZE(msgs) ? len : 0;
}
static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
sfp->i2c = i2c;
sfp->read = sfp_i2c_read;
+ sfp->write = sfp_i2c_write;
i2c_mii = mdio_i2c_alloc(sfp->dev, i2c);
if (IS_ERR(i2c_mii))
return sfp->read(sfp, a2, addr, buf, len);
}
+static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
+{
+ return sfp->write(sfp, a2, addr, buf, len);
+}
+
static unsigned int sfp_check(void *buf, size_t len)
{
u8 *p, check;
sfp_sm_probe_phy(sfp);
}
+static int sfp_sm_mod_hpower(struct sfp *sfp)
+{
+ u32 power;
+ u8 val;
+ int err;
+
+ power = 1000;
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
+ power = 1500;
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
+ power = 2000;
+
+ if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE &&
+ (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) !=
+ SFP_DIAGMON_DDM) {
+ /* The module appears not to implement bus address 0xa2,
+ * or requires an address change sequence, so assume that
+ * the module powers up in the indicated power mode.
+ */
+ if (power > sfp->max_power_mW) {
+ dev_err(sfp->dev,
+ "Host does not support %u.%uW modules\n",
+ power / 1000, (power / 100) % 10);
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ if (power > sfp->max_power_mW) {
+ dev_warn(sfp->dev,
+ "Host does not support %u.%uW modules, module left in power mode 1\n",
+ power / 1000, (power / 100) % 10);
+ return 0;
+ }
+
+ if (power <= 1000)
+ return 0;
+
+ err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+ if (err != sizeof(val)) {
+ dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err);
+ err = -EAGAIN;
+ goto err;
+ }
+
+ val |= BIT(0);
+
+ err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+ if (err != sizeof(val)) {
+ dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err);
+ err = -EAGAIN;
+ goto err;
+ }
+
+ dev_info(sfp->dev, "Module switched to %u.%uW power level\n",
+ power / 1000, (power / 100) % 10);
+ return T_HPOWER_LEVEL;
+
+err:
+ return err;
+}
+
static int sfp_sm_mod_probe(struct sfp *sfp)
{
/* SFP module inserted - read I2C data */
struct sfp_eeprom_id id;
u8 check;
- int err;
+ int ret;
- err = sfp_read(sfp, false, 0, &id, sizeof(id));
- if (err < 0) {
- dev_err(sfp->dev, "failed to read EEPROM: %d\n", err);
+ ret = sfp_read(sfp, false, 0, &id, sizeof(id));
+ if (ret < 0) {
+ dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
return -EAGAIN;
}
- if (err != sizeof(id)) {
- dev_err(sfp->dev, "EEPROM short read: %d\n", err);
+ if (ret != sizeof(id)) {
+ dev_err(sfp->dev, "EEPROM short read: %d\n", ret);
return -EAGAIN;
}
dev_warn(sfp->dev,
"module address swap to access page 0xA2 is not supported.\n");
- return sfp_module_insert(sfp->sfp_bus, &sfp->id);
+ ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
+ if (ret < 0)
+ return ret;
+
+ return sfp_sm_mod_hpower(sfp);
}
static void sfp_sm_mod_remove(struct sfp *sfp)
if (event == SFP_E_REMOVE) {
sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
} else if (event == SFP_E_TIMEOUT) {
- int err = sfp_sm_mod_probe(sfp);
+ int val = sfp_sm_mod_probe(sfp);
- if (err == 0)
+ if (val == 0)
sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
- else if (err == -EAGAIN)
- sfp_sm_set_timer(sfp, T_PROBE_RETRY);
- else
+ else if (val > 0)
+ sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val);
+ else if (val != -EAGAIN)
sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0);
+ else
+ sfp_sm_set_timer(sfp, T_PROBE_RETRY);
}
break;
+ case SFP_MOD_HPOWER:
+ if (event == SFP_E_TIMEOUT) {
+ sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
+ break;
+ }
+ /* fallthrough */
case SFP_MOD_PRESENT:
case SFP_MOD_ERROR:
if (event == SFP_E_REMOVE) {
if (!(sfp->gpio[GPIO_MODDEF0]))
sfp->get_state = sff_gpio_get_state;
+ device_property_read_u32(&pdev->dev, "maximum-power-milliwatt",
+ &sfp->max_power_mW);
+ if (!sfp->max_power_mW)
+ sfp->max_power_mW = 1000;
+
+ dev_info(sfp->dev, "Host maximum power %u.%uW\n",
+ sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10);
+
sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
if (!sfp->sfp_bus)
return -ENOMEM;
MDIO_PHYXS_LNSTAT_SYNC3 | \
MDIO_PHYXS_LNSTAT_ALIGN)
-static int teranetics_config_init(struct phy_device *phydev)
-{
- phydev->supported = SUPPORTED_10000baseT_Full;
- phydev->advertising = SUPPORTED_10000baseT_Full;
-
- return 0;
-}
-
-static int teranetics_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int teranetics_aneg_done(struct phy_device *phydev)
{
- int reg;
-
/* auto negotiation state can only be checked when using copper
* port, if using fiber port, just lie it's done.
*/
- if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93)) {
- reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
- return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
- }
+ if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93))
+ return genphy_c45_aneg_done(phydev);
return 1;
}
-static int teranetics_config_aneg(struct phy_device *phydev)
-{
- return 0;
-}
-
static int teranetics_read_status(struct phy_device *phydev)
{
int reg;
.phy_id = PHY_ID_TN2020,
.phy_id_mask = 0xffffffff,
.name = "Teranetics TN2020",
- .soft_reset = teranetics_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.aneg_done = teranetics_aneg_done,
- .config_init = teranetics_config_init,
- .config_aneg = teranetics_config_aneg,
+ .config_init = gen10g_config_init,
+ .config_aneg = gen10g_config_aneg,
.read_status = teranetics_read_status,
.match_phy_device = teranetics_match_phy_device,
},
.exit = ppp_exit_net,
.id = &ppp_net_id,
.size = sizeof(struct ppp_net),
+ .async = true,
};
static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)
}
static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr,
- int *usockaddr_len, int peer)
+ int peer)
{
int len = sizeof(struct sockaddr_pppox);
struct sockaddr_pppox sp;
memcpy(uaddr, &sp, len);
- *usockaddr_len = len;
-
- return 0;
+ return len;
}
static int pppoe_ioctl(struct socket *sock, unsigned int cmd,
.exit = pppoe_exit_net,
.id = &pppoe_net_id,
.size = sizeof(struct pppoe_net),
+ .async = true,
};
static int __init pppoe_init(void)
}
static int pptp_getname(struct socket *sock, struct sockaddr *uaddr,
- int *usockaddr_len, int peer)
+ int peer)
{
int len = sizeof(struct sockaddr_pppox);
struct sockaddr_pppox sp;
memcpy(uaddr, &sp, len);
- *usockaddr_len = len;
-
- return 0;
+ return len;
}
static int pptp_release(struct socket *sock)
}
#endif
-static int team_upper_dev_link(struct team *team, struct team_port *port)
+static int team_upper_dev_link(struct team *team, struct team_port *port,
+ struct netlink_ext_ack *extack)
{
struct netdev_lag_upper_info lag_upper_info;
int err;
lag_upper_info.tx_type = team->mode->lag_tx_type;
err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
- &lag_upper_info, NULL);
+ &lag_upper_info, extack);
if (err)
return err;
port->dev->priv_flags |= IFF_TEAM_PORT;
static int team_dev_type_check_change(struct net_device *dev,
struct net_device *port_dev);
-static int team_port_add(struct team *team, struct net_device *port_dev)
+static int team_port_add(struct team *team, struct net_device *port_dev,
+ struct netlink_ext_ack *extack)
{
struct net_device *dev = team->dev;
struct team_port *port;
int err;
if (port_dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port");
netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n",
portname);
return -EINVAL;
}
if (team_port_exists(port_dev)) {
+ NL_SET_ERR_MSG(extack, "Device is already a port of a team device");
netdev_err(dev, "Device %s is already a port "
"of a team device\n", portname);
return -EBUSY;
if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(dev)) {
+ NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n",
portname);
return -EPERM;
return err;
if (port_dev->flags & IFF_UP) {
+ NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port");
netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
portname);
return -EBUSY;
goto err_handler_register;
}
- err = team_upper_dev_link(team, port);
+ err = team_upper_dev_link(team, port, extack);
if (err) {
netdev_err(dev, "Device %s failed to set upper link\n",
portname);
int err;
mutex_lock(&team->lock);
- err = team_port_add(team, port_dev);
+ err = team_port_add(team, port_dev, extack);
mutex_unlock(&team->lock);
if (!err)
* @connected_work: Worker that finalizes the ThunderboltIP connection
* setup and enables DMA paths for high speed data
* transfers
+ * @disconnect_work: Worker that handles tearing down the ThunderboltIP
+ * connection
* @rx_hdr: Copy of the currently processed Rx frame. Used when a
* network packet consists of multiple Thunderbolt frames.
* In host byte order.
int login_retries;
struct delayed_work login_work;
struct work_struct connected_work;
+ struct work_struct disconnect_work;
struct thunderbolt_ip_frame_header rx_hdr;
struct tbnet_ring rx_ring;
atomic_t frame_id;
case TBIP_LOGOUT:
ret = tbnet_logout_response(net, route, sequence, command_id);
if (!ret)
- tbnet_tear_down(net, false);
+ queue_work(system_long_wq, &net->disconnect_work);
break;
default:
}
}
+static void tbnet_disconnect_work(struct work_struct *work)
+{
+ struct tbnet *net = container_of(work, typeof(*net), disconnect_work);
+
+ tbnet_tear_down(net, false);
+}
+
static bool tbnet_check_frame(struct tbnet *net, const struct tbnet_frame *tf,
const struct thunderbolt_ip_frame_header *hdr)
{
napi_disable(&net->napi);
+ cancel_work_sync(&net->disconnect_work);
tbnet_tear_down(net, true);
tb_ring_free(net->rx_ring.ring);
net = netdev_priv(dev);
INIT_DELAYED_WORK(&net->login_work, tbnet_login_work);
INIT_WORK(&net->connected_work, tbnet_connected_work);
+ INIT_WORK(&net->disconnect_work, tbnet_disconnect_work);
mutex_init(&net->connection_lock);
atomic_set(&net->command_id, 0);
atomic_set(&net->frame_id, 0);
stop_login(net);
if (netif_running(net->dev)) {
netif_device_detach(net->dev);
- tb_ring_stop(net->rx_ring.ring);
- tb_ring_stop(net->tx_ring.ring);
- tbnet_free_buffers(&net->rx_ring);
- tbnet_free_buffers(&net->tx_ring);
+ tbnet_tear_down(net, true);
}
return 0;
#include <linux/mutex.h>
#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
/* Uncomment to enable debugging */
/* #define TUN_DEBUG 1 */
skb->truesize += skb->data_len;
for (i = 1; i < it->nr_segs; i++) {
+ struct page_frag *pfrag = ¤t->task_frag;
size_t fragsz = it->iov[i].iov_len;
- unsigned long offset;
- struct page *page;
- void *data;
if (fragsz == 0 || fragsz > PAGE_SIZE) {
err = -EINVAL;
goto free;
}
- local_bh_disable();
- data = napi_alloc_frag(fragsz);
- local_bh_enable();
- if (!data) {
+ if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) {
err = -ENOMEM;
goto free;
}
- page = virt_to_head_page(data);
- offset = data - page_address(page);
- skb_fill_page_desc(skb, i - 1, page, offset, fragsz);
+ skb_fill_page_desc(skb, i - 1, pfrag->page,
+ pfrag->offset, fragsz);
+ page_ref_inc(pfrag->page);
+ pfrag->offset += fragsz;
}
return skb;
return -EINVAL;
}
+static size_t tun_get_size(const struct net_device *dev)
+{
+ BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t));
+ BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t));
+
+ return nla_total_size(sizeof(uid_t)) + /* OWNER */
+ nla_total_size(sizeof(gid_t)) + /* GROUP */
+ nla_total_size(sizeof(u8)) + /* TYPE */
+ nla_total_size(sizeof(u8)) + /* PI */
+ nla_total_size(sizeof(u8)) + /* VNET_HDR */
+ nla_total_size(sizeof(u8)) + /* PERSIST */
+ nla_total_size(sizeof(u8)) + /* MULTI_QUEUE */
+ nla_total_size(sizeof(u32)) + /* NUM_QUEUES */
+ nla_total_size(sizeof(u32)) + /* NUM_DISABLED_QUEUES */
+ 0;
+}
+
+static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+
+ if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK))
+ goto nla_put_failure;
+ if (uid_valid(tun->owner) &&
+ nla_put_u32(skb, IFLA_TUN_OWNER,
+ from_kuid_munged(current_user_ns(), tun->owner)))
+ goto nla_put_failure;
+ if (gid_valid(tun->group) &&
+ nla_put_u32(skb, IFLA_TUN_GROUP,
+ from_kgid_munged(current_user_ns(), tun->group)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE,
+ !!(tun->flags & IFF_MULTI_QUEUE)))
+ goto nla_put_failure;
+ if (tun->flags & IFF_MULTI_QUEUE) {
+ if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES,
+ tun->numdisabled))
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static struct rtnl_link_ops tun_link_ops __read_mostly = {
.kind = DRV_NAME,
.priv_size = sizeof(struct tun_struct),
.setup = tun_setup,
.validate = tun_validate,
+ .get_size = tun_get_size,
+ .fill_info = tun_fill_info,
};
static void tun_sock_write_space(struct sock *sk)
struct tun_struct *tun;
void __user* argp = (void __user*)arg;
struct ifreq ifr;
+ struct net *net;
kuid_t owner;
kgid_t group;
int sndbuf;
int le;
int ret;
- if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) {
+ if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
+ (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
if (copy_from_user(&ifr, argp, ifreq_len))
return -EFAULT;
} else {
rtnl_lock();
tun = tun_get(tfile);
+ net = sock_net(&tfile->sk);
if (cmd == TUNSETIFF) {
ret = -EEXIST;
if (tun)
ifr.ifr_name[IFNAMSIZ-1] = '\0';
- ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr);
+ ret = tun_set_iff(net, file, &ifr);
if (ret)
goto unlock;
tfile->ifindex = ifindex;
goto unlock;
}
+ if (cmd == SIOCGSKNS) {
+ ret = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ goto unlock;
+
+ ret = open_related_ns(&net->ns, get_net_ns);
+ goto unlock;
+ }
ret = -EBADFD;
if (!tun)
return -ENOMEM;
memcpy(usb_buf, init_msg_1, 12);
- status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_1)
- / sizeof(init_msg_1[0]), usb_buf, 24);
+ status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_1),
+ usb_buf, 24);
if (status != 0)
return status;
memcpy(usb_buf, init_msg_2, 12);
- status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_2)
- / sizeof(init_msg_2[0]), usb_buf, 28);
+ status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_2),
+ usb_buf, 28);
if (status != 0)
return status;
/* it's racing here! */
ret = smsc75xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
- if (ret < 0)
+ if (ret < 0) {
netdev_warn(dev->net, "Error writing RFE_CTL\n");
-
- return ret;
+ return ret;
+ }
+ return 0;
}
static int smsc75xx_wait_ready(struct usbnet *dev, int in_pm)
sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC);
- if (unlikely(err)) {
- struct page *page = virt_to_head_page(xdp->data);
-
- put_page(page);
- return false;
- }
+ if (unlikely(err))
+ return false; /* Caller handle free/refcnt */
return true;
}
static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
{
struct virtnet_info *vi = netdev_priv(dev);
- bool sent = __virtnet_xdp_xmit(vi, xdp);
+ struct receive_queue *rq = vi->rq;
+ struct bpf_prog *xdp_prog;
+ bool sent;
+ /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
+ * indicate XDP resources have been successfully allocated.
+ */
+ xdp_prog = rcu_dereference(rq->xdp_prog);
+ if (!xdp_prog)
+ return -ENXIO;
+
+ sent = __virtnet_xdp_xmit(vi, xdp);
if (!sent)
return -ENOSPC;
return 0;
page_off += *len;
while (--*num_buf) {
+ int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
unsigned int buflen;
void *buf;
int off;
/* guard against a misconfigured or uncooperative backend that
* is sending packet larger than the MTU.
*/
- if ((page_off + buflen) > PAGE_SIZE) {
+ if ((page_off + buflen + tailroom) > PAGE_SIZE) {
put_page(p);
goto err_buf;
}
unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
struct page *page = virt_to_head_page(buf);
- unsigned int delta = 0, err;
+ unsigned int delta = 0;
struct page *xdp_page;
+ bool sent;
+ int err;
+
len -= vi->hdr_len;
rcu_read_lock();
void *orig_data;
u32 act;
- if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+ if (unlikely(hdr->hdr.gso_type))
goto err_xdp;
if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
delta = orig_data - xdp.data;
break;
case XDP_TX:
- if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
+ sent = __virtnet_xdp_xmit(vi, &xdp);
+ if (unlikely(!sent)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
- else
- *xdp_xmit = true;
+ goto err_xdp;
+ }
+ *xdp_xmit = true;
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
err = xdp_do_redirect(dev, &xdp, xdp_prog);
- if (!err)
- *xdp_xmit = true;
+ if (err)
+ goto err_xdp;
+ *xdp_xmit = true;
rcu_read_unlock();
goto xdp_xmit;
default:
struct bpf_prog *xdp_prog;
unsigned int truesize;
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
+ bool sent;
int err;
head_skb = NULL;
void *data;
u32 act;
- /* This happens when rx buffer size is underestimated */
+ /* This happens when rx buffer size is underestimated
+ * or headroom is not enough because of the buffer
+ * was refilled before XDP is set. This should only
+ * happen for the first several packets, so we don't
+ * care much about its performance.
+ */
if (unlikely(num_buf > 1 ||
headroom < virtnet_get_headroom(vi))) {
/* linearize data for XDP */
act = bpf_prog_run_xdp(xdp_prog, &xdp);
- if (act != XDP_PASS)
- ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
-
switch (act) {
case XDP_PASS:
/* recalculate offset to account for any header
}
break;
case XDP_TX:
- if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
+ sent = __virtnet_xdp_xmit(vi, &xdp);
+ if (unlikely(!sent)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
- else
- *xdp_xmit = true;
+ if (unlikely(xdp_page != page))
+ put_page(xdp_page);
+ goto err_xdp;
+ }
+ *xdp_xmit = true;
if (unlikely(xdp_page != page))
goto err_xdp;
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
err = xdp_do_redirect(dev, &xdp, xdp_prog);
- if (!err)
- *xdp_xmit = true;
+ if (err) {
+ if (unlikely(xdp_page != page))
+ put_page(xdp_page);
+ goto err_xdp;
+ }
+ *xdp_xmit = true;
+ if (unlikely(xdp_page != page))
+ goto err_xdp;
rcu_read_unlock();
goto xdp_xmit;
default:
}
static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
- struct ewma_pkt_len *avg_pkt_len)
+ struct ewma_pkt_len *avg_pkt_len,
+ unsigned int room)
{
const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
unsigned int len;
- len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
+ if (room)
+ return PAGE_SIZE - room;
+
+ len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
rq->min_buf_len, PAGE_SIZE - hdr_len);
+
return ALIGN(len, L1_CACHE_BYTES);
}
{
struct page_frag *alloc_frag = &rq->alloc_frag;
unsigned int headroom = virtnet_get_headroom(vi);
+ unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+ unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
char *buf;
void *ctx;
int err;
unsigned int len, hole;
- len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
- if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
+ /* Extra tailroom is needed to satisfy XDP's assumption. This
+ * means rx frags coalescing won't work, but consider we've
+ * disabled GSO for XDP, it won't be a big issue.
+ */
+ len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+ if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
return -ENOMEM;
buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
buf += headroom; /* advance address leaving hole at front of pkt */
get_page(alloc_frag->page);
- alloc_frag->offset += len + headroom;
+ alloc_frag->offset += len + room;
hole = alloc_frag->size - alloc_frag->offset;
- if (hole < len + headroom) {
+ if (hole < len + room) {
/* To avoid internal fragmentation, if there is very likely not
* enough space for another buffer, add the remaining space to
* the current buffer.
{
struct virtnet_info *vi = netdev_priv(queue->dev);
unsigned int queue_index = get_netdev_rx_queue_index(queue);
+ unsigned int headroom = virtnet_get_headroom(vi);
+ unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
struct ewma_pkt_len *avg;
BUG_ON(queue_index >= vi->max_queue_pairs);
avg = &vi->rq[queue_index].mrg_avg_pkt_len;
return sprintf(buf, "%u\n",
- get_mergeable_buf_len(&vi->rq[queue_index], avg));
+ get_mergeable_buf_len(&vi->rq[queue_index], avg,
+ SKB_DATA_ALIGN(headroom + tailroom)));
}
static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
return -ENOMEM;
rth->dst.output = vrf_output;
- rth->rt_table_id = vrf->tb_id;
rcu_assign_pointer(vrf->rth, rth);
const struct net_device *dev,
struct flowi6 *fl6,
int ifindex,
+ const struct sk_buff *skb,
int flags)
{
struct net_vrf *vrf = netdev_priv(dev);
if (!table)
return NULL;
- return ip6_pol_route(net, table, ifindex, fl6, flags);
+ return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
}
static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
struct net *net = dev_net(vrf_dev);
struct rt6_info *rt6;
- rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex,
+ rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb,
RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
if (unlikely(!rt6))
return;
if (!ipv6_addr_any(&fl6->saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
+ rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags);
if (rt)
dst = &rt->dst;
sz = NLMSG_ALIGN(sizeof(struct fib_rule_hdr));
sz += nla_total_size(sizeof(u8)); /* FRA_L3MDEV */
sz += nla_total_size(sizeof(u32)); /* FRA_PRIORITY */
+ sz += nla_total_size(sizeof(u8)); /* FRA_PROTOCOL */
return sz;
}
frh->family = family;
frh->action = FR_ACT_TO_TBL;
+ if (nla_put_u8(skb, FRA_PROTOCOL, RTPROT_KERNEL))
+ goto nla_put_failure;
+
if (nla_put_u8(skb, FRA_L3MDEV, 1))
goto nla_put_failure;
.init = vrf_netns_init,
.id = &vrf_net_id,
.size = sizeof(bool),
+ .async = true,
};
static int __init vrf_init_module(void)
.exit_batch = vxlan_exit_batch_net,
.id = &vxlan_net_id,
.size = sizeof(struct vxlan_net),
+ .async = true,
};
static int __init vxlan_init_module(void)
*/
bool alive = (sta->status == wil_sta_connected);
- cfg80211_probe_status(ndev, sta->addr, req->cookie, alive, GFP_KERNEL);
+ cfg80211_probe_status(ndev, sta->addr, req->cookie, alive,
+ 0, false, GFP_KERNEL);
}
static struct list_head *next_probe_client(struct wil6210_priv *wil)
static unsigned int hwsim_net_id;
-static int hwsim_netgroup;
+static struct ida hwsim_netgroup_ida = IDA_INIT;
struct hwsim_net {
int netgroup;
return hwsim_net->netgroup;
}
-static inline void hwsim_net_set_netgroup(struct net *net)
+static inline int hwsim_net_set_netgroup(struct net *net)
{
struct hwsim_net *hwsim_net = net_generic(net, hwsim_net_id);
- hwsim_net->netgroup = hwsim_netgroup++;
+ hwsim_net->netgroup = ida_simple_get(&hwsim_netgroup_ida,
+ 0, 0, GFP_KERNEL);
+ return hwsim_net->netgroup >= 0 ? 0 : -ENOMEM;
}
static inline u32 hwsim_net_get_wmediumd(struct net *net)
static struct workqueue_struct *hwsim_wq;
static struct rhashtable hwsim_radios_rht;
static int hwsim_radio_idx;
+static int hwsim_radios_generation = 1;
static struct platform_driver mac80211_hwsim_driver = {
.driver = {
[HWSIM_ATTR_RADIO_NAME] = { .type = NLA_STRING },
[HWSIM_ATTR_NO_VIF] = { .type = NLA_FLAG },
[HWSIM_ATTR_FREQ] = { .type = NLA_U32 },
+ [HWSIM_ATTR_PERM_ADDR] = { .type = NLA_UNSPEC, .len = ETH_ALEN },
};
static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
bool destroy_on_close;
const char *hwname;
bool no_vif;
+ const u8 *perm_addr;
};
static void hwsim_mcast_config_msg(struct sk_buff *mcast_skb,
skb_queue_head_init(&data->pending);
SET_IEEE80211_DEV(hw, data->dev);
- eth_zero_addr(addr);
- addr[0] = 0x02;
- addr[3] = idx >> 8;
- addr[4] = idx;
- memcpy(data->addresses[0].addr, addr, ETH_ALEN);
- memcpy(data->addresses[1].addr, addr, ETH_ALEN);
- data->addresses[1].addr[0] |= 0x40;
- hw->wiphy->n_addresses = 2;
- hw->wiphy->addresses = data->addresses;
+ if (!param->perm_addr) {
+ eth_zero_addr(addr);
+ addr[0] = 0x02;
+ addr[3] = idx >> 8;
+ addr[4] = idx;
+ memcpy(data->addresses[0].addr, addr, ETH_ALEN);
+ /* Why need here second address ? */
+ data->addresses[1].addr[0] |= 0x40;
+ memcpy(data->addresses[1].addr, addr, ETH_ALEN);
+ hw->wiphy->n_addresses = 2;
+ hw->wiphy->addresses = data->addresses;
+ /* possible address clash is checked at hash table insertion */
+ } else {
+ memcpy(data->addresses[0].addr, param->perm_addr, ETH_ALEN);
+ /* compatibility with automatically generated mac addr */
+ memcpy(data->addresses[1].addr, param->perm_addr, ETH_ALEN);
+ hw->wiphy->n_addresses = 2;
+ hw->wiphy->addresses = data->addresses;
+ }
data->channels = param->channels;
data->use_chanctx = param->use_chanctx;
err = rhashtable_insert_fast(&hwsim_radios_rht, &data->rht,
hwsim_rht_params);
if (err < 0) {
- pr_debug("mac80211_hwsim: radio index %d already present\n",
- idx);
+ if (info) {
+ GENL_SET_ERR_MSG(info, "perm addr already present");
+ NL_SET_BAD_ATTR(info->extack,
+ info->attrs[HWSIM_ATTR_PERM_ADDR]);
+ }
spin_unlock_bh(&hwsim_radio_lock);
goto failed_final_insert;
}
list_add_tail(&data->list, &hwsim_radios);
+ hwsim_radios_generation++;
spin_unlock_bh(&hwsim_radio_lock);
if (idx > 0)
param.regd = hwsim_world_regdom_custom[idx];
}
+ if (info->attrs[HWSIM_ATTR_PERM_ADDR]) {
+ if (!is_valid_ether_addr(
+ nla_data(info->attrs[HWSIM_ATTR_PERM_ADDR]))) {
+ GENL_SET_ERR_MSG(info,"MAC is no valid source addr");
+ NL_SET_BAD_ATTR(info->extack,
+ info->attrs[HWSIM_ATTR_PERM_ADDR]);
+ return -EINVAL;
+ }
+
+
+ param.perm_addr = nla_data(info->attrs[HWSIM_ATTR_PERM_ADDR]);
+ }
+
ret = mac80211_hwsim_new_radio(info, ¶m);
kfree(hwname);
return ret;
list_del(&data->list);
rhashtable_remove_fast(&hwsim_radios_rht, &data->rht,
hwsim_rht_params);
+ hwsim_radios_generation++;
spin_unlock_bh(&hwsim_radio_lock);
mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy),
info);
static int hwsim_dump_radio_nl(struct sk_buff *skb,
struct netlink_callback *cb)
{
- int idx = cb->args[0];
+ int last_idx = cb->args[0];
struct mac80211_hwsim_data *data = NULL;
- int res;
+ int res = 0;
+ void *hdr;
spin_lock_bh(&hwsim_radio_lock);
+ cb->seq = hwsim_radios_generation;
- if (idx == hwsim_radio_idx)
+ if (last_idx >= hwsim_radio_idx-1)
goto done;
list_for_each_entry(data, &hwsim_radios, list) {
- if (data->idx < idx)
+ if (data->idx <= last_idx)
continue;
if (!net_eq(wiphy_net(data->hw->wiphy), sock_net(skb->sk)))
if (res < 0)
break;
- idx = data->idx + 1;
+ last_idx = data->idx;
}
- cb->args[0] = idx;
+ cb->args[0] = last_idx;
+
+ /* list changed, but no new element sent, set interrupted flag */
+ if (skb->len == 0 && cb->prev_seq && cb->seq != cb->prev_seq) {
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &hwsim_genl_family,
+ NLM_F_MULTI, HWSIM_CMD_GET_RADIO);
+ if (!hdr)
+ res = -EMSGSIZE;
+ genl_dump_check_consistent(cb, hdr);
+ genlmsg_end(skb, hdr);
+ }
done:
spin_unlock_bh(&hwsim_radio_lock);
- return skb->len;
+ return res ?: skb->len;
}
/* Generic Netlink operations array */
struct mac80211_hwsim_data *data =
container_of(work, struct mac80211_hwsim_data, destroy_work);
+ hwsim_radios_generation++;
mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), NULL);
}
static __net_init int hwsim_init_net(struct net *net)
{
- hwsim_net_set_netgroup(net);
-
- return 0;
+ return hwsim_net_set_netgroup(net);
}
static void __net_exit hwsim_exit_net(struct net *net)
queue_work(hwsim_wq, &data->destroy_work);
}
spin_unlock_bh(&hwsim_radio_lock);
+
+ ida_simple_remove(&hwsim_netgroup_ida, hwsim_net_get_netgroup(net));
}
static struct pernet_operations hwsim_net_ops = {
.exit = hwsim_exit_net,
.id = &hwsim_net_id,
.size = sizeof(struct hwsim_net),
+ .async = true,
};
static void hwsim_exit_netlink(void)
spin_lock_init(&hwsim_radio_lock);
- hwsim_wq = alloc_workqueue("hwsim_wq",WQ_MEM_RECLAIM,0);
+ hwsim_wq = alloc_workqueue("hwsim_wq", 0, 0);
if (!hwsim_wq)
return -ENOMEM;
rhashtable_init(&hwsim_radios_rht, &hwsim_rht_params);
* %HWSIM_ATTR_SIGNAL, %HWSIM_ATTR_COOKIE
* @HWSIM_CMD_NEW_RADIO: create a new radio with the given parameters,
* returns the radio ID (>= 0) or negative on errors, if successful
- * then multicast the result
+ * then multicast the result, uses optional parameter:
+ * %HWSIM_ATTR_REG_STRICT_REG, %HWSIM_ATTR_SUPPORT_P2P_DEVICE,
+ * %HWSIM_ATTR_DESTROY_RADIO_ON_CLOSE, %HWSIM_ATTR_CHANNELS,
+ * %HWSIM_ATTR_NO_VIF, %HWSIM_ATTR_RADIO_NAME, %HWSIM_ATTR_USE_CHANCTX,
+ * %HWSIM_ATTR_REG_HINT_ALPHA2, %HWSIM_ATTR_REG_CUSTOM_REG,
+ * %HWSIM_ATTR_PERM_ADDR
* @HWSIM_CMD_DEL_RADIO: destroy a radio, reply is multicasted
* @HWSIM_CMD_GET_RADIO: fetch information about existing radios, uses:
* %HWSIM_ATTR_RADIO_ID
* @HWSIM_ATTR_FREQ: Frequency at which packet is transmitted or received.
* @HWSIM_ATTR_TX_INFO_FLAGS: additional flags for corresponding
* rates of %HWSIM_ATTR_TX_INFO
+ * @HWSIM_ATTR_PERM_ADDR: permanent mac address of new radio
* @__HWSIM_ATTR_MAX: enum limit
*/
HWSIM_ATTR_FREQ,
HWSIM_ATTR_PAD,
HWSIM_ATTR_TX_INFO_FLAGS,
+ HWSIM_ATTR_PERM_ADDR,
__HWSIM_ATTR_MAX,
};
#define HWSIM_ATTR_MAX (__HWSIM_ATTR_MAX - 1)
BUG();
}
-void xenvif_rx_skb(struct xenvif_queue *queue)
+static void xenvif_rx_skb(struct xenvif_queue *queue)
{
struct xenvif_pkt_state pkt;
int ret;
ret = nvme_reset_ctrl(ctrl);
- if (!ret)
+ if (!ret) {
flush_work(&ctrl->reset_work);
+ if (ctrl->state != NVME_CTRL_LIVE)
+ ret = -ENETRESET;
+ }
+
return ret;
}
EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
switch (new_state) {
case NVME_CTRL_ADMIN_ONLY:
switch (old_state) {
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
changed = true;
/* FALLTHRU */
default:
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_RESETTING:
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
changed = true;
/* FALLTHRU */
default:
break;
}
break;
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
switch (old_state) {
- case NVME_CTRL_LIVE:
+ case NVME_CTRL_NEW:
case NVME_CTRL_RESETTING:
changed = true;
/* FALLTHRU */
case NVME_CTRL_LIVE:
case NVME_CTRL_ADMIN_ONLY:
case NVME_CTRL_RESETTING:
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
changed = true;
/* FALLTHRU */
default:
u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
- range[n].cattr = cpu_to_le32(0);
- range[n].nlb = cpu_to_le32(nlb);
- range[n].slba = cpu_to_le64(slba);
+ if (n < segments) {
+ range[n].cattr = cpu_to_le32(0);
+ range[n].nlb = cpu_to_le32(nlb);
+ range[n].slba = cpu_to_le64(slba);
+ }
n++;
}
static int nvme_keep_alive(struct nvme_ctrl *ctrl)
{
- struct nvme_command c;
struct request *rq;
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_keep_alive;
-
- rq = nvme_alloc_request(ctrl->admin_q, &c, BLK_MQ_REQ_RESERVED,
+ rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED,
NVME_QID_ANY);
if (IS_ERR(rq))
return PTR_ERR(rq);
return;
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+ memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
+ ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}
EXPORT_SYMBOL_GPL(nvme_start_keep_alive);
static void nvme_update_formats(struct nvme_ctrl *ctrl)
{
- struct nvme_ns *ns;
+ struct nvme_ns *ns, *next;
+ LIST_HEAD(rm_list);
mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry(ns, &ctrl->namespaces, list) {
- if (ns->disk && nvme_revalidate_disk(ns->disk))
- nvme_ns_remove(ns);
+ if (ns->disk && nvme_revalidate_disk(ns->disk)) {
+ list_move_tail(&ns->list, &rm_list);
+ }
}
mutex_unlock(&ctrl->namespaces_mutex);
+
+ list_for_each_entry_safe(ns, next, &rm_list, list)
+ nvme_ns_remove(ns);
}
static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
[NVME_CTRL_LIVE] = "live",
[NVME_CTRL_ADMIN_ONLY] = "only-admin",
[NVME_CTRL_RESETTING] = "resetting",
- [NVME_CTRL_RECONNECTING]= "reconnecting",
+ [NVME_CTRL_CONNECTING] = "connecting",
[NVME_CTRL_DELETING] = "deleting",
[NVME_CTRL_DEAD] = "dead",
};
cmd->common.opcode != nvme_fabrics_command ||
cmd->fabrics.fctype != nvme_fabrics_type_connect) {
/*
- * Reconnecting state means transport disruption, which can take
- * a long time and even might fail permanently, fail fast to
- * give upper layers a chance to failover.
+ * Connecting state means transport disruption or initial
+ * establishment, which can take a long time and even might
+ * fail permanently, fail fast to give upper layers a chance
+ * to failover.
* Deleting state means that the ctrl will never accept commands
* again, fail it permanently.
*/
- if (ctrl->state == NVME_CTRL_RECONNECTING ||
+ if (ctrl->state == NVME_CTRL_CONNECTING ||
ctrl->state == NVME_CTRL_DELETING) {
nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR;
enum nvme_fcop_flags {
FCOP_FLAGS_TERMIO = (1 << 0),
- FCOP_FLAGS_RELEASED = (1 << 1),
- FCOP_FLAGS_COMPLETE = (1 << 2),
- FCOP_FLAGS_AEN = (1 << 3),
+ FCOP_FLAGS_AEN = (1 << 1),
};
struct nvmefc_ls_req_op {
{
switch (ctrl->ctrl.state) {
case NVME_CTRL_NEW:
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
/*
* As all reconnects were suppressed, schedule a
* connect.
}
break;
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
/*
* The association has already been terminated and the
* controller is attempting reconnects. No need to do anything
/* *********************** NVME Ctrl Routines **************************** */
-static void __nvme_fc_final_op_cleanup(struct request *rq);
static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
static int
static int
__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
{
- int state;
+ unsigned long flags;
+ int opstate;
+
+ spin_lock_irqsave(&ctrl->lock, flags);
+ opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
+ if (opstate != FCPOP_STATE_ACTIVE)
+ atomic_set(&op->state, opstate);
+ else if (ctrl->flags & FCCTRL_TERMIO)
+ ctrl->iocnt++;
+ spin_unlock_irqrestore(&ctrl->lock, flags);
- state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
- if (state != FCPOP_STATE_ACTIVE) {
- atomic_set(&op->state, state);
+ if (opstate != FCPOP_STATE_ACTIVE)
return -ECANCELED;
- }
ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
&ctrl->rport->remoteport,
nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
{
struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
- unsigned long flags;
- int i, ret;
-
- for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
- if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
- continue;
-
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO) {
- ctrl->iocnt++;
- aen_op->flags |= FCOP_FLAGS_TERMIO;
- }
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- ret = __nvme_fc_abort_op(ctrl, aen_op);
- if (ret) {
- /*
- * if __nvme_fc_abort_op failed the io wasn't
- * active. Thus this call path is running in
- * parallel to the io complete. Treat as non-error.
- */
+ int i;
- /* back out the flags/counters */
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO)
- ctrl->iocnt--;
- aen_op->flags &= ~FCOP_FLAGS_TERMIO;
- spin_unlock_irqrestore(&ctrl->lock, flags);
- return;
- }
- }
+ for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
+ __nvme_fc_abort_op(ctrl, aen_op);
}
-static inline int
+static inline void
__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
- struct nvme_fc_fcp_op *op)
+ struct nvme_fc_fcp_op *op, int opstate)
{
unsigned long flags;
- bool complete_rq = false;
- spin_lock_irqsave(&ctrl->lock, flags);
- if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
+ if (opstate == FCPOP_STATE_ABORTED) {
+ spin_lock_irqsave(&ctrl->lock, flags);
if (ctrl->flags & FCCTRL_TERMIO) {
if (!--ctrl->iocnt)
wake_up(&ctrl->ioabort_wait);
}
+ spin_unlock_irqrestore(&ctrl->lock, flags);
}
- if (op->flags & FCOP_FLAGS_RELEASED)
- complete_rq = true;
- else
- op->flags |= FCOP_FLAGS_COMPLETE;
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- return complete_rq;
}
static void
__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
union nvme_result result;
bool terminate_assoc = true;
+ int opstate;
/*
* WARNING:
* association to be terminated.
*/
+ opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
+
fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
sizeof(op->rsp_iu), DMA_FROM_DEVICE);
- if (atomic_read(&op->state) == FCPOP_STATE_ABORTED ||
- op->flags & FCOP_FLAGS_TERMIO)
+ if (opstate == FCPOP_STATE_ABORTED)
status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
else if (freq->status)
status = cpu_to_le16(NVME_SC_INTERNAL << 1);
done:
if (op->flags & FCOP_FLAGS_AEN) {
nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
- __nvme_fc_fcpop_chk_teardowns(ctrl, op);
+ __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
atomic_set(&op->state, FCPOP_STATE_IDLE);
op->flags = FCOP_FLAGS_AEN; /* clear other flags */
nvme_fc_ctrl_put(ctrl);
if (status &&
(blk_queue_dying(rq->q) ||
ctrl->ctrl.state == NVME_CTRL_NEW ||
- ctrl->ctrl.state == NVME_CTRL_RECONNECTING))
+ ctrl->ctrl.state == NVME_CTRL_CONNECTING))
status |= cpu_to_le16(NVME_SC_DNR << 1);
- if (__nvme_fc_fcpop_chk_teardowns(ctrl, op))
- __nvme_fc_final_op_cleanup(rq);
- else
- nvme_end_request(rq, status, result);
+ __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
+ nvme_end_request(rq, status, result);
check_error:
if (terminate_assoc)
}
static void
-__nvme_fc_final_op_cleanup(struct request *rq)
+nvme_fc_complete_rq(struct request *rq)
{
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
struct nvme_fc_ctrl *ctrl = op->ctrl;
atomic_set(&op->state, FCPOP_STATE_IDLE);
- op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
- FCOP_FLAGS_COMPLETE);
nvme_fc_unmap_data(ctrl, rq, op);
nvme_complete_rq(rq);
nvme_fc_ctrl_put(ctrl);
-
-}
-
-static void
-nvme_fc_complete_rq(struct request *rq)
-{
- struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
- struct nvme_fc_ctrl *ctrl = op->ctrl;
- unsigned long flags;
- bool completed = false;
-
- /*
- * the core layer, on controller resets after calling
- * nvme_shutdown_ctrl(), calls complete_rq without our
- * calling blk_mq_complete_request(), thus there may still
- * be live i/o outstanding with the LLDD. Means transport has
- * to track complete calls vs fcpio_done calls to know what
- * path to take on completes and dones.
- */
- spin_lock_irqsave(&ctrl->lock, flags);
- if (op->flags & FCOP_FLAGS_COMPLETE)
- completed = true;
- else
- op->flags |= FCOP_FLAGS_RELEASED;
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- if (completed)
- __nvme_fc_final_op_cleanup(rq);
}
/*
struct nvme_ctrl *nctrl = data;
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
- unsigned long flags;
- int status;
if (!blk_mq_request_started(req))
return;
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO) {
- ctrl->iocnt++;
- op->flags |= FCOP_FLAGS_TERMIO;
- }
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- status = __nvme_fc_abort_op(ctrl, op);
- if (status) {
- /*
- * if __nvme_fc_abort_op failed the io wasn't
- * active. Thus this call path is running in
- * parallel to the io complete. Treat as non-error.
- */
-
- /* back out the flags/counters */
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO)
- ctrl->iocnt--;
- op->flags &= ~FCOP_FLAGS_TERMIO;
- spin_unlock_irqrestore(&ctrl->lock, flags);
- return;
- }
+ __nvme_fc_abort_op(ctrl, op);
}
unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
bool recon = true;
- if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING)
+ if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
return;
if (portptr->port_state == FC_OBJSTATE_ONLINE)
/* will block will waiting for io to terminate */
nvme_fc_delete_association(ctrl);
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: error_recovery: Couldn't change state "
- "to RECONNECTING\n", ctrl->cnum);
+ "to CONNECTING\n", ctrl->cnum);
return;
}
* transport errors (frame drop, LS failure) inherently must kill
* the association. The transport is coded so that any command used
* to create the association (prior to a LIVE state transition
- * while NEW or RECONNECTING) will fail if it completes in error or
+ * while NEW or CONNECTING) will fail if it completes in error or
* times out.
*
* As such: as the connect request was mostly likely due to a
NVME_CTRL_LIVE,
NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */
NVME_CTRL_RESETTING,
- NVME_CTRL_RECONNECTING,
+ NVME_CTRL_CONNECTING,
NVME_CTRL_DELETING,
NVME_CTRL_DEAD,
};
struct work_struct scan_work;
struct work_struct async_event_work;
struct delayed_work ka_work;
+ struct nvme_command ka_cmd;
struct work_struct fw_act_work;
/* Power saving configuration */
/* If there is a reset/reinit ongoing, we shouldn't reset again. */
switch (dev->ctrl.state) {
case NVME_CTRL_RESETTING:
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
return false;
default:
break;
* cancellation error. All outstanding requests are completed on
* shutdown, so we return BLK_EH_HANDLED.
*/
- if (dev->ctrl.state == NVME_CTRL_RESETTING) {
+ switch (dev->ctrl.state) {
+ case NVME_CTRL_CONNECTING:
+ case NVME_CTRL_RESETTING:
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_HANDLED;
+ default:
+ break;
}
/*
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
int qid, int depth)
{
- if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
- unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
- dev->ctrl.page_size);
- nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
- nvmeq->sq_cmds_io = dev->cmb + offset;
- } else {
- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
- &nvmeq->sq_dma_addr, GFP_KERNEL);
- if (!nvmeq->sq_cmds)
- return -ENOMEM;
- }
+ /* CMB SQEs will be mapped before creation */
+ if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS))
+ return 0;
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+ &nvmeq->sq_dma_addr, GFP_KERNEL);
+ if (!nvmeq->sq_cmds)
+ return -ENOMEM;
return 0;
}
struct nvme_dev *dev = nvmeq->dev;
int result;
+ if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+ unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
+ dev->ctrl.page_size);
+ nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
+ nvmeq->sq_cmds_io = dev->cmb + offset;
+ }
+
nvmeq->cq_vector = qid - 1;
result = adapter_alloc_cq(dev, qid, nvmeq);
if (result < 0)
nvme_dev_disable(dev, false);
/*
- * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
+ * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
* initializing procedure here.
*/
- if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RECONNECTING)) {
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
dev_warn(dev->ctrl.device,
- "failed to mark controller RECONNECTING\n");
+ "failed to mark controller CONNECTING\n");
goto out;
}
static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
{
/* If we are resetting/deleting then do nothing */
- if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
+ if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
ctrl->ctrl.state == NVME_CTRL_LIVE);
return;
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_start_queues(&ctrl->ctrl);
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure should never happen */
WARN_ON_ONCE(1);
return;
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false);
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure should never happen */
WARN_ON_ONCE(1);
return;
return;
out_fail:
- dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
- nvme_remove_namespaces(&ctrl->ctrl);
- nvme_rdma_shutdown_ctrl(ctrl, true);
- nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
+ ++ctrl->ctrl.nr_reconnects;
+ nvme_rdma_reconnect_or_remove(ctrl);
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
if (!ctrl->queues)
goto out_uninit_ctrl;
+ changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
+ WARN_ON_ONCE(!changed);
+
ret = nvme_rdma_configure_admin_queue(ctrl, true);
if (ret)
goto out_kfree_queues;
static u16 nvmet_discard_range(struct nvmet_ns *ns,
struct nvme_dsm_range *range, struct bio **bio)
{
- if (__blkdev_issue_discard(ns->bdev,
+ int ret;
+
+ ret = __blkdev_issue_discard(ns->bdev,
le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
- GFP_KERNEL, 0, bio))
+ GFP_KERNEL, 0, bio);
+ if (ret && ret != -EOPNOTSUPP)
return NVME_SC_INTERNAL | NVME_SC_DNR;
return 0;
}
return 0;
}
-static void *
+static const void *
of_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
const struct device *dev)
{
- return (void *)of_device_get_match_data(dev);
+ return of_device_get_match_data(dev);
}
const struct fwnode_operations of_fwnode_ops = {
if (max_opps <= 0)
return max_opps ? max_opps : -ENODATA;
- freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_ATOMIC);
+ freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_KERNEL);
if (!freq_table)
return -ENOMEM;
static void quirk_chelsio_extend_vpd(struct pci_dev *dev)
{
- pci_set_vpd_size(dev, 8192);
-}
-
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x20, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x21, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x22, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x23, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x24, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x25, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x26, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x30, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x31, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x32, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x35, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x36, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x37, quirk_chelsio_extend_vpd);
+ int chip = (dev->device & 0xf000) >> 12;
+ int func = (dev->device & 0x0f00) >> 8;
+ int prod = (dev->device & 0x00ff) >> 0;
+
+ /*
+ * If this is a T3-based adapter, there's a 1KB VPD area at offset
+ * 0xc00 which contains the preferred VPD values. If this is a T4 or
+ * later based adapter, the special VPD is at offset 0x400 for the
+ * Physical Functions (the SR-IOV Virtual Functions have no VPD
+ * Capabilities). The PCI VPD Access core routines will normally
+ * compute the size of the VPD by parsing the VPD Data Structure at
+ * offset 0x000. This will result in silent failures when attempting
+ * to accesses these other VPD areas which are beyond those computed
+ * limits.
+ */
+ if (chip == 0x0 && prod >= 0x20)
+ pci_set_vpd_size(dev, 8192);
+ else if (chip >= 0x4 && func < 0x8)
+ pci_set_vpd_size(dev, 2048);
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
+ quirk_chelsio_extend_vpd);
#ifdef CONFIG_ACPI
/*
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/perf/arm_pmu.h>
-#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/sched/clock.h>
#include <linux/spinlock.h>
#include <asm/irq_regs.h>
+static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
+static DEFINE_PER_CPU(int, cpu_irq);
+
static int
armpmu_map_cache_event(const unsigned (*cache_map)
[PERF_COUNT_HW_CACHE_MAX]
return 0;
}
-static struct arm_pmu_platdata *armpmu_get_platdata(struct arm_pmu *armpmu)
-{
- struct platform_device *pdev = armpmu->plat_device;
-
- return pdev ? dev_get_platdata(&pdev->dev) : NULL;
-}
-
static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
{
struct arm_pmu *armpmu;
- struct arm_pmu_platdata *plat;
int ret;
u64 start_clock, finish_clock;
* dereference.
*/
armpmu = *(void **)dev;
-
- plat = armpmu_get_platdata(armpmu);
+ if (WARN_ON_ONCE(!armpmu))
+ return IRQ_NONE;
start_clock = sched_clock();
- if (plat && plat->handle_irq)
- ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
- else
- ret = armpmu->handle_irq(irq, armpmu);
+ ret = armpmu->handle_irq(irq, armpmu);
finish_clock = sched_clock();
perf_sample_event_took(finish_clock - start_clock);
}
EXPORT_SYMBOL_GPL(perf_num_counters);
-void armpmu_free_irq(struct arm_pmu *armpmu, int cpu)
+static int armpmu_count_irq_users(const int irq)
{
- struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
- int irq = per_cpu(hw_events->irq, cpu);
+ int cpu, count = 0;
- if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
- return;
-
- if (irq_is_percpu_devid(irq)) {
- free_percpu_irq(irq, &hw_events->percpu_pmu);
- cpumask_clear(&armpmu->active_irqs);
- return;
+ for_each_possible_cpu(cpu) {
+ if (per_cpu(cpu_irq, cpu) == irq)
+ count++;
}
- free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+ return count;
}
-void armpmu_free_irqs(struct arm_pmu *armpmu)
+void armpmu_free_irq(int irq, int cpu)
{
- int cpu;
+ if (per_cpu(cpu_irq, cpu) == 0)
+ return;
+ if (WARN_ON(irq != per_cpu(cpu_irq, cpu)))
+ return;
+
+ if (!irq_is_percpu_devid(irq))
+ free_irq(irq, per_cpu_ptr(&cpu_armpmu, cpu));
+ else if (armpmu_count_irq_users(irq) == 1)
+ free_percpu_irq(irq, &cpu_armpmu);
- for_each_cpu(cpu, &armpmu->supported_cpus)
- armpmu_free_irq(armpmu, cpu);
+ per_cpu(cpu_irq, cpu) = 0;
}
-int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
+int armpmu_request_irq(int irq, int cpu)
{
int err = 0;
- struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
const irq_handler_t handler = armpmu_dispatch_irq;
- int irq = per_cpu(hw_events->irq, cpu);
if (!irq)
return 0;
- if (irq_is_percpu_devid(irq) && cpumask_empty(&armpmu->active_irqs)) {
- err = request_percpu_irq(irq, handler, "arm-pmu",
- &hw_events->percpu_pmu);
- } else if (irq_is_percpu_devid(irq)) {
- int other_cpu = cpumask_first(&armpmu->active_irqs);
- int other_irq = per_cpu(hw_events->irq, other_cpu);
-
- if (irq != other_irq) {
- pr_warn("mismatched PPIs detected.\n");
- err = -EINVAL;
- goto err_out;
- }
- } else {
- struct arm_pmu_platdata *platdata = armpmu_get_platdata(armpmu);
+ if (!irq_is_percpu_devid(irq)) {
unsigned long irq_flags;
err = irq_force_affinity(irq, cpumask_of(cpu));
goto err_out;
}
- if (platdata && platdata->irq_flags) {
- irq_flags = platdata->irq_flags;
- } else {
- irq_flags = IRQF_PERCPU |
- IRQF_NOBALANCING |
- IRQF_NO_THREAD;
- }
+ irq_flags = IRQF_PERCPU |
+ IRQF_NOBALANCING |
+ IRQF_NO_THREAD;
+ irq_set_status_flags(irq, IRQ_NOAUTOEN);
err = request_irq(irq, handler, irq_flags, "arm-pmu",
- per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+ per_cpu_ptr(&cpu_armpmu, cpu));
+ } else if (armpmu_count_irq_users(irq) == 0) {
+ err = request_percpu_irq(irq, handler, "arm-pmu",
+ &cpu_armpmu);
}
if (err)
goto err_out;
- cpumask_set_cpu(cpu, &armpmu->active_irqs);
+ per_cpu(cpu_irq, cpu) = irq;
return 0;
err_out:
return err;
}
-int armpmu_request_irqs(struct arm_pmu *armpmu)
-{
- int cpu, err;
-
- for_each_cpu(cpu, &armpmu->supported_cpus) {
- err = armpmu_request_irq(armpmu, cpu);
- if (err)
- break;
- }
-
- return err;
-}
-
static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
{
struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
if (pmu->reset)
pmu->reset(pmu);
+ per_cpu(cpu_armpmu, cpu) = pmu;
+
irq = armpmu_get_cpu_irq(pmu, cpu);
if (irq) {
- if (irq_is_percpu_devid(irq)) {
+ if (irq_is_percpu_devid(irq))
enable_percpu_irq(irq, IRQ_TYPE_NONE);
- return 0;
- }
+ else
+ enable_irq(irq);
}
return 0;
return 0;
irq = armpmu_get_cpu_irq(pmu, cpu);
- if (irq && irq_is_percpu_devid(irq))
- disable_percpu_irq(irq);
+ if (irq) {
+ if (irq_is_percpu_devid(irq))
+ disable_percpu_irq(irq);
+ else
+ disable_irq(irq);
+ }
+
+ per_cpu(cpu_armpmu, cpu) = NULL;
return 0;
}
&cpu_pmu->node);
}
-struct arm_pmu *armpmu_alloc(void)
+static struct arm_pmu *__armpmu_alloc(gfp_t flags)
{
struct arm_pmu *pmu;
int cpu;
- pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+ pmu = kzalloc(sizeof(*pmu), flags);
if (!pmu) {
pr_info("failed to allocate PMU device!\n");
goto out;
}
- pmu->hw_events = alloc_percpu(struct pmu_hw_events);
+ pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags);
if (!pmu->hw_events) {
pr_info("failed to allocate per-cpu PMU data.\n");
goto out_free_pmu;
return NULL;
}
+struct arm_pmu *armpmu_alloc(void)
+{
+ return __armpmu_alloc(GFP_KERNEL);
+}
+
+struct arm_pmu *armpmu_alloc_atomic(void)
+{
+ return __armpmu_alloc(GFP_ATOMIC);
+}
+
+
void armpmu_free(struct arm_pmu *pmu)
{
free_percpu(pmu->hw_events);
#include <linux/acpi.h>
#include <linux/cpumask.h>
#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
#include <linux/percpu.h>
#include <linux/perf/arm_pmu.h>
pr_warn("No ACPI PMU IRQ for CPU%d\n", cpu);
}
+ /*
+ * Log and request the IRQ so the core arm_pmu code can manage
+ * it. We'll have to sanity-check IRQs later when we associate
+ * them with their PMUs.
+ */
per_cpu(pmu_irqs, cpu) = irq;
+ armpmu_request_irq(irq, cpu);
}
return 0;
return pmu;
}
- pmu = armpmu_alloc();
+ pmu = armpmu_alloc_atomic();
if (!pmu) {
pr_warn("Unable to allocate PMU for CPU%d\n",
smp_processor_id());
return pmu;
}
+/*
+ * Check whether the new IRQ is compatible with those already associated with
+ * the PMU (e.g. we don't have mismatched PPIs).
+ */
+static bool pmu_irq_matches(struct arm_pmu *pmu, int irq)
+{
+ struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+ int cpu;
+
+ if (!irq)
+ return true;
+
+ for_each_cpu(cpu, &pmu->supported_cpus) {
+ int other_irq = per_cpu(hw_events->irq, cpu);
+ if (!other_irq)
+ continue;
+
+ if (irq == other_irq)
+ continue;
+ if (!irq_is_percpu_devid(irq) && !irq_is_percpu_devid(other_irq))
+ continue;
+
+ pr_warn("mismatched PPIs detected\n");
+ return false;
+ }
+
+ return true;
+}
+
/*
* This must run before the common arm_pmu hotplug logic, so that we can
* associate a CPU and its interrupt before the common code tries to manage the
if (!pmu)
return -ENOMEM;
- cpumask_set_cpu(cpu, &pmu->supported_cpus);
-
per_cpu(probed_pmus, cpu) = pmu;
- /*
- * Log and request the IRQ so the core arm_pmu code can manage it. In
- * some situations (e.g. mismatched PPIs), we may fail to request the
- * IRQ. However, it may be too late for us to do anything about it.
- * The common ARM PMU code will log a warning in this case.
- */
- hw_events = pmu->hw_events;
- per_cpu(hw_events->irq, cpu) = irq;
- armpmu_request_irq(pmu, cpu);
+ if (pmu_irq_matches(pmu, irq)) {
+ hw_events = pmu->hw_events;
+ per_cpu(hw_events->irq, cpu) = irq;
+ }
+
+ cpumask_set_cpu(cpu, &pmu->supported_cpus);
/*
* Ideally, we'd probe the PMU here when we find the first matching
if (acpi_disabled)
return 0;
- /*
- * We can't request IRQs yet, since we don't know the cookie value
- * until we know which CPUs share the same logical PMU. We'll handle
- * that in arm_pmu_acpi_cpu_starting().
- */
ret = arm_pmu_acpi_parse_irqs();
if (ret)
return ret;
pdev->dev.of_node);
}
- /*
- * Some platforms have all PMU IRQs OR'd into a single IRQ, with a
- * special platdata function that attempts to demux them.
- */
- if (dev_get_platdata(&pdev->dev))
- cpumask_setall(&pmu->supported_cpus);
-
for (i = 0; i < num_irqs; i++) {
int cpu, irq;
return 0;
}
+static int armpmu_request_irqs(struct arm_pmu *armpmu)
+{
+ struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
+ int cpu, err;
+
+ for_each_cpu(cpu, &armpmu->supported_cpus) {
+ int irq = per_cpu(hw_events->irq, cpu);
+ if (!irq)
+ continue;
+
+ err = armpmu_request_irq(irq, cpu);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static void armpmu_free_irqs(struct arm_pmu *armpmu)
+{
+ int cpu;
+ struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
+
+ for_each_cpu(cpu, &armpmu->supported_cpus) {
+ int irq = per_cpu(hw_events->irq, cpu);
+
+ armpmu_free_irq(irq, cpu);
+ }
+}
+
int arm_pmu_device_probe(struct platform_device *pdev,
const struct of_device_id *of_table,
const struct pmu_probe_info *probe_table)
DMI_MATCH(DMI_CHASSIS_TYPE, "32"), /*Detachable*/
},
},
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_CHASSIS_TYPE, "30"), /*Tablet*/
- },
- },
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_CHASSIS_TYPE, "31"), /*Convertible*/
- },
- },
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_CHASSIS_TYPE, "32"), /*Detachable*/
- },
- },
{
.ident = "Dell Computer Corporation",
.matches = {
struct calling_interface_buffer buffer;
int ret;
- dell_fill_request(&buffer, 0, 0, 0, 0);
+ dell_fill_request(&buffer, 0x1, 0, 0, 0);
ret = dell_send_request(&buffer,
CLASS_KBD_BACKLIGHT, SELECT_KBD_BACKLIGHT);
if (ret)
/*
* ACPI Helpers
*/
-#define IDEAPAD_EC_TIMEOUT (100) /* in ms */
+#define IDEAPAD_EC_TIMEOUT (200) /* in ms */
static int read_method_int(acpi_handle handle, const char *method, int *val)
{
goto probe_failure;
}
- buf = kmalloc(strlen(wdriver->driver.name) + 4, GFP_KERNEL);
+ buf = kmalloc(strlen(wdriver->driver.name) + 5, GFP_KERNEL);
if (!buf) {
ret = -ENOMEM;
goto probe_string_failure;
vcdev->device_lost = true;
rc = NOTIFY_DONE;
break;
+ case CIO_OPER:
+ rc = NOTIFY_OK;
+ break;
default:
rc = NOTIFY_DONE;
break;
{},
};
+#ifdef CONFIG_PM_SLEEP
+static int virtio_ccw_freeze(struct ccw_device *cdev)
+{
+ struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
+
+ return virtio_device_freeze(&vcdev->vdev);
+}
+
+static int virtio_ccw_restore(struct ccw_device *cdev)
+{
+ struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
+ int ret;
+
+ ret = virtio_ccw_set_transport_rev(vcdev);
+ if (ret)
+ return ret;
+
+ return virtio_device_restore(&vcdev->vdev);
+}
+#endif
+
static struct ccw_driver virtio_ccw_driver = {
.driver = {
.owner = THIS_MODULE,
.set_online = virtio_ccw_online,
.notify = virtio_ccw_cio_notify,
.int_class = IRQIO_VIR,
+#ifdef CONFIG_PM_SLEEP
+ .freeze = virtio_ccw_freeze,
+ .thaw = virtio_ccw_restore,
+ .restore = virtio_ccw_restore,
+#endif
};
static int __init pure_hex(char **cp, unsigned int *val, int min_digit,
CFLAGS_ncr53c8xx.o := $(ncr53c8xx-flags-y) $(ncr53c8xx-flags-m)
zalon7xx-objs := zalon.o ncr53c8xx.o
NCR_Q720_mod-objs := NCR_Q720.o ncr53c8xx.o
-oktagon_esp_mod-objs := oktagon_esp.o oktagon_io.o
# Files generated that shall be removed upon make clean
clean-files := 53c700_d.h 53c700_u.h
* Map in the registers from the adapter.
*/
aac->base_size = AAC_MIN_FOOTPRINT_SIZE;
- if ((*aac_drivers[index].init)(aac))
+ if ((*aac_drivers[index].init)(aac)) {
+ error = -ENODEV;
goto out_unmap;
+ }
if (aac->sync_mode) {
if (aac_sync_mode)
+++ /dev/null
-/*
- * Implementation of Utility functions for all SCSI device types.
- *
- * Copyright (c) 1997, 1998, 1999 Justin T. Gibbs.
- * Copyright (c) 1997, 1998 Kenneth D. Merry.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification, immediately at the beginning of the file.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/cam/scsi/scsi_all.c,v 1.38 2002/09/23 04:56:35 mjacob Exp $
- * $Id$
- */
-
-#include "aiclib.h"
-
/* we will not receive ABTS response for this IO */
BNX2FC_IO_DBG(io_req, "Timer context finished processing "
"this scsi cmd\n");
+ return;
}
/* Cancel the timeout_work, as we received IO completion */
static struct csio_lnode *
csio_ln_lookup_by_portid(struct csio_hw *hw, uint8_t portid)
{
- struct csio_lnode *ln = hw->rln;
+ struct csio_lnode *ln;
struct list_head *tmp;
/* Match siblings lnode with portid */
/**
* alua_rtpg_queue() - cause RTPG to be submitted asynchronously
+ * @pg: ALUA port group associated with @sdev.
+ * @sdev: SCSI device for which to submit an RTPG.
+ * @qdata: Information about the callback to invoke after the RTPG.
+ * @force: Whether or not to submit an RTPG if a work item that will submit an
+ * RTPG already has been scheduled.
*
* Returns true if and only if alua_rtpg_work() will be called asynchronously.
* That function is responsible for calling @qdata->fn().
};
struct ibmvfc_fcp_rsp_info {
- __be16 reserved;
+ u8 reserved[3];
u8 rsp_code;
u8 reserved2[4];
}__attribute__((packed, aligned (2)));
struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
struct sockaddr_in6 addr;
- int rc, len;
+ int rc;
switch(param) {
case ISCSI_PARAM_CONN_PORT:
}
if (param == ISCSI_PARAM_LOCAL_PORT)
rc = kernel_getsockname(tcp_sw_conn->sock,
- (struct sockaddr *)&addr, &len);
+ (struct sockaddr *)&addr);
else
rc = kernel_getpeername(tcp_sw_conn->sock,
- (struct sockaddr *)&addr, &len);
+ (struct sockaddr *)&addr);
spin_unlock_bh(&conn->session->frwd_lock);
- if (rc)
+ if (rc < 0)
return rc;
return iscsi_conn_get_addr_param((struct sockaddr_storage *)
struct iscsi_tcp_conn *tcp_conn;
struct iscsi_sw_tcp_conn *tcp_sw_conn;
struct sockaddr_in6 addr;
- int rc, len;
+ int rc;
switch (param) {
case ISCSI_HOST_PARAM_IPADDRESS:
}
rc = kernel_getsockname(tcp_sw_conn->sock,
- (struct sockaddr *)&addr, &len);
+ (struct sockaddr *)&addr);
spin_unlock_bh(&session->frwd_lock);
- if (rc)
+ if (rc < 0)
return rc;
return iscsi_conn_get_addr_param((struct sockaddr_storage *)
continue;
}
- for_each_cpu(cpu, mask)
+ for_each_cpu_and(cpu, mask, cpu_online_mask) {
+ if (cpu >= ioc->cpu_msix_table_sz)
+ break;
ioc->cpu_msix_table[cpu] = reply_q->msix_index;
+ }
}
return;
}
{
struct qedi_ctx *qedi = data;
struct nvm_iscsi_initiator *initiator;
- char *str = buf;
int rc = 1;
u32 ipv6_en, dhcp_en, ip_len;
struct nvm_iscsi_block *block;
switch (type) {
case ISCSI_BOOT_ETH_IP_ADDR:
- rc = snprintf(str, ip_len, fmt, ip);
+ rc = snprintf(buf, ip_len, fmt, ip);
break;
case ISCSI_BOOT_ETH_SUBNET_MASK:
- rc = snprintf(str, ip_len, fmt, sub);
+ rc = snprintf(buf, ip_len, fmt, sub);
break;
case ISCSI_BOOT_ETH_GATEWAY:
- rc = snprintf(str, ip_len, fmt, gw);
+ rc = snprintf(buf, ip_len, fmt, gw);
break;
case ISCSI_BOOT_ETH_FLAGS:
- rc = snprintf(str, 3, "%hhd\n",
+ rc = snprintf(buf, 3, "%hhd\n",
SYSFS_FLAG_FW_SEL_BOOT);
break;
case ISCSI_BOOT_ETH_INDEX:
- rc = snprintf(str, 3, "0\n");
+ rc = snprintf(buf, 3, "0\n");
break;
case ISCSI_BOOT_ETH_MAC:
- rc = sysfs_format_mac(str, qedi->mac, ETH_ALEN);
+ rc = sysfs_format_mac(buf, qedi->mac, ETH_ALEN);
break;
case ISCSI_BOOT_ETH_VLAN:
- rc = snprintf(str, 12, "%d\n",
+ rc = snprintf(buf, 12, "%d\n",
GET_FIELD2(initiator->generic_cont0,
NVM_ISCSI_CFG_INITIATOR_VLAN));
break;
case ISCSI_BOOT_ETH_ORIGIN:
if (dhcp_en)
- rc = snprintf(str, 3, "3\n");
+ rc = snprintf(buf, 3, "3\n");
break;
default:
rc = 0;
{
struct qedi_ctx *qedi = data;
struct nvm_iscsi_initiator *initiator;
- char *str = buf;
int rc;
struct nvm_iscsi_block *block;
switch (type) {
case ISCSI_BOOT_INI_INITIATOR_NAME:
- rc = snprintf(str, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n",
- initiator->initiator_name.byte);
+ rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN,
+ initiator->initiator_name.byte);
break;
default:
rc = 0;
qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type,
char *buf, enum qedi_nvm_tgts idx)
{
- char *str = buf;
int rc = 1;
u32 ctrl_flags, ipv6_en, chap_en, mchap_en, ip_len;
struct nvm_iscsi_block *block;
switch (type) {
case ISCSI_BOOT_TGT_NAME:
- rc = snprintf(str, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n",
- block->target[idx].target_name.byte);
+ rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN,
+ block->target[idx].target_name.byte);
break;
case ISCSI_BOOT_TGT_IP_ADDR:
if (ipv6_en)
- rc = snprintf(str, ip_len, "%pI6\n",
+ rc = snprintf(buf, ip_len, "%pI6\n",
block->target[idx].ipv6_addr.byte);
else
- rc = snprintf(str, ip_len, "%pI4\n",
+ rc = snprintf(buf, ip_len, "%pI4\n",
block->target[idx].ipv4_addr.byte);
break;
case ISCSI_BOOT_TGT_PORT:
- rc = snprintf(str, 12, "%d\n",
+ rc = snprintf(buf, 12, "%d\n",
GET_FIELD2(block->target[idx].generic_cont0,
NVM_ISCSI_CFG_TARGET_TCP_PORT));
break;
case ISCSI_BOOT_TGT_LUN:
- rc = snprintf(str, 22, "%.*d\n",
+ rc = snprintf(buf, 22, "%.*d\n",
block->target[idx].lun.value[1],
block->target[idx].lun.value[0]);
break;
case ISCSI_BOOT_TGT_CHAP_NAME:
- rc = snprintf(str, NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, "%s\n",
- chap_name);
+ rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN,
+ chap_name);
break;
case ISCSI_BOOT_TGT_CHAP_SECRET:
- rc = snprintf(str, NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, "%s\n",
- chap_secret);
+ rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN,
+ chap_secret);
break;
case ISCSI_BOOT_TGT_REV_CHAP_NAME:
- rc = snprintf(str, NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, "%s\n",
- mchap_name);
+ rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN,
+ mchap_name);
break;
case ISCSI_BOOT_TGT_REV_CHAP_SECRET:
- rc = snprintf(str, NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, "%s\n",
- mchap_secret);
+ rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN,
+ mchap_secret);
break;
case ISCSI_BOOT_TGT_FLAGS:
- rc = snprintf(str, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT);
+ rc = snprintf(buf, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT);
break;
case ISCSI_BOOT_TGT_NIC_ASSOC:
- rc = snprintf(str, 3, "0\n");
+ rc = snprintf(buf, 3, "0\n");
break;
default:
rc = 0;
req->outstanding_cmds[sp->handle] = NULL;
iocb = &sp->u.iocb_cmd;
iocb->timeout(sp);
- if (sp->type != SRB_ELS_DCMD)
- sp->free(sp);
spin_unlock_irqrestore(&vha->hw->hardware_lock, flags);
}
srb_t *sp = data;
fc_port_t *fcport = sp->fcport;
struct srb_iocb *lio = &sp->u.iocb_cmd;
- struct event_arg ea;
if (fcport) {
ql_dbg(ql_dbg_disc, fcport->vha, 0x2071,
switch (sp->type) {
case SRB_LOGIN_CMD:
- if (!fcport)
- break;
/* Retry as needed. */
lio->u.logio.data[0] = MBS_COMMAND_ERROR;
lio->u.logio.data[1] = lio->u.logio.flags & SRB_LOGIN_RETRIED ?
QLA_LOGIO_LOGIN_RETRIED : 0;
- memset(&ea, 0, sizeof(ea));
- ea.event = FCME_PLOGI_DONE;
- ea.fcport = sp->fcport;
- ea.data[0] = lio->u.logio.data[0];
- ea.data[1] = lio->u.logio.data[1];
- ea.sp = sp;
- qla24xx_handle_plogi_done_event(fcport->vha, &ea);
+ sp->done(sp, QLA_FUNCTION_TIMEOUT);
break;
case SRB_LOGOUT_CMD:
- if (!fcport)
- break;
- qlt_logo_completion_handler(fcport, QLA_FUNCTION_TIMEOUT);
- break;
case SRB_CT_PTHRU_CMD:
case SRB_MB_IOCB:
case SRB_NACK_PLOGI:
qla2x00_async_logout_sp_done(void *ptr, int res)
{
srb_t *sp = ptr;
- struct srb_iocb *lio = &sp->u.iocb_cmd;
sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
- if (!test_bit(UNLOADING, &sp->vha->dpc_flags))
- qla2x00_post_async_logout_done_work(sp->vha, sp->fcport,
- lio->u.logio.data);
+ sp->fcport->login_gen++;
+ qlt_logo_completion_handler(sp->fcport, res);
sp->free(sp);
}
memset(abt_iocb, 0, sizeof(struct abort_entry_24xx));
abt_iocb->entry_type = ABORT_IOCB_TYPE;
abt_iocb->entry_count = 1;
- abt_iocb->handle =
- cpu_to_le32(MAKE_HANDLE(aio->u.abt.req_que_no,
- aio->u.abt.cmd_hndl));
+ abt_iocb->handle = cpu_to_le32(MAKE_HANDLE(req->id, sp->handle));
abt_iocb->nport_handle = cpu_to_le16(sp->fcport->loop_id);
abt_iocb->handle_to_abort =
- cpu_to_le32(MAKE_HANDLE(req->id, aio->u.abt.cmd_hndl));
+ cpu_to_le32(MAKE_HANDLE(aio->u.abt.req_que_no,
+ aio->u.abt.cmd_hndl));
abt_iocb->port_id[0] = sp->fcport->d_id.b.al_pa;
abt_iocb->port_id[1] = sp->fcport->d_id.b.area;
abt_iocb->port_id[2] = sp->fcport->d_id.b.domain;
struct device_reg_2xxx __iomem *reg = &ha->iobase->isp;
/* Read all mbox registers? */
- mboxes = (1 << ha->mbx_count) - 1;
+ WARN_ON_ONCE(ha->mbx_count > 32);
+ mboxes = (1ULL << ha->mbx_count) - 1;
if (!ha->mcp)
ql_dbg(ql_dbg_async, vha, 0x5001, "MBX pointer ERROR.\n");
else
struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
/* Read all mbox registers? */
- mboxes = (1 << ha->mbx_count) - 1;
+ WARN_ON_ONCE(ha->mbx_count > 32);
+ mboxes = (1ULL << ha->mbx_count) - 1;
if (!ha->mcp)
ql_dbg(ql_dbg_async, vha, 0x504e, "MBX pointer ERROR.\n");
else
}
qla2x00_wait_for_hba_ready(base_vha);
+ qla2x00_wait_for_sess_deletion(base_vha);
+
/*
* if UNLOAD flag is already set, then continue unload,
* where it was set first.
sess);
qlt_send_term_imm_notif(vha, iocb, 1);
res = 0;
- spin_lock_irqsave(&tgt->ha->tgt.sess_lock,
- flags);
break;
}
#define DEV_DB_NON_PERSISTENT 0
#define DEV_DB_PERSISTENT 1
+#define QL4_ISP_REG_DISCONNECT 0xffffffffU
+
#define COPY_ISID(dst_isid, src_isid) { \
int i, j; \
for (i = 0, j = ISID_SIZE - 1; i < ISID_SIZE;) \
static struct scsi_transport_template *qla4xxx_scsi_transport;
+static int qla4xxx_isp_check_reg(struct scsi_qla_host *ha)
+{
+ u32 reg_val = 0;
+ int rval = QLA_SUCCESS;
+
+ if (is_qla8022(ha))
+ reg_val = readl(&ha->qla4_82xx_reg->host_status);
+ else if (is_qla8032(ha) || is_qla8042(ha))
+ reg_val = qla4_8xxx_rd_direct(ha, QLA8XXX_PEG_ALIVE_COUNTER);
+ else
+ reg_val = readw(&ha->reg->ctrl_status);
+
+ if (reg_val == QL4_ISP_REG_DISCONNECT)
+ rval = QLA_ERROR;
+
+ return rval;
+}
+
static int qla4xxx_send_ping(struct Scsi_Host *shost, uint32_t iface_num,
uint32_t iface_type, uint32_t payload_size,
uint32_t pid, struct sockaddr *dst_addr)
struct srb *srb = NULL;
int ret = SUCCESS;
int wait = 0;
+ int rval;
ql4_printk(KERN_INFO, ha, "scsi%ld:%d:%llu: Abort command issued cmd=%p, cdb=0x%x\n",
ha->host_no, id, lun, cmd, cmd->cmnd[0]);
+ rval = qla4xxx_isp_check_reg(ha);
+ if (rval != QLA_SUCCESS) {
+ ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+ return FAILED;
+ }
+
spin_lock_irqsave(&ha->hardware_lock, flags);
srb = (struct srb *) CMD_SP(cmd);
if (!srb) {
struct scsi_qla_host *ha = to_qla_host(cmd->device->host);
struct ddb_entry *ddb_entry = cmd->device->hostdata;
int ret = FAILED, stat;
+ int rval;
if (!ddb_entry)
return ret;
cmd, jiffies, cmd->request->timeout / HZ,
ha->dpc_flags, cmd->result, cmd->allowed));
+ rval = qla4xxx_isp_check_reg(ha);
+ if (rval != QLA_SUCCESS) {
+ ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+ return FAILED;
+ }
+
/* FIXME: wait for hba to go online */
stat = qla4xxx_reset_lun(ha, ddb_entry, cmd->device->lun);
if (stat != QLA_SUCCESS) {
struct scsi_qla_host *ha = to_qla_host(cmd->device->host);
struct ddb_entry *ddb_entry = cmd->device->hostdata;
int stat, ret;
+ int rval;
if (!ddb_entry)
return FAILED;
ha->host_no, cmd, jiffies, cmd->request->timeout / HZ,
ha->dpc_flags, cmd->result, cmd->allowed));
+ rval = qla4xxx_isp_check_reg(ha);
+ if (rval != QLA_SUCCESS) {
+ ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+ return FAILED;
+ }
+
stat = qla4xxx_reset_target(ha, ddb_entry);
if (stat != QLA_SUCCESS) {
starget_printk(KERN_INFO, scsi_target(cmd->device),
{
int return_status = FAILED;
struct scsi_qla_host *ha;
+ int rval;
ha = to_qla_host(cmd->device->host);
+ rval = qla4xxx_isp_check_reg(ha);
+ if (rval != QLA_SUCCESS) {
+ ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n");
+ return FAILED;
+ }
+
if ((is_qla8032(ha) || is_qla8042(ha)) && ql4xdontresethba)
qla4_83xx_set_idc_dontreset(ha);
.eh_timed_out = storvsc_eh_timed_out,
.slave_alloc = storvsc_device_alloc,
.slave_configure = storvsc_device_configure,
- .cmd_per_lun = 255,
+ .cmd_per_lun = 2048,
.this_id = -1,
.use_clustering = ENABLE_CLUSTERING,
/* Make sure we dont get a sg segment crosses a page boundary */
* Look for the greatest clock divisor that allows an
* input speed faster than the period.
*/
- while (div-- > 0)
+ while (--div > 0)
if (kpc >= (div_10M[div] << 2)) break;
/*
/* REPORT SUPPORTED OPERATION CODES is not supported */
sdev->no_report_opcodes = 1;
+ /* WRITE_SAME command is not supported */
+ sdev->no_write_same = 1;
ufshcd_set_queue_depth(sdev);
struct sockaddr_qrtr *sq)
{
struct socket *sock;
- int sl = sizeof(*sq);
int ret;
ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM,
if (ret < 0)
return ERR_PTR(ret);
- ret = kernel_getsockname(sock, (struct sockaddr *)sq, &sl);
+ ret = kernel_getsockname(sock, (struct sockaddr *)sq);
if (ret < 0) {
sock_release(sock);
return ERR_PTR(ret);
size_t pgstart, pgend;
int ret = -EINVAL;
+ mutex_lock(&ashmem_mutex);
+
if (unlikely(!asma->file))
- return -EINVAL;
+ goto out_unlock;
- if (unlikely(copy_from_user(&pin, p, sizeof(pin))))
- return -EFAULT;
+ if (unlikely(copy_from_user(&pin, p, sizeof(pin)))) {
+ ret = -EFAULT;
+ goto out_unlock;
+ }
/* per custom, you can pass zero for len to mean "everything onward" */
if (!pin.len)
pin.len = PAGE_ALIGN(asma->size) - pin.offset;
if (unlikely((pin.offset | pin.len) & ~PAGE_MASK))
- return -EINVAL;
+ goto out_unlock;
if (unlikely(((__u32)-1) - pin.offset < pin.len))
- return -EINVAL;
+ goto out_unlock;
if (unlikely(PAGE_ALIGN(asma->size) < pin.offset + pin.len))
- return -EINVAL;
+ goto out_unlock;
pgstart = pin.offset / PAGE_SIZE;
pgend = pgstart + (pin.len / PAGE_SIZE) - 1;
- mutex_lock(&ashmem_mutex);
-
switch (cmd) {
case ASHMEM_PIN:
ret = ashmem_pin(asma, pgstart, pgend);
break;
}
+out_unlock:
mutex_unlock(&ashmem_mutex);
return ret;
#include <linux/err.h>
#include <linux/cma.h>
#include <linux/scatterlist.h>
+#include <linux/highmem.h>
#include "ion.h"
if (!pages)
return -ENOMEM;
+ if (PageHighMem(pages)) {
+ unsigned long nr_clear_pages = nr_pages;
+ struct page *page = pages;
+
+ while (nr_clear_pages > 0) {
+ void *vaddr = kmap_atomic(page);
+
+ memset(vaddr, 0, PAGE_SIZE);
+ kunmap_atomic(vaddr);
+ page++;
+ nr_clear_pages--;
+ }
+ } else {
+ memset(page_address(pages), 0, size);
+ }
+
table = kmalloc(sizeof(*table), GFP_KERNEL);
if (!table)
goto err;
config FSL_MC_BUS
bool "QorIQ DPAA2 fsl-mc bus driver"
- depends on OF && (ARCH_LAYERSCAPE || (COMPILE_TEST && (ARM || ARM64 || X86 || PPC)))
+ depends on OF && (ARCH_LAYERSCAPE || (COMPILE_TEST && (ARM || ARM64 || X86_LOCAL_APIC || PPC)))
select GENERIC_MSI_IRQ_DOMAIN
help
Driver to enable the bus infrastructure for the QorIQ DPAA2
for (np = of_find_matching_node(NULL, its_device_id); np;
np = of_find_matching_node(np, its_device_id)) {
+ if (!of_device_is_available(np))
+ continue;
if (!of_property_read_bool(np, "msi-controller"))
continue;
#define AD7192_GPOCON_P1DAT BIT(1) /* P1 state */
#define AD7192_GPOCON_P0DAT BIT(0) /* P0 state */
+#define AD7192_EXT_FREQ_MHZ_MIN 2457600
+#define AD7192_EXT_FREQ_MHZ_MAX 5120000
#define AD7192_INT_FREQ_MHZ 4915200
/* NOTE:
ARRAY_SIZE(ad7192_calib_arr));
}
+static inline bool ad7192_valid_external_frequency(u32 freq)
+{
+ return (freq >= AD7192_EXT_FREQ_MHZ_MIN &&
+ freq <= AD7192_EXT_FREQ_MHZ_MAX);
+}
+
static int ad7192_setup(struct ad7192_state *st,
const struct ad7192_platform_data *pdata)
{
id);
switch (pdata->clock_source_sel) {
- case AD7192_CLK_EXT_MCLK1_2:
- case AD7192_CLK_EXT_MCLK2:
- st->mclk = AD7192_INT_FREQ_MHZ;
- break;
case AD7192_CLK_INT:
case AD7192_CLK_INT_CO:
- if (pdata->ext_clk_hz)
- st->mclk = pdata->ext_clk_hz;
- else
- st->mclk = AD7192_INT_FREQ_MHZ;
+ st->mclk = AD7192_INT_FREQ_MHZ;
break;
+ case AD7192_CLK_EXT_MCLK1_2:
+ case AD7192_CLK_EXT_MCLK2:
+ if (ad7192_valid_external_frequency(pdata->ext_clk_hz)) {
+ st->mclk = pdata->ext_clk_hz;
+ break;
+ }
+ dev_err(&st->sd.spi->dev, "Invalid frequency setting %u\n",
+ pdata->ext_clk_hz);
+ ret = -EINVAL;
+ goto out;
default:
ret = -EINVAL;
goto out;
/* Ring buffer functions - here trigger setup related */
indio_dev->setup_ops = &ad5933_ring_setup_ops;
- indio_dev->modes |= INDIO_BUFFER_HARDWARE;
-
return 0;
}
indio_dev->dev.parent = &client->dev;
indio_dev->info = &ad5933_info;
indio_dev->name = id->name;
- indio_dev->modes = INDIO_DIRECT_MODE;
+ indio_dev->modes = (INDIO_BUFFER_SOFTWARE | INDIO_DIRECT_MODE);
indio_dev->channels = ad5933_channels;
indio_dev->num_channels = ARRAY_SIZE(ad5933_channels);
static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct ipx_address *addr;
struct sockaddr_ipx sipx;
struct ipx_sock *ipxs = ipx_sk(sk);
int rc;
- *uaddr_len = sizeof(struct sockaddr_ipx);
-
lock_sock(sk);
if (peer) {
rc = -ENOTCONN;
sipx.sipx_zero = 0;
memcpy(uaddr, &sipx, sizeof(sipx));
- rc = 0;
+ rc = sizeof(struct sockaddr_ipx);
out:
release_sock(sk);
return rc;
*
*/
static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_irda saddr;
struct sock *sk = sock->sk;
pr_debug("%s(), tsap_sel = %#x\n", __func__, saddr.sir_lsap_sel);
pr_debug("%s(), addr = %08x\n", __func__, saddr.sir_addr);
- /* uaddr_len come to us uninitialised */
- *uaddr_len = sizeof (struct sockaddr_irda);
- memcpy(uaddr, &saddr, *uaddr_len);
+ memcpy(uaddr, &saddr, sizeof (struct sockaddr_irda));
- return 0;
+ return sizeof (struct sockaddr_irda);
}
/*
lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
{
struct sockaddr_in sin;
- int len = sizeof(sin);
int rc;
if (remote)
- rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
+ rc = kernel_getpeername(sock, (struct sockaddr *)&sin);
else
- rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
- if (rc) {
+ rc = kernel_getsockname(sock, (struct sockaddr *)&sin);
+ if (rc < 0) {
CERROR("Error %d getting sock %s IP/port\n",
rc, remote ? "peer" : "local");
return rc;
struct socket *new_sock, *sock = np->np_socket;
struct sockaddr_in sock_in;
struct sockaddr_in6 sock_in6;
- int rc, err;
+ int rc;
rc = kernel_accept(sock, &new_sock, 0);
if (rc < 0)
memset(&sock_in6, 0, sizeof(struct sockaddr_in6));
rc = conn->sock->ops->getname(conn->sock,
- (struct sockaddr *)&sock_in6, &err, 1);
- if (!rc) {
+ (struct sockaddr *)&sock_in6, 1);
+ if (rc >= 0) {
if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
memcpy(&conn->login_sockaddr, &sock_in6, sizeof(sock_in6));
} else {
}
rc = conn->sock->ops->getname(conn->sock,
- (struct sockaddr *)&sock_in6, &err, 0);
- if (!rc) {
+ (struct sockaddr *)&sock_in6, 0);
+ if (rc >= 0) {
if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
memcpy(&conn->local_sockaddr, &sock_in6, sizeof(sock_in6));
} else {
memset(&sock_in, 0, sizeof(struct sockaddr_in));
rc = conn->sock->ops->getname(conn->sock,
- (struct sockaddr *)&sock_in, &err, 1);
- if (!rc)
+ (struct sockaddr *)&sock_in, 1);
+ if (rc >= 0)
memcpy(&conn->login_sockaddr, &sock_in, sizeof(sock_in));
rc = conn->sock->ops->getname(conn->sock,
- (struct sockaddr *)&sock_in, &err, 0);
- if (!rc)
+ (struct sockaddr *)&sock_in, 0);
+ if (rc >= 0)
memcpy(&conn->local_sockaddr, &sock_in, sizeof(sock_in));
}
config USB_EHCI_BIG_ENDIAN_DESC
bool
+config USB_UHCI_BIG_ENDIAN_MMIO
+ bool
+
+config USB_UHCI_BIG_ENDIAN_DESC
+ bool
+
menuconfig USB_SUPPORT
bool "USB support"
depends on HAS_IOMEM
wb = &acm->wb[wbn];
if (!wb->use) {
wb->use = 1;
+ wb->len = 0;
return wbn;
}
wbn = (wbn + 1) % ACM_NW;
static void acm_tty_flush_chars(struct tty_struct *tty)
{
struct acm *acm = tty->driver_data;
- struct acm_wb *cur = acm->putbuffer;
+ struct acm_wb *cur;
int err;
unsigned long flags;
+ spin_lock_irqsave(&acm->write_lock, flags);
+
+ cur = acm->putbuffer;
if (!cur) /* nothing to do */
- return;
+ goto out;
acm->putbuffer = NULL;
err = usb_autopm_get_interface_async(acm->control);
- spin_lock_irqsave(&acm->write_lock, flags);
if (err < 0) {
cur->use = 0;
acm->putbuffer = cur;
{ USB_DEVICE(0x1a0a, 0x0200), .driver_info =
USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
+ /* Corsair K70 RGB */
+ { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT },
+
/* Corsair Strafe RGB */
{ USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT },
/* Not specific buffer needed for ep0 ZLP */
dma_addr_t dma = hs_ep->desc_list_dma;
- dwc2_gadget_set_ep0_desc_chain(hsotg, hs_ep);
+ if (!index)
+ dwc2_gadget_set_ep0_desc_chain(hsotg, hs_ep);
+
dwc2_gadget_config_nonisoc_xfer_ddma(hs_ep, dma, 0);
} else {
dwc2_writel(DXEPTSIZ_MC(1) | DXEPTSIZ_PKTCNT(1) |
if (ints & DXEPINT_STSPHSERCVD) {
dev_dbg(hsotg->dev, "%s: StsPhseRcvd\n", __func__);
- /* Move to STATUS IN for DDMA */
- if (using_desc_dma(hsotg))
- dwc2_hsotg_ep0_zlp(hsotg, true);
+ /* Safety check EP0 state when STSPHSERCVD asserted */
+ if (hsotg->ep0_state == DWC2_EP0_DATA_OUT) {
+ /* Move to STATUS IN for DDMA */
+ if (using_desc_dma(hsotg))
+ dwc2_hsotg_ep0_zlp(hsotg, true);
+ }
+
}
if (ints & DXEPINT_BACK2BACKSETUP)
dwc2_writel(dwc2_hsotg_ep0_mps(hsotg->eps_out[0]->ep.maxpacket) |
DXEPCTL_USBACTEP, hsotg->regs + DIEPCTL0);
- dwc2_hsotg_enqueue_setup(hsotg);
-
- dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n",
- dwc2_readl(hsotg->regs + DIEPCTL0),
- dwc2_readl(hsotg->regs + DOEPCTL0));
-
/* clear global NAKs */
val = DCTL_CGOUTNAK | DCTL_CGNPINNAK;
if (!is_usb_reset)
mdelay(3);
hsotg->lx_state = DWC2_L0;
+
+ dwc2_hsotg_enqueue_setup(hsotg);
+
+ dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n",
+ dwc2_readl(hsotg->regs + DIEPCTL0),
+ dwc2_readl(hsotg->regs + DOEPCTL0));
}
static void dwc2_hsotg_core_disconnect(struct dwc2_hsotg *hsotg)
reg &= ~(DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_OTG));
reg |= DWC3_GCTL_PRTCAPDIR(mode);
dwc3_writel(dwc->regs, DWC3_GCTL, reg);
+
+ dwc->current_dr_role = mode;
}
static void __dwc3_set_mode(struct work_struct *work)
dwc3_set_prtcap(dwc, dwc->desired_dr_role);
- dwc->current_dr_role = dwc->desired_dr_role;
-
spin_unlock_irqrestore(&dwc->lock, flags);
switch (dwc->desired_dr_role) {
* XHCI driver will reset the host block. If dwc3 was configured for
* host-only mode, then we can return early.
*/
- if (dwc->dr_mode == USB_DR_MODE_HOST)
+ if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST)
return 0;
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
udelay(1);
} while (--retries);
+ phy_exit(dwc->usb3_generic_phy);
+ phy_exit(dwc->usb2_generic_phy);
+
return -ETIMEDOUT;
}
parms->hwparams8 = dwc3_readl(dwc->regs, DWC3_GHWPARAMS8);
}
+static int dwc3_core_ulpi_init(struct dwc3 *dwc)
+{
+ int intf;
+ int ret = 0;
+
+ intf = DWC3_GHWPARAMS3_HSPHY_IFC(dwc->hwparams.hwparams3);
+
+ if (intf == DWC3_GHWPARAMS3_HSPHY_IFC_ULPI ||
+ (intf == DWC3_GHWPARAMS3_HSPHY_IFC_UTMI_ULPI &&
+ dwc->hsphy_interface &&
+ !strncmp(dwc->hsphy_interface, "ulpi", 4)))
+ ret = dwc3_ulpi_init(dwc);
+
+ return ret;
+}
+
/**
* dwc3_phy_setup - Configure USB PHY Interface of DWC3 Core
* @dwc: Pointer to our controller context structure
static int dwc3_phy_setup(struct dwc3 *dwc)
{
u32 reg;
- int ret;
reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0));
}
/* FALLTHROUGH */
case DWC3_GHWPARAMS3_HSPHY_IFC_ULPI:
- ret = dwc3_ulpi_init(dwc);
- if (ret)
- return ret;
/* FALLTHROUGH */
default:
break;
}
static int dwc3_core_get_phy(struct dwc3 *dwc);
+static int dwc3_core_ulpi_init(struct dwc3 *dwc);
/**
* dwc3_core_init - Low-level initialization of DWC3 Core
dwc->maximum_speed = USB_SPEED_HIGH;
}
- ret = dwc3_core_get_phy(dwc);
+ ret = dwc3_phy_setup(dwc);
if (ret)
goto err0;
- ret = dwc3_core_soft_reset(dwc);
- if (ret)
- goto err0;
+ if (!dwc->ulpi_ready) {
+ ret = dwc3_core_ulpi_init(dwc);
+ if (ret)
+ goto err0;
+ dwc->ulpi_ready = true;
+ }
- ret = dwc3_phy_setup(dwc);
+ if (!dwc->phys_ready) {
+ ret = dwc3_core_get_phy(dwc);
+ if (ret)
+ goto err0a;
+ dwc->phys_ready = true;
+ }
+
+ ret = dwc3_core_soft_reset(dwc);
if (ret)
- goto err0;
+ goto err0a;
dwc3_core_setup_global_control(dwc);
dwc3_core_num_eps(dwc);
phy_exit(dwc->usb2_generic_phy);
phy_exit(dwc->usb3_generic_phy);
+err0a:
+ dwc3_ulpi_exit(dwc);
+
err0:
return ret;
}
switch (dwc->dr_mode) {
case USB_DR_MODE_PERIPHERAL:
- dwc->current_dr_role = DWC3_GCTL_PRTCAP_DEVICE;
dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE);
if (dwc->usb2_phy)
}
break;
case USB_DR_MODE_HOST:
- dwc->current_dr_role = DWC3_GCTL_PRTCAP_HOST;
dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST);
if (dwc->usb2_phy)
err3:
dwc3_free_event_buffers(dwc);
- dwc3_ulpi_exit(dwc);
err2:
pm_runtime_allow(&pdev->dev);
}
#ifdef CONFIG_PM
-static int dwc3_suspend_common(struct dwc3 *dwc)
+static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
{
unsigned long flags;
dwc3_core_exit(dwc);
break;
case DWC3_GCTL_PRTCAP_HOST:
+ /* do nothing during host runtime_suspend */
+ if (!PMSG_IS_AUTO(msg))
+ dwc3_core_exit(dwc);
+ break;
default:
/* do nothing */
break;
return 0;
}
-static int dwc3_resume_common(struct dwc3 *dwc)
+static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg)
{
unsigned long flags;
int ret;
spin_unlock_irqrestore(&dwc->lock, flags);
break;
case DWC3_GCTL_PRTCAP_HOST:
+ /* nothing to do on host runtime_resume */
+ if (!PMSG_IS_AUTO(msg)) {
+ ret = dwc3_core_init(dwc);
+ if (ret)
+ return ret;
+ }
+ break;
default:
/* do nothing */
break;
static int dwc3_runtime_checks(struct dwc3 *dwc)
{
switch (dwc->current_dr_role) {
- case USB_DR_MODE_PERIPHERAL:
- case USB_DR_MODE_OTG:
+ case DWC3_GCTL_PRTCAP_DEVICE:
if (dwc->connected)
return -EBUSY;
break;
- case USB_DR_MODE_HOST:
+ case DWC3_GCTL_PRTCAP_HOST:
default:
/* do nothing */
break;
if (dwc3_runtime_checks(dwc))
return -EBUSY;
- ret = dwc3_suspend_common(dwc);
+ ret = dwc3_suspend_common(dwc, PMSG_AUTO_SUSPEND);
if (ret)
return ret;
device_init_wakeup(dev, false);
- ret = dwc3_resume_common(dwc);
+ ret = dwc3_resume_common(dwc, PMSG_AUTO_RESUME);
if (ret)
return ret;
struct dwc3 *dwc = dev_get_drvdata(dev);
int ret;
- ret = dwc3_suspend_common(dwc);
+ ret = dwc3_suspend_common(dwc, PMSG_SUSPEND);
if (ret)
return ret;
pinctrl_pm_select_default_state(dev);
- ret = dwc3_resume_common(dwc);
+ ret = dwc3_resume_common(dwc, PMSG_RESUME);
if (ret)
return ret;
#define DWC3_GDBGFIFOSPACE_TYPE(n) (((n) << 5) & 0x1e0)
#define DWC3_GDBGFIFOSPACE_SPACE_AVAILABLE(n) (((n) >> 16) & 0xffff)
-#define DWC3_TXFIFOQ 1
-#define DWC3_RXFIFOQ 3
-#define DWC3_TXREQQ 5
-#define DWC3_RXREQQ 7
-#define DWC3_RXINFOQ 9
-#define DWC3_DESCFETCHQ 13
-#define DWC3_EVENTQ 15
+#define DWC3_TXFIFOQ 0
+#define DWC3_RXFIFOQ 1
+#define DWC3_TXREQQ 2
+#define DWC3_RXREQQ 3
+#define DWC3_RXINFOQ 4
+#define DWC3_PSTATQ 5
+#define DWC3_DESCFETCHQ 6
+#define DWC3_EVENTQ 7
+#define DWC3_AUXEVENTQ 8
/* Global RX Threshold Configuration Register */
#define DWC3_GRXTHRCFG_MAXRXBURSTSIZE(n) (((n) & 0x1f) << 19)
* @usb3_phy: pointer to USB3 PHY
* @usb2_generic_phy: pointer to USB2 PHY
* @usb3_generic_phy: pointer to USB3 PHY
+ * @phys_ready: flag to indicate that PHYs are ready
* @ulpi: pointer to ulpi interface
+ * @ulpi_ready: flag to indicate that ULPI is initialized
* @u2sel: parameter from Set SEL request.
* @u2pel: parameter from Set SEL request.
* @u1sel: parameter from Set SEL request.
struct phy *usb2_generic_phy;
struct phy *usb3_generic_phy;
+ bool phys_ready;
+
struct ulpi *ulpi;
+ bool ulpi_ready;
void __iomem *regs;
size_t regs_size;
clk_disable_unprepare(simple->clks[i]);
clk_put(simple->clks[i]);
}
+ simple->num_clocks = 0;
reset_control_assert(simple->resets);
reset_control_put(simple->resets);
return 0;
}
+static void dwc3_omap_complete(struct device *dev)
+{
+ struct dwc3_omap *omap = dev_get_drvdata(dev);
+
+ if (extcon_get_state(omap->edev, EXTCON_USB))
+ dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_VALID);
+ else
+ dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_OFF);
+
+ if (extcon_get_state(omap->edev, EXTCON_USB_HOST))
+ dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_GROUND);
+ else
+ dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_FLOAT);
+}
+
static const struct dev_pm_ops dwc3_omap_dev_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(dwc3_omap_suspend, dwc3_omap_resume)
+ .complete = dwc3_omap_complete,
};
#define DEV_PM_OPS (&dwc3_omap_dev_pm_ops)
trb++;
trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
trace_dwc3_complete_trb(ep0, trb);
- ep0->trb_enqueue = 0;
+
+ if (r->direction)
+ dwc->eps[1]->trb_enqueue = 0;
+ else
+ dwc->eps[0]->trb_enqueue = 0;
+
dwc->ep0_bounced = false;
}
break;
}
+ dwc->eps[1]->endpoint.maxpacket = dwc->gadget.ep0->maxpacket;
+
/* Enable USB2 LPM Capability */
if ((dwc->revision > DWC3_REVISION_194A) &&
spin_lock_irqsave(&func->ffs->eps_lock, flags);
while(count--) {
- struct usb_endpoint_descriptor *ds;
- struct usb_ss_ep_comp_descriptor *comp_desc = NULL;
- int needs_comp_desc = false;
- int desc_idx;
-
- if (ffs->gadget->speed == USB_SPEED_SUPER) {
- desc_idx = 2;
- needs_comp_desc = true;
- } else if (ffs->gadget->speed == USB_SPEED_HIGH)
- desc_idx = 1;
- else
- desc_idx = 0;
-
- /* fall-back to lower speed if desc missing for current speed */
- do {
- ds = ep->descs[desc_idx];
- } while (!ds && --desc_idx >= 0);
-
- if (!ds) {
- ret = -EINVAL;
- break;
- }
-
ep->ep->driver_data = ep;
- ep->ep->desc = ds;
- if (needs_comp_desc) {
- comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds +
- USB_DT_ENDPOINT_SIZE);
- ep->ep->maxburst = comp_desc->bMaxBurst + 1;
- ep->ep->comp_desc = comp_desc;
+ ret = config_ep_by_speed(func->gadget, &func->function, ep->ep);
+ if (ret) {
+ pr_err("%s: config_ep_by_speed(%s) returned %d\n",
+ __func__, ep->ep->name, ret);
+ break;
}
ret = usb_ep_enable(ep->ep);
if (likely(!ret)) {
epfile->ep = ep;
- epfile->in = usb_endpoint_dir_in(ds);
- epfile->isoc = usb_endpoint_xfer_isoc(ds);
+ epfile->in = usb_endpoint_dir_in(ep->ep->desc);
+ epfile->isoc = usb_endpoint_xfer_isoc(ep->ep->desc);
} else {
break;
}
struct ffs_data *ffs = func->ffs;
const int full = !!func->ffs->fs_descs_count;
- const int high = gadget_is_dualspeed(func->gadget) &&
- func->ffs->hs_descs_count;
- const int super = gadget_is_superspeed(func->gadget) &&
- func->ffs->ss_descs_count;
+ const int high = !!func->ffs->hs_descs_count;
+ const int super = !!func->ffs->ss_descs_count;
int fs_len, hs_len, ss_len, ret, i;
struct ffs_ep *eps_ptr;
dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
return ret;
}
+ iad_desc.bFirstInterface = ret;
+
std_ac_if_desc.bInterfaceNumber = ret;
uac2->ac_intf = ret;
uac2->ac_alt = 0;
tristate "Synopsys USB 2.0 Device controller"
depends on USB_GADGET && OF && HAS_DMA
depends on EXTCON || EXTCON=n
- select USB_GADGET_DUALSPEED
select USB_SNP_CORE
default ARCH_BCM_IPROC
help
if (ret) {
dev_err(&pci->dev,
"couldn't add resources to bdc device\n");
+ platform_device_put(bdc);
return ret;
}
void usb_ep_free_request(struct usb_ep *ep,
struct usb_request *req)
{
- ep->ops->free_request(ep, req);
trace_usb_ep_free_request(ep, req, 0);
+ ep->ops->free_request(ep, req);
}
EXPORT_SYMBOL_GPL(usb_ep_free_request);
{
struct fsl_ep *ep = get_ep_by_pipe(udc, pipe);
- if (ep->name)
+ if (ep->ep.name)
nuke(ep, -ESHUTDOWN);
}
curr_ep = get_ep_by_pipe(udc, i);
/* If the ep is configured */
- if (curr_ep->name == NULL) {
+ if (!curr_ep->ep.name) {
WARNING("Invalid EP?");
continue;
}
__renesas_usb3_ep_free_request(usb3->ep0_req);
if (usb3->phy)
phy_put(usb3->phy);
- pm_runtime_disable(usb3_to_dev(usb3));
+ pm_runtime_disable(&pdev->dev);
return 0;
}
bool
default y if ARCH_ASPEED
-config USB_UHCI_BIG_ENDIAN_MMIO
- bool
- default y if SPARC_LEON
-
-config USB_UHCI_BIG_ENDIAN_DESC
- bool
- default y if SPARC_LEON
-
config USB_FHCI_HCD
tristate "Freescale QE USB Host Controller support"
depends on OF_GPIO && QE_GPIO && QUICC_ENGINE
atomic_inc(&urb->use_count);
atomic_inc(&urb->dev->urbnum);
urb->setup_dma = dma_map_single(
- hcd->self.controller,
+ hcd->self.sysdev,
urb->setup_packet,
sizeof(struct usb_ctrlrequest),
DMA_TO_DEVICE);
urb->transfer_dma = dma_map_single(
- hcd->self.controller,
+ hcd->self.sysdev,
urb->transfer_buffer,
urb->transfer_buffer_length,
DMA_FROM_DEVICE);
* 15 secs after the setup
*/
if (is_setup) {
- /* SETUP pid */
+ /* SETUP pid, and interrupt after SETUP completion */
qtd_fill(ehci, qtd, urb->setup_dma,
sizeof(struct usb_ctrlrequest),
- token | (2 /* "setup" */ << 8), 8);
+ QTD_IOC | token | (2 /* "setup" */ << 8), 8);
submit_async(ehci, urb, &qtd_list, GFP_ATOMIC);
return 0; /*Return now; we shall come back after 15 seconds*/
qtd_prev->hw_next = QTD_NEXT(ehci, qtd->qtd_dma);
list_add_tail(&qtd->qtd_list, head);
- /* dont fill any data in such packets */
- qtd_fill(ehci, qtd, 0, 0, token, 0);
-
- /* by default, enable interrupt on urb completion */
- if (likely(!(urb->transfer_flags & URB_NO_INTERRUPT)))
- qtd->hw_token |= cpu_to_hc32(ehci, QTD_IOC);
+ /* Interrupt after STATUS completion */
+ qtd_fill(ehci, qtd, 0, 0, token | QTD_IOC, 0);
submit_async(ehci, urb, &qtd_list, GFP_KERNEL);
#define STATECHANGE_DELAY msecs_to_jiffies(300)
#define IO_WATCHDOG_DELAY msecs_to_jiffies(275)
+#define IO_WATCHDOG_OFF 0xffffff00
#include "ohci.h"
#include "pci-quirks.h"
}
/* Start up the I/O watchdog timer, if it's not running */
- if (!timer_pending(&ohci->io_watchdog) &&
+ if (ohci->prev_frame_no == IO_WATCHDOG_OFF &&
list_empty(&ohci->eds_in_use) &&
!(ohci->flags & OHCI_QUIRK_QEMU)) {
ohci->prev_frame_no = ohci_frame_no(ohci);
return 0;
timer_setup(&ohci->io_watchdog, io_watchdog_func, 0);
+ ohci->prev_frame_no = IO_WATCHDOG_OFF;
ohci->hcca = dma_alloc_coherent (hcd->self.controller,
sizeof(*ohci->hcca), &ohci->hcca_dma, GFP_KERNEL);
u32 head;
struct ed *ed;
struct td *td, *td_start, *td_next;
- unsigned frame_no;
+ unsigned frame_no, prev_frame_no = IO_WATCHDOG_OFF;
unsigned long flags;
spin_lock_irqsave(&ohci->lock, flags);
}
}
if (!list_empty(&ohci->eds_in_use)) {
- ohci->prev_frame_no = frame_no;
+ prev_frame_no = frame_no;
ohci->prev_wdh_cnt = ohci->wdh_cnt;
ohci->prev_donehead = ohci_readl(ohci,
&ohci->regs->donehead);
}
done:
+ ohci->prev_frame_no = prev_frame_no;
spin_unlock_irqrestore(&ohci->lock, flags);
}
if (quirk_nec(ohci))
flush_work(&ohci->nec_work);
del_timer_sync(&ohci->io_watchdog);
+ ohci->prev_frame_no = IO_WATCHDOG_OFF;
ohci_writel (ohci, OHCI_INTR_MIE, &ohci->regs->intrdisable);
ohci_usb_reset(ohci);
rc = ohci_rh_suspend (ohci, 0);
spin_unlock_irq (&ohci->lock);
- if (rc == 0)
+ if (rc == 0) {
del_timer_sync(&ohci->io_watchdog);
+ ohci->prev_frame_no = IO_WATCHDOG_OFF;
+ }
return rc;
}
* have modified this list. normally it's just prepending
* entries (which we'd ignore), but paranoia won't hurt.
*/
+ *last = ed->ed_next;
+ ed->ed_next = NULL;
modified = 0;
/* unlink urbs as requested, but rescan the list after
goto rescan_this;
/*
- * If no TDs are queued, take ED off the ed_rm_list.
+ * If no TDs are queued, ED is now idle.
* Otherwise, if the HC is running, reschedule.
- * If not, leave it on the list for further dequeues.
+ * If the HC isn't running, add ED back to the
+ * start of the list for later processing.
*/
if (list_empty(&ed->td_list)) {
- *last = ed->ed_next;
- ed->ed_next = NULL;
ed->state = ED_IDLE;
list_del(&ed->in_use_list);
} else if (ohci->rh_state == OHCI_RH_RUNNING) {
- *last = ed->ed_next;
- ed->ed_next = NULL;
ed_schedule(ohci, ed);
} else {
- last = &ed->ed_next;
+ ed->ed_next = ohci->ed_rm_list;
+ ohci->ed_rm_list = ed;
+ /* Don't loop on the same ED */
+ if (last == &ohci->ed_rm_list)
+ last = &ed->ed_next;
}
if (modified)
#define AX_INDXC 0x30
#define AX_DATAC 0x34
+#define PT_ADDR_INDX 0xE8
+#define PT_READ_INDX 0xE4
+#define PT_SIG_1_ADDR 0xA520
+#define PT_SIG_2_ADDR 0xA521
+#define PT_SIG_3_ADDR 0xA522
+#define PT_SIG_4_ADDR 0xA523
+#define PT_SIG_1_DATA 0x78
+#define PT_SIG_2_DATA 0x56
+#define PT_SIG_3_DATA 0x34
+#define PT_SIG_4_DATA 0x12
+#define PT4_P1_REG 0xB521
+#define PT4_P2_REG 0xB522
+#define PT2_P1_REG 0xD520
+#define PT2_P2_REG 0xD521
+#define PT1_P1_REG 0xD522
+#define PT1_P2_REG 0xD523
+
#define NB_PCIE_INDX_ADDR 0xe0
#define NB_PCIE_INDX_DATA 0xe4
#define PCIE_P_CNTL 0x10040
}
EXPORT_SYMBOL_GPL(usb_amd_dev_put);
+/*
+ * Check if port is disabled in BIOS on AMD Promontory host.
+ * BIOS Disabled ports may wake on connect/disconnect and need
+ * driver workaround to keep them disabled.
+ * Returns true if port is marked disabled.
+ */
+bool usb_amd_pt_check_port(struct device *device, int port)
+{
+ unsigned char value, port_shift;
+ struct pci_dev *pdev;
+ u16 reg;
+
+ pdev = to_pci_dev(device);
+ pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_1_ADDR);
+
+ pci_read_config_byte(pdev, PT_READ_INDX, &value);
+ if (value != PT_SIG_1_DATA)
+ return false;
+
+ pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_2_ADDR);
+
+ pci_read_config_byte(pdev, PT_READ_INDX, &value);
+ if (value != PT_SIG_2_DATA)
+ return false;
+
+ pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_3_ADDR);
+
+ pci_read_config_byte(pdev, PT_READ_INDX, &value);
+ if (value != PT_SIG_3_DATA)
+ return false;
+
+ pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_4_ADDR);
+
+ pci_read_config_byte(pdev, PT_READ_INDX, &value);
+ if (value != PT_SIG_4_DATA)
+ return false;
+
+ /* Check disabled port setting, if bit is set port is enabled */
+ switch (pdev->device) {
+ case 0x43b9:
+ case 0x43ba:
+ /*
+ * device is AMD_PROMONTORYA_4(0x43b9) or PROMONTORYA_3(0x43ba)
+ * PT4_P1_REG bits[7..1] represents USB2.0 ports 6 to 0
+ * PT4_P2_REG bits[6..0] represents ports 13 to 7
+ */
+ if (port > 6) {
+ reg = PT4_P2_REG;
+ port_shift = port - 7;
+ } else {
+ reg = PT4_P1_REG;
+ port_shift = port + 1;
+ }
+ break;
+ case 0x43bb:
+ /*
+ * device is AMD_PROMONTORYA_2(0x43bb)
+ * PT2_P1_REG bits[7..5] represents USB2.0 ports 2 to 0
+ * PT2_P2_REG bits[5..0] represents ports 9 to 3
+ */
+ if (port > 2) {
+ reg = PT2_P2_REG;
+ port_shift = port - 3;
+ } else {
+ reg = PT2_P1_REG;
+ port_shift = port + 5;
+ }
+ break;
+ case 0x43bc:
+ /*
+ * device is AMD_PROMONTORYA_1(0x43bc)
+ * PT1_P1_REG[7..4] represents USB2.0 ports 3 to 0
+ * PT1_P2_REG[5..0] represents ports 9 to 4
+ */
+ if (port > 3) {
+ reg = PT1_P2_REG;
+ port_shift = port - 4;
+ } else {
+ reg = PT1_P1_REG;
+ port_shift = port + 4;
+ }
+ break;
+ default:
+ return false;
+ }
+ pci_write_config_word(pdev, PT_ADDR_INDX, reg);
+ pci_read_config_byte(pdev, PT_READ_INDX, &value);
+
+ return !(value & BIT(port_shift));
+}
+EXPORT_SYMBOL_GPL(usb_amd_pt_check_port);
+
/*
* Make sure the controller is completely inactive, unable to
* generate interrupts or do DMA.
void usb_disable_xhci_ports(struct pci_dev *xhci_pdev);
void sb800_prefetch(struct device *dev, int on);
bool usb_xhci_needs_pci_reset(struct pci_dev *pdev);
+bool usb_amd_pt_check_port(struct device *device, int port);
#else
struct pci_dev;
static inline void usb_amd_quirk_pll_disable(void) {}
static inline void usb_amd_dev_put(void) {}
static inline void usb_disable_xhci_ports(struct pci_dev *xhci_pdev) {}
static inline void sb800_prefetch(struct device *dev, int on) {}
+static inline bool usb_amd_pt_check_port(struct device *device, int port)
+{
+ return false;
+}
#endif /* CONFIG_USB_PCI */
#endif /* __LINUX_USB_PCI_QUIRKS_H */
static int xhci_ring_trb_show(struct seq_file *s, void *unused)
{
int i;
- struct xhci_ring *ring = s->private;
+ struct xhci_ring *ring = *(struct xhci_ring **)s->private;
struct xhci_segment *seg = ring->first_seg;
for (i = 0; i < ring->num_segs; i++) {
snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index);
epriv->root = xhci_debugfs_create_ring_dir(xhci,
- &dev->eps[ep_index].new_ring,
+ &dev->eps[ep_index].ring,
epriv->name,
spriv->root);
spriv->eps[ep_index] = epriv;
temp = readl(port_array[wIndex]);
break;
}
-
- /* Software should not attempt to set
- * port link state above '3' (U3) and the port
- * must be enabled.
- */
- if ((temp & PORT_PE) == 0 ||
- (link_state > USB_SS_PORT_LS_U3)) {
- xhci_warn(xhci, "Cannot set link state.\n");
+ /* Port must be enabled */
+ if (!(temp & PORT_PE)) {
+ retval = -ENODEV;
+ break;
+ }
+ /* Can't set port link state above '3' (U3) */
+ if (link_state > USB_SS_PORT_LS_U3) {
+ xhci_warn(xhci, "Cannot set port %d link state %d\n",
+ wIndex, link_state);
goto error;
}
-
if (link_state == USB_SS_PORT_LS_U3) {
slot_id = xhci_find_slot_id_by_port(hcd, xhci,
wIndex + 1);
t2 |= PORT_WKOC_E | PORT_WKCONN_E;
t2 &= ~PORT_WKDISC_E;
}
+
+ if ((xhci->quirks & XHCI_U2_DISABLE_WAKE) &&
+ (hcd->speed < HCD_USB3)) {
+ if (usb_amd_pt_check_port(hcd->self.controller,
+ port_index))
+ t2 &= ~PORT_WAKE_BITS;
+ }
} else
t2 &= ~PORT_WAKE_BITS;
#define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8
#define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc
#define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142
static const char hcd_name[] = "xhci_hcd";
if (pdev->vendor == PCI_VENDOR_ID_AMD)
xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+ if ((pdev->vendor == PCI_VENDOR_ID_AMD) &&
+ ((pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4) ||
+ (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_3) ||
+ (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_2) ||
+ (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_1)))
+ xhci->quirks |= XHCI_U2_DISABLE_WAKE;
+
if (pdev->vendor == PCI_VENDOR_ID_INTEL) {
xhci->quirks |= XHCI_LPM_SUPPORT;
xhci->quirks |= XHCI_INTEL_HOST;
return;
}
- xhci_debugfs_exit(xhci);
-
xhci_dbc_exit(xhci);
spin_lock_irq(&xhci->lock);
xhci_dbg_trace(xhci, trace_xhci_dbg_init, "cleaning up memory");
xhci_mem_cleanup(xhci);
+ xhci_debugfs_exit(xhci);
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
"xhci_stop completed - status = %x",
readl(&xhci->op_regs->status));
xhci_dbg(xhci, "cleaning up memory\n");
xhci_mem_cleanup(xhci);
+ xhci_debugfs_exit(xhci);
xhci_dbg(xhci, "xhci_stop completed - status = %x\n",
readl(&xhci->op_regs->status));
virt_dev->eps[i].ep_state &= ~EP_STOP_CMD_PENDING;
del_timer_sync(&virt_dev->eps[i].stop_cmd_timer);
}
-
+ xhci_debugfs_remove_slot(xhci, udev->slot_id);
ret = xhci_disable_slot(xhci, udev->slot_id);
- if (ret) {
- xhci_debugfs_remove_slot(xhci, udev->slot_id);
+ if (ret)
xhci_free_virt_device(xhci, udev->slot_id);
- }
}
int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id)
/* For controller with a broken Port Disable implementation */
#define XHCI_BROKEN_PORT_PED (1 << 25)
#define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26)
-/* Reserved. It was XHCI_U2_DISABLE_WAKE */
+#define XHCI_U2_DISABLE_WAKE (1 << 27)
#define XHCI_ASMEDIA_MODIFY_FLOWCONTROL (1 << 28)
#define XHCI_HW_LPM_DISABLE (1 << 29)
#define USB_DEVICE_ID_LD_MICROCASSYTIME 0x1033 /* USB Product ID of Micro-CASSY Time (reserved) */
#define USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE 0x1035 /* USB Product ID of Micro-CASSY Temperature */
#define USB_DEVICE_ID_LD_MICROCASSYPH 0x1038 /* USB Product ID of Micro-CASSY pH */
+#define USB_DEVICE_ID_LD_POWERANALYSERCASSY 0x1040 /* USB Product ID of Power Analyser CASSY */
+#define USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY 0x1042 /* USB Product ID of Converter Controller CASSY */
+#define USB_DEVICE_ID_LD_MACHINETESTCASSY 0x1043 /* USB Product ID of Machine Test CASSY */
#define USB_DEVICE_ID_LD_JWM 0x1080 /* USB Product ID of Joule and Wattmeter */
#define USB_DEVICE_ID_LD_DMMP 0x1081 /* USB Product ID of Digital Multimeter P (reserved) */
#define USB_DEVICE_ID_LD_UMIP 0x1090 /* USB Product ID of UMI P */
{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTIME) },
{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE) },
{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYPH) },
+ { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERANALYSERCASSY) },
+ { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY) },
+ { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETESTCASSY) },
{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) },
{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) },
{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) },
if ((devctl & mask) != (musb->context.devctl & mask))
musb->port1_status = 0;
- musb_start(musb);
+ musb_enable_interrupts(musb);
+ musb_platform_enable(musb);
spin_lock_irqsave(&musb->lock, flags);
error = musb_run_resume_work(musb);
}
}
- /*
- * The pipe must be broken if current urb->status is set, so don't
- * start next urb.
- * TODO: to minimize the risk of regression, only check urb->status
- * for RX, until we have a test case to understand the behavior of TX.
- */
- if ((!status || !is_in) && qh && qh->is_ready) {
+ if (qh != NULL && qh->is_ready) {
musb_dbg(musb, "... next ep%d %cX urb %p",
hw_ep->epnum, is_in ? 'R' : 'T', next_urb(qh));
musb_start_urb(musb, is_in, qh);
void __iomem *base = phy->io_priv;
enum usb_charger_type chgr_type = UNKNOWN_TYPE;
+ if (!regmap)
+ return UNKNOWN_TYPE;
+
if (mxs_charger_data_contact_detect(mxs_phy))
return chgr_type;
if ((uintptr_t)pkt->buf & (USBHS_USB_DMAC_XFER_SIZE - 1))
goto usbhsf_pio_prepare_pop;
+ /* return at this time if the pipe is running */
+ if (usbhs_pipe_is_running(pipe))
+ return 0;
+
usbhs_pipe_config_change_bfre(pipe, 1);
ret = usbhsf_fifo_select(pipe, fifo, 0);
usbhsf_fifo_clear(pipe, fifo);
pkt->actual = usbhs_dma_calc_received_size(pkt, chan, rcv_len);
+ usbhs_pipe_running(pipe, 0);
usbhsf_dma_stop(pipe, fifo);
usbhsf_dma_unmap(pkt);
usbhsf_fifo_unselect(pipe, pipe->fifo);
#define QUECTEL_PRODUCT_EC21 0x0121
#define QUECTEL_PRODUCT_EC25 0x0125
#define QUECTEL_PRODUCT_BG96 0x0296
+#define QUECTEL_PRODUCT_EP06 0x0306
#define CMOTECH_VENDOR_ID 0x16d8
#define CMOTECH_PRODUCT_6001 0x6001
.reserved = BIT(1) | BIT(4),
};
+static const struct option_blacklist_info quectel_ep06_blacklist = {
+ .reserved = BIT(4) | BIT(5),
+};
+
static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96),
.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+ { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06),
+ .driver_info = (kernel_ulong_t)&quectel_ep06_blacklist },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003),
goto err;
sdev->ud.tcp_socket = socket;
+ sdev->ud.sockfd = sockfd;
spin_unlock_irq(&sdev->ud.lock);
if (ud->tcp_socket) {
sockfd_put(ud->tcp_socket);
ud->tcp_socket = NULL;
+ ud->sockfd = -1;
}
/* 3. free used data */
sdev->ud.status = SDEV_ST_AVAILABLE;
spin_lock_init(&sdev->ud.lock);
sdev->ud.tcp_socket = NULL;
+ sdev->ud.sockfd = -1;
INIT_LIST_HEAD(&sdev->priv_init);
INIT_LIST_HEAD(&sdev->priv_tx);
if (vdev->ud.tcp_socket) {
sockfd_put(vdev->ud.tcp_socket);
vdev->ud.tcp_socket = NULL;
+ vdev->ud.sockfd = -1;
}
pr_info("release socket\n");
if (ud->tcp_socket) {
sockfd_put(ud->tcp_socket);
ud->tcp_socket = NULL;
+ ud->sockfd = -1;
}
ud->status = VDEV_ST_NULL;
struct sockaddr_ll sa;
char buf[MAX_ADDR_LEN];
} uaddr;
- int uaddr_len = sizeof uaddr, r;
+ int r;
struct socket *sock = sockfd_lookup(fd, &r);
if (!sock)
goto err;
}
- r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa,
- &uaddr_len, 0);
- if (r)
+ r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0);
+ if (r < 0)
goto err;
if (uaddr.sa.sll_family != AF_PACKET) {
int timeout = 1000;
/* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */
- if (cpu_data(0).x86_mask == 1) {
+ if (cpu_data(0).x86_stepping == 1) {
pll_table = gx_pll_table_14MHz;
pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz);
} else {
bool active_socket;
struct list_head list;
struct socket *sock;
+ atomic_t refcount;
union {
struct {
int irq;
};
};
+static inline struct sock_mapping *pvcalls_enter_sock(struct socket *sock)
+{
+ struct sock_mapping *map;
+
+ if (!pvcalls_front_dev ||
+ dev_get_drvdata(&pvcalls_front_dev->dev) == NULL)
+ return ERR_PTR(-ENOTCONN);
+
+ map = (struct sock_mapping *)sock->sk->sk_send_head;
+ if (map == NULL)
+ return ERR_PTR(-ENOTSOCK);
+
+ pvcalls_enter();
+ atomic_inc(&map->refcount);
+ return map;
+}
+
+static inline void pvcalls_exit_sock(struct socket *sock)
+{
+ struct sock_mapping *map;
+
+ map = (struct sock_mapping *)sock->sk->sk_send_head;
+ atomic_dec(&map->refcount);
+ pvcalls_exit();
+}
+
static inline int get_request(struct pvcalls_bedata *bedata, int *req_id)
{
*req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
return -EOPNOTSUPP;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -ENOTCONN;
- }
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *)sock->sk->sk_send_head;
- if (!map) {
- pvcalls_exit();
- return -ENOTSOCK;
- }
-
spin_lock(&bedata->socket_lock);
ret = get_request(bedata, &req_id);
if (ret < 0) {
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
ret = create_active(map, &evtchn);
if (ret < 0) {
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
smp_rmb();
ret = bedata->rsp[req_id].ret;
bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
if (flags & (MSG_CONFIRM|MSG_DONTROUTE|MSG_EOR|MSG_OOB))
return -EOPNOTSUPP;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -ENOTCONN;
- }
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (!map) {
- pvcalls_exit();
- return -ENOTSOCK;
- }
-
mutex_lock(&map->active.out_mutex);
if ((flags & MSG_DONTWAIT) && !pvcalls_front_write_todo(map)) {
mutex_unlock(&map->active.out_mutex);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EAGAIN;
}
if (len > INT_MAX)
tot_sent = sent;
mutex_unlock(&map->active.out_mutex);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return tot_sent;
}
if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC))
return -EOPNOTSUPP;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -ENOTCONN;
- }
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (!map) {
- pvcalls_exit();
- return -ENOTSOCK;
- }
-
mutex_lock(&map->active.in_mutex);
if (len > XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER))
len = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
ret = 0;
mutex_unlock(&map->active.in_mutex);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
return -EOPNOTSUPP;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -ENOTCONN;
- }
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (map == NULL) {
- pvcalls_exit();
- return -ENOTSOCK;
- }
-
spin_lock(&bedata->socket_lock);
ret = get_request(bedata, &req_id);
if (ret < 0) {
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
req = RING_GET_REQUEST(&bedata->ring, req_id);
bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
map->passive.status = PVCALLS_STATUS_BIND;
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return 0;
}
struct xen_pvcalls_request *req;
int notify, req_id, ret;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -ENOTCONN;
- }
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (!map) {
- pvcalls_exit();
- return -ENOTSOCK;
- }
-
if (map->passive.status != PVCALLS_STATUS_BIND) {
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EOPNOTSUPP;
}
ret = get_request(bedata, &req_id);
if (ret < 0) {
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
req = RING_GET_REQUEST(&bedata->ring, req_id);
bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
map->passive.status = PVCALLS_STATUS_LISTEN;
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
struct xen_pvcalls_request *req;
int notify, req_id, ret, evtchn, nonblock;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -ENOTCONN;
- }
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (!map) {
- pvcalls_exit();
- return -ENOTSOCK;
- }
-
if (map->passive.status != PVCALLS_STATUS_LISTEN) {
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EINVAL;
}
goto received;
}
if (nonblock) {
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EAGAIN;
}
if (wait_event_interruptible(map->passive.inflight_accept_req,
!test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags))) {
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EINTR;
}
}
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags);
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
map2 = kzalloc(sizeof(*map2), GFP_ATOMIC);
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags);
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -ENOMEM;
}
ret = create_active(map2, &evtchn);
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags);
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
list_add_tail(&map2->list, &bedata->socket_mappings);
/* We could check if we have received a response before returning. */
if (nonblock) {
WRITE_ONCE(map->passive.inflight_req_id, req_id);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EAGAIN;
}
if (wait_event_interruptible(bedata->inflight_req,
READ_ONCE(bedata->rsp[req_id].req_id) == req_id)) {
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -EINTR;
}
/* read req_id, then the content */
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags);
pvcalls_front_free_map(bedata, map2);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return -ENOMEM;
}
newsock->sk->sk_send_head = (void *)map2;
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags);
wake_up(&map->passive.inflight_accept_req);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
struct sock_mapping *map;
__poll_t ret;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map))
return EPOLLNVAL;
- }
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (!map) {
- pvcalls_exit();
- return EPOLLNVAL;
- }
if (map->active_socket)
ret = pvcalls_front_poll_active(file, bedata, map, wait);
else
ret = pvcalls_front_poll_passive(file, bedata, map, wait);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
if (sock->sk == NULL)
return 0;
- pvcalls_enter();
- if (!pvcalls_front_dev) {
- pvcalls_exit();
- return -EIO;
+ map = pvcalls_enter_sock(sock);
+ if (IS_ERR(map)) {
+ if (PTR_ERR(map) == -ENOTCONN)
+ return -EIO;
+ else
+ return 0;
}
-
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
- map = (struct sock_mapping *) sock->sk->sk_send_head;
- if (map == NULL) {
- pvcalls_exit();
- return 0;
- }
-
spin_lock(&bedata->socket_lock);
ret = get_request(bedata, &req_id);
if (ret < 0) {
spin_unlock(&bedata->socket_lock);
- pvcalls_exit();
+ pvcalls_exit_sock(sock);
return ret;
}
sock->sk->sk_send_head = NULL;
/*
* We need to make sure that sendmsg/recvmsg on this socket have
* not started before we've cleared sk_send_head here. The
- * easiest (though not optimal) way to guarantee this is to see
- * that no pvcall (other than us) is in progress.
+ * easiest way to guarantee this is to see that no pvcalls
+ * (other than us) is in progress on this socket.
*/
- while (atomic_read(&pvcalls_refcount) > 1)
+ while (atomic_read(&map->refcount) > 1)
cpu_relax();
pvcalls_front_free_map(bedata, map);
} else {
+ wake_up(&bedata->inflight_req);
+ wake_up(&map->passive.inflight_accept_req);
+
+ while (atomic_read(&map->refcount) > 1)
+ cpu_relax();
+
spin_lock(&bedata->socket_lock);
list_del(&map->list);
spin_unlock(&bedata->socket_lock);
int pool = tmem_frontswap_poolid;
int ret;
+ /* THP isn't supported */
+ if (PageTransHuge(page))
+ return -1;
+
if (pool < 0)
return -1;
if (ind64 != ind)
struct list_head list;
wait_queue_head_t wq;
struct xsd_sockmsg msg;
+ uint32_t caller_req_id;
enum xsd_sockmsg_type type;
char *body;
const struct kvec *vec;
goto out;
if (req->state == xb_req_state_wait_reply) {
+ req->msg.req_id = req->caller_req_id;
req->msg.type = state.msg.type;
req->msg.len = state.msg.len;
req->body = state.body;
req->state = xb_req_state_queued;
init_waitqueue_head(&req->wq);
+ /* Save the caller req_id and restore it later in the reply */
+ req->caller_req_id = req->msg.req_id;
req->msg.req_id = xs_request_enter(req);
mutex_lock(&xb_write_mutex);
req->num_vecs = num_vecs;
req->cb = xs_wake_up;
+ msg.req_id = 0;
msg.tx_id = t.id;
msg.type = type;
msg.len = 0;
while (node) {
ref = rb_entry(node, struct prelim_ref, rbnode);
node = rb_next(&ref->rbnode);
- WARN_ON(ref->count < 0);
+ /*
+ * ref->count < 0 can happen here if there are delayed
+ * refs with a node->action of BTRFS_DROP_DELAYED_REF.
+ * prelim_ref_insert() relies on this when merging
+ * identical refs to keep the overall count correct.
+ * prelim_ref_insert() will merge only those refs
+ * which compare identically. Any refs having
+ * e.g. different offsets would not be merged,
+ * and would retain their original ref->count < 0.
+ */
if (roots && ref->count && ref->root_id && ref->parent == 0) {
if (sc && sc->root_objectid &&
ref->root_id != sc->root_objectid) {
spin_unlock(&delayed_refs->lock);
if (qrecord_inserted)
- return btrfs_qgroup_trace_extent_post(fs_info, record);
+ btrfs_qgroup_trace_extent_post(fs_info, record);
+
return 0;
free_head_ref:
u64 bytes;
struct request_queue *req_q;
+ if (!stripe->dev->bdev) {
+ ASSERT(btrfs_test_opt(fs_info, DEGRADED));
+ continue;
+ }
req_q = bdev_get_queue(stripe->dev->bdev);
if (!blk_queue_discard(req_q))
continue;
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
- if (ret < 0)
+ if (ret < 0) {
+ if (cow_start != (u64)-1)
+ cur_offset = cow_start;
goto error;
+ }
if (ret > 0)
break;
leaf = path->nodes[0];
ret = btrfs_orphan_reserve_metadata(trans, inode);
ASSERT(!ret);
if (ret) {
+ /*
+ * dec doesn't need spin_lock as ->orphan_block_rsv
+ * would be released only if ->orphan_inodes is
+ * zero.
+ */
atomic_dec(&root->orphan_inodes);
clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
&inode->runtime_flags);
if (insert >= 1) {
ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
if (ret) {
- atomic_dec(&root->orphan_inodes);
if (reserve) {
clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
&inode->runtime_flags);
btrfs_orphan_release_metadata(inode);
}
+ /*
+ * btrfs_orphan_commit_root may race with us and set
+ * ->orphan_block_rsv to zero, in order to avoid that,
+ * decrease ->orphan_inodes after everything is done.
+ */
+ atomic_dec(&root->orphan_inodes);
if (ret != -EEXIST) {
clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&inode->runtime_flags);
{
struct btrfs_root *root = inode->root;
int delete_item = 0;
- int release_rsv = 0;
int ret = 0;
- spin_lock(&root->orphan_lock);
if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&inode->runtime_flags))
delete_item = 1;
+ if (delete_item && trans)
+ ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
+
if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
&inode->runtime_flags))
- release_rsv = 1;
- spin_unlock(&root->orphan_lock);
+ btrfs_orphan_release_metadata(inode);
- if (delete_item) {
+ /*
+ * btrfs_orphan_commit_root may race with us and set ->orphan_block_rsv
+ * to zero, in order to avoid that, decrease ->orphan_inodes after
+ * everything is done.
+ */
+ if (delete_item)
atomic_dec(&root->orphan_inodes);
- if (trans)
- ret = btrfs_del_orphan_item(trans, root,
- btrfs_ino(inode));
- }
-
- if (release_rsv)
- btrfs_orphan_release_metadata(inode);
return ret;
}
trace_btrfs_inode_evict(inode);
if (!root) {
- kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+ clear_inode(inode);
return;
}
int ret;
ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
- if (ret < 0)
- return ret;
+ if (ret < 0) {
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ btrfs_warn(fs_info,
+"error accounting new delayed refs extent (err code: %d), quota inconsistent",
+ ret);
+ return 0;
+ }
/*
* Here we don't need to get the lock of
#include "hash.h"
#include "compression.h"
#include "qgroup.h"
+#include "inode-map.h"
/* magic values for the inode_only field in btrfs_log_inode:
*
clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
+ } else {
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+ clear_extent_buffer_dirty(next);
}
WARN_ON(root_owner !=
clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
+ } else {
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+ clear_extent_buffer_dirty(next);
}
WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
+ } else {
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+ clear_extent_buffer_dirty(next);
}
WARN_ON(log->root_key.objectid !=
while (1) {
ret = find_first_extent_bit(&log->dirty_log_pages,
- 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW,
+ 0, &start, &end,
+ EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT,
NULL);
if (ret)
break;
clear_extent_bits(&log->dirty_log_pages, start, end,
- EXTENT_DIRTY | EXTENT_NEW);
+ EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
}
/*
path);
}
+ if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
+ struct btrfs_root *root = wc.replay_dest;
+
+ btrfs_release_path(path);
+
+ /*
+ * We have just replayed everything, and the highest
+ * objectid of fs roots probably has changed in case
+ * some inode_item's got replayed.
+ *
+ * root->objectid_mutex is not acquired as log replay
+ * could only happen during mount.
+ */
+ ret = btrfs_find_highest_objectid(root,
+ &root->highest_objectid);
+ }
+
key.offset = found_key.offset - 1;
wc.replay_dest->log_root = NULL;
free_extent_buffer(log->node);
btrfs_sysfs_remove_fsid(fs_devs);
list_del(&fs_devs->list);
free_fs_devices(fs_devs);
+ break;
} else {
fs_devs->num_devices--;
list_del(&dev->dev_list);
{
struct connection *con;
struct sockaddr_storage saddr;
- int buflen;
void (*orig_report)(struct sock *) = NULL;
read_lock_bh(&sk->sk_callback_lock);
orig_report = listen_sock.sk_error_report;
if (con->sock == NULL ||
- kernel_getpeername(con->sock, (struct sockaddr *)&saddr, &buflen)) {
+ kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) {
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
"sending to node %d, port %d, "
"sk_err=%d/%d\n", dlm_our_nodeid(),
/* Get the connected socket's peer */
memset(&peeraddr, 0, sizeof(peeraddr));
- if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr,
- &len, 2)) {
+ len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2);
+ if (len < 0) {
result = -ECONNABORTED;
goto accept_err;
}
*/
#include <linux/efi.h>
+#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/mount.h>
ssize_t size = 0;
int err;
+ while (!__ratelimit(&file->f_cred->user->ratelimit)) {
+ if (!msleep_interruptible(50))
+ return -EINTR;
+ }
+
err = efivar_entry_size(var, &datasize);
/*
__be64 *ptr;
sector_t lblock;
sector_t lend;
- int ret;
+ int ret = 0;
int eob;
unsigned int len;
struct buffer_head *bh;
goto out;
}
- if ((flags & IOMAP_REPORT) && gfs2_is_stuffed(ip)) {
- gfs2_stuffed_iomap(inode, iomap);
- if (pos >= iomap->length)
- return -ENOENT;
- ret = 0;
- goto out;
+ if (gfs2_is_stuffed(ip)) {
+ if (flags & IOMAP_REPORT) {
+ gfs2_stuffed_iomap(inode, iomap);
+ if (pos >= iomap->length)
+ ret = -ENOENT;
+ goto out;
+ }
+ BUG_ON(!(flags & IOMAP_WRITE));
}
lblock = pos >> inode->i_blkbits;
iomap->type = IOMAP_HOLE;
iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
iomap->flags = IOMAP_F_MERGED;
- bmap_lock(ip, 0);
+ bmap_lock(ip, flags & IOMAP_WRITE);
/*
* Directory data blocks have a struct gfs2_meta_header header, so the
iomap->flags |= IOMAP_F_BOUNDARY;
iomap->length = (u64)len << inode->i_blkbits;
- ret = 0;
-
out_release:
release_metapath(&mp);
- bmap_unlock(ip, 0);
+ bmap_unlock(ip, flags & IOMAP_WRITE);
out:
trace_gfs2_iomap_end(ip, iomap, ret);
return ret;
do_alloc:
- if (!(flags & IOMAP_WRITE)) {
- if (pos >= i_size_read(inode)) {
+ if (flags & IOMAP_WRITE) {
+ ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+ } else if (flags & IOMAP_REPORT) {
+ loff_t size = i_size_read(inode);
+ if (pos >= size)
ret = -ENOENT;
- goto out_release;
- }
- ret = 0;
- iomap->length = hole_size(inode, lblock, &mp);
- goto out_release;
+ else if (height <= ip->i_height)
+ iomap->length = hole_size(inode, lblock, &mp);
+ else
+ iomap->length = size - pos;
+ } else {
+ if (height <= ip->i_height)
+ iomap->length = hole_size(inode, lblock, &mp);
}
-
- ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
goto out_release;
}
.exit = lockd_exit_net,
.id = &lockd_net_id,
.size = sizeof(struct lockd_net),
+ .async = true,
};
.exit = nfs_net_exit,
.id = &nfs_net_id,
.size = sizeof(struct nfs_net),
+ .async = true,
};
/*
.exit = grace_exit_net,
.id = &grace_net_id,
.size = sizeof(struct list_head),
+ .async = true,
};
static int __init
return fd;
}
+EXPORT_SYMBOL_GPL(open_related_ns);
static long ns_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
static int o2net_accept_one(struct socket *sock, int *more)
{
- int ret, slen;
+ int ret;
struct sockaddr_in sin;
struct socket *new_sock = NULL;
struct o2nm_node *node = NULL;
goto out;
}
- slen = sizeof(sin);
- ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
- &slen, 1);
+ ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1);
if (ret < 0)
goto out;
/* we have to zero-fill user buffer even if no read */
if (copy_to_user(buffer, buf, tsz))
return -EFAULT;
+ } else if (m->type == KCORE_USER) {
+ /* User page is handled prior to normal kernel page: */
+ if (copy_to_user(buffer, (char *)start, tsz))
+ return -EFAULT;
} else {
if (kern_addr_valid(start)) {
/*
static struct pernet_operations __net_initdata proc_net_ns_ops = {
.init = proc_net_ns_init,
.exit = proc_net_ns_exit,
+ .async = true,
};
int __init proc_net_init(void)
err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno);
#endif
#ifdef BUS_MCEERR_AO
- /*
+ /*
+ * Other callers might not initialize the si_lsb field,
+ * so check explicitly for the right codes here.
+ */
+ if (kinfo->si_signo == SIGBUS &&
+ kinfo->si_code == BUS_MCEERR_AO)
+ err |= __put_user((short) kinfo->si_addr_lsb,
+ &uinfo->ssi_addr_lsb);
+#endif
+#ifdef BUS_MCEERR_AR
+ /*
* Other callers might not initialize the si_lsb field,
* so check explicitly for the right codes here.
*/
if (kinfo->si_signo == SIGBUS &&
- (kinfo->si_code == BUS_MCEERR_AR ||
- kinfo->si_code == BUS_MCEERR_AO))
+ kinfo->si_code == BUS_MCEERR_AR)
err |= __put_user((short) kinfo->si_addr_lsb,
&uinfo->ssi_addr_lsb);
#endif
* @nr: Bit to set
* @addr: Address to count from
*
- * This operation is atomic and provides acquire barrier semantics.
+ * This operation is atomic and provides acquire barrier semantics if
+ * the returned value is 0.
* It can be used to implement bit locks.
*/
#define test_and_set_bit_lock(nr, addr) test_and_set_bit(nr, addr)
#ifndef HAVE_ARCH_BUG
#define BUG() do { \
printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
+ barrier_before_unreachable(); \
panic("BUG!"); \
} while (0)
#endif
* &drm_pending_vblank_event pointer to clean up private events.
*/
struct drm_pending_vblank_event *event;
+
+ /**
+ * @abort_completion:
+ *
+ * A flag that's set after drm_atomic_helper_setup_commit takes a second
+ * reference for the completion of $drm_crtc_state.event. It's used by
+ * the free code to remove the second reference if commit fails.
+ */
+ bool abort_completion;
};
struct __drm_planes_state {
void drm_kms_helper_poll_disable(struct drm_device *dev);
void drm_kms_helper_poll_enable(struct drm_device *dev);
+bool drm_kms_helper_is_poll_worker(void);
#endif
#define DP83867_RGMIIDCTL_3_75_NS 0xe
#define DP83867_RGMIIDCTL_4_00_NS 0xf
+/* IO_MUX_CFG - Clock output selection */
+#define DP83867_CLK_O_SEL_CHN_A_RCLK 0x0
+#define DP83867_CLK_O_SEL_CHN_B_RCLK 0x1
+#define DP83867_CLK_O_SEL_CHN_C_RCLK 0x2
+#define DP83867_CLK_O_SEL_CHN_D_RCLK 0x3
+#define DP83867_CLK_O_SEL_CHN_A_RCLK_DIV5 0x4
+#define DP83867_CLK_O_SEL_CHN_B_RCLK_DIV5 0x5
+#define DP83867_CLK_O_SEL_CHN_C_RCLK_DIV5 0x6
+#define DP83867_CLK_O_SEL_CHN_D_RCLK_DIV5 0x7
+#define DP83867_CLK_O_SEL_CHN_A_TCLK 0x8
+#define DP83867_CLK_O_SEL_CHN_B_TCLK 0x9
+#define DP83867_CLK_O_SEL_CHN_C_TCLK 0xA
+#define DP83867_CLK_O_SEL_CHN_D_TCLK 0xB
+#define DP83867_CLK_O_SEL_REF_CLK 0xC
#endif
const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids,
const struct device *dev);
-void *acpi_get_match_data(const struct device *dev);
+const void *acpi_device_get_match_data(const struct device *dev);
extern bool acpi_driver_match_device(struct device *dev,
const struct device_driver *drv);
int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *);
return NULL;
}
-static inline void *acpi_get_match_data(const struct device *dev)
+static inline const void *acpi_device_get_match_data(const struct device *dev)
{
return NULL;
}
/* Inter module exports */
/* Give a device find its atif control structure */
+#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
static inline struct atalk_iface *atalk_find_dev(struct net_device *dev)
{
return dev->atalk_ptr;
}
+#endif
extern struct atalk_addr *atalk_find_dev_addr(struct net_device *dev);
extern struct net_device *atrtr_get_dev(struct atalk_addr *sa);
VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27,
VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28,
VIRTCHNL_OP_REQUEST_QUEUES = 29,
+ VIRTCHNL_OP_ENABLE_CHANNELS = 30,
+ VIRTCHNL_OP_DISABLE_CHANNELS = 31,
+ VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
+ VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
};
-/* This macro is used to generate a compilation error if a structure
+/* These macros are used to generate compilation errors if a structure/union
* is not exactly the correct length. It gives a divide by zero error if the
- * structure is not of the correct size, otherwise it creates an enum that is
- * never used.
+ * structure/union is not of the correct size, otherwise it creates an enum
+ * that is never used.
*/
#define VIRTCHNL_CHECK_STRUCT_LEN(n, X) enum virtchnl_static_assert_enum_##X \
{ virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
+#define VIRTCHNL_CHECK_UNION_LEN(n, X) enum virtchnl_static_asset_enum_##X \
+ { virtchnl_static_assert_##X = (n)/((sizeof(union X) == (n)) ? 1 : 0) }
/* Virtual channel message descriptor. This overlays the admin queue
* descriptor. All other data is passed in external buffers.
#define VIRTCHNL_VF_OFFLOAD_ENCAP 0X00100000
#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000
#define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000
+#define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000
#define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
VIRTCHNL_VF_OFFLOAD_VLAN | \
VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena);
+/* VIRTCHNL_OP_ENABLE_CHANNELS
+ * VIRTCHNL_OP_DISABLE_CHANNELS
+ * VF sends these messages to enable or disable channels based on
+ * the user specified queue count and queue offset for each traffic class.
+ * This struct encompasses all the information that the PF needs from
+ * VF to create a channel.
+ */
+struct virtchnl_channel_info {
+ u16 count; /* number of queues in a channel */
+ u16 offset; /* queues in a channel start from 'offset' */
+ u32 pad;
+ u64 max_tx_rate;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_channel_info);
+
+struct virtchnl_tc_info {
+ u32 num_tc;
+ u32 pad;
+ struct virtchnl_channel_info list[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_tc_info);
+
+/* VIRTCHNL_ADD_CLOUD_FILTER
+ * VIRTCHNL_DEL_CLOUD_FILTER
+ * VF sends these messages to add or delete a cloud filter based on the
+ * user specified match and action filters. These structures encompass
+ * all the information that the PF needs from the VF to add/delete a
+ * cloud filter.
+ */
+
+struct virtchnl_l4_spec {
+ u8 src_mac[ETH_ALEN];
+ u8 dst_mac[ETH_ALEN];
+ __be16 vlan_id;
+ __be16 pad; /* reserved for future use */
+ __be32 src_ip[4];
+ __be32 dst_ip[4];
+ __be16 src_port;
+ __be16 dst_port;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(52, virtchnl_l4_spec);
+
+union virtchnl_flow_spec {
+ struct virtchnl_l4_spec tcp_spec;
+ u8 buffer[128]; /* reserved for future use */
+};
+
+VIRTCHNL_CHECK_UNION_LEN(128, virtchnl_flow_spec);
+
+enum virtchnl_action {
+ /* action types */
+ VIRTCHNL_ACTION_DROP = 0,
+ VIRTCHNL_ACTION_TC_REDIRECT,
+};
+
+enum virtchnl_flow_type {
+ /* flow types */
+ VIRTCHNL_TCP_V4_FLOW = 0,
+ VIRTCHNL_TCP_V6_FLOW,
+};
+
+struct virtchnl_filter {
+ union virtchnl_flow_spec data;
+ union virtchnl_flow_spec mask;
+ enum virtchnl_flow_type flow_type;
+ enum virtchnl_action action;
+ u32 action_meta;
+ __u8 field_flags;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
+
/* VIRTCHNL_OP_EVENT
* PF sends this message to inform the VF driver of events that may affect it.
* No direct response is expected from the VF, though it may generate other
case VIRTCHNL_OP_REQUEST_QUEUES:
valid_len = sizeof(struct virtchnl_vf_res_request);
break;
+ case VIRTCHNL_OP_ENABLE_CHANNELS:
+ valid_len = sizeof(struct virtchnl_tc_info);
+ if (msglen >= valid_len) {
+ struct virtchnl_tc_info *vti =
+ (struct virtchnl_tc_info *)msg;
+ valid_len += vti->num_tc *
+ sizeof(struct virtchnl_channel_info);
+ if (vti->num_tc == 0)
+ err_msg_format = true;
+ }
+ break;
+ case VIRTCHNL_OP_DISABLE_CHANNELS:
+ break;
+ case VIRTCHNL_OP_ADD_CLOUD_FILTER:
+ valid_len = sizeof(struct virtchnl_filter);
+ break;
+ case VIRTCHNL_OP_DEL_CLOUD_FILTER:
+ valid_len = sizeof(struct virtchnl_filter);
+ break;
/* These are always errors coming from the VF. */
case VIRTCHNL_OP_EVENT:
case VIRTCHNL_OP_UNKNOWN:
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
-/* Must be consisitent with blk_mq_poll_stats_bkt() */
+/* Must be consistent with blk_mq_poll_stats_bkt() */
#define BLK_MQ_POLL_STATS_BKTS 16
/*
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled && sk) { \
+ if (cgroup_bpf_enabled) { \
__ret = __cgroup_bpf_run_filter_sk(sk, \
BPF_CGROUP_INET_SOCK_CREATE); \
} \
#if GCC_VERSION >= 40100
# define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
-
-#define __nostackprotector __attribute__((__optimize__("no-stack-protector")))
#endif
#if GCC_VERSION >= 40300
#endif /* __CHECKER__ */
#endif /* GCC_VERSION >= 40300 */
+#if GCC_VERSION >= 40400
+#define __optimize(level) __attribute__((__optimize__(level)))
+#define __nostackprotector __optimize("no-stack-protector")
+#endif /* GCC_VERSION >= 40400 */
+
#if GCC_VERSION >= 40500
#ifndef __CHECKER__
#endif
#endif
+/*
+ * calling noreturn functions, __builtin_unreachable() and __builtin_trap()
+ * confuse the stack allocation in gcc, leading to overly large stack
+ * frames, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365
+ *
+ * Adding an empty inline assembly before it works around the problem
+ */
+#define barrier_before_unreachable() asm volatile("")
+
/*
* Mark a position in code as unreachable. This can be used to
* suppress control flow warnings after asm blocks that transfer
* unreleased. Really, we need to have autoconf for the kernel.
*/
#define unreachable() \
- do { annotate_unreachable(); __builtin_unreachable(); } while (0)
+ do { \
+ annotate_unreachable(); \
+ barrier_before_unreachable(); \
+ __builtin_unreachable(); \
+ } while (0)
/* Mark a function definition as prohibited from being cloned. */
#define __noclone __attribute__((__noclone__, __optimize__("no-tracer")))
# define barrier_data(ptr) barrier()
#endif
+/* workaround for GCC PR82365 if needed */
+#ifndef barrier_before_unreachable
+# define barrier_before_unreachable() do { } while (0)
+#endif
+
/* Unreachable code */
#ifdef CONFIG_STACK_VALIDATION
/*
#endif /* __ASSEMBLY__ */
+#ifndef __optimize
+# define __optimize(level)
+#endif
+
/* Compile time object size, -1 for unknown */
#ifndef __compiletime_object_size
# define __compiletime_object_size(obj) -1
}
#endif
-#ifdef CONFIG_ARCH_HAS_CPU_RELAX
+#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_ARCH_HAS_CPU_RELAX)
void cpuidle_poll_state_init(struct cpuidle_driver *drv);
#else
static inline void cpuidle_poll_state_init(struct cpuidle_driver *drv) {}
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
#define for_each_cpu_not(cpu, mask) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#define for_each_cpu_wrap(cpu, mask, start) \
+ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
#define for_each_cpu_and(cpu, mask, and) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
#else
/*
* This is a hack for the legacy x86 forbid_dac and iommu_sac_force. Please
- * don't use this is new code.
+ * don't use this in new code.
*/
#ifndef arch_dma_supported
#define arch_dma_supported(dev, mask) (1)
#include <linux/set_memory.h>
#include <linux/kallsyms.h>
-#include <net/xdp.h>
#include <net/sch_generic.h>
#include <uapi/linux/filter.h>
struct sock;
struct seccomp_data;
struct bpf_prog_aux;
+struct xdp_rxq_info;
/* ArgX, context and stack frame pointer register positions. Note,
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
struct fwnode_handle *(*get)(struct fwnode_handle *fwnode);
void (*put)(struct fwnode_handle *fwnode);
bool (*device_is_available)(const struct fwnode_handle *fwnode);
- void *(*device_get_match_data)(const struct fwnode_handle *fwnode,
- const struct device *dev);
+ const void *(*device_get_match_data)(const struct fwnode_handle *fwnode,
+ const struct device *dev);
bool (*property_present)(const struct fwnode_handle *fwnode,
const char *propname);
int (*property_read_int_array)(const struct fwnode_handle *fwnode,
* Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net>
* Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright (c) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
#define FILS_ERP_MAX_REALM_LEN 253
#define FILS_ERP_MAX_RRK_LEN 64
-#define PMK_MAX_LEN 48
+#define PMK_MAX_LEN 64
/* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */
enum ieee80211_pub_actioncode {
return (u8 *)hdr + 24;
}
+/**
+ * ieee80211_get_tid - get qos TID
+ * @hdr: the frame
+ */
+static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr)
+{
+ u8 *qc = ieee80211_get_qos_ctl(hdr);
+
+ return qc[0] & IEEE80211_QOS_CTL_TID_MASK;
+}
+
/**
* ieee80211_get_SA - get pointer to SA
* @hdr: the frame
#include <generated/autoconf.h>
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define __BIG_ENDIAN 4321
+#else
+#define __LITTLE_ENDIAN 1234
+#endif
+
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
*/
#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option))
+/* Make sure we always have all types and struct attributes defined. */
+#include <linux/compiler_types.h>
+
#endif /* __LINUX_KCONFIG_H */
KCORE_VMALLOC,
KCORE_RAM,
KCORE_VMEMMAP,
+ KCORE_USER,
KCORE_OTHER,
};
static inline void mod_memcg_state(struct mem_cgroup *memcg,
int idx, int val)
{
- preempt_disable();
+ unsigned long flags;
+
+ local_irq_save(flags);
__mod_memcg_state(memcg, idx, val);
- preempt_enable();
+ local_irq_restore(flags);
}
/**
static inline void mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
- preempt_disable();
+ unsigned long flags;
+
+ local_irq_save(flags);
__mod_lruvec_state(lruvec, idx, val);
- preempt_enable();
+ local_irq_restore(flags);
}
static inline void __mod_lruvec_page_state(struct page *page,
static inline void mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
- preempt_disable();
+ unsigned long flags;
+
+ local_irq_save(flags);
__mod_lruvec_page_state(page, idx, val);
- preempt_enable();
+ local_irq_restore(flags);
}
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
static inline void count_memcg_events(struct mem_cgroup *memcg,
int idx, unsigned long count)
{
- preempt_disable();
+ unsigned long flags;
+
+ local_irq_save(flags);
__count_memcg_events(memcg, idx, count);
- preempt_enable();
+ local_irq_restore(flags);
}
/* idx can be of type enum memcg_event_item or vm_event_item */
} tasklet_ctx;
int reset_notify_added;
struct list_head reset_notify;
+ struct mlx5_eq *eq;
};
mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL);
}
-int mlx5_init_cq_table(struct mlx5_core_dev *dev);
-void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev);
+static inline void mlx5_cq_hold(struct mlx5_core_cq *cq)
+{
+ refcount_inc(&cq->refcount);
+}
+
+static inline void mlx5_cq_put(struct mlx5_core_cq *cq)
+{
+ if (refcount_dec_and_test(&cq->refcount))
+ complete(&cq->free);
+}
+
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *in, int inlen);
int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
dma_addr_t map;
};
-struct mlx5_buf {
- struct mlx5_buf_list direct;
- int npages;
- int size;
- u8 page_shift;
-};
-
struct mlx5_frag_buf {
struct mlx5_buf_list *frags;
int npages;
u8 page_shift;
};
+struct mlx5_frag_buf_ctrl {
+ struct mlx5_frag_buf frag_buf;
+ u32 sz_m1;
+ u32 frag_sz_m1;
+ u8 log_sz;
+ u8 log_stride;
+ u8 log_frag_strides;
+};
+
struct mlx5_eq_tasklet {
struct list_head list;
struct list_head process_list;
mempool_t *pool;
};
+struct mlx5_cq_table {
+ /* protect radix tree */
+ spinlock_t lock;
+ struct radix_tree_root tree;
+};
+
struct mlx5_eq {
struct mlx5_core_dev *dev;
+ struct mlx5_cq_table cq_table;
__be32 __iomem *doorbell;
u32 cons_index;
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
int size;
unsigned int irqn;
u8 eqn;
struct delayed_work recover_work;
};
-struct mlx5_cq_table {
- /* protect radix tree
- */
- spinlock_t lock;
- struct radix_tree_root tree;
-};
-
struct mlx5_qp_table {
/* protect radix tree
*/
struct dentry *cmdif_debugfs;
/* end: qp staff */
- /* start: cq staff */
- struct mlx5_cq_table cq_table;
- /* end: cq staff */
-
/* start: mkey staff */
struct mlx5_mkey_table mkey_table;
/* end: mkey staff */
bool grh_required;
};
-static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset)
+static inline void *mlx5_buf_offset(struct mlx5_frag_buf *buf, int offset)
{
- return buf->direct.buf + offset;
+ return buf->frags->buf + offset;
}
#define STRUCT_FIELD(header, field) \
return key & 0xffffff00u;
}
+static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
+ void *cqc)
+{
+ fbc->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
+ fbc->log_sz = MLX5_GET(cqc, cqc, log_cq_size);
+ fbc->sz_m1 = (1 << fbc->log_sz) - 1;
+ fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride;
+ fbc->frag_sz_m1 = (1 << fbc->log_frag_strides) - 1;
+}
+
+static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
+ u32 ix)
+{
+ unsigned int frag = (ix >> fbc->log_frag_strides);
+
+ return fbc->frag_buf.frags[frag].buf +
+ ((fbc->frag_sz_m1 & ix) << fbc->log_stride);
+}
+
int mlx5_cmd_init(struct mlx5_core_dev *dev);
void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
- struct mlx5_buf *buf, int node);
-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
-void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
+ struct mlx5_frag_buf *buf, int node);
+int mlx5_buf_alloc(struct mlx5_core_dev *dev,
+ int size, struct mlx5_frag_buf *buf);
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
struct mlx5_frag_buf *buf, int node);
void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
void mlx5_register_debugfs(void);
void mlx5_unregister_debugfs(void);
-int mlx5_eq_init(struct mlx5_core_dev *dev);
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
-void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
+
+void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas);
void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
-void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name,
- enum mlx5_eq_type type);
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_start_eqs(struct mlx5_core_dev *dev);
-void mlx5_stop_eqs(struct mlx5_core_dev *dev);
int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
unsigned int *irqn);
int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
int size_in, void *data_out, int size_out,
u16 reg_num, int arg, int write);
-int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
- u32 *out, int outlen);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
-int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db);
int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db,
int node);
return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
}
+#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
+#define MLX5_VPORT_MANAGER(mdev) \
+ (MLX5_CAP_GEN(mdev, vport_group_manager) && \
+ (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
+ mlx5_core_is_pf(mdev))
+
static inline int mlx5_get_gid_table_len(u16 param)
{
if (param > 4) {
--- /dev/null
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _MLX5_ESWITCH_
+#define _MLX5_ESWITCH_
+
+#include <linux/mlx5/driver.h>
+
+enum {
+ SRIOV_NONE,
+ SRIOV_LEGACY,
+ SRIOV_OFFLOADS
+};
+
+enum {
+ REP_ETH,
+ REP_IB,
+ NUM_REP_TYPES,
+};
+
+struct mlx5_eswitch_rep;
+struct mlx5_eswitch_rep_if {
+ int (*load)(struct mlx5_core_dev *dev,
+ struct mlx5_eswitch_rep *rep);
+ void (*unload)(struct mlx5_eswitch_rep *rep);
+ void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+ void *priv;
+ bool valid;
+};
+
+struct mlx5_eswitch_rep {
+ struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
+ u16 vport;
+ u8 hw_id[ETH_ALEN];
+ u16 vlan;
+ u32 vlan_refcount;
+};
+
+void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index,
+ struct mlx5_eswitch_rep_if *rep_if,
+ u8 rep_type);
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index,
+ u8 rep_type);
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+ int vport,
+ u8 rep_type);
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+ int vport);
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
+ int vport, u32 sqn);
+#endif
#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
-#ifdef arch_unmap_kpfn
-extern void arch_unmap_kpfn(unsigned long pfn);
-#else
-static __always_inline void arch_unmap_kpfn(unsigned long pfn) { }
-#endif
-
#endif
#include <linux/in.h>
#include <linux/pim.h>
-#include <linux/rhashtable.h>
-#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/fib_notifier.h>
#include <uapi/linux/mroute.h>
+#include <linux/mroute_base.h>
#ifdef CONFIG_IP_MROUTE
static inline int ip_mroute_opt(int opt)
}
#endif
-struct vif_device {
- struct net_device *dev; /* Device we are using */
- struct netdev_phys_item_id dev_parent_id; /* Device parent ID */
- unsigned long bytes_in,bytes_out;
- unsigned long pkt_in,pkt_out; /* Statistics */
- unsigned long rate_limit; /* Traffic shaping (NI) */
- unsigned char threshold; /* TTL threshold */
- unsigned short flags; /* Control flags */
- __be32 local,remote; /* Addresses(remote for tunnels)*/
- int link; /* Physical interface index */
-};
-
struct vif_entry_notifier_info {
struct fib_notifier_info info;
struct net_device *dev;
#define VIFF_STATIC 0x8000
-#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
-
-struct mr_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock __rcu *mroute_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc_unres_queue;
- struct vif_device vif_table[MAXVIFS];
- struct rhltable mfc_hash;
- struct list_head mfc_cache_list;
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
- int mroute_reg_vif_num;
-};
-
-/* mfc_flags:
- * MFC_STATIC - the entry was added statically (not by a routing daemon)
- * MFC_OFFLOAD - the entry was offloaded to the hardware
- */
-enum {
- MFC_STATIC = BIT(0),
- MFC_OFFLOAD = BIT(1),
-};
-
struct mfc_cache_cmp_arg {
__be32 mfc_mcastgrp;
__be32 mfc_origin;
/**
* struct mfc_cache - multicast routing entries
- * @mnode: rhashtable list
+ * @_c: Common multicast routing information; has to be first [for casting]
* @mfc_mcastgrp: destination multicast group address
* @mfc_origin: source address
* @cmparg: used for rhashtable comparisons
- * @mfc_parent: source interface (iif)
- * @mfc_flags: entry flags
- * @expires: unresolved entry expire time
- * @unresolved: unresolved cached skbs
- * @last_assert: time of last assert
- * @minvif: minimum VIF id
- * @maxvif: maximum VIF id
- * @bytes: bytes that have passed for this entry
- * @pkt: packets that have passed for this entry
- * @wrong_if: number of wrong source interface hits
- * @lastuse: time of last use of the group (traffic or update)
- * @ttls: OIF TTL threshold array
- * @refcount: reference count for this entry
- * @list: global entry list
- * @rcu: used for entry destruction
*/
struct mfc_cache {
- struct rhlist_head mnode;
+ struct mr_mfc _c;
union {
struct {
__be32 mfc_mcastgrp;
};
struct mfc_cache_cmp_arg cmparg;
};
- vifi_t mfc_parent;
- int mfc_flags;
-
- union {
- struct {
- unsigned long expires;
- struct sk_buff_head unresolved;
- } unres;
- struct {
- unsigned long last_assert;
- int minvif;
- int maxvif;
- unsigned long bytes;
- unsigned long pkt;
- unsigned long wrong_if;
- unsigned long lastuse;
- unsigned char ttls[MAXVIFS];
- refcount_t refcount;
- } res;
- } mfc_un;
- struct list_head list;
- struct rcu_head rcu;
};
struct mfc_entry_notifier_info {
static inline void ipmr_cache_put(struct mfc_cache *c)
{
- if (refcount_dec_and_test(&c->mfc_un.res.refcount))
+ if (refcount_dec_and_test(&c->_c.mfc_un.res.refcount))
ipmr_cache_free(c);
}
static inline void ipmr_cache_hold(struct mfc_cache *c)
{
- refcount_inc(&c->mfc_un.res.refcount);
+ refcount_inc(&c->_c.mfc_un.res.refcount);
}
#endif
#include <linux/skbuff.h> /* for struct sk_buff_head */
#include <net/net_namespace.h>
#include <uapi/linux/mroute6.h>
+#include <linux/mroute_base.h>
#ifdef CONFIG_IPV6_MROUTE
static inline int ip6_mroute_opt(int opt)
}
#endif
-struct mif_device {
- struct net_device *dev; /* Device we are using */
- unsigned long bytes_in,bytes_out;
- unsigned long pkt_in,pkt_out; /* Statistics */
- unsigned long rate_limit; /* Traffic shaping (NI) */
- unsigned char threshold; /* TTL threshold */
- unsigned short flags; /* Control flags */
- int link; /* Physical interface index */
-};
-
#define VIFF_STATIC 0x8000
-struct mfc6_cache {
- struct list_head list;
- struct in6_addr mf6c_mcastgrp; /* Group the entry belongs to */
- struct in6_addr mf6c_origin; /* Source of packet */
- mifi_t mf6c_parent; /* Source interface */
- int mfc_flags; /* Flags on line */
+struct mfc6_cache_cmp_arg {
+ struct in6_addr mf6c_mcastgrp;
+ struct in6_addr mf6c_origin;
+};
+struct mfc6_cache {
+ struct mr_mfc _c;
union {
struct {
- unsigned long expires;
- struct sk_buff_head unresolved; /* Unresolved buffers */
- } unres;
- struct {
- unsigned long last_assert;
- int minvif;
- int maxvif;
- unsigned long bytes;
- unsigned long pkt;
- unsigned long wrong_if;
- unsigned long lastuse;
- unsigned char ttls[MAXMIFS]; /* TTL thresholds */
- } res;
- } mfc_un;
+ struct in6_addr mf6c_mcastgrp;
+ struct in6_addr mf6c_origin;
+ };
+ struct mfc6_cache_cmp_arg cmparg;
+ };
};
-#define MFC_STATIC 1
-#define MFC_NOTIFY 2
-
-#define MFC6_LINES 64
-
-#define MFC6_HASH(a, g) (((__force u32)(a)->s6_addr32[0] ^ \
- (__force u32)(a)->s6_addr32[1] ^ \
- (__force u32)(a)->s6_addr32[2] ^ \
- (__force u32)(a)->s6_addr32[3] ^ \
- (__force u32)(g)->s6_addr32[0] ^ \
- (__force u32)(g)->s6_addr32[1] ^ \
- (__force u32)(g)->s6_addr32[2] ^ \
- (__force u32)(g)->s6_addr32[3]) % MFC6_LINES)
-
#define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */
struct rtmsg;
struct rtmsg *rtm, u32 portid);
#ifdef CONFIG_IPV6_MROUTE
-extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb);
extern int ip6mr_sk_done(struct sock *sk);
#else
-static inline struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+static inline bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- return NULL;
+ return false;
}
static inline int ip6mr_sk_done(struct sock *sk)
{
--- /dev/null
+#ifndef __LINUX_MROUTE_BASE_H
+#define __LINUX_MROUTE_BASE_H
+
+#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
+#include <linux/spinlock.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+/**
+ * struct vif_device - interface representor for multicast routing
+ * @dev: network device being used
+ * @bytes_in: statistic; bytes ingressing
+ * @bytes_out: statistic; bytes egresing
+ * @pkt_in: statistic; packets ingressing
+ * @pkt_out: statistic; packets egressing
+ * @rate_limit: Traffic shaping (NI)
+ * @threshold: TTL threshold
+ * @flags: Control flags
+ * @link: Physical interface index
+ * @dev_parent_id: device parent id
+ * @local: Local address
+ * @remote: Remote address for tunnels
+ */
+struct vif_device {
+ struct net_device *dev;
+ unsigned long bytes_in, bytes_out;
+ unsigned long pkt_in, pkt_out;
+ unsigned long rate_limit;
+ unsigned char threshold;
+ unsigned short flags;
+ int link;
+
+ /* Currently only used by ipmr */
+ struct netdev_phys_item_id dev_parent_id;
+ __be32 local, remote;
+};
+
+#ifndef MAXVIFS
+/* This one is nasty; value is defined in uapi using different symbols for
+ * mroute and morute6 but both map into same 32.
+ */
+#define MAXVIFS 32
+#endif
+
+#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
+
+/* mfc_flags:
+ * MFC_STATIC - the entry was added statically (not by a routing daemon)
+ * MFC_OFFLOAD - the entry was offloaded to the hardware
+ */
+enum {
+ MFC_STATIC = BIT(0),
+ MFC_OFFLOAD = BIT(1),
+};
+
+/**
+ * struct mr_mfc - common multicast routing entries
+ * @mnode: rhashtable list
+ * @mfc_parent: source interface (iif)
+ * @mfc_flags: entry flags
+ * @expires: unresolved entry expire time
+ * @unresolved: unresolved cached skbs
+ * @last_assert: time of last assert
+ * @minvif: minimum VIF id
+ * @maxvif: maximum VIF id
+ * @bytes: bytes that have passed for this entry
+ * @pkt: packets that have passed for this entry
+ * @wrong_if: number of wrong source interface hits
+ * @lastuse: time of last use of the group (traffic or update)
+ * @ttls: OIF TTL threshold array
+ * @refcount: reference count for this entry
+ * @list: global entry list
+ * @rcu: used for entry destruction
+ */
+struct mr_mfc {
+ struct rhlist_head mnode;
+ unsigned short mfc_parent;
+ int mfc_flags;
+
+ union {
+ struct {
+ unsigned long expires;
+ struct sk_buff_head unresolved;
+ } unres;
+ struct {
+ unsigned long last_assert;
+ int minvif;
+ int maxvif;
+ unsigned long bytes;
+ unsigned long pkt;
+ unsigned long wrong_if;
+ unsigned long lastuse;
+ unsigned char ttls[MAXVIFS];
+ refcount_t refcount;
+ } res;
+ } mfc_un;
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
+struct mr_table;
+
+/**
+ * struct mr_table_ops - callbacks and info for protocol-specific ops
+ * @rht_params: parameters for accessing the MFC hash
+ * @cmparg_any: a hash key to be used for matching on (*,*) routes
+ */
+struct mr_table_ops {
+ const struct rhashtable_params *rht_params;
+ void *cmparg_any;
+};
+
+/**
+ * struct mr_table - a multicast routing table
+ * @list: entry within a list of multicast routing tables
+ * @net: net where this table belongs
+ * @ops: protocol specific operations
+ * @id: identifier of the table
+ * @mroute_sk: socket associated with the table
+ * @ipmr_expire_timer: timer for handling unresolved routes
+ * @mfc_unres_queue: list of unresolved MFC entries
+ * @vif_table: array containing all possible vifs
+ * @mfc_hash: Hash table of all resolved routes for easy lookup
+ * @mfc_cache_list: list of resovled routes for possible traversal
+ * @maxvif: Identifier of highest value vif currently in use
+ * @cache_resolve_queue_len: current size of unresolved queue
+ * @mroute_do_assert: Whether to inform userspace on wrong ingress
+ * @mroute_do_pim: Whether to receive IGMP PIMv1
+ * @mroute_reg_vif_num: PIM-device vif index
+ */
+struct mr_table {
+ struct list_head list;
+ possible_net_t net;
+ struct mr_table_ops ops;
+ u32 id;
+ struct sock __rcu *mroute_sk;
+ struct timer_list ipmr_expire_timer;
+ struct list_head mfc_unres_queue;
+ struct vif_device vif_table[MAXVIFS];
+ struct rhltable mfc_hash;
+ struct list_head mfc_cache_list;
+ int maxvif;
+ atomic_t cache_resolve_queue_len;
+ bool mroute_do_assert;
+ bool mroute_do_pim;
+ int mroute_reg_vif_num;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net));
+
+/* These actually return 'struct mr_mfc *', but to avoid need for explicit
+ * castings they simply return void.
+ */
+void *mr_mfc_find_parent(struct mr_table *mrt,
+ void *hasharg, int parent);
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi);
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm);
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock);
+#else
+static inline void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask)
+{
+}
+
+static inline void *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net))
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_find_parent(struct mr_table *mrt,
+ void *hasharg, int parent)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_find_any_parent(struct mr_table *mrt,
+ int vifi)
+{
+ return NULL;
+}
+
+static inline struct mr_mfc *mr_mfc_find_any(struct mr_table *mrt,
+ int vifi, void *hasharg)
+{
+ return NULL;
+}
+
+static inline int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm)
+{
+ return -EINVAL;
+}
+
+static inline int
+mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock)
+{
+ return -EINVAL;
+}
+#endif
+
+static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg)
+{
+ return mr_mfc_find_parent(mrt, hasharg, -1);
+}
+
+#ifdef CONFIG_PROC_FS
+struct mr_vif_iter {
+ struct seq_net_private p;
+ struct mr_table *mrt;
+ int ct;
+};
+
+struct mr_mfc_iter {
+ struct seq_net_private p;
+ struct mr_table *mrt;
+ struct list_head *cache;
+
+ /* Lock protecting the mr_table's unresolved queue */
+ spinlock_t *lock;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos);
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return *pos ? mr_vif_seq_idx(seq_file_net(seq),
+ seq->private, *pos - 1)
+ : SEQ_START_TOKEN;
+}
+
+/* These actually return 'struct mr_mfc *', but to avoid need for explicit
+ * castings they simply return void.
+ */
+void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos);
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos);
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+ struct mr_table *mrt, spinlock_t *lock)
+{
+ struct mr_mfc_iter *it = seq->private;
+
+ it->mrt = mrt;
+ it->cache = NULL;
+ it->lock = lock;
+
+ return *pos ? mr_mfc_seq_idx(seq_file_net(seq),
+ seq->private, *pos - 1)
+ : SEQ_START_TOKEN;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+ struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
+
+ if (it->cache == &mrt->mfc_unres_queue)
+ spin_unlock_bh(it->lock);
+ else if (it->cache == &mrt->mfc_cache_list)
+ rcu_read_unlock();
+}
+#else
+static inline void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter,
+ loff_t pos)
+{
+ return NULL;
+}
+
+static inline void *mr_vif_seq_next(struct seq_file *seq,
+ void *v, loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+ struct mr_table *mrt, spinlock_t *lock)
+{
+ return NULL;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+}
+#endif
+#endif
+#endif
struct socket *newsock, int flags, bool kern);
int (*getname) (struct socket *sock,
struct sockaddr *addr,
- int *sockaddr_len, int peer);
+ int peer);
__poll_t (*poll) (struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int (*ioctl) (struct socket *sock, unsigned int cmd,
int kernel_accept(struct socket *sock, struct socket **newsock, int flags);
int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags);
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
- int *addrlen);
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
- int *addrlen);
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval,
int *optlen);
int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
#if IS_ENABLED(CONFIG_TIPC)
struct tipc_bearer __rcu *tipc_ptr;
#endif
+#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
void *atalk_ptr;
+#endif
struct in_device __rcu *ip_ptr;
+#if IS_ENABLED(CONFIG_DECNET)
struct dn_dev __rcu *dn_ptr;
+#endif
struct inet6_dev __rcu *ip6_ptr;
+#if IS_ENABLED(CONFIG_AX25)
void *ax25_ptr;
+#endif
struct wireless_dev *ieee80211_ptr;
struct wpan_dev *ieee802154_ptr;
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
static inline unsigned long array_index_mask_nospec(unsigned long index,
unsigned long size)
{
- /*
- * Warn developers about inappropriate array_index_nospec() usage.
- *
- * Even if the CPU speculates past the WARN_ONCE branch, the
- * sign bit of @index is taken into account when generating the
- * mask.
- *
- * This warning is compiled out when the compiler can infer that
- * @index and @size are less than LONG_MAX.
- */
- if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,
- "array_index_nospec() limited to range of [0, LONG_MAX]\n"))
- return 0;
-
/*
* Always calculate and emit the mask even if the compiler
* thinks the mask is not needed. The compiler does not take
}
#endif
+/*
+ * Warn developers about inappropriate array_index_nospec() usage.
+ *
+ * Even if the CPU speculates past the WARN_ONCE branch, the
+ * sign bit of @index is taken into account when generating the
+ * mask.
+ *
+ * This warning is compiled out when the compiler can infer that
+ * @index and @size are less than LONG_MAX.
+ */
+#define array_index_mask_nospec_check(index, size) \
+({ \
+ if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \
+ "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \
+ _mask = 0; \
+ else \
+ _mask = array_index_mask_nospec(index, size); \
+ _mask; \
+})
+
/*
* array_index_nospec - sanitize an array index after a bounds check
*
({ \
typeof(index) _i = (index); \
typeof(size) _s = (size); \
- unsigned long _mask = array_index_mask_nospec(_i, _s); \
+ unsigned long _mask = array_index_mask_nospec_check(_i, _s); \
\
BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \
BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \
#include <linux/interrupt.h>
#include <linux/perf_event.h>
+#include <linux/platform_device.h>
#include <linux/sysfs.h>
#include <asm/cputype.h>
-/*
- * struct arm_pmu_platdata - ARM PMU platform data
- *
- * @handle_irq: an optional handler which will be called from the
- * interrupt and passed the address of the low level handler,
- * and can be used to implement any platform specific handling
- * before or after calling it.
- *
- * @irq_flags: if non-zero, these flags will be passed to request_irq
- * when requesting interrupts for this PMU device.
- */
-struct arm_pmu_platdata {
- irqreturn_t (*handle_irq)(int irq, void *dev,
- irq_handler_t pmu_handler);
- unsigned long irq_flags;
-};
-
#ifdef CONFIG_ARM_PMU
/*
struct arm_pmu {
struct pmu pmu;
- cpumask_t active_irqs;
cpumask_t supported_cpus;
char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
/* Internal functions only for core arm_pmu code */
struct arm_pmu *armpmu_alloc(void);
+struct arm_pmu *armpmu_alloc_atomic(void);
void armpmu_free(struct arm_pmu *pmu);
int armpmu_register(struct arm_pmu *pmu);
-int armpmu_request_irqs(struct arm_pmu *armpmu);
-void armpmu_free_irqs(struct arm_pmu *armpmu);
-int armpmu_request_irq(struct arm_pmu *armpmu, int cpu);
-void armpmu_free_irq(struct arm_pmu *armpmu, int cpu);
+int armpmu_request_irq(int irq, int cpu);
+void armpmu_free_irq(int irq, int cpu);
#define ARMV8_PMU_PDEV_NAME "armv8-pmu"
int genphy_c45_an_disable_aneg(struct phy_device *phydev);
int genphy_c45_read_mdix(struct phy_device *phydev);
+/* The gen10g_* functions are the old Clause 45 stub */
+int gen10g_config_aneg(struct phy_device *phydev);
+int gen10g_read_status(struct phy_device *phydev);
+int gen10g_no_soft_reset(struct phy_device *phydev);
+int gen10g_config_init(struct phy_device *phydev);
+int gen10g_suspend(struct phy_device *phydev);
+int gen10g_resume(struct phy_device *phydev);
+
static inline int phy_read_status(struct phy_device *phydev)
{
if (!phydev->drv)
enum dev_dma_attr device_get_dma_attr(struct device *dev);
-void *device_get_match_data(struct device *dev);
+const void *device_get_match_data(struct device *dev);
int device_get_phy_mode(struct device *dev);
static inline void ptp_classifier_init(void)
{
}
+static inline unsigned int ptp_classify_raw(struct sk_buff *skb)
+{
+ return PTP_CLASS_NONE;
+}
#endif
#endif /* _PTP_CLASSIFY_H_ */
{
void *ptr;
+ /* The READ_ONCE in __ptr_ring_peek guarantees that anyone
+ * accessing data through the pointer is up to date. Pairs
+ * with smp_wmb in __ptr_ring_produce.
+ */
ptr = __ptr_ring_peek(r);
if (ptr)
__ptr_ring_discard_one(r);
- /* Make sure anyone accessing data through the pointer is up to date. */
- /* Pairs with smp_wmb in __ptr_ring_produce. */
- smp_read_barrier_depends();
return ptr;
}
*/
static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
{
- if (size * sizeof(void *) > KMALLOC_MAX_SIZE)
+ if (size > KMALLOC_MAX_SIZE / sizeof(void *))
return NULL;
return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
}
extern int rtnl_is_locked(void);
extern wait_queue_head_t netdev_unregistering_wq;
-extern struct mutex net_mutex;
+extern struct rw_semaphore net_sem;
#ifdef CONFIG_PROVE_LOCKING
extern bool lockdep_rtnl_is_held(void);
atomic_inc(&mm->mm_count);
}
-extern void mmdrop(struct mm_struct *mm);
+extern void __mmdrop(struct mm_struct *mm);
+
+static inline void mmdrop(struct mm_struct *mm)
+{
+ /*
+ * The implicit full barrier implied by atomic_dec_and_test() is
+ * required by the membarrier system call before returning to
+ * user-space, after storing to rq->curr.
+ */
+ if (unlikely(atomic_dec_and_test(&mm->mm_count)))
+ __mmdrop(mm);
+}
/**
* mmget() - Pin the address space associated with a &struct mm_struct.
#include <linux/uidgid.h>
#include <linux/atomic.h>
+#include <linux/ratelimit.h>
struct key;
defined(CONFIG_NET)
atomic_long_t locked_vm;
#endif
+
+ /* Miscellaneous per-user rate limit */
+ struct ratelimit_state ratelimit;
};
extern int uids_sysfs_init(void);
*
* Distributed under the terms of the GNU GPL, version 2
*
- * Please see kernel/semaphore.c for documentation of these functions
+ * Please see kernel/locking/semaphore.c for documentation of these functions
*/
#ifndef __LINUX_SEMAPHORE_H
#define __LINUX_SEMAPHORE_H
#if IS_ENABLED(CONFIG_SFP)
int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id);
void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes);
int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
return PORT_OTHER;
}
-static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id)
-{
- return PHY_INTERFACE_MODE_NA;
-}
-
static inline void sfp_parse_support(struct sfp_bus *bus,
const struct sfp_eeprom_id *id,
unsigned long *support)
{
}
+static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes)
+{
+ return PHY_INTERFACE_MODE_NA;
+}
+
static inline int sfp_get_module_info(struct sfp_bus *bus,
struct ethtool_modinfo *modinfo)
{
#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
+void mm_unaccount_pinned_pages(struct mmpin *mmp);
+
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
struct ubuf_info *uarg);
return true;
}
-/* For small packets <= CHECKSUM_BREAK peform checksum complete directly
+/* For small packets <= CHECKSUM_BREAK perform checksum complete directly
* in checksum_init.
*/
#define CHECKSUM_BREAK 76
unsigned int flags, struct timespec *timeout);
extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
unsigned int vlen, unsigned int flags);
+
+extern struct ns_common *get_net_ns(struct ns_common *ns);
#endif /* _LINUX_SOCKET_H */
extern void mark_page_lazyfree(struct page *page);
extern void swap_setup(void);
-extern void add_page_to_unevictable_list(struct page *page);
-
extern void lru_cache_add_active_or_unevictable(struct page *page,
struct vm_area_struct *vma);
extern void workqueue_set_max_active(struct workqueue_struct *wq,
int max_active);
+extern struct work_struct *current_work(void);
extern bool current_is_workqueue_rescuer(void);
extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
extern unsigned int work_busy(struct work_struct *work);
struct net_device *mvme147lance_probe(int unit);
struct net_device *tc515_probe(int unit);
struct net_device *lance_probe(int unit);
-struct net_device *mac8390_probe(int unit);
-struct net_device *mac89x0_probe(int unit);
struct net_device *cops_probe(int unit);
struct net_device *ltpc_probe(void);
struct tcf_result *);
int (*dump)(struct sk_buff *, struct tc_action *, int, int);
void (*cleanup)(struct tc_action *);
- int (*lookup)(struct net *, struct tc_action **, u32);
+ int (*lookup)(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack);
int (*init)(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act, int ovr,
- int bind);
+ int bind, struct netlink_ext_ack *extack);
int (*walk)(struct net *, struct sk_buff *,
- struct netlink_callback *, int, const struct tc_action_ops *);
+ struct netlink_callback *, int,
+ const struct tc_action_ops *,
+ struct netlink_ext_ack *);
void (*stats_update)(struct tc_action *, u64, u32, u64);
struct net_device *(*get_dev)(const struct tc_action *a);
};
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops);
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack);
int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index);
bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
int bind);
int nr_actions, struct tcf_result *res);
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
- struct list_head *actions);
+ struct list_head *actions, struct netlink_ext_ack *extack);
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind);
+ char *name, int ovr, int bind,
+ struct netlink_ext_ack *extack);
int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
extern ax25_dev *ax25_dev_list;
extern spinlock_t ax25_dev_lock;
+#if IS_ENABLED(CONFIG_AX25)
static inline ax25_dev *ax25_dev_ax25dev(struct net_device *dev)
{
return dev->ax25_ptr;
}
+#endif
ax25_dev *ax25_addr_ax25dev(ax25_address *);
void ax25_dev_device_up(struct net_device *);
* @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer
* @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last
* (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs.
+ * @ack_signal: signal strength (in dBm) of the last ACK frame.
*/
struct station_info {
u64 filled;
u64 rx_duration;
u8 rx_beacon_signal_avg;
struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1];
+ s8 ack_signal;
};
#if IS_ENABLED(CONFIG_CFG80211)
* @ASSOC_REQ_DISABLE_HT: Disable HT (802.11n)
* @ASSOC_REQ_DISABLE_VHT: Disable VHT
* @ASSOC_REQ_USE_RRM: Declare RRM capability in this association
+ * @CONNECT_REQ_EXTERNAL_AUTH_SUPPORT: User space indicates external
+ * authentication capability. Drivers can offload authentication to
+ * userspace if this flag is set. Only applicable for cfg80211_connect()
+ * request (connect callback).
*/
enum cfg80211_assoc_req_flags {
- ASSOC_REQ_DISABLE_HT = BIT(0),
- ASSOC_REQ_DISABLE_VHT = BIT(1),
- ASSOC_REQ_USE_RRM = BIT(2),
+ ASSOC_REQ_DISABLE_HT = BIT(0),
+ ASSOC_REQ_DISABLE_VHT = BIT(1),
+ ASSOC_REQ_USE_RRM = BIT(2),
+ CONNECT_REQ_EXTERNAL_AUTH_SUPPORT = BIT(3),
};
/**
const u8 *pmk_r0_name;
};
+/**
+ * struct cfg80211_external_auth_params - Trigger External authentication.
+ *
+ * Commonly used across the external auth request and event interfaces.
+ *
+ * @action: action type / trigger for external authentication. Only significant
+ * for the authentication request event interface (driver to user space).
+ * @bssid: BSSID of the peer with which the authentication has
+ * to happen. Used by both the authentication request event and
+ * authentication response command interface.
+ * @ssid: SSID of the AP. Used by both the authentication request event and
+ * authentication response command interface.
+ * @key_mgmt_suite: AKM suite of the respective authentication. Used by the
+ * authentication request event interface.
+ * @status: status code, %WLAN_STATUS_SUCCESS for successful authentication,
+ * use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space cannot give you
+ * the real status code for failures. Used only for the authentication
+ * response command interface (user space to driver).
+ */
+struct cfg80211_external_auth_params {
+ enum nl80211_external_auth_action action;
+ u8 bssid[ETH_ALEN] __aligned(2);
+ struct cfg80211_ssid ssid;
+ unsigned int key_mgmt_suite;
+ u16 status;
+};
+
/**
* struct cfg80211_ops - backend description for wireless configuration
*
* (invoked with the wireless_dev mutex held)
* @del_pmk: delete the previously configured PMK for the given authenticator.
* (invoked with the wireless_dev mutex held)
+ *
+ * @external_auth: indicates result of offloaded authentication processing from
+ * user space
*/
struct cfg80211_ops {
int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
const struct cfg80211_pmk_conf *conf);
int (*del_pmk)(struct wiphy *wiphy, struct net_device *dev,
const u8 *aa);
+ int (*external_auth)(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_external_auth_params *params);
};
/*
WIPHY_VENDOR_CMD_NEED_RUNNING = BIT(2),
};
+/**
+ * enum wiphy_opmode_flag - Station's ht/vht operation mode information flags
+ *
+ * @STA_OPMODE_MAX_BW_CHANGED: Max Bandwidth changed
+ * @STA_OPMODE_SMPS_MODE_CHANGED: SMPS mode changed
+ * @STA_OPMODE_N_SS_CHANGED: max N_SS (number of spatial streams) changed
+ *
+ */
+enum wiphy_opmode_flag {
+ STA_OPMODE_MAX_BW_CHANGED = BIT(0),
+ STA_OPMODE_SMPS_MODE_CHANGED = BIT(1),
+ STA_OPMODE_N_SS_CHANGED = BIT(2),
+};
+
+/**
+ * struct sta_opmode_info - Station's ht/vht operation mode information
+ * @changed: contains value from &enum wiphy_opmode_flag
+ * @smps_mode: New SMPS mode of a station
+ * @bw: new max bandwidth value of a station
+ * @rx_nss: new rx_nss value of a station
+ */
+
+struct sta_opmode_info {
+ u32 changed;
+ u8 smps_mode;
+ u8 bw;
+ u8 rx_nss;
+};
+
/**
* struct wiphy_vendor_command - vendor command definition
* @info: vendor command identifying information, as used in nl80211
* of it being pushed into the SKB
* @addr: the device MAC address
* @iftype: the virtual interface type
+ * @data_offset: offset of payload after the 802.11 header
* Return: 0 on success. Non-zero on error.
*/
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
- const u8 *addr, enum nl80211_iftype iftype);
+ const u8 *addr, enum nl80211_iftype iftype,
+ u8 data_offset);
/**
* ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3
static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
enum nl80211_iftype iftype)
{
- return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype);
+ return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0);
}
/**
void cfg80211_radar_event(struct wiphy *wiphy,
struct cfg80211_chan_def *chandef, gfp_t gfp);
+/**
+ * cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event
+ * @dev: network device
+ * @mac: MAC address of a station which opmode got modified
+ * @sta_opmode: station's current opmode value
+ * @gfp: context flags
+ *
+ * Driver should call this function when station's opmode modified via action
+ * frame.
+ */
+void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
+ struct sta_opmode_info *sta_opmode,
+ gfp_t gfp);
+
/**
* cfg80211_cac_event - Channel availability check (CAC) event
* @netdev: network device
* @addr: the address of the peer
* @cookie: the cookie filled in @probe_client previously
* @acked: indicates whether probe was acked or not
+ * @ack_signal: signal strength (in dBm) of the ACK frame.
+ * @is_valid_ack_signal: indicates the ack_signal is valid or not.
* @gfp: allocation flags
*/
void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
- u64 cookie, bool acked, gfp_t gfp);
+ u64 cookie, bool acked, s32 ack_signal,
+ bool is_valid_ack_signal, gfp_t gfp);
/**
* cfg80211_report_obss_beacon - report beacon from other APs
/* ethtool helper */
void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info);
+/**
+ * cfg80211_external_auth_request - userspace request for authentication
+ * @netdev: network device
+ * @params: External authentication parameters
+ * @gfp: allocation flags
+ * Returns: 0 on success, < 0 on error
+ */
+int cfg80211_external_auth_request(struct net_device *netdev,
+ struct cfg80211_external_auth_params *params,
+ gfp_t gfp);
+
/* Logging, debugging and troubleshooting/diagnostic helpers. */
/* wiphy_printk helpers, similar to dev_printk */
/**
* struct devlink_resource_ops - resource ops
* @occ_get: get the occupied size
- * @size_validate: validate the size of the resource before update, reload
- * is needed for changes to take place
*/
struct devlink_resource_ops {
u64 (*occ_get)(struct devlink *devlink);
- int (*size_validate)(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack);
};
/**
#include <linux/workqueue.h>
#include <linux/of.h>
#include <linux/ethtool.h>
+#include <linux/net_tstamp.h>
#include <net/devlink.h>
#include <net/switchdev.h>
};
struct packet_type;
+struct dsa_switch;
struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data);
void (*get_ethtool_stats)(struct dsa_switch *ds,
int port, uint64_t *data);
- int (*get_sset_count)(struct dsa_switch *ds);
+ int (*get_sset_count)(struct dsa_switch *ds, int port);
/*
* ethtool Wake-on-LAN
int (*set_wol)(struct dsa_switch *ds, int port,
struct ethtool_wolinfo *w);
+ /*
+ * ethtool timestamp info
+ */
+ int (*get_ts_info)(struct dsa_switch *ds, int port,
+ struct ethtool_ts_info *ts);
+
/*
* Suspend and resume
*/
int port, struct net_device *br);
void (*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index,
int port, struct net_device *br);
+
+ /*
+ * PTP functionality
+ */
+ int (*port_hwtstamp_get)(struct dsa_switch *ds, int port,
+ struct ifreq *ifr);
+ int (*port_hwtstamp_set)(struct dsa_switch *ds, int port,
+ struct ifreq *ifr);
+ bool (*port_txtstamp)(struct dsa_switch *ds, int port,
+ struct sk_buff *clone, unsigned int type);
+ bool (*port_rxtstamp)(struct dsa_switch *ds, int port,
+ struct sk_buff *skb, unsigned int type);
};
struct dsa_switch_driver {
};
#endif /* !LINUX_NET_ETHOC_H */
-
u32 table;
u8 action;
u8 l3mdev;
- /* 2 bytes hole, try to use */
+ u8 proto;
+ u8 ip_proto;
u32 target;
__be64 tun_id;
struct fib_rule __rcu *ctarget;
char iifname[IFNAMSIZ];
char oifname[IFNAMSIZ];
struct fib_kuid_range uid_range;
+ struct fib_rule_port_range sport_range;
+ struct fib_rule_port_range dport_range;
struct rcu_head rcu;
};
struct fib_lookup_arg {
void *lookup_ptr;
+ const void *lookup_data;
void *result;
struct fib_rule *rule;
u32 table;
[FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \
[FRA_GOTO] = { .type = NLA_U32 }, \
[FRA_L3MDEV] = { .type = NLA_U8 }, \
- [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }
+ [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \
+ [FRA_PROTOCOL] = { .type = NLA_U8 }, \
+ [FRA_IP_PROTO] = { .type = NLA_U8 }, \
+ [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \
+ [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
+
static inline void fib_rule_get(struct fib_rule *rule)
{
return frh->table;
}
+static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range)
+{
+ return range->start != 0 && range->end != 0;
+}
+
+static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
+ __be16 port)
+{
+ return ntohs(port) >= a->start &&
+ ntohs(port) <= a->end;
+}
+
+static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
+{
+ return a->start != 0 && a->end != 0 && a->end < 0xffff &&
+ a->start <= a->end;
+}
+
+static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
+ struct fib_rule_port_range *b)
+{
+ return a->start == b->start &&
+ a->end == b->end;
+}
+
+static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
+{
+ return rule->ip_proto ||
+ fib_rule_port_range_set(&rule->sport_range) ||
+ fib_rule_port_range_set(&rule->dport_range);
+}
+
struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *,
struct net *);
void fib_rules_unregister(struct fib_rules_ops *);
fl4->daddr = daddr;
fl4->saddr = saddr;
}
-
+
struct flowi6 {
struct flowi_common __fl_common;
__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
-static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
-{
- struct flow_keys keys;
-
- return __get_hash_from_flowi6(fl6, &keys);
-}
-
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys);
-
-static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4)
-{
- struct flow_keys keys;
-
- return __get_hash_from_flowi4(fl4, &keys);
-}
-
#endif
int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err, __be16 proto, int nhs);
+bool is_gretap_dev(const struct net_device *dev);
+bool is_ip6gretap_dev(const struct net_device *dev);
+
static inline int gre_calc_hlen(__be16 o_flags)
{
int addend = 4;
IEEE80211_RADIOTAP_AMPDU_IS_LAST = 0x0008,
IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR = 0x0010,
IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN = 0x0020,
+ IEEE80211_RADIOTAP_AMPDU_EOF = 0x0040,
+ IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN = 0x0080,
};
/* for IEEE80211_RADIOTAP_VHT */
int inet_listen(struct socket *sock, int backlog);
void inet_sock_destruct(struct sock *sk);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
-int inet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len,
+int inet_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int inet_ctl_sock_create(struct sock **sk, unsigned short family,
u16 net_header_len;
u16 net_frag_header_len;
u16 sockaddr_len;
- int (*setsockopt)(struct sock *sk, int level, int optname,
+ int (*setsockopt)(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen);
- int (*getsockopt)(struct sock *sk, int level, int optname,
+ int (*getsockopt)(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
#ifdef CONFIG_COMPAT
int (*compat_setsockopt)(struct sock *sk,
/** inet_connection_sock - INET connection oriented sock
*
- * @icsk_accept_queue: FIFO of established children
+ * @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
unsigned long timeout; /* Currently scheduled timeout */
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
- __u16 rcv_mss; /* MSS used for delayed ACK decisions */
+ __u16 rcv_mss; /* MSS used for delayed ACK decisions */
} icsk_ack;
struct {
int enabled;
static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
{
struct inet_connection_sock *icsk = inet_csk(sk);
-
+
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
icsk->icsk_pending = 0;
#ifdef INET_CSK_CLEAR_TIMERS
void ip4_datagram_release_cb(struct sock *sk);
struct ip_reply_arg {
- struct kvec iov[1];
+ struct kvec iov[1];
int flags;
__wsum csum;
int csumoffset; /* u16 offset of csum in iov[0].iov_base */
- /* -1 if not needed */
+ /* -1 if not needed */
int bound_dev_if;
u8 tos;
kuid_t uid;
-};
+};
#define IP_REPLY_ARG_NOSRCCHECK 1
/*
* Functions provided by ip_forward.c
*/
-
+
int ip_forward(struct sk_buff *skb);
-
+
/*
* Functions provided by ip_options.c
*/
-
+
void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
__be32 daddr, struct rtable *rt, int is_frag);
typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_table *,
- struct flowi6 *, int);
+ struct flowi6 *,
+ const struct sk_buff *, int);
struct fib6_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
struct fib6_table *fib6_get_table(struct net *net, u32 id);
struct fib6_table *fib6_new_table(struct net *net, u32 id);
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup);
struct fib6_node *fib6_lookup(struct fib6_node *root,
bool fib6_rule_default(const struct fib_rule *rule);
int fib6_rules_dump(struct net *net, struct notifier_block *nb);
unsigned int fib6_rules_seq_read(struct net *net);
+
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi6 *fl6,
+ struct flow_keys *flkeys)
+{
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+ if (!net->ipv6.fib6_rules_require_fldissect)
+ return false;
+
+ skb_flow_dissect_flow_keys(skb, flkeys, flag);
+ fl6->fl6_sport = flkeys->ports.src;
+ fl6->fl6_dport = flkeys->ports.dst;
+ fl6->flowi6_proto = flkeys->basic.ip_proto;
+
+ return true;
+}
#else
static inline int fib6_rules_init(void)
{
{
return 0;
}
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi6 *fl6,
+ struct flow_keys *flkeys)
+{
+ return false;
+}
#endif
#endif
void ip6_route_input(struct sk_buff *skb);
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
- struct flowi6 *fl6, int flags);
+ struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags);
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct flowi6 *fl6, int flags);
}
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
- int flags);
+ const struct sk_buff *skb, int flags);
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
- int ifindex, struct flowi6 *fl6, int flags);
+ int ifindex, struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags);
void ip6_route_init_special_entries(void);
int ip6_route_init(void);
}
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
- const struct in6_addr *saddr, int oif, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
+ const struct in6_addr *saddr, int oif,
+ const struct sk_buff *skb, int flags);
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+ const struct sk_buff *skb, struct flow_keys *hkeys);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
!lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
}
+
#endif
unsigned char nh_sel;
unsigned char type;
unsigned char scope;
- int err;
+ int err;
};
#ifdef CONFIG_IP_ROUTE_MULTIPATH
return 0;
}
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi4 *fl4,
+ struct flow_keys *flkeys)
+{
+ return false;
+}
#else /* CONFIG_IP_MULTIPLE_TABLES */
int __net_init fib4_rules_init(struct net *net);
void __net_exit fib4_rules_exit(struct net *net);
int fib4_rules_dump(struct net *net, struct notifier_block *nb);
unsigned int fib4_rules_seq_read(struct net *net);
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi4 *fl4,
+ struct flow_keys *flkeys)
+{
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+ if (!net->ipv4.fib_rules_require_fldissect)
+ return false;
+
+ skb_flow_dissect_flow_keys(skb, flkeys, flag);
+ fl4->fl4_sport = flkeys->ports.src;
+ fl4->fl4_dport = flkeys->ports.dst;
+ fl4->flowi4_proto = flkeys->basic.ip_proto;
+
+ return true;
+}
+
#endif /* CONFIG_IP_MULTIPLE_TABLES */
/* Exported by fib_frontend.c */
int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb);
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+ const struct sk_buff *skb, struct flow_keys *flkeys);
#endif
void fib_select_multipath(struct fib_result *res, int hash);
void fib_select_path(struct net *net, struct fib_result *res,
#ifdef CONFIG_INET
+static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
+ int proto,
+ __be32 daddr, __be32 saddr,
+ __be32 key, __u8 tos, int oif,
+ __u32 mark)
+{
+ memset(fl4, 0, sizeof(*fl4));
+ fl4->flowi4_oif = oif;
+ fl4->daddr = daddr;
+ fl4->saddr = saddr;
+ fl4->flowi4_tos = tos;
+ fl4->flowi4_proto = proto;
+ fl4->fl4_gre_key = key;
+ fl4->flowi4_mark = mark;
+}
+
int ip_tunnel_init(struct net_device *dev);
void ip_tunnel_uninit(struct net_device *dev);
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
#define IPV6_ADDR_ANY 0x0000U
-#define IPV6_ADDR_UNICAST 0x0001U
-#define IPV6_ADDR_MULTICAST 0x0002U
+#define IPV6_ADDR_UNICAST 0x0001U
+#define IPV6_ADDR_MULTICAST 0x0002U
#define IPV6_ADDR_LOOPBACK 0x0010U
#define IPV6_ADDR_LINKLOCAL 0x0020U
#endif
}
-static inline void ipv6_addr_prefix(struct in6_addr *pfx,
+static inline void ipv6_addr_prefix(struct in6_addr *pfx,
const struct in6_addr *addr,
int plen)
{
addr[1] = wl;
}
-static inline void ipv6_addr_set(struct in6_addr *addr,
+static inline void ipv6_addr_set(struct in6_addr *addr,
__be32 w1, __be32 w2,
__be32 w3, __be32 w4)
{
}
/*
- * we should *never* get to this point since that
+ * we should *never* get to this point since that
* would mean the addrs are equal
*
* However, we do get to it 8) And exacly, when
int inet6_release(struct socket *sock);
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
-int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len,
+int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int lwtunnel_input(struct sk_buff *skb);
int lwtunnel_xmit(struct sk_buff *skb);
+static inline void lwtunnel_set_redirect(struct dst_entry *dst)
+{
+ if (lwtunnel_output_redirect(dst->lwtstate)) {
+ dst->lwtstate->orig_output = dst->output;
+ dst->output = lwtunnel_output;
+ }
+ if (lwtunnel_input_redirect(dst->lwtstate)) {
+ dst->lwtstate->orig_input = dst->input;
+ dst->input = lwtunnel_input;
+ }
+}
#else
static inline void lwtstate_free(struct lwtunnel_state *lws)
return false;
}
+static inline void lwtunnel_set_redirect(struct dst_entry *dst)
+{
+}
+
static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
unsigned int mtu)
{
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
u8 ampdu_len;
u8 antenna;
u16 tx_time;
+ bool is_valid_ack_signal;
void *status_driver_data[19 / sizeof(void *)];
} status;
struct {
* the first subframe.
* @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must
* be done in the hardware.
+ * @RX_FLAG_AMPDU_EOF_BIT: Value of the EOF bit in the A-MPDU delimiter for this
+ * frame
+ * @RX_FLAG_AMPDU_EOF_BIT_KNOWN: The EOF value is known
*/
enum mac80211_rx_flags {
RX_FLAG_MMIC_ERROR = BIT(0),
RX_FLAG_MIC_STRIPPED = BIT(21),
RX_FLAG_ALLOW_SAME_PN = BIT(22),
RX_FLAG_ICV_STRIPPED = BIT(23),
+ RX_FLAG_AMPDU_EOF_BIT = BIT(24),
+ RX_FLAG_AMPDU_EOF_BIT_KNOWN = BIT(25),
};
/**
* @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on
* TDLS links.
*
+ * @IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP: The driver requires the
+ * mgd_prepare_tx() callback to be called before transmission of a
+ * deauthentication frame in case the association was completed but no
+ * beacon was heard. This is required in multi-channel scenarios, where the
+ * virtual interface might not be given air time for the transmission of
+ * the frame, as it is not synced with the AP/P2P GO yet, and thus the
+ * deauthentication frame might not be transmitted.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
IEEE80211_HW_REPORTS_LOW_ACK,
IEEE80211_HW_SUPPORTS_TX_FRAG,
IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
+ IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
* management frame prior to having successfully associated to allow the
* driver to give it channel time for the transmission, to get a response
* and to be able to synchronize with the GO.
+ * For drivers that set %IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, mac80211
+ * would also call this function before transmitting a deauthentication
+ * frame in case that no beacon was heard from the AP/P2P GO.
* The callback will be called before each transmission and upon return
* mac80211 will transmit the frame right away.
* The callback is optional and can (should!) sleep.
* The TX headroom reserved by mac80211 for its own tx_status functions.
* This is enough for the radiotap header.
*/
-#define IEEE80211_TX_STATUS_HEADROOM 14
+#define IEEE80211_TX_STATUS_HEADROOM ALIGN(14, 4)
/**
* ieee80211_sta_set_buffered - inform mac80211 about driver-buffered frames
atomic64_t cookie_gen;
struct list_head list; /* list of network namespaces */
- struct list_head cleanup_list; /* namespaces on death row */
- struct list_head exit_list; /* Use only net_mutex */
+ struct list_head exit_list; /* To linked to call pernet exit
+ * methods on dead net (net_sem
+ * read locked), or to unregister
+ * pernet ops (net_sem wr locked).
+ */
+ struct llist_node cleanup_list; /* namespaces on death row */
struct user_namespace *user_ns; /* Owning user namespace */
struct ucounts *ucounts;
/* core fib_rules */
struct list_head rules_ops;
- struct list_head fib_notifier_ops; /* protected by net_mutex */
+ struct list_head fib_notifier_ops; /* protected by net_sem */
struct net_device *loopback_dev; /* The loopback */
struct netns_core core;
void (*exit_batch)(struct list_head *net_exit_list);
unsigned int *id;
size_t size;
+ /*
+ * Indicates above methods are allowed to be executed in parallel
+ * with methods of any other pernet_operations, i.e. they are not
+ * need write locked net_sem.
+ */
+ bool async;
};
/*
NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */
NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */
NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
- NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */
+ NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
+ NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
};
int register_netevent_notifier(struct notifier_block *nb);
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rules_ops *rules_ops;
bool fib_has_custom_rules;
+ unsigned int fib_rules_require_fldissect;
struct fib_table __rcu *fib_main;
struct fib_table __rcu *fib_default;
#endif
int ip6_rt_gc_elasticity;
int ip6_rt_mtu_expires;
int ip6_rt_min_advmss;
+ int multipath_hash_policy;
int flowlabel_consistency;
int auto_flowlabels;
int icmpv6_time;
unsigned int ip6_rt_gc_expire;
unsigned long ip6_rt_last_gc;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- bool fib6_has_custom_rules;
+ unsigned int fib6_rules_require_fldissect;
+ bool fib6_has_custom_rules;
struct rt6_info *ip6_prohibit_entry;
struct rt6_info *ip6_blk_hole_entry;
struct fib6_table *fib6_local_tbl;
struct sock *mc_autojoin_sk;
#ifdef CONFIG_IPV6_MROUTE
#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
- struct mr6_table *mrt6;
+ struct mr_table *mrt6;
#else
struct list_head mr6_tables;
struct fib_rules_ops *mr6_rules_ops;
TC_PRIO_REPLACE,
TC_PRIO_DESTROY,
TC_PRIO_STATS,
+ TC_PRIO_GRAFT,
};
struct tc_prio_qopt_offload_params {
struct gnet_stats_queue *qstats;
};
+struct tc_prio_qopt_offload_graft_params {
+ u8 band;
+ u32 child_handle;
+};
+
struct tc_prio_qopt_offload {
enum tc_prio_command command;
u32 handle;
union {
struct tc_prio_qopt_offload_params replace_params;
struct tc_qopt_offload_stats stats;
+ struct tc_prio_qopt_offload_graft_params graft_params;
};
};
+
#endif
int wiphy_idx;
enum nl80211_reg_initiator initiator;
enum nl80211_user_reg_hint_type user_reg_hint_type;
- char alpha2[2];
+ char alpha2[3];
enum nl80211_dfs_regions dfs_region;
bool intersect;
bool processed;
/* Miscellaneous cached information */
u32 rt_pmtu;
- u32 rt_table_id;
-
struct list_head rt_uncached;
struct uncached_list *rt_uncached_list;
};
return false;
}
-/* Reset all TX qdiscs greater then index of a device. */
+/* Reset all TX qdiscs greater than index of a device. */
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
{
struct Qdisc *qdisc;
struct page_frag sk_frag;
netdev_features_t sk_route_caps;
netdev_features_t sk_route_nocaps;
+ netdev_features_t sk_route_forced_caps;
int sk_gso_type;
unsigned int sk_gso_max_size;
gfp_t sk_allocation;
int sock_no_connect(struct socket *, struct sockaddr *, int, int);
int sock_no_socketpair(struct socket *, struct socket *);
int sock_no_accept(struct socket *, struct socket *, int, bool);
-int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
+int sock_no_getname(struct socket *, struct sockaddr *, int);
__poll_t sock_no_poll(struct file *, struct socket *,
struct poll_table_struct *);
int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
sk->sk_route_caps &= ~flags;
}
-static inline bool sk_check_csum_caps(struct sock *sk)
-{
- return (sk->sk_route_caps & NETIF_F_HW_CSUM) ||
- (sk->sk_family == PF_INET &&
- (sk->sk_route_caps & NETIF_F_IP_CSUM)) ||
- (sk->sk_family == PF_INET6 &&
- (sk->sk_route_caps & NETIF_F_IPV6_CSUM));
-}
-
static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
struct iov_iter *from, char *to,
int copy, int offset)
struct sk_buff *skb,
const struct tcphdr *th);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req, bool fastopen);
+ struct request_sock *req, bool fastopen,
+ bool *lost_race);
int tcp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
#endif
/* tcp_output.c */
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- int min_tso_segs);
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
int nonagle);
int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
u32 (*undo_cwnd)(struct sock *sk);
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
- /* suggest number of segments for each skb to transmit (optional) */
- u32 (*tso_segs_goal)(struct sock *sk);
+ /* override sysctl_tcp_min_tso_segs */
+ u32 (*min_tso_segs)(struct sock *sk);
/* returns the multiplier used in tcp_sndbuf_expand (optional) */
u32 (*sndbuf_expand)(struct sock *sk);
/* call when packets are delivered to update cwnd and pacing rate,
#define TCP_STATE_MASK 0xF
-#define TCP_ACTION_FIN (1 << 7)
+#define TCP_ACTION_FIN (1 << TCP_CLOSE)
enum {
- TCPF_ESTABLISHED = (1 << 1),
- TCPF_SYN_SENT = (1 << 2),
- TCPF_SYN_RECV = (1 << 3),
- TCPF_FIN_WAIT1 = (1 << 4),
- TCPF_FIN_WAIT2 = (1 << 5),
- TCPF_TIME_WAIT = (1 << 6),
- TCPF_CLOSE = (1 << 7),
- TCPF_CLOSE_WAIT = (1 << 8),
- TCPF_LAST_ACK = (1 << 9),
- TCPF_LISTEN = (1 << 10),
- TCPF_CLOSING = (1 << 11),
- TCPF_NEW_SYN_RECV = (1 << 12),
+ TCPF_ESTABLISHED = (1 << TCP_ESTABLISHED),
+ TCPF_SYN_SENT = (1 << TCP_SYN_SENT),
+ TCPF_SYN_RECV = (1 << TCP_SYN_RECV),
+ TCPF_FIN_WAIT1 = (1 << TCP_FIN_WAIT1),
+ TCPF_FIN_WAIT2 = (1 << TCP_FIN_WAIT2),
+ TCPF_TIME_WAIT = (1 << TCP_TIME_WAIT),
+ TCPF_CLOSE = (1 << TCP_CLOSE),
+ TCPF_CLOSE_WAIT = (1 << TCP_CLOSE_WAIT),
+ TCPF_LAST_ACK = (1 << TCP_LAST_ACK),
+ TCPF_LISTEN = (1 << TCP_LISTEN),
+ TCPF_CLOSING = (1 << TCP_CLOSING),
+ TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
};
#endif /* _LINUX_TCP_STATES_H */
UDP_SKB_CB(skb)->cscov = cscov;
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
+ skb->csum_valid = 0;
}
return 0;
static inline void xfrm_sk_free_policy(struct sock *sk) {}
static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
-static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
-static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
-{
- return 1;
-}
+{
+ return 1;
+}
static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
{
return 1;
{
if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) &&
(ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) ||
- ipv6_addr_any((struct in6_addr *)saddr) ||
+ ipv6_addr_any((struct in6_addr *)saddr) ||
ipv6_addr_any((struct in6_addr *)&x->props.saddr)))
return 1;
return 0;
static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
return -ENOPROTOOPT;
-}
+}
static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
{
* @RDMA_RESTRACK_QP: Queue pair (QP)
*/
RDMA_RESTRACK_QP,
- /**
- * @RDMA_RESTRACK_XRCD: XRC domain (XRCD)
- */
- RDMA_RESTRACK_XRCD,
/**
* @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
*/
*/
struct uverbs_ptr_attr {
- union {
- u64 data;
- void __user *ptr;
- };
+ u64 data;
u16 len;
/* Combination of bits from enum UVERBS_ATTR_F_XXXX */
u16 flags;
}
static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
- size_t idx, const void *from)
+ size_t idx, const void *from, size_t size)
{
const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
u16 flags;
+ size_t min_size;
if (IS_ERR(attr))
return PTR_ERR(attr);
+ min_size = min_t(size_t, attr->ptr_attr.len, size);
+ if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size))
+ return -EFAULT;
+
flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT;
- return (!copy_to_user(attr->ptr_attr.ptr, from, attr->ptr_attr.len) &&
- !put_user(flags, &attr->uattr->flags)) ? 0 : -EFAULT;
+ if (put_user(flags, &attr->uattr->flags))
+ return -EFAULT;
+
+ return 0;
}
-static inline int _uverbs_copy_from(void *to, size_t to_size,
+static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
+{
+ return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
+}
+
+static inline int _uverbs_copy_from(void *to,
const struct uverbs_attr_bundle *attrs_bundle,
- size_t idx)
+ size_t idx,
+ size_t size)
{
const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
if (IS_ERR(attr))
return PTR_ERR(attr);
- if (to_size <= sizeof(((struct ib_uverbs_attr *)0)->data))
+ /*
+ * Validation ensures attr->ptr_attr.len >= size. If the caller is
+ * using UVERBS_ATTR_SPEC_F_MIN_SZ then it must call copy_from with
+ * the right size.
+ */
+ if (unlikely(size < attr->ptr_attr.len))
+ return -EINVAL;
+
+ if (uverbs_attr_ptr_is_inline(attr))
memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len);
- else if (copy_from_user(to, attr->ptr_attr.ptr, attr->ptr_attr.len))
+ else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data),
+ attr->ptr_attr.len))
return -EFAULT;
return 0;
}
#define uverbs_copy_from(to, attrs_bundle, idx) \
- _uverbs_copy_from(to, sizeof(*(to)), attrs_bundle, idx)
+ _uverbs_copy_from(to, attrs_bundle, idx, sizeof(*to))
/* =================================================
* Definitions -> Specs infrastructure
#define AC97_HEADPHONE 0x04 /* Headphone Volume (optional) */
#define AC97_MASTER_MONO 0x06 /* Master Volume Mono (optional) */
#define AC97_MASTER_TONE 0x08 /* Master Tone (Bass & Treble) (optional) */
-#define AC97_PC_BEEP 0x0a /* PC Beep Volume (optinal) */
+#define AC97_PC_BEEP 0x0a /* PC Beep Volume (optional) */
#define AC97_PHONE 0x0c /* Phone Volume (optional) */
#define AC97_MIC 0x0e /* MIC Volume */
#define AC97_LINE 0x10 /* Line In Volume */
TP_printk("%s", "")
);
-TRACE_EVENT(xen_mmu_flush_tlb_single,
+TRACE_EVENT(xen_mmu_flush_tlb_one_user,
TP_PROTO(unsigned long addr),
TP_ARGS(addr),
TP_STRUCT__entry(
/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+#define BPF_F_SEQ_NUMBER (1ULL << 3)
/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
* BPF_FUNC_perf_event_read_value flags.
__u8 tos;
__u8 table;
- __u8 res1; /* reserved */
+ __u8 res1; /* reserved */
__u8 res2; /* reserved */
__u8 action;
__u32 end;
};
+struct fib_rule_port_range {
+ __u16 start;
+ __u16 end;
+};
+
enum {
FRA_UNSPEC,
FRA_DST, /* destination address */
FRA_PAD,
FRA_L3MDEV, /* iif or oif is l3mdev goto its table */
FRA_UID_RANGE, /* UID range */
+ FRA_PROTOCOL, /* Originator of the rule */
+ FRA_IP_PROTO, /* ip proto */
+ FRA_SPORT_RANGE, /* sport */
+ FRA_DPORT_RANGE, /* dport */
__FRA_MAX
};
#define _UAPI_LINUX_IF_ETHER_H
#include <linux/types.h>
-#include <linux/libc-compat.h>
/*
* IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble
#define ETH_P_AOE 0x88A2 /* ATA over Ethernet */
#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */
#define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */
+#define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */
#define ETH_P_TIPC 0x88CA /* TIPC */
#define ETH_P_MACSEC 0x88E5 /* 802.1ae MACsec */
#define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */
* This is an Ethernet frame header.
*/
+/* allow libcs like musl to deactivate this, glibc does not implement this. */
+#ifndef __UAPI_DEF_ETHHDR
+#define __UAPI_DEF_ETHHDR 1
+#endif
+
#if __UAPI_DEF_ETHHDR
struct ethhdr {
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */
};
+/* tun section */
+
+enum {
+ IFLA_TUN_UNSPEC,
+ IFLA_TUN_OWNER,
+ IFLA_TUN_GROUP,
+ IFLA_TUN_TYPE,
+ IFLA_TUN_PI,
+ IFLA_TUN_VNET_HDR,
+ IFLA_TUN_PERSIST,
+ IFLA_TUN_MULTI_QUEUE,
+ IFLA_TUN_NUM_QUEUES,
+ IFLA_TUN_NUM_DISABLED_QUEUES,
+ __IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
#endif /* _UAPI_LINUX_IF_LINK_H */
#endif /* __GLIBC__ */
-/* Definitions for if_ether.h */
-/* allow libcs like musl to deactivate this, glibc does not implement this. */
-#ifndef __UAPI_DEF_ETHHDR
-#define __UAPI_DEF_ETHHDR 1
-#endif
-
#endif /* _UAPI_LIBC_COMPAT_H */
*
* @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded.
*
+ * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host
+ * drivers that do not define separate commands for authentication and
+ * association, but rely on user space for the authentication to happen.
+ * This interface acts both as the event request (driver to user space)
+ * to trigger the authentication and command response (userspace to
+ * driver) to indicate the authentication status.
+ *
+ * User space uses the %NL80211_CMD_CONNECT command to the host driver to
+ * trigger a connection. The host driver selects a BSS and further uses
+ * this interface to offload only the authentication part to the user
+ * space. Authentication frames are passed between the driver and user
+ * space through the %NL80211_CMD_FRAME interface. Host driver proceeds
+ * further with the association after getting successful authentication
+ * status. User space indicates the authentication status through
+ * %NL80211_ATTR_STATUS_CODE attribute in %NL80211_CMD_EXTERNAL_AUTH
+ * command interface.
+ *
+ * Host driver reports this status on an authentication failure to the
+ * user space through the connect result as the user space would have
+ * initiated the connection through the connect request.
+ *
+ * @NL80211_CMD_STA_OPMODE_CHANGED: An event that notify station's
+ * ht opmode or vht opmode changes using any of &NL80211_ATTR_SMPS_MODE,
+ * &NL80211_ATTR_CHANNEL_WIDTH,&NL80211_ATTR_NSS attributes with its
+ * address(specified in &NL80211_ATTR_MAC).
+ *
* @NL80211_CMD_MAX: highest used command number
* @__NL80211_CMD_AFTER_LAST: internal use
*/
NL80211_CMD_RELOAD_REGDB,
+ NL80211_CMD_EXTERNAL_AUTH,
+
+ NL80211_CMD_STA_OPMODE_CHANGED,
+
/* add new commands above here */
/* used to define NL80211_CMD_MAX below */
* @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT.
* @NL80211_ATTR_PORT_AUTHORIZED: (reserved)
*
+ * @NL80211_ATTR_EXTERNAL_AUTH_ACTION: Identify the requested external
+ * authentication operation (u32 attribute with an
+ * &enum nl80211_external_auth_action value). This is used with the
+ * &NL80211_CMD_EXTERNAL_AUTH request event.
+ * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user
+ * space supports external authentication. This attribute shall be used
+ * only with %NL80211_CMD_CONNECT request. The driver may offload
+ * authentication processing to user space if this capability is indicated
+ * in NL80211_CMD_CONNECT requests from the user space.
+ *
+ * @NL80211_ATTR_NSS: Station's New/updated RX_NSS value notified using this
+ * u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED.
+ *
* @NUM_NL80211_ATTR: total number of nl80211_attrs available
* @NL80211_ATTR_MAX: highest attribute number currently defined
* @__NL80211_ATTR_AFTER_LAST: internal use
NL80211_ATTR_PMKR0_NAME,
NL80211_ATTR_PORT_AUTHORIZED,
+ NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+ NL80211_ATTR_EXTERNAL_AUTH_SUPPORT,
+
+ NL80211_ATTR_NSS,
+ NL80211_ATTR_ACK_SIGNAL,
+
/* add attributes here, update the policy in nl80211.c */
__NL80211_ATTR_AFTER_LAST,
* @NL80211_STA_INFO_RX_DURATION: aggregate PPDU duration for all frames
* received from the station (u64, usec)
* @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame(u8, dBm)
* @__NL80211_STA_INFO_AFTER_LAST: internal
* @NL80211_STA_INFO_MAX: highest possible station info attribute
*/
NL80211_STA_INFO_TID_STATS,
NL80211_STA_INFO_RX_DURATION,
NL80211_STA_INFO_PAD,
+ NL80211_STA_INFO_ACK_SIGNAL,
/* keep last */
__NL80211_STA_INFO_AFTER_LAST,
* probe request tx deferral and suppression
* @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL
* value in %NL80211_ATTR_USE_MFP.
+ * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan.
+ * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan.
+ * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan.
*
* @NUM_NL80211_EXT_FEATURES: number of extended features.
* @MAX_NL80211_EXT_FEATURES: highest extended feature index.
NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE,
NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION,
NL80211_EXT_FEATURE_MFP_OPTIONAL,
+ NL80211_EXT_FEATURE_LOW_SPAN_SCAN,
+ NL80211_EXT_FEATURE_LOW_POWER_SCAN,
+ NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN,
/* add new features before the definition below */
NUM_NL80211_EXT_FEATURES,
* of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN
* requests.
*
+ * NL80211_SCAN_FLAG_LOW_SPAN, NL80211_SCAN_FLAG_LOW_POWER, and
+ * NL80211_SCAN_FLAG_HIGH_ACCURACY flags are exclusive of each other, i.e., only
+ * one of them can be used in the request.
+ *
* @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority
* @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning
* @NL80211_SCAN_FLAG_AP: force a scan even if the interface is configured
* and suppression (if it has received a broadcast Probe Response frame,
* Beacon frame or FILS Discovery frame from an AP that the STA considers
* a suitable candidate for (re-)association - suitable in terms of
- * SSID and/or RSSI
+ * SSID and/or RSSI.
+ * @NL80211_SCAN_FLAG_LOW_SPAN: Span corresponds to the total time taken to
+ * accomplish the scan. Thus, this flag intends the driver to perform the
+ * scan request with lesser span/duration. It is specific to the driver
+ * implementations on how this is accomplished. Scan accuracy may get
+ * impacted with this flag.
+ * @NL80211_SCAN_FLAG_LOW_POWER: This flag intends the scan attempts to consume
+ * optimal possible power. Drivers can resort to their specific means to
+ * optimize the power. Scan accuracy may get impacted with this flag.
+ * @NL80211_SCAN_FLAG_HIGH_ACCURACY: Accuracy here intends to the extent of scan
+ * results obtained. Thus HIGH_ACCURACY scan flag aims to get maximum
+ * possible scan results. This flag hints the driver to use the best
+ * possible scan configuration to improve the accuracy in scanning.
+ * Latency and power use may get impacted with this flag.
*/
enum nl80211_scan_flags {
NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0,
NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP = 1<<5,
NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE = 1<<6,
NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION = 1<<7,
+ NL80211_SCAN_FLAG_LOW_SPAN = 1<<8,
+ NL80211_SCAN_FLAG_LOW_POWER = 1<<9,
+ NL80211_SCAN_FLAG_HIGH_ACCURACY = 1<<10,
};
/**
NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1
};
+/**
+ * nl80211_external_auth_action - Action to perform with external
+ * authentication request. Used by NL80211_ATTR_EXTERNAL_AUTH_ACTION.
+ * @NL80211_EXTERNAL_AUTH_START: Start the authentication.
+ * @NL80211_EXTERNAL_AUTH_ABORT: Abort the ongoing authentication.
+ */
+enum nl80211_external_auth_action {
+ NL80211_EXTERNAL_AUTH_START,
+ NL80211_EXTERNAL_AUTH_ABORT,
+};
+
#endif /* __LINUX_NL80211_H */
#define TCF_EM_VLAN 6
#define TCF_EM_CANID 7
#define TCF_EM_IPSET 8
-#define TCF_EM_MAX 8
+#define TCF_EM_IPT 9
+#define TCF_EM_MAX 9
enum {
TCF_EM_PROG_TC
#define PTRACE_SECCOMP_GET_METADATA 0x420d
struct seccomp_metadata {
- unsigned long filter_off; /* Input: which filter */
- unsigned int flags; /* Output: filter's flags */
+ __u64 filter_off; /* Input: which filter */
+ __u64 flags; /* Output: filter's flags */
};
/* Read signals from a shared (process wide) queue */
#define RDS_CMSG_MASKED_ATOMIC_FADD 8
#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
#define RDS_CMSG_RXPATH_LATENCY 11
+#define RDS_CMSG_ZCOPY_COOKIE 12
+#define RDS_CMSG_ZCOPY_COMPLETION 13
#define RDS_INFO_FIRST 10000
#define RDS_INFO_COUNTERS 10000
#define RDS_RDMA_DROPPED 3
#define RDS_RDMA_OTHER_ERROR 4
+#define RDS_MAX_ZCOOKIES 8
+struct rds_zcopy_cookies {
+ __u32 num;
+ __u32 cookies[RDS_MAX_ZCOOKIES];
+};
+
/*
* Common set of flags for all RDMA related structs
*/
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_TC_EM_IPT_H
+#define __LINUX_TC_EM_IPT_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+enum {
+ TCA_EM_IPT_UNSPEC,
+ TCA_EM_IPT_HOOK,
+ TCA_EM_IPT_MATCH_NAME,
+ TCA_EM_IPT_MATCH_REVISION,
+ TCA_EM_IPT_NFPROTO,
+ TCA_EM_IPT_MATCH_DATA,
+ __TCA_EM_IPT_MAX
+};
+
+#define TCA_EM_IPT_MAX (__TCA_EM_IPT_MAX - 1)
+
+#endif
__u16 len; /* only for pointers */
__u16 flags; /* combination of UVERBS_ATTR_F_XXXX */
__u16 reserved;
- __u64 data; /* ptr to command, inline data or idr/fd */
+ __aligned_u64 data; /* ptr to command, inline data or idr/fd */
};
struct ib_uverbs_ioctl_hdr {
__u16 object_id;
__u16 method_id;
__u16 num_attrs;
- __u64 reserved;
+ __aligned_u64 reserved;
struct ib_uverbs_attr attrs[0];
};
.exit = audit_net_exit,
.id = &audit_net_id,
.size = sizeof(struct audit_net),
+ .async = true,
};
/* Initialize audit support at boot time. */
{
int i;
- for (i = 0; i < array->map.max_entries; i++)
+ for (i = 0; i < array->map.max_entries; i++) {
free_percpu(array->pptrs[i]);
+ cond_resched();
+ }
}
static int bpf_array_alloc_percpu(struct bpf_array *array)
return -ENOMEM;
}
array->pptrs[i] = ptr;
+ cond_resched();
}
return 0;
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
- int numa_node = bpf_map_attr_numa_node(attr);
+ int ret, numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
+ u64 cost, array_size, mask64;
struct bpf_array *array;
- u64 array_size, mask64;
elem_size = round_up(attr->value_size, 8);
array_size += (u64) max_entries * elem_size;
/* make sure there is no u32 overflow later in round_up() */
- if (array_size >= U32_MAX - PAGE_SIZE)
+ cost = array_size;
+ if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
+ if (percpu) {
+ cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
+ if (cost >= U32_MAX - PAGE_SIZE)
+ return ERR_PTR(-ENOMEM);
+ }
+ cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+ ret = bpf_map_precharge_memlock(cost);
+ if (ret < 0)
+ return ERR_PTR(ret);
/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
+ array->map.pages = cost;
array->elem_size = elem_size;
- if (!percpu)
- goto out;
-
- array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
-
- if (array_size >= U32_MAX - PAGE_SIZE ||
- bpf_array_alloc_percpu(array)) {
+ if (percpu && bpf_array_alloc_percpu(array)) {
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
-out:
- array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
return &array->map;
}
* so always copy 'cnt' prog_ids to the user.
* In a rare race the user will see zero prog_ids
*/
- ids = kcalloc(cnt, sizeof(u32), GFP_USER);
+ ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
if (!ids)
return -ENOMEM;
rcu_read_lock();
static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
int map_id)
{
- gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN;
+ gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
struct bpf_cpu_map_entry *rcpu;
int numa, err;
struct lpm_trie_node __rcu **slot;
struct lpm_trie_node *node;
- raw_spin_lock(&trie->lock);
+ /* Wait for outstanding programs to complete
+ * update/lookup/delete/get_next_key and free the trie.
+ */
+ synchronize_rcu();
/* Always start at the root and walk down to a node that has no
* children. Then free that node, nullify its reference in the parent
slot = &trie->root;
for (;;) {
- node = rcu_dereference_protected(*slot,
- lockdep_is_held(&trie->lock));
+ node = rcu_dereference_protected(*slot, 1);
if (!node)
- goto unlock;
+ goto out;
if (rcu_access_pointer(node->child[0])) {
slot = &node->child[0];
}
}
-unlock:
- raw_spin_unlock(&trie->lock);
+out:
+ kfree(trie);
}
static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
{
struct bpf_stab *stab;
- int err = -EINVAL;
u64 cost;
+ int err;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
/* make sure page count doesn't overflow */
cost = (u64) stab->map.max_entries * sizeof(struct sock *);
+ err = -EINVAL;
if (cost >= U32_MAX - PAGE_SIZE)
goto free_stab;
static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
-#define CALLEE_SAVED_REGS 5
-static const int callee_saved[CALLEE_SAVED_REGS] = {
- BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
-};
static void __mark_reg_not_init(struct bpf_reg_state *reg);
* is dropped: either by a lazy thread or by
* mmput. Free the page directory and the mm.
*/
-static void __mmdrop(struct mm_struct *mm)
+void __mmdrop(struct mm_struct *mm)
{
BUG_ON(mm == &init_mm);
mm_free_pgd(mm);
put_user_ns(mm->user_ns);
free_mm(mm);
}
-
-void mmdrop(struct mm_struct *mm)
-{
- /*
- * The implicit full barrier implied by atomic_dec_and_test() is
- * required by the membarrier system call before returning to
- * user-space, after storing to rq->curr.
- */
- if (unlikely(atomic_dec_and_test(&mm->mm_count)))
- __mmdrop(mm);
-}
-EXPORT_SYMBOL_GPL(mmdrop);
+EXPORT_SYMBOL_GPL(__mmdrop);
static void mmdrop_async_fn(struct work_struct *work)
{
irq_domain_debug_show_one(m, d, 0);
return 0;
}
-
-static int irq_domain_debug_open(struct inode *inode, struct file *file)
-{
- return single_open(file, irq_domain_debug_show, inode->i_private);
-}
-
-static const struct file_operations dfs_domain_ops = {
- .open = irq_domain_debug_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(irq_domain_debug);
static void debugfs_add_domain_dir(struct irq_domain *d)
{
if (!d->name || !domain_dir || d->debugfs_file)
return;
d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d,
- &dfs_domain_ops);
+ &irq_domain_debug_fops);
}
static void debugfs_remove_domain_dir(struct irq_domain *d)
if (!domain_dir)
return;
- debugfs_create_file("default", 0444, domain_dir, NULL, &dfs_domain_ops);
+ debugfs_create_file("default", 0444, domain_dir, NULL,
+ &irq_domain_debug_fops);
mutex_lock(&irq_domain_mutex);
list_for_each_entry(d, &irq_domain_list, link)
debugfs_add_domain_dir(d);
}
/* Caller must lock kprobe_mutex */
-static void arm_kprobe_ftrace(struct kprobe *p)
+static int arm_kprobe_ftrace(struct kprobe *p)
{
- int ret;
+ int ret = 0;
ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
(unsigned long)p->addr, 0, 0);
- WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
- kprobe_ftrace_enabled++;
- if (kprobe_ftrace_enabled == 1) {
+ if (ret) {
+ pr_debug("Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+ return ret;
+ }
+
+ if (kprobe_ftrace_enabled == 0) {
ret = register_ftrace_function(&kprobe_ftrace_ops);
- WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+ if (ret) {
+ pr_debug("Failed to init kprobe-ftrace (%d)\n", ret);
+ goto err_ftrace;
+ }
}
+
+ kprobe_ftrace_enabled++;
+ return ret;
+
+err_ftrace:
+ /*
+ * Note: Since kprobe_ftrace_ops has IPMODIFY set, and ftrace requires a
+ * non-empty filter_hash for IPMODIFY ops, we're safe from an accidental
+ * empty filter_hash which would undesirably trace all functions.
+ */
+ ftrace_set_filter_ip(&kprobe_ftrace_ops, (unsigned long)p->addr, 1, 0);
+ return ret;
}
/* Caller must lock kprobe_mutex */
-static void disarm_kprobe_ftrace(struct kprobe *p)
+static int disarm_kprobe_ftrace(struct kprobe *p)
{
- int ret;
+ int ret = 0;
- kprobe_ftrace_enabled--;
- if (kprobe_ftrace_enabled == 0) {
+ if (kprobe_ftrace_enabled == 1) {
ret = unregister_ftrace_function(&kprobe_ftrace_ops);
- WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+ if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret))
+ return ret;
}
+
+ kprobe_ftrace_enabled--;
+
ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
(unsigned long)p->addr, 1, 0);
WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+ return ret;
}
#else /* !CONFIG_KPROBES_ON_FTRACE */
#define prepare_kprobe(p) arch_prepare_kprobe(p)
-#define arm_kprobe_ftrace(p) do {} while (0)
-#define disarm_kprobe_ftrace(p) do {} while (0)
+#define arm_kprobe_ftrace(p) (-ENODEV)
+#define disarm_kprobe_ftrace(p) (-ENODEV)
#endif
/* Arm a kprobe with text_mutex */
-static void arm_kprobe(struct kprobe *kp)
+static int arm_kprobe(struct kprobe *kp)
{
- if (unlikely(kprobe_ftrace(kp))) {
- arm_kprobe_ftrace(kp);
- return;
- }
+ if (unlikely(kprobe_ftrace(kp)))
+ return arm_kprobe_ftrace(kp);
+
cpus_read_lock();
mutex_lock(&text_mutex);
__arm_kprobe(kp);
mutex_unlock(&text_mutex);
cpus_read_unlock();
+
+ return 0;
}
/* Disarm a kprobe with text_mutex */
-static void disarm_kprobe(struct kprobe *kp, bool reopt)
+static int disarm_kprobe(struct kprobe *kp, bool reopt)
{
- if (unlikely(kprobe_ftrace(kp))) {
- disarm_kprobe_ftrace(kp);
- return;
- }
+ if (unlikely(kprobe_ftrace(kp)))
+ return disarm_kprobe_ftrace(kp);
cpus_read_lock();
mutex_lock(&text_mutex);
__disarm_kprobe(kp, reopt);
mutex_unlock(&text_mutex);
cpus_read_unlock();
+
+ return 0;
}
/*
if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
ap->flags &= ~KPROBE_FLAG_DISABLED;
- if (!kprobes_all_disarmed)
+ if (!kprobes_all_disarmed) {
/* Arm the breakpoint again. */
- arm_kprobe(ap);
+ ret = arm_kprobe(ap);
+ if (ret) {
+ ap->flags |= KPROBE_FLAG_DISABLED;
+ list_del_rcu(&p->list);
+ synchronize_sched();
+ }
+ }
}
return ret;
}
hlist_add_head_rcu(&p->hlist,
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
- if (!kprobes_all_disarmed && !kprobe_disabled(p))
- arm_kprobe(p);
+ if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
+ ret = arm_kprobe(p);
+ if (ret) {
+ hlist_del_rcu(&p->hlist);
+ synchronize_sched();
+ goto out;
+ }
+ }
/* Try to optimize kprobe */
try_to_optimize_kprobe(p);
static struct kprobe *__disable_kprobe(struct kprobe *p)
{
struct kprobe *orig_p;
+ int ret;
/* Get an original kprobe for return */
orig_p = __get_valid_kprobe(p);
if (unlikely(orig_p == NULL))
- return NULL;
+ return ERR_PTR(-EINVAL);
if (!kprobe_disabled(p)) {
/* Disable probe if it is a child probe */
* should have already been disarmed, so
* skip unneed disarming process.
*/
- if (!kprobes_all_disarmed)
- disarm_kprobe(orig_p, true);
+ if (!kprobes_all_disarmed) {
+ ret = disarm_kprobe(orig_p, true);
+ if (ret) {
+ p->flags &= ~KPROBE_FLAG_DISABLED;
+ return ERR_PTR(ret);
+ }
+ }
orig_p->flags |= KPROBE_FLAG_DISABLED;
}
}
/* Disable kprobe. This will disarm it if needed. */
ap = __disable_kprobe(p);
- if (ap == NULL)
- return -EINVAL;
+ if (IS_ERR(ap))
+ return PTR_ERR(ap);
if (ap == p)
/*
int disable_kprobe(struct kprobe *kp)
{
int ret = 0;
+ struct kprobe *p;
mutex_lock(&kprobe_mutex);
/* Disable this kprobe */
- if (__disable_kprobe(kp) == NULL)
- ret = -EINVAL;
+ p = __disable_kprobe(kp);
+ if (IS_ERR(p))
+ ret = PTR_ERR(p);
mutex_unlock(&kprobe_mutex);
return ret;
if (!kprobes_all_disarmed && kprobe_disabled(p)) {
p->flags &= ~KPROBE_FLAG_DISABLED;
- arm_kprobe(p);
+ ret = arm_kprobe(p);
+ if (ret)
+ p->flags |= KPROBE_FLAG_DISABLED;
}
out:
mutex_unlock(&kprobe_mutex);
.release = seq_release,
};
-static void arm_all_kprobes(void)
+static int arm_all_kprobes(void)
{
struct hlist_head *head;
struct kprobe *p;
- unsigned int i;
+ unsigned int i, total = 0, errors = 0;
+ int err, ret = 0;
mutex_lock(&kprobe_mutex);
/* Arming kprobes doesn't optimize kprobe itself */
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry_rcu(p, head, hlist)
- if (!kprobe_disabled(p))
- arm_kprobe(p);
+ /* Arm all kprobes on a best-effort basis */
+ hlist_for_each_entry_rcu(p, head, hlist) {
+ if (!kprobe_disabled(p)) {
+ err = arm_kprobe(p);
+ if (err) {
+ errors++;
+ ret = err;
+ }
+ total++;
+ }
+ }
}
- printk(KERN_INFO "Kprobes globally enabled\n");
+ if (errors)
+ pr_warn("Kprobes globally enabled, but failed to arm %d out of %d probes\n",
+ errors, total);
+ else
+ pr_info("Kprobes globally enabled\n");
already_enabled:
mutex_unlock(&kprobe_mutex);
- return;
+ return ret;
}
-static void disarm_all_kprobes(void)
+static int disarm_all_kprobes(void)
{
struct hlist_head *head;
struct kprobe *p;
- unsigned int i;
+ unsigned int i, total = 0, errors = 0;
+ int err, ret = 0;
mutex_lock(&kprobe_mutex);
/* If kprobes are already disarmed, just return */
if (kprobes_all_disarmed) {
mutex_unlock(&kprobe_mutex);
- return;
+ return 0;
}
kprobes_all_disarmed = true;
- printk(KERN_INFO "Kprobes globally disabled\n");
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
+ /* Disarm all kprobes on a best-effort basis */
hlist_for_each_entry_rcu(p, head, hlist) {
- if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
- disarm_kprobe(p, false);
+ if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
+ err = disarm_kprobe(p, false);
+ if (err) {
+ errors++;
+ ret = err;
+ }
+ total++;
+ }
}
}
+
+ if (errors)
+ pr_warn("Kprobes globally disabled, but failed to disarm %d out of %d probes\n",
+ errors, total);
+ else
+ pr_info("Kprobes globally disabled\n");
+
mutex_unlock(&kprobe_mutex);
/* Wait for disarming all kprobes by optimizer */
wait_for_kprobe_optimizer();
+
+ return ret;
}
/*
{
char buf[32];
size_t buf_size;
+ int ret = 0;
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size))
case 'y':
case 'Y':
case '1':
- arm_all_kprobes();
+ ret = arm_all_kprobes();
break;
case 'n':
case 'N':
case '0':
- disarm_all_kprobes();
+ ret = disarm_all_kprobes();
break;
default:
return -EINVAL;
}
+ if (ret)
+ return ret;
+
return count;
}
tail = encode_tail(smp_processor_id(), idx);
node += idx;
+
+ /*
+ * Ensure that we increment the head node->count before initialising
+ * the actual node. If the compiler is kind enough to reorder these
+ * stores, then an IRQ could overwrite our assignments.
+ */
+ barrier();
+
node->locked = 0;
node->next = NULL;
pv_init_node(node);
*/
if (old & _Q_TAIL_MASK) {
prev = decode_tail(old);
+
/*
- * The above xchg_tail() is also a load of @lock which
- * generates, through decode_tail(), a pointer. The address
- * dependency matches the RELEASE of xchg_tail() such that
- * the subsequent access to @prev happens after.
+ * We must ensure that the stores to @node are observed before
+ * the write to prev->next. The address dependency from
+ * xchg_tail is not sufficient to ensure this because the read
+ * component of xchg_tail is unordered with respect to the
+ * initialisation of @node.
*/
-
- WRITE_ONCE(prev->next, node);
+ smp_store_release(&prev->next, node);
pv_wait_node(node, prev);
arch_mcs_spin_lock_contended(&node->locked);
{
struct rchan_buf *buf;
- if (chan->n_subbufs > UINT_MAX / sizeof(size_t *))
+ if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t *))
return NULL;
buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
#endif
}
-static inline void finish_lock_switch(struct rq *rq)
+static inline void
+prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
{
+ /*
+ * Since the runqueue lock will be released by the next
+ * task (which is an invalid locking op but in the case
+ * of the scheduler it's an obvious special-case), so we
+ * do an early lockdep release here:
+ */
+ rq_unpin_lock(rq, rf);
+ spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
#ifdef CONFIG_DEBUG_SPINLOCK
/* this is a valid case when another task releases the spinlock */
- rq->lock.owner = current;
+ rq->lock.owner = next;
#endif
+}
+
+static inline void finish_lock_switch(struct rq *rq)
+{
/*
* If we are tracking spinlock dependencies then we have to
* fix up the runqueue lock - which gets 'carried over' from
* prev into current:
*/
spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
-
raw_spin_unlock_irq(&rq->lock);
}
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
- /*
- * Since the runqueue lock will be released by the next
- * task (which is an invalid locking op but in the case
- * of the scheduler it's an obvious special-case), so we
- * do an early lockdep release here:
- */
- rq_unpin_lock(rq, rf);
- spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
+ prepare_lock_switch(rq, next, rf);
/* Here we just switch the register state and the stack. */
switch_to(prev, next, prev);
#include "sched.h"
-#define SUGOV_KTHREAD_PRIORITY 50
-
struct sugov_tunables {
struct gov_attr_set attr_set;
unsigned int rate_limit_us;
struct sched_dl_entity *dl_se = &curr->dl;
u64 delta_exec, scaled_delta_exec;
int cpu = cpu_of(rq);
+ u64 now;
if (!dl_task(curr) || !on_dl_rq(dl_se))
return;
* natural solution, but the full ramifications of this
* approach need further study.
*/
- delta_exec = rq_clock_task(rq) - curr->se.exec_start;
+ now = rq_clock_task(rq);
+ delta_exec = now - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0)) {
if (unlikely(dl_se->dl_yielded))
goto throttle;
curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
- curr->se.exec_start = rq_clock_task(rq);
+ curr->se.exec_start = now;
cgroup_account_cputime(curr, delta_exec);
sched_rt_avg_update(rq, delta_exec);
{
struct task_struct *curr = rq->curr;
struct sched_rt_entity *rt_se = &curr->rt;
- u64 now = rq_clock_task(rq);
u64 delta_exec;
+ u64 now;
if (curr->sched_class != &rt_sched_class)
return;
+ now = rq_clock_task(rq);
delta_exec = now - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0))
return;
size = min_t(unsigned long, size, sizeof(kmd));
- if (copy_from_user(&kmd, data, size))
+ if (size < sizeof(kmd.filter_off))
+ return -EINVAL;
+
+ if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off)))
return -EFAULT;
filter = get_nth_filter(task, kmd.filter_off);
if (IS_ERR(filter))
return PTR_ERR(filter);
- memset(&kmd, 0, sizeof(kmd));
if (filter->log)
kmd.flags |= SECCOMP_FILTER_FLAG_LOG;
return -EINVAL;
if (copy_from_user(&query, uquery, sizeof(query)))
return -EFAULT;
+ if (query.ids_len > BPF_TRACE_MAX_PROGS)
+ return -E2BIG;
mutex_lock(&bpf_event_mutex);
ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
.uid = GLOBAL_ROOT_UID,
+ .ratelimit = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0),
};
/*
new->uid = uid;
atomic_set(&new->__count, 1);
+ ratelimit_state_init(&new->ratelimit, HZ, 100);
+ ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
/*
* Before adding this, check whether we raced
}
EXPORT_SYMBOL_GPL(workqueue_set_max_active);
+/**
+ * current_work - retrieve %current task's work struct
+ *
+ * Determine if %current task is a workqueue worker and what it's working on.
+ * Useful to find out the context that the %current task is running in.
+ *
+ * Return: work struct if %current task is a workqueue worker, %NULL otherwise.
+ */
+struct work_struct *current_work(void)
+{
+ struct worker *worker = current_wq_worker();
+
+ return worker ? worker->current_work : NULL;
+}
+EXPORT_SYMBOL(current_work);
+
/**
* current_is_workqueue_rescuer - is %current workqueue rescuer?
*
menuconfig RUNTIME_TESTING_MENU
bool "Runtime Testing"
+ def_bool y
if RUNTIME_TESTING_MENU
return page_address(page);
}
+/*
+ * NOTE: this function must never look at the dma_addr argument, because we want
+ * to be able to use it as a helper for iommu implementations as well.
+ */
void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
.map_sg = dma_direct_map_sg,
.dma_supported = dma_direct_supported,
.mapping_error = dma_direct_mapping_error,
+ .is_phys = 1,
};
EXPORT_SYMBOL(dma_direct_ops);
bitmap = this_cpu_xchg(ida_bitmap, NULL);
if (!bitmap)
return -EAGAIN;
- memset(bitmap, 0, sizeof(*bitmap));
bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT;
rcu_assign_pointer(*slot, bitmap);
}
bitmap = this_cpu_xchg(ida_bitmap, NULL);
if (!bitmap)
return -EAGAIN;
- memset(bitmap, 0, sizeof(*bitmap));
__set_bit(bit, bitmap->bitmap);
radix_tree_iter_replace(root, &iter, slot, bitmap);
}
static struct pernet_operations uevent_net_ops = {
.init = uevent_net_init,
.exit = uevent_net_exit,
+ .async = true,
};
static int __init kobject_uevent_init(void)
preempt_enable();
if (!this_cpu_read(ida_bitmap)) {
- struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
+ struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp);
if (!bitmap)
return 0;
if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
{
const int default_width = 2 * sizeof(void *);
- if (!ptr && *fmt != 'K') {
+ if (!ptr && *fmt != 'K' && *fmt != 'x') {
/*
* Print (null) with the same width as a pointer so it makes
* tabular output look nice.
return 0;
}
- arch_unmap_kpfn(pfn);
-
orig_head = hpage = compound_head(p);
num_poisoned_pages_inc();
#include "internal.h"
-#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST)
#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
#endif
mod_zone_page_state(page_zone(page), NR_MLOCK,
-hpage_nr_pages(page));
count_vm_event(UNEVICTABLE_PGCLEARED);
+ /*
+ * The previous TestClearPageMlocked() corresponds to the smp_mb()
+ * in __pagevec_lru_add_fn().
+ *
+ * See __pagevec_lru_add_fn for more explanation.
+ */
if (!isolate_lru_page(page)) {
putback_lru_page(page);
} else {
#include <linux/stop_machine.h>
#include <linux/sort.h>
#include <linux/pfn.h>
+#include <xen/xen.h>
#include <linux/backing-dev.h>
#include <linux/fault-inject.h>
#include <linux/page-isolation.h>
/* Always populate low zones for address-constrained allocations */
if (zone_end < pgdat_end_pfn(pgdat))
return true;
+ /* Xen PV domains need page structures early */
+ if (xen_pv_domain())
+ return true;
(*nr_initialised)++;
if ((*nr_initialised > pgdat->static_init_pgcnt) &&
(pfn & (PAGES_PER_SECTION - 1)) == 0) {
__lru_cache_add(page);
}
-/**
- * add_page_to_unevictable_list - add a page to the unevictable list
- * @page: the page to be added to the unevictable list
- *
- * Add page directly to its zone's unevictable list. To avoid races with
- * tasks that might be making the page evictable, through eg. munlock,
- * munmap or exit, while it's not on the lru, we want to add the page
- * while it's locked or otherwise "invisible" to other tasks. This is
- * difficult to do when using the pagevec cache, so bypass that.
- */
-void add_page_to_unevictable_list(struct page *page)
-{
- struct pglist_data *pgdat = page_pgdat(page);
- struct lruvec *lruvec;
-
- spin_lock_irq(&pgdat->lru_lock);
- lruvec = mem_cgroup_page_lruvec(page, pgdat);
- ClearPageActive(page);
- SetPageUnevictable(page);
- SetPageLRU(page);
- add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
- spin_unlock_irq(&pgdat->lru_lock);
-}
-
/**
* lru_cache_add_active_or_unevictable
* @page: the page to be added to LRU
{
VM_BUG_ON_PAGE(PageLRU(page), page);
- if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
+ if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
SetPageActive(page);
- lru_cache_add(page);
- return;
- }
-
- if (!TestSetPageMlocked(page)) {
+ else if (!TestSetPageMlocked(page)) {
/*
* We use the irq-unsafe __mod_zone_page_stat because this
* counter is not modified from interrupt context, and the pte
hpage_nr_pages(page));
count_vm_event(UNEVICTABLE_PGMLOCKED);
}
- add_page_to_unevictable_list(page);
+ lru_cache_add(page);
}
/*
static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
void *arg)
{
- int file = page_is_file_cache(page);
- int active = PageActive(page);
- enum lru_list lru = page_lru(page);
+ enum lru_list lru;
+ int was_unevictable = TestClearPageUnevictable(page);
VM_BUG_ON_PAGE(PageLRU(page), page);
SetPageLRU(page);
+ /*
+ * Page becomes evictable in two ways:
+ * 1) Within LRU lock [munlock_vma_pages() and __munlock_pagevec()].
+ * 2) Before acquiring LRU lock to put the page to correct LRU and then
+ * a) do PageLRU check with lock [check_move_unevictable_pages]
+ * b) do PageLRU check before lock [clear_page_mlock]
+ *
+ * (1) & (2a) are ok as LRU lock will serialize them. For (2b), we need
+ * following strict ordering:
+ *
+ * #0: __pagevec_lru_add_fn #1: clear_page_mlock
+ *
+ * SetPageLRU() TestClearPageMlocked()
+ * smp_mb() // explicit ordering // above provides strict
+ * // ordering
+ * PageMlocked() PageLRU()
+ *
+ *
+ * if '#1' does not observe setting of PG_lru by '#0' and fails
+ * isolation, the explicit barrier will make sure that page_evictable
+ * check will put the page in correct LRU. Without smp_mb(), SetPageLRU
+ * can be reordered after PageMlocked check and can make '#1' to fail
+ * the isolation of the page whose Mlocked bit is cleared (#0 is also
+ * looking at the same page) and the evictable page will be stranded
+ * in an unevictable LRU.
+ */
+ smp_mb();
+
+ if (page_evictable(page)) {
+ lru = page_lru(page);
+ update_page_reclaim_stat(lruvec, page_is_file_cache(page),
+ PageActive(page));
+ if (was_unevictable)
+ count_vm_event(UNEVICTABLE_PGRESCUED);
+ } else {
+ lru = LRU_UNEVICTABLE;
+ ClearPageActive(page);
+ SetPageUnevictable(page);
+ if (!was_unevictable)
+ count_vm_event(UNEVICTABLE_PGCULLED);
+ }
+
add_page_to_lru_list(page, lruvec, lru);
- update_page_reclaim_stat(lruvec, file, active);
trace_mm_lru_insertion(page, lru);
}
* @pvec: Where the resulting entries are placed
* @mapping: The address_space to search
* @start: The starting entry index
- * @nr_pages: The maximum number of pages
+ * @nr_entries: The maximum number of pages
* @indices: The cache indices corresponding to the entries in @pvec
*
* pagevec_lookup_entries() will search for and return a group of up
}
#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
-#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
+#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
-#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
+#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
#else
-#define GFP_VMALLOC32 GFP_KERNEL
+/*
+ * 64b systems should always have either DMA or DMA32 zones. For others
+ * GFP_DMA32 should do the right thing and use the normal zone.
+ */
+#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
#endif
/**
*/
void putback_lru_page(struct page *page)
{
- bool is_unevictable;
- int was_unevictable = PageUnevictable(page);
-
- VM_BUG_ON_PAGE(PageLRU(page), page);
-
-redo:
- ClearPageUnevictable(page);
-
- if (page_evictable(page)) {
- /*
- * For evictable pages, we can use the cache.
- * In event of a race, worst case is we end up with an
- * unevictable page on [in]active list.
- * We know how to handle that.
- */
- is_unevictable = false;
- lru_cache_add(page);
- } else {
- /*
- * Put unevictable pages directly on zone's unevictable
- * list.
- */
- is_unevictable = true;
- add_page_to_unevictable_list(page);
- /*
- * When racing with an mlock or AS_UNEVICTABLE clearing
- * (page is unlocked) make sure that if the other thread
- * does not observe our setting of PG_lru and fails
- * isolation/check_move_unevictable_pages,
- * we see PG_mlocked/AS_UNEVICTABLE cleared below and move
- * the page back to the evictable list.
- *
- * The other side is TestClearPageMlocked() or shmem_lock().
- */
- smp_mb();
- }
-
- /*
- * page's status can change while we move it among lru. If an evictable
- * page is on unevictable list, it never be freed. To avoid that,
- * check after we added it to the list, again.
- */
- if (is_unevictable && page_evictable(page)) {
- if (!isolate_lru_page(page)) {
- put_page(page);
- goto redo;
- }
- /* This means someone else dropped this page from LRU
- * So, it will be freed or putback to LRU again. There is
- * nothing to do here.
- */
- }
-
- if (was_unevictable && !is_unevictable)
- count_vm_event(UNEVICTABLE_PGRESCUED);
- else if (!was_unevictable && is_unevictable)
- count_vm_event(UNEVICTABLE_PGCULLED);
-
+ lru_cache_add(page);
put_page(page); /* drop ref from isolate */
}
/**
* zpool_evictable() - Test if zpool is potentially evictable
- * @pool The zpool to test
+ * @zpool: The zpool to test
*
* Zpool is only potentially evictable when it's created with struct
* zpool_ops.evict and its driver implements struct zpool_driver.shrink.
u8 *src, *dst;
struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
+ /* THP isn't supported */
+ if (PageTransHuge(page)) {
+ ret = -EINVAL;
+ goto reject;
+ }
+
if (!zswap_enabled || !tree) {
ret = -ENODEV;
goto reject;
.exit = vlan_exit_net,
.id = &vlan_net_id,
.size = sizeof(struct vlan_net),
+ .async = true,
};
static int __init vlan_proto_init(void)
spin_unlock_irqrestore(&chan->lock, flags);
/* Wakeup if anyone waiting for VirtIO ring space. */
wake_up(chan->vc_wq);
- p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
+ if (len)
+ p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
}
}
* fields into the sockaddr.
*/
static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_at sat;
struct sock *sk = sock->sk;
if (atalk_autobind(sk) < 0)
goto out;
- *uaddr_len = sizeof(struct sockaddr_at);
memset(&sat, 0, sizeof(sat));
if (peer) {
sat.sat_port = at->src_port;
}
- err = 0;
sat.sat_family = AF_APPLETALK;
memcpy(uaddr, &sat, sizeof(sat));
+ err = sizeof(struct sockaddr_at);
out:
release_sock(sk);
}
static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sockaddr_atmpvc *addr;
struct atm_vcc *vcc = ATM_SD(sock);
if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
return -ENOTCONN;
- *sockaddr_len = sizeof(struct sockaddr_atmpvc);
addr = (struct sockaddr_atmpvc *)sockaddr;
memset(addr, 0, sizeof(*addr));
addr->sap_family = AF_ATMPVC;
addr->sap_addr.itf = vcc->dev->number;
addr->sap_addr.vpi = vcc->vpi;
addr->sap_addr.vci = vcc->vci;
- return 0;
+ return sizeof(struct sockaddr_atmpvc);
}
static const struct proto_ops pvc_proto_ops = {
}
static int svc_getname(struct socket *sock, struct sockaddr *sockaddr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sockaddr_atmsvc *addr;
- *sockaddr_len = sizeof(struct sockaddr_atmsvc);
addr = (struct sockaddr_atmsvc *) sockaddr;
memcpy(addr, peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local,
sizeof(struct sockaddr_atmsvc));
- return 0;
+ return sizeof(struct sockaddr_atmsvc);
}
int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
}
static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
struct sock *sk = sock->sk;
fsa->fsa_digipeater[0] = null_ax25_address;
}
}
- *uaddr_len = sizeof (struct full_sockaddr_ax25);
+ err = sizeof (struct full_sockaddr_ax25);
out:
release_sock(sk);
/* Slave connection state and connectable mode bit 38
* and scannable bit 21.
*/
- if (connectable && (!(hdev->le_states[4] & 0x01) ||
- !(hdev->le_states[2] & 0x40)))
+ if (connectable && (!(hdev->le_states[4] & 0x40) ||
+ !(hdev->le_states[2] & 0x20)))
return false;
}
/* Master connection state and connectable mode bit 35 and
* scannable 19.
*/
- if (connectable && (!(hdev->le_states[4] & 0x10) ||
+ if (connectable && (!(hdev->le_states[4] & 0x08) ||
!(hdev->le_states[2] & 0x08)))
return false;
}
}
static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *addr_len, int peer)
+ int peer)
{
struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr;
struct sock *sk = sock->sk;
goto done;
}
- *addr_len = sizeof(*haddr);
haddr->hci_family = AF_BLUETOOTH;
haddr->hci_dev = hdev->id;
haddr->hci_channel= hci_pi(sk)->channel;
+ err = sizeof(*haddr);
done:
release_sock(sk);
}
static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
struct sock *sk = sock->sk;
memset(la, 0, sizeof(struct sockaddr_l2));
addr->sa_family = AF_BLUETOOTH;
- *len = sizeof(struct sockaddr_l2);
la->l2_psm = chan->psm;
la->l2_bdaddr_type = chan->src_type;
}
- return 0;
+ return sizeof(struct sockaddr_l2);
}
static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
return err;
}
-static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int peer)
{
struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
struct sock *sk = sock->sk;
else
bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->src);
- *len = sizeof(struct sockaddr_rc);
- return 0;
+ return sizeof(struct sockaddr_rc);
}
static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
}
static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
BT_DBG("sock %p, sk %p", sock, sk);
addr->sa_family = AF_BLUETOOTH;
- *len = sizeof(struct sockaddr_sco);
if (peer)
bacpy(&sa->sco_bdaddr, &sco_pi(sk)->dst);
else
bacpy(&sa->sco_bdaddr, &sco_pi(sk)->src);
- return 0;
+ return sizeof(struct sockaddr_sco);
}
static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
static struct pernet_operations br_net_ops = {
.exit = br_net_exit,
+ .async = true,
};
static const struct stp_proto br_stp_proto = {
.exit = brnf_exit_net,
.id = &brnf_net_id,
.size = sizeof(struct brnf_net),
+ .async = true,
};
static struct notifier_block brnf_notifier __read_mostly = {
struct brport_attribute *brport_attr = to_brport_attr(attr);
struct net_bridge_port *p = to_brport(kobj);
+ if (!brport_attr->show)
+ return -EINVAL;
+
return brport_attr->show(p, buf);
}
expected_length += ebt_mac_wormhash_size(wh_src);
if (em->match_size != EBT_ALIGN(expected_length)) {
- pr_info("wrong size: %d against expected %d, rounded to %zd\n",
- em->match_size, expected_length,
- EBT_ALIGN(expected_length));
+ pr_err_ratelimited("wrong size: %d against expected %d, rounded to %zd\n",
+ em->match_size, expected_length,
+ EBT_ALIGN(expected_length));
return -EINVAL;
}
if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) {
- pr_info("dst integrity fail: %x\n", -err);
+ pr_err_ratelimited("dst integrity fail: %x\n", -err);
return -EINVAL;
}
if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) {
- pr_info("src integrity fail: %x\n", -err);
+ pr_err_ratelimited("src integrity fail: %x\n", -err);
return -EINVAL;
}
return 0;
/* Check for overflow. */
if (info->burst == 0 ||
user2credits(info->avg * info->burst) < user2credits(info->avg)) {
- pr_info("overflow, try lower: %u/%u\n",
- info->avg, info->burst);
+ pr_info_ratelimited("overflow, try lower: %u/%u\n",
+ info->avg, info->burst);
return -EINVAL;
}
static struct pernet_operations canbcm_pernet_ops __read_mostly = {
.init = canbcm_pernet_init,
.exit = canbcm_pernet_exit,
+ .async = true,
};
static int __init bcm_module_init(void)
}
static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct sock *sk = sock->sk;
addr->can_family = AF_CAN;
addr->can_ifindex = ro->ifindex;
- *len = sizeof(*addr);
-
- return 0;
+ return sizeof(*addr);
}
static int raw_setsockopt(struct socket *sock, int level, int optname,
/*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
- * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
*/
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
+ bool disabling;
int rc;
+ disabling = txq < dev->real_num_tx_queues;
+
if (txq < 1 || txq > dev->num_tx_queues)
return -EINVAL;
if (dev->num_tc)
netif_setup_tc(dev, txq);
- if (txq < dev->real_num_tx_queues) {
+ dev->real_num_tx_queues = txq;
+
+ if (disabling) {
+ synchronize_net();
qdisc_reset_all_tx_gt(dev, txq);
#ifdef CONFIG_XPS
netif_reset_xps_queues_gt(dev, txq);
#endif
}
+ } else {
+ dev->real_num_tx_queues = txq;
}
- dev->real_num_tx_queues = txq;
return 0;
}
EXPORT_SYMBOL(netif_set_real_num_tx_queues);
BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+#if IS_ENABLED(CONFIG_DECNET)
WARN_ON(dev->dn_ptr);
-
+#endif
if (dev->priv_destructor)
dev->priv_destructor(dev);
if (dev->needs_free_netdev)
static struct pernet_operations __net_initdata netdev_net_ops = {
.init = netdev_init,
.exit = netdev_exit,
+ .async = true,
};
static void __net_exit default_device_exit(struct net *net)
static struct pernet_operations __net_initdata default_device_ops = {
.exit = default_device_exit,
.exit_batch = default_device_exit_batch,
+ .async = true,
};
/*
resource->size_valid = size_valid;
}
+static int
+devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
+ struct netlink_ext_ack *extack)
+{
+ u64 reminder;
+ int err = 0;
+
+ if (size > resource->size_params->size_max) {
+ NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum");
+ err = -EINVAL;
+ }
+
+ if (size < resource->size_params->size_min) {
+ NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum");
+ err = -EINVAL;
+ }
+
+ div64_u64_rem(size, resource->size_params->size_granularity, &reminder);
+ if (reminder) {
+ NL_SET_ERR_MSG_MOD(extack, "Wrong granularity");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
struct genl_info *info)
{
if (!resource)
return -EINVAL;
- if (!resource->resource_ops->size_validate)
- return -EINVAL;
-
size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
- err = resource->resource_ops->size_validate(devlink, size,
- info->extack);
+ err = devlink_resource_validate_size(resource, size, info->extack);
if (err)
return err;
static struct pernet_operations fib_notifier_net_ops = {
.init = fib_notifier_net_init,
.exit = fib_notifier_net_exit,
+ .async = true,
};
static int __init fib_notifier_init(void)
if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
!uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
return false;
+ if (fib_rule_port_range_set(&rule->sport_range))
+ return false;
+ if (fib_rule_port_range_set(&rule->dport_range))
+ return false;
return true;
}
EXPORT_SYMBOL_GPL(fib_rule_matchall);
r->pref = pref;
r->table = table;
r->flags = flags;
+ r->proto = RTPROT_KERNEL;
r->fr_net = ops->fro_net;
r->uid_range = fib_kuid_range_unset;
return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
}
+static int nla_get_port_range(struct nlattr *pattr,
+ struct fib_rule_port_range *port_range)
+{
+ const struct fib_rule_port_range *pr = nla_data(pattr);
+
+ if (!fib_rule_port_range_valid(pr))
+ return -EINVAL;
+
+ port_range->start = pr->start;
+ port_range->end = pr->end;
+
+ return 0;
+}
+
+static int nla_put_port_range(struct sk_buff *skb, int attrtype,
+ struct fib_rule_port_range *range)
+{
+ return nla_put(skb, attrtype, sizeof(*range), range);
+}
+
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags,
struct fib_lookup_arg *arg)
!uid_eq(r->uid_range.end, rule->uid_range.end))
continue;
+ if (r->ip_proto != rule->ip_proto)
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->sport_range,
+ &rule->sport_range))
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->dport_range,
+ &rule->dport_range))
+ continue;
+
if (!ops->compare(r, frh, tb))
continue;
return 1;
rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
: fib_default_rule_pref(ops);
+ rule->proto = tb[FRA_PROTOCOL] ?
+ nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;
+
if (tb[FRA_IIFNAME]) {
struct net_device *dev;
rule->uid_range = fib_kuid_range_unset;
}
+ if (tb[FRA_IP_PROTO])
+ rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &rule->sport_range);
+ if (err)
+ goto errout_free;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &rule->dport_range);
+ if (err)
+ goto errout_free;
+ }
+
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
rule_exists(ops, frh, tb, rule)) {
err = -EEXIST;
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rule_port_range sprange = {0, 0};
+ struct fib_rule_port_range dprange = {0, 0};
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r;
struct nlattr *tb[FRA_MAX+1];
range = fib_kuid_range_unset;
}
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &sprange);
+ if (err)
+ goto errout;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &dprange);
+ if (err)
+ goto errout;
+ }
+
list_for_each_entry(rule, &ops->rules_list, list) {
+ if (tb[FRA_PROTOCOL] &&
+ (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
+ continue;
+
if (frh->action && (frh->action != rule->action))
continue;
!uid_eq(rule->uid_range.end, range.end)))
continue;
+ if (tb[FRA_IP_PROTO] &&
+ (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
+ continue;
+
+ if (fib_rule_port_range_set(&sprange) &&
+ !fib_rule_port_range_compare(&rule->sport_range, &sprange))
+ continue;
+
+ if (fib_rule_port_range_set(&dprange) &&
+ !fib_rule_port_range_compare(&rule->dport_range, &dprange))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
+ nla_total_size(4) /* FRA_FWMARK */
+ nla_total_size(4) /* FRA_FWMASK */
+ nla_total_size_64bit(8) /* FRA_TUN_ID */
- + nla_total_size(sizeof(struct fib_kuid_range));
+ + nla_total_size(sizeof(struct fib_kuid_range))
+ + nla_total_size(1) /* FRA_PROTOCOL */
+ + nla_total_size(1) /* FRA_IP_PROTO */
+ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
+ + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
frh->action = rule->action;
frh->flags = rule->flags;
+ if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto))
+ goto nla_put_failure;
+
if (rule->action == FR_ACT_GOTO &&
rcu_access_pointer(rule->ctarget) == NULL)
frh->flags |= FIB_RULE_UNRESOLVED;
(rule->l3mdev &&
nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
(uid_range_set(&rule->uid_range) &&
- nla_put_uid_range(skb, &rule->uid_range)))
+ nla_put_uid_range(skb, &rule->uid_range)) ||
+ (fib_rule_port_range_set(&rule->sport_range) &&
+ nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
+ (fib_rule_port_range_set(&rule->dport_range) &&
+ nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
+ (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
static struct pernet_operations fib_rules_net_ops = {
.init = fib_rules_net_init,
.exit = fib_rules_net_exit,
+ .async = true,
};
static int __init fib_rules_init(void)
struct ip_tunnel_info *info;
if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
- BPF_F_DONT_FRAGMENT)))
+ BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
if (flags & BPF_F_ZERO_CSUM_TX)
info->key.tun_flags &= ~TUNNEL_CSUM;
+ if (flags & BPF_F_SEQ_NUMBER)
+ info->key.tun_flags |= TUNNEL_SEQ;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
struct sock *sk = bpf_sock->sk;
int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
- if (!sk_fullsock(sk))
+ if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
return -EINVAL;
-#ifdef CONFIG_INET
if (val)
tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
-#else
- return -EINVAL;
-#endif
}
static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
}
EXPORT_SYMBOL(__get_hash_from_flowi6);
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
-{
- memset(keys, 0, sizeof(*keys));
-
- keys->addrs.v4addrs.src = fl4->saddr;
- keys->addrs.v4addrs.dst = fl4->daddr;
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- keys->ports.src = fl4->fl4_sport;
- keys->ports.dst = fl4->fl4_dport;
- keys->keyid.keyid = fl4->fl4_gre_key;
- keys->basic.ip_proto = fl4->flowi4_proto;
-
- return flow_hash_from_keys(keys);
-}
-EXPORT_SYMBOL(__get_hash_from_flowi4);
-
static const struct flow_dissector_key flow_keys_dissector_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
static void est_fetch_counters(struct net_rate_estimator *e,
struct gnet_stats_basic_packed *b)
{
+ memset(b, 0, sizeof(*b));
if (e->stats_lock)
spin_lock(e->stats_lock);
static struct pernet_operations __net_initdata dev_proc_ops = {
.init = dev_proc_net_init,
.exit = dev_proc_net_exit,
+ .async = true,
};
static int dev_mc_seq_show(struct seq_file *seq, void *v)
static struct pernet_operations __net_initdata dev_mc_net_ops = {
.init = dev_mc_net_init,
.exit = dev_mc_net_exit,
+ .async = true,
};
int __init dev_proc_init(void)
static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
-DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
EXPORT_SYMBOL(init_net);
static bool init_net_initialized;
+static unsigned nr_sync_pernet_ops;
+/*
+ * net_sem: protects: pernet_list, net_generic_ids, nr_sync_pernet_ops,
+ * init_net_initialized and first_device pointer.
+ */
+DECLARE_RWSEM(net_sem);
#define MIN_PERNET_OPS_ID \
((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
{
struct net_generic *ng, *old_ng;
- BUG_ON(!mutex_is_locked(&net_mutex));
BUG_ON(id < MIN_PERNET_OPS_ID);
old_ng = rcu_dereference_protected(net->gen,
- lockdep_is_held(&net_mutex));
+ lockdep_is_held(&net_sem));
if (old_ng->s.len > id) {
old_ng->ptr[id] = data;
return 0;
*/
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
- /* Must be called with net_mutex held */
+ /* Must be called with net_sem held */
const struct pernet_operations *ops, *saved_ops;
int error = 0;
LIST_HEAD(net_exit_list);
if (error < 0)
goto out_undo;
}
+ rtnl_lock();
+ list_add_tail_rcu(&net->list, &net_namespace_list);
+ rtnl_unlock();
out:
return error;
static struct pernet_operations net_defaults_ops = {
.init = net_defaults_init_net,
+ .async = true,
};
static __init int net_defaults_init(void)
dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}
-static struct kmem_cache *net_cachep;
+static struct kmem_cache *net_cachep __ro_after_init;
static struct workqueue_struct *netns_wq;
static struct net *net_alloc(void)
{
struct ucounts *ucounts;
struct net *net;
+ unsigned write;
int rv;
if (!(flags & CLONE_NEWNET))
net = net_alloc();
if (!net) {
- dec_net_namespaces(ucounts);
- return ERR_PTR(-ENOMEM);
+ rv = -ENOMEM;
+ goto dec_ucounts;
}
-
+ refcount_set(&net->passive, 1);
+ net->ucounts = ucounts;
get_user_ns(user_ns);
+again:
+ write = READ_ONCE(nr_sync_pernet_ops);
+ if (write)
+ rv = down_write_killable(&net_sem);
+ else
+ rv = down_read_killable(&net_sem);
+ if (rv < 0)
+ goto put_userns;
- rv = mutex_lock_killable(&net_mutex);
- if (rv < 0) {
- net_free(net);
- dec_net_namespaces(ucounts);
- put_user_ns(user_ns);
- return ERR_PTR(rv);
+ if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+ up_read(&net_sem);
+ goto again;
}
-
- net->ucounts = ucounts;
rv = setup_net(net, user_ns);
- if (rv == 0) {
- rtnl_lock();
- list_add_tail_rcu(&net->list, &net_namespace_list);
- rtnl_unlock();
- }
- mutex_unlock(&net_mutex);
+
+ if (write)
+ up_write(&net_sem);
+ else
+ up_read(&net_sem);
+
if (rv < 0) {
- dec_net_namespaces(ucounts);
+put_userns:
put_user_ns(user_ns);
net_drop_ns(net);
+dec_ucounts:
+ dec_net_namespaces(ucounts);
return ERR_PTR(rv);
}
return net;
spin_unlock_bh(&net->nsid_lock);
}
-static DEFINE_SPINLOCK(cleanup_list_lock);
-static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
+static LLIST_HEAD(cleanup_list);
static void cleanup_net(struct work_struct *work)
{
const struct pernet_operations *ops;
struct net *net, *tmp, *last;
- struct list_head net_kill_list;
+ struct llist_node *net_kill_list;
LIST_HEAD(net_exit_list);
+ unsigned write;
/* Atomically snapshot the list of namespaces to cleanup */
- spin_lock_irq(&cleanup_list_lock);
- list_replace_init(&cleanup_list, &net_kill_list);
- spin_unlock_irq(&cleanup_list_lock);
+ net_kill_list = llist_del_all(&cleanup_list);
+again:
+ write = READ_ONCE(nr_sync_pernet_ops);
+ if (write)
+ down_write(&net_sem);
+ else
+ down_read(&net_sem);
- mutex_lock(&net_mutex);
+ if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+ up_read(&net_sem);
+ goto again;
+ }
/* Don't let anyone else find us. */
rtnl_lock();
- list_for_each_entry(net, &net_kill_list, cleanup_list)
+ llist_for_each_entry(net, net_kill_list, cleanup_list)
list_del_rcu(&net->list);
/* Cache last net. After we unlock rtnl, no one new net
* added to net_namespace_list can assign nsid pointer
last = list_last_entry(&net_namespace_list, struct net, list);
rtnl_unlock();
- list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ llist_for_each_entry(net, net_kill_list, cleanup_list) {
unhash_nsid(net, last);
list_add_tail(&net->exit_list, &net_exit_list);
}
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_free_list(ops, &net_exit_list);
- mutex_unlock(&net_mutex);
+ if (write)
+ up_write(&net_sem);
+ else
+ up_read(&net_sem);
/* Ensure there are no outstanding rcu callbacks using this
* network namespace.
*/
void net_ns_barrier(void)
{
- mutex_lock(&net_mutex);
- mutex_unlock(&net_mutex);
+ down_write(&net_sem);
+ up_write(&net_sem);
}
EXPORT_SYMBOL(net_ns_barrier);
void __put_net(struct net *net)
{
/* Cleanup the network namespace in process context */
- unsigned long flags;
-
- spin_lock_irqsave(&cleanup_list_lock, flags);
- list_add(&net->cleanup_list, &cleanup_list);
- spin_unlock_irqrestore(&cleanup_list_lock, flags);
-
- queue_work(netns_wq, &net_cleanup_work);
+ if (llist_add(&net->cleanup_list, &cleanup_list))
+ queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);
static struct pernet_operations __net_initdata net_ns_ops = {
.init = net_ns_net_init,
.exit = net_ns_net_exit,
+ .async = true,
};
static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
rcu_assign_pointer(init_net.gen, ng);
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
init_net_initialized = true;
-
- rtnl_lock();
- list_add_tail_rcu(&init_net.list, &net_namespace_list);
- rtnl_unlock();
-
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
register_pernet_subsys(&net_ns_ops);
rcu_barrier();
if (ops->id)
ida_remove(&net_generic_ids, *ops->id);
+ } else if (!ops->async) {
+ pr_info_once("Pernet operations %ps are sync.\n", ops);
+ nr_sync_pernet_ops++;
}
return error;
static void unregister_pernet_operations(struct pernet_operations *ops)
{
-
+ if (!ops->async)
+ BUG_ON(nr_sync_pernet_ops-- == 0);
__unregister_pernet_operations(ops);
rcu_barrier();
if (ops->id)
int register_pernet_subsys(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
error = register_pernet_operations(first_device, ops);
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
*/
void unregister_pernet_subsys(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
int register_pernet_device(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
error = register_pernet_operations(&pernet_list, ops);
if (!error && (first_device == &pernet_list))
first_device = &ops->list;
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);
*/
void unregister_pernet_device(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
if (&ops->list == first_device)
first_device = first_device->next;
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
/* Close the race with cleanup_net() */
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
rtnl_lock_unregistering_all();
__rtnl_link_unregister(ops);
rtnl_unlock();
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
static struct pernet_operations rtnetlink_net_ops = {
.init = rtnetlink_net_init,
.exit = rtnetlink_net_exit,
+ .async = true,
};
void __init rtnetlink_init(void)
#include <linux/capability.h>
#include <linux/user_namespace.h>
-struct kmem_cache *skbuff_head_cache __read_mostly;
-static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+struct kmem_cache *skbuff_head_cache __ro_after_init;
+static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
}
EXPORT_SYMBOL_GPL(skb_morph);
-static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
{
unsigned long max_pg, num_pg, new_pg, old_pg;
struct user_struct *user;
return 0;
}
+EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
-static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+void mm_unaccount_pinned_pages(struct mmpin *mmp)
{
if (mmp->user) {
atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
free_uid(mmp->user);
}
}
+EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
{
break;
case SO_ZEROCOPY:
- if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+ if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
+ if (sk->sk_protocol != IPPROTO_TCP)
+ ret = -ENOTSUPP;
+ else if (sk->sk_state != TCP_CLOSE)
+ ret = -EBUSY;
+ } else if (sk->sk_family != PF_RDS) {
ret = -ENOTSUPP;
- else if (sk->sk_protocol != IPPROTO_TCP)
- ret = -ENOTSUPP;
- else if (sk->sk_state != TCP_CLOSE)
- ret = -EBUSY;
- else if (val < 0 || val > 1)
- ret = -EINVAL;
- else
- sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
- break;
-
+ }
+ if (!ret) {
+ if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ break;
+ }
default:
ret = -ENOPROTOOPT;
break;
{
char address[128];
- if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+ lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
+ if (lv < 0)
return -ENOTCONN;
if (lv < len)
return -EINVAL;
u32 max_segs = 1;
sk_dst_set(sk, dst);
- sk->sk_route_caps = dst->dev->features;
+ sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
sk->sk_route_caps &= ~sk->sk_route_nocaps;
EXPORT_SYMBOL(sock_no_accept);
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
- int *len, int peer)
+ int peer)
{
return -EOPNOTSUPP;
}
static struct pernet_operations net_inuse_ops = {
.init = sock_inuse_init_net,
.exit = sock_inuse_exit_net,
+ .async = true,
};
static __init int net_inuse_init(void)
static __net_initdata struct pernet_operations proto_net_ops = {
.init = proto_init_net,
.exit = proto_exit_net,
+ .async = true,
};
static int __init proto_init(void)
static struct pernet_operations diag_net_ops = {
.init = diag_net_init,
.exit = diag_net_exit,
+ .async = true,
};
static int __init sock_diag_init(void)
static __net_initdata struct pernet_operations sysctl_core_ops = {
.init = sysctl_core_net_init,
.exit = sysctl_core_net_exit,
+ .async = true,
};
static __init int sysctl_core_init(void)
}
-static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len,int peer)
+static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
{
struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
- *uaddr_len = sizeof(struct sockaddr_dn);
-
lock_sock(sk);
if (peer) {
release_sock(sk);
- return 0;
+ return sizeof(struct sockaddr_dn);
}
lock_sock(sk);
err = __dn_setsockopt(sock, level, optname, optval, optlen, 0);
release_sock(sk);
+#ifdef CONFIG_NETFILTER
+ /* we need to exclude all possible ENOPROTOOPTs except default case */
+ if (err == -ENOPROTOOPT && optname != DSO_LINKINFO &&
+ optname != DSO_STREAM && optname != DSO_SEQPACKET)
+ err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
+#endif
return err;
}
dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation);
break;
- default:
-#ifdef CONFIG_NETFILTER
- return nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
-#endif
- case DSO_LINKINFO:
- case DSO_STREAM:
- case DSO_SEQPACKET:
- return -ENOPROTOOPT;
-
case DSO_MAXWINDOW:
if (optlen != sizeof(unsigned long))
return -EINVAL;
return -EINVAL;
scp->info_loc = u.info;
break;
+
+ case DSO_LINKINFO:
+ case DSO_STREAM:
+ case DSO_SEQPACKET:
+ default:
+ return -ENOPROTOOPT;
}
return 0;
lock_sock(sk);
err = __dn_getsockopt(sock, level, optname, optval, optlen, 0);
release_sock(sk);
+#ifdef CONFIG_NETFILTER
+ if (err == -ENOPROTOOPT && optname != DSO_STREAM &&
+ optname != DSO_SEQPACKET && optname != DSO_CONACCEPT &&
+ optname != DSO_CONREJECT) {
+ int len;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ err = nf_getsockopt(sk, PF_DECnet, optname, optval, &len);
+ if (err >= 0)
+ err = put_user(len, optlen);
+ }
+#endif
return err;
}
r_data = &link;
break;
- default:
-#ifdef CONFIG_NETFILTER
- {
- int ret, len;
-
- if (get_user(len, optlen))
- return -EFAULT;
-
- ret = nf_getsockopt(sk, PF_DECnet, optname, optval, &len);
- if (ret >= 0)
- ret = put_user(len, optlen);
- return ret;
- }
-#endif
- case DSO_STREAM:
- case DSO_SEQPACKET:
- case DSO_CONACCEPT:
- case DSO_CONREJECT:
- return -ENOPROTOOPT;
-
case DSO_MAXWINDOW:
if (r_len > sizeof(unsigned long))
r_len = sizeof(unsigned long);
r_len = sizeof(unsigned char);
r_data = &scp->info_rem;
break;
+
+ case DSO_STREAM:
+ case DSO_SEQPACKET:
+ case DSO_CONACCEPT:
+ case DSO_CONREJECT:
+ default:
+ return -ENOPROTOOPT;
}
if (r_data) {
#include <linux/netdevice.h>
#include <linux/sysfs.h>
#include <linux/phy_fixed.h>
+#include <linux/ptp_classify.h>
#include <linux/gpio/consumer.h>
#include <linux/etherdevice.h>
}
EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
+/* Determine if we should defer delivery of skb until we have a rx timestamp.
+ *
+ * Called from dsa_switch_rcv. For now, this will only work if tagging is
+ * enabled on the switch. Normally the MAC driver would retrieve the hardware
+ * timestamp when it reads the packet out of the hardware. However in a DSA
+ * switch, the DSA driver owning the interface to which the packet is
+ * delivered is never notified unless we do so here.
+ */
+static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
+{
+ struct dsa_switch *ds = p->dp->ds;
+ unsigned int type;
+
+ if (skb_headroom(skb) < ETH_HLEN)
+ return false;
+
+ __skb_push(skb, ETH_HLEN);
+
+ type = ptp_classify_raw(skb);
+
+ __skb_pull(skb, ETH_HLEN);
+
+ if (type == PTP_CLASS_NONE)
+ return false;
+
+ if (likely(ds->ops->port_rxtstamp))
+ return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type);
+
+ return false;
+}
+
static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *unused)
{
s->rx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
+ if (dsa_skb_defer_rx_timestamp(p, skb))
+ return 0;
+
netif_receive_skb(skb);
return 0;
count += ops->get_sset_count(dev, sset);
if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
+ count += ds->ops->get_sset_count(ds, cpu_dp->index);
return count;
}
* constructed earlier
*/
ds->ops->get_strings(ds, port, ndata);
- count = ds->ops->get_sset_count(ds);
+ count = ds->ops->get_sset_count(ds, port);
for (i = 0; i < count; i++) {
memmove(ndata + (i * len + sizeof(pfx)),
ndata + i * len, len - sizeof(pfx));
#include <net/tc_act/tc_mirred.h>
#include <linux/if_bridge.h>
#include <linux/netpoll.h>
+#include <linux/ptp_classify.h>
#include "dsa_priv.h"
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->dp->ds;
+ int port = p->dp->index;
+
+ /* Pass through to switch driver if it supports timestamping */
+ switch (cmd) {
+ case SIOCGHWTSTAMP:
+ if (ds->ops->port_hwtstamp_get)
+ return ds->ops->port_hwtstamp_get(ds, port, ifr);
+ break;
+ case SIOCSHWTSTAMP:
+ if (ds->ops->port_hwtstamp_set)
+ return ds->ops->port_hwtstamp_set(ds, port, ifr);
+ break;
+ }
+
if (!dev->phydev)
return -ENODEV;
return NETDEV_TX_OK;
}
+static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
+{
+ struct dsa_switch *ds = p->dp->ds;
+ struct sk_buff *clone;
+ unsigned int type;
+
+ type = ptp_classify_raw(skb);
+ if (type == PTP_CLASS_NONE)
+ return;
+
+ if (!ds->ops->port_txtstamp)
+ return;
+
+ clone = skb_clone_sk(skb);
+ if (!clone)
+ return;
+
+ if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type))
+ return;
+
+ kfree_skb(clone);
+}
+
static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
s->tx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
+ /* Identify PTP protocol packets, clone them, and pass them to the
+ * switch driver
+ */
+ dsa_skb_tx_timestamp(p, skb);
+
/* Transmit function may have to reallocate the original SKB,
* in which case it must have freed it. Only free it here on error.
*/
count = 4;
if (ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
+ count += ds->ops->get_sset_count(ds, dp->index);
return count;
}
return ds->ops->set_rxnfc(ds, dp->index, nfc);
}
+static int dsa_slave_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *ts)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->dp->ds;
+
+ if (!ds->ops->get_ts_info)
+ return -EOPNOTSUPP;
+
+ return ds->ops->get_ts_info(ds, p->dp->index, ts);
+}
+
static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_drvinfo = dsa_slave_get_drvinfo,
.get_regs_len = dsa_slave_get_regs_len,
.set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_rxnfc = dsa_slave_get_rxnfc,
.set_rxnfc = dsa_slave_set_rxnfc,
+ .get_ts_info = dsa_slave_get_ts_info,
};
/* legacy way, bypassing the bridge *****************************************/
/* We need an ipv6hdr as minimum len when calling xmit */
ldev->hard_header_len = sizeof(struct ipv6hdr);
ldev->flags = IFF_BROADCAST | IFF_MULTICAST;
+ ldev->priv_flags |= IFF_NO_QUEUE;
ldev->netdev_ops = &lowpan_netdev_ops;
ldev->header_ops = &lowpan_header_ops;
static struct pernet_operations cfg802154_pernet_ops = {
.exit = cfg802154_pernet_exit,
+ .async = true,
};
static int __init wpan_phy_class_init(void)
Network), but can be distributed all over the Internet. If you want
to do that, say Y here and to "IP multicast routing" below.
+config IP_MROUTE_COMMON
+ bool
+ depends on IP_MROUTE || IPV6_MROUTE
+
config IP_MROUTE
bool "IP: multicast routing"
depends on IP_MULTICAST
+ select IP_MROUTE_COMMON
help
This is used if you want your machine to act as a router for IP
packets that have several destination addresses. It is needed on the
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
obj-$(CONFIG_NET_FOU) += fou.o
* This does both peername and sockname.
*/
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
sin->sin_addr.s_addr = addr;
}
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
EXPORT_SYMBOL(inet_getname);
static __net_initdata struct pernet_operations ipv4_mib_ops = {
.init = ipv4_mib_init_net,
.exit = ipv4_mib_exit_net,
+ .async = true,
};
static int __init init_ipv4_mibs(void)
static __net_initdata struct pernet_operations af_inet_ops = {
.init = inet_init_net,
.exit = inet_exit_net,
+ .async = true,
};
static int __init init_inet_pernet_ops(void)
static struct pernet_operations arp_net_ops = {
.init = arp_net_init,
.exit = arp_net_exit,
+ .async = true,
};
static int __init arp_proc_init(void)
static __net_initdata struct pernet_operations devinet_ops = {
.init = devinet_init_net,
.exit = devinet_exit_net,
+ .async = true,
};
static struct rtnl_af_ops inet_af_ops __read_mostly = {
static struct pernet_operations fib_net_ops = {
.init = fib_net_init,
.exit = fib_net_exit,
+ .async = true,
};
void __init ip_fib_init(void)
if (r->tos && (r->tos != fl4->flowi4_tos))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
+ return 0;
+
return 1;
}
}
#endif
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect++;
+
rule4->src_len = frh->src_len;
rule4->srcmask = inet_make_mask(rule4->src_len);
rule4->dst_len = frh->dst_len;
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
+
+ if (net->ipv4.fib_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect--;
errout:
return err;
}
goto fail;
net->ipv4.rules_ops = ops;
net->ipv4.fib_has_custom_rules = false;
+ net->ipv4.fib_rules_require_fldissect = 0;
return 0;
fail:
fnhe = rcu_dereference_protected(hash[i].chain, 1);
while (fnhe) {
struct fib_nh_exception *next;
-
+
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
rt_fibinfo_free(&fnhe->fnhe_rth_input);
fi->fib_nh, cfg, extack))
return 1;
}
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ if (cfg->fc_flow &&
+ cfg->fc_flow != fi->fib_nh->nh_tclassid)
+ return 1;
+#endif
if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
(!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
return 0;
void fib_select_path(struct net *net, struct fib_result *res,
struct flowi4 *fl4, const struct sk_buff *skb)
{
- bool oif_check;
-
- oif_check = (fl4->flowi4_oif == 0 ||
- fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF);
+ if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
+ goto check_saddr;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi->fib_nhs > 1 && oif_check) {
- int h = fib_multipath_hash(res->fi, fl4, skb);
+ if (res->fi->fib_nhs > 1) {
+ int h = fib_multipath_hash(net, fl4, skb, NULL);
fib_select_multipath(res, h);
}
#endif
if (!res->prefixlen &&
res->table->tb_num_default > 1 &&
- res->type == RTN_UNICAST && oif_check)
+ res->type == RTN_UNICAST)
fib_select_default(fl4, res);
+check_saddr:
if (!fl4->saddr)
fl4->saddr = FIB_RES_PREFSRC(net, *res);
}
-EXPORT_SYMBOL_GPL(fib_select_path);
#define VERSION "0.409"
+#include <linux/cache.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
*/
static const int sync_pages = 128;
-static struct kmem_cache *fn_alias_kmem __read_mostly;
-static struct kmem_cache *trie_leaf_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __ro_after_init;
+static struct kmem_cache *trie_leaf_kmem __ro_after_init;
static inline struct tnode *tn_info(struct key_vector *kv)
{
static struct pernet_operations __net_initdata icmp_sk_ops = {
.init = icmp_sk_init,
.exit = icmp_sk_exit,
+ .async = true,
};
int __init icmp_init(void)
static struct pernet_operations igmp_net_ops = {
.init = igmp_net_init,
.exit = igmp_net_exit,
+ .async = true,
};
#endif
* Authors: Andrey V. Savochkin <saw@msu.ru>
*/
+#include <linux/cache.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
* daddr: unchangeable
*/
-static struct kmem_cache *peer_cachep __read_mostly;
+static struct kmem_cache *peer_cachep __ro_after_init;
void inet_peer_base_init(struct inet_peer_base *bp)
{
static struct pernet_operations ip4_frags_ops = {
.init = ipv4_frags_init_net,
.exit = ipv4_frags_exit_net,
+ .async = true,
};
void __init ipfrag_init(void)
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
goto err_free_rt;
- flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = tun_info->key.tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
gre_build_header(skb, tunnel_hlen, flags, proto,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
.exit_batch = ipgre_exit_batch_net,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
ip_tunnel_setup(dev, gre_tap_net_id);
}
+bool is_gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_gretap_dev);
+
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
.exit_batch = ipgre_tap_exit_batch_net,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int __net_init erspan_init_net(struct net *net)
.exit_batch = erspan_exit_batch_net,
.id = &erspan_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int __init ipgre_init(void)
src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
return -EINVAL;
- ipc->oif = src_info->ipi6_ifindex;
+ if (src_info->ipi6_ifindex)
+ ipc->oif = src_info->ipi6_ifindex;
ipc->addr = src_info->ipi6_addr.s6_addr32[3];
continue;
}
if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
return -EINVAL;
info = (struct in_pktinfo *)CMSG_DATA(cmsg);
- ipc->oif = info->ipi_ifindex;
+ if (info->ipi_ifindex)
+ ipc->oif = info->ipi_ifindex;
ipc->addr = info->ipi_spec_dst.s_addr;
break;
}
if (get_user(len, optlen))
return -EFAULT;
- lock_sock(sk);
- err = nf_getsockopt(sk, PF_INET, optname, optval,
- &len);
- release_sock(sk);
+ err = nf_getsockopt(sk, PF_INET, optname, optval, &len);
if (err >= 0)
err = put_user(len, optlen);
return err;
if (get_user(len, optlen))
return -EFAULT;
- lock_sock(sk);
err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len);
- release_sock(sk);
if (err >= 0)
err = put_user(len, optlen);
return err;
return ERR_PTR(err);
}
-static inline void init_tunnel_flow(struct flowi4 *fl4,
- int proto,
- __be32 daddr, __be32 saddr,
- __be32 key, __u8 tos, int oif,
- __u32 mark)
-{
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = oif;
- fl4->daddr = daddr;
- fl4->saddr = saddr;
- fl4->flowi4_tos = tos;
- fl4->flowi4_proto = proto;
- fl4->fl4_gre_key = key;
- fl4->flowi4_mark = mark;
-}
-
static int ip_tunnel_bind_dev(struct net_device *dev)
{
struct net_device *tdev = NULL;
struct flowi4 fl4;
struct rtable *rt;
- init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
- iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
+ iph->saddr, tunnel->parms.o_key,
+ RT_TOS(iph->tos), tunnel->parms.link,
+ tunnel->fwmark);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
else if (skb->protocol == htons(ETH_P_IPV6))
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
- init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
- RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+ RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
rt = ip_route_output_key(tunnel->net, &fl4);
}
if (tunnel->fwmark) {
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos),
+ tunnel->parms.link, tunnel->fwmark);
}
else {
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- skb->mark);
+ ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos),
+ tunnel->parms.link, skb->mark);
}
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
.exit_batch = vti_exit_batch_net,
.id = &vti_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
.exit_batch = ipip_exit_batch_net,
.id = &ipip_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int __init ipip_init(void)
#include <linux/uaccess.h>
#include <linux/types.h>
+#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
-#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
* In this case data path is free of exclusive locks at all.
*/
-static struct kmem_cache *mrt_cachep __read_mostly;
+static struct kmem_cache *mrt_cachep __ro_after_init;
static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);
struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
#define ipmr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv4.mr_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv4.mr_tables)
+ return NULL;
+ return ret;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv4.mrt;
+ return NULL;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
return net->ipv4.mrt;
}
static const struct rhashtable_params ipmr_rht_params = {
- .head_offset = offsetof(struct mfc_cache, mnode),
+ .head_offset = offsetof(struct mr_mfc, mnode),
.key_offset = offsetof(struct mfc_cache, cmparg),
.key_len = sizeof(struct mfc_cache_cmp_arg),
.nelem_hint = 3,
.automatic_shrinking = true,
};
+static void ipmr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+ list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+}
+
+static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
+ .mfc_mcastgrp = htonl(INADDR_ANY),
+ .mfc_origin = htonl(INADDR_ANY),
+};
+
+static struct mr_table_ops ipmr_mr_table_ops = {
+ .rht_params = &ipmr_rht_params,
+ .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
+};
+
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
struct mr_table *mrt;
if (mrt)
return mrt;
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return ERR_PTR(-ENOMEM);
- write_pnet(&mrt->net, net);
- mrt->id = id;
-
- rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
- INIT_LIST_HEAD(&mrt->mfc_cache_list);
- INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
-
- mrt->mroute_reg_vif_num = -1;
-#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
- list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
-#endif
- return mrt;
+ return mr_table_alloc(net, id, &ipmr_mr_table_ops,
+ ipmr_expire_process, ipmr_new_table_set);
}
static void ipmr_free_table(struct mr_table *mrt)
static void ipmr_cache_free_rcu(struct rcu_head *head)
{
- struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
- kmem_cache_free(mrt_cachep, c);
+ kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
}
void ipmr_cache_free(struct mfc_cache *c)
{
- call_rcu(&c->rcu, ipmr_cache_free_rcu);
+ call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
}
EXPORT_SYMBOL(ipmr_cache_free);
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
static void ipmr_expire_process(struct timer_list *t)
{
struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
- unsigned long now;
+ struct mr_mfc *c, *next;
unsigned long expires;
- struct mfc_cache *c, *next;
+ unsigned long now;
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
}
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
}
if (!list_empty(&mrt->mfc_unres_queue))
}
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
+static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
ip_rt_multicast_event(in_dev);
/* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit,
+ vifc->vifc_threshold,
+ vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
+ (VIFF_TUNNEL | VIFF_REGISTER));
attr.orig_dev = dev;
if (!switchdev_port_attr_get(dev, &attr)) {
} else {
v->dev_parent_id.id_len = 0;
}
- v->rate_limit = vifc->vifc_rate_limit;
+
v->local = vifc->vifc_lcl_addr.s_addr;
v->remote = vifc->vifc_rmt_addr.s_addr;
- v->flags = vifc->vifc_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
- v->link = dev_get_iflink(dev);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- return c;
-
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
- int vifi)
-{
- struct mfc_cache_cmp_arg arg = {
- .mfc_mcastgrp = htonl(INADDR_ANY),
- .mfc_origin = htonl(INADDR_ANY)
- };
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
-
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
.mfc_mcastgrp = mcastgrp,
.mfc_origin = htonl(INADDR_ANY)
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c, *proxy;
if (mcastgrp == htonl(INADDR_ANY))
- goto skip;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode) {
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
-
- /* It's ok if the vifi is part of the static tree */
- proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
- if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
- return c;
- }
-
-skip:
- return ipmr_cache_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any(mrt, vifi, &arg);
}
/* Look for a (S,G,iif) entry if parent != -1 */
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin,
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (parent == -1 || parent == c->mfc_parent)
- return c;
- return NULL;
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
/* Allocate a multicast cache entry */
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (c) {
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXVIFS;
- refcount_set(&c->mfc_un.res.refcount, 1);
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXVIFS;
+ refcount_set(&c->_c.mfc_un.res.refcount, 1);
}
return c;
}
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (c) {
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10*HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
}
return c;
}
struct nlmsgerr *e;
/* Play the pending entries through our router */
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
- if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) -
(u8 *)nlh;
} else {
int err;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (c->mfc_mcastgrp == iph->daddr &&
c->mfc_origin == iph->saddr) {
found = true;
}
/* Fill in the new cache entry */
- c->mfc_parent = -1;
+ c->_c.mfc_parent = -1;
c->mfc_origin = iph->saddr;
c->mfc_mcastgrp = iph->daddr;
/* Reflect first query at mrouted. */
err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
+
if (err < 0) {
/* If the report failed throw the cache entry
out - Brad Parker
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
- mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
+ mod_timer(&mrt->ipmr_expire_timer,
+ c->_c.mfc_un.unres.expires);
}
/* See if we can append the packet */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
skb->dev = dev;
skb->skb_iif = dev->ifindex;
}
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
rcu_read_unlock();
if (!c)
return -ENOENT;
- rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
- list_del_rcu(&c->list);
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
+ list_del_rcu(&c->_c.list);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
ipmr_cache_put(c);
struct mfcctl *mfc, int mrtsock, int parent)
{
struct mfc_cache *uc, *c;
+ struct mr_mfc *_uc;
bool found;
int ret;
rcu_read_unlock();
if (c) {
write_lock_bh(&mrt_lock);
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
mrt->id);
c->mfc_origin = mfc->mfcc_origin.s_addr;
c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+ ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
ipmr_rht_params);
if (ret) {
pr_err("ipmr: rhtable insert error %d\n", ret);
ipmr_cache_free(c);
return ret;
}
- list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
/* Check to see if we resolved a queued list. If so we
* need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc_cache *)_uc;
if (uc->mfc_origin == c->mfc_origin &&
uc->mfc_mcastgrp == c->mfc_mcastgrp) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
struct net *net = read_pnet(&mrt->net);
- struct mfc_cache *c, *tmp;
+ struct mr_mfc *c, *tmp;
+ struct mfc_cache *cache;
LIST_HEAD(list);
int i;
continue;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
- call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+ cache = (struct mfc_cache *)c;
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
mrt->id);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_put(c);
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ ipmr_cache_put(cache);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ cache = (struct mfc_cache *)c;
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, cache);
}
spin_unlock_bh(&mfc_unres_lock);
}
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
- struct mfc_cache *cache, int local)
+ struct mfc_cache *c, int local)
{
int true_vifi = ipmr_find_vif(mrt, dev);
int psend = -1;
int vif, ct;
- vif = cache->mfc_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
+ if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
struct mfc_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incomming
* interface is part of the static tree.
*/
- cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
goto forward;
}
goto dont_forward;
}
- cache->mfc_un.res.wrong_if++;
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
* large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
}
goto dont_forward;
mrt->vif_table[vif].bytes_in += skb->len;
/* Forward the frame */
- if (cache->mfc_origin == htonl(INADDR_ANY) &&
- cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
+ if (c->mfc_origin == htonl(INADDR_ANY) &&
+ c->mfc_mcastgrp == htonl(INADDR_ANY)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mfc_parent &&
+ true_vifi != c->_c.mfc_parent &&
ip_hdr(skb)->ttl >
- cache->mfc_un.res.ttls[cache->mfc_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mfc_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1;
- ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((cache->mfc_origin != htonl(INADDR_ANY) ||
+ if ((c->mfc_origin != htonl(INADDR_ANY) ||
ct != true_vifi) &&
- ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
+ ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi,
- skb2, cache, psend);
+ skb2, c, psend);
}
psend = ct;
}
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi, skb2,
- cache, psend);
+ c, psend);
} else {
- ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
return;
}
}
}
#endif
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mfc_parent >= MAXVIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (VIF_EXISTS(mrt, c->mfc_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
-
- if (c->mfc_flags & MFC_OFFLOAD)
- rtm->rtm_flags |= RTNH_F_OFFLOAD;
-
- if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct rtmsg *rtm, u32 portid)
}
read_lock(&mrt_lock);
- err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
goto nla_put_failure;
- err = __ipmr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
return -EMSGSIZE;
}
+static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c, int cmd,
+ int flags)
+{
+ return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
+ cmd, flags);
+}
+
static size_t mroute_msgsize(bool unresolved, int maxvif)
{
size_t len =
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
+ skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
+ mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr_table *mrt;
- struct mfc_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_e = cb->args[1];
-
- rcu_read_lock();
- ipmr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
- if (e < s_e)
- goto next_entry;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = 0;
- s_e = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = 0;
- s_e = 0;
-next_table:
- t++;
- }
-done:
- rcu_read_unlock();
-
- cb->args[1] = e;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
+ _ipmr_fill_mroute, &mfc_unres_lock);
}
static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
/* The /proc interfaces to multicast routing :
* /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
*/
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- int ct;
-};
-
-static struct vif_device *ipmr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
-{
- struct mr_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
-}
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ipmr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
"Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
} else {
const struct vif_device *vif = v;
- const char *name = vif->dev ? vif->dev->name : "none";
+ const char *name = vif->dev ?
+ vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
static const struct seq_operations ipmr_vif_seq_ops = {
.start = ipmr_vif_seq_start,
- .next = ipmr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ipmr_vif_seq_stop,
.show = ipmr_vif_seq_show,
};
static int ipmr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ipmr_vif_fops = {
.release = seq_release_net,
};
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- struct list_head *cache;
-};
-
-static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
-{
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc;
-
- rcu_read_lock();
- it->cache = &mrt->mfc_cache_list;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- if (pos-- == 0)
- return mfc;
- rcu_read_unlock();
-
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
-
- it->cache = NULL;
- return NULL;
-}
-
-
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc = v;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc_cache, list);
-
- if (it->cache == &mrt->mfc_unres_queue)
- goto end_of_list;
-
- /* exhausted cache_array, show unresolved */
- rcu_read_unlock();
- it->cache = &mrt->mfc_unres_queue;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc_cache, list);
-
-end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc_cache_list)
- rcu_read_unlock();
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Group Origin Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
+ const struct mr_mfc_iter *it = seq->private;
const struct mr_table *mrt = it->mrt;
seq_printf(seq, "%08X %08X %-3hd",
(__force u32) mfc->mfc_mcastgrp,
(__force u32) mfc->mfc_origin,
- mfc->mfc_parent);
+ mfc->_c.mfc_parent);
if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
" %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ n, mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ipmr_mfc_fops = {
ipmr_for_each_table(mrt, net) {
struct vif_device *v = &mrt->vif_table[0];
- struct mfc_cache *mfc;
+ struct mr_mfc *mfc;
int vifi;
/* Notifiy on table VIF entries */
/* Notify on table MFC entries */
list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
call_ipmr_mfc_entry_notifier(nb, net,
- FIB_EVENT_ENTRY_ADD, mfc,
+ FIB_EVENT_ENTRY_ADD,
+ (struct mfc_cache *)mfc,
mrt->id);
}
static struct pernet_operations ipmr_net_ops = {
.init = ipmr_net_init,
.exit = ipmr_net_exit,
+ .async = true,
};
int __init ip_mr_init(void)
--- /dev/null
+/* Linux multicast routing support
+ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
+ */
+
+#include <linux/mroute_base.h>
+
+/* Sets everything common except 'dev', since that is done under locking */
+void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask)
+{
+ v->dev = NULL;
+ v->bytes_in = 0;
+ v->bytes_out = 0;
+ v->pkt_in = 0;
+ v->pkt_out = 0;
+ v->rate_limit = rate_limit;
+ v->flags = flags;
+ v->threshold = threshold;
+ if (v->flags & get_iflink_mask)
+ v->link = dev_get_iflink(dev);
+ else
+ v->link = dev->ifindex;
+}
+EXPORT_SYMBOL(vif_device_init);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net))
+{
+ struct mr_table *mrt;
+
+ mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+ if (!mrt)
+ return NULL;
+ mrt->id = id;
+ write_pnet(&mrt->net, net);
+
+ mrt->ops = *ops;
+ rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+ INIT_LIST_HEAD(&mrt->mfc_cache_list);
+ INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+
+ timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);
+
+ mrt->mroute_reg_vif_num = -1;
+ table_set(mrt, net);
+ return mrt;
+}
+EXPORT_SYMBOL(mr_table_alloc);
+
+void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (parent == -1 || parent == c->mfc_parent)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_parent);
+
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
+ *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_any_parent);
+
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c, *proxy;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode) {
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ /* It's ok if the vifi is part of the static tree */
+ proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent);
+ if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
+ return c;
+ }
+
+ return mr_mfc_find_any_parent(mrt, vifi);
+}
+EXPORT_SYMBOL(mr_mfc_find_any);
+
+#ifdef CONFIG_PROC_FS
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
+{
+ struct mr_table *mrt = iter->mrt;
+
+ for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ if (pos-- == 0)
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_idx);
+
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct mr_vif_iter *iter = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = iter->mrt;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+ return mr_vif_seq_idx(net, iter, 0);
+
+ while (++iter->ct < mrt->maxvif) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_next);
+
+void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos)
+{
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ it->cache = &mrt->mfc_cache_list;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ if (pos-- == 0)
+ return mfc;
+ rcu_read_unlock();
+
+ spin_lock_bh(it->lock);
+ it->cache = &mrt->mfc_unres_queue;
+ list_for_each_entry(mfc, it->cache, list)
+ if (pos-- == 0)
+ return mfc;
+ spin_unlock_bh(it->lock);
+
+ it->cache = NULL;
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_idx);
+
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct mr_mfc_iter *it = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *c = v;
+
+ ++*pos;
+
+ if (v == SEQ_START_TOKEN)
+ return mr_mfc_seq_idx(net, seq->private, 0);
+
+ if (c->list.next != it->cache)
+ return list_entry(c->list.next, struct mr_mfc, list);
+
+ if (it->cache == &mrt->mfc_unres_queue)
+ goto end_of_list;
+
+ /* exhausted cache_array, show unresolved */
+ rcu_read_unlock();
+ it->cache = &mrt->mfc_unres_queue;
+
+ spin_lock_bh(it->lock);
+ if (!list_empty(it->cache))
+ return list_first_entry(it->cache, struct mr_mfc, list);
+
+end_of_list:
+ spin_unlock_bh(it->lock);
+ it->cache = NULL;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_next);
+#endif
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm)
+{
+ struct rta_mfc_stats mfcs;
+ struct nlattr *mp_attr;
+ struct rtnexthop *nhp;
+ unsigned long lastuse;
+ int ct;
+
+ /* If cache is unresolved, don't try to parse IIF and OIF */
+ if (c->mfc_parent >= MAXVIFS) {
+ rtm->rtm_flags |= RTNH_F_UNRESOLVED;
+ return -ENOENT;
+ }
+
+ if (VIF_EXISTS(mrt, c->mfc_parent) &&
+ nla_put_u32(skb, RTA_IIF,
+ mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
+ return -EMSGSIZE;
+
+ if (c->mfc_flags & MFC_OFFLOAD)
+ rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
+ mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+ if (!mp_attr)
+ return -EMSGSIZE;
+
+ for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+ if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+ struct vif_device *vif;
+
+ nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+ if (!nhp) {
+ nla_nest_cancel(skb, mp_attr);
+ return -EMSGSIZE;
+ }
+
+ nhp->rtnh_flags = 0;
+ nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+ vif = &mrt->vif_table[ct];
+ nhp->rtnh_ifindex = vif->dev->ifindex;
+ nhp->rtnh_len = sizeof(*nhp);
+ }
+ }
+
+ nla_nest_end(skb, mp_attr);
+
+ lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+ lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
+ mfcs.mfcs_packets = c->mfc_un.res.pkt;
+ mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+ mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+ if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+ nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
+ RTA_PAD))
+ return -EMSGSIZE;
+
+ rtm->rtm_type = RTN_MULTICAST;
+ return 1;
+}
+EXPORT_SYMBOL(mr_fill_mroute);
+
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock)
+{
+ unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+ struct net *net = sock_net(skb->sk);
+ struct mr_table *mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
+ if (t < s_t)
+ goto next_table;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+ if (e < s_e)
+ goto next_entry;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0)
+ goto done;
+next_entry:
+ e++;
+ }
+ e = 0;
+ s_e = 0;
+
+ spin_lock_bh(lock);
+ list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+ if (e < s_e)
+ goto next_entry2;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0) {
+ spin_unlock_bh(lock);
+ goto done;
+ }
+next_entry2:
+ e++;
+ }
+ spin_unlock_bh(lock);
+ e = 0;
+ s_e = 0;
+next_table:
+ t++;
+ }
+done:
+ rcu_read_unlock();
+
+ cb->args[1] = e;
+ cb->args[0] = t;
+
+ return skb->len;
+}
+EXPORT_SYMBOL(mr_rtm_dumproute);
}
if (table_base + v
!= arpt_next_entry(e)) {
+ if (unlikely(stackidx >= private->stacksize)) {
+ verdict = NF_DROP;
+ break;
+ }
jumpstack[stackidx++] = e;
}
continue;
}
if (table_base + v != ipt_next_entry(e) &&
- !(e->ip.flags & IPT_F_GOTO))
+ !(e->ip.flags & IPT_F_GOTO)) {
+ if (unlikely(stackidx >= private->stacksize)) {
+ verdict = NF_DROP;
+ break;
+ }
jumpstack[stackidx++] = e;
+ }
e = get_entry(table_base, v);
continue;
static struct pernet_operations ip_tables_net_ops = {
.init = ip_tables_net_init,
.exit = ip_tables_net_exit,
+ .async = true,
};
static int __init ip_tables_init(void)
local_bh_disable();
if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
- list_del_rcu(&c->list);
- spin_unlock(&cn->lock);
- local_bh_enable();
-
- unregister_netdevice_notifier(&c->notifier);
-
/* In case anyone still accesses the file, the open/close
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
if (cn->procdir)
proc_remove(c->pde);
#endif
+ list_del_rcu(&c->list);
+ spin_unlock(&cn->lock);
+ local_bh_enable();
+
+ unregister_netdevice_notifier(&c->notifier);
+
return;
}
local_bh_enable();
#endif
if (unlikely(!refcount_inc_not_zero(&c->refcount)))
c = NULL;
- else if (entry)
- refcount_inc(&c->entries);
+ else if (entry) {
+ if (unlikely(!refcount_inc_not_zero(&c->entries))) {
+ clusterip_config_put(c);
+ c = NULL;
+ }
+ }
}
rcu_read_unlock_bh();
.exit = clusterip_net_exit,
.id = &clusterip_net_id,
.size = sizeof(struct clusterip_net),
+ .async = true,
};
static int __init clusterip_tg_init(void)
const struct ipt_ECN_info *einfo = par->targinfo;
const struct ipt_entry *e = par->entryinfo;
- if (einfo->operation & IPT_ECN_OP_MASK) {
- pr_info("unsupported ECN operation %x\n", einfo->operation);
+ if (einfo->operation & IPT_ECN_OP_MASK)
return -EINVAL;
- }
- if (einfo->ip_ect & ~IPT_ECN_IP_MASK) {
- pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect);
+
+ if (einfo->ip_ect & ~IPT_ECN_IP_MASK)
return -EINVAL;
- }
+
if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) &&
(e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) {
- pr_info("cannot use TCP operations on a non-tcp rule\n");
+ pr_info_ratelimited("cannot use operation on non-tcp rule\n");
return -EINVAL;
}
return 0;
const struct ipt_entry *e = par->entryinfo;
if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
- pr_info("ECHOREPLY no longer supported.\n");
+ pr_info_ratelimited("ECHOREPLY no longer supported.\n");
return -EINVAL;
} else if (rejinfo->with == IPT_TCP_RESET) {
/* Must specify that it's a TCP packet */
if (e->ip.proto != IPPROTO_TCP ||
(e->ip.invflags & XT_INV_PROTO)) {
- pr_info("TCP_RESET invalid for non-tcp\n");
+ pr_info_ratelimited("TCP_RESET invalid for non-tcp\n");
return -EINVAL;
}
}
const struct xt_rpfilter_info *info = par->matchinfo;
unsigned int options = ~XT_RPFILTER_OPTION_MASK;
if (info->flags & options) {
- pr_info("unknown options encountered");
+ pr_info_ratelimited("unknown options\n");
return -EINVAL;
}
if (strcmp(par->table, "mangle") != 0 &&
strcmp(par->table, "raw") != 0) {
- pr_info("match only valid in the \'raw\' "
- "or \'mangle\' tables, not \'%s\'.\n", par->table);
+ pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n",
+ par->table);
return -EINVAL;
}
static struct pernet_operations iptable_filter_net_ops = {
.init = iptable_filter_net_init,
.exit = iptable_filter_net_exit,
+ .async = true,
};
static int __init iptable_filter_init(void)
static struct pernet_operations defrag4_net_ops = {
.exit = defrag4_net_exit,
+ .async = true,
};
static int __init nf_defrag_init(void)
static struct pernet_operations ping_v4_net_ops = {
.init = ping_v4_proc_init_net,
.exit = ping_v4_proc_exit_net,
+ .async = true,
};
int __init ping_proc_init(void)
static __net_initdata struct pernet_operations ip_proc_ops = {
.init = ip_proc_init_net,
.exit = ip_proc_exit_net,
+ .async = true,
};
int __init ip_misc_proc_init(void)
{
return register_pernet_subsys(&ip_proc_ops);
}
-
static __net_initdata struct pernet_operations raw_net_ops = {
.init = raw_init_net,
.exit = raw_exit_net,
+ .async = true,
};
int __init raw_proc_init(void)
static struct pernet_operations ip_rt_proc_ops __net_initdata = {
.init = ip_rt_do_proc_init,
.exit = ip_rt_do_proc_exit,
+ .async = true,
};
static int __init ip_rt_proc_init(void)
rt->rt_pmtu = 0;
rt->rt_gateway = 0;
rt->rt_uses_gateway = 0;
- rt->rt_table_id = 0;
INIT_LIST_HEAD(&rt->rt_uncached);
rt->dst.output = ip_output;
spin_unlock_bh(&fnhe_lock);
}
-static void set_lwt_redirect(struct rtable *rth)
-{
- if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_output = rth->dst.output;
- rth->dst.output = lwtunnel_output;
- }
-
- if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_input = rth->dst.input;
- rth->dst.input = lwtunnel_input;
- }
-}
-
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
}
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
skb_dst_set(skb, &rth->dst);
out:
err = 0;
struct flow_keys *hash_keys)
{
const struct iphdr *outer_iph = ip_hdr(skb);
+ const struct iphdr *key_iph = outer_iph;
const struct iphdr *inner_iph;
const struct icmphdr *icmph;
struct iphdr _inner_iph;
struct icmphdr _icmph;
- hash_keys->addrs.v4addrs.src = outer_iph->saddr;
- hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
if (likely(outer_iph->protocol != IPPROTO_ICMP))
- return;
+ goto out;
if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
- return;
+ goto out;
icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph);
if (!icmph)
- return;
+ goto out;
if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED &&
icmph->type != ICMP_PARAMETERPROB)
- return;
+ goto out;
inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph);
if (!inner_iph)
- return;
- hash_keys->addrs.v4addrs.src = inner_iph->saddr;
- hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+ goto out;
+
+ key_iph = inner_iph;
+out:
+ hash_keys->addrs.v4addrs.src = key_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}
/* if skb is set it will be used and fl4 can be NULL */
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb)
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
- struct net *net = fi->fib_net;
struct flow_keys hash_keys;
u32 mhash;
/* short-circuit if we already have L4 hash present */
if (skb->l4_hash)
return skb_get_hash_raw(skb) >> 1;
+
memset(&hash_keys, 0, sizeof(hash_keys));
- skb_flow_dissect_flow_keys(skb, &keys, flag);
- hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
- hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
- hash_keys.ports.src = keys.ports.src;
- hash_keys.ports.dst = keys.ports.dst;
- hash_keys.basic.ip_proto = keys.basic.ip_proto;
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
+
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
} else {
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
return mhash >> 1;
}
-EXPORT_SYMBOL_GPL(fib_multipath_hash);
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
static int ip_mkroute_input(struct sk_buff *skb,
struct fib_result *res,
struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+ __be32 daddr, __be32 saddr, u32 tos,
+ struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(res->fi, NULL, skb);
+ int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
}
struct fib_result *res)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct flow_keys *flkeys = NULL, _flkeys;
+ struct net *net = dev_net(dev);
struct ip_tunnel_info *tun_info;
- struct flowi4 fl4;
+ int err = -EINVAL;
unsigned int flags = 0;
u32 itag = 0;
struct rtable *rth;
- int err = -EINVAL;
- struct net *net = dev_net(dev);
+ struct flowi4 fl4;
bool do_cache;
/* IP on this device is disabled. */
fl4.daddr = daddr;
fl4.saddr = saddr;
fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+ if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+ flkeys = &_flkeys;
+
err = fib_lookup(net, &fl4, res, 0);
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
if (res->type != RTN_UNICAST)
goto martian_destination;
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+ err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out: return err;
brd_input:
rth->dst.tclassid = itag;
#endif
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
if (res->type == RTN_UNREACHABLE) {
return ERR_PTR(-ENOBUFS);
rth->rt_iif = orig_oif;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(out_slow_tot);
}
rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
return rth;
}
rt->rt_flags |= RTCF_NOTIFY;
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
- table_id = rt->rt_table_id;
+ table_id = res.table ? res.table->tb_id : 0;
if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
if (!res.fi) {
static __net_initdata struct pernet_operations sysctl_route_ops = {
.init = sysctl_route_net_init,
.exit = sysctl_route_net_exit,
+ .async = true,
};
#endif
static __net_initdata struct pernet_operations rt_genid_ops = {
.init = rt_genid_init,
+ .async = true,
};
static int __net_init ipv4_inetpeer_init(struct net *net)
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
.init = ipv4_inetpeer_init,
.exit = ipv4_inetpeer_exit,
+ .async = true,
};
#ifdef CONFIG_IP_ROUTE_CLASSID
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
- call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+ call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
return ret;
}
static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
.init = ipv4_sysctl_init_net,
.exit = ipv4_sysctl_exit_net,
+ .async = true,
};
static __init int sysctl_ipv4_init(void)
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk_sockets_allocated_inc(sk);
+ sk->sk_route_forced_caps = NETIF_F_GSO;
}
EXPORT_SYMBOL(tcp_init_sock);
struct tcp_sock *tp = tcp_sk(sk);
u32 new_size_goal, size_goal;
- if (!large_allowed || !sk_can_gso(sk))
+ if (!large_allowed)
return mss_now;
/* Note : tcp_tso_autosize() will eventually split this later */
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
- if (!(sk->sk_route_caps & NETIF_F_SG) ||
- !sk_check_csum_caps(sk))
+ if (!(sk->sk_route_caps & NETIF_F_SG))
return sock_no_sendpage_locked(sk, page, offset, size, flags);
tcp_rate_check_app_limited(sk); /* is sending application-limited? */
return 0;
}
-static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
+static int select_size(bool first_skb, bool zc)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- int tmp = tp->mss_cache;
-
- if (sg) {
- if (zc)
- return 0;
-
- if (sk_can_gso(sk)) {
- tmp = linear_payload_sz(first_skb);
- } else {
- int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
-
- if (tmp >= pgbreak &&
- tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
- tmp = pgbreak;
- }
- }
-
- return tmp;
+ if (zc)
+ return 0;
+ return linear_payload_sz(first_skb);
}
void tcp_free_fastopen_req(struct tcp_sock *tp)
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
bool process_backlog = false;
- bool sg, zc = false;
+ bool zc = false;
long timeo;
flags = msg->msg_flags;
goto out_err;
}
- zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+ zc = sk->sk_route_caps & NETIF_F_SG;
if (!zc)
uarg->zerocopy = 0;
}
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
- sg = !!(sk->sk_route_caps & NETIF_F_SG);
-
while (msg_data_left(msg)) {
int copy = 0;
- int max = size_goal;
skb = tcp_write_queue_tail(sk);
- if (skb) {
- if (skb->ip_summed == CHECKSUM_NONE)
- max = mss_now;
- copy = max - skb->len;
- }
+ if (skb)
+ copy = size_goal - skb->len;
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
goto restart;
}
first_skb = tcp_rtx_and_write_queues_empty(sk);
- linear = select_size(sk, sg, first_skb, zc);
+ linear = select_size(first_skb, zc);
skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
first_skb);
if (!skb)
goto wait_for_memory;
process_backlog = true;
- /*
- * Check whether we can use HW checksum.
- */
- if (sk_check_csum_caps(sk))
- skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb);
copy = size_goal;
- max = size_goal;
/* All packets are restored as if they have
* already been sent. skb_mstamp isn't set to
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= sysctl_max_skb_frags || !sg) {
+ if (i >= sysctl_max_skb_frags) {
tcp_mark_push(tp, skb);
goto new_segment;
}
goto out;
}
- if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
+ if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
continue;
if (forced_push(tp)) {
packet_conservation:1, /* use packet conservation? */
restore_cwnd:1, /* decided to revert cwnd to old value */
round_start:1, /* start of packet-timed tx->ack round? */
- tso_segs_goal:7, /* segments we want in each skb we send */
idle_restart:1, /* restarting after idle? */
probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
- unused:5,
+ unused:12,
lt_is_sampling:1, /* taking long-term ("LT") samples now? */
lt_rtt_cnt:7, /* round trips in long-term interval */
lt_use_bw:1; /* use lt_bw as our bw estimate? */
sk->sk_pacing_rate = rate;
}
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
{
- struct bbr *bbr = inet_csk_ca(sk);
-
- return bbr->tso_segs_goal;
+ return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bbr *bbr = inet_csk_ca(sk);
- u32 min_segs;
+ u32 segs, bytes;
+
+ /* Sort of tcp_tso_autosize() but ignoring
+ * driver provided sk_gso_max_size.
+ */
+ bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+ GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+ segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
- min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
- bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
- 0x7FU);
+ return min(segs, 0x7FU);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
/* Allow enough full-sized skbs in flight to utilize end systems. */
- cwnd += 3 * bbr->tso_segs_goal;
+ cwnd += 3 * bbr_tso_segs_goal(sk);
/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
cwnd = (cwnd + 1) & ~1U;
bw = bbr_bw(sk);
bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
- bbr_set_tso_segs_goal(sk);
bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
struct bbr *bbr = inet_csk_ca(sk);
bbr->prior_cwnd = 0;
- bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = 0;
bbr->prev_ca_state = TCP_CA_Open;
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
- .tso_segs_goal = bbr_tso_segs_goal,
+ .min_tso_segs = bbr_min_tso_segs,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
int len;
int in_sack;
- if (!sk_can_gso(sk))
- goto fallback;
-
/* Normally R but no L won't result in plain S */
if (!dup_sack &&
(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
+ bool req_stolen;
+
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
- if (!tcp_check_req(sk, skb, req, true))
+ if (!tcp_check_req(sk, skb, req, true, &req_stolen))
goto discard;
}
{
struct tcphdr *th = tcp_hdr(skb);
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct tcphdr, check);
- } else {
- th->check = tcp_v4_check(skb->len, saddr, daddr,
- csum_partial(th,
- th->doff << 2,
- skb->csum));
- }
+ th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
}
/* This routine computes an IPv4 TCP checksum. */
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
+ bool req_stolen = false;
struct sock *nsk;
sk = req->rsk_listener;
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
}
if (!nsk) {
reqsk_put(req);
+ if (req_stolen) {
+ /* Another cpu got exclusive access to req
+ * and created a full blown socket.
+ * Try to feed this packet to this socket
+ * instead of discarding it.
+ */
+ tcp_v4_restore_cb(skb);
+ sock_put(sk);
+ goto lookup;
+ }
goto discard_and_relse;
}
if (nsk == sk) {
static struct pernet_operations tcp4_net_ops = {
.init = tcp4_proc_init_net,
.exit = tcp4_proc_exit_net,
+ .async = true,
};
int __init tcp4_proc_init(void)
.init = tcp_sk_init,
.exit = tcp_sk_exit,
.exit_batch = tcp_sk_exit_batch,
+ .async = true,
};
void __init tcp_v4_init(void)
static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
.init = tcp_net_metrics_init,
.exit_batch = tcp_net_metrics_exit_batch,
+ .async = true,
};
void __init tcp_metrics_init(void)
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- bool fastopen)
+ bool fastopen, bool *req_stolen)
{
struct tcp_options_received tmp_opt;
struct sock *child;
sock_rps_save_rxhash(child, skb);
tcp_synack_rtt_meas(child, req);
+ *req_stolen = !own_req;
return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow:
/* Initialize TSO segments for a packet. */
static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
- if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
+ if (skb->len <= mss_now) {
/* Avoid the costly divide in the normal
* non-TSO case.
*/
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
tcp_skb_fragment_eor(skb, buff);
- if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
- /* Copy and checksum data tail into the new buffer. */
- buff->csum = csum_partial_copy_nocheck(skb->data + len,
- skb_put(buff, nsize),
- nsize, 0);
-
- skb_trim(skb, len);
-
- skb->csum = csum_block_sub(skb->csum, buff->csum, len);
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb_split(skb, buff, len);
- }
+ skb_split(skb, buff, len);
- buff->ip_summed = skb->ip_summed;
+ buff->ip_summed = CHECKSUM_PARTIAL;
buff->tstamp = skb->tstamp;
tcp_fragment_tstamp(skb, buff);
/* Return how many segs we'd like on a TSO packet,
* to send one TSO packet per ms
*/
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ int min_tso_segs)
{
u32 bytes, segs;
*/
segs = max_t(u32, bytes / mss_now, min_tso_segs);
- return min_t(u32, segs, sk->sk_gso_max_segs);
+ return segs;
}
-EXPORT_SYMBOL(tcp_tso_autosize);
/* Return the number of segments we want in the skb we are transmitting.
* See if congestion control module wants to decide; otherwise, autosize.
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
- u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+ u32 min_tso, tso_segs;
+
+ min_tso = ca_ops->min_tso_segs ?
+ ca_ops->min_tso_segs(sk) :
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
- return tso_segs ? :
- tcp_tso_autosize(sk, mss_now,
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+ tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+ return min_t(u32, tso_segs, sk->sk_gso_max_segs);
}
/* Returns the portion of skb which can be sent right away */
tcp_skb_fragment_eor(skb, buff);
- buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
+ buff->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
tcp_fragment_tstamp(skb, buff);
}
}
+static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
+{
+ struct sk_buff *skb, *next;
+
+ skb = tcp_send_head(sk);
+ tcp_for_write_queue_from_safe(skb, next, sk) {
+ if (len <= skb->len)
+ break;
+
+ if (unlikely(TCP_SKB_CB(skb)->eor))
+ return false;
+
+ len -= skb->len;
+ }
+
+ return true;
+}
+
/* Create a new MTU probe if we are ready.
* MTU probe is regularly attempting to increase the path MTU by
* deliberately sending larger packets. This discovers routing
return 0;
}
+ if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
+ return -1;
+
/* We're allowed to probe. Build it now. */
nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
if (!nskb)
TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
TCP_SKB_CB(nskb)->sacked = 0;
nskb->csum = 0;
- nskb->ip_summed = skb->ip_summed;
+ nskb->ip_summed = CHECKSUM_PARTIAL;
tcp_insert_write_queue_before(nskb, skb, sk);
tcp_highest_sack_replace(sk, skb, nskb);
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
copy = min_t(int, skb->len, probe_size - len);
- if (nskb->ip_summed) {
- skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
- } else {
- __wsum csum = skb_copy_and_csum_bits(skb, 0,
- skb_put(nskb, copy),
- copy, 0);
- nskb->csum = csum_block_add(nskb->csum, csum, len);
- }
+ skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
* Throw it away. */
TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+ /* If this is the last SKB we copy and eor is set
+ * we need to propagate it to the new skb.
+ */
+ TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
} else {
~(TCPHDR_FIN|TCPHDR_PSH);
if (!skb_shinfo(skb)->nr_frags) {
skb_pull(skb, copy);
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_partial(skb->data,
- skb->len, 0);
} else {
__pskb_trim_head(skb, copy);
tcp_set_skb_tso_segs(skb, mss_now);
}
tcp_highest_sack_replace(sk, next_skb, skb);
- if (next_skb->ip_summed == CHECKSUM_PARTIAL)
- skb->ip_summed = CHECKSUM_PARTIAL;
-
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
-
/* Update sequence range on original skb. */
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
for (handler = rcu_dereference(head); \
handler != NULL; \
handler = rcu_dereference(handler->next)) \
-
+
static int tunnel4_rcv(struct sk_buff *skb)
{
struct xfrm_tunnel *handler;
err = udplite_checksum_init(skb, uh);
if (err)
return err;
+
+ if (UDP_SKB_CB(skb)->partial_cov) {
+ skb->csum = inet_compute_pseudo(skb, proto);
+ return 0;
+ }
}
/* Note, we are only interested in != 0 or == 0, thus the
static struct pernet_operations udp4_net_ops = {
.init = udp4_proc_init_net,
.exit = udp4_proc_exit_net,
+ .async = true,
};
int __init udp4_proc_init(void)
static struct pernet_operations udplite4_net_ops = {
.init = udplite4_proc_init_net,
.exit = udplite4_proc_exit_net,
+ .async = true,
};
static __init int udplite4_proc_init(void)
xdst->u.rt.rt_gateway = rt->rt_gateway;
xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
- xdst->u.rt.rt_table_id = rt->rt_table_id;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
return 0;
static struct pernet_operations __net_initdata xfrm4_net_ops = {
.init = xfrm4_net_init,
.exit = xfrm4_net_exit,
+ .async = true,
};
static void __init xfrm4_policy_init(void)
xfrm4_protocol_init();
register_pernet_subsys(&xfrm4_net_ops);
}
-
config IPV6_MROUTE
bool "IPv6: multicast routing"
depends on IPV6
+ select IP_MROUTE_COMMON
---help---
Experimental support for IPv6 multicast forwarding.
If unsure, say N.
#endif
}
+static bool ipv6_allow_optimistic_dad(struct net *net,
+ struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (!idev)
+ return false;
+ if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ return false;
+
+ return true;
+#else
+ return false;
+#endif
+}
+
static int ipv6_get_saddr_eval(struct net *net,
struct ipv6_saddr_score *score,
struct ipv6_saddr_dst *dst,
spin_lock_bh(&ifp->lock);
addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
+ if (dad_failed)
+ ifp->flags &= ~IFA_F_OPTIMISTIC;
spin_unlock_bh(&ifp->lock);
if (dad_failed)
ipv6_ifa_notify(0, ifp);
static struct pernet_operations if6_proc_net_ops = {
.init = if6_proc_net_init,
.exit = if6_proc_net_exit,
+ .async = true,
};
int __init if6_proc_init(void)
(ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
return -EINVAL;
+ if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
timeout = addrconf_timeout_fixup(valid_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
expires = jiffies_to_clock_t(timeout * HZ);
struct in6_addr *pfx, *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
+ struct inet6_dev *idev;
u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
u32 ifa_flags;
int err;
/* We ignore other flags so far. */
ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
- IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
+ IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+ idev = ipv6_find_idev(dev);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+
+ if (!ipv6_allow_optimistic_dad(net, idev))
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
+ if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+ NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
+ return -EINVAL;
+ }
ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
if (!ifa) {
static struct pernet_operations addrconf_ops = {
.init = addrconf_init_net,
.exit = addrconf_exit_net,
+ .async = true,
};
static struct rtnl_af_ops inet6_ops __read_mostly = {
static struct pernet_operations ipv6_addr_label_ops = {
.init = ip6addrlbl_net_init,
.exit = ip6addrlbl_net_exit,
+ .async = true,
};
int __init ipv6_addr_label_init(void)
*/
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
EXPORT_SYMBOL(inet6_getname);
static struct pernet_operations inet6_net_ops = {
.init = inet6_net_init,
.exit = inet6_net_exit,
+ .async = true,
};
static const struct ipv6_stub ipv6_stub_impl = {
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
remove_proc_entry("anycast6", net->proc_net);
}
#endif
-
return nexthdr;
}
EXPORT_SYMBOL(ipv6_find_hdr);
-
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
if (net->ipv6.fib6_has_custom_rules) {
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
+ .lookup_data = skb,
.flags = FIB_LOOKUP_NOREF,
};
} else {
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
goto out;
}
- rt = lookup(net, table, flp6, flags);
+ rt = lookup(net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule;
if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+ return 0;
+
return 1;
}
rule6->dst.plen = frh->dst_len;
rule6->tclass = frh->tos;
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect++;
+
net->ipv6.fib6_has_custom_rules = true;
err = 0;
errout:
return err;
}
+static int fib6_rule_delete(struct fib_rule *rule)
+{
+ struct net *net = rule->fr_net;
+
+ if (net->ipv6.fib6_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect--;
+
+ return 0;
+}
+
static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
struct nlattr **tb)
{
.match = fib6_rule_match,
.suppress = fib6_rule_suppress,
.configure = fib6_rule_configure,
+ .delete = fib6_rule_delete,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.nlmsg_payload = fib6_rule_nlmsg_payload,
goto out_fib6_rules_ops;
net->ipv6.fib6_rules_ops = ops;
+ net->ipv6.fib6_rules_require_fldissect = 0;
out:
return err;
static struct pernet_operations fib6_rules_net_ops = {
.init = fib6_rules_net_init,
.exit = fib6_rules_net_exit,
+ .async = true,
};
int __init fib6_rules_init(void)
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
skb_pull(skb2, nhs);
skb_reset_network_header(skb2);
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
+ skb, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
static struct pernet_operations icmpv6_sk_ops = {
.init = icmpv6_sk_init,
.exit = icmpv6_sk_exit,
+ .async = true,
};
int __init icmpv6_init(void)
.exit = ila_exit_net,
.id = &ila_net_id,
.size = sizeof(struct ila_net),
+ .async = true,
};
static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
err = udplite_checksum_init(skb, uh);
if (err)
return err;
+
+ if (UDP_SKB_CB(skb)->partial_cov) {
+ skb->csum = ip6_compute_pseudo(skb, proto);
+ return 0;
+ }
}
/* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error == -EAGAIN) {
ip6_rt_put(rt);
rt = net->ipv6.ip6_null_entry;
static struct pernet_operations fib6_net_ops = {
.init = fib6_net_init,
.exit = fib6_net_exit,
+ .async = true,
};
int __init fib6_init(void)
static struct pernet_operations ip6_flowlabel_net_ops = {
.init = ip6_flowlabel_proc_init,
.exit = ip6_flowlabel_net_exit,
+ .async = true,
};
int ip6_flowlabel_init(void)
else
fl6->daddr = tunnel->parms.raddr;
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
-
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
dsfield = key->tos;
- flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = key->tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
tunnel->tun_hlen = gre_calc_hlen(flags);
gre_build_header(skb, tunnel->tun_hlen,
flags, protocol,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ : 0);
} else {
+ if (tunnel->parms.o_flags & TUNNEL_SEQ)
+ tunnel->o_seqno++;
+
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key,
htonl(tunnel->o_seqno));
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
.exit_batch = ip6gre_exit_batch_net,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
+ .async = true,
};
static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
netif_keep_dst(dev);
}
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
struct ip_tunnel_encap *ipencap)
{
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
- ((mroute6_socket(net, skb) &&
+ ((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr))) {
/* Try to guess incoming interface */
rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
- NULL, 0, 0);
+ NULL, 0, skb2, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
.exit_batch = ip6_tnl_exit_batch_net,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
+ .async = true,
};
/**
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (rt)
tdev = rt->dst.dev;
.exit_batch = vti6_exit_batch_net,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
+ .async = true,
};
static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
-struct mr6_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock *mroute6_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc6_unres_queue;
- struct list_head mfc6_cache_array[MFC6_LINES];
- struct mif_device vif6_table[MAXMIFS];
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
- int mroute_reg_vif_num;
-#endif
-};
-
struct ip6mr_rule {
struct fib_rule common;
};
struct ip6mr_result {
- struct mr6_table *mrt;
+ struct mr_table *mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
static DEFINE_RWLOCK(mrt_lock);
-/*
- * Multicast router control variables
- */
-
-#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+/* Multicast router control variables */
/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
static struct kmem_cache *mrt_cachep __read_mostly;
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
-static void ip6mr_free_table(struct mr6_table *mrt);
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr_table *mrt);
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm);
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd);
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv6.mr6_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv6.mr6_tables)
+ return NULL;
+ return ret;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
- struct mr6_table *mrt;
+ struct mr_table *mrt;
ip6mr_for_each_table(mrt, net) {
if (mrt->id == id)
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
int err;
struct ip6mr_result res;
int flags, struct fib_lookup_arg *arg)
{
struct ip6mr_result *res = arg->result;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
switch (rule->action) {
case FR_ACT_TO_TBL:
static int __net_init ip6mr_rules_init(struct net *net)
{
struct fib_rules_ops *ops;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
int err;
ops = fib_rules_register(&ip6mr_rules_ops_template, net);
static void __net_exit ip6mr_rules_exit(struct net *net)
{
- struct mr6_table *mrt, *next;
+ struct mr_table *mrt, *next;
rtnl_lock();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
#define ip6mr_for_each_table(mrt, net) \
for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv6.mrt6;
+ return NULL;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
return net->ipv6.mrt6;
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
*mrt = net->ipv6.mrt6;
return 0;
}
#endif
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
{
- struct mr6_table *mrt;
- unsigned int i;
+ const struct mfc6_cache_cmp_arg *cmparg = arg->key;
+ struct mfc6_cache *c = (struct mfc6_cache *)ptr;
- mrt = ip6mr_get_table(net, id);
- if (mrt)
- return mrt;
-
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return NULL;
- mrt->id = id;
- write_pnet(&mrt->net, net);
-
- /* Forwarding cache */
- for (i = 0; i < MFC6_LINES; i++)
- INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
-
- INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+ return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
+ !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
+}
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
+static const struct rhashtable_params ip6mr_rht_params = {
+ .head_offset = offsetof(struct mr_mfc, mnode),
+ .key_offset = offsetof(struct mfc6_cache, cmparg),
+ .key_len = sizeof(struct mfc6_cache_cmp_arg),
+ .nelem_hint = 3,
+ .locks_mul = 1,
+ .obj_cmpfn = ip6mr_hash_cmp,
+ .automatic_shrinking = true,
+};
-#ifdef CONFIG_IPV6_PIMSM_V2
- mrt->mroute_reg_vif_num = -1;
-#endif
+static void ip6mr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
- return mrt;
}
-static void ip6mr_free_table(struct mr6_table *mrt)
-{
- del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt, true);
- kfree(mrt);
-}
-
-#ifdef CONFIG_PROC_FS
-
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- struct list_head *cache;
- int ct;
+static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
+ .mf6c_origin = IN6ADDR_ANY_INIT,
+ .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};
+static struct mr_table_ops ip6mr_mr_table_ops = {
+ .rht_params = &ip6mr_rht_params,
+ .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
+};
-static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
- struct mr6_table *mrt = it->mrt;
- struct mfc6_cache *mfc;
-
- read_lock(&mrt_lock);
- for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- }
- read_unlock(&mrt_lock);
+ struct mr_table *mrt;
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc6_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
+ mrt = ip6mr_get_table(net, id);
+ if (mrt)
+ return mrt;
- it->cache = NULL;
- return NULL;
+ return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
+ ipmr_expire_process, ip6mr_new_table_set);
}
-/*
- * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
- */
-
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- int ct;
-};
-
-static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
+static void ip6mr_free_table(struct mr_table *mrt)
{
- struct mr6_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ del_timer_sync(&mrt->ipmr_expire_timer);
+ mroute_clean_tables(mrt, true);
+ rhltable_destroy(&mrt->mfc_hash);
+ kfree(mrt);
}
+#ifdef CONFIG_PROC_FS
+/* The /proc interfaces to multicast routing
+ * /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ip6mr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
- struct mr6_table *mrt = iter->mrt;
+ struct mr_vif_iter *iter = seq->private;
+ struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
} else {
- const struct mif_device *vif = v;
+ const struct vif_device *vif = v;
const char *name = vif->dev ? vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
- vif - mrt->vif6_table,
+ vif - mrt->vif_table,
name, vif->bytes_in, vif->pkt_in,
vif->bytes_out, vif->pkt_out,
vif->flags);
static const struct seq_operations ip6mr_vif_seq_ops = {
.start = ip6mr_vif_seq_start,
- .next = ip6mr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ip6mr_vif_seq_stop,
.show = ip6mr_vif_seq_show,
};
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ip6mr_vif_fops = {
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct mfc6_cache *mfc = v;
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = it->mrt;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc6_cache, list);
-
- if (it->cache == &mrt->mfc6_unres_queue)
- goto end_of_list;
-
- BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
-
- while (++it->ct < MFC6_LINES) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- if (list_empty(it->cache))
- continue;
- return list_first_entry(it->cache, struct mfc6_cache, list);
- }
-
- /* exhausted cache_array, show unresolved */
- read_unlock(&mrt_lock);
- it->cache = &mrt->mfc6_unres_queue;
- it->ct = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc6_cache, list);
-
- end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc6_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc6_cache_array[it->ct])
- read_unlock(&mrt_lock);
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc6_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
+ const struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
seq_printf(seq, "%pI6 %pI6 %-3hd",
&mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
- mfc->mf6c_parent);
+ mfc->_c.mfc_parent);
- if (it->cache != &mrt->mfc6_unres_queue) {
+ if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
- if (MIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
+ if (VIF_EXISTS(mrt, n) &&
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
- " %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ " %2d:%-3d", n,
+ mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ip6mr_mfc_fops = {
struct ipv6hdr *encap;
struct net_device *reg_dev = NULL;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
read_lock(&mrt_lock);
if (reg_vif_num >= 0)
- reg_dev = mrt->vif6_table[reg_vif_num].dev;
+ reg_dev = mrt->vif_table[reg_vif_num].dev;
if (reg_dev)
dev_hold(reg_dev);
read_unlock(&mrt_lock);
struct net_device *dev)
{
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
dev->features |= NETIF_F_NETNS_LOCAL;
}
-static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
struct net_device *dev;
char name[IFNAMSIZ];
* Delete a VIF entry
*/
-static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
{
- struct mif_device *v;
+ struct vif_device *v;
struct net_device *dev;
struct inet6_dev *in6_dev;
if (vifi < 0 || vifi >= mrt->maxvif)
return -EADDRNOTAVAIL;
- v = &mrt->vif6_table[vifi];
+ v = &mrt->vif_table[vifi];
write_lock_bh(&mrt_lock);
dev = v->dev;
if (vifi + 1 == mrt->maxvif) {
int tmp;
for (tmp = vifi - 1; tmp >= 0; tmp--) {
- if (MIF_EXISTS(mrt, tmp))
+ if (VIF_EXISTS(mrt, tmp))
break;
}
mrt->maxvif = tmp + 1;
return 0;
}
+static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
+{
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
+
+ kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
+}
+
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
- kmem_cache_free(mrt_cachep, c);
+ call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}
/* Destroy an unresolved cache entry, killing queued skbs
and reporting error to netlink readers.
*/
-static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
/* Timer process for all the unresolved queue. */
-static void ipmr_do_expire_process(struct mr6_table *mrt)
+static void ipmr_do_expire_process(struct mr_table *mrt)
{
unsigned long now = jiffies;
unsigned long expires = 10 * HZ;
- struct mfc6_cache *c, *next;
+ struct mr_mfc *c, *next;
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
if (time_after(c->mfc_un.unres.expires, now)) {
/* not yet... */
unsigned long interval = c->mfc_un.unres.expires - now;
}
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
static void ipmr_expire_process(struct timer_list *t)
{
- struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+ struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
return;
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
ipmr_do_expire_process(mrt);
spin_unlock(&mfc_unres_lock);
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr_table *mrt,
+ struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
for (vifi = 0; vifi < mrt->maxvif; vifi++) {
- if (MIF_EXISTS(mrt, vifi) &&
+ if (VIF_EXISTS(mrt, vifi) &&
ttls[vifi] && ttls[vifi] < 255) {
cache->mfc_un.res.ttls[vifi] = ttls[vifi];
if (cache->mfc_un.res.minvif > vifi)
cache->mfc_un.res.lastuse = jiffies;
}
-static int mif6_add(struct net *net, struct mr6_table *mrt,
+static int mif6_add(struct net *net, struct mr_table *mrt,
struct mif6ctl *vifc, int mrtsock)
{
int vifi = vifc->mif6c_mifi;
- struct mif_device *v = &mrt->vif6_table[vifi];
+ struct vif_device *v = &mrt->vif_table[vifi];
struct net_device *dev;
struct inet6_dev *in6_dev;
int err;
/* Is vif busy ? */
- if (MIF_EXISTS(mrt, vifi))
+ if (VIF_EXISTS(mrt, vifi))
return -EADDRINUSE;
switch (vifc->mif6c_flags) {
dev->ifindex, &in6_dev->cnf);
}
- /*
- * Fill in the VIF structures
- */
- v->rate_limit = vifc->vifc_rate_limit;
- v->flags = vifc->mif6c_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & MIFF_REGISTER)
- v->link = dev_get_iflink(dev);
+ /* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
+ vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
+ MIFF_REGISTER);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
return 0;
}
-static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
const struct in6_addr *origin,
const struct in6_addr *mcastgrp)
{
- int line = MFC6_HASH(mcastgrp, origin);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
- return c;
- }
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
- mifi_t mifi)
-{
- int line = MFC6_HASH(&in6addr_any, &in6addr_any);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_any(&c->mf6c_mcastgrp) &&
- (c->mfc_un.res.ttls[mifi] < 255))
- return c;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
-static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
struct in6_addr *mcastgrp,
mifi_t mifi)
{
- int line = MFC6_HASH(mcastgrp, &in6addr_any);
- struct mfc6_cache *c, *proxy;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = in6addr_any,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
if (ipv6_addr_any(mcastgrp))
- goto skip;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
- if (c->mfc_un.res.ttls[mifi] < 255)
- return c;
-
- /* It's ok if the mifi is part of the static tree */
- proxy = ip6mr_cache_find_any_parent(mrt,
- c->mf6c_parent);
- if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
- return c;
- }
+ return mr_mfc_find_any_parent(mrt, mifi);
+ return mr_mfc_find_any(mrt, mifi, &arg);
+}
-skip:
- return ip6mr_cache_find_any_parent(mrt, mifi);
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc6_cache *
+ip6mr_cache_find_parent(struct mr_table *mrt,
+ const struct in6_addr *origin,
+ const struct in6_addr *mcastgrp,
+ int parent)
+{
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
+
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
-/*
- * Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (!c)
return NULL;
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXMIFS;
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXMIFS;
return c;
}
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (!c)
return NULL;
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10 * HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
return c;
}
* A cache entry has gone into a resolved state from queued
*/
-static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
struct mfc6_cache *uc, struct mfc6_cache *c)
{
struct sk_buff *skb;
* Play the pending entries through our router
*/
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
- if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
} else {
nlh->nlmsg_type = NLMSG_ERROR;
* Called under mrt_lock.
*/
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert)
{
+ struct sock *mroute6_sk;
struct sk_buff *skb;
struct mrt6msg *msg;
int ret;
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (!mrt->mroute6_sk) {
+ rcu_read_lock();
+ mroute6_sk = rcu_dereference(mrt->mroute_sk);
+ if (!mroute6_sk) {
+ rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
mrt6msg_netlink_event(mrt, skb);
- /*
- * Deliver to user space multicast routing algorithms
- */
- ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+ /* Deliver to user space multicast routing algorithms */
+ ret = sock_queue_rcv_skb(mroute6_sk, skb);
+ rcu_read_unlock();
if (ret < 0) {
net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
kfree_skb(skb);
return ret;
}
-/*
- * Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
+ struct sk_buff *skb)
{
+ struct mfc6_cache *c;
bool found = false;
int err;
- struct mfc6_cache *c;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
found = true;
return -ENOBUFS;
}
- /*
- * Fill in the new cache entry
- */
- c->mf6c_parent = -1;
+ /* Fill in the new cache entry */
+ c->_c.mfc_parent = -1;
c->mf6c_origin = ipv6_hdr(skb)->saddr;
c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc6_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
ipmr_do_expire_process(mrt);
}
- /*
- * See if we can append the packet
- */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ /* See if we can append the packet */
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
* MFC6 cache manipulation by user space
*/
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
int parent)
{
- int line;
- struct mfc6_cache *c, *next;
-
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+ struct mfc6_cache *c;
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == c->mf6c_parent)) {
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (!c)
+ return -ENOENT;
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
+ list_del_rcu(&c->_c.list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- return 0;
- }
- }
- return -ENOENT;
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
+ ip6mr_cache_free(c);
+ return 0;
}
static int ip6mr_device_event(struct notifier_block *this,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
- struct mif_device *v;
+ struct mr_table *mrt;
+ struct vif_device *v;
int ct;
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
ip6mr_for_each_table(mrt, net) {
- v = &mrt->vif6_table[0];
+ v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
if (v->dev == dev)
mif6_delete(mrt, ct, 1, NULL);
static struct pernet_operations ip6mr_net_ops = {
.init = ip6mr_net_init,
.exit = ip6mr_net_exit,
+ .async = true,
};
int __init ip6_mr_init(void)
kmem_cache_destroy(mrt_cachep);
}
-static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
struct mf6cctl *mfc, int mrtsock, int parent)
{
- bool found = false;
- int line;
- struct mfc6_cache *uc, *c;
unsigned char ttls[MAXMIFS];
- int i;
+ struct mfc6_cache *uc, *c;
+ struct mr_mfc *_uc;
+ bool found;
+ int i, err;
if (mfc->mf6cc_parent >= MAXMIFS)
return -ENFILE;
for (i = 0; i < MAXMIFS; i++) {
if (IF_ISSET(i, &mfc->mf6cc_ifset))
ttls[i] = 1;
-
- }
-
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == mfc->mf6cc_parent)) {
- found = true;
- break;
- }
}
- if (found) {
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (c) {
write_lock_bh(&mrt_lock);
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- write_lock_bh(&mrt_lock);
- list_add(&c->list, &mrt->mfc6_cache_array[line]);
- write_unlock_bh(&mrt_lock);
+ err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
+ ip6mr_rht_params);
+ if (err) {
+ pr_err("ip6mr: rhtable insert error %d\n", err);
+ ip6mr_cache_free(c);
+ return err;
+ }
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
- /*
- * Check to see if we resolved a queued list. If so we
- * need to send on the frames and tidy up.
+ /* Check to see if we resolved a queued list. If so we
+ * need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc6_cache *)_uc;
if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
}
}
- if (list_empty(&mrt->mfc6_unres_queue))
+ if (list_empty(&mrt->mfc_unres_queue))
del_timer(&mrt->ipmr_expire_timer);
spin_unlock_bh(&mfc_unres_lock);
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
- int i;
+ struct mr_mfc *c, *tmp;
LIST_HEAD(list);
- struct mfc6_cache *c, *next;
+ int i;
- /*
- * Shut down all active vif entries
- */
+ /* Shut down all active vif entries */
for (i = 0; i < mrt->maxvif; i++) {
- if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
continue;
mif6_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
- /*
- * Wipe the cache
- */
- for (i = 0; i < MFC6_LINES; i++) {
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (!all && (c->mfc_flags & MFC_STATIC))
- continue;
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
-
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- }
+ /* Wipe the cache */
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+ if (!all && (c->mfc_flags & MFC_STATIC))
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+ list_del_rcu(&c->list);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ ip6mr_cache_free((struct mfc6_cache *)c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+ RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
spin_unlock_bh(&mfc_unres_lock);
}
}
-static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
int err = 0;
struct net *net = sock_net(sk);
rtnl_lock();
write_lock_bh(&mrt_lock);
- if (likely(mrt->mroute6_sk == NULL)) {
- mrt->mroute6_sk = sk;
- net->ipv6.devconf_all->mc_forwarding++;
- } else {
+ if (rtnl_dereference(mrt->mroute_sk)) {
err = -EADDRINUSE;
+ } else {
+ rcu_assign_pointer(mrt->mroute_sk, sk);
+ net->ipv6.devconf_all->mc_forwarding++;
}
write_unlock_bh(&mrt_lock);
{
int err = -EACCES;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
- if (sk == mrt->mroute6_sk) {
+ if (sk == rtnl_dereference(mrt->mroute_sk)) {
write_lock_bh(&mrt_lock);
- mrt->mroute6_sk = NULL;
+ RCU_INIT_POINTER(mrt->mroute_sk, NULL);
net->ipv6.devconf_all->mc_forwarding--;
write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
}
}
rtnl_unlock();
+ synchronize_rcu();
return err;
}
-struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_oif = skb->dev->ifindex,
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
return NULL;
- return mrt->mroute6_sk;
+ return rcu_access_pointer(mrt->mroute_sk);
}
+EXPORT_SYMBOL(mroute6_is_socket);
/*
* Socket options and virtual interface manipulation. The whole
struct mf6cctl mfc;
mifi_t mifi;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return -ENOENT;
if (optname != MRT6_INIT) {
- if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (sk != rcu_access_pointer(mrt->mroute_sk) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EACCES;
}
if (vif.mif6c_mifi >= MAXMIFS)
return -ENFILE;
rtnl_lock();
- ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+ ret = mif6_add(net, mrt, &vif,
+ sk == rtnl_dereference(mrt->mroute_sk));
rtnl_unlock();
return ret;
ret = ip6mr_mfc_delete(mrt, &mfc, parent);
else
ret = ip6mr_mfc_add(net, mrt, &mfc,
- sk == mrt->mroute6_sk, parent);
+ sk ==
+ rtnl_dereference(mrt->mroute_sk),
+ parent);
rtnl_unlock();
return ret;
/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
if (v != RT_TABLE_DEFAULT && v >= 100000000)
return -EINVAL;
- if (sk == mrt->mroute6_sk)
+ if (sk == rcu_access_pointer(mrt->mroute_sk))
return -EBUSY;
rtnl_lock();
int olr;
int val;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
{
struct sioc_sg_req6 sr;
struct sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
{
struct compat_sioc_sg_req6 sr;
struct compat_sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
* Processing handlers for ip6mr_forward
*/
-static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
struct ipv6hdr *ipv6h;
- struct mif_device *vif = &mrt->vif6_table[vifi];
+ struct vif_device *vif = &mrt->vif_table[vifi];
struct net_device *dev;
struct dst_entry *dst;
struct flowi6 fl6;
return 0;
}
-static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
int ct;
for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
- if (mrt->vif6_table[ct].dev == dev)
+ if (mrt->vif_table[ct].dev == dev)
break;
}
return ct;
}
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *c)
{
int psend = -1;
int vif, ct;
int true_vifi = ip6mr_find_vif(mrt, skb->dev);
- vif = cache->mf6c_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+ if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+ rcu_read_lock();
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
+ rcu_read_unlock();
goto forward;
+ }
+ rcu_read_unlock();
}
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (mrt->vif6_table[vif].dev != skb->dev) {
- cache->mfc_un.res.wrong_if++;
+ if (mrt->vif_table[vif].dev != skb->dev) {
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
}
goto dont_forward;
}
forward:
- mrt->vif6_table[vif].pkt_in++;
- mrt->vif6_table[vif].bytes_in += skb->len;
+ mrt->vif_table[vif].pkt_in++;
+ mrt->vif_table[vif].bytes_in += skb->len;
/*
* Forward the frame
*/
- if (ipv6_addr_any(&cache->mf6c_origin) &&
- ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+ if (ipv6_addr_any(&c->mf6c_origin) &&
+ ipv6_addr_any(&c->mf6c_mcastgrp)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mf6c_parent &&
+ true_vifi != c->_c.mfc_parent &&
ipv6_hdr(skb)->hop_limit >
- cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mf6c_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
- ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+ if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
+ ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ip6mr_forward2(net, mrt, skb2, cache, psend);
+ ip6mr_forward2(net, mrt, skb2,
+ c, psend);
}
psend = ct;
}
}
last_forward:
if (psend != -1) {
- ip6mr_forward2(net, mrt, skb, cache, psend);
+ ip6mr_forward2(net, mrt, skb, c, psend);
return;
}
{
struct mfc6_cache *cache;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
return 0;
}
-
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mf6c_parent >= MAXMIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (MIF_EXISTS(mrt, c->mf6c_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
- mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
- if (!mp_attr)
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
- if (!nhp) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
u32 portid)
{
int err;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct mfc6_cache *cache;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
return err;
}
- if (rtm->rtm_flags & RTM_F_NOTIFY)
- cache->mfc_flags |= MFC_NOTIFY;
-
- err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
return err;
}
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
int flags)
{
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
goto nla_put_failure;
- err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
return -EMSGSIZE;
}
+static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags)
+{
+ return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
+ cmd, flags);
+}
+
static int mr6_msgsize(bool unresolved, int maxvif)
{
size_t len =
return len;
}
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+ skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
return len;
}
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
struct net *net = read_pnet(&mrt->net);
struct nlmsghdr *nlh;
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr6_table *mrt;
- struct mfc6_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int h = 0, s_h;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_h = cb->args[1];
- s_e = cb->args[2];
-
- read_lock(&mrt_lock);
- ip6mr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- if (t > s_t)
- s_h = 0;
- for (h = s_h; h < MFC6_LINES; h++) {
- list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
- if (e < s_e)
- goto next_entry;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = s_e = 0;
- }
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = s_e = 0;
- s_h = 0;
-next_table:
- t++;
- }
-done:
- read_unlock(&mrt_lock);
-
- cb->args[2] = e;
- cb->args[1] = h;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
+ _ip6mr_fill_mroute, &mfc_unres_lock);
}
if (get_user(len, optlen))
return -EFAULT;
- lock_sock(sk);
- err = nf_getsockopt(sk, PF_INET6, optname, optval,
- &len);
- release_sock(sk);
+ err = nf_getsockopt(sk, PF_INET6, optname, optval, &len);
if (err >= 0)
err = put_user(len, optlen);
}
if (get_user(len, optlen))
return -EFAULT;
- lock_sock(sk);
- err = compat_nf_getsockopt(sk, PF_INET6,
- optname, optval, &len);
- release_sock(sk);
+ err = compat_nf_getsockopt(sk, PF_INET6, optname, optval, &len);
if (err >= 0)
err = put_user(len, optlen);
}
}
EXPORT_SYMBOL(compat_ipv6_getsockopt);
#endif
-
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
struct inet6_dev *idev = NULL;
if (ifindex == 0) {
- struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
+ struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
static struct pernet_operations igmp6_net_ops = {
.init = igmp6_net_init,
.exit = igmp6_net_exit,
+ .async = true,
};
int __init igmp6_init(void)
static struct pernet_operations ndisc_net_ops = {
.init = ndisc_net_init,
.exit = ndisc_net_exit,
+ .async = true,
};
int __init ndisc_init(void)
}
if (table_base + v != ip6t_next_entry(e) &&
!(e->ipv6.flags & IP6T_F_GOTO)) {
+ if (unlikely(stackidx >= private->stacksize)) {
+ verdict = NF_DROP;
+ break;
+ }
jumpstack[stackidx++] = e;
}
const struct ip6t_entry *e = par->entryinfo;
if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) {
- pr_info("ECHOREPLY is not supported.\n");
+ pr_info_ratelimited("ECHOREPLY is not supported\n");
return -EINVAL;
} else if (rejinfo->with == IP6T_TCP_RESET) {
/* Must specify that it's a TCP packet */
if (!(e->ipv6.flags & IP6T_F_PROTO) ||
e->ipv6.proto != IPPROTO_TCP ||
(e->ipv6.invflags & XT_INV_PROTO)) {
- pr_info("TCP_RESET illegal for non-tcp\n");
+ pr_info_ratelimited("TCP_RESET illegal for non-tcp\n");
return -EINVAL;
}
}
lookup_flags |= RT6_LOOKUP_F_IFACE;
}
- rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
if (rt->dst.error)
goto out;
unsigned int options = ~XT_RPFILTER_OPTION_MASK;
if (info->flags & options) {
- pr_info("unknown options encountered");
+ pr_info_ratelimited("unknown options\n");
return -EINVAL;
}
if (strcmp(par->table, "mangle") != 0 &&
strcmp(par->table, "raw") != 0) {
- pr_info("match only valid in the \'raw\' "
- "or \'mangle\' tables, not \'%s\'.\n", par->table);
+ pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n",
+ par->table);
return -EINVAL;
}
const struct ip6t_srh *srhinfo = par->matchinfo;
if (srhinfo->mt_flags & ~IP6T_SRH_MASK) {
- pr_err("unknown srh match flags %X\n", srhinfo->mt_flags);
+ pr_info_ratelimited("unknown srh match flags %X\n",
+ srhinfo->mt_flags);
return -EINVAL;
}
if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) {
- pr_err("unknown srh invflags %X\n", srhinfo->mt_invflags);
+ pr_info_ratelimited("unknown srh invflags %X\n",
+ srhinfo->mt_invflags);
return -EINVAL;
}
static struct pernet_operations defrag6_net_ops = {
.exit = defrag6_net_exit,
+ .async = true,
};
static int __init nf_defrag_init(void)
*dest = 0;
again:
- rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
+ lookup_flags);
if (rt->dst.error)
goto put_rt_err;
static struct pernet_operations ping_v6_net_ops = {
.init = ping_v6_proc_init_net,
.exit = ping_v6_proc_exit_net,
+ .async = true,
};
#endif
static struct pernet_operations ipv6_proc_ops = {
.init = ipv6_proc_init_net,
.exit = ipv6_proc_exit_net,
+ .async = true,
};
int __init ipv6_misc_proc_init(void)
{
unregister_pernet_subsys(&ipv6_proc_ops);
}
-
static struct pernet_operations raw6_net_ops = {
.init = raw6_init_net,
.exit = raw6_exit_net,
+ .async = true,
};
int __init raw6_proc_init(void)
static struct pernet_operations ip6_frags_ops = {
.init = ipv6_frags_init_net,
.exit = ipv6_frags_exit_net,
+ .async = true,
};
int __init ipv6_frag_init(void)
return false;
}
-static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+static struct rt6_info *rt6_multipath_select(const struct net *net,
+ struct rt6_info *match,
struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb,
int strict)
{
struct rt6_info *sibling, *next_sibling;
* case it will always be non-zero. Otherwise now is the time to do it.
*/
if (!fl6->mp_hash)
- fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+ fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
return match;
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
rt = rt6_device_match(net, rt, &fl6->saddr,
fl6->flowi6_oif, flags);
if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
- rt = rt6_multipath_select(rt, fl6,
- fl6->flowi6_oif, flags);
+ rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
+ skb, flags);
}
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
}
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
- int flags)
+ const struct sk_buff *skb, int flags)
{
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
- const struct in6_addr *saddr, int oif, int strict)
+ const struct in6_addr *saddr, int oif,
+ const struct sk_buff *skb, int strict)
{
struct flowi6 fl6 = {
.flowi6_oif = oif,
flags |= RT6_LOOKUP_F_HAS_SADDR;
}
- dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
+ dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
if (dst->error == 0)
return (struct rt6_info *) dst;
}
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int flags)
+ int oif, struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags)
{
struct fib6_node *fn, *saved_fn;
struct rt6_info *rt, *rt_cache;
redo_rt6_select:
rt = rt6_select(net, fn, oif, strict);
if (rt->rt6i_nsiblings)
- rt = rt6_multipath_select(rt, fl6, oif, strict);
+ rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
-static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_input(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
}
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
flags |= RT6_LOOKUP_F_IFACE;
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
- struct flow_keys *keys)
+ struct flow_keys *keys,
+ struct flow_keys *flkeys)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
const struct ipv6hdr *key_iph = outer_iph;
+ struct flow_keys *_flkeys = flkeys;
const struct ipv6hdr *inner_iph;
const struct icmp6hdr *icmph;
struct ipv6hdr _inner_iph;
goto out;
key_iph = inner_iph;
+ _flkeys = NULL;
out:
- memset(keys, 0, sizeof(*keys));
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- keys->addrs.v6addrs.src = key_iph->saddr;
- keys->addrs.v6addrs.dst = key_iph->daddr;
- keys->tags.flow_label = ip6_flowinfo(key_iph);
- keys->basic.ip_proto = key_iph->nexthdr;
+ if (_flkeys) {
+ keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
+ keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
+ keys->tags.flow_label = _flkeys->tags.flow_label;
+ keys->basic.ip_proto = _flkeys->basic.ip_proto;
+ } else {
+ keys->addrs.v6addrs.src = key_iph->saddr;
+ keys->addrs.v6addrs.dst = key_iph->daddr;
+ keys->tags.flow_label = ip6_flowinfo(key_iph);
+ keys->basic.ip_proto = key_iph->nexthdr;
+ }
}
/* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
struct flow_keys hash_keys;
+ u32 mhash;
- if (skb) {
- ip6_multipath_l3_keys(skb, &hash_keys);
- return flow_hash_from_keys(&hash_keys) >> 1;
+ switch (net->ipv6.sysctl.multipath_hash_policy) {
+ case 0:
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (skb) {
+ ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
+ } else {
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
+ case 1:
+ if (skb) {
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+ struct flow_keys keys;
+
+ /* short-circuit if we already have L4 hash present */
+ if (skb->l4_hash)
+ return skb_get_hash_raw(skb) >> 1;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
+ hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+ } else {
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.ports.src = fl6->fl6_sport;
+ hash_keys.ports.dst = fl6->fl6_dport;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
}
+ mhash = flow_hash_from_keys(&hash_keys);
- return get_hash_from_flowi6(fl6) >> 1;
+ return mhash >> 1;
}
void ip6_route_input(struct sk_buff *skb)
.flowi6_mark = skb->mark,
.flowi6_proto = iph->nexthdr,
};
+ struct flow_keys *flkeys = NULL, _flkeys;
tun_info = skb_tunnel_info(skb);
if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+
+ if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
+ flkeys = &_flkeys;
+
if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
skb_dst_drop(skb);
- skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
+ skb_dst_set(skb,
+ ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
}
-static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_output(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
}
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
+ return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
static struct rt6_info *__ip6_route_redirect(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
};
static struct dst_entry *ip6_route_redirect(struct net *net,
- const struct flowi6 *fl6,
- const struct in6_addr *gateway)
+ const struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ const struct in6_addr *gateway)
{
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip6rd_flowi rdfl;
rdfl.fl6 = *fl6;
rdfl.gateway = *gateway;
- return fib6_rule_lookup(net, &rdfl.fl6,
+ return fib6_rule_lookup(net, &rdfl.fl6, skb,
flags, __ip6_route_redirect);
}
fl6.flowlabel = ip6_flowinfo(iph);
fl6.flowi6_uid = uid;
- dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
fl6.saddr = iph->daddr;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
flags |= RT6_LOOKUP_F_HAS_SADDR;
flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
- rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
+ rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
/* if table lookup failed, fall back to full lookup */
if (rt == net->ipv6.ip6_null_entry) {
}
if (!grt)
- grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
if (!grt)
goto out;
if (err)
goto out;
rt->dst.lwtstate = lwtstate_get(lwtstate);
- if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
- rt->dst.lwtstate->orig_output = rt->dst.output;
- rt->dst.output = lwtunnel_output;
- }
- if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
- rt->dst.lwtstate->orig_input = rt->dst.input;
- rt->dst.input = lwtunnel_input;
- }
+ lwtunnel_set_redirect(&rt->dst);
}
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
if (!ipv6_addr_any(&fl6.saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+ dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
rcu_read_unlock();
} else {
static struct pernet_operations ip6_route_net_ops = {
.init = ip6_route_net_init,
.exit = ip6_route_net_exit,
+ .async = true,
};
static int __net_init ipv6_inetpeer_init(struct net *net)
static struct pernet_operations ipv6_inetpeer_ops = {
.init = ipv6_inetpeer_init,
.exit = ipv6_inetpeer_exit,
+ .async = true,
};
static struct pernet_operations ip6_route_net_late_ops = {
.init = ip6_route_net_init_late,
.exit = ip6_route_net_exit_late,
+ .async = true,
};
static struct notifier_block ip6_route_dev_notifier = {
static struct pernet_operations ip6_segments_ops = {
.init = seg6_net_init,
.exit = seg6_net_exit,
+ .async = true,
};
static const struct genl_ops seg6_genl_ops[] = {
fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
if (!tbl_id) {
- dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
+ dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
} else {
struct fib6_table *table;
if (!table)
goto out;
- rt = ip6_pol_route(net, table, 0, &fl6, flags);
+ rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
dst = &rt->dst;
}
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel *t = netdev_priv(dev);
- if (t->dev == sitn->fb_tunnel_dev) {
+ if (dev == sitn->fb_tunnel_dev) {
ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
t->ip6rd.relay_prefix = 0;
t->ip6rd.prefixlen = 16;
.exit_batch = sit_exit_batch_net,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
+ .async = true,
};
static void __exit sit_cleanup(void)
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
+#include <net/netevent.h>
#ifdef CONFIG_NETLABEL
#include <net/calipso.h>
#endif
+static int zero;
static int one = 1;
static int auto_flowlabels_min;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
+static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net;
+ int ret;
+
+ net = container_of(table->data, struct net,
+ ipv6.sysctl.multipath_hash_policy);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
+
+ return ret;
+}
static struct ctl_table ipv6_table_template[] = {
{
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "fib_multipath_hash_policy",
+ .data = &init_net.ipv6.sysctl.multipath_hash_policy,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_rt6_multipath_hash_policy,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
+ ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
static struct pernet_operations ipv6_sysctl_net_ops = {
.init = ipv6_sysctl_net_init,
.exit = ipv6_sysctl_net_exit,
+ .async = true,
};
static struct ctl_table_header *ip6_header;
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
+ bool req_stolen = false;
struct sock *nsk;
sk = req->rsk_listener;
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
}
if (!nsk) {
reqsk_put(req);
+ if (req_stolen) {
+ /* Another cpu got exclusive access to req
+ * and created a full blown socket.
+ * Try to feed this packet to this socket
+ * instead of discarding it.
+ */
+ tcp_v6_restore_cb(skb);
+ sock_put(sk);
+ goto lookup;
+ }
goto discard_and_relse;
}
if (nsk == sk) {
.init = tcpv6_net_init,
.exit = tcpv6_net_exit,
.exit_batch = tcpv6_net_exit_batch,
+ .async = true,
};
int __init tcpv6_init(void)
static struct pernet_operations udplite6_net_ops = {
.init = udplite6_proc_init_net,
.exit = udplite6_proc_exit_net,
+ .async = true,
};
int __init udplite6_proc_init(void)
static struct pernet_operations xfrm6_net_ops = {
.init = xfrm6_net_init,
.exit = xfrm6_net_exit,
+ .async = true,
};
int __init xfrm6_init(void)
{
xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
}
-
.exit = xfrm6_tunnel_net_exit,
.id = &xfrm6_tunnel_net_id,
.size = sizeof(struct xfrm6_tunnel_net),
+ .async = true,
};
static int __init xfrm6_tunnel_init(void)
}
static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr;
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
addr->sa_family = AF_IUCV;
- *len = sizeof(struct sockaddr_iucv);
if (peer) {
memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8);
memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr));
memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
- return 0;
+ return sizeof(struct sockaddr_iucv);
}
/**
static struct pernet_operations kcm_net_ops = {
.init = kcm_proc_init_net,
.exit = kcm_proc_exit_net,
+ .async = true,
};
int __init kcm_proc_init(void)
*/
if (csk->sk_user_data) {
write_unlock_bh(&csk->sk_callback_lock);
+ strp_stop(&psock->strp);
strp_done(&psock->strp);
kmem_cache_free(kcm_psockp, psock);
return -EALREADY;
.exit = kcm_exit_net,
.id = &kcm_net_id,
.size = sizeof(struct kcm_net),
+ .async = true,
};
static int __init kcm_init(void)
.exit = pfkey_net_exit,
.id = &pfkey_net_id,
.size = sizeof(struct netns_pfkey),
+ .async = true,
};
static void __exit ipsec_pfkey_exit(void)
}
static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
lsa->l2tp_conn_id = lsk->conn_id;
lsa->l2tp_addr.s_addr = addr;
}
- *uaddr_len = sizeof(*lsa);
- return 0;
+ return sizeof(*lsa);
}
static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
}
static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)uaddr;
struct sock *sk = sock->sk;
}
if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
lsa->l2tp_scope_id = sk->sk_bound_dev_if;
- *uaddr_len = sizeof(*lsa);
- return 0;
+ return sizeof(*lsa);
}
static int l2tp_ip6_backlog_recv(struct sock *sk, struct sk_buff *skb)
/* getname() support.
*/
static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
- int *usockaddr_len, int peer)
+ int peer)
{
int len = 0;
int error = 0;
memcpy(uaddr, &sp, len);
}
- *usockaddr_len = len;
- error = 0;
+ error = len;
sock_put(sk);
end:
.init = pppol2tp_init_net,
.exit = pppol2tp_exit_net,
.id = &pppol2tp_net_id,
+ .async = true,
};
/*****************************************************************************
* Return the address information of a socket.
*/
static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddrlen, int peer)
+ int peer)
{
struct sockaddr_llc sllc;
struct sock *sk = sock->sk;
lock_sock(sk);
if (sock_flag(sk, SOCK_ZAPPED))
goto out;
- *uaddrlen = sizeof(sllc);
if (peer) {
rc = -ENOTCONN;
if (sk->sk_state != TCP_ESTABLISHED)
IFHWADDRLEN);
}
}
- rc = 0;
sllc.sllc_family = AF_LLC;
memcpy(uaddr, &sllc, sizeof(sllc));
+ rc = sizeof(sllc);
out:
release_sock(sk);
return rc;
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright(c) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
+ struct tid_ampdu_rx *tid_rx;
+
ht_dbg_ratelimited(sta->sdata,
"updated AddBA Req from %pM on tid %u\n",
sta->sta.addr, tid);
/* We have no API to update the timeout value in the
- * driver so reject the timeout update.
+ * driver so reject the timeout update if the timeout
+ * changed. If if did not change, i.e., no real update,
+ * just reply with success.
*/
- status = WLAN_STATUS_REQUEST_DECLINED;
- ieee80211_send_addba_resp(sta->sdata, sta->sta.addr,
- tid, dialog_token, status,
- 1, buf_size, timeout);
+ rcu_read_lock();
+ tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
+ if (tid_rx && tid_rx->timeout == timeout)
+ status = WLAN_STATUS_SUCCESS;
+ else
+ status = WLAN_STATUS_REQUEST_DECLINED;
+ rcu_read_unlock();
goto end;
}
ieee80211_recalc_ps(local);
ieee80211_recalc_ps_vif(sdata);
+ ieee80211_check_fast_rx_iface(sdata);
return 0;
}
}
if (beacon->probe_resp_len) {
new_beacon->probe_resp_len = beacon->probe_resp_len;
- beacon->probe_resp = pos;
+ new_beacon->probe_resp = pos;
memcpy(pos, beacon->probe_resp, beacon->probe_resp_len);
pos += beacon->probe_resp_len;
}
FLAG(REPORTS_LOW_ACK),
FLAG(SUPPORTS_TX_FRAG),
FLAG(SUPPORTS_TDLS_BUFFER_STA),
+ FLAG(DEAUTH_NEED_MGD_TX_PREP),
#undef FLAG
};
sta->cparams.ecn ? "yes" : "no");
p += scnprintf(p,
bufsz+buf-p,
- "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n");
+ "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n");
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
txqi = to_txq_info(sta->sta.txq[i]);
p += scnprintf(p, bufsz+buf-p,
- "%d %d %u %u %u %u %u %u %u %u %u\n",
+ "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n",
txqi->txq.tid,
txqi->txq.ac,
txqi->tin.backlog_bytes,
txqi->tin.overlimit,
txqi->tin.collisions,
txqi->tin.tx_bytes,
- txqi->tin.tx_packets);
+ txqi->tin.tx_packets,
+ txqi->flags,
+ txqi->flags & (1<<IEEE80211_TXQ_STOP) ? "STOP" : "RUN",
+ txqi->flags & (1<<IEEE80211_TXQ_AMPDU) ? " AMPDU" : "",
+ txqi->flags & (1<<IEEE80211_TXQ_NO_AMSDU) ? " NO-AMSDU" : "");
}
rcu_read_unlock();
const struct ieee80211_timeout_interval_ie *timeout_int;
const u8 *opmode_notif;
const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
- const struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie;
+ struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie;
const struct ieee80211_bss_max_idle_period_ie *max_idle_period_ie;
/* length of them, respectively */
mutex_lock(&local->sta_mtx);
sta = sta_info_get_bss(sdata, mgmt->sa);
if (sta) {
- u16 tid = *ieee80211_get_qos_ctl(hdr) &
- IEEE80211_QOS_CTL_TID_MASK;
+ u16 tid = ieee80211_get_tid(hdr);
__ieee80211_stop_rx_ba_session(
sta, tid, WLAN_BACK_RECIPIENT,
}
static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_mgmt *mgmt, size_t len)
+ struct ieee80211_mgmt *mgmt, size_t len,
+ struct ieee802_11_elems *elems)
{
struct ieee80211_mgmt *mgmt_fwd;
struct sk_buff *skb;
struct ieee80211_local *local = sdata->local;
- u8 *pos = mgmt->u.action.u.chan_switch.variable;
- size_t offset_ttl;
skb = dev_alloc_skb(local->tx_headroom + len);
if (!skb)
skb_reserve(skb, local->tx_headroom);
mgmt_fwd = skb_put(skb, len);
- /* offset_ttl is based on whether the secondary channel
- * offset is available or not. Subtract 1 from the mesh TTL
- * and disable the initiator flag before forwarding.
- */
- offset_ttl = (len < 42) ? 7 : 10;
- *(pos + offset_ttl) -= 1;
- *(pos + offset_ttl + 1) &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR;
+ elems->mesh_chansw_params_ie->mesh_ttl--;
+ elems->mesh_chansw_params_ie->mesh_flags &=
+ ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR;
memcpy(mgmt_fwd, mgmt, len);
eth_broadcast_addr(mgmt_fwd->da);
/* forward or re-broadcast the CSA frame */
if (fwd_csa) {
- if (mesh_fwd_csa_frame(sdata, mgmt, len) < 0)
+ if (mesh_fwd_csa_frame(sdata, mgmt, len, &elems) < 0)
mcsa_dbg(sdata, "Failed to forward the CSA frame");
}
}
da = ieee80211_get_DA(hdr);
sa = ieee80211_get_SA(hdr);
if (ieee80211_is_data_qos(hdr->frame_control))
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
else
tid = 0;
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
ieee80211_flush_queues(local, sdata, true);
/* deauthenticate/disassociate now */
- if (tx || frame_buf)
+ if (tx || frame_buf) {
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+ /*
+ * In multi channel scenarios guarantee that the virtual
+ * interface is granted immediate airtime to transmit the
+ * deauthentication frame by calling mgd_prepare_tx, if the
+ * driver requested so.
+ */
+ if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) &&
+ !ifmgd->have_beacon)
+ drv_mgd_prepare_tx(sdata->local, sdata);
+
ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype,
reason, tx, frame_buf);
+ }
/* flush out frame - make sure the deauth was actually sent */
if (tx)
u16 tx_time)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- u16 tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ u16 tid = ieee80211_get_tid(hdr);
int ac = ieee80211_ac_from_tid(tid);
struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac];
unsigned long now = jiffies;
if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE)))
return;
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
if (likely(sta->ampdu_mlme.tid_tx[tid]))
return;
flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR;
if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN;
+ if (status->flag & RX_FLAG_AMPDU_EOF_BIT_KNOWN)
+ flags |= IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN;
+ if (status->flag & RX_FLAG_AMPDU_EOF_BIT)
+ flags |= IEEE80211_RADIOTAP_AMPDU_EOF;
put_unaligned_le16(flags, pos);
pos += 2;
if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
ack_policy = *ieee80211_get_qos_ctl(hdr) &
IEEE80211_QOS_CTL_ACK_POLICY_MASK;
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
if (!tid_agg_rx) {
ieee80211_has_pm(hdr->frame_control) &&
(ieee80211_is_data_qos(hdr->frame_control) ||
ieee80211_is_qos_nullfunc(hdr->frame_control))) {
- u8 tid;
-
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ u8 tid = ieee80211_get_tid(hdr);
ieee80211_sta_uapsd_trigger(&rx->sta->sta, tid);
}
}
static ieee80211_rx_result debug_noinline
-ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+__ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
{
struct net_device *dev = rx->sdata->dev;
struct sk_buff *skb = rx->skb;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
__le16 fc = hdr->frame_control;
struct sk_buff_head frame_list;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
struct ethhdr ethhdr;
const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
- if (unlikely(!ieee80211_is_data(fc)))
- return RX_CONTINUE;
-
- if (unlikely(!ieee80211_is_data_present(fc)))
- return RX_DROP_MONITOR;
-
- if (!(status->rx_flags & IEEE80211_RX_AMSDU))
- return RX_CONTINUE;
-
if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
- switch (rx->sdata->vif.type) {
- case NL80211_IFTYPE_AP_VLAN:
- if (!rx->sdata->u.vlan.sta)
- return RX_DROP_UNUSABLE;
- break;
- case NL80211_IFTYPE_STATION:
- if (!rx->sdata->u.mgd.use_4addr)
- return RX_DROP_UNUSABLE;
- break;
- default:
- return RX_DROP_UNUSABLE;
- }
check_da = NULL;
check_sa = NULL;
} else switch (rx->sdata->vif.type) {
break;
}
- if (is_multicast_ether_addr(hdr->addr1))
- return RX_DROP_UNUSABLE;
-
skb->dev = dev;
__skb_queue_head_init(&frame_list);
if (ieee80211_data_to_8023_exthdr(skb, ðhdr,
rx->sdata->vif.addr,
- rx->sdata->vif.type))
+ rx->sdata->vif.type,
+ data_offset))
return RX_DROP_UNUSABLE;
ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
return RX_QUEUED;
}
+static ieee80211_rx_result debug_noinline
+ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+{
+ struct sk_buff *skb = rx->skb;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ __le16 fc = hdr->frame_control;
+
+ if (!(status->rx_flags & IEEE80211_RX_AMSDU))
+ return RX_CONTINUE;
+
+ if (unlikely(!ieee80211_is_data(fc)))
+ return RX_CONTINUE;
+
+ if (unlikely(!ieee80211_is_data_present(fc)))
+ return RX_DROP_MONITOR;
+
+ if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
+ switch (rx->sdata->vif.type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ if (!rx->sdata->u.vlan.sta)
+ return RX_DROP_UNUSABLE;
+ break;
+ case NL80211_IFTYPE_STATION:
+ if (!rx->sdata->u.mgd.use_4addr)
+ return RX_DROP_UNUSABLE;
+ break;
+ default:
+ return RX_DROP_UNUSABLE;
+ }
+ }
+
+ if (is_multicast_ether_addr(hdr->addr1))
+ return RX_DROP_UNUSABLE;
+
+ return __ieee80211_rx_h_amsdu(rx, 0);
+}
+
#ifdef CONFIG_MAC80211_MESH
static ieee80211_rx_result
ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
case WLAN_HT_ACTION_SMPS: {
struct ieee80211_supported_band *sband;
enum ieee80211_smps_mode smps_mode;
+ struct sta_opmode_info sta_opmode = {};
/* convert to HT capability */
switch (mgmt->u.action.u.ht_smps.smps_control) {
if (rx->sta->sta.smps_mode == smps_mode)
goto handled;
rx->sta->sta.smps_mode = smps_mode;
+ sta_opmode.smps_mode = smps_mode;
+ sta_opmode.changed = STA_OPMODE_SMPS_MODE_CHANGED;
sband = rx->local->hw.wiphy->bands[status->band];
rate_control_rate_update(local, sband, rx->sta,
IEEE80211_RC_SMPS_CHANGED);
+ cfg80211_sta_opmode_change_notify(sdata->dev,
+ rx->sta->addr,
+ &sta_opmode,
+ GFP_KERNEL);
goto handled;
}
case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
struct ieee80211_supported_band *sband;
u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth;
enum ieee80211_sta_rx_bandwidth max_bw, new_bw;
+ struct sta_opmode_info sta_opmode = {};
/* If it doesn't support 40 MHz it can't change ... */
if (!(rx->sta->sta.ht_cap.cap &
rx->sta->sta.bandwidth = new_bw;
sband = rx->local->hw.wiphy->bands[status->band];
+ sta_opmode.bw = new_bw;
+ sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED;
rate_control_rate_update(local, sband, rx->sta,
IEEE80211_RC_BW_CHANGED);
+ cfg80211_sta_opmode_change_notify(sdata->dev,
+ rx->sta->addr,
+ &sta_opmode,
+ GFP_KERNEL);
goto handled;
}
default:
switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
- /* 4-addr is harder to deal with, later maybe */
- if (sdata->u.mgd.use_4addr)
- goto clear;
- /* software powersave is a huge mess, avoid all of it */
- if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
- goto clear;
- if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
- !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
- goto clear;
if (sta->sta.tdls) {
fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1);
fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2);
fastrx.expected_ds_bits =
cpu_to_le16(IEEE80211_FCTL_FROMDS);
}
+
+ if (sdata->u.mgd.use_4addr && !sta->sta.tdls) {
+ fastrx.expected_ds_bits |=
+ cpu_to_le16(IEEE80211_FCTL_TODS);
+ fastrx.da_offs = offsetof(struct ieee80211_hdr, addr3);
+ fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ }
+
+ if (!sdata->u.mgd.powersave)
+ break;
+
+ /* software powersave is a huge mess, avoid all of it */
+ if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
+ goto clear;
+ if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
+ !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
+ goto clear;
break;
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_AP:
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN ||
!sdata->u.vlan.sta);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
+ sdata->u.vlan.sta) {
+ fastrx.expected_ds_bits |=
+ cpu_to_le16(IEEE80211_FCTL_FROMDS);
+ fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ fastrx.internal_forward = 0;
+ }
+
break;
default:
goto clear;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct sta_info *sta = rx->sta;
int orig_len = skb->len;
- int snap_offs = ieee80211_hdrlen(hdr->frame_control);
+ int hdrlen = ieee80211_hdrlen(hdr->frame_control);
+ int snap_offs = hdrlen;
struct {
u8 snap[sizeof(rfc1042_header)];
__be16 proto;
(status->flag & FAST_RX_CRYPT_FLAGS) != FAST_RX_CRYPT_FLAGS)
return false;
- /* we don't deal with A-MSDU deaggregation here */
- if (status->rx_flags & IEEE80211_RX_AMSDU)
- return false;
-
if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
return false;
snap_offs += IEEE80211_CCMP_HDR_LEN;
}
- if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
- goto drop;
- payload = (void *)(skb->data + snap_offs);
+ if (!(status->rx_flags & IEEE80211_RX_AMSDU)) {
+ if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
+ goto drop;
- if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
- return false;
+ payload = (void *)(skb->data + snap_offs);
- /* Don't handle these here since they require special code.
- * Accept AARP and IPX even though they should come with a
- * bridge-tunnel header - but if we get them this way then
- * there's little point in discarding them.
- */
- if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
- payload->proto == fast_rx->control_port_protocol))
- return false;
+ if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
+ return false;
+
+ /* Don't handle these here since they require special code.
+ * Accept AARP and IPX even though they should come with a
+ * bridge-tunnel header - but if we get them this way then
+ * there's little point in discarding them.
+ */
+ if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
+ payload->proto == fast_rx->control_port_protocol))
+ return false;
+ }
/* after this point, don't punt to the slowpath! */
}
/* statistics part of ieee80211_rx_h_sta_process() */
- stats->last_rx = jiffies;
- stats->last_rate = sta_stats_encode_rate(status);
-
- stats->fragments++;
- stats->packets++;
-
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
stats->last_signal = status->signal;
if (!fast_rx->uses_rss)
if (rx->key && !ieee80211_has_protected(hdr->frame_control))
goto drop;
+ if (status->rx_flags & IEEE80211_RX_AMSDU) {
+ if (__ieee80211_rx_h_amsdu(rx, snap_offs - hdrlen) !=
+ RX_QUEUED)
+ goto drop;
+
+ return true;
+ }
+
+ stats->last_rx = jiffies;
+ stats->last_rate = sta_stats_encode_rate(status);
+
+ stats->fragments++;
+ stats->packets++;
+
/* do the header conversion - first grab the addresses */
ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs);
ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs);
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2008, Intel Corporation
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
u32 sta_flags, u8 *bssid,
struct ieee80211_csa_ie *csa_ie)
{
- enum nl80211_band new_band;
+ enum nl80211_band new_band = current_band;
int new_freq;
u8 new_chan_no;
struct ieee80211_channel *new_chan;
elems->ext_chansw_ie->new_operating_class,
&new_band)) {
sdata_info(sdata,
- "cannot understand ECSA IE operating class %d, disconnecting\n",
+ "cannot understand ECSA IE operating class, %d, ignoring\n",
elems->ext_chansw_ie->new_operating_class);
- return -EINVAL;
}
new_chan_no = elems->ext_chansw_ie->new_ch_num;
csa_ie->count = elems->ext_chansw_ie->count;
csa_ie->mode = elems->ext_chansw_ie->mode;
} else if (elems->ch_switch_ie) {
- new_band = current_band;
new_chan_no = elems->ch_switch_ie->new_ch_num;
csa_ie->count = elems->ch_switch_ie->count;
csa_ie->mode = elems->ch_switch_ie->mode;
if (ieee80211_hw_check(hw, USES_RSS)) {
sta->pcpu_rx_stats =
- alloc_percpu(struct ieee80211_sta_rx_stats);
+ alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp);
if (!sta->pcpu_rx_stats)
goto free;
}
if (sta->sta.txq[0])
kfree(to_txq_info(sta->sta.txq[0]));
free:
+ free_percpu(sta->pcpu_rx_stats);
#ifdef CONFIG_MAC80211_MESH
kfree(sta->mesh);
#endif
sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
sinfo->expected_throughput = thr;
}
+
+ if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) &&
+ sta->status_stats.ack_signal_filled) {
+ sinfo->ack_signal = sta->status_stats.last_ack_signal;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL);
+ }
}
u32 sta_get_expected_throughput(struct sta_info *sta)
u64 msdu_retries[IEEE80211_NUM_TIDS + 1];
u64 msdu_failed[IEEE80211_NUM_TIDS + 1];
unsigned long last_ack;
+ s8 last_ack_signal;
+ bool ack_signal_filled;
} status_stats;
/* Updated from TX path only, no locking requirements */
struct ieee80211_mgmt *mgmt = (void *) skb->data;
struct ieee80211_local *local = sta->local;
struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb);
- if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
+ if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
sta->status_stats.last_ack = jiffies;
+ if (txinfo->status.is_valid_ack_signal) {
+ sta->status_stats.last_ack_signal =
+ (s8)txinfo->status.ack_signal;
+ sta->status_stats.ack_signal_filled = true;
+ }
+ }
if (ieee80211_is_data_qos(mgmt->frame_control)) {
struct ieee80211_hdr *hdr = (void *) skb->data;
ieee80211_is_qos_nullfunc(hdr->frame_control))
cfg80211_probe_status(sdata->dev, hdr->addr1,
cookie, acked,
+ info->status.ack_signal,
+ info->status.is_valid_ack_signal,
GFP_ATOMIC);
else
cfg80211_mgmt_tx_status(&sdata->wdev, cookie,
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
- u8 *qc;
int tid;
/*
return TX_CONTINUE;
/* include per-STA, per-TID sequence counter */
-
- qc = ieee80211_get_qos_ctl(hdr);
- tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
tx->sta->tx_stats.msdu[tid]++;
hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
struct ieee80211_hdr *hdr;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
int tid;
- u8 *qc;
memset(tx, 0, sizeof(*tx));
tx->skb = skb;
!ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) {
struct tid_ampdu_tx *tid_tx;
- qc = ieee80211_get_qos_ctl(hdr);
- tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
if (tid_tx) {
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_hdr *hdr;
int headroom;
bool may_encrypt;
enum nl80211_band band)
{
enum ieee80211_sta_rx_bandwidth new_bw;
+ struct sta_opmode_info sta_opmode = {};
u32 changed = 0;
u8 nss;
if (sta->sta.rx_nss != nss) {
sta->sta.rx_nss = nss;
+ sta_opmode.rx_nss = nss;
changed |= IEEE80211_RC_NSS_CHANGED;
+ sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED;
}
switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
new_bw = ieee80211_sta_cur_vht_bw(sta);
if (new_bw != sta->sta.bandwidth) {
sta->sta.bandwidth = new_bw;
+ sta_opmode.bw = new_bw;
changed |= IEEE80211_RC_BW_CHANGED;
+ sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED;
}
+ if (sta_opmode.changed)
+ cfg80211_sta_opmode_change_notify(sdata->dev, sta->addr,
+ &sta_opmode, GFP_KERNEL);
+
return changed;
}
a4_included = ieee80211_has_a4(hdr->frame_control);
if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ qos_tid = ieee80211_get_tid(hdr);
else
qos_tid = 0;
aad[23] = 0;
if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = *ieee80211_get_qos_ctl(hdr) &
- IEEE80211_QOS_CTL_TID_MASK;
+ qos_tid = ieee80211_get_tid(hdr);
else
qos_tid = 0;
return RX_DROP_UNUSABLE;
if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = *ieee80211_get_qos_ctl(hdr) &
- IEEE80211_QOS_CTL_TID_MASK;
+ qos_tid = ieee80211_get_tid(hdr);
else
qos_tid = 0;
static struct pernet_operations netfilter_net_ops = {
.init = netfilter_net_init,
.exit = netfilter_net_exit,
+ .async = true,
};
int __init netfilter_init(void)
static struct pernet_operations ip_vs_lblc_ops = {
.init = __ip_vs_lblc_init,
.exit = __ip_vs_lblc_exit,
+ .async = true,
};
static int __init ip_vs_lblc_init(void)
static struct pernet_operations ip_vs_lblcr_ops = {
.init = __ip_vs_lblcr_init,
.exit = __ip_vs_lblcr_exit,
+ .async = true,
};
static int __init ip_vs_lblcr_init(void)
static struct pernet_operations nf_log_net_ops = {
.init = nf_log_net_init,
.exit = nf_log_net_exit,
+ .async = true,
};
int __init netfilter_log_init(void)
const struct nf_conn *ct,
u16 *rover)
{
- unsigned int range_size, min, i;
+ unsigned int range_size, min, max, i;
__be16 *portptr;
u_int16_t off;
}
} else {
min = ntohs(range->min_proto.all);
- range_size = ntohs(range->max_proto.all) - min + 1;
+ max = ntohs(range->max_proto.all);
+ if (unlikely(max < min))
+ swap(max, min);
+ range_size = max - min + 1;
}
if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) {
.exit = synproxy_net_exit,
.id = &synproxy_net_id,
.size = sizeof(struct synproxy_net),
+ .async = true,
};
static int __init synproxy_core_init(void)
* ebt_among is exempt from centralized matchsize checking
* because it uses a dynamic-size data set.
*/
- pr_err("%s_tables: %s.%u match: invalid size "
- "%u (kernel) != (user) %u\n",
- xt_prefix[par->family], par->match->name,
- par->match->revision,
- XT_ALIGN(par->match->matchsize), size);
+ pr_err_ratelimited("%s_tables: %s.%u match: invalid size %u (kernel) != (user) %u\n",
+ xt_prefix[par->family], par->match->name,
+ par->match->revision,
+ XT_ALIGN(par->match->matchsize), size);
return -EINVAL;
}
if (par->match->table != NULL &&
strcmp(par->match->table, par->table) != 0) {
- pr_err("%s_tables: %s match: only valid in %s table, not %s\n",
- xt_prefix[par->family], par->match->name,
- par->match->table, par->table);
+ pr_info_ratelimited("%s_tables: %s match: only valid in %s table, not %s\n",
+ xt_prefix[par->family], par->match->name,
+ par->match->table, par->table);
return -EINVAL;
}
if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
char used[64], allow[64];
- pr_err("%s_tables: %s match: used from hooks %s, but only "
- "valid from %s\n",
- xt_prefix[par->family], par->match->name,
- textify_hooks(used, sizeof(used), par->hook_mask,
- par->family),
- textify_hooks(allow, sizeof(allow), par->match->hooks,
- par->family));
+ pr_info_ratelimited("%s_tables: %s match: used from hooks %s, but only valid from %s\n",
+ xt_prefix[par->family], par->match->name,
+ textify_hooks(used, sizeof(used),
+ par->hook_mask, par->family),
+ textify_hooks(allow, sizeof(allow),
+ par->match->hooks,
+ par->family));
return -EINVAL;
}
if (par->match->proto && (par->match->proto != proto || inv_proto)) {
- pr_err("%s_tables: %s match: only valid for protocol %u\n",
- xt_prefix[par->family], par->match->name,
- par->match->proto);
+ pr_info_ratelimited("%s_tables: %s match: only valid for protocol %u\n",
+ xt_prefix[par->family], par->match->name,
+ par->match->proto);
return -EINVAL;
}
if (par->match->checkentry != NULL) {
int ret;
if (XT_ALIGN(par->target->targetsize) != size) {
- pr_err("%s_tables: %s.%u target: invalid size "
- "%u (kernel) != (user) %u\n",
- xt_prefix[par->family], par->target->name,
- par->target->revision,
- XT_ALIGN(par->target->targetsize), size);
+ pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n",
+ xt_prefix[par->family], par->target->name,
+ par->target->revision,
+ XT_ALIGN(par->target->targetsize), size);
return -EINVAL;
}
if (par->target->table != NULL &&
strcmp(par->target->table, par->table) != 0) {
- pr_err("%s_tables: %s target: only valid in %s table, not %s\n",
- xt_prefix[par->family], par->target->name,
- par->target->table, par->table);
+ pr_info_ratelimited("%s_tables: %s target: only valid in %s table, not %s\n",
+ xt_prefix[par->family], par->target->name,
+ par->target->table, par->table);
return -EINVAL;
}
if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
char used[64], allow[64];
- pr_err("%s_tables: %s target: used from hooks %s, but only "
- "usable from %s\n",
- xt_prefix[par->family], par->target->name,
- textify_hooks(used, sizeof(used), par->hook_mask,
- par->family),
- textify_hooks(allow, sizeof(allow), par->target->hooks,
- par->family));
+ pr_info_ratelimited("%s_tables: %s target: used from hooks %s, but only usable from %s\n",
+ xt_prefix[par->family], par->target->name,
+ textify_hooks(used, sizeof(used),
+ par->hook_mask, par->family),
+ textify_hooks(allow, sizeof(allow),
+ par->target->hooks,
+ par->family));
return -EINVAL;
}
if (par->target->proto && (par->target->proto != proto || inv_proto)) {
- pr_err("%s_tables: %s target: only valid for protocol %u\n",
- xt_prefix[par->family], par->target->name,
- par->target->proto);
+ pr_info_ratelimited("%s_tables: %s target: only valid for protocol %u\n",
+ xt_prefix[par->family], par->target->name,
+ par->target->proto);
return -EINVAL;
}
if (par->target->checkentry != NULL) {
if (sz < sizeof(*info))
return NULL;
- /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
- if ((size >> PAGE_SHIFT) + 2 > totalram_pages)
- return NULL;
-
/* __GFP_NORETRY is not fully supported by kvmalloc but it should
* work reasonably well if sz is too large and bail out rather
* than shoot all processes down before realizing there is nothing
static struct pernet_operations xt_net_ops = {
.init = xt_net_init,
.exit = xt_net_exit,
+ .async = true,
};
static int __init xt_init(void)
const struct xt_audit_info *info = par->targinfo;
if (info->type > XT_AUDIT_TYPE_MAX) {
- pr_info("Audit type out of range (valid range: 0..%hhu)\n",
- XT_AUDIT_TYPE_MAX);
+ pr_info_ratelimited("Audit type out of range (valid range: 0..%hhu)\n",
+ XT_AUDIT_TYPE_MAX);
return -ERANGE;
}
const struct xt_CHECKSUM_info *einfo = par->targinfo;
if (einfo->operation & ~XT_CHECKSUM_OP_FILL) {
- pr_info("unsupported CHECKSUM operation %x\n", einfo->operation);
+ pr_info_ratelimited("unsupported CHECKSUM operation %x\n",
+ einfo->operation);
return -EINVAL;
}
- if (!einfo->operation) {
- pr_info("no CHECKSUM operation enabled\n");
+ if (!einfo->operation)
return -EINVAL;
- }
+
return 0;
}
if (strcmp(par->table, "mangle") != 0 &&
strcmp(par->table, "security") != 0) {
- pr_info("target only valid in the \'mangle\' "
- "or \'security\' tables, not \'%s\'.\n", par->table);
+ pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n",
+ par->table);
return -EINVAL;
}
break;
default:
- pr_info("invalid mode: %hu\n", info->mode);
+ pr_info_ratelimited("invalid mode: %hu\n", info->mode);
return -EINVAL;
}
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
proto = xt_ct_find_proto(par);
if (!proto) {
- pr_info("You must specify a L4 protocol, and not use "
- "inversions on it.\n");
+ pr_info_ratelimited("You must specify a L4 protocol and not use inversions on it\n");
return -ENOENT;
}
helper = nf_conntrack_helper_try_module_get(helper_name, par->family,
proto);
if (helper == NULL) {
- pr_info("No such helper \"%s\"\n", helper_name);
+ pr_info_ratelimited("No such helper \"%s\"\n", helper_name);
return -ENOENT;
}
const struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout;
struct nf_conn_timeout *timeout_ext;
+ const char *errmsg = NULL;
int ret = 0;
u8 proto;
timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook);
if (timeout_find_get == NULL) {
ret = -ENOENT;
- pr_info("Timeout policy base is empty\n");
+ errmsg = "Timeout policy base is empty";
goto out;
}
proto = xt_ct_find_proto(par);
if (!proto) {
ret = -EINVAL;
- pr_info("You must specify a L4 protocol, and not use "
- "inversions on it.\n");
+ errmsg = "You must specify a L4 protocol and not use inversions on it";
goto out;
}
timeout = timeout_find_get(par->net, timeout_name);
if (timeout == NULL) {
ret = -ENOENT;
- pr_info("No such timeout policy \"%s\"\n", timeout_name);
+ pr_info_ratelimited("No such timeout policy \"%s\"\n",
+ timeout_name);
goto out;
}
if (timeout->l3num != par->family) {
ret = -EINVAL;
- pr_info("Timeout policy `%s' can only be used by L3 protocol "
- "number %d\n", timeout_name, timeout->l3num);
+ pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n",
+ timeout_name, 3, timeout->l3num);
goto err_put_timeout;
}
/* Make sure the timeout policy matches any existing protocol tracker,
l4proto = __nf_ct_l4proto_find(par->family, proto);
if (timeout->l4proto->l4proto != l4proto->l4proto) {
ret = -EINVAL;
- pr_info("Timeout policy `%s' can only be used by L4 protocol "
- "number %d\n",
- timeout_name, timeout->l4proto->l4proto);
+ pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n",
+ timeout_name, 4, timeout->l4proto->l4proto);
goto err_put_timeout;
}
timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
__xt_ct_tg_timeout_put(timeout);
out:
rcu_read_unlock();
+ if (errmsg)
+ pr_info_ratelimited("%s\n", errmsg);
return ret;
#else
return -EOPNOTSUPP;
{
const struct xt_DSCP_info *info = par->targinfo;
- if (info->dscp > XT_DSCP_MAX) {
- pr_info("dscp %x out of range\n", info->dscp);
+ if (info->dscp > XT_DSCP_MAX)
return -EDOM;
- }
return 0;
}
{
const struct ipt_TTL_info *info = par->targinfo;
- if (info->mode > IPT_TTL_MAXMODE) {
- pr_info("TTL: invalid or unknown mode %u\n", info->mode);
+ if (info->mode > IPT_TTL_MAXMODE)
return -EINVAL;
- }
if (info->mode != IPT_TTL_SET && info->ttl == 0)
return -EINVAL;
return 0;
{
const struct ip6t_HL_info *info = par->targinfo;
- if (info->mode > IP6T_HL_MAXMODE) {
- pr_info("invalid or unknown mode %u\n", info->mode);
+ if (info->mode > IP6T_HL_MAXMODE)
return -EINVAL;
- }
- if (info->mode != IP6T_HL_SET && info->hop_limit == 0) {
- pr_info("increment/decrement does not "
- "make sense with value 0\n");
+ if (info->mode != IP6T_HL_SET && info->hop_limit == 0)
return -EINVAL;
- }
return 0;
}
* the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
static int hmark_tg_check(const struct xt_tgchk_param *par)
{
const struct xt_hmark_info *info = par->targinfo;
+ const char *errmsg = "proto mask must be zero with L3 mode";
- if (!info->hmodulus) {
- pr_info("xt_HMARK: hash modulus can't be zero\n");
+ if (!info->hmodulus)
return -EINVAL;
- }
+
if (info->proto_mask &&
- (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) {
- pr_info("xt_HMARK: proto mask must be zero with L3 mode\n");
- return -EINVAL;
- }
+ (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)))
+ goto err;
+
if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) &&
(info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) |
- XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) {
- pr_info("xt_HMARK: spi-mask and port-mask can't be combined\n");
+ XT_HMARK_FLAG(XT_HMARK_DPORT_MASK))))
return -EINVAL;
- }
+
if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) &&
(info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) |
XT_HMARK_FLAG(XT_HMARK_DPORT)))) {
- pr_info("xt_HMARK: spi-set and port-set can't be combined\n");
- return -EINVAL;
+ errmsg = "spi-set and port-set can't be combined";
+ goto err;
}
return 0;
+err:
+ pr_info_ratelimited("%s\n", errmsg);
+ return -EINVAL;
}
static struct xt_target hmark_tg_reg[] __read_mostly = {
timer_setup(&info->timer->timer, idletimer_tg_expired, 0);
info->timer->refcnt = 1;
+ INIT_WORK(&info->timer->work, idletimer_tg_work);
+
mod_timer(&info->timer->timer,
msecs_to_jiffies(info->timeout * 1000) + jiffies);
- INIT_WORK(&info->timer->work, idletimer_tg_work);
-
return 0;
out_free_attr:
pr_debug("timeout value is zero\n");
return -EINVAL;
}
-
+ if (info->timeout >= INT_MAX / 1000) {
+ pr_debug("timeout value is too big\n");
+ return -EINVAL;
+ }
if (info->label[0] == '\0' ||
strnlen(info->label,
MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) {
struct xt_led_info_internal *ledinternal;
int err;
- if (ledinfo->id[0] == '\0') {
- pr_info("No 'id' parameter given.\n");
+ if (ledinfo->id[0] == '\0')
return -EINVAL;
- }
mutex_lock(&xt_led_mutex);
err = led_trigger_register(&ledinternal->netfilter_led_trigger);
if (err) {
- pr_err("Trigger name is already in use.\n");
+ pr_info_ratelimited("Trigger name is already in use.\n");
goto exit_alloc;
}
- /* See if we need to set up a timer */
- if (ledinfo->delay > 0)
- timer_setup(&ledinternal->timer, led_timeout_callback, 0);
+ /* Since the letinternal timer can be shared between multiple targets,
+ * always set it up, even if the current target does not need it
+ */
+ timer_setup(&ledinternal->timer, led_timeout_callback, 0);
list_add_tail(&ledinternal->list, &xt_led_triggers);
list_del(&ledinternal->list);
- if (ledinfo->delay > 0)
- del_timer_sync(&ledinternal->timer);
+ del_timer_sync(&ledinternal->timer);
led_trigger_unregister(&ledinternal->netfilter_led_trigger);
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/skbuff.h>
init_hashrandom(&jhash_initval);
if (info->queues_total == 0) {
- pr_err("NFQUEUE: number of total queues is 0\n");
+ pr_info_ratelimited("number of total queues is 0\n");
return -EINVAL;
}
maxid = info->queues_total - 1 + info->queuenum;
if (maxid > 0xffff) {
- pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n",
- info->queues_total, maxid);
+ pr_info_ratelimited("number of queues (%u) out of range (got %u)\n",
+ info->queues_total, maxid);
return -ERANGE;
}
if (par->target->revision == 2 && info->flags > 1)
&info->secid);
if (err) {
if (err == -EINVAL)
- pr_info("invalid security context \'%s\'\n", info->secctx);
+ pr_info_ratelimited("invalid security context \'%s\'\n",
+ info->secctx);
return err;
}
if (!info->secid) {
- pr_info("unable to map security context \'%s\'\n", info->secctx);
+ pr_info_ratelimited("unable to map security context \'%s\'\n",
+ info->secctx);
return -ENOENT;
}
err = security_secmark_relabel_packet(info->secid);
if (err) {
- pr_info("unable to obtain relabeling permission\n");
+ pr_info_ratelimited("unable to obtain relabeling permission\n");
return err;
}
if (strcmp(par->table, "mangle") != 0 &&
strcmp(par->table, "security") != 0) {
- pr_info("target only valid in the \'mangle\' "
- "or \'security\' tables, not \'%s\'.\n", par->table);
+ pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n",
+ par->table);
return -EINVAL;
}
if (mode && mode != info->mode) {
- pr_info("mode already set to %hu cannot mix with "
- "rules for mode %hu\n", mode, info->mode);
+ pr_info_ratelimited("mode already set to %hu cannot mix with rules for mode %hu\n",
+ mode, info->mode);
return -EINVAL;
}
case SECMARK_MODE_SEL:
break;
default:
- pr_info("invalid mode: %hu\n", info->mode);
+ pr_info_ratelimited("invalid mode: %hu\n", info->mode);
return -EINVAL;
}
(par->hook_mask & ~((1 << NF_INET_FORWARD) |
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_POST_ROUTING))) != 0) {
- pr_info("path-MTU clamping only supported in "
- "FORWARD, OUTPUT and POSTROUTING hooks\n");
+ pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
return -EINVAL;
}
if (par->nft_compat)
xt_ematch_foreach(ematch, e)
if (find_syn_match(ematch))
return 0;
- pr_info("Only works on TCP SYN packets\n");
+ pr_info_ratelimited("Only works on TCP SYN packets\n");
return -EINVAL;
}
(par->hook_mask & ~((1 << NF_INET_FORWARD) |
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_POST_ROUTING))) != 0) {
- pr_info("path-MTU clamping only supported in "
- "FORWARD, OUTPUT and POSTROUTING hooks\n");
+ pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
return -EINVAL;
}
if (par->nft_compat)
xt_ematch_foreach(ematch, e)
if (find_syn_match(ematch))
return 0;
- pr_info("Only works on TCP SYN packets\n");
+ pr_info_ratelimited("Only works on TCP SYN packets\n");
return -EINVAL;
}
#endif
!(i->invflags & IP6T_INV_PROTO))
return 0;
- pr_info("Can be used only in combination with "
- "either -p tcp or -p udp\n");
+ pr_info_ratelimited("Can be used only with -p tcp or -p udp\n");
return -EINVAL;
}
#endif
&& !(i->invflags & IPT_INV_PROTO))
return 0;
- pr_info("Can be used only in combination with "
- "either -p tcp or -p udp\n");
+ pr_info_ratelimited("Can be used only with -p tcp or -p udp\n");
return -EINVAL;
}
static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
{
+ const char *errmsg = "both incoming and outgoing interface limitation cannot be selected";
struct xt_addrtype_info_v1 *info = par->matchinfo;
if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN &&
- info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
- pr_info("both incoming and outgoing "
- "interface limitation cannot be selected\n");
- return -EINVAL;
- }
+ info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
+ goto err;
if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN)) &&
info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
- pr_info("output interface limitation "
- "not valid in PREROUTING and INPUT\n");
- return -EINVAL;
+ errmsg = "output interface limitation not valid in PREROUTING and INPUT";
+ goto err;
}
if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
(1 << NF_INET_LOCAL_OUT)) &&
info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) {
- pr_info("input interface limitation "
- "not valid in POSTROUTING and OUTPUT\n");
- return -EINVAL;
+ errmsg = "input interface limitation not valid in POSTROUTING and OUTPUT";
+ goto err;
}
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
if (par->family == NFPROTO_IPV6) {
if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) {
- pr_err("ipv6 BLACKHOLE matching not supported\n");
- return -EINVAL;
+ errmsg = "ipv6 BLACKHOLE matching not supported";
+ goto err;
}
if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
- pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n");
- return -EINVAL;
+ errmsg = "ipv6 PROHIBIT (THROW, NAT ..) matching not supported";
+ goto err;
}
if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
- pr_err("ipv6 does not support BROADCAST matching\n");
- return -EINVAL;
+ errmsg = "ipv6 does not support BROADCAST matching";
+ goto err;
}
}
#endif
return 0;
+err:
+ pr_info_ratelimited("%s\n", errmsg);
+ return -EINVAL;
}
static struct xt_match addrtype_mt_reg[] __read_mostly = {
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/skbuff.h>
program.filter = insns;
if (bpf_prog_create(ret, &program)) {
- pr_info("bpf: check failed: parse error\n");
+ pr_info_ratelimited("check failed: parse error\n");
return -EINVAL;
}
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/skbuff.h>
#include <linux/module.h>
#include <linux/netfilter/x_tables.h>
}
if (info->has_path && info->has_classid) {
- pr_info("xt_cgroup: both path and classid specified\n");
+ pr_info_ratelimited("path and classid specified\n");
return -EINVAL;
}
if (info->has_path) {
cgrp = cgroup_get_from_path(info->path);
if (IS_ERR(cgrp)) {
- pr_info("xt_cgroup: invalid path, errno=%ld\n",
- PTR_ERR(cgrp));
+ pr_info_ratelimited("invalid path, errno=%ld\n",
+ PTR_ERR(cgrp));
return -EINVAL;
}
info->priv = cgrp;
struct xt_cluster_match_info *info = par->matchinfo;
if (info->total_nodes > XT_CLUSTER_NODES_MAX) {
- pr_info("you have exceeded the maximum "
- "number of cluster nodes (%u > %u)\n",
- info->total_nodes, XT_CLUSTER_NODES_MAX);
+ pr_info_ratelimited("you have exceeded the maximum number of cluster nodes (%u > %u)\n",
+ info->total_nodes, XT_CLUSTER_NODES_MAX);
return -EINVAL;
}
if (info->node_mask >= (1ULL << info->total_nodes)) {
- pr_info("this node mask cannot be "
- "higher than the total number of nodes\n");
+ pr_info_ratelimited("node mask cannot exceed total number of nodes\n");
return -EDOM;
}
return 0;
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
/*
* This filter cannot function correctly unless connection tracking
int ret;
if (info->options & ~options) {
- pr_err("Unknown options in mask %x\n", info->options);
+ pr_info_ratelimited("Unknown options in mask %x\n",
+ info->options);
return -EINVAL;
}
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0) {
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
{
const struct xt_dscp_info *info = par->matchinfo;
- if (info->dscp > XT_DSCP_MAX) {
- pr_info("dscp %x out of range\n", info->dscp);
+ if (info->dscp > XT_DSCP_MAX)
return -EDOM;
- }
return 0;
}
if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
(ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
- pr_info("cannot match TCP bits in rule for non-tcp packets\n");
+ pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n");
return -EINVAL;
}
if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
(ip->proto != IPPROTO_TCP || ip->invflags & IP6T_INV_PROTO)) {
- pr_info("cannot match TCP bits in rule for non-tcp packets\n");
+ pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n");
return -EINVAL;
}
if (user != 0) {
return div64_u64(XT_HASHLIMIT_SCALE_v2, user);
} else {
- pr_warn("invalid rate from userspace: %llu\n", user);
+ pr_info_ratelimited("invalid rate from userspace: %llu\n",
+ user);
return 0;
}
}
if (!dh->rateinfo.prev_window &&
(dh->rateinfo.current_rate <= dh->rateinfo.burst)) {
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
return !(cfg->mode & XT_HASHLIMIT_INVERT);
} else {
goto overlimit;
}
if (cfg->mode & ~XT_HASHLIMIT_ALL) {
- pr_info("Unknown mode mask %X, kernel too old?\n",
- cfg->mode);
+ pr_info_ratelimited("Unknown mode mask %X, kernel too old?\n",
+ cfg->mode);
return -EINVAL;
}
/* Check for overflow. */
if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
if (cfg->avg == 0 || cfg->avg > U32_MAX) {
- pr_info("hashlimit invalid rate\n");
+ pr_info_ratelimited("invalid rate\n");
return -ERANGE;
}
if (cfg->interval == 0) {
- pr_info("hashlimit invalid interval\n");
+ pr_info_ratelimited("invalid interval\n");
return -EINVAL;
}
} else if (cfg->mode & XT_HASHLIMIT_BYTES) {
if (user2credits_byte(cfg->avg) == 0) {
- pr_info("overflow, rate too high: %llu\n", cfg->avg);
+ pr_info_ratelimited("overflow, rate too high: %llu\n",
+ cfg->avg);
return -EINVAL;
}
} else if (cfg->burst == 0 ||
- user2credits(cfg->avg * cfg->burst, revision) <
- user2credits(cfg->avg, revision)) {
- pr_info("overflow, try lower: %llu/%llu\n",
- cfg->avg, cfg->burst);
- return -ERANGE;
+ user2credits(cfg->avg * cfg->burst, revision) <
+ user2credits(cfg->avg, revision)) {
+ pr_info_ratelimited("overflow, try lower: %llu/%llu\n",
+ cfg->avg, cfg->burst);
+ return -ERANGE;
}
mutex_lock(&hashlimit_mutex);
.exit = hashlimit_net_exit,
.id = &hashlimit_net_id,
.size = sizeof(struct hashlimit_net),
+ .async = true,
};
static int __init hashlimit_mt_init(void)
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0) {
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
info->name[sizeof(info->name) - 1] = '\0';
/* Must specify no unknown invflags */
if (compinfo->invflags & ~XT_IPCOMP_INV_MASK) {
- pr_err("unknown flags %X\n", compinfo->invflags);
+ pr_info_ratelimited("unknown flags %X\n", compinfo->invflags);
return -EINVAL;
}
return 0;
&& par->family != NFPROTO_IPV6
#endif
) {
- pr_info("protocol family %u not supported\n", par->family);
+ pr_info_ratelimited("protocol family %u not supported\n",
+ par->family);
return -EINVAL;
}
/* Check for invalid flags */
if (info->flags & ~(XT_L2TP_TID | XT_L2TP_SID | XT_L2TP_VERSION |
XT_L2TP_TYPE)) {
- pr_info("unknown flags: %x\n", info->flags);
+ pr_info_ratelimited("unknown flags: %x\n", info->flags);
return -EINVAL;
}
(!(info->flags & XT_L2TP_SID)) &&
((!(info->flags & XT_L2TP_TYPE)) ||
(info->type != XT_L2TP_TYPE_CONTROL))) {
- pr_info("invalid flags combination: %x\n", info->flags);
+ pr_info_ratelimited("invalid flags combination: %x\n",
+ info->flags);
return -EINVAL;
}
*/
if (info->flags & XT_L2TP_VERSION) {
if ((info->version < 2) || (info->version > 3)) {
- pr_info("wrong L2TP version: %u\n", info->version);
+ pr_info_ratelimited("wrong L2TP version: %u\n",
+ info->version);
return -EINVAL;
}
if (info->version == 2) {
if ((info->flags & XT_L2TP_TID) &&
(info->tid > 0xffff)) {
- pr_info("v2 tid > 0xffff: %u\n", info->tid);
+ pr_info_ratelimited("v2 tid > 0xffff: %u\n",
+ info->tid);
return -EINVAL;
}
if ((info->flags & XT_L2TP_SID) &&
(info->sid > 0xffff)) {
- pr_info("v2 sid > 0xffff: %u\n", info->sid);
+ pr_info_ratelimited("v2 sid > 0xffff: %u\n",
+ info->sid);
return -EINVAL;
}
}
if ((ip->proto != IPPROTO_UDP) &&
(ip->proto != IPPROTO_L2TP)) {
- pr_info("missing protocol rule (udp|l2tpip)\n");
+ pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n");
return -EINVAL;
}
if ((ip->proto == IPPROTO_L2TP) &&
(info->version == 2)) {
- pr_info("v2 doesn't support IP mode\n");
+ pr_info_ratelimited("v2 doesn't support IP mode\n");
return -EINVAL;
}
if ((ip->proto != IPPROTO_UDP) &&
(ip->proto != IPPROTO_L2TP)) {
- pr_info("missing protocol rule (udp|l2tpip)\n");
+ pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n");
return -EINVAL;
}
if ((ip->proto == IPPROTO_L2TP) &&
(info->version == 2)) {
- pr_info("v2 doesn't support IP mode\n");
+ pr_info_ratelimited("v2 doesn't support IP mode\n");
return -EINVAL;
}
/* Check for overflow. */
if (r->burst == 0
|| user2credits(r->avg * r->burst) < user2credits(r->avg)) {
- pr_info("Overflow, try lower: %u/%u\n",
- r->avg, r->burst);
+ pr_info_ratelimited("Overflow, try lower: %u/%u\n",
+ r->avg, r->burst);
return -ERANGE;
}
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
if (mr->rangesize != 1) {
- pr_info("%s: multiple ranges no longer supported\n",
- par->target->name);
+ pr_info_ratelimited("multiple ranges no longer supported\n");
return -EINVAL;
}
return nf_ct_netns_get(par->net, par->family);
* it under the terms of the GNU General Public License version 2 (or any
* later at your option) as published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/skbuff.h>
nfacct = nfnl_acct_find_get(par->net, info->name);
if (nfacct == NULL) {
- pr_info("xt_nfacct: accounting object with name `%s' "
- "does not exists\n", info->name);
+ pr_info_ratelimited("accounting object `%s' does not exists\n",
+ info->name);
return -ENOENT;
}
info->nfacct = nfacct;
info->invert & XT_PHYSDEV_OP_BRIDGED) &&
par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
- pr_info("using --physdev-out and --physdev-is-out are only "
- "supported in the FORWARD and POSTROUTING chains with "
- "bridged traffic.\n");
+ pr_info_ratelimited("--physdev-out and --physdev-is-out only supported in the FORWARD and POSTROUTING chains with bridged traffic\n");
if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
return -EINVAL;
}
static int policy_mt_check(const struct xt_mtchk_param *par)
{
const struct xt_policy_info *info = par->matchinfo;
+ const char *errmsg = "neither incoming nor outgoing policy selected";
+
+ if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT)))
+ goto err;
- if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) {
- pr_info("neither incoming nor outgoing policy selected\n");
- return -EINVAL;
- }
if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) {
- pr_info("output policy not valid in PREROUTING and INPUT\n");
- return -EINVAL;
+ errmsg = "output policy not valid in PREROUTING and INPUT";
+ goto err;
}
if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
(1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) {
- pr_info("input policy not valid in POSTROUTING and OUTPUT\n");
- return -EINVAL;
+ errmsg = "input policy not valid in POSTROUTING and OUTPUT";
+ goto err;
}
if (info->len > XT_POLICY_MAX_ELEM) {
- pr_info("too many policy elements\n");
- return -EINVAL;
+ errmsg = "too many policy elements";
+ goto err;
}
return 0;
+err:
+ pr_info_ratelimited("%s\n", errmsg);
+ return -EINVAL;
}
static struct xt_match policy_mt_reg[] __read_mostly = {
net_get_random_once(&hash_rnd, sizeof(hash_rnd));
if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
- pr_info("Unsupported user space flags (%08x)\n",
- info->check_set);
+ pr_info_ratelimited("Unsupported userspace flags (%08x)\n",
+ info->check_set);
return -EINVAL;
}
if (hweight8(info->check_set &
if ((info->check_set & XT_RECENT_REAP) && !info->seconds)
return -EINVAL;
if (info->hit_count >= XT_RECENT_MAX_NSTAMPS) {
- pr_info("hitcount (%u) is larger than allowed maximum (%u)\n",
- info->hit_count, XT_RECENT_MAX_NSTAMPS - 1);
+ pr_info_ratelimited("hitcount (%u) is larger than allowed maximum (%u)\n",
+ info->hit_count, XT_RECENT_MAX_NSTAMPS - 1);
return -EINVAL;
}
if (info->name[0] == '\0' ||
add = true;
break;
default:
- pr_info("Need \"+ip\", \"-ip\" or \"/\"\n");
+ pr_info_ratelimited("Need \"+ip\", \"-ip\" or \"/\"\n");
return -EINVAL;
}
succ = in4_pton(c, size, (void *)&addr, '\n', NULL);
}
- if (!succ) {
- pr_info("illegal address written to procfs\n");
+ if (!succ)
return -EINVAL;
- }
spin_lock_bh(&recent_lock);
e = recent_entry_lookup(t, &addr, family, 0);
.exit = recent_net_exit,
.id = &recent_net_id,
.size = sizeof(struct recent_net),
+ .async = true,
};
static struct xt_match recent_mt_reg[] __read_mostly = {
index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find set identified by id %u to match\n",
- info->match_set.index);
+ pr_info_ratelimited("Cannot find set identified by id %u to match\n",
+ info->match_set.index);
return -ENOENT;
}
if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
- pr_warn("Protocol error: set match dimension is over the limit!\n");
+ pr_info_ratelimited("set match dimension is over the limit!\n");
ip_set_nfnl_put(par->net, info->match_set.index);
return -ERANGE;
}
index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find set identified by id %u to match\n",
- info->match_set.index);
+ pr_info_ratelimited("Cannot find set identified by id %u to match\n",
+ info->match_set.index);
return -ENOENT;
}
if (info->match_set.dim > IPSET_DIM_MAX) {
- pr_warn("Protocol error: set match dimension is over the limit!\n");
+ pr_info_ratelimited("set match dimension is over the limit!\n");
ip_set_nfnl_put(par->net, info->match_set.index);
return -ERANGE;
}
if (info->add_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find add_set index %u as target\n",
- info->add_set.index);
+ pr_info_ratelimited("Cannot find add_set index %u as target\n",
+ info->add_set.index);
return -ENOENT;
}
}
if (info->del_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find del_set index %u as target\n",
- info->del_set.index);
+ pr_info_ratelimited("Cannot find del_set index %u as target\n",
+ info->del_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
return -ENOENT;
}
if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 ||
info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
- pr_warn("Protocol error: SET target dimension is over the limit!\n");
+ pr_info_ratelimited("SET target dimension over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
if (info->add_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find add_set index %u as target\n",
- info->add_set.index);
+ pr_info_ratelimited("Cannot find add_set index %u as target\n",
+ info->add_set.index);
return -ENOENT;
}
}
if (info->del_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find del_set index %u as target\n",
- info->del_set.index);
+ pr_info_ratelimited("Cannot find del_set index %u as target\n",
+ info->del_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
return -ENOENT;
}
if (info->add_set.dim > IPSET_DIM_MAX ||
info->del_set.dim > IPSET_DIM_MAX) {
- pr_warn("Protocol error: SET target dimension is over the limit!\n");
+ pr_info_ratelimited("SET target dimension over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
index = ip_set_nfnl_get_byindex(par->net,
info->add_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find add_set index %u as target\n",
- info->add_set.index);
+ pr_info_ratelimited("Cannot find add_set index %u as target\n",
+ info->add_set.index);
return -ENOENT;
}
}
index = ip_set_nfnl_get_byindex(par->net,
info->del_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find del_set index %u as target\n",
- info->del_set.index);
+ pr_info_ratelimited("Cannot find del_set index %u as target\n",
+ info->del_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net,
info->add_set.index);
if (info->map_set.index != IPSET_INVALID_ID) {
if (strncmp(par->table, "mangle", 7)) {
- pr_warn("--map-set only usable from mangle table\n");
+ pr_info_ratelimited("--map-set only usable from mangle table\n");
return -EINVAL;
}
if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
!(par->hook_mask & (1 << NF_INET_FORWARD |
1 << NF_INET_LOCAL_OUT |
1 << NF_INET_POST_ROUTING))) {
- pr_warn("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
+ pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
return -EINVAL;
}
index = ip_set_nfnl_get_byindex(par->net,
info->map_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warn("Cannot find map_set index %u as target\n",
- info->map_set.index);
+ pr_info_ratelimited("Cannot find map_set index %u as target\n",
+ info->map_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net,
info->add_set.index);
if (info->add_set.dim > IPSET_DIM_MAX ||
info->del_set.dim > IPSET_DIM_MAX ||
info->map_set.dim > IPSET_DIM_MAX) {
- pr_warn("Protocol error: SET target dimension is over the limit!\n");
+ pr_info_ratelimited("SET target dimension over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
return err;
if (info->flags & ~XT_SOCKET_FLAGS_V1) {
- pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1);
+ pr_info_ratelimited("unknown flags 0x%x\n",
+ info->flags & ~XT_SOCKET_FLAGS_V1);
return -EINVAL;
}
return 0;
return err;
if (info->flags & ~XT_SOCKET_FLAGS_V2) {
- pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2);
+ pr_info_ratelimited("unknown flags 0x%x\n",
+ info->flags & ~XT_SOCKET_FLAGS_V2);
return -EINVAL;
}
return 0;
if (err)
return err;
if (info->flags & ~XT_SOCKET_FLAGS_V3) {
- pr_info("unknown flags 0x%x\n",
- info->flags & ~XT_SOCKET_FLAGS_V3);
+ pr_info_ratelimited("unknown flags 0x%x\n",
+ info->flags & ~XT_SOCKET_FLAGS_V3);
return -EINVAL;
}
return 0;
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
return ret;
}
if (info->daytime_start > XT_TIME_MAX_DAYTIME ||
info->daytime_stop > XT_TIME_MAX_DAYTIME) {
- pr_info("invalid argument - start or "
- "stop time greater than 23:59:59\n");
+ pr_info_ratelimited("invalid argument - start or stop time greater than 23:59:59\n");
return -EDOM;
}
if (info->flags & ~XT_TIME_ALL_FLAGS) {
- pr_info("unknown flags 0x%x\n", info->flags & ~XT_TIME_ALL_FLAGS);
+ pr_info_ratelimited("unknown flags 0x%x\n",
+ info->flags & ~XT_TIME_ALL_FLAGS);
return -EINVAL;
}
.exit = netlink_tap_exit_net,
.id = &netlink_tap_net_id,
.size = sizeof(struct netlink_tap_net),
+ .async = true,
};
static bool netlink_filter_tap(const struct sk_buff *skb)
}
static int netlink_getname(struct socket *sock, struct sockaddr *addr,
- int *addr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
nladdr->nl_family = AF_NETLINK;
nladdr->nl_pad = 0;
- *addr_len = sizeof(*nladdr);
if (peer) {
nladdr->nl_pid = nlk->dst_portid;
nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
netlink_unlock_table();
}
- return 0;
+ return sizeof(*nladdr);
}
static int netlink_ioctl(struct socket *sock, unsigned int cmd,
if (cb->start) {
ret = cb->start(cb);
if (ret)
- goto error_unlock;
+ goto error_put;
}
nlk->cb_running = true;
*/
return -EINTR;
+error_put:
+ module_put(control->module);
error_unlock:
sock_put(sk);
mutex_unlock(nlk->cb_mutex);
static struct pernet_operations __net_initdata netlink_net_ops = {
.init = netlink_net_init,
.exit = netlink_net_exit,
+ .async = true,
};
static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
static struct pernet_operations genl_pernet_ops = {
.init = genl_pernet_init,
.exit = genl_pernet_exit,
+ .async = true,
};
static int __init genl_init(void)
}
static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr;
struct sock *sk = sock->sk;
struct nr_sock *nr = nr_sk(sk);
+ int uaddr_len;
memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25));
sax->fsa_ax25.sax25_call = nr->user_addr;
memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater));
sax->fsa_digipeater[0] = nr->dest_addr;
- *uaddr_len = sizeof(struct full_sockaddr_ax25);
+ uaddr_len = sizeof(struct full_sockaddr_ax25);
} else {
sax->fsa_ax25.sax25_family = AF_NETROM;
sax->fsa_ax25.sax25_ndigis = 0;
sax->fsa_ax25.sax25_call = nr->source_addr;
- *uaddr_len = sizeof(struct sockaddr_ax25);
+ uaddr_len = sizeof(struct sockaddr_ax25);
}
release_sock(sk);
- return 0;
+ return uaddr_len;
}
int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
pr_debug("uri: %s, len: %zu\n", uri, uri_len);
+ /* sdreq->tlv_len is u8, takes uri_len, + 3 for header, + 1 for NULL */
+ if (WARN_ON_ONCE(uri_len > U8_MAX - 4))
+ return NULL;
+
sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL);
if (sdreq == NULL)
return NULL;
}
static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
- int *len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
llcp_sock->dsap, llcp_sock->ssap);
memset(llcp_addr, 0, sizeof(*llcp_addr));
- *len = sizeof(struct sockaddr_nfc_llcp);
lock_sock(sk);
if (!llcp_sock->dev) {
llcp_addr->service_name_len);
release_sock(sk);
- return 0;
+ return sizeof(struct sockaddr_nfc_llcp);
}
static inline __poll_t llcp_accept_poll(struct sock *parent)
};
static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = {
- [NFC_SDP_ATTR_URI] = { .type = NLA_STRING },
+ [NFC_SDP_ATTR_URI] = { .type = NLA_STRING,
+ .len = U8_MAX - 4 },
[NFC_SDP_ATTR_SAP] = { .type = NLA_U8 },
};
}
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct net_device *dev;
struct sock *sk = sock->sk;
if (dev)
strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
rcu_read_unlock();
- *uaddr_len = sizeof(*uaddr);
- return 0;
+ return sizeof(*uaddr);
}
static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct net_device *dev;
struct sock *sk = sock->sk;
sll->sll_halen = 0;
}
rcu_read_unlock();
- *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
- return 0;
+ return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
}
static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
static struct pernet_operations packet_net_ops = {
.init = packet_net_init,
.exit = packet_net_exit,
+ .async = true,
};
.exit = phonet_exit_net,
.id = &phonet_net_id,
.size = sizeof(struct phonet_net),
+ .async = true,
};
/* Initialize Phonet devices list */
}
static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct pn_sock *pn = pn_sk(sk);
pn_sockaddr_set_object((struct sockaddr_pn *)addr,
pn->sobject);
- *sockaddr_len = sizeof(struct sockaddr_pn);
- return 0;
+ return sizeof(struct sockaddr_pn);
}
static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
}
static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
- int *len, int peer)
+ int peer)
{
struct qrtr_sock *ipc = qrtr_sk(sock->sk);
struct sockaddr_qrtr qaddr;
}
release_sock(sk);
- *len = sizeof(qaddr);
qaddr.sq_family = AF_QIPCRTR;
memcpy(saddr, &qaddr, sizeof(qaddr));
- return 0;
+ return sizeof(qaddr);
}
static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
rds_send_drop_to(rs, NULL);
rds_rdma_drop_keys(rs);
rds_notify_queue_get(rs, NULL);
+ __skb_queue_purge(&rs->rs_zcookie_queue);
spin_lock_bh(&rds_sock_lock);
list_del_init(&rs->rs_item);
}
static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
sin->sin_family = AF_INET;
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
/*
* - to signal that a previously congested destination may have become
* uncongested
* - A notification has been queued to the socket (this can be a congestion
- * update, or a RDMA completion).
+ * update, or a RDMA completion, or a MSG_ZEROCOPY completion).
*
* EPOLLOUT is asserted if there is room on the send queue. This does not mean
* however, that the next sendmsg() call will succeed. If the application tries
spin_unlock(&rs->rs_lock);
}
if (!list_empty(&rs->rs_recv_queue) ||
- !list_empty(&rs->rs_notify_queue))
+ !list_empty(&rs->rs_notify_queue) ||
+ !skb_queue_empty(&rs->rs_zcookie_queue))
mask |= (EPOLLIN | EPOLLRDNORM);
if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
mask |= (EPOLLOUT | EPOLLWRNORM);
+ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ mask |= POLLERR;
read_unlock_irqrestore(&rs->rs_recv_lock, flags);
/* clear state any time we wake a seen-congested socket */
INIT_LIST_HEAD(&rs->rs_recv_queue);
INIT_LIST_HEAD(&rs->rs_notify_queue);
INIT_LIST_HEAD(&rs->rs_cong_list);
+ skb_queue_head_init(&rs->rs_zcookie_queue);
spin_lock_init(&rs->rs_rdma_lock);
rs->rs_rdma_keys = RB_ROOT;
rs->rs_rx_traces = 0;
if (rds_destroy_pending(conn))
ret = -ENETDOWN;
else
- ret = trans->conn_alloc(conn, gfp);
+ ret = trans->conn_alloc(conn, GFP_ATOMIC);
if (ret) {
rcu_read_unlock();
kfree(conn->c_path);
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/errqueue.h>
#include "rds.h"
}
EXPORT_SYMBOL_GPL(rds_message_addref);
+static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie)
+{
+ struct rds_zcopy_cookies *ck = (struct rds_zcopy_cookies *)skb->cb;
+ int ncookies = ck->num;
+
+ if (ncookies == RDS_MAX_ZCOOKIES)
+ return false;
+ ck->cookies[ncookies] = cookie;
+ ck->num = ++ncookies;
+ return true;
+}
+
+static void rds_rm_zerocopy_callback(struct rds_sock *rs,
+ struct rds_znotifier *znotif)
+{
+ struct sk_buff *skb, *tail;
+ unsigned long flags;
+ struct sk_buff_head *q;
+ u32 cookie = znotif->z_cookie;
+ struct rds_zcopy_cookies *ck;
+
+ q = &rs->rs_zcookie_queue;
+ spin_lock_irqsave(&q->lock, flags);
+ tail = skb_peek_tail(q);
+
+ if (tail && skb_zcookie_add(tail, cookie)) {
+ spin_unlock_irqrestore(&q->lock, flags);
+ mm_unaccount_pinned_pages(&znotif->z_mmp);
+ consume_skb(rds_skb_from_znotifier(znotif));
+ /* caller invokes rds_wake_sk_sleep() */
+ return;
+ }
+
+ skb = rds_skb_from_znotifier(znotif);
+ ck = (struct rds_zcopy_cookies *)skb->cb;
+ memset(ck, 0, sizeof(*ck));
+ WARN_ON(!skb_zcookie_add(skb, cookie));
+
+ __skb_queue_tail(q, skb);
+
+ spin_unlock_irqrestore(&q->lock, flags);
+ /* caller invokes rds_wake_sk_sleep() */
+
+ mm_unaccount_pinned_pages(&znotif->z_mmp);
+}
+
/*
* This relies on dma_map_sg() not touching sg[].page during merging.
*/
static void rds_message_purge(struct rds_message *rm)
{
- unsigned long i;
+ unsigned long i, flags;
+ bool zcopy = false;
if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
return;
+ spin_lock_irqsave(&rm->m_rs_lock, flags);
+ if (rm->m_rs) {
+ struct rds_sock *rs = rm->m_rs;
+
+ if (rm->data.op_mmp_znotifier) {
+ zcopy = true;
+ rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
+ rds_wake_sk_sleep(rs);
+ rm->data.op_mmp_znotifier = NULL;
+ }
+ sock_put(rds_rs_to_sk(rs));
+ rm->m_rs = NULL;
+ }
+ spin_unlock_irqrestore(&rm->m_rs_lock, flags);
+
for (i = 0; i < rm->data.op_nents; i++) {
- rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i]));
/* XXX will have to put_page for page refs */
- __free_page(sg_page(&rm->data.op_sg[i]));
+ if (!zcopy)
+ __free_page(sg_page(&rm->data.op_sg[i]));
+ else
+ put_page(sg_page(&rm->data.op_sg[i]));
}
rm->data.op_nents = 0;
return rm;
}
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+ bool zcopy)
{
unsigned long to_copy, nbytes;
unsigned long sg_off;
struct scatterlist *sg;
int ret = 0;
+ int length = iov_iter_count(from);
rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
sg = rm->data.op_sg;
sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
+ if (zcopy) {
+ int total_copied = 0;
+ struct sk_buff *skb;
+
+ skb = alloc_skb(0, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ BUILD_BUG_ON(sizeof(skb->cb) <
+ max_t(int, sizeof(struct rds_znotifier),
+ sizeof(struct rds_zcopy_cookies)));
+ rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
+ if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
+ length)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ while (iov_iter_count(from)) {
+ struct page *pages;
+ size_t start;
+ ssize_t copied;
+
+ copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
+ 1, &start);
+ if (copied < 0) {
+ struct mmpin *mmp;
+ int i;
+
+ for (i = 0; i < rm->data.op_nents; i++)
+ put_page(sg_page(&rm->data.op_sg[i]));
+ mmp = &rm->data.op_mmp_znotifier->z_mmp;
+ mm_unaccount_pinned_pages(mmp);
+ ret = -EFAULT;
+ goto err;
+ }
+ total_copied += copied;
+ iov_iter_advance(from, copied);
+ length -= copied;
+ sg_set_page(sg, pages, copied, start);
+ rm->data.op_nents++;
+ sg++;
+ }
+ WARN_ON_ONCE(length != 0);
+ return ret;
+err:
+ consume_skb(skb);
+ rm->data.op_mmp_znotifier = NULL;
+ return ret;
+ } /* zcopy */
+
while (iov_iter_count(from)) {
if (!sg_page(sg)) {
ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
#define RDS_MSG_PAGEVEC 7
#define RDS_MSG_FLUSH 8
+struct rds_znotifier {
+ struct list_head z_list;
+ struct mmpin z_mmp;
+ u32 z_cookie;
+};
+
+#define RDS_ZCOPY_SKB(__skb) ((struct rds_znotifier *)&((__skb)->cb[0]))
+
+static inline struct sk_buff *rds_skb_from_znotifier(struct rds_znotifier *z)
+{
+ return container_of((void *)z, struct sk_buff, cb);
+}
+
struct rds_message {
refcount_t m_refcount;
struct list_head m_sock_item;
unsigned int op_count;
unsigned int op_dmasg;
unsigned int op_dmaoff;
+ struct rds_znotifier *op_mmp_znotifier;
struct scatterlist *op_sg;
} data;
};
/* Socket receive path trace points*/
u8 rs_rx_traces;
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
+
+ struct sk_buff_head rs_zcookie_queue;
};
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
/* message.c */
struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from);
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+ bool zcopy);
struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
__be16 dport, u64 seq);
return ret;
}
+static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg)
+{
+ struct sk_buff *skb;
+ struct sk_buff_head *q = &rs->rs_zcookie_queue;
+ struct rds_zcopy_cookies *done;
+
+ if (!msg->msg_control)
+ return false;
+
+ if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) ||
+ msg->msg_controllen < CMSG_SPACE(sizeof(*done)))
+ return false;
+
+ skb = skb_dequeue(q);
+ if (!skb)
+ return false;
+ done = (struct rds_zcopy_cookies *)skb->cb;
+ if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done),
+ done)) {
+ skb_queue_head(q, skb);
+ return false;
+ }
+ consume_skb(skb);
+ return true;
+}
+
int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int msg_flags)
{
if (msg_flags & MSG_OOB)
goto out;
+ if (msg_flags & MSG_ERRQUEUE)
+ return sock_recv_errqueue(sk, msg, size, SOL_IP, IP_RECVERR);
while (1) {
/* If there are pending notifications, do those - and nothing else */
if (!rds_next_incoming(rs, &inc)) {
if (nonblock) {
- ret = -EAGAIN;
+ bool reaped = rds_recvmsg_zcookie(rs, msg);
+
+ ret = reaped ? 0 : -EAGAIN;
break;
}
ret = -EFAULT;
goto out;
}
+ rds_recvmsg_zcookie(rs, msg);
rds_stats_inc(s_recv_delivered);
rm->rdma.op_notifier = NULL;
}
was_on_sock = 1;
- rm->m_rs = NULL;
}
spin_unlock(&rs->rs_lock);
*/
if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
spin_unlock_irqrestore(&cp->cp_lock, flags);
- spin_lock_irqsave(&rm->m_rs_lock, flags);
- rm->m_rs = NULL;
- spin_unlock_irqrestore(&rm->m_rs_lock, flags);
continue;
}
list_del_init(&rm->m_conn_item);
__rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
spin_unlock(&rs->rs_lock);
- rm->m_rs = NULL;
spin_unlock_irqrestore(&rm->m_rs_lock, flags);
rds_message_put(rm);
__rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
spin_unlock(&rs->rs_lock);
- rm->m_rs = NULL;
spin_unlock_irqrestore(&rm->m_rs_lock, flags);
rds_message_put(rm);
list_add_tail(&rm->m_sock_item, &rs->rs_send_queue);
set_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
rds_message_addref(rm);
+ sock_hold(rds_rs_to_sk(rs));
rm->m_rs = rs;
/* The code ordering is a little weird, but we're
* rds_message is getting to be quite complicated, and we'd like to allocate
* it all in one go. This figures out how big it needs to be up front.
*/
-static int rds_rm_size(struct msghdr *msg, int data_len)
+static int rds_rm_size(struct msghdr *msg, int num_sgs)
{
struct cmsghdr *cmsg;
int size = 0;
int cmsg_groups = 0;
int retval;
+ bool zcopy_cookie = false;
for_each_cmsghdr(cmsg, msg) {
if (!CMSG_OK(msg, cmsg))
break;
+ case RDS_CMSG_ZCOPY_COOKIE:
+ zcopy_cookie = true;
+ /* fall through */
+
case RDS_CMSG_RDMA_DEST:
case RDS_CMSG_RDMA_MAP:
cmsg_groups |= 2;
}
- size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
+ if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie)
+ return -EINVAL;
+
+ size += num_sgs * sizeof(struct scatterlist);
/* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
if (cmsg_groups == 3)
return size;
}
+static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm,
+ struct cmsghdr *cmsg)
+{
+ u32 *cookie;
+
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)) ||
+ !rm->data.op_mmp_znotifier)
+ return -EINVAL;
+ cookie = CMSG_DATA(cmsg);
+ rm->data.op_mmp_znotifier->z_cookie = *cookie;
+ return 0;
+}
+
static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
struct msghdr *msg, int *allocated_mr)
{
ret = rds_cmsg_atomic(rs, rm, cmsg);
break;
+ case RDS_CMSG_ZCOPY_COOKIE:
+ ret = rds_cmsg_zcopy(rs, rm, cmsg);
+ break;
+
default:
return -EINVAL;
}
long timeo = sock_sndtimeo(sk, nonblock);
struct rds_conn_path *cpath;
size_t total_payload_len = payload_len, rdma_payload_len = 0;
+ bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
+ sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
+ int num_sgs = ceil(payload_len, PAGE_SIZE);
/* Mirror Linux UDP mirror of BSD error message compatibility */
/* XXX: Perhaps MSG_MORE someday */
- if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
+ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) {
ret = -EOPNOTSUPP;
goto out;
}
goto out;
}
+ if (zcopy) {
+ if (rs->rs_transport->t_type != RDS_TRANS_TCP) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+ num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
+ }
/* size of rm including all sgs */
- ret = rds_rm_size(msg, payload_len);
+ ret = rds_rm_size(msg, num_sgs);
if (ret < 0)
goto out;
/* Attach data to the rm */
if (payload_len) {
- rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
+ rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
if (!rm->data.op_sg) {
ret = -ENOMEM;
goto out;
}
- ret = rds_message_copy_from_user(rm, &msg->msg_iter);
+ ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy);
if (ret)
goto out;
}
struct rds_tcp_connection *tc;
unsigned long flags;
struct sockaddr_in sin;
- int sinlen;
struct socket *sock;
spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
sock = tc->t_sock;
if (sock) {
- sock->ops->getname(sock, (struct sockaddr *)&sin,
- &sinlen, 0);
+ sock->ops->getname(sock, (struct sockaddr *)&sin, 0);
tsinfo.local_addr = sin.sin_addr.s_addr;
tsinfo.local_port = sin.sin_port;
- sock->ops->getname(sock, (struct sockaddr *)&sin,
- &sinlen, 1);
+ sock->ops->getname(sock, (struct sockaddr *)&sin, 1);
tsinfo.peer_addr = sin.sin_addr.s_addr;
tsinfo.peer_port = sin.sin_port;
}
}
static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_rose *srose = (struct full_sockaddr_rose *)uaddr;
struct sock *sk = sock->sk;
srose->srose_digis[n] = rose->source_digis[n];
}
- *uaddr_len = sizeof(struct full_sockaddr_rose);
- return 0;
+ return sizeof(struct full_sockaddr_rose);
}
int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct rose_neigh *neigh, unsigned int lci)
(char *)&opt, sizeof(opt));
if (ret == 0) {
ret = kernel_sendmsg(conn->params.local->socket, &msg,
- iov, 1, iov[0].iov_len);
+ iov, 2, len);
opt = IPV6_PMTUDISC_DO;
kernel_setsockopt(conn->params.local->socket,
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
sizeof(unsigned int), &id32);
} else {
+ unsigned long idl = call->user_call_ID;
+
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
- sizeof(unsigned long),
- &call->user_call_ID);
+ sizeof(unsigned long), &idl);
}
if (ret < 0)
goto error_unlock_call;
To compile this code as a module, choose M here: the
module will be called em_ipset.
+config NET_EMATCH_IPT
+ tristate "IPtables Matches"
+ depends on NET_EMATCH && NETFILTER && NETFILTER_XTABLES
+ ---help---
+ Say Y here to be able to classify packets based on iptables
+ matches.
+ Current supported match is "policy" which allows packet classification
+ based on IPsec policy that was used during decapsulation
+
+ To compile this code as a module, choose M here: the
+ module will be called em_ipt.
+
config NET_CLS_ACT
bool "Actions"
select NET_CLS
obj-$(CONFIG_NET_EMATCH_TEXT) += em_text.o
obj-$(CONFIG_NET_EMATCH_CANID) += em_canid.o
obj-$(CONFIG_NET_EMATCH_IPSET) += em_ipset.o
+obj-$(CONFIG_NET_EMATCH_IPT) += em_ipt.o
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tcf_idrinfo *idrinfo = tn->idrinfo;
} else if (type == RTM_GETACTION) {
return tcf_dump_walker(idrinfo, skb, cb);
} else {
- WARN(1, "tcf_generic_walker: unknown action %d\n", type);
+ WARN(1, "tcf_generic_walker: unknown command %d\n", type);
+ NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command");
return -EINVAL;
}
}
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind)
+ char *name, int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
struct tc_action *a;
struct tc_action_ops *a_o;
int err;
if (name == NULL) {
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
if (err < 0)
goto err_out;
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
- if (kind == NULL)
+ if (!kind) {
+ NL_SET_ERR_MSG(extack, "TC action kind must be specified");
goto err_out;
- if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
+ }
+ if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) {
+ NL_SET_ERR_MSG(extack, "TC action name too long");
goto err_out;
+ }
if (tb[TCA_ACT_COOKIE]) {
int cklen = nla_len(tb[TCA_ACT_COOKIE]);
- if (cklen > TC_COOKIE_MAX_SIZE)
+ if (cklen > TC_COOKIE_MAX_SIZE) {
+ NL_SET_ERR_MSG(extack, "TC cookie size above the maximum");
goto err_out;
+ }
cookie = nla_memdup_cookie(tb);
if (!cookie) {
+ NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
err = -ENOMEM;
goto err_out;
}
}
} else {
- err = -EINVAL;
- if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
+ if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
+ NL_SET_ERR_MSG(extack, "TC action name too long");
+ err = -EINVAL;
goto err_out;
+ }
}
a_o = tc_lookup_action_n(act_name);
goto err_mod;
}
#endif
+ NL_SET_ERR_MSG(extack, "Failed to load TC action module");
err = -ENOENT;
goto err_out;
}
/* backward compatibility for policer */
if (name == NULL)
- err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind);
+ err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
+ extack);
else
- err = a_o->init(net, nla, est, &a, ovr, bind);
+ err = a_o->init(net, nla, est, &a, ovr, bind, extack);
if (err < 0)
goto err_mod;
list_add_tail(&a->list, &actions);
tcf_action_destroy(&actions, bind);
+ NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
return ERR_PTR(err);
}
}
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
- struct list_head *actions)
+ struct list_head *actions, struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
int err;
int i;
- err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
if (err < 0)
return err;
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind);
+ act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
+ extack);
if (IS_ERR(act)) {
err = PTR_ERR(act);
goto err;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
- if (nest == NULL)
+ if (!nest)
goto out_nlmsg_trim;
if (tcf_action_dump(skb, actions, bind, ref) < 0)
static int
tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
- struct list_head *actions, int event)
+ struct list_head *actions, int event,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
0, 0) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
return -EINVAL;
}
}
static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid)
+ struct nlmsghdr *n, u32 portid,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_ACT_MAX + 1];
const struct tc_action_ops *ops;
int index;
int err;
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
if (err < 0)
goto err_out;
err = -EINVAL;
if (tb[TCA_ACT_INDEX] == NULL ||
- nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
+ nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) {
+ NL_SET_ERR_MSG(extack, "Invalid TC action index value");
goto err_out;
+ }
index = nla_get_u32(tb[TCA_ACT_INDEX]);
err = -EINVAL;
ops = tc_lookup_action(tb[TCA_ACT_KIND]);
- if (!ops) /* could happen in batch of actions */
+ if (!ops) { /* could happen in batch of actions */
+ NL_SET_ERR_MSG(extack, "Specified TC action not found");
goto err_out;
+ }
err = -ENOENT;
- if (ops->lookup(net, &a, index) == 0)
+ if (ops->lookup(net, &a, index, extack) == 0)
goto err_mod;
module_put(ops->owner);
}
static int tca_action_flush(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid)
+ struct nlmsghdr *n, u32 portid,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
unsigned char *b;
int err = -ENOMEM;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb) {
- pr_debug("tca_action_flush: failed skb alloc\n");
+ if (!skb)
return err;
- }
b = skb_tail_pointer(skb);
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
if (err < 0)
goto err_out;
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
ops = tc_lookup_action(kind);
- if (!ops) /*some idjot trying to flush unknown action */
+ if (!ops) { /*some idjot trying to flush unknown action */
+ NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action");
goto err_out;
+ }
nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
sizeof(*t), 0);
- if (!nlh)
+ if (!nlh) {
+ NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification");
goto out_module_put;
+ }
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
- if (nest == NULL)
+ if (!nest) {
+ NL_SET_ERR_MSG(extack, "Failed to add new netlink message");
goto out_module_put;
+ }
- err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
- if (err <= 0)
+ err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops, extack);
+ if (err <= 0) {
+ nla_nest_cancel(skb, nest);
goto out_module_put;
+ }
nla_nest_end(skb, nest);
n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
return 0;
+ if (err < 0)
+ NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");
return err;
static int
tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
- u32 portid)
+ u32 portid, struct netlink_ext_ack *extack)
{
int ret;
struct sk_buff *skb;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
0, 1) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
kfree_skb(skb);
return -EINVAL;
}
/* now do the delete */
ret = tcf_action_destroy(actions, 0);
if (ret < 0) {
+ NL_SET_ERR_MSG(extack, "Failed to delete TC action");
kfree_skb(skb);
return ret;
}
static int
tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
- u32 portid, int event)
+ u32 portid, int event, struct netlink_ext_ack *extack)
{
int i, ret;
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
LIST_HEAD(actions);
- ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+ ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
if (ret < 0)
return ret;
if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
- if (tb[1] != NULL)
- return tca_action_flush(net, tb[1], n, portid);
- else
- return -EINVAL;
+ if (tb[1])
+ return tca_action_flush(net, tb[1], n, portid, extack);
+
+ NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action");
+ return -EINVAL;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_get_1(net, tb[i], n, portid);
+ act = tcf_action_get_1(net, tb[i], n, portid, extack);
if (IS_ERR(act)) {
ret = PTR_ERR(act);
goto err;
}
if (event == RTM_GETACTION)
- ret = tcf_get_notify(net, portid, n, &actions, event);
+ ret = tcf_get_notify(net, portid, n, &actions, event, extack);
else { /* delete */
- ret = tcf_del_notify(net, n, &actions, portid);
+ ret = tcf_del_notify(net, n, &actions, portid, extack);
if (ret)
goto err;
return ret;
static int
tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
- u32 portid)
+ u32 portid, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
int err = 0;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
RTM_NEWACTION, 0, 0) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while deleting TC action");
kfree_skb(skb);
return -EINVAL;
}
}
static int tcf_action_add(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid, int ovr)
+ struct nlmsghdr *n, u32 portid, int ovr,
+ struct netlink_ext_ack *extack)
{
int ret = 0;
LIST_HEAD(actions);
- ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions);
+ ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
+ extack);
if (ret)
return ret;
- return tcf_add_notify(net, n, &actions, portid);
+ return tcf_add_notify(net, n, &actions, portid, extack);
}
static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
return ret;
if (tca[TCA_ACT_TAB] == NULL) {
- pr_notice("tc_ctl_action: received NO action attribs\n");
+ NL_SET_ERR_MSG(extack, "Netlink action attributes missing");
return -EINVAL;
}
if (n->nlmsg_flags & NLM_F_REPLACE)
ovr = 1;
replay:
- ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr);
+ ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+ extack);
if (ret == -EAGAIN)
goto replay;
break;
case RTM_DELACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
- portid, RTM_DELACTION);
+ portid, RTM_DELACTION, extack);
break;
case RTM_GETACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
- portid, RTM_GETACTION);
+ portid, RTM_GETACTION, extack);
break;
default:
BUG();
if (nest == NULL)
goto out_module_put;
- ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o);
+ ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o, NULL);
if (ret < 0)
goto out_module_put;
.exit = tcf_action_net_exit,
.id = &tcf_action_net_id,
.size = sizeof(struct tcf_action_net),
+ .async = true,
};
static int __init tc_action_init(void)
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
- int replace, int bind)
+ int replace, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
.exit_batch = bpf_exit_net,
.id = &bpf_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init bpf_init_module(void)
static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
.exit_batch = connmark_exit_net,
.id = &connmark_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init connmark_init_module(void)
static int tcf_csum_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
struct tcf_csum_params *params_old, *params_new;
static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
.exit_batch = csum_exit_net,
.id = &csum_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_DESCRIPTION("Checksum updating actions");
static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
struct nlattr *tb[TCA_GACT_MAX + 1];
static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
.exit_batch = gact_exit_net,
.id = &gact_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
struct nlattr *tb[TCA_IFE_MAX + 1];
static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
.exit_batch = ife_exit_net,
.id = &ife_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init ife_init_module(void)
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
bind);
static int tcf_xt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
bind);
static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
.exit_batch = ipt_exit_net,
.id = &ipt_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
.exit_batch = xt_exit_net,
.id = &xt_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
bool exists = false;
int ret;
- if (nla == NULL)
+ if (!nla) {
+ NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL);
+ }
+ ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack);
if (ret < 0)
return ret;
- if (tb[TCA_MIRRED_PARMS] == NULL)
+ if (!tb[TCA_MIRRED_PARMS]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters");
return -EINVAL;
+ }
parm = nla_data(tb[TCA_MIRRED_PARMS]);
exists = tcf_idr_check(tn, parm->index, a, bind);
default:
if (exists)
tcf_idr_release(*a, bind);
+ NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
return -EINVAL;
}
if (parm->ifindex) {
}
if (!exists) {
- if (dev == NULL)
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
return -EINVAL;
+ }
ret = tcf_idr_create(tn, parm->index, est, a,
&act_mirred_ops, bind, true);
if (ret)
static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
.exit_batch = mirred_exit_net,
.id = &mirred_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002)");
};
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
- struct tc_action **a, int ovr, int bind)
+ struct tc_action **a, int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
struct nlattr *tb[TCA_NAT_MAX + 1];
static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
.exit_batch = nat_exit_net,
.id = &nat_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_DESCRIPTION("Stateless NAT actions");
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
.exit_batch = pedit_exit_net,
.id = &pedit_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
static int tcf_act_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
int ret = 0, err;
struct nlattr *tb[TCA_POLICE_MAX + 1];
return -1;
}
-static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
.exit_batch = police_exit_net,
.id = &police_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init police_init_module(void)
static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
.exit_batch = sample_exit_net,
.id = &sample_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init sample_init_module(void)
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
struct nlattr *tb[TCA_DEF_MAX + 1];
static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
.exit_batch = simp_exit_net,
.id = &simp_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2005)");
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
.exit_batch = skbedit_exit_net,
.id = &skbedit_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
struct nlattr *tb[TCA_SKBMOD_MAX + 1];
static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
.exit_batch = skbmod_exit_net,
.id = &skbmod_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
.exit_batch = tunnel_key_exit_net,
.id = &tunnel_key_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init tunnel_key_init_module(void)
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
.exit_batch = vlan_exit_net,
.id = &vlan_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init vlan_init_module(void)
static unsigned int tcf_net_id;
static int tcf_block_insert(struct tcf_block *block, struct net *net,
- u32 block_index, struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
- int err;
- err = idr_alloc_u32(&tn->idr, block, &block_index, block_index,
- GFP_KERNEL);
- if (err)
- return err;
- block->index = block_index;
- return 0;
+ return idr_alloc_u32(&tn->idr, block, &block->index, block->index,
+ GFP_KERNEL);
}
static void tcf_block_remove(struct tcf_block *block, struct net *net)
}
static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
+ u32 block_index,
struct netlink_ext_ack *extack)
{
struct tcf_block *block;
err = -ENOMEM;
goto err_chain_create;
}
- block->net = qdisc_net(q);
block->refcnt = 1;
block->net = net;
- block->q = q;
+ block->index = block_index;
+
+ /* Don't store q pointer for blocks which are shared */
+ if (!tcf_block_shared(block))
+ block->q = q;
return block;
err_chain_create:
}
if (!block) {
- block = tcf_block_create(net, q, extack);
+ block = tcf_block_create(net, q, ei->block_index, extack);
if (IS_ERR(block))
return PTR_ERR(block);
created = true;
- if (ei->block_index) {
- err = tcf_block_insert(block, net,
- ei->block_index, extack);
+ if (tcf_block_shared(block)) {
+ err = tcf_block_insert(block, net, extack);
if (err)
goto err_block_insert;
}
nla_get_u32(tca[TCA_CHAIN]) != chain->index)
continue;
if (!tcf_chain_dump(chain, q, parent, skb, cb,
- index_start, &index))
+ index_start, &index)) {
+ err = -EMSGSIZE;
break;
+ }
}
cb->args[0] = index;
out:
+ /* If we did no progress, the error (EMSGSIZE) is real */
+ if (skb->len == 0 && err)
+ return err;
return skb->len;
}
if (exts->police && tb[exts->police]) {
act = tcf_action_init_1(net, tp, tb[exts->police],
rate_tlv, "police", ovr,
- TCA_ACT_BIND);
+ TCA_ACT_BIND, extack);
if (IS_ERR(act))
return PTR_ERR(act);
err = tcf_action_init(net, tp, tb[exts->action],
rate_tlv, NULL, ovr, TCA_ACT_BIND,
- &actions);
+ &actions, extack);
if (err)
return err;
list_for_each_entry(act, &actions, list)
.exit = tcf_net_exit,
.id = &tcf_net_id,
.size = sizeof(struct tcf_net),
+ .async = true,
};
static int __init tc_filter_init(void)
struct tc_u_common {
struct tc_u_hnode __rcu *hlist;
- struct tcf_block *block;
+ void *ptr;
int refcnt;
struct idr handle_idr;
struct hlist_node hnode;
#define U32_HASH_SHIFT 10
#define U32_HASH_SIZE (1 << U32_HASH_SHIFT)
+static void *tc_u_common_ptr(const struct tcf_proto *tp)
+{
+ struct tcf_block *block = tp->chain->block;
+
+ /* The block sharing is currently supported only
+ * for classless qdiscs. In that case we use block
+ * for tc_u_common identification. In case the
+ * block is not shared, block->q is a valid pointer
+ * and we can use that. That works for classful qdiscs.
+ */
+ if (tcf_block_shared(block))
+ return block;
+ else
+ return block->q;
+}
+
static unsigned int tc_u_hash(const struct tcf_proto *tp)
{
- return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
+ return hash_ptr(tc_u_common_ptr(tp), U32_HASH_SHIFT);
}
static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
h = tc_u_hash(tp);
hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
- if (tc->block == tp->chain->block)
+ if (tc->ptr == tc_u_common_ptr(tp))
return tc;
}
return NULL;
kfree(root_ht);
return -ENOBUFS;
}
- tp_c->block = tp->chain->block;
+ tp_c->ptr = tc_u_common_ptr(tp);
INIT_HLIST_NODE(&tp_c->hnode);
idr_init(&tp_c->handle_idr);
--- /dev/null
+/*
+ * net/sched/em_ipt.c IPtables matches Ematch
+ *
+ * (c) 2018 Eyal Birger <eyal.birger@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_ipt.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/pkt_cls.h>
+
+struct em_ipt_match {
+ const struct xt_match *match;
+ u32 hook;
+ u8 match_data[0] __aligned(8);
+};
+
+struct em_ipt_xt_match {
+ char *match_name;
+ int (*validate_match_data)(struct nlattr **tb, u8 mrev);
+};
+
+static const struct nla_policy em_ipt_policy[TCA_EM_IPT_MAX + 1] = {
+ [TCA_EM_IPT_MATCH_NAME] = { .type = NLA_STRING,
+ .len = XT_EXTENSION_MAXNAMELEN },
+ [TCA_EM_IPT_MATCH_REVISION] = { .type = NLA_U8 },
+ [TCA_EM_IPT_HOOK] = { .type = NLA_U32 },
+ [TCA_EM_IPT_NFPROTO] = { .type = NLA_U8 },
+ [TCA_EM_IPT_MATCH_DATA] = { .type = NLA_UNSPEC },
+};
+
+static int check_match(struct net *net, struct em_ipt_match *im, int mdata_len)
+{
+ struct xt_mtchk_param mtpar = {};
+ union {
+ struct ipt_entry e4;
+ struct ip6t_entry e6;
+ } e = {};
+
+ mtpar.net = net;
+ mtpar.table = "filter";
+ mtpar.hook_mask = 1 << im->hook;
+ mtpar.family = im->match->family;
+ mtpar.match = im->match;
+ mtpar.entryinfo = &e;
+ mtpar.matchinfo = (void *)im->match_data;
+ return xt_check_match(&mtpar, mdata_len, 0, 0);
+}
+
+static int policy_validate_match_data(struct nlattr **tb, u8 mrev)
+{
+ if (mrev != 0) {
+ pr_err("only policy match revision 0 supported");
+ return -EINVAL;
+ }
+
+ if (nla_get_u32(tb[TCA_EM_IPT_HOOK]) != NF_INET_PRE_ROUTING) {
+ pr_err("policy can only be matched on NF_INET_PRE_ROUTING");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
+ {
+ .match_name = "policy",
+ .validate_match_data = policy_validate_match_data
+ },
+ {}
+};
+
+static struct xt_match *get_xt_match(struct nlattr **tb)
+{
+ const struct em_ipt_xt_match *m;
+ struct nlattr *mname_attr;
+ u8 nfproto, mrev = 0;
+ int ret;
+
+ mname_attr = tb[TCA_EM_IPT_MATCH_NAME];
+ for (m = em_ipt_xt_matches; m->match_name; m++) {
+ if (!nla_strcmp(mname_attr, m->match_name))
+ break;
+ }
+
+ if (!m->match_name) {
+ pr_err("Unsupported xt match");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (tb[TCA_EM_IPT_MATCH_REVISION])
+ mrev = nla_get_u8(tb[TCA_EM_IPT_MATCH_REVISION]);
+
+ ret = m->validate_match_data(tb, mrev);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]);
+ return xt_request_find_match(nfproto, m->match_name, mrev);
+}
+
+static int em_ipt_change(struct net *net, void *data, int data_len,
+ struct tcf_ematch *em)
+{
+ struct nlattr *tb[TCA_EM_IPT_MAX + 1];
+ struct em_ipt_match *im = NULL;
+ struct xt_match *match;
+ int mdata_len, ret;
+
+ ret = nla_parse(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy,
+ NULL);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[TCA_EM_IPT_HOOK] || !tb[TCA_EM_IPT_MATCH_NAME] ||
+ !tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO])
+ return -EINVAL;
+
+ match = get_xt_match(tb);
+ if (IS_ERR(match)) {
+ pr_err("unable to load match\n");
+ return PTR_ERR(match);
+ }
+
+ mdata_len = XT_ALIGN(nla_len(tb[TCA_EM_IPT_MATCH_DATA]));
+ im = kzalloc(sizeof(*im) + mdata_len, GFP_KERNEL);
+ if (!im) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ im->match = match;
+ im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]);
+ nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len);
+
+ ret = check_match(net, im, mdata_len);
+ if (ret)
+ goto err;
+
+ em->datalen = sizeof(*im) + mdata_len;
+ em->data = (unsigned long)im;
+ return 0;
+
+err:
+ kfree(im);
+ module_put(match->me);
+ return ret;
+}
+
+static void em_ipt_destroy(struct tcf_ematch *em)
+{
+ struct em_ipt_match *im = (void *)em->data;
+
+ if (!im)
+ return;
+
+ if (im->match->destroy) {
+ struct xt_mtdtor_param par = {
+ .net = em->net,
+ .match = im->match,
+ .matchinfo = im->match_data,
+ .family = im->match->family
+ };
+ im->match->destroy(&par);
+ }
+ module_put(im->match->me);
+ kfree((void *)im);
+}
+
+static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
+ struct tcf_pkt_info *info)
+{
+ const struct em_ipt_match *im = (const void *)em->data;
+ struct xt_action_param acpar = {};
+ struct net_device *indev = NULL;
+ struct nf_hook_state state;
+ int ret;
+
+ rcu_read_lock();
+
+ if (skb->skb_iif)
+ indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
+
+ nf_hook_state_init(&state, im->hook, im->match->family,
+ indev ?: skb->dev, skb->dev, NULL, em->net, NULL);
+
+ acpar.match = im->match;
+ acpar.matchinfo = im->match_data;
+ acpar.state = &state;
+
+ ret = im->match->match(skb, &acpar);
+
+ rcu_read_unlock();
+ return ret;
+}
+
+static int em_ipt_dump(struct sk_buff *skb, struct tcf_ematch *em)
+{
+ struct em_ipt_match *im = (void *)em->data;
+
+ if (nla_put_string(skb, TCA_EM_IPT_MATCH_NAME, im->match->name) < 0)
+ return -EMSGSIZE;
+ if (nla_put_u32(skb, TCA_EM_IPT_HOOK, im->hook) < 0)
+ return -EMSGSIZE;
+ if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0)
+ return -EMSGSIZE;
+ if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->match->family) < 0)
+ return -EMSGSIZE;
+ if (nla_put(skb, TCA_EM_IPT_MATCH_DATA,
+ im->match->usersize ?: im->match->matchsize,
+ im->match_data) < 0)
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static struct tcf_ematch_ops em_ipt_ops = {
+ .kind = TCF_EM_IPT,
+ .change = em_ipt_change,
+ .destroy = em_ipt_destroy,
+ .match = em_ipt_match,
+ .dump = em_ipt_dump,
+ .owner = THIS_MODULE,
+ .link = LIST_HEAD_INIT(em_ipt_ops.link)
+};
+
+static int __init init_em_ipt(void)
+{
+ return tcf_em_register(&em_ipt_ops);
+}
+
+static void __exit exit_em_ipt(void)
+{
+ tcf_em_unregister(&em_ipt_ops);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eyal Birger <eyal.birger@gmail.com>");
+MODULE_DESCRIPTION("TC extended match for IPtables matches");
+
+module_init(init_em_ipt);
+module_exit(exit_em_ipt);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPT);
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
unsigned int len)
{
+ bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
* If child was empty even before update then backlog
* counter is screwed and we skip notification because
* parent class is already passive.
+ *
+ * If the original child was offloaded then it is allowed
+ * to be seem as empty, so the parent is notified anyway.
*/
- notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
+ notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
+ !qdisc_is_offloaded);
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
static struct pernet_operations psched_net_ops = {
.init = psched_net_init,
.exit = psched_net_exit,
+ .async = true,
};
static int __init pktsched_init(void)
sch->q.qlen = 0;
}
-static int prio_offload(struct Qdisc *sch, bool enable)
+static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
{
- struct prio_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_prio_qopt_offload opt = {
.handle = sch->handle,
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
- if (enable) {
+ if (qopt) {
opt.command = TC_PRIO_REPLACE;
- opt.replace_params.bands = q->bands;
- memcpy(&opt.replace_params.priomap, q->prio2band,
+ opt.replace_params.bands = qopt->bands;
+ memcpy(&opt.replace_params.priomap, qopt->priomap,
TC_PRIO_MAX + 1);
opt.replace_params.qstats = &sch->qstats;
} else {
struct prio_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
- prio_offload(sch, false);
+ prio_offload(sch, NULL);
for (prio = 0; prio < q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
}
}
+ prio_offload(sch, qopt);
sch_tree_lock(sch);
q->bands = qopt->bands;
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
}
sch_tree_unlock(sch);
- prio_offload(sch, true);
return 0;
}
struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct prio_sched_data *q = qdisc_priv(sch);
+ struct tc_prio_qopt_offload graft_offload;
+ struct net_device *dev = qdisc_dev(sch);
unsigned long band = arg - 1;
+ bool any_qdisc_is_offloaded;
+ int err;
if (new == NULL)
new = &noop_qdisc;
*old = qdisc_replace(sch, new, &q->queues[band]);
+
+ if (!tc_can_offload(dev))
+ return 0;
+
+ graft_offload.handle = sch->handle;
+ graft_offload.parent = sch->parent;
+ graft_offload.graft_params.band = band;
+ graft_offload.graft_params.child_handle = new->handle;
+ graft_offload.command = TC_PRIO_GRAFT;
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
+ &graft_offload);
+
+ /* Don't report error if the graft is part of destroy operation. */
+ if (err && new != &noop_qdisc) {
+ /* Don't report error if the parent, the old child and the new
+ * one are not offloaded.
+ */
+ any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
+ any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED;
+ if (*old)
+ any_qdisc_is_offloaded |= (*old)->flags &
+ TCQ_F_OFFLOADED;
+
+ if (any_qdisc_is_offloaded)
+ NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
+ }
+
return 0;
}
offload.o stream_sched.o stream_sched_prio.o \
stream_sched_rr.o stream_interleave.o
+sctp_diag-y := diag.o
+
sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
sctp-$(CONFIG_PROC_FS) += proc.o
sctp-$(CONFIG_SYSCTL) += sysctl.o
case SCTP_CID_RECONF:
return "RECONF";
+ case SCTP_CID_I_DATA:
+ return "I_DATA";
+
+ case SCTP_CID_I_FWD_TSN:
+ return "I_FWD_TSN";
+
default:
break;
}
--- /dev/null
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions implement sctp diag support.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresched(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Xin Long <lucien.xin@gmail.com>
+ */
+
+#include <linux/module.h>
+#include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
+#include <net/sctp/sctp.h>
+
+static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+ void *info);
+
+/* define some functions to make asoc/ep fill look clean */
+static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
+ struct sock *sk,
+ struct sctp_association *asoc)
+{
+ union sctp_addr laddr, paddr;
+ struct dst_entry *dst;
+ struct timer_list *t3_rtx = &asoc->peer.primary_path->T3_rtx_timer;
+
+ laddr = list_entry(asoc->base.bind_addr.address_list.next,
+ struct sctp_sockaddr_entry, list)->a;
+ paddr = asoc->peer.primary_path->ipaddr;
+ dst = asoc->peer.primary_path->dst;
+
+ r->idiag_family = sk->sk_family;
+ r->id.idiag_sport = htons(asoc->base.bind_addr.port);
+ r->id.idiag_dport = htons(asoc->peer.port);
+ r->id.idiag_if = dst ? dst->dev->ifindex : 0;
+ sock_diag_save_cookie(sk, r->id.idiag_cookie);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6) {
+ *(struct in6_addr *)r->id.idiag_src = laddr.v6.sin6_addr;
+ *(struct in6_addr *)r->id.idiag_dst = paddr.v6.sin6_addr;
+ } else
+#endif
+ {
+ memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
+ memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
+
+ r->id.idiag_src[0] = laddr.v4.sin_addr.s_addr;
+ r->id.idiag_dst[0] = paddr.v4.sin_addr.s_addr;
+ }
+
+ r->idiag_state = asoc->state;
+ if (timer_pending(t3_rtx)) {
+ r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
+ r->idiag_retrans = asoc->rtx_data_chunks;
+ r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
+ } else {
+ r->idiag_timer = 0;
+ r->idiag_retrans = 0;
+ r->idiag_expires = 0;
+ }
+}
+
+static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb,
+ struct list_head *address_list)
+{
+ struct sctp_sockaddr_entry *laddr;
+ int addrlen = sizeof(struct sockaddr_storage);
+ int addrcnt = 0;
+ struct nlattr *attr;
+ void *info = NULL;
+
+ list_for_each_entry_rcu(laddr, address_list, list)
+ addrcnt++;
+
+ attr = nla_reserve(skb, INET_DIAG_LOCALS, addrlen * addrcnt);
+ if (!attr)
+ return -EMSGSIZE;
+
+ info = nla_data(attr);
+ list_for_each_entry_rcu(laddr, address_list, list) {
+ memcpy(info, &laddr->a, sizeof(laddr->a));
+ memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a));
+ info += addrlen;
+ }
+
+ return 0;
+}
+
+static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb,
+ struct sctp_association *asoc)
+{
+ int addrlen = sizeof(struct sockaddr_storage);
+ struct sctp_transport *from;
+ struct nlattr *attr;
+ void *info = NULL;
+
+ attr = nla_reserve(skb, INET_DIAG_PEERS,
+ addrlen * asoc->peer.transport_count);
+ if (!attr)
+ return -EMSGSIZE;
+
+ info = nla_data(attr);
+ list_for_each_entry(from, &asoc->peer.transport_addr_list,
+ transports) {
+ memcpy(info, &from->ipaddr, sizeof(from->ipaddr));
+ memset(info + sizeof(from->ipaddr), 0,
+ addrlen - sizeof(from->ipaddr));
+ info += addrlen;
+ }
+
+ return 0;
+}
+
+/* sctp asoc/ep fill*/
+static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
+ struct sk_buff *skb,
+ const struct inet_diag_req_v2 *req,
+ struct user_namespace *user_ns,
+ int portid, u32 seq, u16 nlmsg_flags,
+ const struct nlmsghdr *unlh,
+ bool net_admin)
+{
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+ struct list_head *addr_list;
+ struct inet_diag_msg *r;
+ struct nlmsghdr *nlh;
+ int ext = req->idiag_ext;
+ struct sctp_infox infox;
+ void *info = NULL;
+
+ nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
+ nlmsg_flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ r = nlmsg_data(nlh);
+ BUG_ON(!sk_fullsock(sk));
+
+ if (asoc) {
+ inet_diag_msg_sctpasoc_fill(r, sk, asoc);
+ } else {
+ inet_diag_msg_common_fill(r, sk);
+ r->idiag_state = sk->sk_state;
+ r->idiag_timer = 0;
+ r->idiag_retrans = 0;
+ }
+
+ if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
+ goto errout;
+
+ if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) {
+ u32 mem[SK_MEMINFO_VARS];
+ int amt;
+
+ if (asoc && asoc->ep->sndbuf_policy)
+ amt = asoc->sndbuf_used;
+ else
+ amt = sk_wmem_alloc_get(sk);
+ mem[SK_MEMINFO_WMEM_ALLOC] = amt;
+ if (asoc && asoc->ep->rcvbuf_policy)
+ amt = atomic_read(&asoc->rmem_alloc);
+ else
+ amt = sk_rmem_alloc_get(sk);
+ mem[SK_MEMINFO_RMEM_ALLOC] = amt;
+ mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+ mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+ mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+ mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+ mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+ mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+ mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+
+ if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0)
+ goto errout;
+ }
+
+ if (ext & (1 << (INET_DIAG_INFO - 1))) {
+ struct nlattr *attr;
+
+ attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
+ sizeof(struct sctp_info),
+ INET_DIAG_PAD);
+ if (!attr)
+ goto errout;
+
+ info = nla_data(attr);
+ }
+ infox.sctpinfo = (struct sctp_info *)info;
+ infox.asoc = asoc;
+ sctp_diag_get_info(sk, r, &infox);
+
+ addr_list = asoc ? &asoc->base.bind_addr.address_list
+ : &ep->base.bind_addr.address_list;
+ if (inet_diag_msg_sctpladdrs_fill(skb, addr_list))
+ goto errout;
+
+ if (asoc && (ext & (1 << (INET_DIAG_CONG - 1))))
+ if (nla_put_string(skb, INET_DIAG_CONG, "reno") < 0)
+ goto errout;
+
+ if (asoc && inet_diag_msg_sctpaddrs_fill(skb, asoc))
+ goto errout;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+errout:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+/* callback and param */
+struct sctp_comm_param {
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
+ const struct inet_diag_req_v2 *r;
+ const struct nlmsghdr *nlh;
+ bool net_admin;
+};
+
+static size_t inet_assoc_attr_size(struct sctp_association *asoc)
+{
+ int addrlen = sizeof(struct sockaddr_storage);
+ int addrcnt = 0;
+ struct sctp_sockaddr_entry *laddr;
+
+ list_for_each_entry_rcu(laddr, &asoc->base.bind_addr.address_list,
+ list)
+ addrcnt++;
+
+ return nla_total_size(sizeof(struct sctp_info))
+ + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+ + nla_total_size(1) /* INET_DIAG_TOS */
+ + nla_total_size(1) /* INET_DIAG_TCLASS */
+ + nla_total_size(4) /* INET_DIAG_MARK */
+ + nla_total_size(addrlen * asoc->peer.transport_count)
+ + nla_total_size(addrlen * addrcnt)
+ + nla_total_size(sizeof(struct inet_diag_meminfo))
+ + nla_total_size(sizeof(struct inet_diag_msg))
+ + 64;
+}
+
+static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
+{
+ struct sctp_association *assoc = tsp->asoc;
+ struct sock *sk = tsp->asoc->base.sk;
+ struct sctp_comm_param *commp = p;
+ struct sk_buff *in_skb = commp->skb;
+ const struct inet_diag_req_v2 *req = commp->r;
+ const struct nlmsghdr *nlh = commp->nlh;
+ struct net *net = sock_net(in_skb->sk);
+ struct sk_buff *rep;
+ int err;
+
+ err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
+ if (err)
+ goto out;
+
+ err = -ENOMEM;
+ rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL);
+ if (!rep)
+ goto out;
+
+ lock_sock(sk);
+ if (sk != assoc->base.sk) {
+ release_sock(sk);
+ sk = assoc->base.sk;
+ lock_sock(sk);
+ }
+ err = inet_sctp_diag_fill(sk, assoc, rep, req,
+ sk_user_ns(NETLINK_CB(in_skb).sk),
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0, nlh,
+ commp->net_admin);
+ release_sock(sk);
+ if (err < 0) {
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(rep);
+ goto out;
+ }
+
+ err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
+ MSG_DONTWAIT);
+ if (err > 0)
+ err = 0;
+out:
+ return err;
+}
+
+static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
+{
+ struct sctp_endpoint *ep = tsp->asoc->ep;
+ struct sctp_comm_param *commp = p;
+ struct sock *sk = ep->base.sk;
+ struct sk_buff *skb = commp->skb;
+ struct netlink_callback *cb = commp->cb;
+ const struct inet_diag_req_v2 *r = commp->r;
+ struct sctp_association *assoc;
+ int err = 0;
+
+ lock_sock(sk);
+ list_for_each_entry(assoc, &ep->asocs, asocs) {
+ if (cb->args[4] < cb->args[1])
+ goto next;
+
+ if (r->id.idiag_sport != htons(assoc->base.bind_addr.port) &&
+ r->id.idiag_sport)
+ goto next;
+ if (r->id.idiag_dport != htons(assoc->peer.port) &&
+ r->id.idiag_dport)
+ goto next;
+
+ if (!cb->args[3] &&
+ inet_sctp_diag_fill(sk, NULL, skb, r,
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, cb->nlh,
+ commp->net_admin) < 0) {
+ err = 1;
+ goto release;
+ }
+ cb->args[3] = 1;
+
+ if (inet_sctp_diag_fill(sk, assoc, skb, r,
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, 0, cb->nlh,
+ commp->net_admin) < 0) {
+ err = 1;
+ goto release;
+ }
+next:
+ cb->args[4]++;
+ }
+ cb->args[1] = 0;
+ cb->args[3] = 0;
+ cb->args[4] = 0;
+release:
+ release_sock(sk);
+ return err;
+}
+
+static int sctp_sock_filter(struct sctp_transport *tsp, void *p)
+{
+ struct sctp_endpoint *ep = tsp->asoc->ep;
+ struct sctp_comm_param *commp = p;
+ struct sock *sk = ep->base.sk;
+ const struct inet_diag_req_v2 *r = commp->r;
+ struct sctp_association *assoc =
+ list_entry(ep->asocs.next, struct sctp_association, asocs);
+
+ /* find the ep only once through the transports by this condition */
+ if (tsp->asoc != assoc)
+ return 0;
+
+ if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
+ return 0;
+
+ return 1;
+}
+
+static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
+{
+ struct sctp_comm_param *commp = p;
+ struct sock *sk = ep->base.sk;
+ struct sk_buff *skb = commp->skb;
+ struct netlink_callback *cb = commp->cb;
+ const struct inet_diag_req_v2 *r = commp->r;
+ struct net *net = sock_net(skb->sk);
+ struct inet_sock *inet = inet_sk(sk);
+ int err = 0;
+
+ if (!net_eq(sock_net(sk), net))
+ goto out;
+
+ if (cb->args[4] < cb->args[1])
+ goto next;
+
+ if (!(r->idiag_states & TCPF_LISTEN) && !list_empty(&ep->asocs))
+ goto next;
+
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next;
+
+ if (r->id.idiag_sport != inet->inet_sport &&
+ r->id.idiag_sport)
+ goto next;
+
+ if (r->id.idiag_dport != inet->inet_dport &&
+ r->id.idiag_dport)
+ goto next;
+
+ if (inet_sctp_diag_fill(sk, NULL, skb, r,
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->nlh, commp->net_admin) < 0) {
+ err = 2;
+ goto out;
+ }
+next:
+ cb->args[4]++;
+out:
+ return err;
+}
+
+/* define the functions for sctp_diag_handler*/
+static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+ void *info)
+{
+ struct sctp_infox *infox = (struct sctp_infox *)info;
+
+ if (infox->asoc) {
+ r->idiag_rqueue = atomic_read(&infox->asoc->rmem_alloc);
+ r->idiag_wqueue = infox->asoc->sndbuf_used;
+ } else {
+ r->idiag_rqueue = sk->sk_ack_backlog;
+ r->idiag_wqueue = sk->sk_max_ack_backlog;
+ }
+ if (infox->sctpinfo)
+ sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo);
+}
+
+static int sctp_diag_dump_one(struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh,
+ const struct inet_diag_req_v2 *req)
+{
+ struct net *net = sock_net(in_skb->sk);
+ union sctp_addr laddr, paddr;
+ struct sctp_comm_param commp = {
+ .skb = in_skb,
+ .r = req,
+ .nlh = nlh,
+ .net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN),
+ };
+
+ if (req->sdiag_family == AF_INET) {
+ laddr.v4.sin_port = req->id.idiag_sport;
+ laddr.v4.sin_addr.s_addr = req->id.idiag_src[0];
+ laddr.v4.sin_family = AF_INET;
+
+ paddr.v4.sin_port = req->id.idiag_dport;
+ paddr.v4.sin_addr.s_addr = req->id.idiag_dst[0];
+ paddr.v4.sin_family = AF_INET;
+ } else {
+ laddr.v6.sin6_port = req->id.idiag_sport;
+ memcpy(&laddr.v6.sin6_addr, req->id.idiag_src,
+ sizeof(laddr.v6.sin6_addr));
+ laddr.v6.sin6_family = AF_INET6;
+
+ paddr.v6.sin6_port = req->id.idiag_dport;
+ memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst,
+ sizeof(paddr.v6.sin6_addr));
+ paddr.v6.sin6_family = AF_INET6;
+ }
+
+ return sctp_transport_lookup_process(sctp_tsp_dump_one,
+ net, &laddr, &paddr, &commp);
+}
+
+static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *r, struct nlattr *bc)
+{
+ u32 idiag_states = r->idiag_states;
+ struct net *net = sock_net(skb->sk);
+ struct sctp_comm_param commp = {
+ .skb = skb,
+ .cb = cb,
+ .r = r,
+ .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
+ };
+ int pos = cb->args[2];
+
+ /* eps hashtable dumps
+ * args:
+ * 0 : if it will traversal listen sock
+ * 1 : to record the sock pos of this time's traversal
+ * 4 : to work as a temporary variable to traversal list
+ */
+ if (cb->args[0] == 0) {
+ if (!(idiag_states & TCPF_LISTEN))
+ goto skip;
+ if (sctp_for_each_endpoint(sctp_ep_dump, &commp))
+ goto done;
+skip:
+ cb->args[0] = 1;
+ cb->args[1] = 0;
+ cb->args[4] = 0;
+ }
+
+ /* asocs by transport hashtable dump
+ * args:
+ * 1 : to record the assoc pos of this time's traversal
+ * 2 : to record the transport pos of this time's traversal
+ * 3 : to mark if we have dumped the ep info of the current asoc
+ * 4 : to work as a temporary variable to traversal list
+ * 5 : to save the sk we get from travelsing the tsp list.
+ */
+ if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
+ goto done;
+
+ sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
+ net, &pos, &commp);
+ cb->args[2] = pos;
+
+done:
+ cb->args[1] = cb->args[4];
+ cb->args[4] = 0;
+}
+
+static const struct inet_diag_handler sctp_diag_handler = {
+ .dump = sctp_diag_dump,
+ .dump_one = sctp_diag_dump_one,
+ .idiag_get_info = sctp_diag_get_info,
+ .idiag_type = IPPROTO_SCTP,
+ .idiag_info_size = sizeof(struct sctp_info),
+};
+
+static int __init sctp_diag_init(void)
+{
+ return inet_diag_register(&sctp_diag_handler);
+}
+
+static void __exit sctp_diag_exit(void)
+{
+ inet_diag_unregister(&sctp_diag_handler);
+}
+
+module_init(sctp_diag_init);
+module_exit(sctp_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-132);
rhl_for_each_entry_rcu(transport, tmp, list, node)
if (transport->asoc->ep == t->asoc->ep) {
rcu_read_unlock();
- err = -EEXIST;
- goto out;
+ return -EEXIST;
}
rcu_read_unlock();
err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
&t->node, sctp_hash_params);
-
-out:
if (err)
pr_err_once("insert transport fail, errno %d\n", err);
/* Handle SCTP_I_WANT_MAPPED_V4_ADDR for getpeername() and getsockname() */
static int sctp_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
int rc;
- rc = inet6_getname(sock, uaddr, uaddr_len, peer);
+ rc = inet6_getname(sock, uaddr, peer);
- if (rc != 0)
+ if (rc < 0)
return rc;
- *uaddr_len = sctp_v6_addr_to_user(sctp_sk(sock->sk),
+ rc = sctp_v6_addr_to_user(sctp_sk(sock->sk),
(union sctp_addr *)uaddr);
return rc;
+++ /dev/null
-#include <linux/module.h>
-#include <linux/inet_diag.h>
-#include <linux/sock_diag.h>
-#include <net/sctp/sctp.h>
-
-static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
- void *info);
-
-/* define some functions to make asoc/ep fill look clean */
-static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
- struct sock *sk,
- struct sctp_association *asoc)
-{
- union sctp_addr laddr, paddr;
- struct dst_entry *dst;
- struct timer_list *t3_rtx = &asoc->peer.primary_path->T3_rtx_timer;
-
- laddr = list_entry(asoc->base.bind_addr.address_list.next,
- struct sctp_sockaddr_entry, list)->a;
- paddr = asoc->peer.primary_path->ipaddr;
- dst = asoc->peer.primary_path->dst;
-
- r->idiag_family = sk->sk_family;
- r->id.idiag_sport = htons(asoc->base.bind_addr.port);
- r->id.idiag_dport = htons(asoc->peer.port);
- r->id.idiag_if = dst ? dst->dev->ifindex : 0;
- sock_diag_save_cookie(sk, r->id.idiag_cookie);
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6) {
- *(struct in6_addr *)r->id.idiag_src = laddr.v6.sin6_addr;
- *(struct in6_addr *)r->id.idiag_dst = paddr.v6.sin6_addr;
- } else
-#endif
- {
- memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
- memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
-
- r->id.idiag_src[0] = laddr.v4.sin_addr.s_addr;
- r->id.idiag_dst[0] = paddr.v4.sin_addr.s_addr;
- }
-
- r->idiag_state = asoc->state;
- if (timer_pending(t3_rtx)) {
- r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
- r->idiag_retrans = asoc->rtx_data_chunks;
- r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
- } else {
- r->idiag_timer = 0;
- r->idiag_retrans = 0;
- r->idiag_expires = 0;
- }
-}
-
-static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb,
- struct list_head *address_list)
-{
- struct sctp_sockaddr_entry *laddr;
- int addrlen = sizeof(struct sockaddr_storage);
- int addrcnt = 0;
- struct nlattr *attr;
- void *info = NULL;
-
- list_for_each_entry_rcu(laddr, address_list, list)
- addrcnt++;
-
- attr = nla_reserve(skb, INET_DIAG_LOCALS, addrlen * addrcnt);
- if (!attr)
- return -EMSGSIZE;
-
- info = nla_data(attr);
- list_for_each_entry_rcu(laddr, address_list, list) {
- memcpy(info, &laddr->a, sizeof(laddr->a));
- memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a));
- info += addrlen;
- }
-
- return 0;
-}
-
-static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb,
- struct sctp_association *asoc)
-{
- int addrlen = sizeof(struct sockaddr_storage);
- struct sctp_transport *from;
- struct nlattr *attr;
- void *info = NULL;
-
- attr = nla_reserve(skb, INET_DIAG_PEERS,
- addrlen * asoc->peer.transport_count);
- if (!attr)
- return -EMSGSIZE;
-
- info = nla_data(attr);
- list_for_each_entry(from, &asoc->peer.transport_addr_list,
- transports) {
- memcpy(info, &from->ipaddr, sizeof(from->ipaddr));
- memset(info + sizeof(from->ipaddr), 0,
- addrlen - sizeof(from->ipaddr));
- info += addrlen;
- }
-
- return 0;
-}
-
-/* sctp asoc/ep fill*/
-static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
- struct sk_buff *skb,
- const struct inet_diag_req_v2 *req,
- struct user_namespace *user_ns,
- int portid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh,
- bool net_admin)
-{
- struct sctp_endpoint *ep = sctp_sk(sk)->ep;
- struct list_head *addr_list;
- struct inet_diag_msg *r;
- struct nlmsghdr *nlh;
- int ext = req->idiag_ext;
- struct sctp_infox infox;
- void *info = NULL;
-
- nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
- nlmsg_flags);
- if (!nlh)
- return -EMSGSIZE;
-
- r = nlmsg_data(nlh);
- BUG_ON(!sk_fullsock(sk));
-
- if (asoc) {
- inet_diag_msg_sctpasoc_fill(r, sk, asoc);
- } else {
- inet_diag_msg_common_fill(r, sk);
- r->idiag_state = sk->sk_state;
- r->idiag_timer = 0;
- r->idiag_retrans = 0;
- }
-
- if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
- goto errout;
-
- if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) {
- u32 mem[SK_MEMINFO_VARS];
- int amt;
-
- if (asoc && asoc->ep->sndbuf_policy)
- amt = asoc->sndbuf_used;
- else
- amt = sk_wmem_alloc_get(sk);
- mem[SK_MEMINFO_WMEM_ALLOC] = amt;
- if (asoc && asoc->ep->rcvbuf_policy)
- amt = atomic_read(&asoc->rmem_alloc);
- else
- amt = sk_rmem_alloc_get(sk);
- mem[SK_MEMINFO_RMEM_ALLOC] = amt;
- mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
- mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
- mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
- mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
- mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
- mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
- mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
-
- if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0)
- goto errout;
- }
-
- if (ext & (1 << (INET_DIAG_INFO - 1))) {
- struct nlattr *attr;
-
- attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
- sizeof(struct sctp_info),
- INET_DIAG_PAD);
- if (!attr)
- goto errout;
-
- info = nla_data(attr);
- }
- infox.sctpinfo = (struct sctp_info *)info;
- infox.asoc = asoc;
- sctp_diag_get_info(sk, r, &infox);
-
- addr_list = asoc ? &asoc->base.bind_addr.address_list
- : &ep->base.bind_addr.address_list;
- if (inet_diag_msg_sctpladdrs_fill(skb, addr_list))
- goto errout;
-
- if (asoc && (ext & (1 << (INET_DIAG_CONG - 1))))
- if (nla_put_string(skb, INET_DIAG_CONG, "reno") < 0)
- goto errout;
-
- if (asoc && inet_diag_msg_sctpaddrs_fill(skb, asoc))
- goto errout;
-
- nlmsg_end(skb, nlh);
- return 0;
-
-errout:
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
-}
-
-/* callback and param */
-struct sctp_comm_param {
- struct sk_buff *skb;
- struct netlink_callback *cb;
- const struct inet_diag_req_v2 *r;
- const struct nlmsghdr *nlh;
- bool net_admin;
-};
-
-static size_t inet_assoc_attr_size(struct sctp_association *asoc)
-{
- int addrlen = sizeof(struct sockaddr_storage);
- int addrcnt = 0;
- struct sctp_sockaddr_entry *laddr;
-
- list_for_each_entry_rcu(laddr, &asoc->base.bind_addr.address_list,
- list)
- addrcnt++;
-
- return nla_total_size(sizeof(struct sctp_info))
- + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
- + nla_total_size(1) /* INET_DIAG_TOS */
- + nla_total_size(1) /* INET_DIAG_TCLASS */
- + nla_total_size(4) /* INET_DIAG_MARK */
- + nla_total_size(addrlen * asoc->peer.transport_count)
- + nla_total_size(addrlen * addrcnt)
- + nla_total_size(sizeof(struct inet_diag_meminfo))
- + nla_total_size(sizeof(struct inet_diag_msg))
- + 64;
-}
-
-static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
-{
- struct sctp_association *assoc = tsp->asoc;
- struct sock *sk = tsp->asoc->base.sk;
- struct sctp_comm_param *commp = p;
- struct sk_buff *in_skb = commp->skb;
- const struct inet_diag_req_v2 *req = commp->r;
- const struct nlmsghdr *nlh = commp->nlh;
- struct net *net = sock_net(in_skb->sk);
- struct sk_buff *rep;
- int err;
-
- err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
- if (err)
- goto out;
-
- err = -ENOMEM;
- rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL);
- if (!rep)
- goto out;
-
- lock_sock(sk);
- if (sk != assoc->base.sk) {
- release_sock(sk);
- sk = assoc->base.sk;
- lock_sock(sk);
- }
- err = inet_sctp_diag_fill(sk, assoc, rep, req,
- sk_user_ns(NETLINK_CB(in_skb).sk),
- NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0, nlh,
- commp->net_admin);
- release_sock(sk);
- if (err < 0) {
- WARN_ON(err == -EMSGSIZE);
- kfree_skb(rep);
- goto out;
- }
-
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
- MSG_DONTWAIT);
- if (err > 0)
- err = 0;
-out:
- return err;
-}
-
-static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
-{
- struct sctp_endpoint *ep = tsp->asoc->ep;
- struct sctp_comm_param *commp = p;
- struct sock *sk = ep->base.sk;
- struct sk_buff *skb = commp->skb;
- struct netlink_callback *cb = commp->cb;
- const struct inet_diag_req_v2 *r = commp->r;
- struct sctp_association *assoc;
- int err = 0;
-
- lock_sock(sk);
- list_for_each_entry(assoc, &ep->asocs, asocs) {
- if (cb->args[4] < cb->args[1])
- goto next;
-
- if (r->id.idiag_sport != htons(assoc->base.bind_addr.port) &&
- r->id.idiag_sport)
- goto next;
- if (r->id.idiag_dport != htons(assoc->peer.port) &&
- r->id.idiag_dport)
- goto next;
-
- if (!cb->args[3] &&
- inet_sctp_diag_fill(sk, NULL, skb, r,
- sk_user_ns(NETLINK_CB(cb->skb).sk),
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, cb->nlh,
- commp->net_admin) < 0) {
- err = 1;
- goto release;
- }
- cb->args[3] = 1;
-
- if (inet_sctp_diag_fill(sk, assoc, skb, r,
- sk_user_ns(NETLINK_CB(cb->skb).sk),
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0, cb->nlh,
- commp->net_admin) < 0) {
- err = 1;
- goto release;
- }
-next:
- cb->args[4]++;
- }
- cb->args[1] = 0;
- cb->args[3] = 0;
- cb->args[4] = 0;
-release:
- release_sock(sk);
- return err;
-}
-
-static int sctp_sock_filter(struct sctp_transport *tsp, void *p)
-{
- struct sctp_endpoint *ep = tsp->asoc->ep;
- struct sctp_comm_param *commp = p;
- struct sock *sk = ep->base.sk;
- const struct inet_diag_req_v2 *r = commp->r;
- struct sctp_association *assoc =
- list_entry(ep->asocs.next, struct sctp_association, asocs);
-
- /* find the ep only once through the transports by this condition */
- if (tsp->asoc != assoc)
- return 0;
-
- if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
- return 0;
-
- return 1;
-}
-
-static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
-{
- struct sctp_comm_param *commp = p;
- struct sock *sk = ep->base.sk;
- struct sk_buff *skb = commp->skb;
- struct netlink_callback *cb = commp->cb;
- const struct inet_diag_req_v2 *r = commp->r;
- struct net *net = sock_net(skb->sk);
- struct inet_sock *inet = inet_sk(sk);
- int err = 0;
-
- if (!net_eq(sock_net(sk), net))
- goto out;
-
- if (cb->args[4] < cb->args[1])
- goto next;
-
- if (!(r->idiag_states & TCPF_LISTEN) && !list_empty(&ep->asocs))
- goto next;
-
- if (r->sdiag_family != AF_UNSPEC &&
- sk->sk_family != r->sdiag_family)
- goto next;
-
- if (r->id.idiag_sport != inet->inet_sport &&
- r->id.idiag_sport)
- goto next;
-
- if (r->id.idiag_dport != inet->inet_dport &&
- r->id.idiag_dport)
- goto next;
-
- if (inet_sctp_diag_fill(sk, NULL, skb, r,
- sk_user_ns(NETLINK_CB(cb->skb).sk),
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- cb->nlh, commp->net_admin) < 0) {
- err = 2;
- goto out;
- }
-next:
- cb->args[4]++;
-out:
- return err;
-}
-
-/* define the functions for sctp_diag_handler*/
-static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
- void *info)
-{
- struct sctp_infox *infox = (struct sctp_infox *)info;
-
- if (infox->asoc) {
- r->idiag_rqueue = atomic_read(&infox->asoc->rmem_alloc);
- r->idiag_wqueue = infox->asoc->sndbuf_used;
- } else {
- r->idiag_rqueue = sk->sk_ack_backlog;
- r->idiag_wqueue = sk->sk_max_ack_backlog;
- }
- if (infox->sctpinfo)
- sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo);
-}
-
-static int sctp_diag_dump_one(struct sk_buff *in_skb,
- const struct nlmsghdr *nlh,
- const struct inet_diag_req_v2 *req)
-{
- struct net *net = sock_net(in_skb->sk);
- union sctp_addr laddr, paddr;
- struct sctp_comm_param commp = {
- .skb = in_skb,
- .r = req,
- .nlh = nlh,
- .net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN),
- };
-
- if (req->sdiag_family == AF_INET) {
- laddr.v4.sin_port = req->id.idiag_sport;
- laddr.v4.sin_addr.s_addr = req->id.idiag_src[0];
- laddr.v4.sin_family = AF_INET;
-
- paddr.v4.sin_port = req->id.idiag_dport;
- paddr.v4.sin_addr.s_addr = req->id.idiag_dst[0];
- paddr.v4.sin_family = AF_INET;
- } else {
- laddr.v6.sin6_port = req->id.idiag_sport;
- memcpy(&laddr.v6.sin6_addr, req->id.idiag_src,
- sizeof(laddr.v6.sin6_addr));
- laddr.v6.sin6_family = AF_INET6;
-
- paddr.v6.sin6_port = req->id.idiag_dport;
- memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst,
- sizeof(paddr.v6.sin6_addr));
- paddr.v6.sin6_family = AF_INET6;
- }
-
- return sctp_transport_lookup_process(sctp_tsp_dump_one,
- net, &laddr, &paddr, &commp);
-}
-
-static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
- const struct inet_diag_req_v2 *r, struct nlattr *bc)
-{
- u32 idiag_states = r->idiag_states;
- struct net *net = sock_net(skb->sk);
- struct sctp_comm_param commp = {
- .skb = skb,
- .cb = cb,
- .r = r,
- .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
- };
- int pos = cb->args[2];
-
- /* eps hashtable dumps
- * args:
- * 0 : if it will traversal listen sock
- * 1 : to record the sock pos of this time's traversal
- * 4 : to work as a temporary variable to traversal list
- */
- if (cb->args[0] == 0) {
- if (!(idiag_states & TCPF_LISTEN))
- goto skip;
- if (sctp_for_each_endpoint(sctp_ep_dump, &commp))
- goto done;
-skip:
- cb->args[0] = 1;
- cb->args[1] = 0;
- cb->args[4] = 0;
- }
-
- /* asocs by transport hashtable dump
- * args:
- * 1 : to record the assoc pos of this time's traversal
- * 2 : to record the transport pos of this time's traversal
- * 3 : to mark if we have dumped the ep info of the current asoc
- * 4 : to work as a temporary variable to traversal list
- * 5 : to save the sk we get from travelsing the tsp list.
- */
- if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
- goto done;
-
- sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
- net, &pos, &commp);
- cb->args[2] = pos;
-
-done:
- cb->args[1] = cb->args[4];
- cb->args[4] = 0;
-}
-
-static const struct inet_diag_handler sctp_diag_handler = {
- .dump = sctp_diag_dump,
- .dump_one = sctp_diag_dump_one,
- .idiag_get_info = sctp_diag_get_info,
- .idiag_type = IPPROTO_SCTP,
- .idiag_info_size = sizeof(struct sctp_info),
-};
-
-static int __init sctp_diag_init(void)
-{
- return inet_diag_register(&sctp_diag_handler);
-}
-
-static void __exit sctp_diag_exit(void)
-{
- inet_diag_unregister(&sctp_diag_handler);
-}
-
-module_init(sctp_diag_init);
-module_exit(sctp_diag_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-132);
static int sctp_msghdr_parse(const struct msghdr *msg,
struct sctp_cmsgs *cmsgs);
-static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
+static int sctp_sendmsg_parse(struct sock *sk, struct sctp_cmsgs *cmsgs,
+ struct sctp_sndrcvinfo *srinfo,
+ const struct msghdr *msg, size_t msg_len)
{
- struct net *net = sock_net(sk);
- struct sctp_sock *sp;
- struct sctp_endpoint *ep;
- struct sctp_association *new_asoc = NULL, *asoc = NULL;
- struct sctp_transport *transport, *chunk_tp;
- struct sctp_chunk *chunk;
- union sctp_addr to;
- struct sockaddr *msg_name = NULL;
- struct sctp_sndrcvinfo default_sinfo;
- struct sctp_sndrcvinfo *sinfo;
- struct sctp_initmsg *sinit;
- sctp_assoc_t associd = 0;
- struct sctp_cmsgs cmsgs = { NULL };
- enum sctp_scope scope;
- bool fill_sinfo_ttl = false, wait_connect = false;
- struct sctp_datamsg *datamsg;
- int msg_flags = msg->msg_flags;
- __u16 sinfo_flags = 0;
- long timeo;
+ __u16 sflags;
int err;
- err = 0;
- sp = sctp_sk(sk);
- ep = sp->ep;
-
- pr_debug("%s: sk:%p, msg:%p, msg_len:%zu ep:%p\n", __func__, sk,
- msg, msg_len, ep);
+ if (sctp_sstate(sk, LISTENING) && sctp_style(sk, TCP))
+ return -EPIPE;
- /* We cannot send a message over a TCP-style listening socket. */
- if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) {
- err = -EPIPE;
- goto out_nounlock;
- }
+ if (msg_len > sk->sk_sndbuf)
+ return -EMSGSIZE;
- /* Parse out the SCTP CMSGs. */
- err = sctp_msghdr_parse(msg, &cmsgs);
+ memset(cmsgs, 0, sizeof(*cmsgs));
+ err = sctp_msghdr_parse(msg, cmsgs);
if (err) {
pr_debug("%s: msghdr parse err:%x\n", __func__, err);
- goto out_nounlock;
+ return err;
}
- /* Fetch the destination address for this packet. This
- * address only selects the association--it is not necessarily
- * the address we will send to.
- * For a peeled-off socket, msg_name is ignored.
- */
- if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
- int msg_namelen = msg->msg_namelen;
-
- err = sctp_verify_addr(sk, (union sctp_addr *)msg->msg_name,
- msg_namelen);
- if (err)
- return err;
+ memset(srinfo, 0, sizeof(*srinfo));
+ if (cmsgs->srinfo) {
+ srinfo->sinfo_stream = cmsgs->srinfo->sinfo_stream;
+ srinfo->sinfo_flags = cmsgs->srinfo->sinfo_flags;
+ srinfo->sinfo_ppid = cmsgs->srinfo->sinfo_ppid;
+ srinfo->sinfo_context = cmsgs->srinfo->sinfo_context;
+ srinfo->sinfo_assoc_id = cmsgs->srinfo->sinfo_assoc_id;
+ srinfo->sinfo_timetolive = cmsgs->srinfo->sinfo_timetolive;
+ }
- if (msg_namelen > sizeof(to))
- msg_namelen = sizeof(to);
- memcpy(&to, msg->msg_name, msg_namelen);
- msg_name = msg->msg_name;
+ if (cmsgs->sinfo) {
+ srinfo->sinfo_stream = cmsgs->sinfo->snd_sid;
+ srinfo->sinfo_flags = cmsgs->sinfo->snd_flags;
+ srinfo->sinfo_ppid = cmsgs->sinfo->snd_ppid;
+ srinfo->sinfo_context = cmsgs->sinfo->snd_context;
+ srinfo->sinfo_assoc_id = cmsgs->sinfo->snd_assoc_id;
}
- sinit = cmsgs.init;
- if (cmsgs.sinfo != NULL) {
- memset(&default_sinfo, 0, sizeof(default_sinfo));
- default_sinfo.sinfo_stream = cmsgs.sinfo->snd_sid;
- default_sinfo.sinfo_flags = cmsgs.sinfo->snd_flags;
- default_sinfo.sinfo_ppid = cmsgs.sinfo->snd_ppid;
- default_sinfo.sinfo_context = cmsgs.sinfo->snd_context;
- default_sinfo.sinfo_assoc_id = cmsgs.sinfo->snd_assoc_id;
+ sflags = srinfo->sinfo_flags;
+ if (!sflags && msg_len)
+ return 0;
- sinfo = &default_sinfo;
- fill_sinfo_ttl = true;
- } else {
- sinfo = cmsgs.srinfo;
- }
- /* Did the user specify SNDINFO/SNDRCVINFO? */
- if (sinfo) {
- sinfo_flags = sinfo->sinfo_flags;
- associd = sinfo->sinfo_assoc_id;
- }
+ if (sctp_style(sk, TCP) && (sflags & (SCTP_EOF | SCTP_ABORT)))
+ return -EINVAL;
- pr_debug("%s: msg_len:%zu, sinfo_flags:0x%x\n", __func__,
- msg_len, sinfo_flags);
+ if (((sflags & SCTP_EOF) && msg_len > 0) ||
+ (!(sflags & (SCTP_EOF | SCTP_ABORT)) && msg_len == 0))
+ return -EINVAL;
- /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */
- if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) {
- err = -EINVAL;
- goto out_nounlock;
- }
+ if ((sflags & SCTP_ADDR_OVER) && !msg->msg_name)
+ return -EINVAL;
- /* If SCTP_EOF is set, no data can be sent. Disallow sending zero
- * length messages when SCTP_EOF|SCTP_ABORT is not set.
- * If SCTP_ABORT is set, the message length could be non zero with
- * the msg_iov set to the user abort reason.
- */
- if (((sinfo_flags & SCTP_EOF) && (msg_len > 0)) ||
- (!(sinfo_flags & (SCTP_EOF|SCTP_ABORT)) && (msg_len == 0))) {
- err = -EINVAL;
- goto out_nounlock;
- }
+ return 0;
+}
- /* If SCTP_ADDR_OVER is set, there must be an address
- * specified in msg_name.
- */
- if ((sinfo_flags & SCTP_ADDR_OVER) && (!msg->msg_name)) {
- err = -EINVAL;
- goto out_nounlock;
- }
+static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
+ struct sctp_cmsgs *cmsgs,
+ union sctp_addr *daddr,
+ struct sctp_transport **tp)
+{
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+ struct net *net = sock_net(sk);
+ struct sctp_association *asoc;
+ enum sctp_scope scope;
+ int err = -EINVAL;
- transport = NULL;
+ *tp = NULL;
- pr_debug("%s: about to look up association\n", __func__);
+ if (sflags & (SCTP_EOF | SCTP_ABORT))
+ return -EINVAL;
- lock_sock(sk);
+ if (sctp_style(sk, TCP) && (sctp_sstate(sk, ESTABLISHED) ||
+ sctp_sstate(sk, CLOSING)))
+ return -EADDRNOTAVAIL;
- /* If a msg_name has been specified, assume this is to be used. */
- if (msg_name) {
- /* Look for a matching association on the endpoint. */
- asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport);
+ if (sctp_endpoint_is_peeled_off(ep, daddr))
+ return -EADDRNOTAVAIL;
- /* If we could not find a matching association on the
- * endpoint, make sure that it is not a TCP-style
- * socket that already has an association or there is
- * no peeled-off association on another socket.
- */
- if (!asoc &&
- ((sctp_style(sk, TCP) &&
- (sctp_sstate(sk, ESTABLISHED) ||
- sctp_sstate(sk, CLOSING))) ||
- sctp_endpoint_is_peeled_off(ep, &to))) {
- err = -EADDRNOTAVAIL;
- goto out_unlock;
- }
+ if (!ep->base.bind_addr.port) {
+ if (sctp_autobind(sk))
+ return -EAGAIN;
} else {
- asoc = sctp_id2assoc(sk, associd);
- if (!asoc) {
- err = -EPIPE;
- goto out_unlock;
- }
+ if (ep->base.bind_addr.port < inet_prot_sock(net) &&
+ !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
+ return -EACCES;
}
- if (asoc) {
- pr_debug("%s: just looked up association:%p\n", __func__, asoc);
+ scope = sctp_scope(daddr);
- /* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED
- * socket that has an association in CLOSED state. This can
- * happen when an accepted socket has an association that is
- * already CLOSED.
- */
- if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP)) {
- err = -EPIPE;
- goto out_unlock;
- }
+ asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
+ if (!asoc)
+ return -ENOMEM;
- if (sinfo_flags & SCTP_EOF) {
- pr_debug("%s: shutting down association:%p\n",
- __func__, asoc);
+ if (sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL) < 0) {
+ err = -ENOMEM;
+ goto free;
+ }
- sctp_primitive_SHUTDOWN(net, asoc, NULL);
- err = 0;
- goto out_unlock;
+ if (cmsgs->init) {
+ struct sctp_initmsg *init = cmsgs->init;
+
+ if (init->sinit_num_ostreams) {
+ __u16 outcnt = init->sinit_num_ostreams;
+
+ asoc->c.sinit_num_ostreams = outcnt;
+ /* outcnt has been changed, need to re-init stream */
+ err = sctp_stream_init(&asoc->stream, outcnt, 0,
+ GFP_KERNEL);
+ if (err)
+ goto free;
}
- if (sinfo_flags & SCTP_ABORT) {
- chunk = sctp_make_abort_user(asoc, msg, msg_len);
- if (!chunk) {
- err = -ENOMEM;
- goto out_unlock;
- }
+ if (init->sinit_max_instreams)
+ asoc->c.sinit_max_instreams = init->sinit_max_instreams;
- pr_debug("%s: aborting association:%p\n",
- __func__, asoc);
+ if (init->sinit_max_attempts)
+ asoc->max_init_attempts = init->sinit_max_attempts;
- sctp_primitive_ABORT(net, asoc, chunk);
- err = 0;
- goto out_unlock;
- }
+ if (init->sinit_max_init_timeo)
+ asoc->max_init_timeo =
+ msecs_to_jiffies(init->sinit_max_init_timeo);
}
- /* Do we need to create the association? */
- if (!asoc) {
- pr_debug("%s: there is no association yet\n", __func__);
+ *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+ if (!*tp) {
+ err = -ENOMEM;
+ goto free;
+ }
- if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) {
- err = -EINVAL;
- goto out_unlock;
- }
+ return 0;
- /* Check for invalid stream against the stream counts,
- * either the default or the user specified stream counts.
- */
- if (sinfo) {
- if (!sinit || !sinit->sinit_num_ostreams) {
- /* Check against the defaults. */
- if (sinfo->sinfo_stream >=
- sp->initmsg.sinit_num_ostreams) {
- err = -EINVAL;
- goto out_unlock;
- }
- } else {
- /* Check against the requested. */
- if (sinfo->sinfo_stream >=
- sinit->sinit_num_ostreams) {
- err = -EINVAL;
- goto out_unlock;
- }
- }
- }
+free:
+ sctp_association_free(asoc);
+ return err;
+}
- /*
- * API 3.1.2 bind() - UDP Style Syntax
- * If a bind() or sctp_bindx() is not called prior to a
- * sendmsg() call that initiates a new association, the
- * system picks an ephemeral port and will choose an address
- * set equivalent to binding with a wildcard address.
- */
- if (!ep->base.bind_addr.port) {
- if (sctp_autobind(sk)) {
- err = -EAGAIN;
- goto out_unlock;
- }
- } else {
- /*
- * If an unprivileged user inherits a one-to-many
- * style socket with open associations on a privileged
- * port, it MAY be permitted to accept new associations,
- * but it SHOULD NOT be permitted to open new
- * associations.
- */
- if (ep->base.bind_addr.port < inet_prot_sock(net) &&
- !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
- err = -EACCES;
- goto out_unlock;
- }
- }
+static int sctp_sendmsg_check_sflags(struct sctp_association *asoc,
+ __u16 sflags, struct msghdr *msg,
+ size_t msg_len)
+{
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
- scope = sctp_scope(&to);
- new_asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
- if (!new_asoc) {
- err = -ENOMEM;
- goto out_unlock;
- }
- asoc = new_asoc;
- err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL);
- if (err < 0) {
- err = -ENOMEM;
- goto out_free;
- }
+ if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP))
+ return -EPIPE;
- /* If the SCTP_INIT ancillary data is specified, set all
- * the association init values accordingly.
- */
- if (sinit) {
- if (sinit->sinit_num_ostreams) {
- __u16 outcnt = sinit->sinit_num_ostreams;
-
- asoc->c.sinit_num_ostreams = outcnt;
- /* outcnt has been changed, so re-init stream */
- err = sctp_stream_init(&asoc->stream, outcnt, 0,
- GFP_KERNEL);
- if (err)
- goto out_free;
- }
- if (sinit->sinit_max_instreams) {
- asoc->c.sinit_max_instreams =
- sinit->sinit_max_instreams;
- }
- if (sinit->sinit_max_attempts) {
- asoc->max_init_attempts
- = sinit->sinit_max_attempts;
- }
- if (sinit->sinit_max_init_timeo) {
- asoc->max_init_timeo =
- msecs_to_jiffies(sinit->sinit_max_init_timeo);
- }
- }
+ if (sflags & SCTP_EOF) {
+ pr_debug("%s: shutting down association:%p\n", __func__, asoc);
+ sctp_primitive_SHUTDOWN(net, asoc, NULL);
- /* Prime the peer's transport structures. */
- transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, SCTP_UNKNOWN);
- if (!transport) {
- err = -ENOMEM;
- goto out_free;
- }
+ return 0;
}
- /* ASSERT: we have a valid association at this point. */
- pr_debug("%s: we have a valid association\n", __func__);
+ if (sflags & SCTP_ABORT) {
+ struct sctp_chunk *chunk;
- if (!sinfo) {
- /* If the user didn't specify SNDINFO/SNDRCVINFO, make up
- * one with some defaults.
- */
- memset(&default_sinfo, 0, sizeof(default_sinfo));
- default_sinfo.sinfo_stream = asoc->default_stream;
- default_sinfo.sinfo_flags = asoc->default_flags;
- default_sinfo.sinfo_ppid = asoc->default_ppid;
- default_sinfo.sinfo_context = asoc->default_context;
- default_sinfo.sinfo_timetolive = asoc->default_timetolive;
- default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc);
-
- sinfo = &default_sinfo;
- } else if (fill_sinfo_ttl) {
- /* In case SNDINFO was specified, we still need to fill
- * it with a default ttl from the assoc here.
- */
- sinfo->sinfo_timetolive = asoc->default_timetolive;
- }
+ chunk = sctp_make_abort_user(asoc, msg, msg_len);
+ if (!chunk)
+ return -ENOMEM;
- /* API 7.1.7, the sndbuf size per association bounds the
- * maximum size of data that can be sent in a single send call.
- */
- if (msg_len > sk->sk_sndbuf) {
- err = -EMSGSIZE;
- goto out_free;
+ pr_debug("%s: aborting association:%p\n", __func__, asoc);
+ sctp_primitive_ABORT(net, asoc, chunk);
+
+ return 0;
}
- if (asoc->pmtu_pending)
- sctp_assoc_pending_pmtu(asoc);
+ return 1;
+}
- /* If fragmentation is disabled and the message length exceeds the
- * association fragmentation point, return EMSGSIZE. The I-D
- * does not specify what this error is, but this looks like
- * a great fit.
- */
- if (sctp_sk(sk)->disable_fragments && (msg_len > asoc->frag_point)) {
- err = -EMSGSIZE;
- goto out_free;
- }
+static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
+ struct msghdr *msg, size_t msg_len,
+ struct sctp_transport *transport,
+ struct sctp_sndrcvinfo *sinfo)
+{
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
+ struct sctp_datamsg *datamsg;
+ bool wait_connect = false;
+ struct sctp_chunk *chunk;
+ long timeo;
+ int err;
- /* Check for invalid stream. */
if (sinfo->sinfo_stream >= asoc->stream.outcnt) {
err = -EINVAL;
- goto out_free;
+ goto err;
}
- /* Allocate sctp_stream_out_ext if not already done */
if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
if (err)
- goto out_free;
+ goto err;
}
- if (sctp_wspace(asoc) < msg_len)
- sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
-
- timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
- if (!sctp_wspace(asoc)) {
- /* sk can be changed by peel off when waiting for buf. */
- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
- if (err) {
- if (err == -ESRCH) {
- /* asoc is already dead. */
- new_asoc = NULL;
- err = -EPIPE;
- }
- goto out_free;
- }
+ if (sctp_sk(sk)->disable_fragments && msg_len > asoc->frag_point) {
+ err = -EMSGSIZE;
+ goto err;
}
- /* If an address is passed with the sendto/sendmsg call, it is used
- * to override the primary destination address in the TCP model, or
- * when SCTP_ADDR_OVER flag is set in the UDP model.
- */
- if ((sctp_style(sk, TCP) && msg_name) ||
- (sinfo_flags & SCTP_ADDR_OVER)) {
- chunk_tp = sctp_assoc_lookup_paddr(asoc, &to);
- if (!chunk_tp) {
- err = -EINVAL;
- goto out_free;
- }
- } else
- chunk_tp = NULL;
-
- /* Auto-connect, if we aren't connected already. */
if (sctp_state(asoc, CLOSED)) {
err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
- if (err < 0)
- goto out_free;
+ if (err)
+ goto err;
- /* If stream interleave is enabled, wait_connect has to be
- * done earlier than data enqueue, as it needs to make data
- * or idata according to asoc->intl_enable which is set
- * after connection is done.
- */
- if (sctp_sk(asoc->base.sk)->strm_interleave) {
+ if (sctp_sk(sk)->strm_interleave) {
timeo = sock_sndtimeo(sk, 0);
err = sctp_wait_for_connect(asoc, &timeo);
if (err)
- goto out_unlock;
+ goto err;
} else {
wait_connect = true;
}
pr_debug("%s: we associated primitively\n", __func__);
}
- /* Break the message into multiple chunks of maximum size. */
+ if (asoc->pmtu_pending)
+ sctp_assoc_pending_pmtu(asoc);
+
+ if (sctp_wspace(asoc) < msg_len)
+ sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
+
+ if (!sctp_wspace(asoc)) {
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
+ if (err)
+ goto err;
+ }
+
datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter);
if (IS_ERR(datamsg)) {
err = PTR_ERR(datamsg);
- goto out_free;
+ goto err;
}
+
asoc->force_delay = !!(msg->msg_flags & MSG_MORE);
- /* Now send the (possibly) fragmented message. */
list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
sctp_chunk_hold(chunk);
-
- /* Do accounting for the write space. */
sctp_set_owner_w(chunk);
-
- chunk->transport = chunk_tp;
+ chunk->transport = transport;
}
- /* Send it to the lower layers. Note: all chunks
- * must either fail or succeed. The lower layer
- * works that way today. Keep it that way or this
- * breaks.
- */
err = sctp_primitive_SEND(net, asoc, datamsg);
- /* Did the lower layer accept the chunk? */
if (err) {
sctp_datamsg_free(datamsg);
- goto out_free;
+ goto err;
}
pr_debug("%s: we sent primitively\n", __func__);
sctp_datamsg_put(datamsg);
- err = msg_len;
if (unlikely(wait_connect)) {
- timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT);
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
sctp_wait_for_connect(asoc, &timeo);
}
- /* If we are already past ASSOCIATE, the lower
- * layers are responsible for association cleanup.
- */
- goto out_unlock;
+ err = msg_len;
-out_free:
- if (new_asoc)
- sctp_association_free(asoc);
-out_unlock:
- release_sock(sk);
+err:
+ return err;
+}
-out_nounlock:
- return sctp_error(sk, msg_flags, err);
+static union sctp_addr *sctp_sendmsg_get_daddr(struct sock *sk,
+ const struct msghdr *msg,
+ struct sctp_cmsgs *cmsgs)
+{
+ union sctp_addr *daddr = NULL;
+ int err;
-#if 0
-do_sock_err:
- if (msg_len)
- err = msg_len;
- else
- err = sock_error(sk);
- goto out;
+ if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
+ int len = msg->msg_namelen;
-do_interrupted:
- if (msg_len)
- err = msg_len;
- goto out;
-#endif /* 0 */
+ if (len > sizeof(*daddr))
+ len = sizeof(*daddr);
+
+ daddr = (union sctp_addr *)msg->msg_name;
+
+ err = sctp_verify_addr(sk, daddr, len);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ return daddr;
+}
+
+static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo,
+ struct sctp_cmsgs *cmsgs)
+{
+ if (!cmsgs->srinfo && !cmsgs->sinfo) {
+ sinfo->sinfo_stream = asoc->default_stream;
+ sinfo->sinfo_ppid = asoc->default_ppid;
+ sinfo->sinfo_context = asoc->default_context;
+ sinfo->sinfo_assoc_id = sctp_assoc2id(asoc);
+ }
+
+ if (!cmsgs->srinfo)
+ sinfo->sinfo_timetolive = asoc->default_timetolive;
+}
+
+static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
+{
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+ struct sctp_transport *transport = NULL;
+ struct sctp_sndrcvinfo _sinfo, *sinfo;
+ struct sctp_association *asoc;
+ struct sctp_cmsgs cmsgs;
+ union sctp_addr *daddr;
+ bool new = false;
+ __u16 sflags;
+ int err;
+
+ /* Parse and get snd_info */
+ err = sctp_sendmsg_parse(sk, &cmsgs, &_sinfo, msg, msg_len);
+ if (err)
+ goto out;
+
+ sinfo = &_sinfo;
+ sflags = sinfo->sinfo_flags;
+
+ /* Get daddr from msg */
+ daddr = sctp_sendmsg_get_daddr(sk, msg, &cmsgs);
+ if (IS_ERR(daddr)) {
+ err = PTR_ERR(daddr);
+ goto out;
+ }
+
+ lock_sock(sk);
+
+ /* Get and check or create asoc */
+ if (daddr) {
+ asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+ if (asoc) {
+ err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
+ msg_len);
+ if (err <= 0)
+ goto out_unlock;
+ } else {
+ err = sctp_sendmsg_new_asoc(sk, sflags, &cmsgs, daddr,
+ &transport);
+ if (err)
+ goto out_unlock;
+
+ asoc = transport->asoc;
+ new = true;
+ }
+
+ if (!sctp_style(sk, TCP) && !(sflags & SCTP_ADDR_OVER))
+ transport = NULL;
+ } else {
+ asoc = sctp_id2assoc(sk, sinfo->sinfo_assoc_id);
+ if (!asoc) {
+ err = -EPIPE;
+ goto out_unlock;
+ }
+
+ err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len);
+ if (err <= 0)
+ goto out_unlock;
+ }
+
+ /* Update snd_info with the asoc */
+ sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
+
+ /* Send msg to the asoc */
+ err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, transport, sinfo);
+ if (err < 0 && err != -ESRCH && new)
+ sctp_association_free(asoc);
+
+out_unlock:
+ release_sock(sk);
+out:
+ return sctp_error(sk, msg->msg_flags, err);
}
/* This is an extended version of skb_pull() that removes the data from the
*
* This file is part of the SCTP kernel implementation
*
- * These functions manipulate sctp tsn mapping array.
+ * This file contains sctp stream maniuplation primitives and helpers.
*
* This SCTP implementation is free software;
* you can redistribute it and/or modify it under the terms of
*
* This file is part of the SCTP kernel implementation
*
- * These functions manipulate sctp stream queue/scheduling.
+ * These functions implement sctp stream message interleaving, mostly
+ * including I-DATA and I-FORWARD-TSN chunks process.
*
* This SCTP implementation is free software;
* you can redistribute it and/or modify it under the terms of
__u32 freed = 0;
__u16 needed;
- if (chunk) {
- needed = ntohs(chunk->chunk_hdr->length);
- needed -= sizeof(struct sctp_idata_chunk);
- } else {
- needed = SCTP_DEFAULT_MAXWINDOW;
- }
+ needed = ntohs(chunk->chunk_hdr->length) -
+ sizeof(struct sctp_idata_chunk);
if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
freed = sctp_ulpq_renege_list(ulpq, &ulpq->lobby, needed);
needed);
}
- if (chunk && freed >= needed)
- if (sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
- sctp_intl_start_pd(ulpq, gfp);
+ if (freed >= needed && sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
+ sctp_intl_start_pd(ulpq, gfp);
sk_mem_reclaim(asoc->base.sk);
}
* applicable with RoCE-cards only
*
* Initial restrictions:
- * - non-blocking connect postponed
* - IPv6 support postponed
* - support for alternate links postponed
* - partial support for non-blocking sockets only
#include <linux/module.h>
#include <linux/socket.h>
-#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
-/* determine subnet and mask of internal TCP socket */
-int smc_netinfo_by_tcpsk(struct socket *clcsock,
- __be32 *subnet, u8 *prefix_len)
-{
- struct dst_entry *dst = sk_dst_get(clcsock->sk);
- struct in_device *in_dev;
- struct sockaddr_in addr;
- int rc = -ENOENT;
- int len;
-
- if (!dst) {
- rc = -ENOTCONN;
- goto out;
- }
- if (!dst->dev) {
- rc = -ENODEV;
- goto out_rel;
- }
-
- /* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
- /* analyze IPv4 specific data of net_device belonging to TCP socket */
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dst->dev);
- for_ifa(in_dev) {
- if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
- continue;
- *prefix_len = inet_mask_len(ifa->ifa_mask);
- *subnet = ifa->ifa_address & ifa->ifa_mask;
- rc = 0;
- break;
- } endfor_ifa(in_dev);
- rcu_read_unlock();
-
-out_rel:
- dst_release(dst);
-out:
- return rc;
-}
-
-static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
+static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
return rc;
}
+ if (link->llc_confirm_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
rc = smc_ib_modify_qp_rts(link);
if (rc)
return SMC_CLC_DECL_INTERR;
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link,
link->smcibdev->mac[link->ibport - 1],
- gid, SMC_LLC_RESP);
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
if (rc < 0)
return SMC_CLC_DECL_TCL;
- return rc;
+ /* receive ADD LINK request from server over RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ /* send add link reject message, only one link supported for now */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
link->peer_mtu = clc->qp_mtu;
}
-static void smc_lgr_forget(struct smc_link_group *lgr)
-{
- spin_lock_bh(&smc_lgr_list.lock);
- /* do not use this link group for new connections */
- if (!list_empty(&lgr->list))
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
-}
-
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
- struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
struct smc_clc_msg_accept_confirm aclc;
int local_contact = SMC_FIRST_CONTACT;
struct smc_ib_device *smcibdev;
srv_first_contact = aclc.hdr.flag;
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
- ibport, &aclc.lcl, srv_first_contact);
+ local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
+ srv_first_contact);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
if (local_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
- reason_code = smc_clnt_conf_first_link(
- smc, &smcibdev->gid[ibport - 1]);
+ reason_code = smc_clnt_conf_first_link(smc);
if (reason_code < 0) {
rc = reason_code;
goto out_err_unlock;
goto out_err;
if (addr->sa_family != AF_INET)
goto out_err;
- smc->addr = addr; /* needed for nonblocking connect */
lock_sock(sk);
switch (sk->sk_state) {
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE);
+ return rc;
}
- return rc;
+ if (link->llc_confirm_resp_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
+ /* send ADD LINK request to client over the RoCE fabric */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ /* receive ADD LINK response from client over the RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
/* setup for RDMA connection of server */
struct sock *newsmcsk = &new_smc->sk;
struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
- struct sockaddr_in peeraddr;
u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
- int rc = 0, len;
+ int rc = 0;
__be32 subnet;
u8 prefix_len;
u8 ibport;
}
/* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
+ rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
if (rc) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
goto decline_rdma;
}
- /* get address of the peer connected to the internal TCP socket */
- kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);
-
/* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
- smcibdev, ibport, &pclc->lcl, 0);
+ local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
+ 0);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
}
static int smc_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct smc_sock *smc;
smc = smc_sk(sock->sk);
- return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
+ return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}
static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
struct smc_connection conn; /* smc connection */
- struct sockaddr *addr; /* inet connect address */
struct smc_sock *listen_smc; /* listen parent */
struct work_struct tcp_listen_work;/* handle tcp socket accepts */
struct work_struct smc_listen_work;/* prepare new accept socket */
struct smc_clc_msg_local;
-int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
- u8 *prefix_len);
void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact);
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
*/
#include <linux/in.h>
+#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/sched/signal.h>
#include "smc_clc.h"
#include "smc_ib.h"
+/* eye catcher "SMCR" EBCDIC for CLC messages */
+static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
+
/* check if received message has a correct header length and contains valid
* heading and trailing eyecatchers
*/
return true;
}
+/* determine subnet and mask of internal TCP socket */
+int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
+ __be32 *subnet, u8 *prefix_len)
+{
+ struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ struct in_device *in_dev;
+ struct sockaddr_in addr;
+ int rc = -ENOENT;
+
+ if (!dst) {
+ rc = -ENOTCONN;
+ goto out;
+ }
+ if (!dst->dev) {
+ rc = -ENODEV;
+ goto out_rel;
+ }
+
+ /* get address to which the internal TCP socket is bound */
+ kernel_getsockname(clcsock, (struct sockaddr *)&addr);
+ /* analyze IPv4 specific data of net_device belonging to TCP socket */
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(dst->dev);
+ for_ifa(in_dev) {
+ if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
+ continue;
+ *prefix_len = inet_mask_len(ifa->ifa_mask);
+ *subnet = ifa->ifa_address & ifa->ifa_mask;
+ rc = 0;
+ break;
+ } endfor_ifa(in_dev);
+ rcu_read_unlock();
+
+out_rel:
+ dst_release(dst);
+out:
+ return rc;
+}
+
/* Wait for data on the tcp-socket, analyze received data
* Returns:
* 0 if success and it was not a decline that we received.
memset(&pclc_prfx, 0, sizeof(pclc_prfx));
/* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
- &pclc_prfx.prefix_len);
+ rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
+ &pclc_prfx.prefix_len);
if (rc)
return SMC_CLC_DECL_CNFERR; /* configuration error */
pclc_prfx.ipv6_prefixes_cnt = 0;
#define SMC_CLC_CONFIRM 0x03
#define SMC_CLC_DECLINE 0x04
-/* eye catcher "SMCR" EBCDIC for CLC messages */
-static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
-
#define SMC_CLC_V1 0x1 /* SMC version */
#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */
#define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */
#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
#define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */
#define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */
+#define SMC_CLC_DECL_RMBE_EC 0x08000000 /* peer has eyecatcher in RMBE */
struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}
-struct smc_sock;
-struct smc_ib_device;
-
+int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
+ u8 *prefix_len);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
}
/* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
+static int smc_lgr_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
char *peer_systemid, unsigned short vlan_id)
{
}
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
lgr->sync_err = false;
- lgr->daddr = peer_in_addr;
memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
lgr->vlan_id = vlan_id;
rwlock_init(&lgr->sndbufs_lock);
lnk = &lgr->lnk[SMC_SINGLE_LINK];
/* initialize link */
+ lnk->state = SMC_LNK_ACTIVATING;
lnk->smcibdev = smcibdev;
lnk->ibport = ibport;
lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
goto destroy_qp;
init_completion(&lnk->llc_confirm);
init_completion(&lnk->llc_confirm_resp);
+ init_completion(&lnk->llc_add);
+ init_completion(&lnk->llc_add_resp);
smc->conn.lgr = lgr;
rwlock_init(&lgr->conns_lock);
kfree(lgr);
}
+void smc_lgr_forget(struct smc_link_group *lgr)
+{
+ spin_lock_bh(&smc_lgr_list.lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(&lgr->list))
+ list_del_init(&lgr->list);
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
/* terminate linkgroup abnormally */
void smc_lgr_terminate(struct smc_link_group *lgr)
{
struct smc_sock *smc;
struct rb_node *node;
- spin_lock_bh(&smc_lgr_list.lock);
- if (list_empty(&lgr->list)) {
- /* termination already triggered */
- spin_unlock_bh(&smc_lgr_list.lock);
- return;
- }
- /* do not use this link group for new connections */
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
+ smc_lgr_forget(lgr);
write_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
}
/* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact)
{
create:
if (local_contact == SMC_FIRST_CONTACT) {
- rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport,
+ rc = smc_lgr_create(smc, smcibdev, ibport,
lcl->id_for_peer, vlan_id);
if (rc)
goto out;
return -ENOSPC;
}
-/* save rkey and dma_addr received from peer during clc handshake */
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
- struct smc_clc_msg_accept_confirm *clc)
+/* add a new rtoken from peer */
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
- u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr);
- struct smc_link_group *lgr = conn->lgr;
- u32 rkey = ntohl(clc->rmb_rkey);
+ u64 dma_addr = be64_to_cpu(nw_vaddr);
+ u32 rkey = ntohl(nw_rkey);
int i;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
(lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
test_bit(i, lgr->rtokens_used_mask)) {
- conn->rtoken_idx = i;
+ /* already in list */
+ return i;
+ }
+ }
+ i = smc_rmb_reserve_rtoken_idx(lgr);
+ if (i < 0)
+ return i;
+ lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
+ lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+ return i;
+}
+
+/* delete an rtoken */
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+{
+ u32 rkey = ntohl(nw_rkey);
+ int i;
+
+ for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+ if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+ test_bit(i, lgr->rtokens_used_mask)) {
+ lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
+ lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
+
+ clear_bit(i, lgr->rtokens_used_mask);
return 0;
}
}
- conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr);
+ return -ENOENT;
+}
+
+/* save rkey and dma_addr received from peer during clc handshake */
+int smc_rmb_rtoken_handling(struct smc_connection *conn,
+ struct smc_clc_msg_accept_confirm *clc)
+{
+ conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+ clc->rmb_rkey);
if (conn->rtoken_idx < 0)
return conn->rtoken_idx;
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey;
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr;
return 0;
}
SMC_SERV /* server */
};
+enum smc_link_state { /* possible states of a link */
+ SMC_LNK_INACTIVE, /* link is inactive */
+ SMC_LNK_ACTIVATING, /* link is being activated */
+ SMC_LNK_ACTIVE /* link is active */
+};
+
#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
struct smc_wr_buf {
u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/
u8 link_id; /* unique # within link group */
+
+ enum smc_link_state state; /* state of link */
struct completion llc_confirm; /* wait for rx of conf link */
struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
+ int llc_confirm_rc; /* rc from confirm link msg */
+ int llc_confirm_resp_rc; /* rc from conf_resp msg */
+ struct completion llc_add; /* wait for rx of add link */
+ struct completion llc_add_resp; /* wait for rx of add link rsp*/
};
/* For now we just allow one parallel link per link group. The SMC protocol
struct smc_link_group {
struct list_head list;
enum smc_lgr_role role; /* client or server */
- __be32 daddr; /* destination ip address */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */
char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */
struct smc_clc_msg_accept_confirm;
void smc_lgr_free(struct smc_link_group *lgr);
+void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
int smc_buf_create(struct smc_sock *smc);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_clc_msg_accept_confirm *clc);
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
*
* Link Layer Control (LLC)
*
- * For now, we only support the necessary "confirm link" functionality
- * which happens for the first RoCE link after successful CLC handshake.
- *
* Copyright IBM Corp. 2016
*
* Author(s): Klaus Wacker <Klaus.Wacker@de.ibm.com>
#include "smc_clc.h"
#include "smc_llc.h"
+#define SMC_LLC_DATA_LEN 40
+
+struct smc_llc_hdr {
+ struct smc_wr_rx_hdr common;
+ u8 length; /* 44 */
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved:4,
+ add_link_rej_rsn:4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 add_link_rej_rsn:4,
+ reserved:4;
+#endif
+ u8 flags;
+};
+
+#define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03
+
+struct smc_llc_msg_confirm_link { /* type 0x01 */
+ struct smc_llc_hdr hd;
+ u8 sender_mac[ETH_ALEN];
+ u8 sender_gid[SMC_GID_SIZE];
+ u8 sender_qp_num[3];
+ u8 link_num;
+ u8 link_uid[SMC_LGR_ID_SIZE];
+ u8 max_links;
+ u8 reserved[9];
+};
+
+#define SMC_LLC_FLAG_ADD_LNK_REJ 0x40
+#define SMC_LLC_REJ_RSN_NO_ALT_PATH 1
+
+#define SMC_LLC_ADD_LNK_MAX_LINKS 2
+
+struct smc_llc_msg_add_link { /* type 0x02 */
+ struct smc_llc_hdr hd;
+ u8 sender_mac[ETH_ALEN];
+ u8 reserved2[2];
+ u8 sender_gid[SMC_GID_SIZE];
+ u8 sender_qp_num[3];
+ u8 link_num;
+ u8 flags2; /* QP mtu */
+ u8 initial_psn[3];
+ u8 reserved[8];
+};
+
+#define SMC_LLC_FLAG_DEL_LINK_ALL 0x40
+#define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20
+
+struct smc_llc_msg_del_link { /* type 0x04 */
+ struct smc_llc_hdr hd;
+ u8 link_num;
+ __be32 reason;
+ u8 reserved[35];
+} __packed; /* format defined in RFC7609 */
+
+struct smc_llc_msg_test_link { /* type 0x07 */
+ struct smc_llc_hdr hd;
+ u8 user_data[16];
+ u8 reserved[24];
+};
+
+struct smc_rmb_rtoken {
+ union {
+ u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */
+ /* is actually the num of rtokens, first */
+ /* rtoken is always for the current link */
+ u8 link_id; /* link id of the rtoken */
+ };
+ __be32 rmb_key;
+ __be64 rmb_vaddr;
+} __packed; /* format defined in RFC7609 */
+
+#define SMC_LLC_RKEYS_PER_MSG 3
+
+struct smc_llc_msg_confirm_rkey { /* type 0x06 */
+ struct smc_llc_hdr hd;
+ struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+ u8 reserved;
+};
+
+struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+};
+
+#define SMC_LLC_DEL_RKEY_MAX 8
+#define SMC_LLC_FLAG_RKEY_NEG 0x20
+
+struct smc_llc_msg_delete_rkey { /* type 0x09 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ u8 err_mask;
+ u8 reserved[2];
+ __be32 rkey[8];
+ u8 reserved2[4];
+};
+
+union smc_llc_msg {
+ struct smc_llc_msg_confirm_link confirm_link;
+ struct smc_llc_msg_add_link add_link;
+ struct smc_llc_msg_del_link delete_link;
+
+ struct smc_llc_msg_confirm_rkey confirm_rkey;
+ struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
+ struct smc_llc_msg_delete_rkey delete_rkey;
+
+ struct smc_llc_msg_test_link test_link;
+ struct {
+ struct smc_llc_hdr hdr;
+ u8 data[SMC_LLC_DATA_LEN];
+ } raw;
+};
+
+#define SMC_LLC_FLAG_RESP 0x80
+
/********************************** send *************************************/
struct smc_llc_tx_pend {
memset(confllc, 0, sizeof(*confllc));
confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
+ confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
if (reqresp == SMC_LLC_RESP)
confllc->hd.flags |= SMC_LLC_FLAG_RESP;
memcpy(confllc->sender_mac, mac, ETH_ALEN);
hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
/* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */
memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
- confllc->max_links = SMC_LINKS_PER_LGR_MAX;
+ confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send ADD LINK request or response */
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
+ union ib_gid *gid,
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_add_link *addllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ addllc = (struct smc_llc_msg_add_link *)wr_buf;
+ memset(addllc, 0, sizeof(*addllc));
+ addllc->hd.common.type = SMC_LLC_ADD_LINK;
+ addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ if (reqresp == SMC_LLC_RESP) {
+ addllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ /* always reject more links for now */
+ addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
+ addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+ }
+ memcpy(addllc->sender_mac, mac, ETH_ALEN);
+ memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send DELETE LINK request or response */
+int smc_llc_send_delete_link(struct smc_link *link,
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_del_link *delllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ delllc = (struct smc_llc_msg_del_link *)wr_buf;
+ memset(delllc, 0, sizeof(*delllc));
+ delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+ delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ if (reqresp == SMC_LLC_RESP)
+ delllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ /* DEL_LINK_ALL because only 1 link supported */
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+ delllc->link_num = link->link_id;
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send LLC test link request or response */
+int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16],
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_test_link *testllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ testllc = (struct smc_llc_msg_test_link *)wr_buf;
+ memset(testllc, 0, sizeof(*testllc));
+ testllc->hd.common.type = SMC_LLC_TEST_LINK;
+ testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
+ if (reqresp == SMC_LLC_RESP)
+ testllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send a prepared message */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
+{
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ memcpy(wr_buf, llcbuf, llclen);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
struct smc_llc_msg_confirm_link *llc)
{
struct smc_link_group *lgr;
+ int conf_rc;
lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ /* RMBE eyecatchers are not supported */
+ if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
+ conf_rc = 0;
+ else
+ conf_rc = ENOTSUPP;
+
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (lgr->role == SMC_SERV)
+ if (lgr->role == SMC_SERV &&
+ link->state == SMC_LNK_ACTIVATING) {
+ link->llc_confirm_resp_rc = conf_rc;
complete(&link->llc_confirm_resp);
+ }
} else {
- if (lgr->role == SMC_CLNT) {
+ if (lgr->role == SMC_CLNT &&
+ link->state == SMC_LNK_ACTIVATING) {
+ link->llc_confirm_rc = conf_rc;
link->link_id = llc->link_num;
complete(&link->llc_confirm);
}
}
}
+static void smc_llc_rx_add_link(struct smc_link *link,
+ struct smc_llc_msg_add_link *llc)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ if (link->state == SMC_LNK_ACTIVATING)
+ complete(&link->llc_add_resp);
+ } else {
+ if (link->state == SMC_LNK_ACTIVATING) {
+ complete(&link->llc_add);
+ return;
+ }
+
+ if (lgr->role == SMC_SERV) {
+ smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+
+ } else {
+ smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
+ }
+ }
+}
+
+static void smc_llc_rx_delete_link(struct smc_link *link,
+ struct smc_llc_msg_del_link *llc)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ if (lgr->role == SMC_SERV)
+ smc_lgr_terminate(lgr);
+ } else {
+ if (lgr->role == SMC_SERV) {
+ smc_lgr_forget(lgr);
+ smc_llc_send_delete_link(link, SMC_LLC_REQ);
+ } else {
+ smc_llc_send_delete_link(link, SMC_LLC_RESP);
+ smc_lgr_terminate(lgr);
+ }
+ }
+}
+
+static void smc_llc_rx_test_link(struct smc_link *link,
+ struct smc_llc_msg_test_link *llc)
+{
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP);
+ }
+}
+
+static void smc_llc_rx_confirm_rkey(struct smc_link *link,
+ struct smc_llc_msg_confirm_rkey *llc)
+{
+ struct smc_link_group *lgr;
+ int rc;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ rc = smc_rtoken_add(lgr,
+ llc->rtoken[0].rmb_vaddr,
+ llc->rtoken[0].rmb_key);
+
+ /* ignore rtokens for other links, we have only one link */
+
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ if (rc < 0)
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
+static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
+ struct smc_llc_msg_confirm_rkey_cont *llc)
+{
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ /* ignore rtokens for other links, we have only one link */
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
+static void smc_llc_rx_delete_rkey(struct smc_link *link,
+ struct smc_llc_msg_delete_rkey *llc)
+{
+ struct smc_link_group *lgr;
+ u8 err_mask = 0;
+ int i, max;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
+ for (i = 0; i < max; i++) {
+ if (smc_rtoken_delete(lgr, llc->rkey[i]))
+ err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
+ }
+
+ if (err_mask) {
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ llc->err_mask = err_mask;
+ }
+
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
{
struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
return; /* short message */
if (llc->raw.hdr.length != sizeof(*llc))
return; /* invalid message */
- if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK)
+
+ switch (llc->raw.hdr.common.type) {
+ case SMC_LLC_TEST_LINK:
+ smc_llc_rx_test_link(link, &llc->test_link);
+ break;
+ case SMC_LLC_CONFIRM_LINK:
smc_llc_rx_confirm_link(link, &llc->confirm_link);
+ break;
+ case SMC_LLC_ADD_LINK:
+ smc_llc_rx_add_link(link, &llc->add_link);
+ break;
+ case SMC_LLC_DELETE_LINK:
+ smc_llc_rx_delete_link(link, &llc->delete_link);
+ break;
+ case SMC_LLC_CONFIRM_RKEY:
+ smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
+ break;
+ case SMC_LLC_CONFIRM_RKEY_CONT:
+ smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
+ break;
+ case SMC_LLC_DELETE_RKEY:
+ smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
+ break;
+ }
}
/***************************** init, exit, misc ******************************/
.handler = smc_llc_rx_handler,
.type = SMC_LLC_CONFIRM_LINK
},
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_TEST_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_ADD_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY_CONT
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_RKEY
+ },
{
.handler = NULL,
}
#define SMC_LLC_FLAG_RESP 0x80
#define SMC_LLC_WAIT_FIRST_TIME (5 * HZ)
+#define SMC_LLC_WAIT_TIME (2 * HZ)
enum smc_llc_reqresp {
SMC_LLC_REQ,
enum smc_llc_msg_type {
SMC_LLC_CONFIRM_LINK = 0x01,
-};
-
-#define SMC_LLC_DATA_LEN 40
-
-struct smc_llc_hdr {
- struct smc_wr_rx_hdr common;
- u8 length; /* 44 */
- u8 reserved;
- u8 flags;
-};
-
-struct smc_llc_msg_confirm_link { /* type 0x01 */
- struct smc_llc_hdr hd;
- u8 sender_mac[ETH_ALEN];
- u8 sender_gid[SMC_GID_SIZE];
- u8 sender_qp_num[3];
- u8 link_num;
- u8 link_uid[SMC_LGR_ID_SIZE];
- u8 max_links;
- u8 reserved[9];
-};
-
-union smc_llc_msg {
- struct smc_llc_msg_confirm_link confirm_link;
- struct {
- struct smc_llc_hdr hdr;
- u8 data[SMC_LLC_DATA_LEN];
- } raw;
+ SMC_LLC_ADD_LINK = 0x02,
+ SMC_LLC_DELETE_LINK = 0x04,
+ SMC_LLC_CONFIRM_RKEY = 0x06,
+ SMC_LLC_TEST_LINK = 0x07,
+ SMC_LLC_CONFIRM_RKEY_CONT = 0x08,
+ SMC_LLC_DELETE_RKEY = 0x09,
};
/* transmit */
int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
enum smc_llc_reqresp reqresp);
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid,
+ enum smc_llc_reqresp reqresp);
+int smc_llc_send_delete_link(struct smc_link *link,
+ enum smc_llc_reqresp reqresp);
+int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
+ enum smc_llc_reqresp reqresp);
int smc_llc_init(void) __init;
#endif /* SMC_LLC_H */
#include <linux/ipv6_route.h>
#include <linux/route.h>
#include <linux/sockios.h>
-#include <linux/atalk.h>
#include <net/busy_poll.h>
#include <linux/errqueue.h>
return __put_user(klen, ulen);
}
-static struct kmem_cache *sock_inode_cachep __read_mostly;
+static struct kmem_cache *sock_inode_cachep __ro_after_init;
static struct inode *sock_alloc_inode(struct super_block *sb)
{
* what to do with it - that's up to the protocol still.
*/
-static struct ns_common *get_net_ns(struct ns_common *ns)
+struct ns_common *get_net_ns(struct ns_common *ns)
{
return &get_net(container_of(ns, struct net, ns))->ns;
}
+EXPORT_SYMBOL_GPL(get_net_ns);
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
goto out_fd;
if (upeer_sockaddr) {
- if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
- &len, 2) < 0) {
+ len = newsock->ops->getname(newsock,
+ (struct sockaddr *)&address, 2);
+ if (len < 0) {
err = -ECONNABORTED;
goto out_fd;
}
{
struct socket *sock;
struct sockaddr_storage address;
- int len, err, fput_needed;
+ int err, fput_needed;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
if (err)
goto out_put;
- err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
- if (err)
+ err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
+ if (err < 0)
goto out_put;
- err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
+ /* "err" is actually length in this case */
+ err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
out_put:
fput_light(sock->file, fput_needed);
{
struct socket *sock;
struct sockaddr_storage address;
- int len, err, fput_needed;
+ int err, fput_needed;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) {
return err;
}
- err =
- sock->ops->getname(sock, (struct sockaddr *)&address, &len,
- 1);
- if (!err)
- err = move_addr_to_user(&address, len, usockaddr,
+ err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
+ if (err >= 0)
+ /* "err" is actually length in this case */
+ err = move_addr_to_user(&address, err, usockaddr,
usockaddr_len);
fput_light(sock->file, fput_needed);
}
if (!sock)
return err;
- err = sock_error(sock->sk);
- if (err) {
- datagrams = err;
- goto out_put;
+ if (likely(!(flags & MSG_ERRQUEUE))) {
+ err = sock_error(sock->sk);
+ if (err) {
+ datagrams = err;
+ goto out_put;
+ }
}
entry = mmsg;
}
EXPORT_SYMBOL(kernel_connect);
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
- int *addrlen)
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, addrlen, 0);
+ return sock->ops->getname(sock, addr, 0);
}
EXPORT_SYMBOL(kernel_getsockname);
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
- int *addrlen)
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, addrlen, 1);
+ return sock->ops->getname(sock, addr, 1);
}
EXPORT_SYMBOL(kernel_getpeername);
* negative errno is returned.
*/
static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
- struct sockaddr *buf, int buflen)
+ struct sockaddr *buf)
{
struct socket *sock;
int err;
goto out_release;
}
- err = kernel_getsockname(sock, buf, &buflen);
+ err = kernel_getsockname(sock, buf);
if (err < 0) {
dprintk("RPC: getsockname failed (%d)\n", err);
goto out_release;
rcu_read_unlock();
rpc_set_port(sap, 0);
- err = rpc_sockname(net, sap, salen, buf, buflen);
+ err = rpc_sockname(net, sap, salen, buf);
put_net(net);
if (err != 0)
/* Couldn't discover local address, return ANYADDR */
}
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
- err = kernel_getpeername(newsock, sin, &slen);
+ err = kernel_getpeername(newsock, sin);
if (err < 0) {
net_warn_ratelimited("%s: peername failed (err %d)!\n",
serv->sv_name, -err);
goto failed; /* aborted connection or whatever */
}
+ slen = err;
/* Ideally, we would want to reject connections from unauthorized
* hosts here, but when we get encryption, the IP of the host won't
if (IS_ERR(newsvsk))
goto failed;
svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
- err = kernel_getsockname(newsock, sin, &slen);
+ err = kernel_getsockname(newsock, sin);
+ slen = err;
if (unlikely(err < 0)) {
dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
slen = offsetof(struct sockaddr, sa_data);
err = PTR_ERR(svsk);
goto out;
}
- if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
+ salen = kernel_getsockname(svsk->sk_sock, sin);
+ if (salen >= 0)
svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
svc_add_new_perm_xprt(serv, &svsk->sk_xprt);
return svc_one_sock_name(svsk, name_return, len);
if (error < 0)
goto bummer;
- newlen = len;
- error = kernel_getsockname(sock, newsin, &newlen);
+ error = kernel_getsockname(sock, newsin);
if (error < 0)
goto bummer;
+ newlen = error;
if (protocol == IPPROTO_TCP) {
if ((error = kernel_listen(sock, 64)) < 0)
static unsigned short xs_sock_getport(struct socket *sock)
{
struct sockaddr_storage buf;
- int buflen;
unsigned short port = 0;
- if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0)
+ if (kernel_getsockname(sock, (struct sockaddr *)&buf) < 0)
goto out;
switch (buf.ss_family) {
case AF_INET6:
static struct pernet_operations sysctl_pernet_ops = {
.init = sysctl_net_init,
.exit = sysctl_net_exit,
+ .async = true,
};
static struct ctl_table_header *net_header;
core.o link.o discover.o msg.o \
name_distr.o subscr.o monitor.o name_table.o net.o \
netlink.o netlink_compat.o node.o socket.o eth_media.o \
- server.o socket.o group.o
+ topsrv.o socket.o group.o
tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
return err;
}
-int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
+int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
{
int err;
char *name;
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
- rtnl_lock();
bearer = tipc_bearer_find(net, name);
- if (!bearer) {
- rtnl_unlock();
+ if (!bearer)
return -EINVAL;
- }
bearer_disable(net, bearer);
- rtnl_unlock();
return 0;
}
-int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
+int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_bearer_disable(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
+
+int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
{
int err;
char *bearer;
prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
}
+ return tipc_enable_bearer(net, bearer, domain, prio, attrs);
+}
+
+int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
rtnl_lock();
- err = tipc_enable_bearer(net, bearer, domain, prio, attrs);
- if (err) {
- rtnl_unlock();
- return err;
- }
+ err = __tipc_nl_bearer_enable(skb, info);
rtnl_unlock();
- return 0;
+ return err;
}
int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
+int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
{
- int err;
- char *name;
struct tipc_bearer *b;
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
struct net *net = sock_net(skb->sk);
+ char *name;
+ int err;
if (!info->attrs[TIPC_NLA_BEARER])
return -EINVAL;
return -EINVAL;
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
- rtnl_lock();
b = tipc_bearer_find(net, name);
- if (!b) {
- rtnl_unlock();
+ if (!b)
return -EINVAL;
- }
if (attrs[TIPC_NLA_BEARER_PROP]) {
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP],
props);
- if (err) {
- rtnl_unlock();
+ if (err)
return err;
- }
- if (props[TIPC_NLA_PROP_TOL])
+ if (props[TIPC_NLA_PROP_TOL]) {
b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
+ tipc_node_apply_tolerance(net, b);
+ }
if (props[TIPC_NLA_PROP_PRIO])
b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
}
- rtnl_unlock();
return 0;
}
+int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_bearer_set(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
+
static int __tipc_nl_add_media(struct tipc_nl_msg *msg,
struct tipc_media *media, int nlflags)
{
return err;
}
-int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
+int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
{
int err;
char *name;
return -EINVAL;
name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]);
- rtnl_lock();
m = tipc_media_find(name);
- if (!m) {
- rtnl_unlock();
+ if (!m)
return -EINVAL;
- }
if (attrs[TIPC_NLA_MEDIA_PROP]) {
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP],
props);
- if (err) {
- rtnl_unlock();
+ if (err)
return err;
- }
if (props[TIPC_NLA_PROP_TOL])
m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
if (props[TIPC_NLA_PROP_WIN])
m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
}
- rtnl_unlock();
return 0;
}
+
+int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_media_set(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
#endif
int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
int tipc_media_set_priority(const char *name, u32 new_value);
int tipc_media_set_window(const char *name, u32 new_value);
struct tipc_bc_base;
struct tipc_link;
struct tipc_name_table;
-struct tipc_server;
+struct tipc_topsrv;
struct tipc_monitor;
#define TIPC_MOD_VER "2.0.0"
struct list_head dist_queue;
/* Topology subscription server */
- struct tipc_server *topsrv;
+ struct tipc_topsrv *topsrv;
atomic_t subscription_count;
};
return &tipc_net(net)->node_list;
}
-static inline struct tipc_server *tipc_topsrv(struct net *net)
+static inline struct tipc_topsrv *tipc_topsrv(struct net *net)
{
return tipc_net(net)->topsrv;
}
#include "addr.h"
#include "group.h"
#include "bcast.h"
-#include "server.h"
+#include "topsrv.h"
#include "msg.h"
#include "socket.h"
#include "node.h"
struct sk_buff_head *xmitq)
{
l->tolerance = tol;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
+ if (link_is_up(l))
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
}
void tipc_link_set_prio(struct tipc_link *l, u32 prio,
/* Any subscriptions waiting for notification? */
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
- tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
- TIPC_PUBLISHED, publ->ref,
- publ->node, publ->scope,
- created_subseq);
+ tipc_sub_report_overlap(s, publ->lower, publ->upper,
+ TIPC_PUBLISHED, publ->ref,
+ publ->node, publ->scope,
+ created_subseq);
}
return publ;
}
/* Notify any waiting subscriptions */
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
- tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
- TIPC_WITHDRAWN, publ->ref,
- publ->node, publ->scope,
- removed_subseq);
+ tipc_sub_report_overlap(s, publ->lower, publ->upper,
+ TIPC_WITHDRAWN, publ->ref, publ->node,
+ publ->scope, removed_subseq);
}
return publ;
* sequence overlapping with the requested sequence
*/
static void tipc_nameseq_subscribe(struct name_seq *nseq,
- struct tipc_subscription *s,
- bool status)
+ struct tipc_subscription *sub)
{
struct sub_seq *sseq = nseq->sseqs;
struct tipc_name_seq ns;
+ struct tipc_subscr *s = &sub->evt.s;
+ bool no_status;
- tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+ ns.type = tipc_sub_read(s, seq.type);
+ ns.lower = tipc_sub_read(s, seq.lower);
+ ns.upper = tipc_sub_read(s, seq.upper);
+ no_status = tipc_sub_read(s, filter) & TIPC_SUB_NO_STATUS;
- tipc_subscrp_get(s);
- list_add(&s->nameseq_list, &nseq->subscriptions);
+ tipc_sub_get(sub);
+ list_add(&sub->nameseq_list, &nseq->subscriptions);
- if (!status || !sseq)
+ if (no_status || !sseq)
return;
while (sseq != &nseq->sseqs[nseq->first_free]) {
- if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) {
+ if (tipc_sub_check_overlap(&ns, sseq->lower, sseq->upper)) {
struct publication *crs;
struct name_info *info = sseq->info;
int must_report = 1;
list_for_each_entry(crs, &info->zone_list, zone_list) {
- tipc_subscrp_report_overlap(s, sseq->lower,
- sseq->upper,
- TIPC_PUBLISHED,
- crs->ref, crs->node,
- crs->scope,
- must_report);
+ tipc_sub_report_overlap(sub, sseq->lower,
+ sseq->upper,
+ TIPC_PUBLISHED,
+ crs->ref, crs->node,
+ crs->scope,
+ must_report);
must_report = 0;
}
}
/**
* tipc_nametbl_subscribe - add a subscription object to the name table
*/
-void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
+void tipc_nametbl_subscribe(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(s->net, tipc_net_id);
- u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
+ struct tipc_net *tn = tipc_net(sub->net);
+ struct tipc_subscr *s = &sub->evt.s;
+ u32 type = tipc_sub_read(s, seq.type);
int index = hash(type);
struct name_seq *seq;
struct tipc_name_seq ns;
spin_lock_bh(&tn->nametbl_lock);
- seq = nametbl_find_seq(s->net, type);
+ seq = nametbl_find_seq(sub->net, type);
if (!seq)
seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
if (seq) {
spin_lock_bh(&seq->lock);
- tipc_nameseq_subscribe(seq, s, status);
+ tipc_nameseq_subscribe(seq, sub);
spin_unlock_bh(&seq->lock);
} else {
- tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+ ns.type = tipc_sub_read(s, seq.type);
+ ns.lower = tipc_sub_read(s, seq.lower);
+ ns.upper = tipc_sub_read(s, seq.upper);
pr_warn("Failed to create subscription for {%u,%u,%u}\n",
ns.type, ns.lower, ns.upper);
}
/**
* tipc_nametbl_unsubscribe - remove a subscription object from name table
*/
-void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
+void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(s->net, tipc_net_id);
+ struct tipc_subscr *s = &sub->evt.s;
+ struct tipc_net *tn = tipc_net(sub->net);
struct name_seq *seq;
- u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
+ u32 type = tipc_sub_read(s, seq.type);
spin_lock_bh(&tn->nametbl_lock);
- seq = nametbl_find_seq(s->net, type);
+ seq = nametbl_find_seq(sub->net, type);
if (seq != NULL) {
spin_lock_bh(&seq->lock);
- list_del_init(&s->nameseq_list);
- tipc_subscrp_put(s);
+ list_del_init(&sub->nameseq_list);
+ tipc_sub_put(sub);
if (!seq->first_free && list_empty(&seq->subscriptions)) {
hlist_del_init_rcu(&seq->ns_list);
kfree(seq->sseqs);
struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
u32 lower, u32 node, u32 ref,
u32 key);
-void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status);
+void tipc_nametbl_subscribe(struct tipc_subscription *s);
void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(struct net *net);
void tipc_nametbl_stop(struct net *net);
return skb->len;
}
-int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
+int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = net_generic(net, tipc_net_id);
if (!tipc_addr_node_valid(addr))
return -EINVAL;
- rtnl_lock();
tipc_net_start(net, addr);
- rtnl_unlock();
}
return 0;
}
+
+int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_net_set(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
#endif
if (!trans_buf)
return -ENOMEM;
- err = (*cmd->transcode)(cmd, trans_buf, msg);
- if (err)
- goto trans_out;
-
attrbuf = kmalloc((tipc_genl_family.maxattr + 1) *
sizeof(struct nlattr *), GFP_KERNEL);
if (!attrbuf) {
goto trans_out;
}
- err = nla_parse(attrbuf, tipc_genl_family.maxattr,
- (const struct nlattr *)trans_buf->data,
- trans_buf->len, NULL, NULL);
- if (err)
- goto parse_out;
-
doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!doit_buf) {
err = -ENOMEM;
- goto parse_out;
+ goto attrbuf_out;
}
- doit_buf->sk = msg->dst_sk;
-
memset(&info, 0, sizeof(info));
info.attrs = attrbuf;
+ rtnl_lock();
+ err = (*cmd->transcode)(cmd, trans_buf, msg);
+ if (err)
+ goto doit_out;
+
+ err = nla_parse(attrbuf, tipc_genl_family.maxattr,
+ (const struct nlattr *)trans_buf->data,
+ trans_buf->len, NULL, NULL);
+ if (err)
+ goto doit_out;
+
+ doit_buf->sk = msg->dst_sk;
+
err = (*cmd->doit)(doit_buf, &info);
+doit_out:
+ rtnl_unlock();
kfree_skb(doit_buf);
-parse_out:
+attrbuf_out:
kfree(attrbuf);
trans_out:
kfree_skb(trans_buf);
media = tipc_media_find(lc->name);
if (media) {
- cmd->doit = &tipc_nl_media_set;
+ cmd->doit = &__tipc_nl_media_set;
return tipc_nl_compat_media_set(skb, msg);
}
bearer = tipc_bearer_find(msg->net, lc->name);
if (bearer) {
- cmd->doit = &tipc_nl_bearer_set;
+ cmd->doit = &__tipc_nl_bearer_set;
return tipc_nl_compat_bearer_set(skb, msg);
}
return tipc_nl_compat_dumpit(&dump, msg);
case TIPC_CMD_ENABLE_BEARER:
msg->req_type = TIPC_TLV_BEARER_CONFIG;
- doit.doit = tipc_nl_bearer_enable;
+ doit.doit = __tipc_nl_bearer_enable;
doit.transcode = tipc_nl_compat_bearer_enable;
return tipc_nl_compat_doit(&doit, msg);
case TIPC_CMD_DISABLE_BEARER:
msg->req_type = TIPC_TLV_BEARER_NAME;
- doit.doit = tipc_nl_bearer_disable;
+ doit.doit = __tipc_nl_bearer_disable;
doit.transcode = tipc_nl_compat_bearer_disable;
return tipc_nl_compat_doit(&doit, msg);
case TIPC_CMD_SHOW_LINK_STATS:
return tipc_nl_compat_dumpit(&dump, msg);
case TIPC_CMD_SET_NODE_ADDR:
msg->req_type = TIPC_TLV_NET_ADDR;
- doit.doit = tipc_nl_net_set;
+ doit.doit = __tipc_nl_net_set;
doit.transcode = tipc_nl_compat_net_set;
return tipc_nl_compat_doit(&doit, msg);
case TIPC_CMD_SET_NETID:
msg->req_type = TIPC_TLV_UNSIGNED;
- doit.doit = tipc_nl_net_set;
+ doit.doit = __tipc_nl_net_set;
doit.transcode = tipc_nl_compat_net_set;
return tipc_nl_compat_doit(&doit, msg);
case TIPC_CMD_GET_NETID:
kfree_skb(skb);
}
+void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b)
+{
+ struct tipc_net *tn = tipc_net(net);
+ int bearer_id = b->identity;
+ struct sk_buff_head xmitq;
+ struct tipc_link_entry *e;
+ struct tipc_node *n;
+
+ __skb_queue_head_init(&xmitq);
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ tipc_node_write_lock(n);
+ e = &n->links[bearer_id];
+ if (e->link)
+ tipc_link_set_tolerance(e->link, b->tolerance, &xmitq);
+ tipc_node_write_unlock(n);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr);
+ }
+
+ rcu_read_unlock();
+}
+
int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr);
void tipc_node_delete_links(struct net *net, int bearer_id);
+void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b);
int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
char *linkname, size_t len);
int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
+++ /dev/null
-/*
- * net/tipc/server.c: TIPC server infrastructure
- *
- * Copyright (c) 2012-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "server.h"
-#include "core.h"
-#include "socket.h"
-#include "addr.h"
-#include "msg.h"
-#include <net/sock.h>
-#include <linux/module.h>
-
-/* Number of messages to send before rescheduling */
-#define MAX_SEND_MSG_COUNT 25
-#define MAX_RECV_MSG_COUNT 25
-#define CF_CONNECTED 1
-#define CF_SERVER 2
-
-#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data)
-
-/**
- * struct tipc_conn - TIPC connection structure
- * @kref: reference counter to connection object
- * @conid: connection identifier
- * @sock: socket handler associated with connection
- * @flags: indicates connection state
- * @server: pointer to connected server
- * @rwork: receive work item
- * @usr_data: user-specified field
- * @rx_action: what to do when connection socket is active
- * @outqueue: pointer to first outbound message in queue
- * @outqueue_lock: control access to the outqueue
- * @outqueue: list of connection objects for its server
- * @swork: send work item
- */
-struct tipc_conn {
- struct kref kref;
- int conid;
- struct socket *sock;
- unsigned long flags;
- struct tipc_server *server;
- struct work_struct rwork;
- int (*rx_action) (struct tipc_conn *con);
- void *usr_data;
- struct list_head outqueue;
- spinlock_t outqueue_lock;
- struct work_struct swork;
-};
-
-/* An entry waiting to be sent */
-struct outqueue_entry {
- struct list_head list;
- struct kvec iov;
- struct sockaddr_tipc dest;
-};
-
-static void tipc_recv_work(struct work_struct *work);
-static void tipc_send_work(struct work_struct *work);
-static void tipc_clean_outqueues(struct tipc_conn *con);
-
-static void tipc_conn_kref_release(struct kref *kref)
-{
- struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
- struct tipc_server *s = con->server;
- struct sockaddr_tipc *saddr = s->saddr;
- struct socket *sock = con->sock;
- struct sock *sk;
-
- if (sock) {
- sk = sock->sk;
- if (test_bit(CF_SERVER, &con->flags)) {
- __module_get(sock->ops->owner);
- __module_get(sk->sk_prot_creator->owner);
- }
- saddr->scope = -TIPC_NODE_SCOPE;
- kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
- sock_release(sock);
- con->sock = NULL;
- }
- spin_lock_bh(&s->idr_lock);
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- spin_unlock_bh(&s->idr_lock);
- tipc_clean_outqueues(con);
- kfree(con);
-}
-
-static void conn_put(struct tipc_conn *con)
-{
- kref_put(&con->kref, tipc_conn_kref_release);
-}
-
-static void conn_get(struct tipc_conn *con)
-{
- kref_get(&con->kref);
-}
-
-static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
-{
- struct tipc_conn *con;
-
- spin_lock_bh(&s->idr_lock);
- con = idr_find(&s->conn_idr, conid);
- if (con) {
- if (!test_bit(CF_CONNECTED, &con->flags) ||
- !kref_get_unless_zero(&con->kref))
- con = NULL;
- }
- spin_unlock_bh(&s->idr_lock);
- return con;
-}
-
-static void sock_data_ready(struct sock *sk)
-{
- struct tipc_conn *con;
-
- read_lock_bh(&sk->sk_callback_lock);
- con = sock2con(sk);
- if (con && test_bit(CF_CONNECTED, &con->flags)) {
- conn_get(con);
- if (!queue_work(con->server->rcv_wq, &con->rwork))
- conn_put(con);
- }
- read_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void sock_write_space(struct sock *sk)
-{
- struct tipc_conn *con;
-
- read_lock_bh(&sk->sk_callback_lock);
- con = sock2con(sk);
- if (con && test_bit(CF_CONNECTED, &con->flags)) {
- conn_get(con);
- if (!queue_work(con->server->send_wq, &con->swork))
- conn_put(con);
- }
- read_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
-{
- struct sock *sk = sock->sk;
-
- write_lock_bh(&sk->sk_callback_lock);
-
- sk->sk_data_ready = sock_data_ready;
- sk->sk_write_space = sock_write_space;
- sk->sk_user_data = con;
-
- con->sock = sock;
-
- write_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void tipc_close_conn(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct sock *sk = con->sock->sk;
- bool disconnect = false;
-
- write_lock_bh(&sk->sk_callback_lock);
- disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
- if (disconnect) {
- sk->sk_user_data = NULL;
- if (con->conid)
- s->tipc_conn_release(con->conid, con->usr_data);
- }
- write_unlock_bh(&sk->sk_callback_lock);
-
- /* Handle concurrent calls from sending and receiving threads */
- if (!disconnect)
- return;
-
- /* Don't flush pending works, -just let them expire */
- kernel_sock_shutdown(con->sock, SHUT_RDWR);
- conn_put(con);
-}
-
-static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
-{
- struct tipc_conn *con;
- int ret;
-
- con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC);
- if (!con)
- return ERR_PTR(-ENOMEM);
-
- kref_init(&con->kref);
- INIT_LIST_HEAD(&con->outqueue);
- spin_lock_init(&con->outqueue_lock);
- INIT_WORK(&con->swork, tipc_send_work);
- INIT_WORK(&con->rwork, tipc_recv_work);
-
- spin_lock_bh(&s->idr_lock);
- ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
- if (ret < 0) {
- kfree(con);
- spin_unlock_bh(&s->idr_lock);
- return ERR_PTR(-ENOMEM);
- }
- con->conid = ret;
- s->idr_in_use++;
- spin_unlock_bh(&s->idr_lock);
-
- set_bit(CF_CONNECTED, &con->flags);
- con->server = s;
-
- return con;
-}
-
-static int tipc_receive_from_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct sock *sk = con->sock->sk;
- struct sockaddr_tipc addr;
- struct msghdr msg = {};
- struct kvec iov;
- void *buf;
- int ret;
-
- buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC);
- if (!buf) {
- ret = -ENOMEM;
- goto out_close;
- }
-
- iov.iov_base = buf;
- iov.iov_len = s->max_rcvbuf_size;
- msg.msg_name = &addr;
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
- ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
- if (ret <= 0) {
- kmem_cache_free(s->rcvbuf_cache, buf);
- goto out_close;
- }
-
- read_lock_bh(&sk->sk_callback_lock);
- if (test_bit(CF_CONNECTED, &con->flags))
- ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid,
- &addr, con->usr_data, buf, ret);
- read_unlock_bh(&sk->sk_callback_lock);
- kmem_cache_free(s->rcvbuf_cache, buf);
- if (ret < 0)
- tipc_conn_terminate(s, con->conid);
- return ret;
-
-out_close:
- if (ret != -EWOULDBLOCK)
- tipc_close_conn(con);
- else if (ret == 0)
- /* Don't return success if we really got EOF */
- ret = -EAGAIN;
-
- return ret;
-}
-
-static int tipc_accept_from_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct socket *sock = con->sock;
- struct socket *newsock;
- struct tipc_conn *newcon;
- int ret;
-
- ret = kernel_accept(sock, &newsock, O_NONBLOCK);
- if (ret < 0)
- return ret;
-
- newcon = tipc_alloc_conn(con->server);
- if (IS_ERR(newcon)) {
- ret = PTR_ERR(newcon);
- sock_release(newsock);
- return ret;
- }
-
- newcon->rx_action = tipc_receive_from_sock;
- tipc_register_callbacks(newsock, newcon);
-
- /* Notify that new connection is incoming */
- newcon->usr_data = s->tipc_conn_new(newcon->conid);
- if (!newcon->usr_data) {
- sock_release(newsock);
- conn_put(newcon);
- return -ENOMEM;
- }
-
- /* Wake up receive process in case of 'SYN+' message */
- newsock->sk->sk_data_ready(newsock->sk);
- return ret;
-}
-
-static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct socket *sock = NULL;
- int ret;
-
- ret = sock_create_kern(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock);
- if (ret < 0)
- return NULL;
- ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
- (char *)&s->imp, sizeof(s->imp));
- if (ret < 0)
- goto create_err;
- ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr));
- if (ret < 0)
- goto create_err;
-
- switch (s->type) {
- case SOCK_STREAM:
- case SOCK_SEQPACKET:
- con->rx_action = tipc_accept_from_sock;
-
- ret = kernel_listen(sock, 0);
- if (ret < 0)
- goto create_err;
- break;
- case SOCK_DGRAM:
- case SOCK_RDM:
- con->rx_action = tipc_receive_from_sock;
- break;
- default:
- pr_err("Unknown socket type %d\n", s->type);
- goto create_err;
- }
-
- /* As server's listening socket owner and creator is the same module,
- * we have to decrease TIPC module reference count to guarantee that
- * it remains zero after the server socket is created, otherwise,
- * executing "rmmod" command is unable to make TIPC module deleted
- * after TIPC module is inserted successfully.
- *
- * However, the reference count is ever increased twice in
- * sock_create_kern(): one is to increase the reference count of owner
- * of TIPC socket's proto_ops struct; another is to increment the
- * reference count of owner of TIPC proto struct. Therefore, we must
- * decrement the module reference count twice to ensure that it keeps
- * zero after server's listening socket is created. Of course, we
- * must bump the module reference count twice as well before the socket
- * is closed.
- */
- module_put(sock->ops->owner);
- module_put(sock->sk->sk_prot_creator->owner);
- set_bit(CF_SERVER, &con->flags);
-
- return sock;
-
-create_err:
- kernel_sock_shutdown(sock, SHUT_RDWR);
- sock_release(sock);
- return NULL;
-}
-
-static int tipc_open_listening_sock(struct tipc_server *s)
-{
- struct socket *sock;
- struct tipc_conn *con;
-
- con = tipc_alloc_conn(s);
- if (IS_ERR(con))
- return PTR_ERR(con);
-
- sock = tipc_create_listen_sock(con);
- if (!sock) {
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- kfree(con);
- return -EINVAL;
- }
-
- tipc_register_callbacks(sock, con);
- return 0;
-}
-
-static struct outqueue_entry *tipc_alloc_entry(void *data, int len)
-{
- struct outqueue_entry *entry;
- void *buf;
-
- entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC);
- if (!entry)
- return NULL;
-
- buf = kmemdup(data, len, GFP_ATOMIC);
- if (!buf) {
- kfree(entry);
- return NULL;
- }
-
- entry->iov.iov_base = buf;
- entry->iov.iov_len = len;
-
- return entry;
-}
-
-static void tipc_free_entry(struct outqueue_entry *e)
-{
- kfree(e->iov.iov_base);
- kfree(e);
-}
-
-static void tipc_clean_outqueues(struct tipc_conn *con)
-{
- struct outqueue_entry *e, *safe;
-
- spin_lock_bh(&con->outqueue_lock);
- list_for_each_entry_safe(e, safe, &con->outqueue, list) {
- list_del(&e->list);
- tipc_free_entry(e);
- }
- spin_unlock_bh(&con->outqueue_lock);
-}
-
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
- struct sockaddr_tipc *addr, void *data, size_t len)
-{
- struct outqueue_entry *e;
- struct tipc_conn *con;
-
- con = tipc_conn_lookup(s, conid);
- if (!con)
- return -EINVAL;
-
- if (!test_bit(CF_CONNECTED, &con->flags)) {
- conn_put(con);
- return 0;
- }
-
- e = tipc_alloc_entry(data, len);
- if (!e) {
- conn_put(con);
- return -ENOMEM;
- }
-
- if (addr)
- memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc));
-
- spin_lock_bh(&con->outqueue_lock);
- list_add_tail(&e->list, &con->outqueue);
- spin_unlock_bh(&con->outqueue_lock);
-
- if (!queue_work(s->send_wq, &con->swork))
- conn_put(con);
- return 0;
-}
-
-void tipc_conn_terminate(struct tipc_server *s, int conid)
-{
- struct tipc_conn *con;
-
- con = tipc_conn_lookup(s, conid);
- if (con) {
- tipc_close_conn(con);
- conn_put(con);
- }
-}
-
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
- u32 upper, u32 filter, int *conid)
-{
- struct tipc_subscriber *scbr;
- struct tipc_subscr sub;
- struct tipc_server *s;
- struct tipc_conn *con;
-
- sub.seq.type = type;
- sub.seq.lower = lower;
- sub.seq.upper = upper;
- sub.timeout = TIPC_WAIT_FOREVER;
- sub.filter = filter;
- *(u32 *)&sub.usr_handle = port;
-
- con = tipc_alloc_conn(tipc_topsrv(net));
- if (IS_ERR(con))
- return false;
-
- *conid = con->conid;
- s = con->server;
- scbr = s->tipc_conn_new(*conid);
- if (!scbr) {
- conn_put(con);
- return false;
- }
-
- con->usr_data = scbr;
- con->sock = NULL;
- s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub));
- return true;
-}
-
-void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
-{
- struct tipc_conn *con;
- struct tipc_server *srv;
-
- con = tipc_conn_lookup(tipc_topsrv(net), conid);
- if (!con)
- return;
-
- test_and_clear_bit(CF_CONNECTED, &con->flags);
- srv = con->server;
- if (con->conid)
- srv->tipc_conn_release(con->conid, con->usr_data);
- conn_put(con);
- conn_put(con);
-}
-
-static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
-{
- u32 port = *(u32 *)&evt->s.usr_handle;
- u32 self = tipc_own_addr(net);
- struct sk_buff_head evtq;
- struct sk_buff *skb;
-
- skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
- self, self, port, port, 0);
- if (!skb)
- return;
- msg_set_dest_droppable(buf_msg(skb), true);
- memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
- skb_queue_head_init(&evtq);
- __skb_queue_tail(&evtq, skb);
- tipc_sk_rcv(net, &evtq);
-}
-
-static void tipc_send_to_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct outqueue_entry *e;
- struct tipc_event *evt;
- struct msghdr msg;
- int count = 0;
- int ret;
-
- spin_lock_bh(&con->outqueue_lock);
- while (test_bit(CF_CONNECTED, &con->flags)) {
- e = list_entry(con->outqueue.next, struct outqueue_entry, list);
- if ((struct list_head *) e == &con->outqueue)
- break;
-
- spin_unlock_bh(&con->outqueue_lock);
-
- if (con->sock) {
- memset(&msg, 0, sizeof(msg));
- msg.msg_flags = MSG_DONTWAIT;
- if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
- msg.msg_name = &e->dest;
- msg.msg_namelen = sizeof(struct sockaddr_tipc);
- }
- ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
- e->iov.iov_len);
- if (ret == -EWOULDBLOCK || ret == 0) {
- cond_resched();
- goto out;
- } else if (ret < 0) {
- goto send_err;
- }
- } else {
- evt = e->iov.iov_base;
- tipc_send_kern_top_evt(s->net, evt);
- }
- /* Don't starve users filling buffers */
- if (++count >= MAX_SEND_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
-
- spin_lock_bh(&con->outqueue_lock);
- list_del(&e->list);
- tipc_free_entry(e);
- }
- spin_unlock_bh(&con->outqueue_lock);
-out:
- return;
-
-send_err:
- tipc_close_conn(con);
-}
-
-static void tipc_recv_work(struct work_struct *work)
-{
- struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
- int count = 0;
-
- while (test_bit(CF_CONNECTED, &con->flags)) {
- if (con->rx_action(con))
- break;
-
- /* Don't flood Rx machine */
- if (++count >= MAX_RECV_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
- }
- conn_put(con);
-}
-
-static void tipc_send_work(struct work_struct *work)
-{
- struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
-
- if (test_bit(CF_CONNECTED, &con->flags))
- tipc_send_to_sock(con);
-
- conn_put(con);
-}
-
-static void tipc_work_stop(struct tipc_server *s)
-{
- destroy_workqueue(s->rcv_wq);
- destroy_workqueue(s->send_wq);
-}
-
-static int tipc_work_start(struct tipc_server *s)
-{
- s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
- if (!s->rcv_wq) {
- pr_err("can't start tipc receive workqueue\n");
- return -ENOMEM;
- }
-
- s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
- if (!s->send_wq) {
- pr_err("can't start tipc send workqueue\n");
- destroy_workqueue(s->rcv_wq);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-int tipc_server_start(struct tipc_server *s)
-{
- int ret;
-
- spin_lock_init(&s->idr_lock);
- idr_init(&s->conn_idr);
- s->idr_in_use = 0;
-
- s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size,
- 0, SLAB_HWCACHE_ALIGN, NULL);
- if (!s->rcvbuf_cache)
- return -ENOMEM;
-
- ret = tipc_work_start(s);
- if (ret < 0) {
- kmem_cache_destroy(s->rcvbuf_cache);
- return ret;
- }
- ret = tipc_open_listening_sock(s);
- if (ret < 0) {
- tipc_work_stop(s);
- kmem_cache_destroy(s->rcvbuf_cache);
- return ret;
- }
- return ret;
-}
-
-void tipc_server_stop(struct tipc_server *s)
-{
- struct tipc_conn *con;
- int id;
-
- spin_lock_bh(&s->idr_lock);
- for (id = 0; s->idr_in_use; id++) {
- con = idr_find(&s->conn_idr, id);
- if (con) {
- spin_unlock_bh(&s->idr_lock);
- tipc_close_conn(con);
- spin_lock_bh(&s->idr_lock);
- }
- }
- spin_unlock_bh(&s->idr_lock);
-
- tipc_work_stop(s);
- kmem_cache_destroy(s->rcvbuf_cache);
- idr_destroy(&s->conn_idr);
-}
+++ /dev/null
-/*
- * net/tipc/server.h: Include file for TIPC server code
- *
- * Copyright (c) 2012-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_SERVER_H
-#define _TIPC_SERVER_H
-
-#include <linux/idr.h>
-#include <linux/tipc.h>
-#include <net/net_namespace.h>
-
-#define TIPC_SERVER_NAME_LEN 32
-#define TIPC_SUB_CLUSTER_SCOPE 0x20
-#define TIPC_SUB_NODE_SCOPE 0x40
-#define TIPC_SUB_NO_STATUS 0x80
-
-/**
- * struct tipc_server - TIPC server structure
- * @conn_idr: identifier set of connection
- * @idr_lock: protect the connection identifier set
- * @idr_in_use: amount of allocated identifier entry
- * @net: network namspace instance
- * @rcvbuf_cache: memory cache of server receive buffer
- * @rcv_wq: receive workqueue
- * @send_wq: send workqueue
- * @max_rcvbuf_size: maximum permitted receive message length
- * @tipc_conn_new: callback will be called when new connection is incoming
- * @tipc_conn_release: callback will be called before releasing the connection
- * @tipc_conn_recvmsg: callback will be called when message arrives
- * @saddr: TIPC server address
- * @name: server name
- * @imp: message importance
- * @type: socket type
- */
-struct tipc_server {
- struct idr conn_idr;
- spinlock_t idr_lock;
- int idr_in_use;
- struct net *net;
- struct kmem_cache *rcvbuf_cache;
- struct workqueue_struct *rcv_wq;
- struct workqueue_struct *send_wq;
- int max_rcvbuf_size;
- void *(*tipc_conn_new)(int conid);
- void (*tipc_conn_release)(int conid, void *usr_data);
- int (*tipc_conn_recvmsg)(struct net *net, int conid,
- struct sockaddr_tipc *addr, void *usr_data,
- void *buf, size_t len);
- struct sockaddr_tipc *saddr;
- char name[TIPC_SERVER_NAME_LEN];
- int imp;
- int type;
-};
-
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
- struct sockaddr_tipc *addr, void *data, size_t len);
-
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
- u32 upper, u32 filter, int *conid);
-void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
-
-/**
- * tipc_conn_terminate - terminate connection with server
- *
- * Note: Must call it in process context since it might sleep
- */
-void tipc_conn_terminate(struct tipc_server *s, int conid);
-int tipc_server_start(struct tipc_server *s);
-
-void tipc_server_stop(struct tipc_server *s);
-
-#endif
* a completely predictable manner).
*/
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
struct sock *sk = sock->sk;
addr->addr.id.node = tn->own_addr;
}
- *uaddr_len = sizeof(*addr);
addr->addrtype = TIPC_ADDR_ID;
addr->family = AF_TIPC;
addr->scope = 0;
addr->addr.name.domain = 0;
- return 0;
+ return sizeof(*addr);
}
/**
/*
* net/tipc/subscr.c: TIPC network topology service
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2017, Ericsson AB
* Copyright (c) 2005-2007, 2010-2013, Wind River Systems
* All rights reserved.
*
#include "name_table.h"
#include "subscr.h"
-/**
- * struct tipc_subscriber - TIPC network topology subscriber
- * @kref: reference counter to tipc_subscription object
- * @conid: connection identifier to server connecting to subscriber
- * @lock: control access to subscriber
- * @subscrp_list: list of subscription objects for this subscriber
- */
-struct tipc_subscriber {
- struct kref kref;
- int conid;
- spinlock_t lock;
- struct list_head subscrp_list;
-};
-
-static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
-
-/**
- * htohl - convert value to endianness used by destination
- * @in: value to convert
- * @swap: non-zero if endianness must be reversed
- *
- * Returns converted value
- */
-static u32 htohl(u32 in, int swap)
-{
- return swap ? swab32(in) : in;
-}
-
-static void tipc_subscrp_send_event(struct tipc_subscription *sub,
- u32 found_lower, u32 found_upper,
- u32 event, u32 port_ref, u32 node)
+static void tipc_sub_send_event(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper,
+ u32 event, u32 port, u32 node)
{
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
- struct tipc_subscriber *subscriber = sub->subscriber;
- struct kvec msg_sect;
+ struct tipc_event *evt = &sub->evt;
- msg_sect.iov_base = (void *)&sub->evt;
- msg_sect.iov_len = sizeof(struct tipc_event);
- sub->evt.event = htohl(event, sub->swap);
- sub->evt.found_lower = htohl(found_lower, sub->swap);
- sub->evt.found_upper = htohl(found_upper, sub->swap);
- sub->evt.port.ref = htohl(port_ref, sub->swap);
- sub->evt.port.node = htohl(node, sub->swap);
- tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL,
- msg_sect.iov_base, msg_sect.iov_len);
+ if (sub->inactive)
+ return;
+ tipc_evt_write(evt, event, event);
+ tipc_evt_write(evt, found_lower, found_lower);
+ tipc_evt_write(evt, found_upper, found_upper);
+ tipc_evt_write(evt, port.ref, port);
+ tipc_evt_write(evt, port.node, node);
+ tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt);
}
/**
- * tipc_subscrp_check_overlap - test for subscription overlap with the
+ * tipc_sub_check_overlap - test for subscription overlap with the
* given values
*
* Returns 1 if there is overlap, otherwise 0.
*/
-int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
- u32 found_upper)
+int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
+ u32 found_upper)
{
if (found_lower < seq->lower)
found_lower = seq->lower;
return 1;
}
-u32 tipc_subscrp_convert_seq_type(u32 type, int swap)
-{
- return htohl(type, swap);
-}
-
-void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
- struct tipc_name_seq *out)
-{
- out->type = htohl(in->type, swap);
- out->lower = htohl(in->lower, swap);
- out->upper = htohl(in->upper, swap);
-}
-
-void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper, u32 event, u32 port_ref,
- u32 node, u32 scope, int must)
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper,
+ u32 event, u32 port, u32 node,
+ u32 scope, int must)
{
- u32 filter = htohl(sub->evt.s.filter, sub->swap);
+ struct tipc_subscr *s = &sub->evt.s;
+ u32 filter = tipc_sub_read(s, filter);
struct tipc_name_seq seq;
- tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
- if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
+ seq.type = tipc_sub_read(s, seq.type);
+ seq.lower = tipc_sub_read(s, seq.lower);
+ seq.upper = tipc_sub_read(s, seq.upper);
+
+ if (!tipc_sub_check_overlap(&seq, found_lower, found_upper))
return;
+
if (!must && !(filter & TIPC_SUB_PORTS))
return;
if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE)
return;
if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
return;
-
- tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
- node);
+ spin_lock(&sub->lock);
+ tipc_sub_send_event(sub, found_lower, found_upper,
+ event, port, node);
+ spin_unlock(&sub->lock);
}
-static void tipc_subscrp_timeout(struct timer_list *t)
+static void tipc_sub_timeout(struct timer_list *t)
{
struct tipc_subscription *sub = from_timer(sub, t, timer);
- struct tipc_subscriber *subscriber = sub->subscriber;
-
- spin_lock_bh(&subscriber->lock);
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscrp_list);
- spin_unlock_bh(&subscriber->lock);
-
- /* Notify subscriber of timeout */
- tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
- TIPC_SUBSCR_TIMEOUT, 0, 0);
-
- tipc_subscrp_put(sub);
-}
-
-static void tipc_subscrb_kref_release(struct kref *kref)
-{
- kfree(container_of(kref,struct tipc_subscriber, kref));
-}
-
-static void tipc_subscrb_put(struct tipc_subscriber *subscriber)
-{
- kref_put(&subscriber->kref, tipc_subscrb_kref_release);
-}
+ struct tipc_subscr *s = &sub->evt.s;
-static void tipc_subscrb_get(struct tipc_subscriber *subscriber)
-{
- kref_get(&subscriber->kref);
+ spin_lock(&sub->lock);
+ tipc_sub_send_event(sub, s->seq.lower, s->seq.upper,
+ TIPC_SUBSCR_TIMEOUT, 0, 0);
+ sub->inactive = true;
+ spin_unlock(&sub->lock);
}
-static void tipc_subscrp_kref_release(struct kref *kref)
+static void tipc_sub_kref_release(struct kref *kref)
{
- struct tipc_subscription *sub = container_of(kref,
- struct tipc_subscription,
- kref);
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
- struct tipc_subscriber *subscriber = sub->subscriber;
-
- atomic_dec(&tn->subscription_count);
- kfree(sub);
- tipc_subscrb_put(subscriber);
+ kfree(container_of(kref, struct tipc_subscription, kref));
}
-void tipc_subscrp_put(struct tipc_subscription *subscription)
+void tipc_sub_put(struct tipc_subscription *subscription)
{
- kref_put(&subscription->kref, tipc_subscrp_kref_release);
+ kref_put(&subscription->kref, tipc_sub_kref_release);
}
-void tipc_subscrp_get(struct tipc_subscription *subscription)
+void tipc_sub_get(struct tipc_subscription *subscription)
{
kref_get(&subscription->kref);
}
-/* tipc_subscrb_subscrp_delete - delete a specific subscription or all
- * subscriptions for a given subscriber.
- */
-static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
- struct tipc_subscr *s)
-{
- struct list_head *subscription_list = &subscriber->subscrp_list;
- struct tipc_subscription *sub, *temp;
- u32 timeout;
-
- spin_lock_bh(&subscriber->lock);
- list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) {
- if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
- continue;
-
- timeout = htohl(sub->evt.s.timeout, sub->swap);
- if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) {
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscrp_list);
- tipc_subscrp_put(sub);
- }
-
- if (s)
- break;
- }
- spin_unlock_bh(&subscriber->lock);
-}
-
-static struct tipc_subscriber *tipc_subscrb_create(int conid)
-{
- struct tipc_subscriber *subscriber;
-
- subscriber = kzalloc(sizeof(*subscriber), GFP_ATOMIC);
- if (!subscriber) {
- pr_warn("Subscriber rejected, no memory\n");
- return NULL;
- }
- INIT_LIST_HEAD(&subscriber->subscrp_list);
- kref_init(&subscriber->kref);
- subscriber->conid = conid;
- spin_lock_init(&subscriber->lock);
-
- return subscriber;
-}
-
-static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
-{
- tipc_subscrb_subscrp_delete(subscriber, NULL);
- tipc_subscrb_put(subscriber);
-}
-
-static void tipc_subscrp_cancel(struct tipc_subscr *s,
- struct tipc_subscriber *subscriber)
-{
- tipc_subscrb_get(subscriber);
- tipc_subscrb_subscrp_delete(subscriber, s);
- tipc_subscrb_put(subscriber);
-}
-
-static struct tipc_subscription *tipc_subscrp_create(struct net *net,
- struct tipc_subscr *s,
- int swap)
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
+ struct tipc_subscr *s,
+ int conid)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ u32 filter = tipc_sub_read(s, filter);
struct tipc_subscription *sub;
- u32 filter = htohl(s->filter, swap);
+ u32 timeout;
- /* Refuse subscription if global limit exceeded */
- if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
- pr_warn("Subscription rejected, limit reached (%u)\n",
- TIPC_MAX_SUBSCRIPTIONS);
+ if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) ||
+ (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) {
+ pr_warn("Subscription rejected, illegal request\n");
return NULL;
}
-
- /* Allocate subscription object */
sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
if (!sub) {
pr_warn("Subscription rejected, no memory\n");
return NULL;
}
-
- /* Initialize subscription object */
sub->net = net;
- if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) ||
- (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) {
- pr_warn("Subscription rejected, illegal request\n");
- kfree(sub);
- return NULL;
- }
-
- sub->swap = swap;
+ sub->conid = conid;
+ sub->inactive = false;
memcpy(&sub->evt.s, s, sizeof(*s));
- atomic_inc(&tn->subscription_count);
+ spin_lock_init(&sub->lock);
kref_init(&sub->kref);
- return sub;
-}
-
-static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
- struct tipc_subscriber *subscriber, int swap,
- bool status)
-{
- struct tipc_subscription *sub = NULL;
- u32 timeout;
-
- sub = tipc_subscrp_create(net, s, swap);
- if (!sub)
- return -1;
-
- spin_lock_bh(&subscriber->lock);
- list_add(&sub->subscrp_list, &subscriber->subscrp_list);
- sub->subscriber = subscriber;
- tipc_nametbl_subscribe(sub, status);
- tipc_subscrb_get(subscriber);
- spin_unlock_bh(&subscriber->lock);
-
- timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
- timeout = htohl(sub->evt.s.timeout, swap);
-
+ tipc_nametbl_subscribe(sub);
+ timer_setup(&sub->timer, tipc_sub_timeout, 0);
+ timeout = tipc_sub_read(&sub->evt.s, timeout);
if (timeout != TIPC_WAIT_FOREVER)
mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
- return 0;
-}
-
-/* Handle one termination request for the subscriber */
-static void tipc_subscrb_release_cb(int conid, void *usr_data)
-{
- tipc_subscrb_delete((struct tipc_subscriber *)usr_data);
-}
-
-/* Handle one request to create a new subscription for the subscriber */
-static int tipc_subscrb_rcv_cb(struct net *net, int conid,
- struct sockaddr_tipc *addr, void *usr_data,
- void *buf, size_t len)
-{
- struct tipc_subscriber *subscriber = usr_data;
- struct tipc_subscr *s = (struct tipc_subscr *)buf;
- bool status;
- int swap;
-
- /* Determine subscriber's endianness */
- swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE |
- TIPC_SUB_CANCEL));
-
- /* Detect & process a subscription cancellation request */
- if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
- s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
- tipc_subscrp_cancel(s, subscriber);
- return 0;
- }
- status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap));
- return tipc_subscrp_subscribe(net, s, subscriber, swap, status);
-}
-
-/* Handle one request to establish a new subscriber */
-static void *tipc_subscrb_connect_cb(int conid)
-{
- return (void *)tipc_subscrb_create(conid);
-}
-
-int tipc_topsrv_start(struct net *net)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- const char name[] = "topology_server";
- struct tipc_server *topsrv;
- struct sockaddr_tipc *saddr;
-
- saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC);
- if (!saddr)
- return -ENOMEM;
- saddr->family = AF_TIPC;
- saddr->addrtype = TIPC_ADDR_NAMESEQ;
- saddr->addr.nameseq.type = TIPC_TOP_SRV;
- saddr->addr.nameseq.lower = TIPC_TOP_SRV;
- saddr->addr.nameseq.upper = TIPC_TOP_SRV;
- saddr->scope = TIPC_NODE_SCOPE;
-
- topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC);
- if (!topsrv) {
- kfree(saddr);
- return -ENOMEM;
- }
- topsrv->net = net;
- topsrv->saddr = saddr;
- topsrv->imp = TIPC_CRITICAL_IMPORTANCE;
- topsrv->type = SOCK_SEQPACKET;
- topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr);
- topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb;
- topsrv->tipc_conn_new = tipc_subscrb_connect_cb;
- topsrv->tipc_conn_release = tipc_subscrb_release_cb;
-
- strncpy(topsrv->name, name, strlen(name) + 1);
- tn->topsrv = topsrv;
- atomic_set(&tn->subscription_count, 0);
-
- return tipc_server_start(topsrv);
+ return sub;
}
-void tipc_topsrv_stop(struct net *net)
+void tipc_sub_unsubscribe(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_server *topsrv = tn->topsrv;
-
- tipc_server_stop(topsrv);
- kfree(topsrv->saddr);
- kfree(topsrv);
+ tipc_nametbl_unsubscribe(sub);
+ if (sub->evt.s.timeout != TIPC_WAIT_FOREVER)
+ del_timer_sync(&sub->timer);
+ list_del(&sub->sub_list);
+ tipc_sub_put(sub);
}
/*
* net/tipc/subscr.h: Include file for TIPC network topology service
*
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2017, Ericsson AB
* Copyright (c) 2005-2007, 2012-2013, Wind River Systems
* All rights reserved.
*
#ifndef _TIPC_SUBSCR_H
#define _TIPC_SUBSCR_H
-#include "server.h"
+#include "topsrv.h"
-#define TIPC_MAX_SUBSCRIPTIONS 65535
-#define TIPC_MAX_PUBLICATIONS 65535
+#define TIPC_MAX_SUBSCR 65535
+#define TIPC_MAX_PUBLICATIONS 65535
struct tipc_subscription;
-struct tipc_subscriber;
+struct tipc_conn;
/**
* struct tipc_subscription - TIPC network topology subscription object
* @subscriber: pointer to its subscriber
* @seq: name sequence associated with subscription
- * @net: point to network namespace
* @timer: timer governing subscription duration (optional)
* @nameseq_list: adjacent subscriptions in name sequence's subscription list
- * @subscrp_list: adjacent subscriptions in subscriber's subscription list
- * @swap: indicates if subscriber uses opposite endianness in its messages
+ * @sub_list: adjacent subscriptions in subscriber's subscription list
* @evt: template for events generated by subscription
*/
struct tipc_subscription {
struct kref kref;
- struct tipc_subscriber *subscriber;
struct net *net;
struct timer_list timer;
struct list_head nameseq_list;
- struct list_head subscrp_list;
- int swap;
+ struct list_head sub_list;
struct tipc_event evt;
+ int conid;
+ bool inactive;
+ spinlock_t lock; /* serialize up/down and timer events */
};
-int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
- u32 found_upper);
-void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
- u32 found_lower, u32 found_upper, u32 event,
- u32 port_ref, u32 node, u32 scope, int must);
-void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
- struct tipc_name_seq *out);
-u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
+ struct tipc_subscr *s,
+ int conid);
+void tipc_sub_unsubscribe(struct tipc_subscription *sub);
+
+int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
+ u32 found_upper);
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper,
+ u32 event, u32 port, u32 node,
+ u32 scope, int must);
int tipc_topsrv_start(struct net *net);
void tipc_topsrv_stop(struct net *net);
-void tipc_subscrp_put(struct tipc_subscription *subscription);
-void tipc_subscrp_get(struct tipc_subscription *subscription);
+void tipc_sub_put(struct tipc_subscription *subscription);
+void tipc_sub_get(struct tipc_subscription *subscription);
+
+#define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL)
+
+/* tipc_sub_read - return field_ of struct sub_ in host endian format
+ */
+#define tipc_sub_read(sub_, field_) \
+ ({ \
+ struct tipc_subscr *sub__ = sub_; \
+ u32 val__ = (sub__)->field_; \
+ int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \
+ (swap_ ? swab32(val__) : val__); \
+ })
+
+/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format
+ */
+#define tipc_evt_write(evt_, field_, val_) \
+ ({ \
+ struct tipc_event *evt__ = evt_; \
+ u32 val__ = val_; \
+ int swap_ = !((evt__)->s.filter & (TIPC_FILTER_MASK)); \
+ (evt__)->field_ = swap_ ? swab32(val__) : val__; \
+ })
#endif
--- /dev/null
+/*
+ * net/tipc/server.c: TIPC server infrastructure
+ *
+ * Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017-2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "subscr.h"
+#include "topsrv.h"
+#include "core.h"
+#include "socket.h"
+#include "addr.h"
+#include "msg.h"
+#include <net/sock.h>
+#include <linux/module.h>
+
+/* Number of messages to send before rescheduling */
+#define MAX_SEND_MSG_COUNT 25
+#define MAX_RECV_MSG_COUNT 25
+#define CF_CONNECTED 1
+#define CF_SERVER 2
+
+#define TIPC_SERVER_NAME_LEN 32
+
+/**
+ * struct tipc_topsrv - TIPC server structure
+ * @conn_idr: identifier set of connection
+ * @idr_lock: protect the connection identifier set
+ * @idr_in_use: amount of allocated identifier entry
+ * @net: network namspace instance
+ * @rcvbuf_cache: memory cache of server receive buffer
+ * @rcv_wq: receive workqueue
+ * @send_wq: send workqueue
+ * @max_rcvbuf_size: maximum permitted receive message length
+ * @tipc_conn_new: callback will be called when new connection is incoming
+ * @tipc_conn_release: callback will be called before releasing the connection
+ * @tipc_conn_recvmsg: callback will be called when message arrives
+ * @name: server name
+ * @imp: message importance
+ * @type: socket type
+ */
+struct tipc_topsrv {
+ struct idr conn_idr;
+ spinlock_t idr_lock; /* for idr list */
+ int idr_in_use;
+ struct net *net;
+ struct work_struct awork;
+ struct workqueue_struct *rcv_wq;
+ struct workqueue_struct *send_wq;
+ int max_rcvbuf_size;
+ struct socket *listener;
+ char name[TIPC_SERVER_NAME_LEN];
+};
+
+/**
+ * struct tipc_conn - TIPC connection structure
+ * @kref: reference counter to connection object
+ * @conid: connection identifier
+ * @sock: socket handler associated with connection
+ * @flags: indicates connection state
+ * @server: pointer to connected server
+ * @sub_list: lsit to all pertaing subscriptions
+ * @sub_lock: lock protecting the subscription list
+ * @outqueue_lock: control access to the outqueue
+ * @rwork: receive work item
+ * @rx_action: what to do when connection socket is active
+ * @outqueue: pointer to first outbound message in queue
+ * @outqueue_lock: control access to the outqueue
+ * @swork: send work item
+ */
+struct tipc_conn {
+ struct kref kref;
+ int conid;
+ struct socket *sock;
+ unsigned long flags;
+ struct tipc_topsrv *server;
+ struct list_head sub_list;
+ spinlock_t sub_lock; /* for subscription list */
+ struct work_struct rwork;
+ struct list_head outqueue;
+ spinlock_t outqueue_lock; /* for outqueue */
+ struct work_struct swork;
+};
+
+/* An entry waiting to be sent */
+struct outqueue_entry {
+ bool inactive;
+ struct tipc_event evt;
+ struct list_head list;
+};
+
+static void tipc_conn_recv_work(struct work_struct *work);
+static void tipc_conn_send_work(struct work_struct *work);
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt);
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s);
+
+static bool connected(struct tipc_conn *con)
+{
+ return con && test_bit(CF_CONNECTED, &con->flags);
+}
+
+static void tipc_conn_kref_release(struct kref *kref)
+{
+ struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
+ struct tipc_topsrv *s = con->server;
+ struct outqueue_entry *e, *safe;
+
+ spin_lock_bh(&s->idr_lock);
+ idr_remove(&s->conn_idr, con->conid);
+ s->idr_in_use--;
+ spin_unlock_bh(&s->idr_lock);
+ if (con->sock)
+ sock_release(con->sock);
+
+ spin_lock_bh(&con->outqueue_lock);
+ list_for_each_entry_safe(e, safe, &con->outqueue, list) {
+ list_del(&e->list);
+ kfree(e);
+ }
+ spin_unlock_bh(&con->outqueue_lock);
+ kfree(con);
+}
+
+static void conn_put(struct tipc_conn *con)
+{
+ kref_put(&con->kref, tipc_conn_kref_release);
+}
+
+static void conn_get(struct tipc_conn *con)
+{
+ kref_get(&con->kref);
+}
+
+static void tipc_conn_close(struct tipc_conn *con)
+{
+ struct sock *sk = con->sock->sk;
+ bool disconnect = false;
+
+ write_lock_bh(&sk->sk_callback_lock);
+ disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
+
+ if (disconnect) {
+ sk->sk_user_data = NULL;
+ tipc_conn_delete_sub(con, NULL);
+ }
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ /* Handle concurrent calls from sending and receiving threads */
+ if (!disconnect)
+ return;
+
+ /* Don't flush pending works, -just let them expire */
+ kernel_sock_shutdown(con->sock, SHUT_RDWR);
+
+ conn_put(con);
+}
+
+static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
+{
+ struct tipc_conn *con;
+ int ret;
+
+ con = kzalloc(sizeof(*con), GFP_ATOMIC);
+ if (!con)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&con->kref);
+ INIT_LIST_HEAD(&con->outqueue);
+ INIT_LIST_HEAD(&con->sub_list);
+ spin_lock_init(&con->outqueue_lock);
+ spin_lock_init(&con->sub_lock);
+ INIT_WORK(&con->swork, tipc_conn_send_work);
+ INIT_WORK(&con->rwork, tipc_conn_recv_work);
+
+ spin_lock_bh(&s->idr_lock);
+ ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
+ if (ret < 0) {
+ kfree(con);
+ spin_unlock_bh(&s->idr_lock);
+ return ERR_PTR(-ENOMEM);
+ }
+ con->conid = ret;
+ s->idr_in_use++;
+ spin_unlock_bh(&s->idr_lock);
+
+ set_bit(CF_CONNECTED, &con->flags);
+ con->server = s;
+
+ return con;
+}
+
+static struct tipc_conn *tipc_conn_lookup(struct tipc_topsrv *s, int conid)
+{
+ struct tipc_conn *con;
+
+ spin_lock_bh(&s->idr_lock);
+ con = idr_find(&s->conn_idr, conid);
+ if (!connected(con) || !kref_get_unless_zero(&con->kref))
+ con = NULL;
+ spin_unlock_bh(&s->idr_lock);
+ return con;
+}
+
+/* tipc_conn_delete_sub - delete a specific or all subscriptions
+ * for a given subscriber
+ */
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
+{
+ struct tipc_net *tn = tipc_net(con->server->net);
+ struct list_head *sub_list = &con->sub_list;
+ struct tipc_subscription *sub, *tmp;
+
+ spin_lock_bh(&con->sub_lock);
+ list_for_each_entry_safe(sub, tmp, sub_list, sub_list) {
+ if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
+ tipc_sub_unsubscribe(sub);
+ atomic_dec(&tn->subscription_count);
+ } else if (s) {
+ break;
+ }
+ }
+ spin_unlock_bh(&con->sub_lock);
+}
+
+static void tipc_conn_send_to_sock(struct tipc_conn *con)
+{
+ struct list_head *queue = &con->outqueue;
+ struct tipc_topsrv *srv = con->server;
+ struct outqueue_entry *e;
+ struct tipc_event *evt;
+ struct msghdr msg;
+ struct kvec iov;
+ int count = 0;
+ int ret;
+
+ spin_lock_bh(&con->outqueue_lock);
+
+ while (!list_empty(queue)) {
+ e = list_first_entry(queue, struct outqueue_entry, list);
+ evt = &e->evt;
+ spin_unlock_bh(&con->outqueue_lock);
+
+ if (e->inactive)
+ tipc_conn_delete_sub(con, &evt->s);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_flags = MSG_DONTWAIT;
+ iov.iov_base = evt;
+ iov.iov_len = sizeof(*evt);
+ msg.msg_name = NULL;
+
+ if (con->sock) {
+ ret = kernel_sendmsg(con->sock, &msg, &iov,
+ 1, sizeof(*evt));
+ if (ret == -EWOULDBLOCK || ret == 0) {
+ cond_resched();
+ return;
+ } else if (ret < 0) {
+ return tipc_conn_close(con);
+ }
+ } else {
+ tipc_topsrv_kern_evt(srv->net, evt);
+ }
+
+ /* Don't starve users filling buffers */
+ if (++count >= MAX_SEND_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+ spin_lock_bh(&con->outqueue_lock);
+ list_del(&e->list);
+ kfree(e);
+ }
+ spin_unlock_bh(&con->outqueue_lock);
+}
+
+static void tipc_conn_send_work(struct work_struct *work)
+{
+ struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
+
+ if (connected(con))
+ tipc_conn_send_to_sock(con);
+
+ conn_put(con);
+}
+
+/* tipc_conn_queue_evt() - interrupt level call from a subscription instance
+ * The queued work is launched into tipc_send_work()->tipc_send_to_sock()
+ */
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+ u32 event, struct tipc_event *evt)
+{
+ struct tipc_topsrv *srv = tipc_topsrv(net);
+ struct outqueue_entry *e;
+ struct tipc_conn *con;
+
+ con = tipc_conn_lookup(srv, conid);
+ if (!con)
+ return;
+
+ if (!connected(con))
+ goto err;
+
+ e = kmalloc(sizeof(*e), GFP_ATOMIC);
+ if (!e)
+ goto err;
+ e->inactive = (event == TIPC_SUBSCR_TIMEOUT);
+ memcpy(&e->evt, evt, sizeof(*evt));
+ spin_lock_bh(&con->outqueue_lock);
+ list_add_tail(&e->list, &con->outqueue);
+ spin_unlock_bh(&con->outqueue_lock);
+
+ if (queue_work(srv->send_wq, &con->swork))
+ return;
+err:
+ conn_put(con);
+}
+
+/* tipc_conn_write_space - interrupt callback after a sendmsg EAGAIN
+ * Indicates that there now is more space in the send buffer
+ * The queued work is launched into tipc_send_work()->tipc_conn_send_to_sock()
+ */
+static void tipc_conn_write_space(struct sock *sk)
+{
+ struct tipc_conn *con;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ con = sk->sk_user_data;
+ if (connected(con)) {
+ conn_get(con);
+ if (!queue_work(con->server->send_wq, &con->swork))
+ conn_put(con);
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
+ struct tipc_conn *con,
+ struct tipc_subscr *s)
+{
+ struct tipc_net *tn = tipc_net(srv->net);
+ struct tipc_subscription *sub;
+
+ if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
+ tipc_conn_delete_sub(con, s);
+ return 0;
+ }
+ if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) {
+ pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR);
+ return -1;
+ }
+ sub = tipc_sub_subscribe(srv->net, s, con->conid);
+ if (!sub)
+ return -1;
+ atomic_inc(&tn->subscription_count);
+ spin_lock_bh(&con->sub_lock);
+ list_add(&sub->sub_list, &con->sub_list);
+ spin_unlock_bh(&con->sub_lock);
+ return 0;
+}
+
+static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
+{
+ struct tipc_topsrv *srv = con->server;
+ struct sock *sk = con->sock->sk;
+ struct msghdr msg = {};
+ struct tipc_subscr s;
+ struct kvec iov;
+ int ret;
+
+ iov.iov_base = &s;
+ iov.iov_len = sizeof(s);
+ msg.msg_name = NULL;
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+ ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
+ if (ret == -EWOULDBLOCK)
+ return -EWOULDBLOCK;
+ if (ret > 0) {
+ read_lock_bh(&sk->sk_callback_lock);
+ ret = tipc_conn_rcv_sub(srv, con, &s);
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
+ if (ret < 0)
+ tipc_conn_close(con);
+
+ return ret;
+}
+
+static void tipc_conn_recv_work(struct work_struct *work)
+{
+ struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
+ int count = 0;
+
+ while (connected(con)) {
+ if (tipc_conn_rcv_from_sock(con))
+ break;
+
+ /* Don't flood Rx machine */
+ if (++count >= MAX_RECV_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+ }
+ conn_put(con);
+}
+
+/* tipc_conn_data_ready - interrupt callback indicating the socket has data
+ * The queued work is launched into tipc_recv_work()->tipc_conn_rcv_from_sock()
+ */
+static void tipc_conn_data_ready(struct sock *sk)
+{
+ struct tipc_conn *con;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ con = sk->sk_user_data;
+ if (connected(con)) {
+ conn_get(con);
+ if (!queue_work(con->server->rcv_wq, &con->rwork))
+ conn_put(con);
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void tipc_topsrv_accept(struct work_struct *work)
+{
+ struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork);
+ struct socket *lsock = srv->listener;
+ struct socket *newsock;
+ struct tipc_conn *con;
+ struct sock *newsk;
+ int ret;
+
+ while (1) {
+ ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
+ if (ret < 0)
+ return;
+ con = tipc_conn_alloc(srv);
+ if (IS_ERR(con)) {
+ ret = PTR_ERR(con);
+ sock_release(newsock);
+ return;
+ }
+ /* Register callbacks */
+ newsk = newsock->sk;
+ write_lock_bh(&newsk->sk_callback_lock);
+ newsk->sk_data_ready = tipc_conn_data_ready;
+ newsk->sk_write_space = tipc_conn_write_space;
+ newsk->sk_user_data = con;
+ con->sock = newsock;
+ write_unlock_bh(&newsk->sk_callback_lock);
+
+ /* Wake up receive process in case of 'SYN+' message */
+ newsk->sk_data_ready(newsk);
+ }
+}
+
+/* tipc_toprsv_listener_data_ready - interrupt callback with connection request
+ * The queued job is launched into tipc_topsrv_accept()
+ */
+static void tipc_topsrv_listener_data_ready(struct sock *sk)
+{
+ struct tipc_topsrv *srv;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ srv = sk->sk_user_data;
+ if (srv->listener)
+ queue_work(srv->rcv_wq, &srv->awork);
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
+{
+ int imp = TIPC_CRITICAL_IMPORTANCE;
+ struct socket *lsock = NULL;
+ struct sockaddr_tipc saddr;
+ struct sock *sk;
+ int rc;
+
+ rc = sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock);
+ if (rc < 0)
+ return rc;
+
+ srv->listener = lsock;
+ sk = lsock->sk;
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_data_ready = tipc_topsrv_listener_data_ready;
+ sk->sk_user_data = srv;
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
+ (char *)&imp, sizeof(imp));
+ if (rc < 0)
+ goto err;
+
+ saddr.family = AF_TIPC;
+ saddr.addrtype = TIPC_ADDR_NAMESEQ;
+ saddr.addr.nameseq.type = TIPC_TOP_SRV;
+ saddr.addr.nameseq.lower = TIPC_TOP_SRV;
+ saddr.addr.nameseq.upper = TIPC_TOP_SRV;
+ saddr.scope = TIPC_NODE_SCOPE;
+
+ rc = kernel_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr));
+ if (rc < 0)
+ goto err;
+ rc = kernel_listen(lsock, 0);
+ if (rc < 0)
+ goto err;
+
+ /* As server's listening socket owner and creator is the same module,
+ * we have to decrease TIPC module reference count to guarantee that
+ * it remains zero after the server socket is created, otherwise,
+ * executing "rmmod" command is unable to make TIPC module deleted
+ * after TIPC module is inserted successfully.
+ *
+ * However, the reference count is ever increased twice in
+ * sock_create_kern(): one is to increase the reference count of owner
+ * of TIPC socket's proto_ops struct; another is to increment the
+ * reference count of owner of TIPC proto struct. Therefore, we must
+ * decrement the module reference count twice to ensure that it keeps
+ * zero after server's listening socket is created. Of course, we
+ * must bump the module reference count twice as well before the socket
+ * is closed.
+ */
+ module_put(lsock->ops->owner);
+ module_put(sk->sk_prot_creator->owner);
+
+ return 0;
+err:
+ sock_release(lsock);
+ return -EINVAL;
+}
+
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ u32 upper, u32 filter, int *conid)
+{
+ struct tipc_subscr sub;
+ struct tipc_conn *con;
+ int rc;
+
+ sub.seq.type = type;
+ sub.seq.lower = lower;
+ sub.seq.upper = upper;
+ sub.timeout = TIPC_WAIT_FOREVER;
+ sub.filter = filter;
+ *(u32 *)&sub.usr_handle = port;
+
+ con = tipc_conn_alloc(tipc_topsrv(net));
+ if (IS_ERR(con))
+ return false;
+
+ *conid = con->conid;
+ con->sock = NULL;
+ rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub);
+ if (rc >= 0)
+ return true;
+ conn_put(con);
+ return false;
+}
+
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
+{
+ struct tipc_conn *con;
+
+ con = tipc_conn_lookup(tipc_topsrv(net), conid);
+ if (!con)
+ return;
+
+ test_and_clear_bit(CF_CONNECTED, &con->flags);
+ tipc_conn_delete_sub(con, NULL);
+ conn_put(con);
+ conn_put(con);
+}
+
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt)
+{
+ u32 port = *(u32 *)&evt->s.usr_handle;
+ u32 self = tipc_own_addr(net);
+ struct sk_buff_head evtq;
+ struct sk_buff *skb;
+
+ skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
+ self, self, port, port, 0);
+ if (!skb)
+ return;
+ msg_set_dest_droppable(buf_msg(skb), true);
+ memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
+ skb_queue_head_init(&evtq);
+ __skb_queue_tail(&evtq, skb);
+ tipc_sk_rcv(net, &evtq);
+}
+
+static int tipc_topsrv_work_start(struct tipc_topsrv *s)
+{
+ s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
+ if (!s->rcv_wq) {
+ pr_err("can't start tipc receive workqueue\n");
+ return -ENOMEM;
+ }
+
+ s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
+ if (!s->send_wq) {
+ pr_err("can't start tipc send workqueue\n");
+ destroy_workqueue(s->rcv_wq);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void tipc_topsrv_work_stop(struct tipc_topsrv *s)
+{
+ destroy_workqueue(s->rcv_wq);
+ destroy_workqueue(s->send_wq);
+}
+
+int tipc_topsrv_start(struct net *net)
+{
+ struct tipc_net *tn = tipc_net(net);
+ const char name[] = "topology_server";
+ struct tipc_topsrv *srv;
+ int ret;
+
+ srv = kzalloc(sizeof(*srv), GFP_ATOMIC);
+ if (!srv)
+ return -ENOMEM;
+
+ srv->net = net;
+ srv->max_rcvbuf_size = sizeof(struct tipc_subscr);
+ INIT_WORK(&srv->awork, tipc_topsrv_accept);
+
+ strncpy(srv->name, name, strlen(name) + 1);
+ tn->topsrv = srv;
+ atomic_set(&tn->subscription_count, 0);
+
+ spin_lock_init(&srv->idr_lock);
+ idr_init(&srv->conn_idr);
+ srv->idr_in_use = 0;
+
+ ret = tipc_topsrv_work_start(srv);
+ if (ret < 0)
+ return ret;
+
+ ret = tipc_topsrv_create_listener(srv);
+ if (ret < 0)
+ tipc_topsrv_work_stop(srv);
+
+ return ret;
+}
+
+void tipc_topsrv_stop(struct net *net)
+{
+ struct tipc_topsrv *srv = tipc_topsrv(net);
+ struct socket *lsock = srv->listener;
+ struct tipc_conn *con;
+ int id;
+
+ spin_lock_bh(&srv->idr_lock);
+ for (id = 0; srv->idr_in_use; id++) {
+ con = idr_find(&srv->conn_idr, id);
+ if (con) {
+ spin_unlock_bh(&srv->idr_lock);
+ tipc_conn_close(con);
+ spin_lock_bh(&srv->idr_lock);
+ }
+ }
+ __module_get(lsock->ops->owner);
+ __module_get(lsock->sk->sk_prot_creator->owner);
+ srv->listener = NULL;
+ spin_unlock_bh(&srv->idr_lock);
+ sock_release(lsock);
+ tipc_topsrv_work_stop(srv);
+ idr_destroy(&srv->conn_idr);
+ kfree(srv);
+}
--- /dev/null
+/*
+ * net/tipc/server.h: Include file for TIPC server code
+ *
+ * Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_SERVER_H
+#define _TIPC_SERVER_H
+
+#include "core.h"
+
+#define TIPC_SERVER_NAME_LEN 32
+#define TIPC_SUB_CLUSTER_SCOPE 0x20
+#define TIPC_SUB_NODE_SCOPE 0x40
+#define TIPC_SUB_NO_STATUS 0x80
+
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+ u32 event, struct tipc_event *evt);
+
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ u32 upper, u32 filter, int *conid);
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
+
+#endif
goto out;
}
lock_sock(sk);
- memcpy(crypto_info_aes_gcm_128->iv, ctx->iv,
+ memcpy(crypto_info_aes_gcm_128->iv,
+ ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
TLS_CIPHER_AES_GCM_128_IV_SIZE);
+ memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->rec_seq,
+ TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
release_sock(sk);
if (copy_to_user(optval,
crypto_info_aes_gcm_128,
rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
if (rc) {
rc = -EFAULT;
- goto out;
+ goto err_crypto_info;
}
/* check version */
int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
-static int unix_getname(struct socket *, struct sockaddr *, int *, int);
+static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
poll_table *);
}
-static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
+static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
struct sock *sk = sock->sk;
struct unix_sock *u;
if (!u->addr) {
sunaddr->sun_family = AF_UNIX;
sunaddr->sun_path[0] = 0;
- *uaddr_len = sizeof(short);
+ err = sizeof(short);
} else {
struct unix_address *addr = u->addr;
- *uaddr_len = addr->len;
- memcpy(sunaddr, addr->name, *uaddr_len);
+ err = addr->len;
+ memcpy(sunaddr, addr->name, addr->len);
}
unix_state_unlock(sk);
sock_put(sk);
}
/* We use paged skbs for stream sockets, and limit occupancy to 32768
- * bytes, and a minimun of a full page.
+ * bytes, and a minimum of a full page.
*/
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
static struct pernet_operations unix_net_ops = {
.init = unix_net_init,
.exit = unix_net_exit,
+ .async = true,
};
static int __init af_unix_init(void)
}
static int vsock_getname(struct socket *sock,
- struct sockaddr *addr, int *addr_len, int peer)
+ struct sockaddr *addr, int peer)
{
int err;
struct sock *sk;
*/
BUILD_BUG_ON(sizeof(*vm_addr) > 128);
memcpy(addr, vm_addr, sizeof(*vm_addr));
- *addr_len = sizeof(*vm_addr);
+ err = sizeof(*vm_addr);
out:
release_sock(sk);
static struct pernet_operations cfg80211_pernet_ops = {
.exit = cfg80211_pernet_exit,
+ .async = true,
};
static int __init cfg80211_init(void)
enum nl80211_bss_scan_width scan_width;
struct ieee80211_supported_band *sband =
rdev->wiphy.bands[setup->chandef.chan->band];
- scan_width = cfg80211_chandef_to_scan_width(&setup->chandef);
- setup->basic_rates = ieee80211_mandatory_rates(sband,
- scan_width);
+
+ if (setup->chandef.chan->band == NL80211_BAND_2GHZ) {
+ int i;
+
+ /*
+ * Older versions selected the mandatory rates for
+ * 2.4 GHz as well, but were broken in that only
+ * 1 Mbps was regarded as a mandatory rate. Keep
+ * using just 1 Mbps as the default basic rate for
+ * mesh to be interoperable with older versions.
+ */
+ for (i = 0; i < sband->n_bitrates; i++) {
+ if (sband->bitrates[i].bitrate == 10) {
+ setup->basic_rates = BIT(i);
+ break;
+ }
+ }
+ } else {
+ scan_width = cfg80211_chandef_to_scan_width(&setup->chandef);
+ setup->basic_rates = ieee80211_mandatory_rates(sband,
+ scan_width);
+ }
}
err = cfg80211_chandef_dfs_required(&rdev->wiphy,
[NL80211_ATTR_FILS_CACHE_ID] = { .len = 2 },
[NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN },
[NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG },
+ [NL80211_ATTR_EXTERNAL_AUTH_SUPPORT] = { .type = NLA_FLAG },
};
/* policy for the key attributes */
return false;
return true;
case NL80211_CMD_CONNECT:
- /* SAE not supported yet */
- if (auth_type == NL80211_AUTHTYPE_SAE)
+ if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
+ auth_type == NL80211_AUTHTYPE_SAE)
return false;
+
/* FILS with SK PFS or PK not supported yet */
if (auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
auth_type == NL80211_AUTHTYPE_FILS_PK)
PUT_SINFO_U64(RX_DROP_MISC, rx_dropped_misc);
PUT_SINFO_U64(BEACON_RX, rx_beacon);
PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8);
+ PUT_SINFO(ACK_SIGNAL, ack_signal, u8);
#undef PUT_SINFO
#undef PUT_SINFO_U64
return genlmsg_reply(msg, info);
nla_put_failure:
- genlmsg_cancel(msg, hdr);
out:
nlmsg_free(msg);
return -ENOBUFS;
nla_put_failure_rcu:
rcu_read_unlock();
nla_put_failure:
- genlmsg_cancel(msg, hdr);
put_failure:
nlmsg_free(msg);
return -EMSGSIZE;
*flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
- if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
- !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+ if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+ !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) ||
+ ((*flags & NL80211_SCAN_FLAG_LOW_SPAN) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_LOW_SPAN_SCAN)) ||
+ ((*flags & NL80211_SCAN_FLAG_LOW_POWER) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_LOW_POWER_SCAN)) ||
+ ((*flags & NL80211_SCAN_FLAG_HIGH_ACCURACY) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN)))
return -EOPNOTSUPP;
if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
return -EINVAL;
}
+ if (nla_get_flag(info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])) {
+ if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
+ GENL_SET_ERR_MSG(info,
+ "external auth requires connection ownership");
+ return -EINVAL;
+ }
+ connect.flags |= CONNECT_REQ_EXTERNAL_AUTH_SUPPORT;
+ }
+
wdev_lock(dev->ieee80211_ptr);
err = cfg80211_connect(rdev, dev, &connect, connkeys,
return ret;
}
+static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct cfg80211_external_auth_params params;
+
+ if (!rdev->ops->external_auth)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_SSID])
+ return -EINVAL;
+
+ if (!info->attrs[NL80211_ATTR_BSSID])
+ return -EINVAL;
+
+ if (!info->attrs[NL80211_ATTR_STATUS_CODE])
+ return -EINVAL;
+
+ memset(¶ms, 0, sizeof(params));
+
+ params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+ if (params.ssid.ssid_len == 0 ||
+ params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN)
+ return -EINVAL;
+ memcpy(params.ssid.ssid, nla_data(info->attrs[NL80211_ATTR_SSID]),
+ params.ssid.ssid_len);
+
+ memcpy(params.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]),
+ ETH_ALEN);
+
+ params.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]);
+
+ return rdev_external_auth(rdev, dev, ¶ms);
+}
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL80211_CMD_EXTERNAL_AUTH,
+ .doit = nl80211_external_auth,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
};
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_conn_failed);
return true;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
return true;
}
return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
return -ENOBUFS;
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
+void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
+ struct sta_opmode_info *sta_opmode,
+ gfp_t gfp)
+{
+ struct sk_buff *msg;
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ void *hdr;
+
+ if (WARN_ON(!mac))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_STA_OPMODE_CHANGED);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
+ goto nla_put_failure;
+
+ if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac))
+ goto nla_put_failure;
+
+ if ((sta_opmode->changed & STA_OPMODE_SMPS_MODE_CHANGED) &&
+ nla_put_u8(msg, NL80211_ATTR_SMPS_MODE, sta_opmode->smps_mode))
+ goto nla_put_failure;
+
+ if ((sta_opmode->changed & STA_OPMODE_MAX_BW_CHANGED) &&
+ nla_put_u8(msg, NL80211_ATTR_CHANNEL_WIDTH, sta_opmode->bw))
+ goto nla_put_failure;
+
+ if ((sta_opmode->changed & STA_OPMODE_N_SS_CHANGED) &&
+ nla_put_u8(msg, NL80211_ATTR_NSS, sta_opmode->rx_nss))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+ NL80211_MCGRP_MLME, gfp);
+
+ return;
+
+nla_put_failure:
+ nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_sta_opmode_change_notify);
+
void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
- u64 cookie, bool acked, gfp_t gfp)
+ u64 cookie, bool acked, s32 ack_signal,
+ bool is_valid_ack_signal, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
NL80211_ATTR_PAD) ||
- (acked && nla_put_flag(msg, NL80211_ATTR_ACK)))
+ (acked && nla_put_flag(msg, NL80211_ATTR_ACK)) ||
+ (is_valid_ack_signal && nla_put_s32(msg, NL80211_ATTR_ACK_SIGNAL,
+ ack_signal)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_probe_status);
nla_put_failure:
spin_unlock_bh(&rdev->beacon_registrations_lock);
- if (hdr)
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_report_obss_beacon);
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_tdls_oper_request);
return;
nla_put_failure:
- if (hdr)
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_crit_proto_stopped);
nlmsg_free(msg);
}
+int cfg80211_external_auth_request(struct net_device *dev,
+ struct cfg80211_external_auth_params *params,
+ gfp_t gfp)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct sk_buff *msg;
+ void *hdr;
+
+ if (!wdev->conn_owner_nlportid)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_EXTERNAL_AUTH);
+ if (!hdr)
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
+ nla_put_u32(msg, NL80211_ATTR_AKM_SUITES, params->key_mgmt_suite) ||
+ nla_put_u32(msg, NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+ params->action) ||
+ nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, params->bssid) ||
+ nla_put(msg, NL80211_ATTR_SSID, params->ssid.ssid_len,
+ params->ssid.ssid))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ genlmsg_unicast(wiphy_net(&rdev->wiphy), msg,
+ wdev->conn_owner_nlportid);
+ return 0;
+
+ nla_put_failure:
+ nlmsg_free(msg);
+ return -ENOBUFS;
+}
+EXPORT_SYMBOL(cfg80211_external_auth_request);
+
/* initialisation/exit functions */
int __init nl80211_init(void)
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
+
+static inline int
+rdev_external_auth(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_external_auth_params *params)
+{
+ int ret = -EOPNOTSUPP;
+
+ trace_rdev_external_auth(&rdev->wiphy, dev, params);
+ if (rdev->ops->external_auth)
+ ret = rdev->ops->external_auth(&rdev->wiphy, dev, params);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
#endif /* __CFG80211_RDEV_OPS */
wdev->current_bss = NULL;
wdev->ssid_len = 0;
wdev->conn_owner_nlportid = 0;
+ kzfree(wdev->connect_keys);
+ wdev->connect_keys = NULL;
nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap);
WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(aa))
);
+TRACE_EVENT(rdev_external_auth,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_external_auth_params *params),
+ TP_ARGS(wiphy, netdev, params),
+ TP_STRUCT__entry(WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(bssid)
+ __array(u8, ssid, IEEE80211_MAX_SSID_LEN + 1)
+ __field(u16, status)
+ ),
+ TP_fast_assign(WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(bssid, params->bssid);
+ memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1);
+ memcpy(__entry->ssid, params->ssid.ssid,
+ params->ssid.ssid_len);
+ __entry->status = params->status;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT
+ ", ssid: %s, status: %u", WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->bssid, __entry->ssid, __entry->status)
+);
+
/*************************************************************
* cfg80211 exported functions traces *
*************************************************************/
EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
- const u8 *addr, enum nl80211_iftype iftype)
+ const u8 *addr, enum nl80211_iftype iftype,
+ u8 data_offset)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct {
if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
return -1;
- hdrlen = ieee80211_hdrlen(hdr->frame_control);
+ hdrlen = ieee80211_hdrlen(hdr->frame_control) + data_offset;
if (skb->len < hdrlen + 8)
return -1;
static struct pernet_operations wext_pernet_ops = {
.init = wext_pernet_init,
.exit = wext_pernet_exit,
+ .async = true,
};
static int __init wireless_nlevent_init(void)
}
static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)uaddr;
struct sock *sk = sock->sk;
sx25->sx25_addr = x25->source_addr;
sx25->sx25_family = AF_X25;
- *uaddr_len = sizeof(*sx25);
+ rc = sizeof(*sx25);
out:
return rc;
}
}
- pr_debug("invalid PLP frame %02X %02X %02X\n",
- frame[0], frame[1], frame[2]);
+ pr_debug("invalid PLP frame %3ph\n", frame);
return X25_ILLEGAL;
}
static struct pernet_operations __net_initdata xfrm_net_ops = {
.init = xfrm_net_init,
.exit = xfrm_net_exit,
+ .async = true,
};
void __init xfrm_init(void)
hostprogs-y += xdp_monitor
hostprogs-y += xdp_rxq_info
hostprogs-y += syscall_tp
+hostprogs-y += cpustat
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
+cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
always += xdp_rxq_info_kern.o
always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o
+always += cpustat_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
HOSTLOADLIBES_xdp_monitor += -lelf
HOSTLOADLIBES_xdp_rxq_info += -lelf
HOSTLOADLIBES_syscall_tp += -lelf
+HOSTLOADLIBES_cpustat += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/*
+ * The CPU number, cstate number and pstate number are based
+ * on 96boards Hikey with octa CA53 CPUs.
+ *
+ * Every CPU have three idle states for cstate:
+ * WFI, CPU_OFF, CLUSTER_OFF
+ *
+ * Every CPU have 5 operating points:
+ * 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
+ *
+ * This code is based on these assumption and other platforms
+ * need to adjust these definitions.
+ */
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+
+static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
+
+/*
+ * my_map structure is used to record cstate and pstate index and
+ * timestamp (Idx, Ts), when new event incoming we need to update
+ * combination for new state index and timestamp (Idx`, Ts`).
+ *
+ * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
+ * interval for the previous state: Duration(Idx) = Ts` - Ts.
+ *
+ * Every CPU has one below array for recording state index and
+ * timestamp, and record for cstate and pstate saperately:
+ *
+ * +--------------------------+
+ * | cstate timestamp |
+ * +--------------------------+
+ * | cstate index |
+ * +--------------------------+
+ * | pstate timestamp |
+ * +--------------------------+
+ * | pstate index |
+ * +--------------------------+
+ */
+#define MAP_OFF_CSTATE_TIME 0
+#define MAP_OFF_CSTATE_IDX 1
+#define MAP_OFF_PSTATE_TIME 2
+#define MAP_OFF_PSTATE_IDX 3
+#define MAP_OFF_NUM 4
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAP_OFF_NUM,
+};
+
+/* cstate_duration records duration time for every idle state per CPU */
+struct bpf_map_def SEC("maps") cstate_duration = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
+};
+
+/* pstate_duration records duration time for every operating point per CPU */
+struct bpf_map_def SEC("maps") pstate_duration = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
+};
+
+/*
+ * The trace events for cpu_idle and cpu_frequency are taken from:
+ * /sys/kernel/debug/tracing/events/power/cpu_idle/format
+ * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
+ *
+ * These two events have same format, so define one common structure.
+ */
+struct cpu_args {
+ u64 pad;
+ u32 state;
+ u32 cpu_id;
+};
+
+/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
+static u32 find_cpu_pstate_idx(u32 frequency)
+{
+ u32 i;
+
+ for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
+ if (frequency == cpu_opps[i])
+ return i;
+ }
+
+ return i;
+}
+
+SEC("tracepoint/power/cpu_idle")
+int bpf_prog1(struct cpu_args *ctx)
+{
+ u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+ if (ctx->cpu_id > MAX_CPU)
+ return 0;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
+ cts = bpf_map_lookup_elem(&my_map, &key);
+ if (!cts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ prev_state = *cstate;
+ *cstate = ctx->state;
+
+ if (!*cts) {
+ *cts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *cts;
+ *cts = cur_ts;
+
+ /*
+ * When state doesn't equal to (u32)-1, the cpu will enter
+ * one idle state; for this case we need to record interval
+ * for the pstate.
+ *
+ * OPP2
+ * +---------------------+
+ * OPP1 | |
+ * ---------+ |
+ * | Idle state
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ if (ctx->state != (u32)-1) {
+
+ /* record pstate after have first cpu_frequency event */
+ if (!*pts)
+ return 0;
+
+ delta = cur_ts - *pts;
+
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ /*
+ * When state equal to (u32)-1, the cpu just exits from one
+ * specific idle state; for this case we need to record
+ * interval for the pstate.
+ *
+ * OPP2
+ * -----------+
+ * | OPP1
+ * | +-----------
+ * | Idle state |
+ * +---------------------+
+ *
+ * |<- cstate duration ->|
+ * ^ ^
+ * cts cur_ts
+ */
+ } else {
+
+ key = cpu * MAX_CSTATE_ENTRIES + prev_state;
+ val = bpf_map_lookup_elem(&cstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+ }
+
+ /* Update timestamp for pstate as new start time */
+ if (*pts)
+ *pts = cur_ts;
+
+ return 0;
+}
+
+SEC("tracepoint/power/cpu_frequency")
+int bpf_prog2(struct cpu_args *ctx)
+{
+ u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ prev_state = *pstate;
+ *pstate = ctx->state;
+
+ if (!*pts) {
+ *pts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *pts;
+ *pts = cur_ts;
+
+ /* When CPU is in idle, bail out to skip pstate statistics */
+ if (*cstate != (u32)(-1))
+ return 0;
+
+ /*
+ * The cpu changes to another different OPP (in below diagram
+ * change frequency from OPP3 to OPP1), need recording interval
+ * for previous frequency OPP3 and update timestamp as start
+ * time for new frequency OPP1.
+ *
+ * OPP3
+ * +---------------------+
+ * OPP2 | |
+ * ---------+ |
+ * | OPP1
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sched.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+#define MAX_STARS 40
+
+#define CPUFREQ_MAX_SYSFS_PATH "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
+#define CPUFREQ_LOWEST_FREQ "208000"
+#define CPUFREQ_HIGHEST_FREQ "12000000"
+
+struct cpu_stat_data {
+ unsigned long cstate[MAX_CSTATE_ENTRIES];
+ unsigned long pstate[MAX_PSTATE_ENTRIES];
+};
+
+static struct cpu_stat_data stat_data[MAX_CPU];
+
+static void cpu_stat_print(void)
+{
+ int i, j;
+ char state_str[sizeof("cstate-9")];
+ struct cpu_stat_data *data;
+
+ /* Clear screen */
+ printf("\033[2J");
+
+ /* Header */
+ printf("\nCPU states statistics:\n");
+ printf("%-10s ", "state(ms)");
+
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ sprintf(state_str, "cstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ sprintf(state_str, "pstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ printf("\n");
+
+ for (j = 0; j < MAX_CPU; j++) {
+ data = &stat_data[j];
+
+ printf("CPU-%-6d ", j);
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
+ printf("%-11ld ", data->cstate[i] / 1000000);
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
+ printf("%-11ld ", data->pstate[i] / 1000000);
+
+ printf("\n");
+ }
+}
+
+static void cpu_stat_update(int cstate_fd, int pstate_fd)
+{
+ unsigned long key, value;
+ int c, i;
+
+ for (c = 0; c < MAX_CPU; c++) {
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ key = c * MAX_CSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(cstate_fd, &key, &value);
+ stat_data[c].cstate[i] = value;
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ key = c * MAX_PSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(pstate_fd, &key, &value);
+ stat_data[c].pstate[i] = value;
+ }
+ }
+}
+
+/*
+ * This function is copied from 'idlestat' tool function
+ * idlestat_wake_all() in idlestate.c.
+ *
+ * It sets the self running task affinity to cpus one by one so can wake up
+ * the specific CPU to handle scheduling; this results in all cpus can be
+ * waken up once and produce ftrace event 'trace_cpu_idle'.
+ */
+static int cpu_stat_inject_cpu_idle_event(void)
+{
+ int rcpu, i, ret;
+ cpu_set_t cpumask;
+ cpu_set_t original_cpumask;
+
+ ret = sysconf(_SC_NPROCESSORS_CONF);
+ if (ret < 0)
+ return -1;
+
+ rcpu = sched_getcpu();
+ if (rcpu < 0)
+ return -1;
+
+ /* Keep track of the CPUs we will run on */
+ sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask);
+
+ for (i = 0; i < ret; i++) {
+
+ /* Pointless to wake up ourself */
+ if (i == rcpu)
+ continue;
+
+ /* Pointless to wake CPUs we will not run on */
+ if (!CPU_ISSET(i, &original_cpumask))
+ continue;
+
+ CPU_ZERO(&cpumask);
+ CPU_SET(i, &cpumask);
+
+ sched_setaffinity(0, sizeof(cpumask), &cpumask);
+ }
+
+ /* Enable all the CPUs of the original mask */
+ sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
+ return 0;
+}
+
+/*
+ * It's possible to have no any frequency change for long time and cannot
+ * get ftrace event 'trace_cpu_frequency' for long period, this introduces
+ * big deviation for pstate statistics.
+ *
+ * To solve this issue, below code forces to set 'scaling_max_freq' to 208MHz
+ * for triggering ftrace event 'trace_cpu_frequency' and then recovery back to
+ * the maximum frequency value 1.2GHz.
+ */
+static int cpu_stat_inject_cpu_frequency_event(void)
+{
+ int len, fd;
+
+ fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY);
+ if (fd < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ return fd;
+ }
+
+ len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+ len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+err:
+ close(fd);
+ return len;
+}
+
+static void int_exit(int sig)
+{
+ cpu_stat_inject_cpu_idle_event();
+ cpu_stat_inject_cpu_frequency_event();
+ cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_print();
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+ int ret;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ ret = cpu_stat_inject_cpu_idle_event();
+ if (ret < 0)
+ return 1;
+
+ ret = cpu_stat_inject_cpu_frequency_event();
+ if (ret < 0)
+ return 1;
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ while (1) {
+ cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_print();
+ sleep(5);
+ }
+
+ return 0;
+}
key.tunnel_tos = 0;
key.tunnel_ttl = 64;
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
key.tunnel_label = 0xabcde;
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX);
+ BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+ BPF_F_SEQ_NUMBER);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
[ -n "$msg" ] && echo "ERROR: $msg"
+ test_cgrp2_sock -d ${CGRP_MNT}/sockopts
ip li del cgrp2_sock
umount ${CGRP_MNT}
}
function cleanup {
+ if [ -d /tmp/cgroupv2/foo ]; then
+ test_cgrp2_sock -d /tmp/cgroupv2/foo
+ fi
ip link del veth0b
ip netns delete at_ns0
umount /tmp/cgroupv2
function add_gre_tunnel {
# in namespace
ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE key 2 local 172.16.1.100 remote 172.16.1.200
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200
ip netns exec at_ns0 ip link set dev $DEV_NS up
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
# in namespace
ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE flowlabel 0xbcdef key 2 \
+ ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
local ::11 remote ::22
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
#include <string.h>
#include <unistd.h>
#include <libgen.h>
+#include <sys/resource.h>
#include "bpf_load.h"
#include "bpf_util.h"
int main(int argc, char **argv)
{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
const char *optstr = "SN";
char filename[256];
int ret, opt, key = 0;
return 1;
}
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
ifindex_in = strtoul(argv[optind], NULL, 0);
ifindex_out = strtoul(argv[optind + 1], NULL, 0);
printf("input: %d output: %d\n", ifindex_in, ifindex_out);
hostprogs-y := sockmap
# Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o
+LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
else
fprintf(stderr, "unknown test\n");
out:
+ bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
close(s1);
close(s2);
close(p1);
#include <linux/cred.h>
#include <linux/key-type.h>
#include <linux/digsig.h>
+#include <linux/vmalloc.h>
#include <crypto/public_key.h>
#include <keys/system_keyring.h>
#include <keys/big_key-type.h>
#include <crypto/aead.h>
+struct big_key_buf {
+ unsigned int nr_pages;
+ void *virt;
+ struct scatterlist *sg;
+ struct page *pages[];
+};
+
/*
* Layout of key payload words.
*/
/*
* Encrypt/decrypt big_key data
*/
-static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key)
+static int big_key_crypt(enum big_key_op op, struct big_key_buf *buf, size_t datalen, u8 *key)
{
int ret;
- struct scatterlist sgio;
struct aead_request *aead_req;
/* We always use a zero nonce. The reason we can get away with this is
* because we're using a different randomly generated key for every
return -ENOMEM;
memset(zero_nonce, 0, sizeof(zero_nonce));
- sg_init_one(&sgio, data, datalen + (op == BIG_KEY_ENC ? ENC_AUTHTAG_SIZE : 0));
- aead_request_set_crypt(aead_req, &sgio, &sgio, datalen, zero_nonce);
+ aead_request_set_crypt(aead_req, buf->sg, buf->sg, datalen, zero_nonce);
aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
aead_request_set_ad(aead_req, 0);
return ret;
}
+/*
+ * Free up the buffer.
+ */
+static void big_key_free_buffer(struct big_key_buf *buf)
+{
+ unsigned int i;
+
+ if (buf->virt) {
+ memset(buf->virt, 0, buf->nr_pages * PAGE_SIZE);
+ vunmap(buf->virt);
+ }
+
+ for (i = 0; i < buf->nr_pages; i++)
+ if (buf->pages[i])
+ __free_page(buf->pages[i]);
+
+ kfree(buf);
+}
+
+/*
+ * Allocate a buffer consisting of a set of pages with a virtual mapping
+ * applied over them.
+ */
+static void *big_key_alloc_buffer(size_t len)
+{
+ struct big_key_buf *buf;
+ unsigned int npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ unsigned int i, l;
+
+ buf = kzalloc(sizeof(struct big_key_buf) +
+ sizeof(struct page) * npg +
+ sizeof(struct scatterlist) * npg,
+ GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ buf->nr_pages = npg;
+ buf->sg = (void *)(buf->pages + npg);
+ sg_init_table(buf->sg, npg);
+
+ for (i = 0; i < buf->nr_pages; i++) {
+ buf->pages[i] = alloc_page(GFP_KERNEL);
+ if (!buf->pages[i])
+ goto nomem;
+
+ l = min_t(size_t, len, PAGE_SIZE);
+ sg_set_page(&buf->sg[i], buf->pages[i], l, 0);
+ len -= l;
+ }
+
+ buf->virt = vmap(buf->pages, buf->nr_pages, VM_MAP, PAGE_KERNEL);
+ if (!buf->virt)
+ goto nomem;
+
+ return buf;
+
+nomem:
+ big_key_free_buffer(buf);
+ return NULL;
+}
+
/*
* Preparse a big key
*/
int big_key_preparse(struct key_preparsed_payload *prep)
{
+ struct big_key_buf *buf;
struct path *path = (struct path *)&prep->payload.data[big_key_path];
struct file *file;
u8 *enckey;
- u8 *data = NULL;
ssize_t written;
- size_t datalen = prep->datalen;
+ size_t datalen = prep->datalen, enclen = datalen + ENC_AUTHTAG_SIZE;
int ret;
- ret = -EINVAL;
if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data)
- goto error;
+ return -EINVAL;
/* Set an arbitrary quota */
prep->quotalen = 16;
*
* File content is stored encrypted with randomly generated key.
*/
- size_t enclen = datalen + ENC_AUTHTAG_SIZE;
loff_t pos = 0;
- data = kmalloc(enclen, GFP_KERNEL);
- if (!data)
+ buf = big_key_alloc_buffer(enclen);
+ if (!buf)
return -ENOMEM;
- memcpy(data, prep->data, datalen);
+ memcpy(buf->virt, prep->data, datalen);
/* generate random key */
enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL);
goto err_enckey;
/* encrypt aligned data */
- ret = big_key_crypt(BIG_KEY_ENC, data, datalen, enckey);
+ ret = big_key_crypt(BIG_KEY_ENC, buf, datalen, enckey);
if (ret)
goto err_enckey;
goto err_enckey;
}
- written = kernel_write(file, data, enclen, &pos);
+ written = kernel_write(file, buf->virt, enclen, &pos);
if (written != enclen) {
ret = written;
if (written >= 0)
*path = file->f_path;
path_get(path);
fput(file);
- kzfree(data);
+ big_key_free_buffer(buf);
} else {
/* Just store the data in a buffer */
void *data = kmalloc(datalen, GFP_KERNEL);
err_enckey:
kzfree(enckey);
error:
- kzfree(data);
+ big_key_free_buffer(buf);
return ret;
}
return datalen;
if (datalen > BIG_KEY_FILE_THRESHOLD) {
+ struct big_key_buf *buf;
struct path *path = (struct path *)&key->payload.data[big_key_path];
struct file *file;
- u8 *data;
u8 *enckey = (u8 *)key->payload.data[big_key_data];
size_t enclen = datalen + ENC_AUTHTAG_SIZE;
loff_t pos = 0;
- data = kmalloc(enclen, GFP_KERNEL);
- if (!data)
+ buf = big_key_alloc_buffer(enclen);
+ if (!buf)
return -ENOMEM;
file = dentry_open(path, O_RDONLY, current_cred());
}
/* read file to kernel and decrypt */
- ret = kernel_read(file, data, enclen, &pos);
+ ret = kernel_read(file, buf->virt, enclen, &pos);
if (ret >= 0 && ret != enclen) {
ret = -EIO;
goto err_fput;
}
- ret = big_key_crypt(BIG_KEY_DEC, data, enclen, enckey);
+ ret = big_key_crypt(BIG_KEY_DEC, buf, enclen, enckey);
if (ret)
goto err_fput;
ret = datalen;
/* copy decrypted data to user */
- if (copy_to_user(buffer, data, datalen) != 0)
+ if (copy_to_user(buffer, buf->virt, datalen) != 0)
ret = -EFAULT;
err_fput:
fput(file);
error:
- kzfree(data);
+ big_key_free_buffer(buf);
} else {
ret = datalen;
if (copy_to_user(buffer, key->payload.data[big_key_data],
static struct pernet_operations selinux_net_ops = {
.init = selinux_nf_register,
.exit = selinux_nf_unregister,
+ .async = true,
};
static int __init selinux_nf_ip_init(void)
static struct pernet_operations smack_net_ops = {
.init = smack_nf_register,
.exit = smack_nf_unregister,
+ .async = true,
};
static int __init smack_nf_ip_init(void)
return 0;
{
const int error = sock->ops->getname(sock, (struct sockaddr *)
- &addr, &addr_len, 0);
+ &addr, 0);
- if (error)
+ if (error < 0)
return error;
+ addr_len = error;
}
address.protocol = type;
address.operation = TOMOYO_NETWORK_LISTEN;
config AC97_BUS_NEW
tristate
- select AC97
help
This is the new AC97 bus type, successor of AC97_BUS. The ported
drivers which benefit from the AC97 automatic probing should "select"
{
struct snd_seq_client *client = file->private_data;
int written = 0, len;
- int err = -EINVAL;
+ int err;
struct snd_seq_event event;
if (!(snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_OUTPUT))
/* allocate the pool now if the pool is not allocated yet */
if (client->pool->size > 0 && !snd_seq_write_pool_allocated(client)) {
- if (snd_seq_pool_init(client->pool) < 0)
+ mutex_lock(&client->ioctl_mutex);
+ err = snd_seq_pool_init(client->pool);
+ mutex_unlock(&client->ioctl_mutex);
+ if (err < 0)
return -ENOMEM;
}
/* only process whole events */
+ err = -EINVAL;
while (count >= sizeof(struct snd_seq_event)) {
/* Read in the event header from the user */
len = sizeof(event);
spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
}
+static void alc269_fixup_pincfg_U7x7_headset_mic(struct hda_codec *codec,
+ const struct hda_fixup *fix,
+ int action)
+{
+ unsigned int cfg_headphone = snd_hda_codec_get_pincfg(codec, 0x21);
+ unsigned int cfg_headset_mic = snd_hda_codec_get_pincfg(codec, 0x19);
+
+ if (cfg_headphone && cfg_headset_mic == 0x411111f0)
+ snd_hda_codec_set_pincfg(codec, 0x19,
+ (cfg_headphone & ~AC_DEFCFG_DEVICE) |
+ (AC_JACK_MIC_IN << AC_DEFCFG_DEVICE_SHIFT));
+}
+
static void alc269_fixup_hweq(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
}
}
+static void alc_fixup_tpt470_dock(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ static const struct hda_pintbl pincfgs[] = {
+ { 0x17, 0x21211010 }, /* dock headphone */
+ { 0x19, 0x21a11010 }, /* dock mic */
+ { }
+ };
+ struct alc_spec *spec = codec->spec;
+
+ if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+ spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
+ /* Enable DOCK device */
+ snd_hda_codec_write(codec, 0x17, 0,
+ AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0);
+ /* Enable DOCK device */
+ snd_hda_codec_write(codec, 0x19, 0,
+ AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0);
+ snd_hda_apply_pincfgs(codec, pincfgs);
+ }
+}
+
static void alc_shutup_dell_xps13(struct hda_codec *codec)
{
struct alc_spec *spec = codec->spec;
ALC269_FIXUP_LIFEBOOK_EXTMIC,
ALC269_FIXUP_LIFEBOOK_HP_PIN,
ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT,
+ ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC,
ALC269_FIXUP_AMIC,
ALC269_FIXUP_DMIC,
ALC269VB_FIXUP_AMIC,
ALC700_FIXUP_INTEL_REFERENCE,
ALC274_FIXUP_DELL_BIND_DACS,
ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
+ ALC298_FIXUP_TPT470_DOCK,
};
static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc269_fixup_pincfg_no_hp_to_lineout,
},
+ [ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc269_fixup_pincfg_U7x7_headset_mic,
+ },
[ALC269_FIXUP_AMIC] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
.chained = true,
.chain_id = ALC274_FIXUP_DELL_BIND_DACS
},
+ [ALC298_FIXUP_TPT470_DOCK] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_tpt470_dock,
+ .chained = true,
+ .chain_id = ALC293_FIXUP_LENOVO_SPK_NOISE
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x084b, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
+ SND_PCI_QUIRK(0x1028, 0x084e, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT),
SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN),
SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN),
+ SND_PCI_QUIRK(0x10cf, 0x1629, "Lifebook U7x7", ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC),
SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE),
SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x222d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x222e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460),
SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460),
+ SND_PCI_QUIRK(0x17aa, 0x2245, "Thinkpad T470", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x2246, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x2247, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x224b, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
SND_PCI_QUIRK(0x17aa, 0x5050, "Thinkpad T560p", ALC292_FIXUP_TPT460),
SND_PCI_QUIRK(0x17aa, 0x5051, "Thinkpad L460", ALC292_FIXUP_TPT460),
SND_PCI_QUIRK(0x17aa, 0x5053, "Thinkpad T460", ALC292_FIXUP_TPT460),
+ SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+ SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),
SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */
{0x12, 0xb7a60130},
{0x14, 0x90170110},
{0x21, 0x02211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x12, 0x90a60130},
+ {0x14, 0x90170110},
+ {0x14, 0x01011020},
+ {0x21, 0x0221101f}),
SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC256_STANDARD_PINS),
SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC,
{0x12, 0x90a60120},
{0x14, 0x90170110},
{0x21, 0x0321101f}),
+ SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x12, 0xb7a60130},
+ {0x14, 0x90170110},
+ {0x21, 0x04211020}),
SND_HDA_PIN_QUIRK(0x10ec0290, 0x103c, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1,
ALC290_STANDARD_PINS,
{0x15, 0x04211040},
int validx, int *value_ret)
{
struct snd_usb_audio *chip = cval->head.mixer->chip;
- unsigned char buf[4 + 3 * sizeof(__u32)]; /* enough space for one range */
+ /* enough space for one range */
+ unsigned char buf[sizeof(__u16) + 3 * sizeof(__u32)];
unsigned char *val;
- int idx = 0, ret, size;
+ int idx = 0, ret, val_size, size;
__u8 bRequest;
+ val_size = uac2_ctl_value_size(cval->val_type);
+
if (request == UAC_GET_CUR) {
bRequest = UAC2_CS_CUR;
- size = uac2_ctl_value_size(cval->val_type);
+ size = val_size;
} else {
bRequest = UAC2_CS_RANGE;
- size = sizeof(buf);
+ size = sizeof(__u16) + 3 * val_size;
}
memset(buf, 0, sizeof(buf));
val = buf + sizeof(__u16);
break;
case UAC_GET_MAX:
- val = buf + sizeof(__u16) * 2;
+ val = buf + sizeof(__u16) + val_size;
break;
case UAC_GET_RES:
- val = buf + sizeof(__u16) * 3;
+ val = buf + sizeof(__u16) + val_size * 2;
break;
default:
return -EINVAL;
}
- *value_ret = convert_signed_value(cval, snd_usb_combine_bytes(val, sizeof(__u16)));
+ *value_ret = convert_signed_value(cval,
+ snd_usb_combine_bytes(val, val_size));
return 0;
}
ep = 0x86;
iface = usb_ifnum_to_if(dev, 2);
+ if (!iface || iface->num_altsetting == 0)
+ return -EINVAL;
+
+ alts = &iface->altsetting[1];
+ goto add_sync_ep;
+ case USB_ID(0x1397, 0x0002):
+ ep = 0x81;
+ iface = usb_ifnum_to_if(dev, 1);
+
if (!iface || iface->num_altsetting == 0)
return -EINVAL;
return SNDRV_PCM_FMTBIT_DSD_U32_BE;
break;
- /* Amanero Combo384 USB interface with native DSD support */
- case USB_ID(0x16d0, 0x071a):
+ /* Amanero Combo384 USB based DACs with native DSD support */
+ case USB_ID(0x16d0, 0x071a): /* Amanero - Combo384 */
+ case USB_ID(0x2ab6, 0x0004): /* T+A DAC8DSD-V2.0, MP1000E-V2.0, MP2000R-V2.0, MP2500R-V2.0, MP3100HV-V2.0 */
+ case USB_ID(0x2ab6, 0x0005): /* T+A USB HD Audio 1 */
+ case USB_ID(0x2ab6, 0x0006): /* T+A USB HD Audio 2 */
if (fp->altsetting == 2) {
switch (le16_to_cpu(chip->dev->descriptor.bcdDevice)) {
case 0x199:
#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
*/
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/unistd.h"
- */
-
-#ifndef _UAPI_ASM_S390_UNISTD_H_
-#define _UAPI_ASM_S390_UNISTD_H_
-
-/*
- * This file contains the system call numbers.
- */
-
-#define __NR_exit 1
-#define __NR_fork 2
-#define __NR_read 3
-#define __NR_write 4
-#define __NR_open 5
-#define __NR_close 6
-#define __NR_restart_syscall 7
-#define __NR_creat 8
-#define __NR_link 9
-#define __NR_unlink 10
-#define __NR_execve 11
-#define __NR_chdir 12
-#define __NR_mknod 14
-#define __NR_chmod 15
-#define __NR_lseek 19
-#define __NR_getpid 20
-#define __NR_mount 21
-#define __NR_umount 22
-#define __NR_ptrace 26
-#define __NR_alarm 27
-#define __NR_pause 29
-#define __NR_utime 30
-#define __NR_access 33
-#define __NR_nice 34
-#define __NR_sync 36
-#define __NR_kill 37
-#define __NR_rename 38
-#define __NR_mkdir 39
-#define __NR_rmdir 40
-#define __NR_dup 41
-#define __NR_pipe 42
-#define __NR_times 43
-#define __NR_brk 45
-#define __NR_signal 48
-#define __NR_acct 51
-#define __NR_umount2 52
-#define __NR_ioctl 54
-#define __NR_fcntl 55
-#define __NR_setpgid 57
-#define __NR_umask 60
-#define __NR_chroot 61
-#define __NR_ustat 62
-#define __NR_dup2 63
-#define __NR_getppid 64
-#define __NR_getpgrp 65
-#define __NR_setsid 66
-#define __NR_sigaction 67
-#define __NR_sigsuspend 72
-#define __NR_sigpending 73
-#define __NR_sethostname 74
-#define __NR_setrlimit 75
-#define __NR_getrusage 77
-#define __NR_gettimeofday 78
-#define __NR_settimeofday 79
-#define __NR_symlink 83
-#define __NR_readlink 85
-#define __NR_uselib 86
-#define __NR_swapon 87
-#define __NR_reboot 88
-#define __NR_readdir 89
-#define __NR_mmap 90
-#define __NR_munmap 91
-#define __NR_truncate 92
-#define __NR_ftruncate 93
-#define __NR_fchmod 94
-#define __NR_getpriority 96
-#define __NR_setpriority 97
-#define __NR_statfs 99
-#define __NR_fstatfs 100
-#define __NR_socketcall 102
-#define __NR_syslog 103
-#define __NR_setitimer 104
-#define __NR_getitimer 105
-#define __NR_stat 106
-#define __NR_lstat 107
-#define __NR_fstat 108
-#define __NR_lookup_dcookie 110
-#define __NR_vhangup 111
-#define __NR_idle 112
-#define __NR_wait4 114
-#define __NR_swapoff 115
-#define __NR_sysinfo 116
-#define __NR_ipc 117
-#define __NR_fsync 118
-#define __NR_sigreturn 119
-#define __NR_clone 120
-#define __NR_setdomainname 121
-#define __NR_uname 122
-#define __NR_adjtimex 124
-#define __NR_mprotect 125
-#define __NR_sigprocmask 126
-#define __NR_create_module 127
-#define __NR_init_module 128
-#define __NR_delete_module 129
-#define __NR_get_kernel_syms 130
-#define __NR_quotactl 131
-#define __NR_getpgid 132
-#define __NR_fchdir 133
-#define __NR_bdflush 134
-#define __NR_sysfs 135
-#define __NR_personality 136
-#define __NR_afs_syscall 137 /* Syscall for Andrew File System */
-#define __NR_getdents 141
-#define __NR_flock 143
-#define __NR_msync 144
-#define __NR_readv 145
-#define __NR_writev 146
-#define __NR_getsid 147
-#define __NR_fdatasync 148
-#define __NR__sysctl 149
-#define __NR_mlock 150
-#define __NR_munlock 151
-#define __NR_mlockall 152
-#define __NR_munlockall 153
-#define __NR_sched_setparam 154
-#define __NR_sched_getparam 155
-#define __NR_sched_setscheduler 156
-#define __NR_sched_getscheduler 157
-#define __NR_sched_yield 158
-#define __NR_sched_get_priority_max 159
-#define __NR_sched_get_priority_min 160
-#define __NR_sched_rr_get_interval 161
-#define __NR_nanosleep 162
-#define __NR_mremap 163
-#define __NR_query_module 167
-#define __NR_poll 168
-#define __NR_nfsservctl 169
-#define __NR_prctl 172
-#define __NR_rt_sigreturn 173
-#define __NR_rt_sigaction 174
-#define __NR_rt_sigprocmask 175
-#define __NR_rt_sigpending 176
-#define __NR_rt_sigtimedwait 177
-#define __NR_rt_sigqueueinfo 178
-#define __NR_rt_sigsuspend 179
-#define __NR_pread64 180
-#define __NR_pwrite64 181
-#define __NR_getcwd 183
-#define __NR_capget 184
-#define __NR_capset 185
-#define __NR_sigaltstack 186
-#define __NR_sendfile 187
-#define __NR_getpmsg 188
-#define __NR_putpmsg 189
-#define __NR_vfork 190
-#define __NR_pivot_root 217
-#define __NR_mincore 218
-#define __NR_madvise 219
-#define __NR_getdents64 220
-#define __NR_readahead 222
-#define __NR_setxattr 224
-#define __NR_lsetxattr 225
-#define __NR_fsetxattr 226
-#define __NR_getxattr 227
-#define __NR_lgetxattr 228
-#define __NR_fgetxattr 229
-#define __NR_listxattr 230
-#define __NR_llistxattr 231
-#define __NR_flistxattr 232
-#define __NR_removexattr 233
-#define __NR_lremovexattr 234
-#define __NR_fremovexattr 235
-#define __NR_gettid 236
-#define __NR_tkill 237
-#define __NR_futex 238
-#define __NR_sched_setaffinity 239
-#define __NR_sched_getaffinity 240
-#define __NR_tgkill 241
-/* Number 242 is reserved for tux */
-#define __NR_io_setup 243
-#define __NR_io_destroy 244
-#define __NR_io_getevents 245
-#define __NR_io_submit 246
-#define __NR_io_cancel 247
-#define __NR_exit_group 248
-#define __NR_epoll_create 249
-#define __NR_epoll_ctl 250
-#define __NR_epoll_wait 251
-#define __NR_set_tid_address 252
-#define __NR_fadvise64 253
-#define __NR_timer_create 254
-#define __NR_timer_settime 255
-#define __NR_timer_gettime 256
-#define __NR_timer_getoverrun 257
-#define __NR_timer_delete 258
-#define __NR_clock_settime 259
-#define __NR_clock_gettime 260
-#define __NR_clock_getres 261
-#define __NR_clock_nanosleep 262
-/* Number 263 is reserved for vserver */
-#define __NR_statfs64 265
-#define __NR_fstatfs64 266
-#define __NR_remap_file_pages 267
-#define __NR_mbind 268
-#define __NR_get_mempolicy 269
-#define __NR_set_mempolicy 270
-#define __NR_mq_open 271
-#define __NR_mq_unlink 272
-#define __NR_mq_timedsend 273
-#define __NR_mq_timedreceive 274
-#define __NR_mq_notify 275
-#define __NR_mq_getsetattr 276
-#define __NR_kexec_load 277
-#define __NR_add_key 278
-#define __NR_request_key 279
-#define __NR_keyctl 280
-#define __NR_waitid 281
-#define __NR_ioprio_set 282
-#define __NR_ioprio_get 283
-#define __NR_inotify_init 284
-#define __NR_inotify_add_watch 285
-#define __NR_inotify_rm_watch 286
-#define __NR_migrate_pages 287
-#define __NR_openat 288
-#define __NR_mkdirat 289
-#define __NR_mknodat 290
-#define __NR_fchownat 291
-#define __NR_futimesat 292
-#define __NR_unlinkat 294
-#define __NR_renameat 295
-#define __NR_linkat 296
-#define __NR_symlinkat 297
-#define __NR_readlinkat 298
-#define __NR_fchmodat 299
-#define __NR_faccessat 300
-#define __NR_pselect6 301
-#define __NR_ppoll 302
-#define __NR_unshare 303
-#define __NR_set_robust_list 304
-#define __NR_get_robust_list 305
-#define __NR_splice 306
-#define __NR_sync_file_range 307
-#define __NR_tee 308
-#define __NR_vmsplice 309
-#define __NR_move_pages 310
-#define __NR_getcpu 311
-#define __NR_epoll_pwait 312
-#define __NR_utimes 313
-#define __NR_fallocate 314
-#define __NR_utimensat 315
-#define __NR_signalfd 316
-#define __NR_timerfd 317
-#define __NR_eventfd 318
-#define __NR_timerfd_create 319
-#define __NR_timerfd_settime 320
-#define __NR_timerfd_gettime 321
-#define __NR_signalfd4 322
-#define __NR_eventfd2 323
-#define __NR_inotify_init1 324
-#define __NR_pipe2 325
-#define __NR_dup3 326
-#define __NR_epoll_create1 327
-#define __NR_preadv 328
-#define __NR_pwritev 329
-#define __NR_rt_tgsigqueueinfo 330
-#define __NR_perf_event_open 331
-#define __NR_fanotify_init 332
-#define __NR_fanotify_mark 333
-#define __NR_prlimit64 334
-#define __NR_name_to_handle_at 335
-#define __NR_open_by_handle_at 336
-#define __NR_clock_adjtime 337
-#define __NR_syncfs 338
-#define __NR_setns 339
-#define __NR_process_vm_readv 340
-#define __NR_process_vm_writev 341
-#define __NR_s390_runtime_instr 342
-#define __NR_kcmp 343
-#define __NR_finit_module 344
-#define __NR_sched_setattr 345
-#define __NR_sched_getattr 346
-#define __NR_renameat2 347
-#define __NR_seccomp 348
-#define __NR_getrandom 349
-#define __NR_memfd_create 350
-#define __NR_bpf 351
-#define __NR_s390_pci_mmio_write 352
-#define __NR_s390_pci_mmio_read 353
-#define __NR_execveat 354
-#define __NR_userfaultfd 355
-#define __NR_membarrier 356
-#define __NR_recvmmsg 357
-#define __NR_sendmmsg 358
-#define __NR_socket 359
-#define __NR_socketpair 360
-#define __NR_bind 361
-#define __NR_connect 362
-#define __NR_listen 363
-#define __NR_accept4 364
-#define __NR_getsockopt 365
-#define __NR_setsockopt 366
-#define __NR_getsockname 367
-#define __NR_getpeername 368
-#define __NR_sendto 369
-#define __NR_sendmsg 370
-#define __NR_recvfrom 371
-#define __NR_recvmsg 372
-#define __NR_shutdown 373
-#define __NR_mlock2 374
-#define __NR_copy_file_range 375
-#define __NR_preadv2 376
-#define __NR_pwritev2 377
-#define __NR_s390_guarded_storage 378
-#define __NR_statx 379
-#define __NR_s390_sthyi 380
-#define NR_syscalls 381
-
-/*
- * There are some system calls that are not present on 64 bit, some
- * have a different name although they do the same (e.g. __NR_chown32
- * is __NR_chown on 64 bit).
- */
-#ifndef __s390x__
-
-#define __NR_time 13
-#define __NR_lchown 16
-#define __NR_setuid 23
-#define __NR_getuid 24
-#define __NR_stime 25
-#define __NR_setgid 46
-#define __NR_getgid 47
-#define __NR_geteuid 49
-#define __NR_getegid 50
-#define __NR_setreuid 70
-#define __NR_setregid 71
-#define __NR_getrlimit 76
-#define __NR_getgroups 80
-#define __NR_setgroups 81
-#define __NR_fchown 95
-#define __NR_ioperm 101
-#define __NR_setfsuid 138
-#define __NR_setfsgid 139
-#define __NR__llseek 140
-#define __NR__newselect 142
-#define __NR_setresuid 164
-#define __NR_getresuid 165
-#define __NR_setresgid 170
-#define __NR_getresgid 171
-#define __NR_chown 182
-#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */
-#define __NR_mmap2 192
-#define __NR_truncate64 193
-#define __NR_ftruncate64 194
-#define __NR_stat64 195
-#define __NR_lstat64 196
-#define __NR_fstat64 197
-#define __NR_lchown32 198
-#define __NR_getuid32 199
-#define __NR_getgid32 200
-#define __NR_geteuid32 201
-#define __NR_getegid32 202
-#define __NR_setreuid32 203
-#define __NR_setregid32 204
-#define __NR_getgroups32 205
-#define __NR_setgroups32 206
-#define __NR_fchown32 207
-#define __NR_setresuid32 208
-#define __NR_getresuid32 209
-#define __NR_setresgid32 210
-#define __NR_getresgid32 211
-#define __NR_chown32 212
-#define __NR_setuid32 213
-#define __NR_setgid32 214
-#define __NR_setfsuid32 215
-#define __NR_setfsgid32 216
-#define __NR_fcntl64 221
-#define __NR_sendfile64 223
-#define __NR_fadvise64_64 264
-#define __NR_fstatat64 293
-
-#else
-
-#define __NR_select 142
-#define __NR_getrlimit 191 /* SuS compliant getrlimit */
-#define __NR_lchown 198
-#define __NR_getuid 199
-#define __NR_getgid 200
-#define __NR_geteuid 201
-#define __NR_getegid 202
-#define __NR_setreuid 203
-#define __NR_setregid 204
-#define __NR_getgroups 205
-#define __NR_setgroups 206
-#define __NR_fchown 207
-#define __NR_setresuid 208
-#define __NR_getresuid 209
-#define __NR_setresgid 210
-#define __NR_getresgid 211
-#define __NR_chown 212
-#define __NR_setuid 213
-#define __NR_setgid 214
-#define __NR_setfsuid 215
-#define __NR_setfsgid 216
-#define __NR_newfstatat 293
-
-#endif
-
-#endif /* _UAPI_ASM_S390_UNISTD_H_ */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
+#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
=============
| **bpftool** **prog { show | list }** [*PROG*]
-| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}]
+| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}]
| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}]
| **bpftool** **prog pin** *PROG* *FILE*
| **bpftool** **prog load** *OBJ* *FILE*
Output will start with program ID followed by program type and
zero or more named attributes (depending on kernel version).
- **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** }]
- Dump eBPF instructions of the program from the kernel.
- If *FILE* is specified image will be written to a file,
- otherwise it will be disassembled and printed to stdout.
+ **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** }]
+ Dump eBPF instructions of the program from the kernel. By
+ default, eBPF will be disassembled and printed to standard
+ output in human-readable format. In this case, **opcodes**
+ controls if raw opcodes should be printed as well.
- **opcodes** controls if raw opcodes will be printed.
+ If **file** is specified, the binary image will instead be
+ written to *FILE*.
+
+ If **visual** is specified, control flow graph (CFG) will be
+ built instead, and eBPF instructions will be presented with
+ CFG in DOT format, on standard output.
**bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** }]
Dump jited image (host machine code) of the program.
# Deal with simplest keywords
case $prev in
- help|key|opcodes)
+ help|key|opcodes|visual)
return 0
;;
tag)
return 0
;;
*)
- _bpftool_once_attr 'file'
+ _bpftool_once_attr 'file'
+ if _bpftool_search_list 'xlated'; then
+ COMPREPLY+=( $( compgen -W 'opcodes visual' -- \
+ "$cur" ) )
+ else
COMPREPLY+=( $( compgen -W 'opcodes' -- \
"$cur" ) )
- return 0
- ;;
+ fi
+ return 0
+ ;;
esac
;;
pin)
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cfg.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+struct cfg {
+ struct list_head funcs;
+ int func_num;
+};
+
+struct func_node {
+ struct list_head l;
+ struct list_head bbs;
+ struct bpf_insn *start;
+ struct bpf_insn *end;
+ int idx;
+ int bb_num;
+};
+
+struct bb_node {
+ struct list_head l;
+ struct list_head e_prevs;
+ struct list_head e_succs;
+ struct bpf_insn *head;
+ struct bpf_insn *tail;
+ int idx;
+};
+
+#define EDGE_FLAG_EMPTY 0x0
+#define EDGE_FLAG_FALLTHROUGH 0x1
+#define EDGE_FLAG_JUMP 0x2
+struct edge_node {
+ struct list_head l;
+ struct bb_node *src;
+ struct bb_node *dst;
+ int flags;
+};
+
+#define ENTRY_BLOCK_INDEX 0
+#define EXIT_BLOCK_INDEX 1
+#define NUM_FIXED_BLOCKS 2
+#define func_prev(func) list_prev_entry(func, l)
+#define func_next(func) list_next_entry(func, l)
+#define bb_prev(bb) list_prev_entry(bb, l)
+#define bb_next(bb) list_next_entry(bb, l)
+#define entry_bb(func) func_first_bb(func)
+#define exit_bb(func) func_last_bb(func)
+#define cfg_first_func(cfg) \
+ list_first_entry(&cfg->funcs, struct func_node, l)
+#define cfg_last_func(cfg) \
+ list_last_entry(&cfg->funcs, struct func_node, l)
+#define func_first_bb(func) \
+ list_first_entry(&func->bbs, struct bb_node, l)
+#define func_last_bb(func) \
+ list_last_entry(&func->bbs, struct bb_node, l)
+
+static struct func_node *cfg_append_func(struct cfg *cfg, struct bpf_insn *insn)
+{
+ struct func_node *new_func, *func;
+
+ list_for_each_entry(func, &cfg->funcs, l) {
+ if (func->start == insn)
+ return func;
+ else if (func->start > insn)
+ break;
+ }
+
+ func = func_prev(func);
+ new_func = calloc(1, sizeof(*new_func));
+ if (!new_func) {
+ p_err("OOM when allocating FUNC node");
+ return NULL;
+ }
+ new_func->start = insn;
+ new_func->idx = cfg->func_num;
+ list_add(&new_func->l, &func->l);
+ cfg->func_num++;
+
+ return new_func;
+}
+
+static struct bb_node *func_append_bb(struct func_node *func,
+ struct bpf_insn *insn)
+{
+ struct bb_node *new_bb, *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ if (bb->head == insn)
+ return bb;
+ else if (bb->head > insn)
+ break;
+ }
+
+ bb = bb_prev(bb);
+ new_bb = calloc(1, sizeof(*new_bb));
+ if (!new_bb) {
+ p_err("OOM when allocating BB node");
+ return NULL;
+ }
+ new_bb->head = insn;
+ INIT_LIST_HEAD(&new_bb->e_prevs);
+ INIT_LIST_HEAD(&new_bb->e_succs);
+ list_add(&new_bb->l, &bb->l);
+
+ return new_bb;
+}
+
+static struct bb_node *func_insert_dummy_bb(struct list_head *after)
+{
+ struct bb_node *bb;
+
+ bb = calloc(1, sizeof(*bb));
+ if (!bb) {
+ p_err("OOM when allocating BB node");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&bb->e_prevs);
+ INIT_LIST_HEAD(&bb->e_succs);
+ list_add(&bb->l, after);
+
+ return bb;
+}
+
+static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
+ struct bpf_insn *end)
+{
+ struct func_node *func, *last_func;
+
+ func = cfg_append_func(cfg, cur);
+ if (!func)
+ return true;
+
+ for (; cur < end; cur++) {
+ if (cur->code != (BPF_JMP | BPF_CALL))
+ continue;
+ if (cur->src_reg != BPF_PSEUDO_CALL)
+ continue;
+ func = cfg_append_func(cfg, cur + cur->off + 1);
+ if (!func)
+ return true;
+ }
+
+ last_func = cfg_last_func(cfg);
+ last_func->end = end - 1;
+ func = cfg_first_func(cfg);
+ list_for_each_entry_from(func, &last_func->l, l) {
+ func->end = func_next(func)->start - 1;
+ }
+
+ return false;
+}
+
+static bool func_partition_bb_head(struct func_node *func)
+{
+ struct bpf_insn *cur, *end;
+ struct bb_node *bb;
+
+ cur = func->start;
+ end = func->end;
+ INIT_LIST_HEAD(&func->bbs);
+ bb = func_append_bb(func, cur);
+ if (!bb)
+ return true;
+
+ for (; cur <= end; cur++) {
+ if (BPF_CLASS(cur->code) == BPF_JMP) {
+ u8 opcode = BPF_OP(cur->code);
+
+ if (opcode == BPF_EXIT || opcode == BPF_CALL)
+ continue;
+
+ bb = func_append_bb(func, cur + cur->off + 1);
+ if (!bb)
+ return true;
+
+ if (opcode != BPF_JA) {
+ bb = func_append_bb(func, cur + 1);
+ if (!bb)
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static void func_partition_bb_tail(struct func_node *func)
+{
+ unsigned int bb_idx = NUM_FIXED_BLOCKS;
+ struct bb_node *bb, *last;
+
+ last = func_last_bb(func);
+ last->tail = func->end;
+ bb = func_first_bb(func);
+ list_for_each_entry_from(bb, &last->l, l) {
+ bb->tail = bb_next(bb)->head - 1;
+ bb->idx = bb_idx++;
+ }
+
+ last->idx = bb_idx++;
+ func->bb_num = bb_idx;
+}
+
+static bool func_add_special_bb(struct func_node *func)
+{
+ struct bb_node *bb;
+
+ bb = func_insert_dummy_bb(&func->bbs);
+ if (!bb)
+ return true;
+ bb->idx = ENTRY_BLOCK_INDEX;
+
+ bb = func_insert_dummy_bb(&func_last_bb(func)->l);
+ if (!bb)
+ return true;
+ bb->idx = EXIT_BLOCK_INDEX;
+
+ return false;
+}
+
+static bool func_partition_bb(struct func_node *func)
+{
+ if (func_partition_bb_head(func))
+ return true;
+
+ func_partition_bb_tail(func);
+
+ return false;
+}
+
+static struct bb_node *func_search_bb_with_head(struct func_node *func,
+ struct bpf_insn *insn)
+{
+ struct bb_node *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ if (bb->head == insn)
+ return bb;
+ }
+
+ return NULL;
+}
+
+static struct edge_node *new_edge(struct bb_node *src, struct bb_node *dst,
+ int flags)
+{
+ struct edge_node *e;
+
+ e = calloc(1, sizeof(*e));
+ if (!e) {
+ p_err("OOM when allocating edge node");
+ return NULL;
+ }
+
+ if (src)
+ e->src = src;
+ if (dst)
+ e->dst = dst;
+
+ e->flags |= flags;
+
+ return e;
+}
+
+static bool func_add_bb_edges(struct func_node *func)
+{
+ struct bpf_insn *insn;
+ struct edge_node *e;
+ struct bb_node *bb;
+
+ bb = entry_bb(func);
+ e = new_edge(bb, bb_next(bb), EDGE_FLAG_FALLTHROUGH);
+ if (!e)
+ return true;
+ list_add_tail(&e->l, &bb->e_succs);
+
+ bb = exit_bb(func);
+ e = new_edge(bb_prev(bb), bb, EDGE_FLAG_FALLTHROUGH);
+ if (!e)
+ return true;
+ list_add_tail(&e->l, &bb->e_prevs);
+
+ bb = entry_bb(func);
+ bb = bb_next(bb);
+ list_for_each_entry_from(bb, &exit_bb(func)->l, l) {
+ e = new_edge(bb, NULL, EDGE_FLAG_EMPTY);
+ if (!e)
+ return true;
+ e->src = bb;
+
+ insn = bb->tail;
+ if (BPF_CLASS(insn->code) != BPF_JMP ||
+ BPF_OP(insn->code) == BPF_EXIT) {
+ e->dst = bb_next(bb);
+ e->flags |= EDGE_FLAG_FALLTHROUGH;
+ list_add_tail(&e->l, &bb->e_succs);
+ continue;
+ } else if (BPF_OP(insn->code) == BPF_JA) {
+ e->dst = func_search_bb_with_head(func,
+ insn + insn->off + 1);
+ e->flags |= EDGE_FLAG_JUMP;
+ list_add_tail(&e->l, &bb->e_succs);
+ continue;
+ }
+
+ e->dst = bb_next(bb);
+ e->flags |= EDGE_FLAG_FALLTHROUGH;
+ list_add_tail(&e->l, &bb->e_succs);
+
+ e = new_edge(bb, NULL, EDGE_FLAG_JUMP);
+ if (!e)
+ return true;
+ e->src = bb;
+ e->dst = func_search_bb_with_head(func, insn + insn->off + 1);
+ list_add_tail(&e->l, &bb->e_succs);
+ }
+
+ return false;
+}
+
+static bool cfg_build(struct cfg *cfg, struct bpf_insn *insn, unsigned int len)
+{
+ int cnt = len / sizeof(*insn);
+ struct func_node *func;
+
+ INIT_LIST_HEAD(&cfg->funcs);
+
+ if (cfg_partition_funcs(cfg, insn, insn + cnt))
+ return true;
+
+ list_for_each_entry(func, &cfg->funcs, l) {
+ if (func_partition_bb(func) || func_add_special_bb(func))
+ return true;
+
+ if (func_add_bb_edges(func))
+ return true;
+ }
+
+ return false;
+}
+
+static void cfg_destroy(struct cfg *cfg)
+{
+ struct func_node *func, *func2;
+
+ list_for_each_entry_safe(func, func2, &cfg->funcs, l) {
+ struct bb_node *bb, *bb2;
+
+ list_for_each_entry_safe(bb, bb2, &func->bbs, l) {
+ struct edge_node *e, *e2;
+
+ list_for_each_entry_safe(e, e2, &bb->e_prevs, l) {
+ list_del(&e->l);
+ free(e);
+ }
+
+ list_for_each_entry_safe(e, e2, &bb->e_succs, l) {
+ list_del(&e->l);
+ free(e);
+ }
+
+ list_del(&bb->l);
+ free(bb);
+ }
+
+ list_del(&func->l);
+ free(func);
+ }
+}
+
+static void draw_bb_node(struct func_node *func, struct bb_node *bb)
+{
+ const char *shape;
+
+ if (bb->idx == ENTRY_BLOCK_INDEX || bb->idx == EXIT_BLOCK_INDEX)
+ shape = "Mdiamond";
+ else
+ shape = "record";
+
+ printf("\tfn_%d_bb_%d [shape=%s,style=filled,label=\"",
+ func->idx, bb->idx, shape);
+
+ if (bb->idx == ENTRY_BLOCK_INDEX) {
+ printf("ENTRY");
+ } else if (bb->idx == EXIT_BLOCK_INDEX) {
+ printf("EXIT");
+ } else {
+ unsigned int start_idx;
+ struct dump_data dd = {};
+
+ printf("{");
+ kernel_syms_load(&dd);
+ start_idx = bb->head - func->start;
+ dump_xlated_for_graph(&dd, bb->head, bb->tail, start_idx);
+ kernel_syms_destroy(&dd);
+ printf("}");
+ }
+
+ printf("\"];\n\n");
+}
+
+static void draw_bb_succ_edges(struct func_node *func, struct bb_node *bb)
+{
+ const char *style = "\"solid,bold\"";
+ const char *color = "black";
+ int func_idx = func->idx;
+ struct edge_node *e;
+ int weight = 10;
+
+ if (list_empty(&bb->e_succs))
+ return;
+
+ list_for_each_entry(e, &bb->e_succs, l) {
+ printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=%s, color=%s, weight=%d, constraint=true",
+ func_idx, e->src->idx, func_idx, e->dst->idx,
+ style, color, weight);
+ printf("];\n");
+ }
+}
+
+static void func_output_bb_def(struct func_node *func)
+{
+ struct bb_node *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ draw_bb_node(func, bb);
+ }
+}
+
+static void func_output_edges(struct func_node *func)
+{
+ int func_idx = func->idx;
+ struct bb_node *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ draw_bb_succ_edges(func, bb);
+ }
+
+ /* Add an invisible edge from ENTRY to EXIT, this is to
+ * improve the graph layout.
+ */
+ printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=\"invis\", constraint=true];\n",
+ func_idx, ENTRY_BLOCK_INDEX, func_idx, EXIT_BLOCK_INDEX);
+}
+
+static void cfg_dump(struct cfg *cfg)
+{
+ struct func_node *func;
+
+ printf("digraph \"DOT graph for eBPF program\" {\n");
+ list_for_each_entry(func, &cfg->funcs, l) {
+ printf("subgraph \"cluster_%d\" {\n\tstyle=\"dashed\";\n\tcolor=\"black\";\n\tlabel=\"func_%d ()\";\n",
+ func->idx, func->idx);
+ func_output_bb_def(func);
+ func_output_edges(func);
+ printf("}\n");
+ }
+ printf("}\n");
+}
+
+void dump_xlated_cfg(void *buf, unsigned int len)
+{
+ struct bpf_insn *insn = buf;
+ struct cfg cfg;
+
+ memset(&cfg, 0, sizeof(cfg));
+ if (cfg_build(&cfg, insn, len))
+ return;
+
+ cfg_dump(&cfg);
+
+ cfg_destroy(&cfg);
+}
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_CFG_H
+#define __BPF_TOOL_CFG_H
+
+void dump_xlated_cfg(void *buf, unsigned int len);
+
+#endif /* __BPF_TOOL_CFG_H */
#include "main.h"
+#define BATCH_LINE_LEN_MAX 65536
+#define BATCH_ARG_NB_MAX 4096
+
const char *bin_name;
static int last_argc;
static char **last_argv;
}
}
+/* Split command line into argument vector. */
+static int make_args(char *line, char *n_argv[], int maxargs, int cmd_nb)
+{
+ static const char ws[] = " \t\r\n";
+ char *cp = line;
+ int n_argc = 0;
+
+ while (*cp) {
+ /* Skip leading whitespace. */
+ cp += strspn(cp, ws);
+
+ if (*cp == '\0')
+ break;
+
+ if (n_argc >= (maxargs - 1)) {
+ p_err("too many arguments to command %d", cmd_nb);
+ return -1;
+ }
+
+ /* Word begins with quote. */
+ if (*cp == '\'' || *cp == '"') {
+ char quote = *cp++;
+
+ n_argv[n_argc++] = cp;
+ /* Find ending quote. */
+ cp = strchr(cp, quote);
+ if (!cp) {
+ p_err("unterminated quoted string in command %d",
+ cmd_nb);
+ return -1;
+ }
+ } else {
+ n_argv[n_argc++] = cp;
+
+ /* Find end of word. */
+ cp += strcspn(cp, ws);
+ if (*cp == '\0')
+ break;
+ }
+
+ /* Separate words. */
+ *cp++ = 0;
+ }
+ n_argv[n_argc] = NULL;
+
+ return n_argc;
+}
+
static int do_batch(int argc, char **argv);
static const struct cmd cmds[] = {
static int do_batch(int argc, char **argv)
{
+ char buf[BATCH_LINE_LEN_MAX], contline[BATCH_LINE_LEN_MAX];
+ char *n_argv[BATCH_ARG_NB_MAX];
unsigned int lines = 0;
- char *n_argv[4096];
- char buf[65536];
int n_argc;
FILE *fp;
+ char *cp;
int err;
int i;
}
NEXT_ARG();
- fp = fopen(*argv, "r");
+ if (!strcmp(*argv, "-"))
+ fp = stdin;
+ else
+ fp = fopen(*argv, "r");
if (!fp) {
p_err("Can't open file (%s): %s", *argv, strerror(errno));
return -1;
if (json_output)
jsonw_start_array(json_wtr);
while (fgets(buf, sizeof(buf), fp)) {
+ cp = strchr(buf, '#');
+ if (cp)
+ *cp = '\0';
+
if (strlen(buf) == sizeof(buf) - 1) {
errno = E2BIG;
break;
}
- n_argc = 0;
- n_argv[n_argc] = strtok(buf, " \t\n");
-
- while (n_argv[n_argc]) {
- n_argc++;
- if (n_argc == ARRAY_SIZE(n_argv)) {
- p_err("line %d has too many arguments, skip",
+ /* Append continuation lines if any (coming after a line ending
+ * with '\' in the batch file).
+ */
+ while ((cp = strstr(buf, "\\\n")) != NULL) {
+ if (!fgets(contline, sizeof(contline), fp) ||
+ strlen(contline) == 0) {
+ p_err("missing continuation line on command %d",
lines);
- n_argc = 0;
- break;
+ err = -1;
+ goto err_close;
+ }
+
+ cp = strchr(contline, '#');
+ if (cp)
+ *cp = '\0';
+
+ if (strlen(buf) + strlen(contline) + 1 > sizeof(buf)) {
+ p_err("command %d is too long", lines);
+ err = -1;
+ goto err_close;
}
- n_argv[n_argc] = strtok(NULL, " \t\n");
+ buf[strlen(buf) - 2] = '\0';
+ strcat(buf, contline);
}
+ n_argc = make_args(buf, n_argv, BATCH_ARG_NB_MAX, lines);
if (!n_argc)
continue;
+ if (n_argc < 0)
+ goto err_close;
if (json_output) {
jsonw_start_object(json_wtr);
}
if (errno && errno != ENOENT) {
- perror("reading batch file failed");
+ p_err("reading batch file failed: %s", strerror(errno));
err = -1;
} else {
- p_info("processed %d lines", lines);
+ p_info("processed %d commands", lines);
err = 0;
}
err_close:
- fclose(fp);
+ if (fp != stdin)
+ fclose(fp);
if (json_output)
jsonw_end_array(json_wtr);
#include <bpf.h>
#include <libbpf.h>
+#include "cfg.h"
#include "main.h"
-#include "disasm.h"
+#include "xlated_dumper.h"
static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_UNSPEC] = "unspec",
return err;
}
-#define SYM_MAX_NAME 256
-
-struct kernel_sym {
- unsigned long address;
- char name[SYM_MAX_NAME];
-};
-
-struct dump_data {
- unsigned long address_call_base;
- struct kernel_sym *sym_mapping;
- __u32 sym_count;
- char scratch_buff[SYM_MAX_NAME];
-};
-
-static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
-{
- return ((struct kernel_sym *)sym_a)->address -
- ((struct kernel_sym *)sym_b)->address;
-}
-
-static void kernel_syms_load(struct dump_data *dd)
-{
- struct kernel_sym *sym;
- char buff[256];
- void *tmp, *address;
- FILE *fp;
-
- fp = fopen("/proc/kallsyms", "r");
- if (!fp)
- return;
-
- while (!feof(fp)) {
- if (!fgets(buff, sizeof(buff), fp))
- break;
- tmp = realloc(dd->sym_mapping,
- (dd->sym_count + 1) *
- sizeof(*dd->sym_mapping));
- if (!tmp) {
-out:
- free(dd->sym_mapping);
- dd->sym_mapping = NULL;
- fclose(fp);
- return;
- }
- dd->sym_mapping = tmp;
- sym = &dd->sym_mapping[dd->sym_count];
- if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
- continue;
- sym->address = (unsigned long)address;
- if (!strcmp(sym->name, "__bpf_call_base")) {
- dd->address_call_base = sym->address;
- /* sysctl kernel.kptr_restrict was set */
- if (!sym->address)
- goto out;
- }
- if (sym->address)
- dd->sym_count++;
- }
-
- fclose(fp);
-
- qsort(dd->sym_mapping, dd->sym_count,
- sizeof(*dd->sym_mapping), kernel_syms_cmp);
-}
-
-static void kernel_syms_destroy(struct dump_data *dd)
-{
- free(dd->sym_mapping);
-}
-
-static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
- unsigned long key)
-{
- struct kernel_sym sym = {
- .address = key,
- };
-
- return dd->sym_mapping ?
- bsearch(&sym, dd->sym_mapping, dd->sym_count,
- sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
-}
-
-static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
-{
- va_list args;
-
- va_start(args, fmt);
- vprintf(fmt, args);
- va_end(args);
-}
-
-static const char *print_call_pcrel(struct dump_data *dd,
- struct kernel_sym *sym,
- unsigned long address,
- const struct bpf_insn *insn)
-{
- if (sym)
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "%+d#%s", insn->off, sym->name);
- else
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "%+d#0x%lx", insn->off, address);
- return dd->scratch_buff;
-}
-
-static const char *print_call_helper(struct dump_data *dd,
- struct kernel_sym *sym,
- unsigned long address)
-{
- if (sym)
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "%s", sym->name);
- else
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "0x%lx", address);
- return dd->scratch_buff;
-}
-
-static const char *print_call(void *private_data,
- const struct bpf_insn *insn)
-{
- struct dump_data *dd = private_data;
- unsigned long address = dd->address_call_base + insn->imm;
- struct kernel_sym *sym;
-
- sym = kernel_syms_search(dd, address);
- if (insn->src_reg == BPF_PSEUDO_CALL)
- return print_call_pcrel(dd, sym, address, insn);
- else
- return print_call_helper(dd, sym, address);
-}
-
-static const char *print_imm(void *private_data,
- const struct bpf_insn *insn,
- __u64 full_imm)
-{
- struct dump_data *dd = private_data;
-
- if (insn->src_reg == BPF_PSEUDO_MAP_FD)
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "map[id:%u]", insn->imm);
- else
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "0x%llx", (unsigned long long)full_imm);
- return dd->scratch_buff;
-}
-
-static void dump_xlated_plain(struct dump_data *dd, void *buf,
- unsigned int len, bool opcodes)
-{
- const struct bpf_insn_cbs cbs = {
- .cb_print = print_insn,
- .cb_call = print_call,
- .cb_imm = print_imm,
- .private_data = dd,
- };
- struct bpf_insn *insn = buf;
- bool double_insn = false;
- unsigned int i;
-
- for (i = 0; i < len / sizeof(*insn); i++) {
- if (double_insn) {
- double_insn = false;
- continue;
- }
-
- double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
-
- printf("% 4d: ", i);
- print_bpf_insn(&cbs, NULL, insn + i, true);
-
- if (opcodes) {
- printf(" ");
- fprint_hex(stdout, insn + i, 8, " ");
- if (double_insn && i < len - 1) {
- printf(" ");
- fprint_hex(stdout, insn + i + 1, 8, " ");
- }
- printf("\n");
- }
- }
-}
-
-static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
-{
- unsigned int l = strlen(fmt);
- char chomped_fmt[l];
- va_list args;
-
- va_start(args, fmt);
- if (l > 0) {
- strncpy(chomped_fmt, fmt, l - 1);
- chomped_fmt[l - 1] = '\0';
- }
- jsonw_vprintf_enquote(json_wtr, chomped_fmt, args);
- va_end(args);
-}
-
-static void dump_xlated_json(struct dump_data *dd, void *buf,
- unsigned int len, bool opcodes)
-{
- const struct bpf_insn_cbs cbs = {
- .cb_print = print_insn_json,
- .cb_call = print_call,
- .cb_imm = print_imm,
- .private_data = dd,
- };
- struct bpf_insn *insn = buf;
- bool double_insn = false;
- unsigned int i;
-
- jsonw_start_array(json_wtr);
- for (i = 0; i < len / sizeof(*insn); i++) {
- if (double_insn) {
- double_insn = false;
- continue;
- }
- double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
-
- jsonw_start_object(json_wtr);
- jsonw_name(json_wtr, "disasm");
- print_bpf_insn(&cbs, NULL, insn + i, true);
-
- if (opcodes) {
- jsonw_name(json_wtr, "opcodes");
- jsonw_start_object(json_wtr);
-
- jsonw_name(json_wtr, "code");
- jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code);
-
- jsonw_name(json_wtr, "src_reg");
- jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg);
-
- jsonw_name(json_wtr, "dst_reg");
- jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg);
-
- jsonw_name(json_wtr, "off");
- print_hex_data_json((uint8_t *)(&insn[i].off), 2);
-
- jsonw_name(json_wtr, "imm");
- if (double_insn && i < len - 1)
- print_hex_data_json((uint8_t *)(&insn[i].imm),
- 12);
- else
- print_hex_data_json((uint8_t *)(&insn[i].imm),
- 4);
- jsonw_end_object(json_wtr);
- }
- jsonw_end_object(json_wtr);
- }
- jsonw_end_array(json_wtr);
-}
-
static int do_dump(int argc, char **argv)
{
struct bpf_prog_info info = {};
unsigned int buf_size;
char *filepath = NULL;
bool opcodes = false;
+ bool visual = false;
unsigned char *buf;
__u32 *member_len;
__u64 *member_ptr;
} else if (is_prefix(*argv, "opcodes")) {
opcodes = true;
NEXT_ARG();
+ } else if (is_prefix(*argv, "visual")) {
+ visual = true;
+ NEXT_ARG();
}
if (argc) {
n < 0 ? strerror(errno) : "short write");
goto err_free;
}
- } else {
- if (member_len == &info.jited_prog_len) {
- const char *name = NULL;
-
- if (info.ifindex) {
- name = ifindex_to_bfd_name_ns(info.ifindex,
- info.netns_dev,
- info.netns_ino);
- if (!name)
- goto err_free;
- }
- disasm_print_insn(buf, *member_len, opcodes, name);
- } else {
- kernel_syms_load(&dd);
- if (json_output)
- dump_xlated_json(&dd, buf, *member_len, opcodes);
- else
- dump_xlated_plain(&dd, buf, *member_len, opcodes);
- kernel_syms_destroy(&dd);
+ if (json_output)
+ jsonw_null(json_wtr);
+ } else if (member_len == &info.jited_prog_len) {
+ const char *name = NULL;
+
+ if (info.ifindex) {
+ name = ifindex_to_bfd_name_ns(info.ifindex,
+ info.netns_dev,
+ info.netns_ino);
+ if (!name)
+ goto err_free;
}
+
+ disasm_print_insn(buf, *member_len, opcodes, name);
+ } else if (visual) {
+ if (json_output)
+ jsonw_null(json_wtr);
+ else
+ dump_xlated_cfg(buf, *member_len);
+ } else {
+ kernel_syms_load(&dd);
+ if (json_output)
+ dump_xlated_json(&dd, buf, *member_len, opcodes);
+ else
+ dump_xlated_plain(&dd, buf, *member_len, opcodes);
+ kernel_syms_destroy(&dd);
}
free(buf);
fprintf(stderr,
"Usage: %s %s { show | list } [PROG]\n"
- " %s %s dump xlated PROG [{ file FILE | opcodes }]\n"
+ " %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n"
" %s %s dump jited PROG [{ file FILE | opcodes }]\n"
" %s %s pin PROG FILE\n"
" %s %s load OBJ FILE\n"
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "disasm.h"
+#include "json_writer.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
+{
+ return ((struct kernel_sym *)sym_a)->address -
+ ((struct kernel_sym *)sym_b)->address;
+}
+
+void kernel_syms_load(struct dump_data *dd)
+{
+ struct kernel_sym *sym;
+ char buff[256];
+ void *tmp, *address;
+ FILE *fp;
+
+ fp = fopen("/proc/kallsyms", "r");
+ if (!fp)
+ return;
+
+ while (!feof(fp)) {
+ if (!fgets(buff, sizeof(buff), fp))
+ break;
+ tmp = realloc(dd->sym_mapping,
+ (dd->sym_count + 1) *
+ sizeof(*dd->sym_mapping));
+ if (!tmp) {
+out:
+ free(dd->sym_mapping);
+ dd->sym_mapping = NULL;
+ fclose(fp);
+ return;
+ }
+ dd->sym_mapping = tmp;
+ sym = &dd->sym_mapping[dd->sym_count];
+ if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
+ continue;
+ sym->address = (unsigned long)address;
+ if (!strcmp(sym->name, "__bpf_call_base")) {
+ dd->address_call_base = sym->address;
+ /* sysctl kernel.kptr_restrict was set */
+ if (!sym->address)
+ goto out;
+ }
+ if (sym->address)
+ dd->sym_count++;
+ }
+
+ fclose(fp);
+
+ qsort(dd->sym_mapping, dd->sym_count,
+ sizeof(*dd->sym_mapping), kernel_syms_cmp);
+}
+
+void kernel_syms_destroy(struct dump_data *dd)
+{
+ free(dd->sym_mapping);
+}
+
+static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
+ unsigned long key)
+{
+ struct kernel_sym sym = {
+ .address = key,
+ };
+
+ return dd->sym_mapping ?
+ bsearch(&sym, dd->sym_mapping, dd->sym_count,
+ sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
+}
+
+static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ va_end(args);
+}
+
+static void
+print_insn_for_graph(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+ char buf[64], *p;
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ p = buf;
+ while (*p != '\0') {
+ if (*p == '\n') {
+ memmove(p + 3, p, strlen(buf) + 1 - (p - buf));
+ /* Align each instruction dump row left. */
+ *p++ = '\\';
+ *p++ = 'l';
+ /* Output multiline concatenation. */
+ *p++ = '\\';
+ } else if (*p == '<' || *p == '>' || *p == '|' || *p == '&') {
+ memmove(p + 1, p, strlen(buf) + 1 - (p - buf));
+ /* Escape special character. */
+ *p++ = '\\';
+ }
+
+ p++;
+ }
+
+ printf("%s", buf);
+}
+
+static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+ unsigned int l = strlen(fmt);
+ char chomped_fmt[l];
+ va_list args;
+
+ va_start(args, fmt);
+ if (l > 0) {
+ strncpy(chomped_fmt, fmt, l - 1);
+ chomped_fmt[l - 1] = '\0';
+ }
+ jsonw_vprintf_enquote(json_wtr, chomped_fmt, args);
+ va_end(args);
+}
+
+static const char *print_call_pcrel(struct dump_data *dd,
+ struct kernel_sym *sym,
+ unsigned long address,
+ const struct bpf_insn *insn)
+{
+ if (sym)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%+d#%s", insn->off, sym->name);
+ else
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%+d#0x%lx", insn->off, address);
+ return dd->scratch_buff;
+}
+
+static const char *print_call_helper(struct dump_data *dd,
+ struct kernel_sym *sym,
+ unsigned long address)
+{
+ if (sym)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%s", sym->name);
+ else
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "0x%lx", address);
+ return dd->scratch_buff;
+}
+
+static const char *print_call(void *private_data,
+ const struct bpf_insn *insn)
+{
+ struct dump_data *dd = private_data;
+ unsigned long address = dd->address_call_base + insn->imm;
+ struct kernel_sym *sym;
+
+ sym = kernel_syms_search(dd, address);
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ return print_call_pcrel(dd, sym, address, insn);
+ else
+ return print_call_helper(dd, sym, address);
+}
+
+static const char *print_imm(void *private_data,
+ const struct bpf_insn *insn,
+ __u64 full_imm)
+{
+ struct dump_data *dd = private_data;
+
+ if (insn->src_reg == BPF_PSEUDO_MAP_FD)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "map[id:%u]", insn->imm);
+ else
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "0x%llx", (unsigned long long)full_imm);
+ return dd->scratch_buff;
+}
+
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn_json,
+ .cb_call = print_call,
+ .cb_imm = print_imm,
+ .private_data = dd,
+ };
+ struct bpf_insn *insn = buf;
+ bool double_insn = false;
+ unsigned int i;
+
+ jsonw_start_array(json_wtr);
+ for (i = 0; i < len / sizeof(*insn); i++) {
+ if (double_insn) {
+ double_insn = false;
+ continue;
+ }
+ double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "disasm");
+ print_bpf_insn(&cbs, NULL, insn + i, true);
+
+ if (opcodes) {
+ jsonw_name(json_wtr, "opcodes");
+ jsonw_start_object(json_wtr);
+
+ jsonw_name(json_wtr, "code");
+ jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code);
+
+ jsonw_name(json_wtr, "src_reg");
+ jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg);
+
+ jsonw_name(json_wtr, "dst_reg");
+ jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg);
+
+ jsonw_name(json_wtr, "off");
+ print_hex_data_json((uint8_t *)(&insn[i].off), 2);
+
+ jsonw_name(json_wtr, "imm");
+ if (double_insn && i < len - 1)
+ print_hex_data_json((uint8_t *)(&insn[i].imm),
+ 12);
+ else
+ print_hex_data_json((uint8_t *)(&insn[i].imm),
+ 4);
+ jsonw_end_object(json_wtr);
+ }
+ jsonw_end_object(json_wtr);
+ }
+ jsonw_end_array(json_wtr);
+}
+
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn,
+ .cb_call = print_call,
+ .cb_imm = print_imm,
+ .private_data = dd,
+ };
+ struct bpf_insn *insn = buf;
+ bool double_insn = false;
+ unsigned int i;
+
+ for (i = 0; i < len / sizeof(*insn); i++) {
+ if (double_insn) {
+ double_insn = false;
+ continue;
+ }
+
+ double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+ printf("% 4d: ", i);
+ print_bpf_insn(&cbs, NULL, insn + i, true);
+
+ if (opcodes) {
+ printf(" ");
+ fprint_hex(stdout, insn + i, 8, " ");
+ if (double_insn && i < len - 1) {
+ printf(" ");
+ fprint_hex(stdout, insn + i + 1, 8, " ");
+ }
+ printf("\n");
+ }
+ }
+}
+
+void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
+ unsigned int start_idx)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn_for_graph,
+ .cb_call = print_call,
+ .cb_imm = print_imm,
+ .private_data = dd,
+ };
+ struct bpf_insn *insn_start = buf_start;
+ struct bpf_insn *insn_end = buf_end;
+ struct bpf_insn *cur = insn_start;
+
+ for (; cur <= insn_end; cur++) {
+ printf("% 4d: ", (int)(cur - insn_start + start_idx));
+ print_bpf_insn(&cbs, NULL, cur, true);
+ if (cur != insn_end)
+ printf(" | ");
+ }
+}
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_XLATED_DUMPER_H
+#define __BPF_TOOL_XLATED_DUMPER_H
+
+#define SYM_MAX_NAME 256
+
+struct kernel_sym {
+ unsigned long address;
+ char name[SYM_MAX_NAME];
+};
+
+struct dump_data {
+ unsigned long address_call_base;
+ struct kernel_sym *sym_mapping;
+ __u32 sym_count;
+ char scratch_buff[SYM_MAX_NAME];
+};
+
+void kernel_syms_load(struct dump_data *dd);
+void kernel_syms_destroy(struct dump_data *dd);
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes);
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes);
+void dump_xlated_for_graph(struct dump_data *dd, void *buf, void *buf_end,
+ unsigned int start_index);
+
+#endif
# SPDX-License-Identifier: GPL-2.0
# Makefile for cgroup tools
-CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall -Wextra
all: cgroup_event_listener
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)ld
CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon
# SPDX-License-Identifier: GPL-2.0
# Makefile for Hyper-V tools
-CC = $(CROSS_COMPILE)gcc
WARNINGS = -Wall -Wextra
CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS)
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)ld
CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
ALL_TARGETS := iio_event_monitor lsiio iio_generic_buffer
I915_MOCS_CACHED,
};
+/*
+ * Different engines serve different roles, and there may be more than one
+ * engine serving each role. enum drm_i915_gem_engine_class provides a
+ * classification of the role of the engine, which may be used when requesting
+ * operations to be performed on a certain subset of engines, or for providing
+ * information about that group.
+ */
+enum drm_i915_gem_engine_class {
+ I915_ENGINE_CLASS_RENDER = 0,
+ I915_ENGINE_CLASS_COPY = 1,
+ I915_ENGINE_CLASS_VIDEO = 2,
+ I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
+
+ I915_ENGINE_CLASS_INVALID = -1
+};
+
+/**
+ * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
+ *
+ */
+
+enum drm_i915_pmu_engine_sample {
+ I915_SAMPLE_BUSY = 0,
+ I915_SAMPLE_WAIT = 1,
+ I915_SAMPLE_SEMA = 2
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+ (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+ ((class) << I915_PMU_CLASS_SHIFT | \
+ (instance) << I915_PMU_SAMPLE_BITS | \
+ (sample))
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
+#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2)
+#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3)
+
+#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
+
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
*/
#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49
+/*
+ * Query whether every context (both per-file default and user created) is
+ * isolated (insofar as HW supports). If this parameter is not true, then
+ * freshly created contexts may inherit values from an existing context,
+ * rather than default HW values. If true, it also ensures (insofar as HW
+ * supports) that all state set by this context will not leak to any other
+ * context.
+ *
+ * As not every engine across every gen support contexts, the returned
+ * value reports the support of context isolation for individual engines by
+ * returning a bitmask of each engine class set to true if that class supports
+ * isolation.
+ */
+#define I915_PARAM_HAS_CONTEXT_ISOLATION 50
+
+/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
+ * registers. This used to be fixed per platform but from CNL onwards, this
+ * might vary depending on the parts.
+ */
+#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
+
typedef struct drm_i915_getparam {
__s32 param;
/*
IFLA_IF_NETNSID,
IFLA_CARRIER_UP_COUNT,
IFLA_CARRIER_DOWN_COUNT,
+ IFLA_NEW_IFINDEX,
__IFLA_MAX
};
/* Available with KVM_CAP_S390_CMMA_MIGRATION */
#define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
#define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
+/* Memory Encryption Commands */
+#define KVM_MEMORY_ENCRYPT_OP _IOWR(KVMIO, 0xba, unsigned long)
+
+struct kvm_enc_region {
+ __u64 addr;
+ __u64 size;
+};
+
+#define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region)
+#define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region)
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+ /* Guest initialization commands */
+ KVM_SEV_INIT = 0,
+ KVM_SEV_ES_INIT,
+ /* Guest launch commands */
+ KVM_SEV_LAUNCH_START,
+ KVM_SEV_LAUNCH_UPDATE_DATA,
+ KVM_SEV_LAUNCH_UPDATE_VMSA,
+ KVM_SEV_LAUNCH_SECRET,
+ KVM_SEV_LAUNCH_MEASURE,
+ KVM_SEV_LAUNCH_FINISH,
+ /* Guest migration commands (outgoing) */
+ KVM_SEV_SEND_START,
+ KVM_SEV_SEND_UPDATE_DATA,
+ KVM_SEV_SEND_UPDATE_VMSA,
+ KVM_SEV_SEND_FINISH,
+ /* Guest migration commands (incoming) */
+ KVM_SEV_RECEIVE_START,
+ KVM_SEV_RECEIVE_UPDATE_DATA,
+ KVM_SEV_RECEIVE_UPDATE_VMSA,
+ KVM_SEV_RECEIVE_FINISH,
+ /* Guest status and debug commands */
+ KVM_SEV_GUEST_STATUS,
+ KVM_SEV_DBG_DECRYPT,
+ KVM_SEV_DBG_ENCRYPT,
+ /* Guest certificates commands */
+ KVM_SEV_CERT_EXPORT,
+
+ KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+ __u32 id;
+ __u64 data;
+ __u32 error;
+ __u32 sev_fd;
+};
+
+struct kvm_sev_launch_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 dh_uaddr;
+ __u32 dh_len;
+ __u64 session_uaddr;
+ __u32 session_len;
+};
+
+struct kvm_sev_launch_update_data {
+ __u64 uaddr;
+ __u32 len;
+};
+
+
+struct kvm_sev_launch_secret {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+};
+
+struct kvm_sev_launch_measure {
+ __u64 uaddr;
+ __u32 len;
+};
+
+struct kvm_sev_guest_status {
+ __u32 handle;
+ __u32 policy;
+ __u32 state;
+};
+
+struct kvm_sev_dbg {
+ __u64 src_uaddr;
+ __u64 dst_uaddr;
+ __u32 len;
+};
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
PREFIX ?= /usr
SBINDIR ?= sbin
INSTALL ?= install
-CC = $(CROSS_COMPILE)gcc
TARGET = freefall
# SPDX-License-Identifier: GPL-2.0
# Makefile for LEDs tools
-CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall -Wextra -g -I../../include/uapi
all: uledmon led_hw_brightness_mon
prog->insns = new_insn;
prog->main_prog_cnt = prog->insns_cnt;
prog->insns_cnt = new_cnt;
+ pr_debug("added %zd insn from %s to prog %s\n",
+ text->insns_cnt, text->section_name,
+ prog->section_name);
}
insn = &prog->insns[relo->insn_idx];
insn->imm += prog->main_prog_cnt - relo->insn_idx;
- pr_debug("added %zd insn from %s to prog %s\n",
- text->insns_cnt, text->section_name, prog->section_name);
return 0;
}
* This is a fairly uncommon pattern which is new for GCC 6. As of this
* writing, there are 11 occurrences of it in the allmodconfig kernel.
*
+ * As of GCC 7 there are quite a few more of these and the 'in between' code
+ * is significant. Esp. with KASAN enabled some of the code between the mov
+ * and jmpq uses .rodata itself, which can confuse things.
+ *
* TODO: Once we have DWARF CFI and smarter instruction decoding logic,
* ensure the same register is used in the mov and jump instructions.
+ *
+ * NOTE: RETPOLINE made it harder still to decode dynamic jumps.
*/
static struct rela *find_switch_table(struct objtool_file *file,
struct symbol *func,
text_rela->addend + 4);
if (!rodata_rela)
return NULL;
+
file->ignore_unreachables = true;
return rodata_rela;
}
/* case 3 */
- func_for_each_insn_continue_reverse(file, func, insn) {
+ /*
+ * Backward search using the @first_jump_src links, these help avoid
+ * much of the 'in between' code. Which avoids us getting confused by
+ * it.
+ */
+ for (insn = list_prev_entry(insn, list);
+
+ &insn->list != &file->insn_list &&
+ insn->sec == func->sec &&
+ insn->offset >= func->offset;
+
+ insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
+
if (insn->type == INSN_JUMP_DYNAMIC)
break;
return NULL;
}
+
static int add_func_switch_tables(struct objtool_file *file,
struct symbol *func)
{
- struct instruction *insn, *prev_jump = NULL;
+ struct instruction *insn, *last = NULL, *prev_jump = NULL;
struct rela *rela, *prev_rela = NULL;
int ret;
func_for_each_insn(file, func, insn) {
+ if (!last)
+ last = insn;
+
+ /*
+ * Store back-pointers for unconditional forward jumps such
+ * that find_switch_table() can back-track using those and
+ * avoid some potentially confusing code.
+ */
+ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest &&
+ insn->offset > last->offset &&
+ insn->jump_dest->offset > insn->offset &&
+ !insn->jump_dest->first_jump_src) {
+
+ insn->jump_dest->first_jump_src = insn;
+ last = insn->jump_dest;
+ }
+
if (insn->type != INSN_JUMP_DYNAMIC)
continue;
if (is_kasan_insn(insn) || is_ubsan_insn(insn))
return true;
- if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
- insn = insn->jump_dest;
- continue;
+ if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+ if (insn->jump_dest &&
+ insn->jump_dest->func == insn->func) {
+ insn = insn->jump_dest;
+ continue;
+ }
+
+ break;
}
if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
break;
+
insn = list_next_entry(insn, list);
}
bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
struct symbol *call_dest;
struct instruction *jump_dest;
+ struct instruction *first_jump_src;
struct list_head alts;
struct symbol *func;
struct stack_op stack_op;
-i::
Specify input perf data file path.
+-f::
+--force::
+ Don't complain, do it.
+
-v::
--verbose::
Be more verbose (show counter open errors, etc).
$(eval $(1) = $(2)))
endef
-# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix.
-$(call allow-override,CC,$(CROSS_COMPILE)gcc)
-$(call allow-override,AR,$(CROSS_COMPILE)ar)
-$(call allow-override,LD,$(CROSS_COMPILE)ld)
-$(call allow-override,CXX,$(CROSS_COMPILE)g++)
-
LD += $(EXTRA_LDFLAGS)
HOSTCC ?= gcc
out := $(OUTPUT)arch/s390/include/generated/asm
header := $(out)/syscalls_64.c
-sysdef := $(srctree)/tools/arch/s390/include/uapi/asm/unistd.h
-sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls/
+syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl
+sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls
+sysdef := $(sysprf)/syscall.tbl
systbl := $(sysprf)/mksyscalltbl
# Create output directory if not already present
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
$(header): $(sysdef) $(systbl)
- $(Q)$(SHELL) '$(systbl)' '$(CC)' $(sysdef) > $@
+ @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+ (diff -B $(sysdef) $(syskrn) >/dev/null) \
+ || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true
+ $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
clean::
$(call QUIET_CLEAN, s390) $(RM) $(header)
#
# Generate system call table for perf
#
-#
-# Copyright IBM Corp. 2017
+# Copyright IBM Corp. 2017, 2018
# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
#
-gcc=$1
-input=$2
+SYSCALL_TBL=$1
-if ! test -r $input; then
+if ! test -r $SYSCALL_TBL; then
echo "Could not read input file" >&2
exit 1
fi
create_table()
{
- local max_nr
+ local max_nr nr abi sc discard
echo 'static const char *syscalltbl_s390_64[] = {'
- while read sc nr; do
+ while read nr abi sc discard; do
printf '\t[%d] = "%s",\n' $nr $sc
max_nr=$nr
done
echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr"
}
-
-$gcc -m64 -E -dM -x c $input \
- |sed -ne 's/^#define __NR_//p' \
- |sort -t' ' -k2 -nu \
+grep -E "^[[:digit:]]+[[:space:]]+(common|64)" $SYSCALL_TBL \
+ |sort -k1 -n \
|create_table
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# System call table for s390
+#
+# Format:
+#
+# <nr> <abi> <syscall> <entry-64bit> <compat-entry>
+#
+# where <abi> can be common, 64, or 32
+
+1 common exit sys_exit sys_exit
+2 common fork sys_fork sys_fork
+3 common read sys_read compat_sys_s390_read
+4 common write sys_write compat_sys_s390_write
+5 common open sys_open compat_sys_open
+6 common close sys_close sys_close
+7 common restart_syscall sys_restart_syscall sys_restart_syscall
+8 common creat sys_creat compat_sys_creat
+9 common link sys_link compat_sys_link
+10 common unlink sys_unlink compat_sys_unlink
+11 common execve sys_execve compat_sys_execve
+12 common chdir sys_chdir compat_sys_chdir
+13 32 time - compat_sys_time
+14 common mknod sys_mknod compat_sys_mknod
+15 common chmod sys_chmod compat_sys_chmod
+16 32 lchown - compat_sys_s390_lchown16
+19 common lseek sys_lseek compat_sys_lseek
+20 common getpid sys_getpid sys_getpid
+21 common mount sys_mount compat_sys_mount
+22 common umount sys_oldumount compat_sys_oldumount
+23 32 setuid - compat_sys_s390_setuid16
+24 32 getuid - compat_sys_s390_getuid16
+25 32 stime - compat_sys_stime
+26 common ptrace sys_ptrace compat_sys_ptrace
+27 common alarm sys_alarm sys_alarm
+29 common pause sys_pause sys_pause
+30 common utime sys_utime compat_sys_utime
+33 common access sys_access compat_sys_access
+34 common nice sys_nice sys_nice
+36 common sync sys_sync sys_sync
+37 common kill sys_kill sys_kill
+38 common rename sys_rename compat_sys_rename
+39 common mkdir sys_mkdir compat_sys_mkdir
+40 common rmdir sys_rmdir compat_sys_rmdir
+41 common dup sys_dup sys_dup
+42 common pipe sys_pipe compat_sys_pipe
+43 common times sys_times compat_sys_times
+45 common brk sys_brk compat_sys_brk
+46 32 setgid - compat_sys_s390_setgid16
+47 32 getgid - compat_sys_s390_getgid16
+48 common signal sys_signal compat_sys_signal
+49 32 geteuid - compat_sys_s390_geteuid16
+50 32 getegid - compat_sys_s390_getegid16
+51 common acct sys_acct compat_sys_acct
+52 common umount2 sys_umount compat_sys_umount
+54 common ioctl sys_ioctl compat_sys_ioctl
+55 common fcntl sys_fcntl compat_sys_fcntl
+57 common setpgid sys_setpgid sys_setpgid
+60 common umask sys_umask sys_umask
+61 common chroot sys_chroot compat_sys_chroot
+62 common ustat sys_ustat compat_sys_ustat
+63 common dup2 sys_dup2 sys_dup2
+64 common getppid sys_getppid sys_getppid
+65 common getpgrp sys_getpgrp sys_getpgrp
+66 common setsid sys_setsid sys_setsid
+67 common sigaction sys_sigaction compat_sys_sigaction
+70 32 setreuid - compat_sys_s390_setreuid16
+71 32 setregid - compat_sys_s390_setregid16
+72 common sigsuspend sys_sigsuspend compat_sys_sigsuspend
+73 common sigpending sys_sigpending compat_sys_sigpending
+74 common sethostname sys_sethostname compat_sys_sethostname
+75 common setrlimit sys_setrlimit compat_sys_setrlimit
+76 32 getrlimit - compat_sys_old_getrlimit
+77 common getrusage sys_getrusage compat_sys_getrusage
+78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday
+79 common settimeofday sys_settimeofday compat_sys_settimeofday
+80 32 getgroups - compat_sys_s390_getgroups16
+81 32 setgroups - compat_sys_s390_setgroups16
+83 common symlink sys_symlink compat_sys_symlink
+85 common readlink sys_readlink compat_sys_readlink
+86 common uselib sys_uselib compat_sys_uselib
+87 common swapon sys_swapon compat_sys_swapon
+88 common reboot sys_reboot compat_sys_reboot
+89 common readdir - compat_sys_old_readdir
+90 common mmap sys_old_mmap compat_sys_s390_old_mmap
+91 common munmap sys_munmap compat_sys_munmap
+92 common truncate sys_truncate compat_sys_truncate
+93 common ftruncate sys_ftruncate compat_sys_ftruncate
+94 common fchmod sys_fchmod sys_fchmod
+95 32 fchown - compat_sys_s390_fchown16
+96 common getpriority sys_getpriority sys_getpriority
+97 common setpriority sys_setpriority sys_setpriority
+99 common statfs sys_statfs compat_sys_statfs
+100 common fstatfs sys_fstatfs compat_sys_fstatfs
+101 32 ioperm - -
+102 common socketcall sys_socketcall compat_sys_socketcall
+103 common syslog sys_syslog compat_sys_syslog
+104 common setitimer sys_setitimer compat_sys_setitimer
+105 common getitimer sys_getitimer compat_sys_getitimer
+106 common stat sys_newstat compat_sys_newstat
+107 common lstat sys_newlstat compat_sys_newlstat
+108 common fstat sys_newfstat compat_sys_newfstat
+110 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
+111 common vhangup sys_vhangup sys_vhangup
+112 common idle - -
+114 common wait4 sys_wait4 compat_sys_wait4
+115 common swapoff sys_swapoff compat_sys_swapoff
+116 common sysinfo sys_sysinfo compat_sys_sysinfo
+117 common ipc sys_s390_ipc compat_sys_s390_ipc
+118 common fsync sys_fsync sys_fsync
+119 common sigreturn sys_sigreturn compat_sys_sigreturn
+120 common clone sys_clone compat_sys_clone
+121 common setdomainname sys_setdomainname compat_sys_setdomainname
+122 common uname sys_newuname compat_sys_newuname
+124 common adjtimex sys_adjtimex compat_sys_adjtimex
+125 common mprotect sys_mprotect compat_sys_mprotect
+126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask
+127 common create_module - -
+128 common init_module sys_init_module compat_sys_init_module
+129 common delete_module sys_delete_module compat_sys_delete_module
+130 common get_kernel_syms - -
+131 common quotactl sys_quotactl compat_sys_quotactl
+132 common getpgid sys_getpgid sys_getpgid
+133 common fchdir sys_fchdir sys_fchdir
+134 common bdflush sys_bdflush compat_sys_bdflush
+135 common sysfs sys_sysfs compat_sys_sysfs
+136 common personality sys_s390_personality sys_s390_personality
+137 common afs_syscall - -
+138 32 setfsuid - compat_sys_s390_setfsuid16
+139 32 setfsgid - compat_sys_s390_setfsgid16
+140 32 _llseek - compat_sys_llseek
+141 common getdents sys_getdents compat_sys_getdents
+142 32 _newselect - compat_sys_select
+142 64 select sys_select -
+143 common flock sys_flock sys_flock
+144 common msync sys_msync compat_sys_msync
+145 common readv sys_readv compat_sys_readv
+146 common writev sys_writev compat_sys_writev
+147 common getsid sys_getsid sys_getsid
+148 common fdatasync sys_fdatasync sys_fdatasync
+149 common _sysctl sys_sysctl compat_sys_sysctl
+150 common mlock sys_mlock compat_sys_mlock
+151 common munlock sys_munlock compat_sys_munlock
+152 common mlockall sys_mlockall sys_mlockall
+153 common munlockall sys_munlockall sys_munlockall
+154 common sched_setparam sys_sched_setparam compat_sys_sched_setparam
+155 common sched_getparam sys_sched_getparam compat_sys_sched_getparam
+156 common sched_setscheduler sys_sched_setscheduler compat_sys_sched_setscheduler
+157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler
+158 common sched_yield sys_sched_yield sys_sched_yield
+159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max
+160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min
+161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
+162 common nanosleep sys_nanosleep compat_sys_nanosleep
+163 common mremap sys_mremap compat_sys_mremap
+164 32 setresuid - compat_sys_s390_setresuid16
+165 32 getresuid - compat_sys_s390_getresuid16
+167 common query_module - -
+168 common poll sys_poll compat_sys_poll
+169 common nfsservctl - -
+170 32 setresgid - compat_sys_s390_setresgid16
+171 32 getresgid - compat_sys_s390_getresgid16
+172 common prctl sys_prctl compat_sys_prctl
+173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn
+174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
+175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
+176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
+177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
+178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
+180 common pread64 sys_pread64 compat_sys_s390_pread64
+181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64
+182 32 chown - compat_sys_s390_chown16
+183 common getcwd sys_getcwd compat_sys_getcwd
+184 common capget sys_capget compat_sys_capget
+185 common capset sys_capset compat_sys_capset
+186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack
+187 common sendfile sys_sendfile64 compat_sys_sendfile
+188 common getpmsg - -
+189 common putpmsg - -
+190 common vfork sys_vfork sys_vfork
+191 32 ugetrlimit - compat_sys_getrlimit
+191 64 getrlimit sys_getrlimit -
+192 32 mmap2 - compat_sys_s390_mmap2
+193 32 truncate64 - compat_sys_s390_truncate64
+194 32 ftruncate64 - compat_sys_s390_ftruncate64
+195 32 stat64 - compat_sys_s390_stat64
+196 32 lstat64 - compat_sys_s390_lstat64
+197 32 fstat64 - compat_sys_s390_fstat64
+198 32 lchown32 - compat_sys_lchown
+198 64 lchown sys_lchown -
+199 32 getuid32 - sys_getuid
+199 64 getuid sys_getuid -
+200 32 getgid32 - sys_getgid
+200 64 getgid sys_getgid -
+201 32 geteuid32 - sys_geteuid
+201 64 geteuid sys_geteuid -
+202 32 getegid32 - sys_getegid
+202 64 getegid sys_getegid -
+203 32 setreuid32 - sys_setreuid
+203 64 setreuid sys_setreuid -
+204 32 setregid32 - sys_setregid
+204 64 setregid sys_setregid -
+205 32 getgroups32 - compat_sys_getgroups
+205 64 getgroups sys_getgroups -
+206 32 setgroups32 - compat_sys_setgroups
+206 64 setgroups sys_setgroups -
+207 32 fchown32 - sys_fchown
+207 64 fchown sys_fchown -
+208 32 setresuid32 - sys_setresuid
+208 64 setresuid sys_setresuid -
+209 32 getresuid32 - compat_sys_getresuid
+209 64 getresuid sys_getresuid -
+210 32 setresgid32 - sys_setresgid
+210 64 setresgid sys_setresgid -
+211 32 getresgid32 - compat_sys_getresgid
+211 64 getresgid sys_getresgid -
+212 32 chown32 - compat_sys_chown
+212 64 chown sys_chown -
+213 32 setuid32 - sys_setuid
+213 64 setuid sys_setuid -
+214 32 setgid32 - sys_setgid
+214 64 setgid sys_setgid -
+215 32 setfsuid32 - sys_setfsuid
+215 64 setfsuid sys_setfsuid -
+216 32 setfsgid32 - sys_setfsgid
+216 64 setfsgid sys_setfsgid -
+217 common pivot_root sys_pivot_root compat_sys_pivot_root
+218 common mincore sys_mincore compat_sys_mincore
+219 common madvise sys_madvise compat_sys_madvise
+220 common getdents64 sys_getdents64 compat_sys_getdents64
+221 32 fcntl64 - compat_sys_fcntl64
+222 common readahead sys_readahead compat_sys_s390_readahead
+223 32 sendfile64 - compat_sys_sendfile64
+224 common setxattr sys_setxattr compat_sys_setxattr
+225 common lsetxattr sys_lsetxattr compat_sys_lsetxattr
+226 common fsetxattr sys_fsetxattr compat_sys_fsetxattr
+227 common getxattr sys_getxattr compat_sys_getxattr
+228 common lgetxattr sys_lgetxattr compat_sys_lgetxattr
+229 common fgetxattr sys_fgetxattr compat_sys_fgetxattr
+230 common listxattr sys_listxattr compat_sys_listxattr
+231 common llistxattr sys_llistxattr compat_sys_llistxattr
+232 common flistxattr sys_flistxattr compat_sys_flistxattr
+233 common removexattr sys_removexattr compat_sys_removexattr
+234 common lremovexattr sys_lremovexattr compat_sys_lremovexattr
+235 common fremovexattr sys_fremovexattr compat_sys_fremovexattr
+236 common gettid sys_gettid sys_gettid
+237 common tkill sys_tkill sys_tkill
+238 common futex sys_futex compat_sys_futex
+239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
+240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
+241 common tgkill sys_tgkill sys_tgkill
+243 common io_setup sys_io_setup compat_sys_io_setup
+244 common io_destroy sys_io_destroy compat_sys_io_destroy
+245 common io_getevents sys_io_getevents compat_sys_io_getevents
+246 common io_submit sys_io_submit compat_sys_io_submit
+247 common io_cancel sys_io_cancel compat_sys_io_cancel
+248 common exit_group sys_exit_group sys_exit_group
+249 common epoll_create sys_epoll_create sys_epoll_create
+250 common epoll_ctl sys_epoll_ctl compat_sys_epoll_ctl
+251 common epoll_wait sys_epoll_wait compat_sys_epoll_wait
+252 common set_tid_address sys_set_tid_address compat_sys_set_tid_address
+253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64
+254 common timer_create sys_timer_create compat_sys_timer_create
+255 common timer_settime sys_timer_settime compat_sys_timer_settime
+256 common timer_gettime sys_timer_gettime compat_sys_timer_gettime
+257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun
+258 common timer_delete sys_timer_delete sys_timer_delete
+259 common clock_settime sys_clock_settime compat_sys_clock_settime
+260 common clock_gettime sys_clock_gettime compat_sys_clock_gettime
+261 common clock_getres sys_clock_getres compat_sys_clock_getres
+262 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
+264 32 fadvise64_64 - compat_sys_s390_fadvise64_64
+265 common statfs64 sys_statfs64 compat_sys_statfs64
+266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
+267 common remap_file_pages sys_remap_file_pages compat_sys_remap_file_pages
+268 common mbind sys_mbind compat_sys_mbind
+269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
+270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
+271 common mq_open sys_mq_open compat_sys_mq_open
+272 common mq_unlink sys_mq_unlink compat_sys_mq_unlink
+273 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
+274 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
+275 common mq_notify sys_mq_notify compat_sys_mq_notify
+276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
+277 common kexec_load sys_kexec_load compat_sys_kexec_load
+278 common add_key sys_add_key compat_sys_add_key
+279 common request_key sys_request_key compat_sys_request_key
+280 common keyctl sys_keyctl compat_sys_keyctl
+281 common waitid sys_waitid compat_sys_waitid
+282 common ioprio_set sys_ioprio_set sys_ioprio_set
+283 common ioprio_get sys_ioprio_get sys_ioprio_get
+284 common inotify_init sys_inotify_init sys_inotify_init
+285 common inotify_add_watch sys_inotify_add_watch compat_sys_inotify_add_watch
+286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch
+287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages
+288 common openat sys_openat compat_sys_openat
+289 common mkdirat sys_mkdirat compat_sys_mkdirat
+290 common mknodat sys_mknodat compat_sys_mknodat
+291 common fchownat sys_fchownat compat_sys_fchownat
+292 common futimesat sys_futimesat compat_sys_futimesat
+293 32 fstatat64 - compat_sys_s390_fstatat64
+293 64 newfstatat sys_newfstatat -
+294 common unlinkat sys_unlinkat compat_sys_unlinkat
+295 common renameat sys_renameat compat_sys_renameat
+296 common linkat sys_linkat compat_sys_linkat
+297 common symlinkat sys_symlinkat compat_sys_symlinkat
+298 common readlinkat sys_readlinkat compat_sys_readlinkat
+299 common fchmodat sys_fchmodat compat_sys_fchmodat
+300 common faccessat sys_faccessat compat_sys_faccessat
+301 common pselect6 sys_pselect6 compat_sys_pselect6
+302 common ppoll sys_ppoll compat_sys_ppoll
+303 common unshare sys_unshare compat_sys_unshare
+304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list
+305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list
+306 common splice sys_splice compat_sys_splice
+307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range
+308 common tee sys_tee compat_sys_tee
+309 common vmsplice sys_vmsplice compat_sys_vmsplice
+310 common move_pages sys_move_pages compat_sys_move_pages
+311 common getcpu sys_getcpu compat_sys_getcpu
+312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
+313 common utimes sys_utimes compat_sys_utimes
+314 common fallocate sys_fallocate compat_sys_s390_fallocate
+315 common utimensat sys_utimensat compat_sys_utimensat
+316 common signalfd sys_signalfd compat_sys_signalfd
+317 common timerfd - -
+318 common eventfd sys_eventfd sys_eventfd
+319 common timerfd_create sys_timerfd_create sys_timerfd_create
+320 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
+321 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
+322 common signalfd4 sys_signalfd4 compat_sys_signalfd4
+323 common eventfd2 sys_eventfd2 sys_eventfd2
+324 common inotify_init1 sys_inotify_init1 sys_inotify_init1
+325 common pipe2 sys_pipe2 compat_sys_pipe2
+326 common dup3 sys_dup3 sys_dup3
+327 common epoll_create1 sys_epoll_create1 sys_epoll_create1
+328 common preadv sys_preadv compat_sys_preadv
+329 common pwritev sys_pwritev compat_sys_pwritev
+330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+331 common perf_event_open sys_perf_event_open compat_sys_perf_event_open
+332 common fanotify_init sys_fanotify_init sys_fanotify_init
+333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
+334 common prlimit64 sys_prlimit64 compat_sys_prlimit64
+335 common name_to_handle_at sys_name_to_handle_at compat_sys_name_to_handle_at
+336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
+337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
+338 common syncfs sys_syncfs sys_syncfs
+339 common setns sys_setns sys_setns
+340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
+341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr
+343 common kcmp sys_kcmp compat_sys_kcmp
+344 common finit_module sys_finit_module compat_sys_finit_module
+345 common sched_setattr sys_sched_setattr compat_sys_sched_setattr
+346 common sched_getattr sys_sched_getattr compat_sys_sched_getattr
+347 common renameat2 sys_renameat2 compat_sys_renameat2
+348 common seccomp sys_seccomp compat_sys_seccomp
+349 common getrandom sys_getrandom compat_sys_getrandom
+350 common memfd_create sys_memfd_create compat_sys_memfd_create
+351 common bpf sys_bpf compat_sys_bpf
+352 common s390_pci_mmio_write sys_s390_pci_mmio_write compat_sys_s390_pci_mmio_write
+353 common s390_pci_mmio_read sys_s390_pci_mmio_read compat_sys_s390_pci_mmio_read
+354 common execveat sys_execveat compat_sys_execveat
+355 common userfaultfd sys_userfaultfd sys_userfaultfd
+356 common membarrier sys_membarrier sys_membarrier
+357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg
+358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
+359 common socket sys_socket sys_socket
+360 common socketpair sys_socketpair compat_sys_socketpair
+361 common bind sys_bind compat_sys_bind
+362 common connect sys_connect compat_sys_connect
+363 common listen sys_listen sys_listen
+364 common accept4 sys_accept4 compat_sys_accept4
+365 common getsockopt sys_getsockopt compat_sys_getsockopt
+366 common setsockopt sys_setsockopt compat_sys_setsockopt
+367 common getsockname sys_getsockname compat_sys_getsockname
+368 common getpeername sys_getpeername compat_sys_getpeername
+369 common sendto sys_sendto compat_sys_sendto
+370 common sendmsg sys_sendmsg compat_sys_sendmsg
+371 common recvfrom sys_recvfrom compat_sys_recvfrom
+372 common recvmsg sys_recvmsg compat_sys_recvmsg
+373 common shutdown sys_shutdown sys_shutdown
+374 common mlock2 sys_mlock2 compat_sys_mlock2
+375 common copy_file_range sys_copy_file_range compat_sys_copy_file_range
+376 common preadv2 sys_preadv2 compat_sys_preadv2
+377 common pwritev2 sys_pwritev2 compat_sys_pwritev2
+378 common s390_guarded_storage sys_s390_guarded_storage compat_sys_s390_guarded_storage
+379 common statx sys_statx compat_sys_statx
+380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi
c2c_browser__update_nr_entries(browser);
while (1) {
- key = hist_browser__run(browser, "? - help");
+ key = hist_browser__run(browser, "? - help", true);
switch (key) {
case 's':
c2c_browser__update_nr_entries(browser);
while (1) {
- key = hist_browser__run(browser, "? - help");
+ key = hist_browser__run(browser, "? - help", true);
switch (key) {
case 'q':
case 1:
ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
rep->min_percent,
- &session->header.env);
+ &session->header.env,
+ true);
/*
* Usually "ret" is the last pressed key, and we only
* care if the key notifies us to switch data file.
printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
- if (hists->stats.nr_lost_warned !=
- hists->stats.nr_events[PERF_RECORD_LOST]) {
+ if (!top->record_opts.overwrite &&
+ (hists->stats.nr_lost_warned !=
+ hists->stats.nr_events[PERF_RECORD_LOST])) {
hists->stats.nr_lost_warned =
hists->stats.nr_events[PERF_RECORD_LOST];
color_fprintf(stdout, PERF_COLOR_RED,
perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
top->min_percent,
- &top->session->header.env);
+ &top->session->header.env,
+ !top->record_opts.overwrite);
done = 1;
return NULL;
static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
{
+ struct record_opts *opts = &top->record_opts;
+ struct perf_evlist *evlist = top->evlist;
struct perf_sample sample;
struct perf_evsel *evsel;
+ struct perf_mmap *md;
struct perf_session *session = top->session;
union perf_event *event;
struct machine *machine;
+ u64 end, start;
int ret;
- while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
- ret = perf_evlist__parse_sample(top->evlist, event, &sample);
+ md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
+ if (perf_mmap__read_init(md, opts->overwrite, &start, &end) < 0)
+ return;
+
+ while ((event = perf_mmap__read_event(md, opts->overwrite, &start, end)) != NULL) {
+ ret = perf_evlist__parse_sample(evlist, event, &sample);
if (ret) {
pr_err("Can't parse sample, err = %d\n", ret);
goto next_event;
} else
++session->evlist->stats.nr_unknown_events;
next_event:
- perf_evlist__mmap_consume(top->evlist, idx);
+ perf_mmap__consume(md, opts->overwrite);
}
+
+ perf_mmap__read_done(md);
}
static void perf_top__mmap_read(struct perf_top *top)
{
+ bool overwrite = top->record_opts.overwrite;
+ struct perf_evlist *evlist = top->evlist;
+ unsigned long long start, end;
int i;
+ start = rdclock();
+ if (overwrite)
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
+
for (i = 0; i < top->evlist->nr_mmaps; i++)
perf_top__mmap_read_idx(top, i);
+
+ if (overwrite) {
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+ }
+ end = rdclock();
+
+ if ((end - start) > (unsigned long long)top->delay_secs * NSEC_PER_SEC)
+ ui__warning("Too slow to read ring buffer.\n"
+ "Please try increasing the period (-c) or\n"
+ "decreasing the freq (-F) or\n"
+ "limiting the number of CPUs (-C)\n");
+}
+
+/*
+ * Check per-event overwrite term.
+ * perf top should support consistent term for all events.
+ * - All events don't have per-event term
+ * E.g. "cpu/cpu-cycles/,cpu/instructions/"
+ * Nothing change, return 0.
+ * - All events have same per-event term
+ * E.g. "cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/
+ * Using the per-event setting to replace the opts->overwrite if
+ * they are different, then return 0.
+ * - Events have different per-event term
+ * E.g. "cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/"
+ * Return -1
+ * - Some of the event set per-event term, but some not.
+ * E.g. "cpu/cpu-cycles/,cpu/instructions,no-overwrite/"
+ * Return -1
+ */
+static int perf_top__overwrite_check(struct perf_top *top)
+{
+ struct record_opts *opts = &top->record_opts;
+ struct perf_evlist *evlist = top->evlist;
+ struct perf_evsel_config_term *term;
+ struct list_head *config_terms;
+ struct perf_evsel *evsel;
+ int set, overwrite = -1;
+
+ evlist__for_each_entry(evlist, evsel) {
+ set = -1;
+ config_terms = &evsel->config_terms;
+ list_for_each_entry(term, config_terms, list) {
+ if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE)
+ set = term->val.overwrite ? 1 : 0;
+ }
+
+ /* no term for current and previous event (likely) */
+ if ((overwrite < 0) && (set < 0))
+ continue;
+
+ /* has term for both current and previous event, compare */
+ if ((overwrite >= 0) && (set >= 0) && (overwrite != set))
+ return -1;
+
+ /* no term for current event but has term for previous one */
+ if ((overwrite >= 0) && (set < 0))
+ return -1;
+
+ /* has term for current event */
+ if ((overwrite < 0) && (set >= 0)) {
+ /* if it's first event, set overwrite */
+ if (evsel == perf_evlist__first(evlist))
+ overwrite = set;
+ else
+ return -1;
+ }
+ }
+
+ if ((overwrite >= 0) && (opts->overwrite != overwrite))
+ opts->overwrite = overwrite;
+
+ return 0;
+}
+
+static int perf_top_overwrite_fallback(struct perf_top *top,
+ struct perf_evsel *evsel)
+{
+ struct record_opts *opts = &top->record_opts;
+ struct perf_evlist *evlist = top->evlist;
+ struct perf_evsel *counter;
+
+ if (!opts->overwrite)
+ return 0;
+
+ /* only fall back when first event fails */
+ if (evsel != perf_evlist__first(evlist))
+ return 0;
+
+ evlist__for_each_entry(evlist, counter)
+ counter->attr.write_backward = false;
+ opts->overwrite = false;
+ ui__warning("fall back to non-overwrite mode\n");
+ return 1;
}
static int perf_top__start_counters(struct perf_top *top)
struct perf_evlist *evlist = top->evlist;
struct record_opts *opts = &top->record_opts;
+ if (perf_top__overwrite_check(top)) {
+ ui__error("perf top only support consistent per-event "
+ "overwrite setting for all events\n");
+ goto out_err;
+ }
+
perf_evlist__config(evlist, opts, &callchain_param);
evlist__for_each_entry(evlist, counter) {
try_again:
if (perf_evsel__open(counter, top->evlist->cpus,
top->evlist->threads) < 0) {
+
+ /*
+ * Specially handle overwrite fall back.
+ * Because perf top is the only tool which has
+ * overwrite mode by default, support
+ * both overwrite and non-overwrite mode, and
+ * require consistent mode for all events.
+ *
+ * May move it to generic code with more tools
+ * have similar attribute.
+ */
+ if (perf_missing_features.write_backward &&
+ perf_top_overwrite_fallback(top, counter))
+ goto try_again;
+
if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
perf_top__mmap_read(top);
- if (hits == top->samples)
+ if (opts->overwrite || (hits == top->samples))
ret = perf_evlist__poll(top->evlist, 100);
if (resize) {
.uses_mmap = true,
},
.proc_map_timeout = 500,
+ .overwrite = 1,
},
.max_stack = sysctl_perf_event_max_stack,
.sym_pcnt_filter = 5,
arch/s390/include/uapi/asm/kvm_perf.h
arch/s390/include/uapi/asm/ptrace.h
arch/s390/include/uapi/asm/sie.h
-arch/s390/include/uapi/asm/unistd.h
arch/arm/include/uapi/asm/kvm.h
arch/arm64/include/uapi/asm/kvm.h
arch/alpha/include/uapi/asm/errno.h
--- /dev/null
+[
+ {,
+ "EventCode": "0x7A",
+ "EventName": "BR_INDIRECT_SPEC",
+ "BriefDescription": "Branch speculatively executed - Indirect branch"
+ },
+ {,
+ "EventCode": "0xC9",
+ "EventName": "BR_COND",
+ "BriefDescription": "Conditional branch executed"
+ },
+ {,
+ "EventCode": "0xCA",
+ "EventName": "BR_INDIRECT_MISPRED",
+ "BriefDescription": "Indirect branch mispredicted"
+ },
+ {,
+ "EventCode": "0xCB",
+ "EventName": "BR_INDIRECT_MISPRED_ADDR",
+ "BriefDescription": "Indirect branch mispredicted because of address miscompare"
+ },
+ {,
+ "EventCode": "0xCC",
+ "EventName": "BR_COND_MISPRED",
+ "BriefDescription": "Conditional branch mispredicted"
+ }
+]
--- /dev/null
+[
+ {,
+ "EventCode": "0x60",
+ "EventName": "BUS_ACCESS_LD",
+ "BriefDescription": "Bus access - Read"
+ },
+ {,
+ "EventCode": "0x61",
+ "EventName": "BUS_ACCESS_ST",
+ "BriefDescription": "Bus access - Write"
+ },
+ {,
+ "EventCode": "0xC0",
+ "EventName": "EXT_MEM_REQ",
+ "BriefDescription": "External memory request"
+ },
+ {,
+ "EventCode": "0xC1",
+ "EventName": "EXT_MEM_REQ_NC",
+ "BriefDescription": "Non-cacheable external memory request"
+ }
+]
--- /dev/null
+[
+ {,
+ "EventCode": "0xC2",
+ "EventName": "PREFETCH_LINEFILL",
+ "BriefDescription": "Linefill because of prefetch"
+ },
+ {,
+ "EventCode": "0xC3",
+ "EventName": "PREFETCH_LINEFILL_DROP",
+ "BriefDescription": "Instruction Cache Throttle occurred"
+ },
+ {,
+ "EventCode": "0xC4",
+ "EventName": "READ_ALLOC_ENTER",
+ "BriefDescription": "Entering read allocate mode"
+ },
+ {,
+ "EventCode": "0xC5",
+ "EventName": "READ_ALLOC",
+ "BriefDescription": "Read allocate mode"
+ },
+ {,
+ "EventCode": "0xC8",
+ "EventName": "EXT_SNOOP",
+ "BriefDescription": "SCU Snooped data from another CPU for this CPU"
+ }
+]
--- /dev/null
+[
+ {,
+ "EventCode": "0x60",
+ "EventName": "BUS_ACCESS_LD",
+ "BriefDescription": "Bus access - Read"
+ },
+ {,
+ "EventCode": "0x61",
+ "EventName": "BUS_ACCESS_ST",
+ "BriefDescription": "Bus access - Write"
+ },
+ {,
+ "EventCode": "0xC0",
+ "EventName": "EXT_MEM_REQ",
+ "BriefDescription": "External memory request"
+ },
+ {,
+ "EventCode": "0xC1",
+ "EventName": "EXT_MEM_REQ_NC",
+ "BriefDescription": "Non-cacheable external memory request"
+ }
+]
--- /dev/null
+[
+ {,
+ "EventCode": "0x86",
+ "EventName": "EXC_IRQ",
+ "BriefDescription": "Exception taken, IRQ"
+ },
+ {,
+ "EventCode": "0x87",
+ "EventName": "EXC_FIQ",
+ "BriefDescription": "Exception taken, FIQ"
+ },
+ {,
+ "EventCode": "0xC6",
+ "EventName": "PRE_DECODE_ERR",
+ "BriefDescription": "Pre-decode error"
+ },
+ {,
+ "EventCode": "0xD0",
+ "EventName": "L1I_CACHE_ERR",
+ "BriefDescription": "L1 Instruction Cache (data or tag) memory error"
+ },
+ {,
+ "EventCode": "0xD1",
+ "EventName": "L1D_CACHE_ERR",
+ "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable"
+ },
+ {,
+ "EventCode": "0xD2",
+ "EventName": "TLB_ERR",
+ "BriefDescription": "TLB memory error"
+ }
+]
--- /dev/null
+[
+ {,
+ "EventCode": "0xC7",
+ "EventName": "STALL_SB_FULL",
+ "BriefDescription": "Data Write operation that stalls the pipeline because the store buffer is full"
+ },
+ {,
+ "EventCode": "0xE0",
+ "EventName": "OTHER_IQ_DEP_STALL",
+ "BriefDescription": "Cycles that the DPU IQ is empty and that is not because of a recent micro-TLB miss, instruction cache miss or pre-decode error"
+ },
+ {,
+ "EventCode": "0xE1",
+ "EventName": "IC_DEP_STALL",
+ "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction cache miss being processed"
+ },
+ {,
+ "EventCode": "0xE2",
+ "EventName": "IUTLB_DEP_STALL",
+ "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction micro-TLB miss being processed"
+ },
+ {,
+ "EventCode": "0xE3",
+ "EventName": "DECODE_DEP_STALL",
+ "BriefDescription": "Cycles the DPU IQ is empty and there is a pre-decode error being processed"
+ },
+ {,
+ "EventCode": "0xE4",
+ "EventName": "OTHER_INTERLOCK_STALL",
+ "BriefDescription": "Cycles there is an interlock other than Advanced SIMD/Floating-point instructions or load/store instruction"
+ },
+ {,
+ "EventCode": "0xE5",
+ "EventName": "AGU_DEP_STALL",
+ "BriefDescription": "Cycles there is an interlock for a load/store instruction waiting for data to calculate the address in the AGU"
+ },
+ {,
+ "EventCode": "0xE6",
+ "EventName": "SIMD_DEP_STALL",
+ "BriefDescription": "Cycles there is an interlock for an Advanced SIMD/Floating-point operation."
+ },
+ {,
+ "EventCode": "0xE7",
+ "EventName": "LD_DEP_STALL",
+ "BriefDescription": "Cycles there is a stall in the Wr stage because of a load miss"
+ },
+ {,
+ "EventCode": "0xE8",
+ "EventName": "ST_DEP_STALL",
+ "BriefDescription": "Cycles there is a stall in the Wr stage because of a store"
+ }
+]
#
#Family-model,Version,Filename,EventType
0x00000000420f5160,v1,cavium,core
+0x00000000410fd03[[:xdigit:]],v1,cortex-a53,core
int i;
for (i = 0; i < evlist->nr_mmaps; i++) {
+ struct perf_mmap *map = &evlist->overwrite_mmap[i];
union perf_event *event;
+ u64 start, end;
- perf_mmap__read_catchup(&evlist->overwrite_mmap[i]);
- while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) {
+ perf_mmap__read_init(map, true, &start, &end);
+ while ((event = perf_mmap__read_event(map, true, &start, end)) != NULL) {
const u32 type = event->header.type;
switch (type) {
return TEST_FAIL;
}
}
+ perf_mmap__read_done(map);
}
return TEST_OK;
}
expected[4]="rtt min.*"
expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)"
expected[6]=".*inet_pton[[:space:]]\($libc\)$"
- expected[7]="getaddrinfo[[:space:]]\($libc\)$"
- expected[8]=".*\(.*/bin/ping.*\)$"
-
- perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do
+ case "$(uname -m)" in
+ s390x)
+ eventattr='call-graph=dwarf'
+ expected[7]="gaih_inet[[:space:]]\(inlined\)$"
+ expected[8]="__GI_getaddrinfo[[:space:]]\(inlined\)$"
+ expected[9]="main[[:space:]]\(.*/bin/ping.*\)$"
+ expected[10]="__libc_start_main[[:space:]]\($libc\)$"
+ expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$"
+ ;;
+ *)
+ eventattr='max-stack=3'
+ expected[7]="getaddrinfo[[:space:]]\($libc\)$"
+ expected[8]=".*\(.*/bin/ping.*\)$"
+ ;;
+ esac
+
+ perf trace --no-syscalls -e probe_libc:inet_pton/$eventattr/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do
echo $line
echo "$line" | egrep -q "${expected[$idx]}"
if [ $? -ne 0 ] ; then
exit 1
fi
let idx+=1
- [ $idx -eq 9 ] && break
+ [ -z "${expected[$idx]}" ] && break
done
}
return browser->title ? browser->title(browser, bf, size) : 0;
}
-int hist_browser__run(struct hist_browser *browser, const char *help)
+int hist_browser__run(struct hist_browser *browser, const char *help,
+ bool warn_lost_event)
{
int key;
char title[160];
nr_entries = hist_browser__nr_entries(browser);
ui_browser__update_nr_entries(&browser->b, nr_entries);
- if (browser->hists->stats.nr_lost_warned !=
- browser->hists->stats.nr_events[PERF_RECORD_LOST]) {
+ if (warn_lost_event &&
+ (browser->hists->stats.nr_lost_warned !=
+ browser->hists->stats.nr_events[PERF_RECORD_LOST])) {
browser->hists->stats.nr_lost_warned =
browser->hists->stats.nr_events[PERF_RECORD_LOST];
ui_browser__warn_lost_events(&browser->b);
bool left_exits,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_env *env)
+ struct perf_env *env,
+ bool warn_lost_event)
{
struct hists *hists = evsel__hists(evsel);
struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env);
nr_options = 0;
- key = hist_browser__run(browser, helpline);
+ key = hist_browser__run(browser, helpline,
+ warn_lost_event);
if (browser->he_selection != NULL) {
thread = hist_browser__selected_thread(browser);
static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
int nr_events, const char *help,
- struct hist_browser_timer *hbt)
+ struct hist_browser_timer *hbt,
+ bool warn_lost_event)
{
struct perf_evlist *evlist = menu->b.priv;
struct perf_evsel *pos;
case K_TIMER:
hbt->timer(hbt->arg);
- if (!menu->lost_events_warned && menu->lost_events) {
+ if (!menu->lost_events_warned &&
+ menu->lost_events &&
+ warn_lost_event) {
ui_browser__warn_lost_events(&menu->b);
menu->lost_events_warned = true;
}
key = perf_evsel__hists_browse(pos, nr_events, help,
true, hbt,
menu->min_pcnt,
- menu->env);
+ menu->env,
+ warn_lost_event);
ui_browser__show_title(&menu->b, title);
switch (key) {
case K_TAB:
int nr_entries, const char *help,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_env *env)
+ struct perf_env *env,
+ bool warn_lost_event)
{
struct perf_evsel *pos;
struct perf_evsel_menu menu = {
menu.b.width = line_len;
}
- return perf_evsel_menu__run(&menu, nr_entries, help, hbt);
+ return perf_evsel_menu__run(&menu, nr_entries, help,
+ hbt, warn_lost_event);
}
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_env *env)
+ struct perf_env *env,
+ bool warn_lost_event)
{
int nr_entries = evlist->nr_entries;
return perf_evsel__hists_browse(first, nr_entries, help,
false, hbt, min_pcnt,
- env);
+ env, warn_lost_event);
}
if (symbol_conf.event_group) {
}
return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
- hbt, min_pcnt, env);
+ hbt, min_pcnt, env,
+ warn_lost_event);
}
struct hist_browser *hist_browser__new(struct hists *hists);
void hist_browser__delete(struct hist_browser *browser);
-int hist_browser__run(struct hist_browser *browser, const char *help);
+int hist_browser__run(struct hist_browser *browser, const char *help,
+ bool warn_lost_event);
void hist_browser__init(struct hist_browser *browser,
struct hists *hists);
#endif /* _PERF_UI_BROWSER_HISTS_H_ */
return perf_mmap__read_forward(md);
}
-union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
-{
- struct perf_mmap *md = &evlist->mmap[idx];
-
- /*
- * No need to check messup for backward ring buffer:
- * We can always read arbitrary long data from a backward
- * ring buffer unless we forget to pause it before reading.
- */
- return perf_mmap__read_backward(md);
-}
-
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
return perf_evlist__mmap_read_forward(evlist, idx);
}
-void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
-{
- perf_mmap__read_catchup(&evlist->mmap[idx]);
-}
-
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
perf_mmap__consume(&evlist->mmap[idx], false);
union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist,
int idx);
-union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist,
- int idx);
-void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
-
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
int perf_evlist__open(struct perf_evlist *evlist);
#include "sane_ctype.h"
-static struct {
- bool sample_id_all;
- bool exclude_guest;
- bool mmap2;
- bool cloexec;
- bool clockid;
- bool clockid_wrong;
- bool lbr_flags;
- bool write_backward;
- bool group_read;
-} perf_missing_features;
+struct perf_missing_features perf_missing_features;
static clockid_t clockid;
u32 val32[2];
};
+struct perf_missing_features {
+ bool sample_id_all;
+ bool exclude_guest;
+ bool mmap2;
+ bool cloexec;
+ bool clockid;
+ bool clockid_wrong;
+ bool lbr_flags;
+ bool write_backward;
+ bool group_read;
+};
+
+extern struct perf_missing_features perf_missing_features;
+
struct cpu_map;
struct target;
struct thread_map;
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_env *env);
+ struct perf_env *env,
+ bool warn_lost_event);
int script_browse(const char *script_opt);
#else
static inline
const char *help __maybe_unused,
struct hist_browser_timer *hbt __maybe_unused,
float min_pcnt __maybe_unused,
- struct perf_env *env __maybe_unused)
+ struct perf_env *env __maybe_unused,
+ bool warn_lost_event __maybe_unused)
{
return 0;
}
/* When check_messup is true, 'end' must points to a good entry */
static union perf_event *perf_mmap__read(struct perf_mmap *map,
- u64 start, u64 end, u64 *prev)
+ u64 *startp, u64 end)
{
unsigned char *data = map->base + page_size;
union perf_event *event = NULL;
- int diff = end - start;
+ int diff = end - *startp;
if (diff >= (int)sizeof(event->header)) {
size_t size;
- event = (union perf_event *)&data[start & map->mask];
+ event = (union perf_event *)&data[*startp & map->mask];
size = event->header.size;
- if (size < sizeof(event->header) || diff < (int)size) {
- event = NULL;
- goto broken_event;
- }
+ if (size < sizeof(event->header) || diff < (int)size)
+ return NULL;
/*
* Event straddles the mmap boundary -- header should always
* be inside due to u64 alignment of output.
*/
- if ((start & map->mask) + size != ((start + size) & map->mask)) {
- unsigned int offset = start;
+ if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
+ unsigned int offset = *startp;
unsigned int len = min(sizeof(*event), size), cpy;
void *dst = map->event_copy;
event = (union perf_event *)map->event_copy;
}
- start += size;
+ *startp += size;
}
-broken_event:
- if (prev)
- *prev = start;
-
return event;
}
+/*
+ * legacy interface for mmap read.
+ * Don't use it. Use perf_mmap__read_event().
+ */
union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
{
u64 head;
- u64 old = map->prev;
/*
* Check if event was unmapped due to a POLLHUP/POLLERR.
head = perf_mmap__read_head(map);
- return perf_mmap__read(map, old, head, &map->prev);
+ return perf_mmap__read(map, &map->prev, head);
}
-union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
+/*
+ * Read event from ring buffer one by one.
+ * Return one event for each call.
+ *
+ * Usage:
+ * perf_mmap__read_init()
+ * while(event = perf_mmap__read_event()) {
+ * //process the event
+ * perf_mmap__consume()
+ * }
+ * perf_mmap__read_done()
+ */
+union perf_event *perf_mmap__read_event(struct perf_mmap *map,
+ bool overwrite,
+ u64 *startp, u64 end)
{
- u64 head, end;
- u64 start = map->prev;
+ union perf_event *event;
/*
* Check if event was unmapped due to a POLLHUP/POLLERR.
if (!refcount_read(&map->refcnt))
return NULL;
- head = perf_mmap__read_head(map);
- if (!head)
+ if (startp == NULL)
return NULL;
- /*
- * 'head' pointer starts from 0. Kernel minus sizeof(record) form
- * it each time when kernel writes to it, so in fact 'head' is
- * negative. 'end' pointer is made manually by adding the size of
- * the ring buffer to 'head' pointer, means the validate data can
- * read is the whole ring buffer. If 'end' is positive, the ring
- * buffer has not fully filled, so we must adjust 'end' to 0.
- *
- * However, since both 'head' and 'end' is unsigned, we can't
- * simply compare 'end' against 0. Here we compare '-head' and
- * the size of the ring buffer, where -head is the number of bytes
- * kernel write to the ring buffer.
- */
- if (-head < (u64)(map->mask + 1))
- end = 0;
- else
- end = head + map->mask + 1;
-
- return perf_mmap__read(map, start, end, &map->prev);
-}
+ /* non-overwirte doesn't pause the ringbuffer */
+ if (!overwrite)
+ end = perf_mmap__read_head(map);
-void perf_mmap__read_catchup(struct perf_mmap *map)
-{
- u64 head;
+ event = perf_mmap__read(map, startp, end);
- if (!refcount_read(&map->refcnt))
- return;
+ if (!overwrite)
+ map->prev = *startp;
- head = perf_mmap__read_head(map);
- map->prev = head;
+ return event;
}
static bool perf_mmap__empty(struct perf_mmap *map)
return -1;
}
-int perf_mmap__push(struct perf_mmap *md, bool overwrite,
- void *to, int push(void *to, void *buf, size_t size))
+/*
+ * Report the start and end of the available data in ringbuffer
+ */
+int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
+ u64 *startp, u64 *endp)
{
u64 head = perf_mmap__read_head(md);
u64 old = md->prev;
- u64 end = head, start = old;
unsigned char *data = md->base + page_size;
unsigned long size;
- void *buf;
- int rc = 0;
- start = overwrite ? head : old;
- end = overwrite ? old : head;
+ *startp = overwrite ? head : old;
+ *endp = overwrite ? old : head;
- if (start == end)
- return 0;
+ if (*startp == *endp)
+ return -EAGAIN;
- size = end - start;
+ size = *endp - *startp;
if (size > (unsigned long)(md->mask) + 1) {
if (!overwrite) {
WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
md->prev = head;
perf_mmap__consume(md, overwrite);
- return 0;
+ return -EAGAIN;
}
/*
* Backward ring buffer is full. We still have a chance to read
* most of data from it.
*/
- if (overwrite_rb_find_range(data, md->mask, head, &start, &end))
- return -1;
+ if (overwrite_rb_find_range(data, md->mask, head, startp, endp))
+ return -EINVAL;
}
+ return 0;
+}
+
+int perf_mmap__push(struct perf_mmap *md, bool overwrite,
+ void *to, int push(void *to, void *buf, size_t size))
+{
+ u64 head = perf_mmap__read_head(md);
+ u64 end, start;
+ unsigned char *data = md->base + page_size;
+ unsigned long size;
+ void *buf;
+ int rc = 0;
+
+ rc = perf_mmap__read_init(md, overwrite, &start, &end);
+ if (rc < 0)
+ return (rc == -EAGAIN) ? 0 : -1;
+
+ size = end - start;
+
if ((start & md->mask) + size != (end & md->mask)) {
buf = &data[start & md->mask];
size = md->mask + 1 - (start & md->mask);
out:
return rc;
}
+
+/*
+ * Mandatory for overwrite mode
+ * The direction of overwrite mode is backward.
+ * The last perf_mmap__read() will set tail to map->prev.
+ * Need to correct the map->prev to head which is the end of next read.
+ */
+void perf_mmap__read_done(struct perf_mmap *map)
+{
+ map->prev = perf_mmap__read_head(map);
+}
void perf_mmap__consume(struct perf_mmap *map, bool overwrite);
-void perf_mmap__read_catchup(struct perf_mmap *md);
-
static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
{
struct perf_event_mmap_page *pc = mm->base;
}
union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
-union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
+
+union perf_event *perf_mmap__read_event(struct perf_mmap *map,
+ bool overwrite,
+ u64 *startp, u64 end);
int perf_mmap__push(struct perf_mmap *md, bool backward,
void *to, int push(void *to, void *buf, size_t size));
size_t perf_mmap__mmap_len(struct perf_mmap *map);
+int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
+ u64 *startp, u64 *endp);
+void perf_mmap__read_done(struct perf_mmap *map);
#endif /*__PERF_MMAP_H */
return n;
}
-static int hex(char ch)
-{
- if ((ch >= '0') && (ch <= '9'))
- return ch - '0';
- if ((ch >= 'a') && (ch <= 'f'))
- return ch - 'a' + 10;
- if ((ch >= 'A') && (ch <= 'F'))
- return ch - 'A' + 10;
- return -1;
-}
-
/*
* While we find nice hex chars, build a long_val.
* Return number of chars processed.
*/
int hex2u64(const char *ptr, u64 *long_val)
{
- const char *p = ptr;
- *long_val = 0;
-
- while (*p) {
- const int hex_val = hex(*p);
+ char *p;
- if (hex_val < 0)
- break;
-
- *long_val = (*long_val << 4) | hex_val;
- p++;
- }
+ *long_val = strtoull(ptr, &p, 16);
return p - ptr;
}
# to compile vs uClibc, that can be done here as well.
CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
CROSS_COMPILE ?= $(CROSS)
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)gcc
-STRIP = $(CROSS_COMPILE)strip
HOSTCC = gcc
# check if compiler option is supported
CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+$(call allow-override,CXX,$(CROSS_COMPILE)g++)
+$(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+
ifeq ($(CC_NO_CLANG), 1)
EXTRA_WARNINGS += -Wstrict-aliasing=3
endif
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)ld
CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
ALL_TARGETS := spidev_test spidev_fdx
test_tcpbpf_user
test_verifier_log
feature
+test_libbpf_open
include ../lib.mk
-BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c
+BPFOBJ := $(OUTPUT)/libbpf.a
$(TEST_GEN_PROGS): $(BPFOBJ)
$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
+$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+
.PHONY: force
# force a rebuild of BPFOBJ when its dependencies are updated
--- /dev/null
+#include <sys/resource.h>
+#include <stdio.h>
+
+static __attribute__((constructor)) void bpf_rlimit_ctor(void)
+{
+ struct rlimit rlim_old, rlim_new = {
+ .rlim_cur = RLIM_INFINITY,
+ .rlim_max = RLIM_INFINITY,
+ };
+
+ getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+ /* For the sake of running the test cases, we temporarily
+ * set rlimit to infinity in order for kernel to focus on
+ * errors from actual test cases and not getting noise
+ * from hitting memlock limits. The limit is on per-process
+ * basis and not a global one, hence destructor not really
+ * needed here.
+ */
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
+ perror("Unable to lift memlock rlimit");
+ /* Trying out lower limit, but expect potential test
+ * case failures from this!
+ */
+ rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+ rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+ setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+ }
+}
#include <stddef.h>
#include <stdbool.h>
-#include <sys/resource.h>
-
#include <linux/unistd.h>
#include <linux/filter.h>
#include <linux/bpf_perf_event.h>
#include <bpf/bpf.h>
#include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
int main(int argc, char **argv)
{
unsigned int from = 0, to = ARRAY_SIZE(tests);
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
- setrlimit(RLIMIT_MEMLOCK, &rinf);
if (argc == 3) {
unsigned int l = atoi(argv[argc - 2]);
#include <errno.h>
#include <assert.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
#define DEV_CGROUP_PROG "./dev_cgroup.o"
int main(int argc, char **argv)
{
- struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
struct bpf_object *obj;
int error = EXIT_FAILURE;
int prog_fd, cgroup_fd;
__u32 prog_cnt;
- if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
- perror("Unable to lift memlock rlimit");
-
if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
&obj, &prog_fd)) {
printf("Failed to load DEV_CGROUP program\n");
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <bpf/bpf.h>
+
#include "bpf_util.h"
+#include "bpf_rlimit.h"
struct tlpm_node {
struct tlpm_node *next;
int main(void)
{
- struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
- int i, ret;
+ int i;
/* we want predictable, pseudo random tests */
srand(0xf00ba1);
- /* allow unlimited locked memory */
- ret = setrlimit(RLIMIT_MEMLOCK, &limit);
- if (ret < 0)
- perror("Unable to lift memlock rlimit");
-
test_lpm_basic();
test_lpm_order();
test_lpm_map(i);
test_lpm_ipaddr();
-
test_lpm_delete();
-
test_lpm_get_next_key();
-
test_lpm_multi_thread();
printf("test_lpm: OK\n");
#include <time.h>
#include <sys/wait.h>
-#include <sys/resource.h>
#include <bpf/bpf.h>
+
#include "bpf_util.h"
+#include "bpf_rlimit.h"
#define LOCAL_FREE_TARGET (128)
#define PERCPU_FREE_TARGET (4)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int map_types[] = {BPF_MAP_TYPE_LRU_HASH,
BPF_MAP_TYPE_LRU_PERCPU_HASH};
int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
setbuf(stdout, NULL);
- assert(!setrlimit(RLIMIT_MEMLOCK, &r));
-
nr_cpus = bpf_num_possible_cpus();
assert(nr_cpus != -1);
printf("nr_cpus:%d\n\n", nr_cpus);
#include <stdlib.h>
#include <sys/wait.h>
-#include <sys/resource.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+
#include "bpf_util.h"
+#include "bpf_rlimit.h"
static int map_flags;
fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
2, map_flags);
if (fd < 0) {
+ if (errno == ENOMEM)
+ return;
printf("Failed to create hashmap key=%d value=%d '%s'\n",
i, j, strerror(errno));
exit(1);
int main(void)
{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
- setrlimit(RLIMIT_MEMLOCK, &rinf);
-
map_flags = 0;
run_all_tests();
#include <sys/ioctl.h>
#include <sys/wait.h>
-#include <sys/resource.h>
#include <sys/types.h>
#include <fcntl.h>
#include <linux/err.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+
#include "test_iptunnel_common.h"
#include "bpf_util.h"
#include "bpf_endian.h"
+#include "bpf_rlimit.h"
static int error_cnt, pass_cnt;
int main(void)
{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
- setrlimit(RLIMIT_MEMLOCK, &rinf);
-
test_pkt_access();
test_xdp();
test_l4lb_all();
#include <assert.h>
#include <sys/socket.h>
-#include <sys/resource.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
static struct bpf_insn prog[BPF_MAXINSNS];
int main(void)
{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
uint32_t tests = 0;
int i, fd_map;
- setrlimit(RLIMIT_MEMLOCK, &rinf);
fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int),
sizeof(int), 1, BPF_F_NO_PREALLOC);
assert(fd_map > 0);
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
-#include <linux/in6.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/tcp.h>
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <sys/ioctl.h>
+#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_util.h"
+#include "bpf_rlimit.h"
#include <linux/perf_event.h>
#include "test_tcpbpf.h"
#include <limits.h>
#include <sys/capability.h>
-#include <sys/resource.h>
#include <linux/unistd.h>
#include <linux/filter.h>
# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
# endif
#endif
-
+#include "bpf_rlimit.h"
#include "../../../include/linux/filter.h"
#ifndef ARRAY_SIZE
#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1)
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+static bool unpriv_disabled = false;
+
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
.result_unpriv = REJECT,
.result = ACCEPT,
},
+ {
+ "runtime/jit: tail_call within bounds, prog once",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "runtime/jit: tail_call within bounds, prog loop",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 41,
+ },
+ {
+ "runtime/jit: tail_call within bounds, no prog",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "runtime/jit: tail_call out of bounds",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 256),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "runtime/jit: pass negative index to tail_call",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, -1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "runtime/jit: pass > 32bit index to tail_call",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 2 },
+ .result = ACCEPT,
+ .retval = 42,
+ },
{
"stack pointer arithmetic",
.insns = {
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
+ {
+ "jit: lsh, rsh, arsh by 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 0xff),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: mov32 for ldimm64, 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32),
+ BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: mov32 for ldimm64, 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL),
+ BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: various mul tests",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xefefefULL),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+
};
static int probe_filter_length(const struct bpf_insn *fp)
return fd;
}
+static int create_prog_dummy1(void)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+ ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
+static int create_prog_dummy2(int mfd, int idx)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_3, idx),
+ BPF_LD_MAP_FD(BPF_REG_2, mfd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 41),
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+ ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
static int create_prog_array(void)
{
- int fd;
+ int p1key = 0, p2key = 1;
+ int mfd, p1fd, p2fd;
- fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
- sizeof(int), 4, 0);
- if (fd < 0)
+ mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
+ sizeof(int), 4, 0);
+ if (mfd < 0) {
printf("Failed to create prog array '%s'!\n", strerror(errno));
+ return -1;
+ }
- return fd;
+ p1fd = create_prog_dummy1();
+ p2fd = create_prog_dummy2(mfd, p2key);
+ if (p1fd < 0 || p2fd < 0)
+ goto out;
+ if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
+ goto out;
+ if (bpf_map_update_elem(mfd, &p2key, &p2fd, BPF_ANY) < 0)
+ goto out;
+ close(p2fd);
+ close(p1fd);
+
+ return mfd;
+out:
+ close(p2fd);
+ close(p1fd);
+ close(mfd);
+ return -1;
}
static int create_map_in_map(void)
goto fail_log;
}
if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) {
- printf("FAIL\nUnexpected error message!\n");
+ printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
+ expected_err, bpf_vlog);
goto fail_log;
}
}
return ret;
}
+static void get_unpriv_disabled()
+{
+ char buf[2];
+ FILE *fd;
+
+ fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+ if (fgets(buf, 2, fd) == buf && atoi(buf))
+ unpriv_disabled = true;
+ fclose(fd);
+}
+
static int do_test(bool unpriv, unsigned int from, unsigned int to)
{
- int i, passes = 0, errors = 0;
+ int i, passes = 0, errors = 0, skips = 0;
for (i = from; i < to; i++) {
struct bpf_test *test = &tests[i];
/* Program types that are not supported by non-root we
* skip right away.
*/
- if (!test->prog_type) {
+ if (!test->prog_type && unpriv_disabled) {
+ printf("#%d/u %s SKIP\n", i, test->descr);
+ skips++;
+ } else if (!test->prog_type) {
if (!unpriv)
set_admin(false);
printf("#%d/u %s ", i, test->descr);
set_admin(true);
}
- if (!unpriv) {
+ if (unpriv) {
+ printf("#%d/p %s SKIP\n", i, test->descr);
+ skips++;
+ } else {
printf("#%d/p %s ", i, test->descr);
do_test_single(test, false, &passes, &errors);
}
}
- printf("Summary: %d PASSED, %d FAILED\n", passes, errors);
+ printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
+ skips, errors);
return errors ? EXIT_FAILURE : EXIT_SUCCESS;
}
int main(int argc, char **argv)
{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
- struct rlimit rlim = { 1 << 20, 1 << 20 };
unsigned int from = 0, to = ARRAY_SIZE(tests);
bool unpriv = !is_admin();
}
}
- setrlimit(RLIMIT_MEMLOCK, unpriv ? &rlim : &rinf);
+ get_unpriv_disabled();
+ if (unpriv && unpriv_disabled) {
+ printf("Cannot run as unprivileged user with sysctl %s.\n",
+ UNPRIV_SYSCTL);
+ return EXIT_FAILURE;
+ }
+
return do_test(unpriv, from, to);
}
#include <string.h>
#include <unistd.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <bpf/bpf.h>
+#include "bpf_rlimit.h"
+
#define LOG_SIZE (1 << 20)
#define err(str...) printf("ERROR: " str)
int main(int argc, char **argv)
{
- struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
char full_log[LOG_SIZE];
char log[LOG_SIZE];
size_t want_len;
int i;
- /* allow unlimited locked memory to have more consistent error code */
- if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
- perror("Unable to lift memlock rlimit");
-
memset(log, 1, LOG_SIZE);
/* Test incorrect attr */
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_tests.sh
+TEST_FILES := run_fuse_test.sh
TEST_GEN_FILES := memfd_test fuse_mnt fuse_test
fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# IPv4 and IPv6 onlink tests
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# Network interfaces
+# - odd in current namespace; even in peer ns
+declare -A NETIFS
+# default VRF
+NETIFS[p1]=veth1
+NETIFS[p2]=veth2
+NETIFS[p3]=veth3
+NETIFS[p4]=veth4
+# VRF
+NETIFS[p5]=veth5
+NETIFS[p6]=veth6
+NETIFS[p7]=veth7
+NETIFS[p8]=veth8
+
+# /24 network
+declare -A V4ADDRS
+V4ADDRS[p1]=169.254.1.1
+V4ADDRS[p2]=169.254.1.2
+V4ADDRS[p3]=169.254.3.1
+V4ADDRS[p4]=169.254.3.2
+V4ADDRS[p5]=169.254.5.1
+V4ADDRS[p6]=169.254.5.2
+V4ADDRS[p7]=169.254.7.1
+V4ADDRS[p8]=169.254.7.2
+
+# /64 network
+declare -A V6ADDRS
+V6ADDRS[p1]=2001:db8:101::1
+V6ADDRS[p2]=2001:db8:101::2
+V6ADDRS[p3]=2001:db8:301::1
+V6ADDRS[p4]=2001:db8:301::2
+V6ADDRS[p5]=2001:db8:501::1
+V6ADDRS[p6]=2001:db8:501::2
+V6ADDRS[p7]=2001:db8:701::1
+V6ADDRS[p8]=2001:db8:701::2
+
+# Test networks:
+# [1] = default table
+# [2] = VRF
+#
+# /32 host routes
+declare -A TEST_NET4
+TEST_NET4[1]=169.254.101
+TEST_NET4[2]=169.254.102
+# /128 host routes
+declare -A TEST_NET6
+TEST_NET6[1]=2001:db8:101
+TEST_NET6[2]=2001:db8:102
+
+# connected gateway
+CONGW[1]=169.254.1.254
+CONGW[2]=169.254.5.254
+
+# recursive gateway
+RECGW4[1]=169.254.11.254
+RECGW4[2]=169.254.12.254
+RECGW6[1]=2001:db8:11::64
+RECGW6[2]=2001:db8:12::64
+
+# for v4 mapped to v6
+declare -A TEST_NET4IN6IN6
+TEST_NET4IN6[1]=10.1.1.254
+TEST_NET4IN6[2]=10.2.1.254
+
+# mcast address
+MCAST6=ff02::1
+
+
+PEER_NS=bart
+PEER_CMD="ip netns exec ${PEER_NS}"
+VRF=lisa
+VRF_TABLE=1101
+PBR_TABLE=101
+
+################################################################################
+# utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ else
+ nfail=$((nfail+1))
+ printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "######################################################################"
+ echo "TEST SECTION: $*"
+ echo "######################################################################"
+}
+
+log_subsection()
+{
+ echo
+ echo "#########################################"
+ echo "TEST SUBSECTION: $*"
+}
+
+run_cmd()
+{
+ echo
+ echo "COMMAND: $*"
+ eval $*
+}
+
+get_linklocal()
+{
+ local dev=$1
+ local pfx
+ local addr
+
+ addr=$(${pfx} ip -6 -br addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
+
+ [ -z "$addr" ] && return 1
+
+ echo $addr
+
+ return 0
+}
+
+################################################################################
+#
+
+setup()
+{
+ echo
+ echo "########################################"
+ echo "Configuring interfaces"
+
+ set -e
+
+ # create namespace
+ ip netns add ${PEER_NS}
+ ip -netns ${PEER_NS} li set lo up
+
+ # add vrf table
+ ip li add ${VRF} type vrf table ${VRF_TABLE}
+ ip li set ${VRF} up
+ ip ro add table ${VRF_TABLE} unreachable default
+ ip -6 ro add table ${VRF_TABLE} unreachable default
+
+ # create test interfaces
+ ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
+ ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]}
+ ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]}
+ ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]}
+
+ # enslave vrf interfaces
+ for n in 5 7; do
+ ip li set ${NETIFS[p${n}]} vrf ${VRF}
+ done
+
+ # add addresses
+ for n in 1 3 5 7; do
+ ip li set ${NETIFS[p${n}]} up
+ ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+ ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+ done
+
+ # move peer interfaces to namespace and add addresses
+ for n in 2 4 6 8; do
+ ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
+ ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+ ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+ done
+
+ set +e
+
+ # let DAD complete - assume default of 1 probe
+ sleep 1
+}
+
+cleanup()
+{
+ # make sure we start from a clean slate
+ ip netns del ${PEER_NS} 2>/dev/null
+ for n in 1 3 5 7; do
+ ip link del ${NETIFS[p${n}]} 2>/dev/null
+ done
+ ip link del ${VRF} 2>/dev/null
+ ip ro flush table ${VRF_TABLE}
+ ip -6 ro flush table ${VRF_TABLE}
+}
+
+################################################################################
+# IPv4 tests
+#
+
+run_ip()
+{
+ local table="$1"
+ local prefix="$2"
+ local gw="$3"
+ local dev="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+ # dev arg may be empty
+ [ -n "${dev}" ] && dev="dev ${dev}"
+
+ run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink
+ log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv4()
+{
+ # - unicast connected, unicast recursive
+ #
+ log_subsection "default VRF - main table"
+
+ run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
+ run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive"
+
+ log_subsection "VRF ${VRF}"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected"
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+ log_subsection "VRF device, PBR table"
+
+ run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected"
+ run_ip ${PBR_TABLE} ${TEST_NET4[2]}.4 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+}
+
+invalid_onlink_ipv4()
+{
+ run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \
+ "Invalid gw - local unicast address"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \
+ "Invalid gw - local unicast address, VRF"
+
+ run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given"
+
+ run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \
+ "Gateway resolves to wrong nexthop device"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \
+ "Gateway resolves to wrong nexthop device - VRF"
+}
+
+################################################################################
+# IPv6 tests
+#
+
+run_ip6()
+{
+ local table="$1"
+ local prefix="$2"
+ local gw="$3"
+ local dev="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+ # dev arg may be empty
+ [ -n "${dev}" ] && dev="dev ${dev}"
+
+ run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink
+ log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv6()
+{
+ # - unicast connected, unicast recursive, v4-mapped
+ #
+ log_subsection "default VRF - main table"
+
+ run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected"
+ run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive"
+ run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped"
+
+ log_subsection "VRF ${VRF}"
+
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+ log_subsection "VRF device, PBR table"
+
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::4 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::5 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::6 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+}
+
+invalid_onlink_ipv6()
+{
+ local lladdr
+
+ lladdr=$(get_linklocal ${NETIFS[p1]}) || return 1
+
+ run_ip6 254 ${TEST_NET6[1]}::11 ${V6ADDRS[p1]} ${NETIFS[p1]} 2 \
+ "Invalid gw - local unicast address"
+ run_ip6 254 ${TEST_NET6[1]}::12 ${lladdr} ${NETIFS[p1]} 2 \
+ "Invalid gw - local linklocal address"
+ run_ip6 254 ${TEST_NET6[1]}::12 ${MCAST6} ${NETIFS[p1]} 2 \
+ "Invalid gw - multicast address"
+
+ lladdr=$(get_linklocal ${NETIFS[p5]}) || return 1
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::11 ${V6ADDRS[p5]} ${NETIFS[p5]} 2 \
+ "Invalid gw - local unicast address, VRF"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${lladdr} ${NETIFS[p5]} 2 \
+ "Invalid gw - local linklocal address, VRF"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${MCAST6} ${NETIFS[p5]} 2 \
+ "Invalid gw - multicast address, VRF"
+
+ run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \
+ "No nexthop device given"
+
+ # default VRF validation is done against LOCAL table
+ # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \
+ # "Gateway resolves to wrong nexthop device"
+
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \
+ "Gateway resolves to wrong nexthop device - VRF"
+}
+
+run_onlink_tests()
+{
+ log_section "IPv4 onlink"
+ log_subsection "Valid onlink commands"
+ valid_onlink_ipv4
+ log_subsection "Invalid onlink commands"
+ invalid_onlink_ipv4
+
+ log_section "IPv6 onlink"
+ log_subsection "Valid onlink commands"
+ valid_onlink_ipv6
+ invalid_onlink_ipv6
+}
+
+################################################################################
+# main
+
+nsuccess=0
+nfail=0
+
+cleanup
+setup
+run_onlink_tests
+cleanup
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
ret=0
-check_err()
-{
- if [ $ret -eq 0 ]; then
- ret=$1
- fi
-}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
-check_fail()
+log_test()
{
- if [ $1 -eq 0 ]; then
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " %-60s [ OK ]\n" "${msg}"
+ else
ret=1
+ printf " %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
fi
}
-netns_create()
+setup()
{
- local testns=$1
+ set -e
+ ip netns add testns
+ ip -netns testns link set dev lo up
+
+ ip -netns testns link add dummy0 type dummy
+ ip -netns testns link set dev dummy0 up
+ ip -netns testns address add 198.51.100.1/24 dev dummy0
+ ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
+ set +e
- ip netns add $testns
- ip netns exec $testns ip link set dev lo up
}
-fib_unreg_unicast_test()
+cleanup()
{
- ret=0
-
- netns_create "testns"
-
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ ip -netns testns link del dev dummy0 &> /dev/null
+ ip netns del testns
+}
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+fib_unreg_unicast_test()
+{
+ echo
+ echo "Single path route test"
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_err $?
+ setup
- ip netns exec testns ip link del dev dummy0
- check_err $?
+ echo " Start point"
+ ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_fail $?
+ set -e
+ ip -netns testns link del dev dummy0
+ set +e
- ip netns del testns
+ echo " Nexthop device deleted"
+ ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch - no route"
+ ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch - no route"
- if [ $ret -ne 0 ]; then
- echo "FAIL: unicast route test"
- return 1
- fi
- echo "PASS: unicast route test"
+ cleanup
}
fib_unreg_multipath_test()
{
- ret=0
-
- netns_create "testns"
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ echo
+ echo "Multipath route test"
- ip netns exec testns ip link add dummy1 type dummy
- ip netns exec testns ip link set dev dummy1 up
+ setup
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ set -e
+ ip -netns testns link add dummy1 type dummy
+ ip -netns testns link set dev dummy1 up
+ ip -netns testns address add 192.0.2.1/24 dev dummy1
+ ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
- ip netns exec testns ip address add 192.0.2.1/24 dev dummy1
- ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
-
- ip netns exec testns ip route add 203.0.113.0/24 \
+ ip -netns testns route add 203.0.113.0/24 \
nexthop via 198.51.100.2 dev dummy0 \
nexthop via 192.0.2.2 dev dummy1
- ip netns exec testns ip -6 route add 2001:db8:3::/64 \
+ ip -netns testns -6 route add 2001:db8:3::/64 \
nexthop via 2001:db8:1::2 dev dummy0 \
nexthop via 2001:db8:2::2 dev dummy1
+ set +e
+
+ echo " Start point"
+ ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
- check_err $?
+ set -e
+ ip -netns testns link del dev dummy0
+ set +e
- ip netns exec testns ip link del dev dummy0
- check_err $?
+ echo " One nexthop device deleted"
+ ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 2 "IPv4 - multipath route removed on delete"
- ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
# In IPv6 we do not flush the entire multipath route.
- check_err $?
+ log_test $? 0 "IPv6 - multipath down to single path"
- ip netns exec testns ip link del dev dummy1
+ set -e
+ ip -netns testns link del dev dummy1
+ set +e
- ip netns del testns
+ echo " Second nexthop device deleted"
+ ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 2 "IPv6 - no route"
- if [ $ret -ne 0 ]; then
- echo "FAIL: multipath route test"
- return 1
- fi
- echo "PASS: multipath route test"
+ cleanup
}
fib_unreg_test()
{
- echo "Running netdev unregister tests"
-
fib_unreg_unicast_test
fib_unreg_multipath_test
}
fib_down_unicast_test()
{
- ret=0
-
- netns_create "testns"
-
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
-
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ echo
+ echo "Single path, admin down"
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_err $?
+ setup
- ip netns exec testns ip link set dev dummy0 down
- check_err $?
+ echo " Start point"
+ ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_fail $?
+ set -e
+ ip -netns testns link set dev dummy0 down
+ set +e
- ip netns exec testns ip link del dev dummy0
+ echo " Route deleted on down"
+ ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch"
+ ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch"
- ip netns del testns
-
- if [ $ret -ne 0 ]; then
- echo "FAIL: unicast route test"
- return 1
- fi
- echo "PASS: unicast route test"
+ cleanup
}
fib_down_multipath_test_do()
local down_dev=$1
local up_dev=$2
- ip netns exec testns ip route get fibmatch 203.0.113.1 \
+ ip -netns testns route get fibmatch 203.0.113.1 \
oif $down_dev &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \
+ log_test $? 2 "IPv4 fibmatch on down device"
+ ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
oif $down_dev &> /dev/null
- check_fail $?
+ log_test $? 2 "IPv6 fibmatch on down device"
- ip netns exec testns ip route get fibmatch 203.0.113.1 \
+ ip -netns testns route get fibmatch 203.0.113.1 \
oif $up_dev &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \
+ log_test $? 0 "IPv4 fibmatch on up device"
+ ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
oif $up_dev &> /dev/null
- check_err $?
+ log_test $? 0 "IPv6 fibmatch on up device"
- ip netns exec testns ip route get fibmatch 203.0.113.1 | \
+ ip -netns testns route get fibmatch 203.0.113.1 | \
grep $down_dev | grep -q "dead linkdown"
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \
+ log_test $? 0 "IPv4 flags on down device"
+ ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
grep $down_dev | grep -q "dead linkdown"
- check_err $?
+ log_test $? 0 "IPv6 flags on down device"
- ip netns exec testns ip route get fibmatch 203.0.113.1 | \
+ ip -netns testns route get fibmatch 203.0.113.1 | \
grep $up_dev | grep -q "dead linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \
+ log_test $? 1 "IPv4 flags on up device"
+ ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
grep $up_dev | grep -q "dead linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 flags on up device"
}
fib_down_multipath_test()
{
- ret=0
+ echo
+ echo "Admin down multipath"
- netns_create "testns"
+ setup
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ set -e
+ ip -netns testns link add dummy1 type dummy
+ ip -netns testns link set dev dummy1 up
- ip netns exec testns ip link add dummy1 type dummy
- ip netns exec testns ip link set dev dummy1 up
+ ip -netns testns address add 192.0.2.1/24 dev dummy1
+ ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
-
- ip netns exec testns ip address add 192.0.2.1/24 dev dummy1
- ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
-
- ip netns exec testns ip route add 203.0.113.0/24 \
+ ip -netns testns route add 203.0.113.0/24 \
nexthop via 198.51.100.2 dev dummy0 \
nexthop via 192.0.2.2 dev dummy1
- ip netns exec testns ip -6 route add 2001:db8:3::/64 \
+ ip -netns testns -6 route add 2001:db8:3::/64 \
nexthop via 2001:db8:1::2 dev dummy0 \
nexthop via 2001:db8:2::2 dev dummy1
+ set +e
+
+ echo " Verify start point"
+ ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
- ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
- check_err $?
+ ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip link set dev dummy0 down
- check_err $?
+ set -e
+ ip -netns testns link set dev dummy0 down
+ set +e
+ echo " One device down, one up"
fib_down_multipath_test_do "dummy0" "dummy1"
- ip netns exec testns ip link set dev dummy0 up
- check_err $?
- ip netns exec testns ip link set dev dummy1 down
- check_err $?
+ set -e
+ ip -netns testns link set dev dummy0 up
+ ip -netns testns link set dev dummy1 down
+ set +e
+ echo " Other device down and up"
fib_down_multipath_test_do "dummy1" "dummy0"
- ip netns exec testns ip link set dev dummy0 down
- check_err $?
+ set -e
+ ip -netns testns link set dev dummy0 down
+ set +e
- ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
- check_fail $?
+ echo " Both devices down"
+ ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch"
+ ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch"
- ip netns exec testns ip link del dev dummy1
- ip netns exec testns ip link del dev dummy0
-
- ip netns del testns
-
- if [ $ret -ne 0 ]; then
- echo "FAIL: multipath route test"
- return 1
- fi
- echo "PASS: multipath route test"
+ ip -netns testns link del dev dummy1
+ cleanup
}
fib_down_test()
{
- echo "Running netdev down tests"
-
fib_down_unicast_test
fib_down_multipath_test
}
+# Local routes should not be affected when carrier changes.
fib_carrier_local_test()
{
- ret=0
+ echo
+ echo "Local carrier tests - single path"
- # Local routes should not be affected when carrier changes.
- netns_create "testns"
+ setup
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ set -e
+ ip -netns testns link set dev dummy0 carrier on
+ set +e
- ip netns exec testns ip link set dev dummy0 carrier on
+ echo " Start point"
+ ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
-
- ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null
- check_err $?
-
- ip netns exec testns ip route get fibmatch 198.51.100.1 | \
+ ip -netns testns route get fibmatch 198.51.100.1 | \
grep -q "linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \
+ log_test $? 1 "IPv4 - no linkdown flag"
+ ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
grep -q "linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 - no linkdown flag"
- ip netns exec testns ip link set dev dummy0 carrier off
+ set -e
+ ip -netns testns link set dev dummy0 carrier off
+ sleep 1
+ set +e
- ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null
- check_err $?
+ echo " Carrier off on nexthop"
+ ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 198.51.100.1 | \
+ ip -netns testns route get fibmatch 198.51.100.1 | \
grep -q "linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \
+ log_test $? 1 "IPv4 - linkdown flag set"
+ ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
grep -q "linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 - linkdown flag set"
- ip netns exec testns ip address add 192.0.2.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0
+ set -e
+ ip -netns testns address add 192.0.2.1/24 dev dummy0
+ ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
+ set +e
- ip netns exec testns ip route get fibmatch 192.0.2.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 &> /dev/null
- check_err $?
+ echo " Route to local address with carrier down"
+ ip -netns testns route get fibmatch 192.0.2.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ ip -netns testns -6 route get fibmatch 2001:db8:2::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 192.0.2.1 | \
+ ip -netns testns route get fibmatch 192.0.2.1 | \
grep -q "linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 | \
+ log_test $? 1 "IPv4 linkdown flag set"
+ ip -netns testns -6 route get fibmatch 2001:db8:2::1 | \
grep -q "linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 linkdown flag set"
- ip netns exec testns ip link del dev dummy0
-
- ip netns del testns
-
- if [ $ret -ne 0 ]; then
- echo "FAIL: local route carrier test"
- return 1
- fi
- echo "PASS: local route carrier test"
+ cleanup
}
fib_carrier_unicast_test()
{
ret=0
- netns_create "testns"
-
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ echo
+ echo "Single path route carrier test"
- ip netns exec testns ip link set dev dummy0 carrier on
+ setup
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ set -e
+ ip -netns testns link set dev dummy0 carrier on
+ set +e
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_err $?
+ echo " Start point"
+ ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 198.51.100.2 | \
+ ip -netns testns route get fibmatch 198.51.100.2 | \
grep -q "linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \
+ log_test $? 1 "IPv4 no linkdown flag"
+ ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
grep -q "linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 no linkdown flag"
- ip netns exec testns ip link set dev dummy0 carrier off
+ set -e
+ ip -netns testns link set dev dummy0 carrier off
+ set +e
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_err $?
+ echo " Carrier down"
+ ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 198.51.100.2 | \
+ ip -netns testns route get fibmatch 198.51.100.2 | \
grep -q "linkdown"
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \
+ log_test $? 0 "IPv4 linkdown flag set"
+ ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
grep -q "linkdown"
- check_err $?
+ log_test $? 0 "IPv6 linkdown flag set"
- ip netns exec testns ip address add 192.0.2.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0
+ set -e
+ ip -netns testns address add 192.0.2.1/24 dev dummy0
+ ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
+ set +e
- ip netns exec testns ip route get fibmatch 192.0.2.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 &> /dev/null
- check_err $?
+ echo " Second address added with carrier down"
+ ip -netns testns route get fibmatch 192.0.2.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ ip -netns testns -6 route get fibmatch 2001:db8:2::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 192.0.2.2 | \
+ ip -netns testns route get fibmatch 192.0.2.2 | \
grep -q "linkdown"
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 | \
+ log_test $? 0 "IPv4 linkdown flag set"
+ ip -netns testns -6 route get fibmatch 2001:db8:2::2 | \
grep -q "linkdown"
- check_err $?
+ log_test $? 0 "IPv6 linkdown flag set"
- ip netns exec testns ip link del dev dummy0
-
- ip netns del testns
-
- if [ $ret -ne 0 ]; then
- echo "FAIL: unicast route carrier test"
- return 1
- fi
- echo "PASS: unicast route carrier test"
+ cleanup
}
fib_carrier_test()
{
- echo "Running netdev carrier change tests"
-
fib_carrier_local_test
fib_carrier_unicast_test
}
exit 0
fi
+# start clean
+cleanup &> /dev/null
+
fib_test
exit $ret
--- /dev/null
+forwarding.config
--- /dev/null
+Motivation
+==========
+
+One of the nice things about network namespaces is that they allow one
+to easily create and test complex environments.
+
+Unfortunately, these namespaces can not be used with actual switching
+ASICs, as their ports can not be migrated to other network namespaces
+(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+L1-separation provided by namespaces.
+
+However, a similar kind of flexibility can be achieved by using VRFs and
+by looping the switch ports together. For example:
+
+ br0
+ +
+ vrf-h1 | vrf-h2
+ + +---+----+ +
+ | | | |
+ 192.0.2.1/24 + + + + 192.0.2.2/24
+ swp1 swp2 swp3 swp4
+ + + + +
+ | | | |
+ +--------+ +--------+
+
+The VRFs act as lightweight namespaces representing hosts connected to
+the switch.
+
+This approach for testing switch ASICs has several advantages over the
+traditional method that requires multiple physical machines, to name a
+few:
+
+1. Only the device under test (DUT) is being tested without noise from
+other system.
+
+2. Ability to easily provision complex topologies. Testing bridging
+between 4-ports LAGs or 8-way ECMP requires many physical links that are
+not always available. With the VRF-based approach one merely needs to
+loopback more ports.
+
+These tests are written with switch ASICs in mind, but they can be run
+on any Linux box using veth pairs to emulate physical loopbacks.
+
+Guidelines for Writing Tests
+============================
+
+o Where possible, reuse an existing topology for different tests instead
+ of recreating the same topology.
+o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
+ RFC 5737, respectively.
+o Where possible, tests shall be written so that they can be reused by
+ multiple topologies and added to lib.sh.
+o Checks shall be added to lib.sh for any external dependencies.
+o Code shall be checked using ShellCheck [1] prior to submission.
+
+1. https://www.shellcheck.net/
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ # 10 Seconds ageing time.
+ ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+ mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
+
+exit $EXIT_STATUS
--- /dev/null
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_VRF=m
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_VETH=m
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+declare -A NETIFS
+
+NETIFS[p1]=veth0
+NETIFS[p2]=veth1
+NETIFS[p3]=veth2
+NETIFS[p4]=veth3
+NETIFS[p5]=veth4
+NETIFS[p6]=veth5
+NETIFS[p7]=veth6
+NETIFS[p8]=veth7
+
+##############################################################################
+# Defines
+
+# IPv4 ping utility name
+PING=ping
+# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
+PING6=ping6
+# Packet generator. Some distributions use 'mz'.
+MZ=mausezahn
+# Time to wait after interfaces participating in the test are all UP
+WAIT_TIME=5
+# Whether to pause on failure or not.
+PAUSE_ON_FAIL=no
+# Whether to pause on cleanup or not.
+PAUSE_ON_CLEANUP=no
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+# Can be overridden by the configuration file.
+PING=${PING:=ping}
+PING6=${PING6:=ping6}
+MZ=${MZ:=mausezahn}
+WAIT_TIME=${WAIT_TIME:=5}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+
+if [[ -f forwarding.config ]]; then
+ source forwarding.config
+fi
+
+##############################################################################
+# Sanity checks
+
+check_tc_version()
+{
+ tc -j &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing JSON support"
+ exit 1
+ fi
+
+ tc filter help 2>&1 | grep block &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing shared block support"
+ exit 1
+ fi
+}
+
+if [[ "$(id -u)" -ne 0 ]]; then
+ echo "SKIP: need root privileges"
+ exit 0
+fi
+
+if [[ "$CHECK_TC" = "yes" ]]; then
+ check_tc_version
+fi
+
+if [[ ! -x "$(command -v jq)" ]]; then
+ echo "SKIP: jq not installed"
+ exit 1
+fi
+
+if [[ ! -x "$(command -v $MZ)" ]]; then
+ echo "SKIP: $MZ not installed"
+ exit 0
+fi
+
+if [[ ! -v NUM_NETIFS ]]; then
+ echo "SKIP: importer does not define \"NUM_NETIFS\""
+ exit 0
+fi
+
+##############################################################################
+# Command line options handling
+
+count=0
+
+while [[ $# -gt 0 ]]; do
+ if [[ "$count" -eq "0" ]]; then
+ unset NETIFS
+ declare -A NETIFS
+ fi
+ count=$((count + 1))
+ NETIFS[p$count]="$1"
+ shift
+done
+
+##############################################################################
+# Network interfaces configuration
+
+for i in $(eval echo {1..$NUM_NETIFS}); do
+ ip link show dev ${NETIFS[p$i]} &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: could not find all required interfaces"
+ exit 0
+ fi
+done
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -ne 0 ]]; then
+ RET=$err
+ retmsg=$msg
+ fi
+}
+
+check_fail()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -eq 0 ]]; then
+ RET=1
+ retmsg=$msg
+ fi
+}
+
+log_test()
+{
+ local test_name=$1
+ local opt_str=$2
+
+ if [[ $# -eq 2 ]]; then
+ opt_str="($opt_str)"
+ fi
+
+ if [[ $RET -ne 0 ]]; then
+ EXIT_STATUS=1
+ printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str"
+ if [[ ! -z "$retmsg" ]]; then
+ printf "\t%s\n" "$retmsg"
+ fi
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo "Hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ return 1
+ fi
+
+ printf "TEST: %-60s [PASS]\n" "$test_name $opt_str"
+ return 0
+}
+
+log_info()
+{
+ local msg=$1
+
+ echo "INFO: $msg"
+}
+
+setup_wait()
+{
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ while true; do
+ ip link show dev ${NETIFS[p$i]} up \
+ | grep 'state UP' &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ sleep 1
+ else
+ break
+ fi
+ done
+ done
+
+ # Make sure links are ready.
+ sleep $WAIT_TIME
+}
+
+pre_cleanup()
+{
+ if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
+ echo "Pausing before cleanup, hit any key to continue"
+ read
+ fi
+}
+
+vrf_prepare()
+{
+ ip -4 rule add pref 32765 table local
+ ip -4 rule del pref 0
+ ip -6 rule add pref 32765 table local
+ ip -6 rule del pref 0
+}
+
+vrf_cleanup()
+{
+ ip -6 rule add pref 0 table local
+ ip -6 rule del pref 32765
+ ip -4 rule add pref 0 table local
+ ip -4 rule del pref 32765
+}
+
+__last_tb_id=0
+declare -A __TB_IDS
+
+__vrf_td_id_assign()
+{
+ local vrf_name=$1
+
+ __last_tb_id=$((__last_tb_id + 1))
+ __TB_IDS[$vrf_name]=$__last_tb_id
+ return $__last_tb_id
+}
+
+__vrf_td_id_lookup()
+{
+ local vrf_name=$1
+
+ return ${__TB_IDS[$vrf_name]}
+}
+
+vrf_create()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_assign $vrf_name
+ tb_id=$?
+
+ ip link add dev $vrf_name type vrf table $tb_id
+ ip -4 route add table $tb_id unreachable default metric 4278198272
+ ip -6 route add table $tb_id unreachable default metric 4278198272
+}
+
+vrf_destroy()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_lookup $vrf_name
+ tb_id=$?
+
+ ip -6 route del table $tb_id unreachable default metric 4278198272
+ ip -4 route del table $tb_id unreachable default metric 4278198272
+ ip link del dev $vrf_name
+}
+
+__addr_add_del()
+{
+ local if_name=$1
+ local add_del=$2
+ local array
+
+ shift
+ shift
+ array=("${@}")
+
+ for addrstr in "${array[@]}"; do
+ ip address $add_del $addrstr dev $if_name
+ done
+}
+
+simple_if_init()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ vrf_create $vrf_name
+ ip link set dev $if_name master $vrf_name
+ ip link set dev $vrf_name up
+ ip link set dev $if_name up
+
+ __addr_add_del $if_name add "${array[@]}"
+}
+
+simple_if_fini()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ __addr_add_del $if_name del "${array[@]}"
+
+ ip link set dev $if_name down
+ vrf_destroy $vrf_name
+}
+
+master_name_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["master"]'
+}
+
+link_stats_tx_packets_get()
+{
+ local if_name=$1
+
+ ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+}
+
+mac_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
+bridge_ageing_time_get()
+{
+ local bridge=$1
+ local ageing_time
+
+ # Need to divide by 100 to convert to seconds.
+ ageing_time=$(ip -j -d link show dev $bridge \
+ | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
+ echo $((ageing_time / 100))
+}
+
+forwarding_enable()
+{
+ ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
+ ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+
+ sysctl -q -w net.ipv4.conf.all.forwarding=1
+ sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+forwarding_restore()
+{
+ sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
+ sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+}
+
+tc_offload_check()
+{
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ ethtool -k ${NETIFS[p$i]} \
+ | grep "hw-tc-offload: on" &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+##############################################################################
+# Tests
+
+ping_test()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ RET=0
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ check_err $?
+ log_test "ping"
+}
+
+ping6_test()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ RET=0
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ check_err $?
+ log_test "ping6"
+}
+
+learning_test()
+{
+ local bridge=$1
+ local br_port1=$2 # Connected to `host1_if`.
+ local host1_if=$3
+ local host2_if=$4
+ local mac=de:ad:be:ef:13:37
+ local ageing_time
+
+ RET=0
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ # Disable unknown unicast flooding on `br_port1` to make sure
+ # packets are only forwarded through the port after a matching
+ # FDB entry was installed.
+ bridge link set dev $br_port1 flood off
+
+ tc qdisc add dev $host1_if ingress
+ tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_fail $? "Packet reached second host when should not"
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_err $? "Did not find FDB record when should"
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err $? "Packet did not reach second host when should"
+
+ # Wait for 10 seconds after the ageing time to make sure FDB
+ # record was aged-out.
+ ageing_time=$(bridge_ageing_time_get $bridge)
+ sleep $((ageing_time + 10))
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning off
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning on
+
+ tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host1_if ingress
+
+ bridge link set dev $br_port1 flood on
+
+ log_test "FDB learning"
+}
+
+flood_test_do()
+{
+ local should_flood=$1
+ local mac=$2
+ local ip=$3
+ local host1_if=$4
+ local host2_if=$5
+ local err=0
+
+ # Add an ACL on `host2_if` which will tell us whether the packet
+ # was flooded to it or not.
+ tc qdisc add dev $host2_if ingress
+ tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host2_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ if [[ $? -ne 0 && $should_flood == "true" || \
+ $? -eq 0 && $should_flood == "false" ]]; then
+ err=1
+ fi
+
+ tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host2_if ingress
+
+ return $err
+}
+
+flood_unicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=de:ad:be:ef:13:37
+ local ip=192.0.2.100
+
+ RET=0
+
+ bridge link set dev $br_port flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unknown unicast flood"
+}
+
+flood_multicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=01:00:5e:00:00:01
+ local ip=239.0.0.1
+
+ RET=0
+
+ bridge link set dev $br_port mcast_flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port mcast_flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unregistered multicast flood"
+}
+
+flood_test()
+{
+ # `br_port` is connected to `host2_if`
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+
+ flood_unicast_test $br_port $host1_if $host2_if
+ flood_multicast_test $br_port $host1_if $host2_if
+}
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ ip address add 192.0.2.1/24 dev $rp1
+ ip address add 2001:db8:1::1/64 dev $rp1
+
+ ip address add 198.51.100.1/24 dev $rp2
+ ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+ ip address del 2001:db8:2::1/64 dev $rp2
+ ip address del 198.51.100.1/24 dev $rp2
+
+ ip address del 2001:db8:1::1/64 dev $rp1
+ ip address del 192.0.2.1/24 dev $rp1
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+
+exit $EXIT_STATUS
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+
+ ip route add 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ ip route add 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+
+ ip route add 192.0.2.0/24 vrf vrf-r2 \
+ nexthop via 169.254.2.12 dev $rp22 \
+ nexthop via 169.254.3.13 dev $rp23
+ ip route add 2001:db8:1::/64 vrf vrf-r2 \
+ nexthop via fe80:2::12 dev $rp22 \
+ nexthop via fe80:3::13 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+multipath_eval()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local packets_rp12=$4
+ local packets_rp13=$5
+ local weights_ratio packets_ratio diff
+
+ RET=0
+
+ if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+ check_err 1 "Packet difference is 0"
+ log_test "Multipath"
+ log_info "Expected ratio $weights_ratio"
+ return
+ fi
+
+ if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+ weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+ | bc -l)
+ packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+ | bc -l)
+ else
+ weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" | \
+ bc -l)
+ packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" | \
+ bc -l)
+ fi
+
+ diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+ diff=${diff#-}
+
+ test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+ check_err $? "Too large discrepancy between expected and measured ratios"
+ log_test "$desc"
+ log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
+
+multipath4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+ local hash_policy
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+ nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath6_l4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+ local hash_policy
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy)
+ sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+
+ sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath6_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+ done
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+multipath_test()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ multipath6_l4_test "ECMP" 1 1
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+multipath_test
+
+exit $EXIT_STATUS
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 clsact
+
+ simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+ simple_if_fini $swp2 192.0.2.1/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+mirred_egress_redirect_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched without redirect rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match incoming redirected packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "mirred egress redirect ($tcflags)"
+}
+
+gact_drop_and_ok_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ skip_hw dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not dropped"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action ok
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see trapped packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "gact drop and ok ($tcflags)"
+}
+
+gact_trap_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_hw dst_ip 192.0.2.2 action drop
+ tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_fail $? "Saw packet without trap rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action trap
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not trapped"
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see trapped packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "trap ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swp1origmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp1 address $h2mac
+ ip link set $swp2 address $h1mac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+ ip link set $swp1 address $swp1origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+gact_drop_and_ok_test
+mirred_egress_redirect_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ gact_drop_and_ok_test
+ mirred_egress_redirect_test
+ gact_trap_test
+fi
+
+exit $EXIT_STATUS
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+unreachable_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_fail $? "matched on filter in unreachable chain"
+
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "unreachable chain ($tcflags)"
+}
+
+gact_goto_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $h2mac action goto chain 1
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct filter with goto chain action"
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_err $? "Did not match on correct filter in chain 1"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "gact goto chain ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+unreachable_chain_test
+gact_goto_chain_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ unreachable_chain_test
+ gact_goto_chain_test
+fi
+
+exit $EXIT_STATUS
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+CHECK_TC="yes"
+
+tc_check_packets()
+{
+ local id=$1
+ local handle=$2
+ local count=$3
+ local ret
+
+ output="$(tc -j -s filter show $id)"
+ # workaround the jq bug which causes jq to return 0 in case input is ""
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
+ echo $output | \
+ jq -e ".[] \
+ | select(.options.handle == $handle) \
+ | select(.options.actions[0].stats.packets == $count)" \
+ &> /dev/null
+ return $?
+}
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+match_dst_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "dst_mac match ($tcflags)"
+}
+
+match_src_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_mac $h1mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "src_mac match ($tcflags)"
+}
+
+match_dst_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 198.51.100.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "dst_ip match ($tcflags)"
+}
+
+match_src_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_ip 198.51.100.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_ip 192.0.2.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags src_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "src_ip match ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+match_dst_mac_test
+match_src_mac_test
+match_dst_ip_test
+match_src_ip_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ match_dst_mac_test
+ match_src_mac_test
+ match_dst_ip_test
+ match_src_ip_test
+fi
+
+exit $EXIT_STATUS
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.1/24
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.1/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact
+
+ simple_if_init $swp2 192.0.2.2/24
+ tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ simple_if_fini $swp2 192.0.2.2/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+shared_block_test()
+{
+ RET=0
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 1
+ check_err $? "Did not match first incoming packet on a block"
+
+ $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 2
+ check_err $? "Did not match second incoming packet on a block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ log_test "shared block ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp2 address $swmac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+shared_block_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ shared_block_test
+fi
+
+exit $EXIT_STATUS
--- /dev/null
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Execute a subprocess in a network namespace
+
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link set lo up
+}
+
+cleanup() {
+ ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+"$@"
+exit "$?"
* - SOCK_DGRAM
* - SOCK_RAW
*
+ * PF_RDS
+ * - SOCK_SEQPACKET
+ *
* Start this program on two connected hosts, one in send mode and
* the other with option '-r' to put it in receiver mode.
*
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <linux/rds.h>
#ifndef SO_EE_ORIGIN_ZEROCOPY
#define SO_EE_ORIGIN_ZEROCOPY 5
return fd;
}
-static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
+static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
+{
+ struct cmsghdr *cm;
+
+ if (!msg->msg_control)
+ error(1, errno, "NULL cookie");
+ cm = (void *)msg->msg_control;
+ cm->cmsg_len = CMSG_LEN(sizeof(cookie));
+ cm->cmsg_level = SOL_RDS;
+ cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+ memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
+}
+
+static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
{
int ret, len, i, flags;
+ static uint32_t cookie;
+ char ckbuf[CMSG_SPACE(sizeof(cookie))];
len = 0;
for (i = 0; i < msg->msg_iovlen; i++)
len += msg->msg_iov[i].iov_len;
flags = MSG_DONTWAIT;
- if (do_zerocopy)
+ if (do_zerocopy) {
flags |= MSG_ZEROCOPY;
+ if (domain == PF_RDS) {
+ memset(&msg->msg_control, 0, sizeof(msg->msg_control));
+ msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
+ msg->msg_control = (struct cmsghdr *)ckbuf;
+ add_zcopy_cookie(msg, ++cookie);
+ }
+ }
ret = sendmsg(fd, msg, flags);
if (ret == -1 && errno == EAGAIN)
if (do_zerocopy && ret)
expected_completions++;
}
+ if (do_zerocopy && domain == PF_RDS) {
+ msg->msg_control = NULL;
+ msg->msg_controllen = 0;
+ }
return true;
}
msg->msg_iov[0].iov_len = payload_len + extra_len;
extra_len = 0;
- do_sendmsg(fd, msg, do_zerocopy);
+ do_sendmsg(fd, msg, do_zerocopy,
+ (cfg_dst_addr.ss_family == AF_INET ?
+ PF_INET : PF_INET6));
}
do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
if (cfg_zerocopy)
do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
- if (domain != PF_PACKET)
+ if (domain != PF_PACKET && domain != PF_RDS)
if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
error(1, errno, "connect");
+ if (domain == PF_RDS) {
+ if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
+ error(1, errno, "bind");
+ }
+
return fd;
}
-static bool do_recv_completion(int fd)
+static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
+{
+ int i;
+
+ if (ck->num > RDS_MAX_ZCOOKIES)
+ error(1, 0, "Returned %d cookies, max expected %d\n",
+ ck->num, RDS_MAX_ZCOOKIES);
+ for (i = 0; i < ck->num; i++)
+ if (cfg_verbose >= 2)
+ fprintf(stderr, "%d\n", ck->cookies[i]);
+ return ck->num;
+}
+
+static bool do_recvmsg_completion(int fd)
+{
+ char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
+ struct rds_zcopy_cookies *ck;
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ bool ret = false;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_control = cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+
+ if (recvmsg(fd, &msg, MSG_DONTWAIT))
+ return ret;
+
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, errno, "recvmsg notification: truncated");
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_RDS &&
+ cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
+
+ ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
+ completions += do_process_zerocopy_cookies(ck);
+ ret = true;
+ break;
+ }
+ error(0, 0, "ignoring cmsg at level %d type %d\n",
+ cmsg->cmsg_level, cmsg->cmsg_type);
+ }
+ return ret;
+}
+
+static bool do_recv_completion(int fd, int domain)
{
struct sock_extended_err *serr;
struct msghdr msg = {};
int ret, zerocopy;
char control[100];
+ if (domain == PF_RDS)
+ return do_recvmsg_completion(fd);
+
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
cm->cmsg_level, cm->cmsg_type);
serr = (void *) CMSG_DATA(cm);
+
if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
if (serr->ee_errno != 0)
}
/* Read all outstanding messages on the errqueue */
-static void do_recv_completions(int fd)
+static void do_recv_completions(int fd, int domain)
{
- while (do_recv_completion(fd)) {}
+ while (do_recv_completion(fd, domain)) {}
}
/* Wait for all remaining completions on the errqueue */
-static void do_recv_remaining_completions(int fd)
+static void do_recv_remaining_completions(int fd, int domain)
{
int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
while (completions < expected_completions &&
gettimeofday_ms() < tstop) {
- if (do_poll(fd, POLLERR))
- do_recv_completions(fd);
+ if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
+ do_recv_completions(fd, domain);
}
if (completions < expected_completions)
msg.msg_iovlen++;
}
+ if (domain == PF_RDS) {
+ msg.msg_name = &cfg_dst_addr;
+ msg.msg_namelen = (cfg_dst_addr.ss_family == AF_INET ?
+ sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6));
+ }
+
iov[2].iov_base = payload;
iov[2].iov_len = cfg_payload_len;
msg.msg_iovlen++;
if (cfg_cork)
do_sendmsg_corked(fd, &msg);
else
- do_sendmsg(fd, &msg, cfg_zerocopy);
+ do_sendmsg(fd, &msg, cfg_zerocopy, domain);
while (!do_poll(fd, POLLOUT)) {
if (cfg_zerocopy)
- do_recv_completions(fd);
+ do_recv_completions(fd, domain);
}
} while (gettimeofday_ms() < tstop);
if (cfg_zerocopy)
- do_recv_remaining_completions(fd);
+ do_recv_remaining_completions(fd, domain);
if (close(fd))
error(1, errno, "close");
40 /* max tcp options */;
int c;
char *daddr = NULL, *saddr = NULL;
+ char *cfg_test;
cfg_payload_len = max_payload_len;
break;
}
}
+
+ cfg_test = argv[argc - 1];
+ if (strcmp(cfg_test, "rds") == 0) {
+ if (!daddr)
+ error(1, 0, "-D <server addr> required for PF_RDS\n");
+ if (!cfg_rx && !saddr)
+ error(1, 0, "-S <client addr> required for PF_RDS\n");
+ }
setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
do_test(cfg_family, SOCK_STREAM, 0);
else if (!strcmp(cfg_test, "udp"))
do_test(cfg_family, SOCK_DGRAM, 0);
+ else if (!strcmp(cfg_test, "rds"))
+ do_test(PF_RDS, SOCK_SEQPACKET, 0);
else
error(1, 0, "unknown cfg_test %s", cfg_test);
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/if_packet.h>
+#include <net/if.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
* @return -1 if mode is bad, a valid socket otherwise */
static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
{
+ struct sockaddr_ll addr = {0};
int fd, val;
- fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP));
+ fd = socket(PF_PACKET, SOCK_RAW, 0);
if (fd < 0) {
perror("socket packet");
exit(1);
}
+ pair_udp_setfilter(fd);
+
+ addr.sll_family = AF_PACKET;
+ addr.sll_protocol = htons(ETH_P_IP);
+ addr.sll_ifindex = if_nametoindex("lo");
+ if (addr.sll_ifindex == 0) {
+ perror("if_nametoindex");
+ exit(1);
+ }
+ if (bind(fd, (void *) &addr, sizeof(addr))) {
+ perror("bind packet");
+ exit(1);
+ }
+
val = (((int) typeflags) << 16) | group_id;
if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
if (close(fd)) {
return -1;
}
- pair_udp_setfilter(fd);
return fd;
}
static void sock_fanout_set_ebpf(int fd)
{
+ static char log_buf[65536];
+
const int len_off = __builtin_offsetof(struct __sk_buff, len);
struct bpf_insn prog[] = {
{ BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 },
{ BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 },
{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
};
- char log_buf[512];
union bpf_attr attr;
int pfd;
if ((!(ret[0] == expect[0] && ret[1] == expect[1])) &&
(!(ret[0] == expect[1] && ret[1] == expect[0]))) {
- fprintf(stderr, "ERROR: incorrect queue lengths\n");
+ fprintf(stderr, "warning: incorrect queue lengths\n");
return 1;
}
uint8_t type = typeflags & 0xFF;
int fds[2], fds_udp[2][2], ret;
- fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
+ fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n",
+ typeflags, PORT_BASE, PORT_BASE + port_off);
fds[0] = sock_fanout_open(typeflags, 0);
fds[1] = sock_fanout_open(typeflags, 0);
const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } };
const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } };
const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } };
- int port_off = 2, tries = 5, ret;
+ int port_off = 2, tries = 20, ret;
test_control_single();
test_control_group();
/* find a set of ports that do not collide onto the same socket */
ret = test_datapath(PACKET_FANOUT_HASH, port_off,
expect_hash[0], expect_hash[1]);
- while (ret && tries--) {
+ while (ret) {
fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
expect_hash[0], expect_hash[1]);
+ if (!--tries) {
+ fprintf(stderr, "too many collisions\n");
+ return 1;
+ }
}
ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
ip link help gretap 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
echo "SKIP: gretap: iproute2 too old"
+ ip netns del "$testns"
return 1
fi
if [ $ret -ne 0 ]; then
echo "FAIL: gretap"
+ ip netns del "$testns"
return 1
fi
echo "PASS: gretap"
ip link help ip6gretap 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
echo "SKIP: ip6gretap: iproute2 too old"
+ ip netns del "$testns"
return 1
fi
if [ $ret -ne 0 ]; then
echo "FAIL: ip6gretap"
+ ip netns del "$testns"
return 1
fi
echo "PASS: ip6gretap"
if [ $ret -ne 0 ]; then
echo "FAIL: erspan"
+ ip netns del "$testns"
return 1
fi
echo "PASS: erspan"
if [ $ret -ne 0 ]; then
echo "FAIL: ip6erspan"
+ ip netns del "$testns"
return 1
fi
echo "PASS: ip6erspan"
echo "--------------------"
echo "running psock_fanout test"
echo "--------------------"
-./psock_fanout
+./in_netns.sh ./psock_fanout
if [ $? -ne 0 ]; then
echo "[FAIL]"
else
echo "--------------------"
echo "running psock_tpacket test"
echo "--------------------"
-./psock_tpacket
+./in_netns.sh ./psock_tpacket
if [ $? -ne 0 ]; then
echo "[FAIL]"
else
void sighandler(int sig, siginfo_t *info, void *ctx)
{
- struct ucontext *ucp = ctx;
+ ucontext_t *ucp = ctx;
if (!testing) {
signal(sig, SIG_DFL);
#define SECCOMP_FILTER_FLAG_LOG 2
#endif
+#ifndef PTRACE_SECCOMP_GET_METADATA
+#define PTRACE_SECCOMP_GET_METADATA 0x420d
+
+struct seccomp_metadata {
+ __u64 filter_off; /* Input: which filter */
+ __u64 flags; /* Output: filter's flags */
+};
+#endif
+
#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
EXPECT_EQ(errno, EOPNOTSUPP);
}
+TEST(get_metadata)
+{
+ pid_t pid;
+ int pipefd[2];
+ char buf;
+ struct seccomp_metadata md;
+
+ ASSERT_EQ(0, pipe(pipefd));
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ /* one with log, one without */
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
+ SECCOMP_FILTER_FLAG_LOG, &prog));
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
+
+ ASSERT_EQ(0, close(pipefd[0]));
+ ASSERT_EQ(1, write(pipefd[1], "1", 1));
+ ASSERT_EQ(0, close(pipefd[1]));
+
+ while (1)
+ sleep(100);
+ }
+
+ ASSERT_EQ(0, close(pipefd[1]));
+ ASSERT_EQ(1, read(pipefd[0], &buf, 1));
+
+ ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
+ ASSERT_EQ(pid, waitpid(pid, NULL, 0));
+
+ md.filter_off = 0;
+ ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+ EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
+ EXPECT_EQ(md.filter_off, 0);
+
+ md.filter_off = 1;
+ ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+ EXPECT_EQ(md.flags, 0);
+ EXPECT_EQ(md.filter_off, 1);
+
+ ASSERT_EQ(0, kill(pid, SIGKILL));
+}
+
/*
* TODO:
* - add microbenchmarks
* The kernel must have network namespace support
-* The kernel must have veth support available, as a veth pair is created
+* The kernel must have veth support available, as a veth pair is created
prior to running the tests.
-* All tc-related features must be built in or available as modules.
- To check what is required in current setup run:
+* All tc-related features being tested must be built in or available as
+ modules. To check what is required in current setup run:
./tdc.py -c
Note:
RUNNING TDC
-----------
-To use tdc, root privileges are required. tdc will not run otherwise.
+To use tdc, root privileges are required. This is because the
+commands being tested must be run as root. The code that enforces
+execution by root uid has been moved into a plugin (see PLUGIN
+ARCHITECTURE, below).
-All tests are executed inside a network namespace to prevent conflicts
-within the host.
+If nsPlugin is linked, all tests are executed inside a network
+namespace to prevent conflicts within the host.
Running tdc without any arguments will run all tests. Refer to the section
on command line arguments for more information, or run:
the failed test in the TAP output.
+OVERVIEW OF TDC EXECUTION
+-------------------------
+
+One run of tests is considered a "test suite" (this will be refined in the
+future). A test suite has one or more test cases in it.
+
+A test case has four stages:
+
+ - setup
+ - execute
+ - verify
+ - teardown
+
+The setup and teardown stages can run zero or more commands. The setup
+stage does some setup if the test needs it. The teardown stage undoes
+the setup and returns the system to a "neutral" state so any other test
+can be run next. These two stages require any commands run to return
+success, but do not otherwise verify the results.
+
+The execute and verify stages each run one command. The execute stage
+tests the return code against one or more acceptable values. The
+verify stage checks the return code for success, and also compares
+the stdout with a regular expression.
+
+Each of the commands in any stage will run in a shell instance.
+
+
USER-DEFINED CONSTANTS
----------------------
Example:
$TC qdisc add dev $DEV1 ingress
+The NAMES values are used to substitute into the commands in the test cases.
+
COMMAND LINE ARGUMENTS
----------------------
Run tdc.py -h to see the full list of available arguments.
--p PATH Specify the tc executable located at PATH to be used on this
- test run
--c Show the available test case categories in this test file
--c CATEGORY Run only tests that belong to CATEGORY
--f FILE Read test cases from the JSON file named FILE
--l [CATEGORY] List all test cases in the JSON file. If CATEGORY is
- specified, list test cases matching that category.
--s ID Show the test case matching ID
--e ID Execute the test case identified by ID
--i Generate unique ID numbers for test cases with no existing
- ID number
+usage: tdc.py [-h] [-p PATH] [-D DIR [DIR ...]] [-f FILE [FILE ...]]
+ [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v]
+ [-d DEVICE] [-n NS] [-V]
+
+Linux TC unit tests
+
+optional arguments:
+ -h, --help show this help message and exit
+ -p PATH, --path PATH The full path to the tc executable to use
+ -v, --verbose Show the commands that are being run
+ -d DEVICE, --device DEVICE
+ Execute the test case in flower category
+
+selection:
+ select which test cases: files plus directories; filtered by categories
+ plus testids
+
+ -D DIR [DIR ...], --directory DIR [DIR ...]
+ Collect tests from the specified directory(ies)
+ (default [tc-tests])
+ -f FILE [FILE ...], --file FILE [FILE ...]
+ Run tests from the specified file(s)
+ -c [CATG [CATG ...]], --category [CATG [CATG ...]]
+ Run tests only from the specified category/ies, or if
+ no category/ies is/are specified, list known
+ categories.
+ -e ID [ID ...], --execute ID [ID ...]
+ Execute the specified test cases with specified IDs
+
+action:
+ select action to perform on selected test cases
+
+ -l, --list List all test cases, or those only within the
+ specified category
+ -s, --show Display the selected test cases
+ -i, --id Generate ID numbers for new test cases
+
+netns:
+ options for nsPlugin(run commands in net namespace)
+
+ -n NS, --namespace NS
+ Run commands in namespace NS
+
+valgrind:
+ options for valgrindPlugin (run command under test under Valgrind)
+
+ -V, --valgrind Run commands under valgrind
+
+
+PLUGIN ARCHITECTURE
+-------------------
+
+There is now a plugin architecture, and some of the functionality that
+was in the tdc.py script has been moved into the plugins.
+
+The plugins are in the directory plugin-lib. The are executed from
+directory plugins. Put symbolic links from plugins to plugin-lib,
+and name them according to the order you want them to run.
+
+Example:
+
+bjb@bee:~/work/tc-testing$ ls -l plugins
+total 4
+lrwxrwxrwx 1 bjb bjb 27 Oct 4 16:12 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+lrwxrwxrwx 1 bjb bjb 25 Oct 12 17:55 20-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+-rwxr-xr-x 1 bjb bjb 0 Sep 29 15:56 __init__.py
+
+The plugins are a subclass of TdcPlugin, defined in TdcPlugin.py and
+must be called "SubPlugin" so tdc can find them. They are
+distinguished from each other in the python program by their module
+name.
+
+This base class supplies "hooks" to run extra functions. These hooks are as follows:
+
+pre- and post-suite
+pre- and post-case
+pre- and post-execute stage
+adjust-command (runs in all stages and receives the stage name)
+
+The pre-suite hook receives the number of tests and an array of test ids.
+This allows you to dump out the list of skipped tests in the event of a
+failure during setup or teardown stage.
+
+The pre-case hook receives the ordinal number and test id of the current test.
+
+The adjust-command hook receives the stage id (see list below) and the
+full command to be executed. This allows for last-minute adjustment
+of the command.
+
+The stages are identified by the following strings:
+
+ - pre (pre-suite)
+ - setup
+ - command
+ - verify
+ - teardown
+ - post (post-suite)
+
+
+To write a plugin, you need to inherit from TdcPlugin in
+TdcPlugin.py. To use the plugin, you have to put the
+implementation file in plugin-lib, and add a symbolic link to it from
+plugins. It will be detected at run time and invoked at the
+appropriate times. There are a few examples in the plugin-lib
+directory:
+
+ - rootPlugin.py:
+ implements the enforcement of running as root
+ - nsPlugin.py:
+ sets up a network namespace and runs all commands in that namespace
+ - valgrindPlugin.py
+ runs each command in the execute stage under valgrind,
+ and checks for leaks.
+ This plugin will output an extra test for each test in the test file,
+ one is the existing output as to whether the test passed or failed,
+ and the other is a test whether the command leaked memory or not.
+ (This one is a preliminary version, it may not work quite right yet,
+ but the overall template is there and it should only need tweaks.)
ACKNOWLEDGEMENTS
- Add support for multiple versions of tc to run successively
-- Improve error messages when tdc aborts its run
+- Improve error messages when tdc aborts its run. Partially done - still
+ need to better handle problems in pre- and post-suite.
-- Allow tdc to write its results to file
+- Use python logger module for debug/verbose output
+
+- Allow tdc to write its results to file.
+ Maybe use python logger module for this too.
+
+- A better implementation of the "hooks". Currently, every plugin
+ will attempt to run a function at every hook point. Could be
+ changed so that plugin __init__ methods will register functions to
+ be run in the various predefined times. Then if a plugin does not
+ require action at a specific point, no penalty will be paid for
+ trying to run a function that will do nothing.
+
+- Proper exception handling - make an exception class and use it
+
+- a TestCase class, for easier testcase handling, searching, comparison
+
+- a TestSuite class
+ and a way to configure a test suite,
+ to automate running multiple "test suites" with different requirements
+
+- super simple test case example using ls, touch, etc
--- /dev/null
+#!/usr/bin/env python3
+
+class TdcPlugin:
+ def __init__(self):
+ super().__init__()
+ print(' -- {}.__init__'.format(self.sub_class))
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ self.testcount = testcount
+ self.testidlist = testidlist
+ if self.args.verbose > 1:
+ print(' -- {}.pre_suite'.format(self.sub_class))
+
+ def post_suite(self, index):
+ '''run commands after test_runner completes the test loop
+ index is the last ordinal number of test that was attempted'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_suite'.format(self.sub_class))
+
+ def pre_case(self, test_ordinal, testid):
+ '''run commands before test_runner does one test'''
+ if self.args.verbose > 1:
+ print(' -- {}.pre_case'.format(self.sub_class))
+ self.args.testid = testid
+ self.args.test_ordinal = test_ordinal
+
+ def post_case(self):
+ '''run commands after test_runner does one test'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_case'.format(self.sub_class))
+
+ def pre_execute(self):
+ '''run command before test-runner does the execute step'''
+ if self.args.verbose > 1:
+ print(' -- {}.pre_execute'.format(self.sub_class))
+
+ def post_execute(self):
+ '''run command after test-runner does the execute step'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_execute'.format(self.sub_class))
+
+ def adjust_command(self, stage, command):
+ '''adjust the command'''
+ if self.args.verbose > 1:
+ print(' -- {}.adjust_command {}'.format(self.sub_class, stage))
+
+ # if stage == 'pre':
+ # pass
+ # elif stage == 'setup':
+ # pass
+ # elif stage == 'execute':
+ # pass
+ # elif stage == 'verify':
+ # pass
+ # elif stage == 'teardown':
+ # pass
+ # elif stage == 'post':
+ # pass
+ # else:
+ # pass
+
+ return command
+
+ def add_args(self, parser):
+ '''Get the plugin args from the command line'''
+ self.argparser = parser
+ return self.argparser
+
+ def check_args(self, args, remaining):
+ '''Check that the args are set correctly'''
+ self.args = args
+ if self.args.verbose > 1:
+ print(' -- {}.check_args'.format(self.sub_class))
--- /dev/null
+tdc - Adding plugins for tdc
+
+Author: Brenda J. Butler - bjb@mojatatu.com
+
+ADDING PLUGINS
+--------------
+
+A new plugin should be written in python as a class that inherits from TdcPlugin.
+There are some examples in plugin-lib.
+
+The plugin can be used to add functionality to the test framework,
+such as:
+
+- adding commands to be run before and/or after the test suite
+- adding commands to be run before and/or after the test cases
+- adding commands to be run before and/or after the execute phase of the test cases
+- ability to alter the command to be run in any phase:
+ pre (the pre-suite stage)
+ prepare
+ execute
+ verify
+ teardown
+ post (the post-suite stage)
+- ability to add to the command line args, and use them at run time
+
+
+The functions in the class should follow the following interfaces:
+
+ def __init__(self)
+ def pre_suite(self, testcount, testidlist) # see "PRE_SUITE" below
+ def post_suite(self, ordinal) # see "SKIPPING" below
+ def pre_case(self, test_ordinal, testid) # see "PRE_CASE" below
+ def post_case(self)
+ def pre_execute(self)
+ def post_execute(self)
+ def adjust_command(self, stage, command) # see "ADJUST" below
+ def add_args(self, parser) # see "ADD_ARGS" below
+ def check_args(self, args, remaining) # see "CHECK_ARGS" below
+
+
+PRE_SUITE
+
+This method takes a testcount (number of tests to be run) and
+testidlist (array of test ids for tests that will be run). This is
+useful for various things, including when an exception occurs and the
+rest of the tests must be skipped. The info is stored in the object,
+and the post_suite method can refer to it when dumping the "skipped"
+TAP output. The tdc.py script will do that for the test suite as
+defined in the test case, but if the plugin is being used to run extra
+tests on each test (eg, check for memory leaks on associated
+co-processes) then that other tap output can be generated in the
+post-suite method using this info passed in to the pre_suite method.
+
+
+SKIPPING
+
+The post_suite method will receive the ordinal number of the last
+test to be attempted. It can use this info when outputting
+the TAP output for the extra test cases.
+
+
+PRE_CASE
+
+The pre_case method will receive the ordinal number of the test
+and the test id. Useful for outputing the extra test results.
+
+
+ADJUST
+
+The adjust_command method receives a string representing
+the execution stage and a string which is the actual command to be
+executed. The plugin can adjust the command, based on the stage of
+execution.
+
+The stages are represented by the following strings:
+
+ 'pre'
+ 'setup'
+ 'command'
+ 'verify'
+ 'teardown'
+ 'post'
+
+The adjust_command method must return the adjusted command so tdc
+can use it.
+
+
+ADD_ARGS
+
+The add_args method receives the argparser object and can add
+arguments to it. Care should be taken that the new arguments do not
+conflict with any from tdc.py or from other plugins that will be used
+concurrently.
+
+The add_args method should return the argparser object.
+
+
+CHECK_ARGS
+
+The check_args method is so that the plugin can do validation on
+the args, if needed. If there is a problem, and Exception should
+be raised, with a string that explains the problem.
+
+eg: raise Exception('plugin xxx, arg -y is wrong, fix it')
Include the 'id' field, but do not assign a value. Running tdc with the -i
option will generate a unique ID for that test case.
-tdc will recursively search the 'tc' subdirectory for .json files. Any
-test case files you create in these directories will automatically be included.
-If you wish to store your custom test cases elsewhere, be sure to run tdc
-with the -f argument and the path to your file.
+tdc will recursively search the 'tc-tests' subdirectory (or the
+directories named with the -D option) for .json files. Any test case
+files you create in these directories will automatically be included.
+If you wish to store your custom test cases elsewhere, be sure to run
+tdc with the -f argument and the path to your file, or the -D argument
+and the path to your directory(ies).
-Be aware of required escape characters in the JSON data - particularly when
-defining the match pattern. Refer to the tctests.json file for examples when
-in doubt.
+Be aware of required escape characters in the JSON data - particularly
+when defining the match pattern. Refer to the supplied json test files
+for examples when in doubt. The match pattern is written in json, and
+will be used by python. So the match pattern will be a python regular
+expression, but should be written using json syntax.
TEST CASE STRUCTURE
If an error is detected during the setup/teardown process, execution of the
tests will immediately stop with an error message and the namespace in which
the tests are run will be destroyed. This is to prevent inaccurate results
-in the test cases.
+in the test cases. tdc will output a series of TAP results for the skipped
+tests.
Repeated failures of the setup/teardown may indicate a problem with the test
case, or possibly even a bug in one of the commands that are not being tested.
individual command into a list, with the command being first, followed by all
acceptable exit codes for the command.
+Example:
+
+A pair of setup commands. The first can have exit code 0, 1 or 255, the
+second must have exit code 0.
+
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action reclassify index 65536"
+ ],
--- /dev/null
+tdc.py will look for plugins in a directory plugins off the cwd.
+Make a set of numbered symbolic links from there to the actual plugins.
+Eg:
+
+tdc.py
+plugin-lib/
+plugins/
+ __init__.py
+ 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+ 20-valgrindPlugin.py -> ../plugin-lib/valgrindPlugin.py
+ 30-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+
+
+tdc.py will find them and use them.
+
+
+rootPlugin
+ Check if the uid is root. If not, bail out.
+
+valgrindPlugin
+ Run the command under test with valgrind, and produce an extra set of TAP results for the memory tests.
+ This plugin will write files to the cwd, called vgnd-xxx.log. These will contain
+ the valgrind output for test xxx. Any file matching the glob 'vgnd-*.log' will be
+ deleted at the end of the run.
+
+nsPlugin
+ Run all the commands in a network namespace.
--- /dev/null
+import os
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'ns/SubPlugin'
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ super().pre_suite(testcount, testidlist)
+
+ if self.args.namespace:
+ self._ns_create()
+
+ def post_suite(self, index):
+ '''run commands after test_runner goes into a test loop'''
+ super().post_suite(index)
+ if self.args.verbose:
+ print('{}.post_suite'.format(self.sub_class))
+
+ if self.args.namespace:
+ self._ns_destroy()
+
+ def add_args(self, parser):
+ super().add_args(parser)
+ self.argparser_group = self.argparser.add_argument_group(
+ 'netns',
+ 'options for nsPlugin(run commands in net namespace)')
+ self.argparser_group.add_argument(
+ '-n', '--namespace', action='store_true',
+ help='Run commands in namespace')
+ return self.argparser
+
+ def adjust_command(self, stage, command):
+ super().adjust_command(stage, command)
+ cmdform = 'list'
+ cmdlist = list()
+
+ if not self.args.namespace:
+ return command
+
+ if self.args.verbose:
+ print('{}.adjust_command'.format(self.sub_class))
+
+ if not isinstance(command, list):
+ cmdform = 'str'
+ cmdlist = command.split()
+ else:
+ cmdlist = command
+ if stage == 'setup' or stage == 'execute' or stage == 'verify' or stage == 'teardown':
+ if self.args.verbose:
+ print('adjust_command: stage is {}; inserting netns stuff in command [{}] list [{}]'.format(stage, command, cmdlist))
+ cmdlist.insert(0, self.args.NAMES['NS'])
+ cmdlist.insert(0, 'exec')
+ cmdlist.insert(0, 'netns')
+ cmdlist.insert(0, 'ip')
+ else:
+ pass
+
+ if cmdform == 'str':
+ command = ' '.join(cmdlist)
+ else:
+ command = cmdlist
+
+ if self.args.verbose:
+ print('adjust_command: return command [{}]'.format(command))
+ return command
+
+ def _ns_create(self):
+ '''
+ Create the network namespace in which the tests will be run and set up
+ the required network devices for it.
+ '''
+ if self.args.namespace:
+ cmd = 'ip netns add {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link add $DEV0 type veth peer name $DEV1'
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link set $DEV0 up'
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ if self.args.device:
+ cmd = 'ip link set $DEV2 netns {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip -n {} link set $DEV2 up'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+
+ def _ns_destroy(self):
+ '''
+ Destroy the network namespace for testing (and any associated network
+ devices as well)
+ '''
+ if self.args.namespace:
+ cmd = 'ip netns delete {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('post', cmd)
+
+ def _exec_cmd(self, stage, command):
+ '''
+ Perform any required modifications on an executable command, then run
+ it in a subprocess and return the results.
+ '''
+ if '$' in command:
+ command = self._replace_keywords(command)
+
+ self.adjust_command(stage, command)
+ if self.args.verbose:
+ print('_exec_cmd: command "{}"'.format(command))
+ proc = subprocess.Popen(command,
+ shell=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=ENVIR)
+ (rawout, serr) = proc.communicate()
+
+ if proc.returncode != 0 and len(serr) > 0:
+ foutput = serr.decode("utf-8")
+ else:
+ foutput = rawout.decode("utf-8")
+
+ proc.stdout.close()
+ proc.stderr.close()
+ return proc, foutput
+
+ def _replace_keywords(self, cmd):
+ """
+ For a given executable command, substitute any known
+ variables contained within NAMES with the correct values
+ """
+ tcmd = Template(cmd)
+ subcmd = tcmd.safe_substitute(self.args.NAMES)
+ return subcmd
--- /dev/null
+import os
+import sys
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'root/SubPlugin'
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ # run commands before test_runner goes into a test loop
+ super().pre_suite(testcount, testidlist)
+
+ if os.geteuid():
+ print('This script must be run with root privileges', file=sys.stderr)
+ exit(1)
--- /dev/null
+'''
+run the command under test, under valgrind and collect memory leak info
+as a separate test.
+'''
+
+
+import os
+import re
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+def vp_extract_num_from_string(num_as_string_maybe_with_commas):
+ return int(num_as_string_maybe_with_commas.replace(',',''))
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'valgrind/SubPlugin'
+ self.tap = ''
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ super().pre_suite(testcount, testidlist)
+ if self.args.verbose > 1:
+ print('{}.pre_suite'.format(self.sub_class))
+ if self.args.valgrind:
+ self._add_to_tap('1..{}\n'.format(self.testcount))
+
+ def post_suite(self, index):
+ '''run commands after test_runner goes into a test loop'''
+ super().post_suite(index)
+ self._add_to_tap('\n|---\n')
+ if self.args.verbose > 1:
+ print('{}.post_suite'.format(self.sub_class))
+ print('{}'.format(self.tap))
+ if self.args.verbose < 4:
+ subprocess.check_output('rm -f vgnd-*.log', shell=True)
+
+ def add_args(self, parser):
+ super().add_args(parser)
+ self.argparser_group = self.argparser.add_argument_group(
+ 'valgrind',
+ 'options for valgrindPlugin (run command under test under Valgrind)')
+
+ self.argparser_group.add_argument(
+ '-V', '--valgrind', action='store_true',
+ help='Run commands under valgrind')
+
+ return self.argparser
+
+ def adjust_command(self, stage, command):
+ super().adjust_command(stage, command)
+ cmdform = 'list'
+ cmdlist = list()
+
+ if not self.args.valgrind:
+ return command
+
+ if self.args.verbose > 1:
+ print('{}.adjust_command'.format(self.sub_class))
+
+ if not isinstance(command, list):
+ cmdform = 'str'
+ cmdlist = command.split()
+ else:
+ cmdlist = command
+
+ if stage == 'execute':
+ if self.args.verbose > 1:
+ print('adjust_command: stage is {}; inserting valgrind stuff in command [{}] list [{}]'.
+ format(stage, command, cmdlist))
+ cmdlist.insert(0, '--track-origins=yes')
+ cmdlist.insert(0, '--show-leak-kinds=definite,indirect')
+ cmdlist.insert(0, '--leak-check=full')
+ cmdlist.insert(0, '--log-file=vgnd-{}.log'.format(self.args.testid))
+ cmdlist.insert(0, '-v') # ask for summary of non-leak errors
+ cmdlist.insert(0, ENVIR['VALGRIND_BIN'])
+ else:
+ pass
+
+ if cmdform == 'str':
+ command = ' '.join(cmdlist)
+ else:
+ command = cmdlist
+
+ if self.args.verbose > 1:
+ print('adjust_command: return command [{}]'.format(command))
+ return command
+
+ def post_execute(self):
+ if not self.args.valgrind:
+ return
+
+ self.definitely_lost_re = re.compile(
+ r'definitely lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\sblocks', re.MULTILINE | re.DOTALL)
+ self.indirectly_lost_re = re.compile(
+ r'indirectly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+ self.possibly_lost_re = re.compile(
+ r'possibly lost:\s+([,0-9]+)bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+ self.non_leak_error_re = re.compile(
+ r'ERROR SUMMARY:\s+([,0-9]+) errors from\s+([,0-9]+)\s+contexts', re.MULTILINE | re.DOTALL)
+
+ def_num = 0
+ ind_num = 0
+ pos_num = 0
+ nle_num = 0
+
+ # what about concurrent test runs? Maybe force them to be in different directories?
+ with open('vgnd-{}.log'.format(self.args.testid)) as vfd:
+ content = vfd.read()
+ def_mo = self.definitely_lost_re.search(content)
+ ind_mo = self.indirectly_lost_re.search(content)
+ pos_mo = self.possibly_lost_re.search(content)
+ nle_mo = self.non_leak_error_re.search(content)
+
+ if def_mo:
+ def_num = int(def_mo.group(2))
+ if ind_mo:
+ ind_num = int(ind_mo.group(2))
+ if pos_mo:
+ pos_num = int(pos_mo.group(2))
+ if nle_mo:
+ nle_num = int(nle_mo.group(1))
+
+ mem_results = ''
+ if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0):
+ mem_results += 'not '
+
+ mem_results += 'ok {} - {}-mem # {}\n'.format(
+ self.args.test_ordinal, self.args.testid, 'memory leak check')
+ self._add_to_tap(mem_results)
+ if mem_results.startswith('not '):
+ print('{}'.format(content))
+ self._add_to_tap(content)
+
+ def _add_to_tap(self, more_tap_output):
+ self.tap += more_tap_output
import os
import sys
import argparse
+import importlib
import json
import subprocess
+import time
+import traceback
from collections import OrderedDict
from string import Template
from tdc_config import *
from tdc_helper import *
-
-USE_NS = True
+import TdcPlugin
+
+
+class PluginMgrTestFail(Exception):
+ def __init__(self, stage, output, message):
+ self.stage = stage
+ self.output = output
+ self.message = message
+
+class PluginMgr:
+ def __init__(self, argparser):
+ super().__init__()
+ self.plugins = {}
+ self.plugin_instances = []
+ self.args = []
+ self.argparser = argparser
+
+ # TODO, put plugins in order
+ plugindir = os.getenv('TDC_PLUGIN_DIR', './plugins')
+ for dirpath, dirnames, filenames in os.walk(plugindir):
+ for fn in filenames:
+ if (fn.endswith('.py') and
+ not fn == '__init__.py' and
+ not fn.startswith('#') and
+ not fn.startswith('.#')):
+ mn = fn[0:-3]
+ foo = importlib.import_module('plugins.' + mn)
+ self.plugins[mn] = foo
+ self.plugin_instances.append(foo.SubPlugin())
+
+ def call_pre_suite(self, testcount, testidlist):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.pre_suite(testcount, testidlist)
+
+ def call_post_suite(self, index):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_suite(index)
+
+ def call_pre_case(self, test_ordinal, testid):
+ for pgn_inst in self.plugin_instances:
+ try:
+ pgn_inst.pre_case(test_ordinal, testid)
+ except Exception as ee:
+ print('exception {} in call to pre_case for {} plugin'.
+ format(ee, pgn_inst.__class__))
+ print('test_ordinal is {}'.format(test_ordinal))
+ print('testid is {}'.format(testid))
+ raise
+
+ def call_post_case(self):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_case()
+
+ def call_pre_execute(self):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.pre_execute()
+
+ def call_post_execute(self):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_execute()
+
+ def call_add_args(self, parser):
+ for pgn_inst in self.plugin_instances:
+ parser = pgn_inst.add_args(parser)
+ return parser
+
+ def call_check_args(self, args, remaining):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.check_args(args, remaining)
+
+ def call_adjust_command(self, stage, command):
+ for pgn_inst in self.plugin_instances:
+ command = pgn_inst.adjust_command(stage, command)
+ return command
+
+ @staticmethod
+ def _make_argparser(args):
+ self.argparser = argparse.ArgumentParser(
+ description='Linux TC unit tests')
def replace_keywords(cmd):
return subcmd
-def exec_cmd(command, nsonly=True):
+def exec_cmd(args, pm, stage, command):
"""
Perform any required modifications on an executable command, then run
it in a subprocess and return the results.
"""
- if (USE_NS and nsonly):
- command = 'ip netns exec $NS ' + command
-
+ if len(command.strip()) == 0:
+ return None, None
if '$' in command:
command = replace_keywords(command)
+ command = pm.call_adjust_command(stage, command)
+ if args.verbose > 0:
+ print('command "{}"'.format(command))
proc = subprocess.Popen(command,
shell=True,
stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
+ stderr=subprocess.PIPE,
+ env=ENVIR)
(rawout, serr) = proc.communicate()
if proc.returncode != 0 and len(serr) > 0:
return proc, foutput
-def prepare_env(cmdlist):
+def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
"""
- Execute the setup/teardown commands for a test case. Optionally
- terminate test execution if the command fails.
+ Execute the setup/teardown commands for a test case.
+ Optionally terminate test execution if the command fails.
"""
+ if args.verbose > 0:
+ print('{}'.format(prefix))
for cmdinfo in cmdlist:
- if (type(cmdinfo) == list):
+ if isinstance(cmdinfo, list):
exit_codes = cmdinfo[1:]
cmd = cmdinfo[0]
else:
exit_codes = [0]
cmd = cmdinfo
- if (len(cmd) == 0):
+ if not cmd:
continue
- (proc, foutput) = exec_cmd(cmd)
+ (proc, foutput) = exec_cmd(args, pm, stage, cmd)
+
+ if proc and (proc.returncode not in exit_codes):
+ print('', file=sys.stderr)
+ print("{} *** Could not execute: \"{}\"".format(prefix, cmd),
+ file=sys.stderr)
+ print("\n{} *** Error message: \"{}\"".format(prefix, foutput),
+ file=sys.stderr)
+ print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
+ print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
+ print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
+ raise PluginMgrTestFail(
+ stage, output,
+ '"{}" did not complete successfully'.format(prefix))
+
+def run_one_test(pm, args, index, tidx):
+ result = True
+ tresult = ""
+ tap = ""
+ if args.verbose > 0:
+ print("\t====================\n=====> ", end="")
+ print("Test " + tidx["id"] + ": " + tidx["name"])
+
+ pm.call_pre_case(index, tidx['id'])
+ prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"])
+
+ if (args.verbose > 0):
+ print('-----> execute stage')
+ pm.call_pre_execute()
+ (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"])
+ exit_code = p.returncode
+ pm.call_post_execute()
+
+ if (exit_code != int(tidx["expExitCode"])):
+ result = False
+ print("exit:", exit_code, int(tidx["expExitCode"]))
+ print(procout)
+ else:
+ if args.verbose > 0:
+ print('-----> verify stage')
+ match_pattern = re.compile(
+ str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
+ (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
+ if procout:
+ match_index = re.findall(match_pattern, procout)
+ if len(match_index) != int(tidx["matchCount"]):
+ result = False
+ elif int(tidx["matchCount"]) != 0:
+ result = False
+
+ if not result:
+ tresult += 'not '
+ tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name'])
+ tap += tresult
+
+ if result == False:
+ if procout:
+ tap += procout
+ else:
+ tap += 'No output!\n'
- if proc.returncode not in exit_codes:
- print
- print("Could not execute:")
- print(cmd)
- print("\nError message:")
- print(foutput)
- print("\nAborting test run.")
- ns_destroy()
- exit(1)
+ prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
+ pm.call_post_case()
+ index += 1
-def test_runner(filtered_tests, args):
+ return tap
+
+def test_runner(pm, args, filtered_tests):
"""
Driver function for the unit tests.
testlist = filtered_tests
tcount = len(testlist)
index = 1
- tap = str(index) + ".." + str(tcount) + "\n"
-
+ tap = ''
+ badtest = None
+ stage = None
+ emergency_exit = False
+ emergency_exit_message = ''
+
+ if args.notap:
+ if args.verbose:
+ tap = 'notap requested: omitting test plan\n'
+ else:
+ tap = str(index) + ".." + str(tcount) + "\n"
+ try:
+ pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
+ except Exception as ee:
+ ex_type, ex, ex_tb = sys.exc_info()
+ print('Exception {} {} (caught in pre_suite).'.
+ format(ex_type, ex))
+ # when the extra print statements are uncommented,
+ # the traceback does not appear between them
+ # (it appears way earlier in the tdc.py output)
+ # so don't bother ...
+ # print('--------------------(')
+ # print('traceback')
+ traceback.print_tb(ex_tb)
+ # print('--------------------)')
+ emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex)
+ emergency_exit = True
+ stage = 'pre-SUITE'
+
+ if emergency_exit:
+ pm.call_post_suite(index)
+ return emergency_exit_message
+ if args.verbose > 1:
+ print('give test rig 2 seconds to stabilize')
+ time.sleep(2)
for tidx in testlist:
- result = True
- tresult = ""
if "flower" in tidx["category"] and args.device == None:
+ if args.verbose > 1:
+ print('Not executing test {} {} because DEV2 not defined'.
+ format(tidx['id'], tidx['name']))
continue
- print("Test " + tidx["id"] + ": " + tidx["name"])
- prepare_env(tidx["setup"])
- (p, procout) = exec_cmd(tidx["cmdUnderTest"])
- exit_code = p.returncode
-
- if (exit_code != int(tidx["expExitCode"])):
- result = False
- print("exit:", exit_code, int(tidx["expExitCode"]))
- print(procout)
- else:
- match_pattern = re.compile(str(tidx["matchPattern"]), re.DOTALL)
- (p, procout) = exec_cmd(tidx["verifyCmd"])
- match_index = re.findall(match_pattern, procout)
- if len(match_index) != int(tidx["matchCount"]):
- result = False
-
- if result == True:
- tresult += "ok "
- else:
- tresult += "not ok "
- tap += tresult + str(index) + " " + tidx["id"] + " " + tidx["name"] + "\n"
-
- if result == False:
- tap += procout
-
- prepare_env(tidx["teardown"])
+ try:
+ badtest = tidx # in case it goes bad
+ tap += run_one_test(pm, args, index, tidx)
+ except PluginMgrTestFail as pmtf:
+ ex_type, ex, ex_tb = sys.exc_info()
+ stage = pmtf.stage
+ message = pmtf.message
+ output = pmtf.output
+ print(message)
+ print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'.
+ format(ex_type, ex, index, tidx['id'], tidx['name'], stage))
+ print('---------------')
+ print('traceback')
+ traceback.print_tb(ex_tb)
+ print('---------------')
+ if stage == 'teardown':
+ print('accumulated output for this test:')
+ if pmtf.output:
+ print(pmtf.output)
+ print('---------------')
+ break
index += 1
- return tap
-
-
-def ns_create():
- """
- Create the network namespace in which the tests will be run and set up
- the required network devices for it.
- """
- if (USE_NS):
- cmd = 'ip netns add $NS'
- exec_cmd(cmd, False)
- cmd = 'ip link add $DEV0 type veth peer name $DEV1'
- exec_cmd(cmd, False)
- cmd = 'ip link set $DEV1 netns $NS'
- exec_cmd(cmd, False)
- cmd = 'ip link set $DEV0 up'
- exec_cmd(cmd, False)
- cmd = 'ip -n $NS link set $DEV1 up'
- exec_cmd(cmd, False)
- cmd = 'ip link set $DEV2 netns $NS'
- exec_cmd(cmd, False)
- cmd = 'ip -n $NS link set $DEV2 up'
- exec_cmd(cmd, False)
-
-
-def ns_destroy():
- """
- Destroy the network namespace for testing (and any associated network
- devices as well)
- """
- if (USE_NS):
- cmd = 'ip netns delete $NS'
- exec_cmd(cmd, False)
+ # if we failed in setup or teardown,
+ # fill in the remaining tests with ok-skipped
+ count = index
+ if not args.notap:
+ tap += 'about to flush the tap output if tests need to be skipped\n'
+ if tcount + 1 != index:
+ for tidx in testlist[index - 1:]:
+ msg = 'skipped - previous {} failed'.format(stage)
+ tap += 'ok {} - {} # {} {} {}\n'.format(
+ count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
+ count += 1
+
+ tap += 'done flushing skipped test tap output\n'
+ pm.call_post_suite(index)
+ return tap
def has_blank_ids(idlist):
"""
"""
Set the command line arguments for tdc.
"""
- parser.add_argument('-p', '--path', type=str,
- help='The full path to the tc executable to use')
- parser.add_argument('-c', '--category', type=str, nargs='?', const='+c',
- help='Run tests only from the specified category, or if no category is specified, list known categories.')
- parser.add_argument('-f', '--file', type=str,
- help='Run tests from the specified file')
- parser.add_argument('-l', '--list', type=str, nargs='?', const="++", metavar='CATEGORY',
- help='List all test cases, or those only within the specified category')
- parser.add_argument('-s', '--show', type=str, nargs=1, metavar='ID', dest='showID',
- help='Display the test case with specified id')
- parser.add_argument('-e', '--execute', type=str, nargs=1, metavar='ID',
- help='Execute the single test case with specified ID')
- parser.add_argument('-i', '--id', action='store_true', dest='gen_id',
- help='Generate ID numbers for new test cases')
+ parser.add_argument(
+ '-p', '--path', type=str,
+ help='The full path to the tc executable to use')
+ sg = parser.add_argument_group(
+ 'selection', 'select which test cases: ' +
+ 'files plus directories; filtered by categories plus testids')
+ ag = parser.add_argument_group(
+ 'action', 'select action to perform on selected test cases')
+
+ sg.add_argument(
+ '-D', '--directory', nargs='+', metavar='DIR',
+ help='Collect tests from the specified directory(ies) ' +
+ '(default [tc-tests])')
+ sg.add_argument(
+ '-f', '--file', nargs='+', metavar='FILE',
+ help='Run tests from the specified file(s)')
+ sg.add_argument(
+ '-c', '--category', nargs='*', metavar='CATG', default=['+c'],
+ help='Run tests only from the specified category/ies, ' +
+ 'or if no category/ies is/are specified, list known categories.')
+ sg.add_argument(
+ '-e', '--execute', nargs='+', metavar='ID',
+ help='Execute the specified test cases with specified IDs')
+ ag.add_argument(
+ '-l', '--list', action='store_true',
+ help='List all test cases, or those only within the specified category')
+ ag.add_argument(
+ '-s', '--show', action='store_true', dest='showID',
+ help='Display the selected test cases')
+ ag.add_argument(
+ '-i', '--id', action='store_true', dest='gen_id',
+ help='Generate ID numbers for new test cases')
+ parser.add_argument(
+ '-v', '--verbose', action='count', default=0,
+ help='Show the commands that are being run')
+ parser.add_argument(
+ '-N', '--notap', action='store_true',
+ help='Suppress tap results for command under test')
parser.add_argument('-d', '--device',
help='Execute the test case in flower category')
return parser
-def check_default_settings(args):
+def check_default_settings(args, remaining, pm):
"""
- Process any arguments overriding the default settings, and ensure the
- settings are correct.
+ Process any arguments overriding the default settings,
+ and ensure the settings are correct.
"""
# Allow for overriding specific settings
global NAMES
if args.path != None:
- NAMES['TC'] = args.path
+ NAMES['TC'] = args.path
if args.device != None:
- NAMES['DEV2'] = args.device
+ NAMES['DEV2'] = args.device
if not os.path.isfile(NAMES['TC']):
print("The specified tc path " + NAMES['TC'] + " does not exist.")
exit(1)
+ pm.call_check_args(args, remaining)
+
def get_id_list(alltests):
"""
for c in alltests:
if (c["id"] == ""):
while True:
- newid = str('%04x' % random.randrange(16**4))
+ newid = str('{:04x}'.format(random.randrange(16**4)))
if (does_id_exist(alltests, newid)):
continue
else:
json.dump(testlist, outfile, indent=4)
outfile.close()
+def filter_tests_by_id(args, testlist):
+ '''
+ Remove tests from testlist that are not in the named id list.
+ If id list is empty, return empty list.
+ '''
+ newlist = list()
+ if testlist and args.execute:
+ target_ids = args.execute
+
+ if isinstance(target_ids, list) and (len(target_ids) > 0):
+ newlist = list(filter(lambda x: x['id'] in target_ids, testlist))
+ return newlist
+
+def filter_tests_by_category(args, testlist):
+ '''
+ Remove tests from testlist that are not in a named category.
+ '''
+ answer = list()
+ if args.category and testlist:
+ test_ids = list()
+ for catg in set(args.category):
+ if catg == '+c':
+ continue
+ print('considering category {}'.format(catg))
+ for tc in testlist:
+ if catg in tc['category'] and tc['id'] not in test_ids:
+ answer.append(tc)
+ test_ids.append(tc['id'])
+
+ return answer
def get_test_cases(args):
"""
If a test case file is specified, retrieve tests from that file.
Otherwise, glob for all json files in subdirectories and load from
each one.
+ Also, if requested, filter by category, and add tests matching
+ certain ids.
"""
import fnmatch
- if args.file != None:
- if not os.path.isfile(args.file):
- print("The specified test case file " + args.file + " does not exist.")
- exit(1)
- flist = [args.file]
- else:
- flist = []
- for root, dirnames, filenames in os.walk('tc-tests'):
+
+ flist = []
+ testdirs = ['tc-tests']
+
+ if args.file:
+ # at least one file was specified - remove the default directory
+ testdirs = []
+
+ for ff in args.file:
+ if not os.path.isfile(ff):
+ print("IGNORING file " + ff + "\n\tBECAUSE does not exist.")
+ else:
+ flist.append(os.path.abspath(ff))
+
+ if args.directory:
+ testdirs = args.directory
+
+ for testdir in testdirs:
+ for root, dirnames, filenames in os.walk(testdir):
for filename in fnmatch.filter(filenames, '*.json'):
- flist.append(os.path.join(root, filename))
- alltests = list()
+ candidate = os.path.abspath(os.path.join(root, filename))
+ if candidate not in testdirs:
+ flist.append(candidate)
+
+ alltestcases = list()
for casefile in flist:
- alltests = alltests + (load_from_file(casefile))
- return alltests
+ alltestcases = alltestcases + (load_from_file(casefile))
+ allcatlist = get_test_categories(alltestcases)
+ allidlist = get_id_list(alltestcases)
-def set_operation_mode(args):
+ testcases_by_cats = get_categorized_testlist(alltestcases, allcatlist)
+ idtestcases = filter_tests_by_id(args, alltestcases)
+ cattestcases = filter_tests_by_category(args, alltestcases)
+
+ cat_ids = [x['id'] for x in cattestcases]
+ if args.execute:
+ if args.category:
+ alltestcases = cattestcases + [x for x in idtestcases if x['id'] not in cat_ids]
+ else:
+ alltestcases = idtestcases
+ else:
+ if cat_ids:
+ alltestcases = cattestcases
+ else:
+ # just accept the existing value of alltestcases,
+ # which has been filtered by file/directory
+ pass
+
+ return allcatlist, allidlist, testcases_by_cats, alltestcases
+
+
+def set_operation_mode(pm, args):
"""
Load the test case data and process remaining arguments to determine
what the script should do for this run, and call the appropriate
function.
"""
- alltests = get_test_cases(args)
+ ucat, idlist, testcases, alltests = get_test_cases(args)
if args.gen_id:
- idlist = get_id_list(alltests)
if (has_blank_ids(idlist)):
alltests = generate_case_ids(alltests)
else:
print("Please correct them before continuing.")
exit(1)
- ucat = get_test_categories(alltests)
-
if args.showID:
- show_test_case_by_id(alltests, args.showID[0])
+ for atest in alltests:
+ print_test_case(atest)
exit(0)
- if args.execute:
- target_id = args.execute[0]
- else:
- target_id = ""
-
- if args.category:
- if (args.category == '+c'):
- print("Available categories:")
- print_sll(ucat)
- exit(0)
- else:
- target_category = args.category
- else:
- target_category = ""
-
-
- testcases = get_categorized_testlist(alltests, ucat)
+ if isinstance(args.category, list) and (len(args.category) == 0):
+ print("Available categories:")
+ print_sll(ucat)
+ exit(0)
if args.list:
- if (args.list == "++"):
+ if args.list:
list_test_cases(alltests)
exit(0)
- elif(len(args.list) > 0):
- if (args.list not in ucat):
- print("Unknown category " + args.list)
- print("Available categories:")
- print_sll(ucat)
- exit(1)
- list_test_cases(testcases[args.list])
- exit(0)
-
- if (os.geteuid() != 0):
- print("This script must be run with root privileges.\n")
- exit(1)
-
- ns_create()
-
- if (len(target_category) == 0):
- if (len(target_id) > 0):
- alltests = list(filter(lambda x: target_id in x['id'], alltests))
- if (len(alltests) == 0):
- print("Cannot find a test case with ID matching " + target_id)
- exit(1)
- catresults = test_runner(alltests, args)
- print("All test results: " + "\n\n" + catresults)
- elif (len(target_category) > 0):
- if (target_category == "flower") and args.device == None:
- print("Please specify a NIC device (-d) to run category flower")
- exit(1)
- if (target_category not in ucat):
- print("Specified category is not present in this file.")
- exit(1)
- else:
- catresults = test_runner(testcases[target_category], args)
- print("Category " + target_category + "\n\n" + catresults)
-
- ns_destroy()
+ if len(alltests):
+ catresults = test_runner(pm, args, alltests)
+ else:
+ catresults = 'No tests found\n'
+ if args.notap:
+ print('Tap output suppression requested\n')
+ else:
+ print('All test results: \n\n{}'.format(catresults))
def main():
"""
"""
parser = args_parse()
parser = set_args(parser)
+ pm = PluginMgr(parser)
+ parser = pm.call_add_args(parser)
(args, remaining) = parser.parse_known_args()
- check_default_settings(args)
+ args.NAMES = NAMES
+ check_default_settings(args, remaining, pm)
+ if args.verbose > 2:
+ print('args is {}'.format(args))
- set_operation_mode(args)
+ set_operation_mode(pm, args)
exit(0)
for i in range(0x100):
for j in range(0x100):
for k in range(0x100):
- mac = ("%02x:%02x:%02x" % (i, j, k))
+ mac = ("{:02x}:{:02x}:{:02x}".format(i, j, k))
src_mac = "e4:11:00:" + mac
dst_mac = "e4:12:00:" + mac
- cmd = ("filter add dev %s %s protocol ip parent ffff: flower %s "
- "src_mac %s dst_mac %s action drop %s" %
+ cmd = ("filter add dev {} {} protocol ip parent ffff: flower {} "
+ "src_mac {} dst_mac {} action drop {}".format
(device, prio, skip, src_mac, dst_mac, share_action))
- file.write("%s\n" % cmd)
+ file.write("{}\n".format(cmd))
index += 1
if index >= number:
file.close()
def print_test_case(tcase):
""" Pretty-printing of a given test case. """
+ print('\n==============\nTest {}\t{}\n'.format(tcase['id'], tcase['name']))
for k in tcase.keys():
if (isinstance(tcase[k], list)):
print(k + ":")
print_list(tcase[k])
else:
- print(k + ": " + tcase[k])
-
-
-def show_test_case_by_id(testlist, caseID):
- """ Find the specified test case to pretty-print. """
- if not any(d.get('id', None) == caseID for d in testlist):
- print("That ID does not exist.")
- exit(1)
- else:
- print_test_case(next((d for d in testlist if d['id'] == caseID)))
-
-
+ if not ((k == 'id') or (k == 'name')):
+ print(k + ": " + str(tcase[k]))
.PHONY: all all_32 all_64 warn_32bit_failure clean
-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
- check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
+UNAME_M := $(shell uname -m)
+CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
+ check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
protection_keys test_vdso test_vsyscall
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+# Some selftests require 32bit support enabled also on 64bit systems
+TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
-TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
+TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED)
TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11)
+TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED)
+endif
+
BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie
-UNAME_M := $(shell uname -m)
-CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
-
define gen-target-rule-32
$(1) $(1)_32: $(OUTPUT)/$(1)_32
.PHONY: $(1) $(1)_32
ifeq ($(CAN_BUILD_I386),1)
all: all_32
TEST_PROGS += $(BINARIES_32)
+EXTRA_CFLAGS += -DCAN_BUILD_32
$(foreach t,$(TARGETS_C_32BIT_ALL),$(eval $(call gen-target-rule-32,$(t))))
endif
ifeq ($(CAN_BUILD_X86_64),1)
all: all_64
TEST_PROGS += $(BINARIES_64)
+EXTRA_CFLAGS += -DCAN_BUILD_64
$(foreach t,$(TARGETS_C_64BIT_ALL),$(eval $(call gen-target-rule-64,$(t))))
endif
return si->si_upper;
}
#else
+
+/*
+ * This deals with old version of _sigfault in some distros:
+ *
+
+old _sigfault:
+ struct {
+ void *si_addr;
+ } _sigfault;
+
+new _sigfault:
+ struct {
+ void __user *_addr;
+ int _trapno;
+ short _addr_lsb;
+ union {
+ struct {
+ void __user *_lower;
+ void __user *_upper;
+ } _addr_bnd;
+ __u32 _pkey;
+ };
+ } _sigfault;
+ *
+ */
+
static inline void **__si_bounds_hack(siginfo_t *si)
{
void *sigfault = &si->_sifields._sigfault;
void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
- void **__si_lower = end_sigfault;
+ int *trapno = (int*)end_sigfault;
+ /* skip _trapno and _addr_lsb */
+ void **__si_lower = (void**)(trapno + 2);
return __si_lower;
}
static inline void *__si_bounds_upper(siginfo_t *si)
{
- return (*__si_bounds_hack(si)) + sizeof(void *);
+ return *(__si_bounds_hack(si) + 1);
}
#endif
return forkret;
}
-void davecmp(void *_a, void *_b, int len)
-{
- int i;
- unsigned long *a = _a;
- unsigned long *b = _b;
-
- for (i = 0; i < len / sizeof(*a); i++) {
- if (a[i] == b[i])
- continue;
-
- dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]);
- }
-}
-
-void dumpit(char *f)
-{
- int fd = open(f, O_RDONLY);
- char buf[100];
- int nr_read;
-
- dprintf2("maps fd: %d\n", fd);
- do {
- nr_read = read(fd, &buf[0], sizeof(buf));
- write(1, buf, nr_read);
- } while (nr_read > 0);
- close(fd);
-}
-
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
int main()
{
+#ifdef CAN_BUILD_32
int tmp;
+#endif
sethandler(SIGTRAP, sigtrap, 0);
: : "c" (post_nop) : "r11");
check_result();
#endif
-
+#ifdef CAN_BUILD_32
printf("[RUN]\tSet TF and check int80\n");
set_eflags(get_eflags() | X86_EFLAGS_TF);
asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
: INT80_CLOBBERS);
check_result();
+#endif
/*
* This test is particularly interesting if fast syscalls use
vdso_size += PAGE_SIZE;
}
+#ifdef __i386__
/* Glibc is likely to explode now - exit with raw syscall */
asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret));
+#else /* __x86_64__ */
+ syscall(SYS_exit, ret);
+#endif
} else {
int status;
# endif
#endif
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
int nerrs = 0;
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+
+getcpu_t vgetcpu;
+getcpu_t vdso_getcpu;
+
+static void *vsyscall_getcpu(void)
+{
#ifdef __x86_64__
-# define VSYS(x) (x)
+ FILE *maps;
+ char line[MAPS_LINE_LEN];
+ bool found = false;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
+ return NULL;
+
+ while (fgets(line, MAPS_LINE_LEN, maps)) {
+ char r, x;
+ void *start, *end;
+ char name[MAPS_LINE_LEN];
+
+ /* sscanf() is safe here as strlen(name) >= strlen(line) */
+ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+ &start, &end, &r, &x, name) != 5)
+ continue;
+
+ if (strcmp(name, "[vsyscall]"))
+ continue;
+
+ /* assume entries are OK, as we test vDSO here not vsyscall */
+ found = true;
+ break;
+ }
+
+ fclose(maps);
+
+ if (!found) {
+ printf("Warning: failed to find vsyscall getcpu\n");
+ return NULL;
+ }
+ return (void *) (0xffffffffff600800);
#else
-# define VSYS(x) 0
+ return NULL;
#endif
+}
-typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
-
-const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
-getcpu_t vdso_getcpu;
-void fill_function_pointers()
+static void fill_function_pointers()
{
void *vdso = dlopen("linux-vdso.so.1",
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
if (!vdso_getcpu)
printf("Warning: failed to find getcpu in vDSO\n");
+
+ vgetcpu = (getcpu_t) vsyscall_getcpu();
}
static long sys_getcpu(unsigned * cpu, unsigned * node,
# endif
#endif
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
#ifdef __x86_64__
int nerrs = 0;
FILE *maps;
- char line[128];
+ char line[MAPS_LINE_LEN];
bool found = false;
maps = fopen("/proc/self/maps", "r");
return 0;
}
- while (fgets(line, sizeof(line), maps)) {
+ while (fgets(line, MAPS_LINE_LEN, maps)) {
char r, x;
void *start, *end;
- char name[128];
+ char name[MAPS_LINE_LEN];
+
+ /* sscanf() is safe here as strlen(name) >= strlen(line) */
if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
&start, &end, &r, &x, name) != 5)
continue;
# SPDX-License-Identifier: GPL-2.0
# Makefile for USB tools
-CC = $(CROSS_COMPILE)gcc
PTHREAD_LIBS = -lpthread
WARNINGS = -Wall -Wextra
CFLAGS = $(WARNINGS) -g -I../include
LIB_DIR = ../lib/api
LIBS = $(LIB_DIR)/libapi.a
-CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall -Wextra -I../lib/
LDFLAGS = $(LIBS)
SBINDIR ?= sbin
INSTALL ?= install
CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
-CC = $(CROSS_COMPILE)gcc
TARGET = dell-smbios-example